pdftree 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdftree/__init__.py +0 -0
- pdftree/app.py +599 -0
- pdftree/pdf_utils.py +124 -0
- pdftree/screens.py +106 -0
- pdftree/styles.tcss +68 -0
- pdftree/tree_utils.py +59 -0
- pdftree/widgets.py +52 -0
- pdftree-0.1.0.dist-info/METADATA +19 -0
- pdftree-0.1.0.dist-info/RECORD +12 -0
- pdftree-0.1.0.dist-info/WHEEL +4 -0
- pdftree-0.1.0.dist-info/entry_points.txt +2 -0
- pdftree-0.1.0.dist-info/licenses/LICENSE +373 -0
pdftree/__init__.py
ADDED
|
File without changes
|
pdftree/app.py
ADDED
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
import shlex
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
|
|
8
|
+
import pikepdf
|
|
9
|
+
from rich.text import Text
|
|
10
|
+
from textual.app import App, ComposeResult
|
|
11
|
+
from textual.binding import Binding
|
|
12
|
+
from textual.containers import Horizontal, Vertical
|
|
13
|
+
from textual.widgets import Input, Label, RichLog
|
|
14
|
+
from textual.widgets import Tree as TextualTree
|
|
15
|
+
from textual.widgets.tree import TreeNode
|
|
16
|
+
|
|
17
|
+
from .pdf_utils import JumpReference, build_tree, is_content_stream
|
|
18
|
+
from .screens import HelpScreen, PromptScreen, UnsavedChangesScreen
|
|
19
|
+
from .tree_utils import (
|
|
20
|
+
expand_to,
|
|
21
|
+
get_node_by_path,
|
|
22
|
+
get_node_name,
|
|
23
|
+
iter_nodes,
|
|
24
|
+
rebuild_stream_label,
|
|
25
|
+
)
|
|
26
|
+
from .widgets import PageInput, PDFTree, SearchInput
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PDFTreeApp(App):
    """A Textual app to interactively explore PDF structures and view stream contents."""

    # Key bindings: each entry maps a key to an action name plus the
    # description text. Most action names correspond to an ``action_<name>``
    # method defined on this class.
    BINDINGS = [
        # Help is reachable through two keys (F1 and "H").
        Binding("f1", "show_help", "Help", show=True),
        Binding("H", "show_help", "Help", show=True),
        Binding("q", "quit", "Quit", show=True),
        Binding("g", "prompt_page", "Go to Page (g)", show=True),
        Binding("s", "export_stream", "Save stream (s)", show=True),
        Binding("e", "edit_stream", "Edit Stream (e)", show=True),
        Binding("f", "normalize_stream", "Format Stream (f)", show=True),
        Binding("w", "save_pdf", "Save PDF (w)", show=True),
        # ctrl+c shares the "quit" action with "q".
        Binding("ctrl+c", "quit", "Quit", show=True),
        # NOTE(review): no action_suspend_process is defined in this class;
        # presumably it is inherited from App -- confirm.
        Binding("ctrl+z", "suspend_process", "Suspend", show=True),
        Binding("ctrl+l", "redraw_screen", "Redraw", show=False),
        # Search: "/" and "?" open the search bar, "n" and "p" repeat the
        # last query forward / backward.
        Binding("/", "search_forward", "Search (/)", show=True),
        Binding("?", "search_backward", "Search (?)", show=True),
        Binding("n", "repeat_search_forward", "Next (n)", show=True),
        Binding("p", "repeat_search_backward", "Prev (p)", show=True),
    ]

    # Title string for the application.
    TITLE = "pdftree - Interactive Object Explorer"

    # Stylesheet file name; the package ships a styles.tcss next to this module.
    CSS_PATH = "styles.tcss"
|
|
53
|
+
|
|
54
|
+
def __init__(self, pdf_path: str):
    """Create the app for the PDF located at *pdf_path*.

    The document is not opened here: ``self.pdf`` stays ``None`` until
    ``on_mount`` opens it.
    """
    super().__init__()
    self.pdf_path = pdf_path
    self.pdf = None
    # Most recent search text, reused by the repeat-search actions.
    self.last_search_query: str | None = None
    self._search_direction: str = "forward"
    # Set right before the cursor is moved programmatically (search / jump)
    # so that on_tree_node_highlighted skips one breadcrumb update and the
    # status message stays visible. Stored on self, not on tree nodes.
    self._programmatic_move: bool = False
    # Filled by build_tree(); looked up by a (object number, generation) pair.
    self.obj_to_node: dict[tuple[int, int], TreeNode] = {}
    self.is_dirty: bool = False
    # Node remembered by action_export_stream while its filename prompt is
    # open. Declared here so the attribute always exists.
    self._pending_export_node: TreeNode | None = None
|
|
65
|
+
|
|
66
|
+
# -------------------------------------------------------------------------
|
|
67
|
+
# Normalize stream
|
|
68
|
+
# -------------------------------------------------------------------------
|
|
69
|
+
|
|
70
|
+
def action_normalize_stream(self) -> None:
    """Re-serialise the selected content stream through pikepdf.

    The stream under the cursor is parsed with
    ``pikepdf.parse_content_stream`` and written back with
    ``pikepdf.unparse_content_stream``. If the bytes differ, the stream is
    replaced, its label rebuilt, the document marked dirty and the node
    re-selected so the detail pane shows the new text. Every outcome is
    reported on the breadcrumb label.
    """

    def show(message: str) -> None:
        self.query_one("#breadcrumb", Label).update(message)

    tree = self.query_one("#tree-pane", TextualTree)
    node = tree.cursor_node

    on_stream = node is not None and isinstance(node.data, pikepdf.Stream)
    if not on_stream:
        show("[yellow]Please select a Stream node (Red) to format.[/yellow]")
        return

    node_name = get_node_name(node)
    parent_name = "" if node.parent is None else get_node_name(node.parent)

    if not is_content_stream(node.data, node_name, parent_name):
        show(
            f"[yellow]Not reformatting '{node_name}' (parent: '{parent_name}') as it is not a content stream.[/yellow]"
        )
        return

    try:
        stream = node.data

        # Round-trip through pikepdf's content stream parser.
        operations = pikepdf.parse_content_stream(stream)
        normalized_bytes = pikepdf.unparse_content_stream(operations)

        old_bytes = stream.read_bytes()
        if normalized_bytes == old_bytes:
            show("[dim]Stream already formatted or unchanged.[/dim]")
        else:
            stream.write(normalized_bytes)
            rebuild_stream_label(node, len(normalized_bytes))
            self.is_dirty = True

            show(
                f"[green]Stream formatted! Length: {len(old_bytes)} -> {len(normalized_bytes)} bytes.[/green]"
            )

            # Re-select the node so the detail pane is redrawn.
            self.call_after_refresh(self.do_jump_factory(tree, node))
    except Exception as e:
        # Reached, for example, when the stream is not a parsable content
        # stream (such as image data).
        show(f"[red]Failed to format (might not be a content stream):[/red] {e}")
|
|
126
|
+
|
|
127
|
+
def do_jump_factory(self, tree, node):
    """Return a zero-argument callable that selects *node* in *tree*.

    The callable is meant to be scheduled (e.g. via ``call_after_refresh``).
    It raises ``self._programmatic_move`` so that the highlight handler
    skips one breadcrumb update and keeps the status message visible.

    The flag is only cleared by the highlight handler, so it is raised only
    when the cursor will really move. Selecting the node that is already
    under the cursor (as the format / edit actions do) would otherwise leave
    the flag set and swallow the breadcrumb update of the user's next
    cursor movement.
    """

    def jump():
        # Evaluated at call time: the cursor may have moved since scheduling.
        self._programmatic_move = tree.cursor_node is not node
        tree.select_node(node)

    return jump
|
|
133
|
+
|
|
134
|
+
# -------------------------------------------------------------------------
|
|
135
|
+
# Prompt for save on quit
|
|
136
|
+
# -------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
def action_quit(self) -> None:
    """Quit the app, asking for confirmation first if there are unsaved changes."""
    has_unsaved_changes = getattr(self, "is_dirty", False)
    if not has_unsaved_changes:
        # Nothing to lose: leave right away.
        self.exit()
        return

    # Let the user decide; the answer arrives in _quit_confirm_callback.
    self.push_screen(UnsavedChangesScreen(), self._quit_confirm_callback)
|
|
146
|
+
|
|
147
|
+
def _quit_confirm_callback(self, quit_anyway: bool) -> None:
|
|
148
|
+
"""Callback fired when the UnsavedChangesScreen is dismissed."""
|
|
149
|
+
if quit_anyway:
|
|
150
|
+
self.exit()
|
|
151
|
+
|
|
152
|
+
# -------------------------------------------------------------------------
|
|
153
|
+
# Screen helpers
|
|
154
|
+
# -------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
def action_redraw_screen(self, *args, **kwargs) -> None:
    """Refresh the current screen, including its layout.

    Extra positional / keyword arguments are accepted and ignored, so the
    method can also be used as a signal callback.
    """
    current_screen = self.screen
    current_screen.refresh(layout=True)
|
|
158
|
+
|
|
159
|
+
# -------------------------------------------------------------------------
|
|
160
|
+
# Page navigation
|
|
161
|
+
# -------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
def action_prompt_page(self) -> None:
    """Show the page-number input, emptied and focused."""
    prompt = self.query_one("#page-input", PageInput)
    prompt.value = ""
    prompt.display = True
    prompt.focus()
|
|
169
|
+
|
|
170
|
+
def action_cancel_page_jump(self) -> None:
    """Hide the page-number input and hand focus back to the tree."""
    prompt = self.query_one("#page-input", PageInput)
    prompt.display = False

    tree_pane = self.query_one("#tree-pane")
    tree_pane.focus()
|
|
175
|
+
|
|
176
|
+
# -------------------------------------------------------------------------
|
|
177
|
+
# Export stream
|
|
178
|
+
# -------------------------------------------------------------------------
|
|
179
|
+
|
|
180
|
+
def _save_stream_callback(self, filename: str | None) -> None:
    """Write the pending stream's bytes to *filename*.

    Called when the export PromptScreen is dismissed. A falsy *filename*
    (cancelled or empty input) is ignored. The node to export is the one
    stored by ``action_export_stream``. Success or failure is reported on
    the breadcrumb label.
    """
    if not filename:
        return  # User canceled or entered an empty string

    node = getattr(self, "_pending_export_node", None)
    if node is None:
        return
    # Re-check that the remembered node still carries a stream.
    if not isinstance(node.data, pikepdf.Stream):
        return

    try:
        raw_bytes = node.data.read_bytes()
        pathlib.Path(filename).write_bytes(raw_bytes)

        message = f"[green]Successfully saved {len(raw_bytes)} bytes to '{filename}'[/green]"
        self.query_one("#breadcrumb", Label).update(message)
    except Exception as e:
        self.query_one("#breadcrumb", Label).update(f"[red]Failed to save file:[/red] {e}")
|
|
201
|
+
|
|
202
|
+
# -------------------------------------------------------------------------
|
|
203
|
+
# Edit stream
|
|
204
|
+
# -------------------------------------------------------------------------
|
|
205
|
+
|
|
206
|
+
def action_edit_stream(self) -> None:
    """Edit the selected stream in an external editor and write it back.

    The stream's bytes are exported to a temporary file, the TUI is
    suspended while ``$EDITOR`` (default: ``nano``, or ``notepad`` on
    Windows) runs on that file, and the file is read back afterwards. If
    the bytes changed, the stream is replaced, its label rebuilt, the
    document marked dirty and the node re-selected. Every outcome is
    reported on the breadcrumb label.

    The temporary file is removed on every path, including unexpected
    exceptions. An empty or unparsable ``$EDITOR`` and any ``OSError``
    raised while starting the editor are reported instead of crashing.
    """

    def show(message: str) -> None:
        self.query_one("#breadcrumb", Label).update(message)

    tree = self.query_one("#tree-pane", PDFTree)
    node = tree.cursor_node

    if node is None or not isinstance(node.data, pikepdf.Stream):
        show("[yellow]Please select a Stream node (Red) to edit.[/yellow]")
        return

    # Resolve the editor command first. A blank $EDITOR falls back to the
    # default; otherwise the command line would consist of the temp file
    # alone and the OS would try to execute it.
    default_editor = "notepad" if os.name == "nt" else "nano"
    editor_env = os.environ.get("EDITOR", "").strip() or default_editor
    try:
        editor_cmd = shlex.split(editor_env)
    except ValueError as e:
        # e.g. an unbalanced quote in $EDITOR
        show(f"[red]Could not parse $EDITOR:[/red] {e}")
        return
    if not editor_cmd:
        editor_cmd = [default_editor]

    temp_path = None
    try:
        # 1. Export the stream to a temp file
        try:
            old_bytes = node.data.read_bytes()
            fd, temp_path = tempfile.mkstemp(suffix=".txt")
            with os.fdopen(fd, "wb") as f:
                f.write(old_bytes)
        except Exception as e:
            show(f"[red]Error reading stream:[/red] {e}")
            return

        cmd_list = editor_cmd + [temp_path]

        # 2. Run the editor with the TUI suspended
        try:
            with self.suspend():
                subprocess.run(cmd_list, check=True)
        except FileNotFoundError:
            show(f"[red]Editor not found:[/red] '{cmd_list[0]}'. Check your $EDITOR variable.")
            return
        except subprocess.CalledProcessError as e:
            show(f"[red]Editor exited with an error code:[/red] {e.returncode}")
            return
        except OSError as e:
            # e.g. PermissionError when the command is not executable
            show(f"[red]Could not start editor:[/red] {e}")
            return

        # 3. Read the result back and apply it
        try:
            with open(temp_path, "rb") as f:
                new_bytes = f.read()

            if new_bytes != old_bytes:
                node.data.write(new_bytes)
                rebuild_stream_label(node, len(new_bytes))

                # Set dirty flag *after* successful write
                self.is_dirty = True

                show(
                    f"[green]Stream updated! Length changed: {len(old_bytes)} -> {len(new_bytes)} bytes.[/green]"
                )
                self.call_after_refresh(self.do_jump_factory(tree, node))
            else:
                show("[dim]Stream unchanged. Editing canceled.[/dim]")
        except Exception as e:
            show(f"[red]Error saving stream data:[/red] {e}")
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the outcome already reported to the user.
                pass
|
|
274
|
+
|
|
275
|
+
# -------------------------------------------------------------------------
|
|
276
|
+
# Save PDF
|
|
277
|
+
# -------------------------------------------------------------------------
|
|
278
|
+
|
|
279
|
+
def action_save_pdf(self) -> None:
    """Ask for a filename under which to save the whole document.

    The suggested name is the input file's stem with ``_modified`` appended,
    keeping the original suffix. The answer is handled by
    ``_save_pdf_callback``.
    """
    source = pathlib.Path(self.pdf_path)
    suggested_name = f"{source.stem}_modified{source.suffix}"

    prompt = PromptScreen("Save Entire PDF As:", suggested_name)
    self.push_screen(prompt, self._save_pdf_callback)
|
|
287
|
+
|
|
288
|
+
def action_export_stream(self) -> None:
    """Ask for a filename under which to save the stream under the cursor.

    If the cursor is not on a stream node, a hint is shown on the breadcrumb
    label instead. Otherwise the node is remembered and the answer is
    handled by ``_save_stream_callback``.
    """
    tree = self.query_one("#tree-pane", PDFTree)
    node = tree.cursor_node

    if node is None or not isinstance(node.data, pikepdf.Stream):
        self.query_one("#breadcrumb", Label).update(
            "[yellow]Please select a Stream node (Red) to export.[/yellow]"
        )
        return

    # Remember the node: the cursor may move while the prompt is open.
    self._pending_export_node = node
    prompt = PromptScreen("Export Stream As:", "stream.bin")
    self.push_screen(prompt, self._save_stream_callback)
|
|
303
|
+
|
|
304
|
+
def _save_pdf_callback(self, filename: str | None) -> None:
    """Save the in-memory document to *filename* and clear the dirty flag.

    Called when the "Save Entire PDF As:" prompt is dismissed. A falsy
    *filename* (cancelled or empty input) is ignored. Success or failure is
    reported on the breadcrumb label.
    """
    if not filename:
        return

    # self.pdf is still None when opening the input file failed. Say so
    # explicitly instead of surfacing an AttributeError message.
    if self.pdf is None:
        self.query_one("#breadcrumb", Label).update(
            "[red]Failed to save PDF:[/red] no document is loaded"
        )
        return

    try:
        # Dump the in-memory pikepdf object tree back out to disk
        self.pdf.save(filename)
        self.is_dirty = False

        self.query_one("#breadcrumb", Label).update(
            f"[green]Successfully saved modified PDF to '{filename}'[/green]"
        )
    except Exception as e:
        self.query_one("#breadcrumb", Label).update(f"[red]Failed to save PDF:[/red] {e}")
|
|
318
|
+
|
|
319
|
+
# -------------------------------------------------------------------------
|
|
320
|
+
# Help
|
|
321
|
+
# -------------------------------------------------------------------------
|
|
322
|
+
|
|
323
|
+
def action_show_help(self) -> None:
    """Open the help screen."""
    help_screen = HelpScreen()
    self.push_screen(help_screen)
|
|
326
|
+
|
|
327
|
+
# -------------------------------------------------------------------------
|
|
328
|
+
# Search actions
|
|
329
|
+
# -------------------------------------------------------------------------
|
|
330
|
+
|
|
331
|
+
def action_search_forward(self) -> None:
    """Switch the search direction to forward and open the search bar."""
    self._search_direction = "forward"
    bar_hint = "forward (/)"
    self._open_search_bar(bar_hint)
|
|
334
|
+
|
|
335
|
+
def action_search_backward(self) -> None:
    """Switch the search direction to backward and open the search bar."""
    self._search_direction = "backward"
    bar_hint = "backward (?)"
    self._open_search_bar(bar_hint)
|
|
338
|
+
|
|
339
|
+
def action_repeat_search_forward(self) -> None:
    """Run the last search query again, searching forward."""
    self._search_direction = "forward"
    previous_query = self.last_search_query
    self._perform_search(previous_query)
|
|
342
|
+
|
|
343
|
+
def action_repeat_search_backward(self) -> None:
    """Run the last search query again, searching backward."""
    self._search_direction = "backward"
    previous_query = self.last_search_query
    self._perform_search(previous_query)
|
|
346
|
+
|
|
347
|
+
def action_cancel_search(self) -> None:
    """Hide and empty the search bar, then hand focus back to the tree."""
    bar = self.query_one("#search-bar", SearchInput)
    bar.display = False
    bar.value = ""

    tree_pane = self.query_one("#tree-pane")
    tree_pane.focus()
|
|
352
|
+
|
|
353
|
+
def _open_search_bar(self, direction_text: str) -> None:
    """Show and focus the search bar, naming *direction_text* in its placeholder."""
    hint = f"Search {direction_text} — Enter to jump · Esc or ctrl+g to cancel"

    bar = self.query_one("#search-bar", SearchInput)
    bar.placeholder = hint
    bar.display = True
    bar.focus()
|
|
360
|
+
|
|
361
|
+
def on_input_submitted(self, event: Input.Submitted) -> None:
    """Dispatch a submitted input to the search or page-jump logic.

    The submitting input is hidden and focus returns to the tree. The value
    is then routed by the input's id: ``search-bar`` stores the stripped,
    lower-cased query (when non-empty) and runs the search; ``page-input``
    passes the stripped value on to ``_handle_page_jump``.
    """
    submitted = event.input
    submitted.display = False
    self.query_one("#tree-pane").focus()

    input_id = submitted.id
    if input_id == "page-input":
        self._handle_page_jump(event.value.strip())
        return
    if input_id != "search-bar":
        return

    query = event.value.strip().lower()
    if query:
        self.last_search_query = query
    # NOTE(review): the source this was recovered from lost its indentation.
    # The search is assumed to run even for an empty submission (repeating
    # the previous query) -- confirm against the original file.
    self._perform_search(self.last_search_query)
|
|
373
|
+
|
|
374
|
+
def _handle_page_jump(self, value: str) -> None:
    """Move the cursor to the tree node of the page numbered *value*.

    *value* is the already-stripped text from the page input; an empty
    string is ignored. Page numbers are 1-based. Invalid input, an
    out-of-range number, a missing document or a page without a registered
    tree node are reported on the breadcrumb label.
    """

    def show(message: str) -> None:
        self.query_one("#breadcrumb", Label).update(message)

    if not value:
        return

    # self.pdf is still None when opening the input file failed. Without
    # this guard the page lookup below raises an uncaught AttributeError.
    if self.pdf is None:
        show("[red]No PDF is loaded; cannot jump to a page.[/red]")
        return

    try:
        page_num = int(value)
    except ValueError:
        show(f"[red]Invalid page number:[/red] {value}")
        return

    num_pages = len(self.pdf.pages)
    if not (1 <= page_num <= num_pages):
        show(f"[red]Page {page_num} out of bounds (1-{num_pages})[/red]")
        return

    # 1. pikepdf gives us the canonical page dictionary via the flat .pages list
    page_obj = self.pdf.pages[page_num - 1]

    # 2. Look the page up by its object/generation pair
    target_node = self.obj_to_node.get(page_obj.objgen)

    if target_node:
        expand_to(target_node)

        tree = self.query_one("#tree-pane", PDFTree)
        self.call_after_refresh(self.do_jump_factory(tree, target_node))

        show(
            f"[green]Jumped to Page {page_num} ({page_obj.objgen[0]}:{page_obj.objgen[1]})[/green]"
        )
    else:
        show(f"[red]Could not find tree node for Page {page_num}[/red]")
|
|
411
|
+
|
|
412
|
+
def _perform_search(self, query: str | None) -> None:
    """Jump to the next node whose label contains *query*.

    A falsy *query* is ignored. The search starts next to the cursor node,
    proceeds in ``self._search_direction`` and wraps around, checking the
    cursor node itself last. Labels are compared lower-cased. The outcome
    is reported on the breadcrumb label.
    """
    if not query:
        return

    tree = self.query_one("#tree-pane", PDFTree)
    tree.focus()

    all_nodes = list(iter_nodes(tree.root))

    try:
        pivot = all_nodes.index(tree.cursor_node)
    except ValueError:
        pivot = None  # cursor node is not among the iterated nodes

    going_forward = self._search_direction == "forward"
    if pivot is None:
        # No usable starting point: scan everything in the chosen direction.
        candidates = all_nodes if going_forward else all_nodes[::-1]
    elif going_forward:
        candidates = all_nodes[pivot + 1 :] + all_nodes[: pivot + 1]
    else:
        before_cursor = all_nodes[:pivot]
        from_cursor = all_nodes[pivot:]
        candidates = before_cursor[::-1] + from_cursor[::-1]

    match = None
    for candidate in candidates:
        if query in candidate.label.plain.lower():
            match = candidate
            break

    if match:
        expand_to(match)
        self.call_after_refresh(self.do_jump_factory(tree, match))
        status = f"[green]Found:[/green] {query}"
    else:
        status = f"[red]Not found:[/red] {query}"

    self.query_one("#breadcrumb", Label).update(status)
|
|
448
|
+
|
|
449
|
+
# -------------------------------------------------------------------------
|
|
450
|
+
# Layout
|
|
451
|
+
# -------------------------------------------------------------------------
|
|
452
|
+
|
|
453
|
+
def compose(self) -> ComposeResult:
    """Yield the app's widgets: tree pane, right pane and the two input bars."""
    # NOTE(review): the source this was recovered from lost its indentation.
    # The nesting below (tree + right pane inside the Horizontal, both
    # inputs outside it) is a reconstruction -- confirm against the
    # original file.
    with Horizontal():
        # The tree's root label is the path of the PDF.
        yield PDFTree(f"[bold magenta]{self.pdf_path}[/bold magenta]", id="tree-pane")
        with Vertical(id="right-pane"):
            # Breadcrumb / status label; updated throughout the app.
            yield Label("Trailer", id="breadcrumb")
            yield RichLog(id="details-pane", highlight=True, wrap=True, auto_scroll=False)
    # Both inputs are hidden in on_mount and shown on demand.
    yield SearchInput(
        placeholder="Search nodes (Enter to jump, Esc or ctrl+g to cancel)...",
        id="search-bar",
    )
    yield PageInput(
        placeholder="Go to page (Enter to jump, Esc or ctrl+g to cancel)...",
        id="page-input",
    )
|
|
467
|
+
|
|
468
|
+
# -------------------------------------------------------------------------
|
|
469
|
+
# Lifecycle
|
|
470
|
+
# -------------------------------------------------------------------------
|
|
471
|
+
|
|
472
|
+
def on_mount(self) -> None:
    """Prepare the widgets, open the PDF and populate the tree."""
    tree = self.query_one("#tree-pane", PDFTree)
    log = self.query_one("#details-pane", RichLog)

    tree.auto_expand = False
    tree.root.expand()
    # Initial hint shown in the details pane.
    log.write(
        Text.from_markup(
            "[dim italic]Select a Stream node (Red) to view contents, "
            "or click a ↪ Jump link to navigate.[/dim italic]"
        )
    )

    # Redraw the screen whenever the app resume signal fires.
    self.app_resume_signal.subscribe(self, self.action_redraw_screen)
    # Both input bars start hidden; the prompt / search actions show them.
    self.query_one("#search-bar").display = False
    self.query_one("#page-input").display = False

    try:
        self.pdf = pikepdf.Pdf.open(self.pdf_path)
        with self.app.batch_update():
            # Builds the node tree from the trailer and fills
            # self.obj_to_node along the way.
            build_tree(
                self.pdf.trailer,
                tree.root,
                node_registry=self.obj_to_node,
                name="Trailer",
            )
    except Exception as e:
        # The app keeps running; the error is shown as a leaf in the tree.
        tree.root.add_leaf(f"[bold red]Fatal Error opening PDF: {e}[/bold red]")

    self._startup_selection(tree)
|
|
502
|
+
|
|
503
|
+
def on_unmount(self) -> None:
    """Close the PDF document, if one is held."""
    document = self.pdf
    if not document:
        return
    document.close()
|
|
506
|
+
|
|
507
|
+
# -------------------------------------------------------------------------
|
|
508
|
+
# Tree events
|
|
509
|
+
# -------------------------------------------------------------------------
|
|
510
|
+
|
|
511
|
+
def on_tree_node_highlighted(self, event: TextualTree.NodeHighlighted) -> None:
    """Show the highlighted node's path on the breadcrumb label.

    The path lists the node names from below the root down to the node,
    joined with " > ". When the highlight results from a programmatic move,
    the update is skipped once so a status message is not overwritten.
    """
    node = event.node
    if node is None:
        return

    if self._programmatic_move:
        # Consume the flag; the breadcrumb keeps its status message.
        self._programmatic_move = False
        return

    # Walk up towards the root; the root itself (no parent) is left out.
    names = []
    while node is not None and node.parent is not None:
        names.append(get_node_name(node))
        node = node.parent

    trail = " > ".join(reversed(names))
    self.query_one("#breadcrumb", Label).update(trail)
|
|
526
|
+
|
|
527
|
+
def on_tree_node_selected(self, event: TextualTree.NodeSelected) -> None:
    """React to a selected node: follow a jump link or show a stream.

    Nodes whose data is neither a JumpReference nor a pikepdf.Stream are
    ignored. For a jump reference the cursor is moved to the target node.
    For a stream the bytes returned by ``read_bytes()`` are written to the
    details pane as UTF-8 text, or as a ``repr`` of the first 500 bytes
    when they do not decode.
    """
    log = self.query_one("#details-pane", RichLog)
    tree = self.query_one("#tree-pane", TextualTree)
    node_data = event.node.data

    is_jump = isinstance(node_data, JumpReference)
    if not is_jump and not isinstance(node_data, pikepdf.Stream):
        return

    log.clear()
    log.scroll_home(animate=False)

    if is_jump:
        target = node_data.target_node
        if target:
            expand_to(target)
            self.call_after_refresh(self.do_jump_factory(tree, target))
            # NOTE(review): the source this was recovered from lost its
            # indentation. These two messages are assumed to belong to the
            # "target exists" branch -- confirm against the original file.
            log.write(Text.from_markup("[bold yellow]--- Jumped to Object ---[/bold yellow]"))
            log.write(
                Text.from_markup(
                    "[dim]Moved cursor to the original location of this object.[/dim]"
                )
            )
        return

    # From here on node_data is a pikepdf.Stream.
    objgen_str = ":".join(map(str, node_data.objgen))
    header = f"[bold magenta]--- Obj {objgen_str} Decompressed Stream Output ---[/bold magenta]\n"
    log.write(Text.from_markup(header))

    try:
        raw_bytes = node_data.read_bytes()
        try:
            log.write(raw_bytes.decode("utf-8"))
        except UnicodeDecodeError:
            # Not valid UTF-8: show a short binary preview instead.
            log.write(
                Text.from_markup(f"[bold red]<Binary Stream: {len(raw_bytes)} bytes>[/bold red]")
            )
            log.write(Text.from_markup("[dim]First 500 bytes as repr:[/dim]\n"))
            log.write(repr(raw_bytes[:500]))
    except Exception as e:
        log.write(Text.from_markup(f"[bold red]Error reading stream:[/bold red] {e}"))
|
|
572
|
+
|
|
573
|
+
# -------------------------------------------------------------------------
|
|
574
|
+
# Helpers
|
|
575
|
+
# -------------------------------------------------------------------------
|
|
576
|
+
|
|
577
|
+
def _startup_selection(self, tree: PDFTree) -> None:
    """Expand and select the Trailer > /Root > /Pages node, then focus the tree."""
    pages_node = get_node_by_path(tree, ["Trailer", "/Root", "/Pages"])

    if pages_node:
        expand_to(pages_node)
        pages_node.expand()

        def select_pages():
            # Returns the result of select_node, as a lambda would.
            return tree.select_node(pages_node)

        self.call_after_refresh(select_pages)

    # NOTE(review): the source this was recovered from lost its indentation.
    # Focusing the tree is assumed to happen whether or not the /Pages node
    # exists -- confirm against the original file.
    tree.focus()
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def main():
    """Command-line entry point: run the explorer on the PDF given as argument.

    Usage is printed when no argument is given or when ``-h`` / ``--help``
    appears anywhere among the arguments. An explicit help request goes to
    stdout and exits with status 0. A missing argument goes to stderr and
    exits with status 1. The usage line names the program as it was invoked
    rather than a fixed script name.
    """
    args = sys.argv[1:]
    wants_help = "-h" in args or "--help" in args

    if wants_help or not args:
        import os

        invoked_as = sys.argv[0] if sys.argv else ""
        prog = os.path.basename(invoked_as) or "pdftree"
        usage = f"Usage: {prog} <file.pdf>"
        if wants_help:
            print(usage)
            sys.exit(0)
        print(usage, file=sys.stderr)
        sys.exit(1)

    app = PDFTreeApp(args[0])
    app.run()

    # Force the terminal prompt below the leftover TUI ghost. The escape
    # sequence is only meaningful on a terminal, so skip it when stdout is
    # redirected.
    if sys.stdout.isatty():
        print("\033[999;1H\n", end="")


if __name__ == "__main__":
    main()
|