pdflinkcheck 1.1.47__py3-none-any.whl → 1.1.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdflinkcheck/__init__.py +51 -13
- pdflinkcheck/{analyze.py → analyze_pymupdf.py} +54 -224
- pdflinkcheck/analyze_pypdf.py +184 -0
- pdflinkcheck/analyze_pypdf_v2.py +218 -0
- pdflinkcheck/cli.py +238 -39
- pdflinkcheck/data/LICENSE +5 -24
- pdflinkcheck/data/README.md +278 -0
- pdflinkcheck/data/pyproject.toml +98 -0
- pdflinkcheck/datacopy.py +60 -0
- pdflinkcheck/dev.py +109 -0
- pdflinkcheck/gui.py +371 -74
- pdflinkcheck/io.py +118 -11
- pdflinkcheck/report.py +282 -0
- pdflinkcheck/stdlib_server.py +176 -0
- pdflinkcheck/validate.py +382 -0
- pdflinkcheck/version_info.py +83 -0
- {pdflinkcheck-1.1.47.dist-info → pdflinkcheck-1.1.73.dist-info}/METADATA +127 -71
- pdflinkcheck-1.1.73.dist-info/RECORD +21 -0
- pdflinkcheck-1.1.73.dist-info/WHEEL +4 -0
- {pdflinkcheck-1.1.47.dist-info → pdflinkcheck-1.1.73.dist-info}/entry_points.txt +1 -0
- {pdflinkcheck-1.1.47.dist-info → pdflinkcheck-1.1.73.dist-info}/licenses/LICENSE +5 -24
- pdflinkcheck/remnants.py +0 -142
- pdflinkcheck-1.1.47.dist-info/RECORD +0 -13
- pdflinkcheck-1.1.47.dist-info/WHEEL +0 -5
- pdflinkcheck-1.1.47.dist-info/top_level.txt +0 -1
pdflinkcheck/gui.py
CHANGED
|
@@ -4,10 +4,23 @@ from tkinter import filedialog, ttk, messagebox # Added messagebox
|
|
|
4
4
|
import sys
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Optional # Added Optional
|
|
7
|
+
import unicodedata
|
|
7
8
|
from importlib.resources import files
|
|
8
|
-
|
|
9
|
+
import pyhabitat
|
|
10
|
+
"""
|
|
11
|
+
try:
|
|
12
|
+
import sv_ttk
|
|
13
|
+
# Apply Sun Valley Tk theme
|
|
14
|
+
sv_ttk.set_theme("light")
|
|
15
|
+
except Exception:
|
|
16
|
+
# Theme not available in bundle — use default
|
|
17
|
+
pass
|
|
18
|
+
"""
|
|
9
19
|
# Import the core analysis function
|
|
10
|
-
from pdflinkcheck.
|
|
20
|
+
from pdflinkcheck.report import run_report
|
|
21
|
+
from pdflinkcheck.validate import run_validation
|
|
22
|
+
from pdflinkcheck.version_info import get_version_from_pyproject
|
|
23
|
+
from pdflinkcheck.io import get_first_pdf_in_cwd, get_friendly_path, PDFLINKCHECK_HOME
|
|
11
24
|
|
|
12
25
|
class RedirectText:
|
|
13
26
|
"""A class to redirect sys.stdout messages to a Tkinter Text widget."""
|
|
@@ -18,7 +31,7 @@ class RedirectText:
|
|
|
18
31
|
"""Insert the incoming string into the Text widget."""
|
|
19
32
|
self.text_widget.insert(tk.END, string)
|
|
20
33
|
self.text_widget.see(tk.END) # Scroll to the end
|
|
21
|
-
|
|
34
|
+
self.text_widget.update_idletasks() # Refresh GUI to allow real timie updates << If suppress: The mainloop will handle updates efficiently without forcing them, , but info appears outdated when a new file is analyzed. Immediate feedback is better.
|
|
22
35
|
|
|
23
36
|
def flush(self, *args):
|
|
24
37
|
"""Required for file-like objects, but does nothing here."""
|
|
@@ -27,7 +40,7 @@ class RedirectText:
|
|
|
27
40
|
class PDFLinkCheckerApp(tk.Tk):
|
|
28
41
|
def __init__(self):
|
|
29
42
|
super().__init__()
|
|
30
|
-
self.title("PDF Link Check")
|
|
43
|
+
self.title(f"PDF Link Check v{get_version_from_pyproject()}")
|
|
31
44
|
self.geometry("800x600")
|
|
32
45
|
|
|
33
46
|
# Style for the application
|
|
@@ -36,11 +49,14 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
36
49
|
|
|
37
50
|
# --- 1. Initialize Variables ---
|
|
38
51
|
self.pdf_path = tk.StringVar(value="")
|
|
39
|
-
self.
|
|
52
|
+
self.pdf_library_var = tk.StringVar(value="PyMuPDF")
|
|
53
|
+
#self.pdf_library_var.set("PyMuPDF")
|
|
40
54
|
self.max_links_var = tk.StringVar(value="50")
|
|
41
|
-
self.show_all_links_var = tk.BooleanVar(value=True)
|
|
42
|
-
self.
|
|
43
|
-
self.
|
|
55
|
+
self.show_all_links_var = tk.BooleanVar(value=True)
|
|
56
|
+
self.do_export_report_json_var = tk.BooleanVar(value=True)
|
|
57
|
+
self.do_export_report_txt_var = tk.BooleanVar(value=False)
|
|
58
|
+
self.current_report_text = None
|
|
59
|
+
self.current_report_data = None
|
|
44
60
|
|
|
45
61
|
self.supported_export_formats = ["JSON", "MD", "TXT"]
|
|
46
62
|
self.supported_export_formats = ["JSON"]
|
|
@@ -51,8 +67,36 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
51
67
|
|
|
52
68
|
# --- 3. Set Initial Dependent Widget States ---
|
|
53
69
|
self._toggle_max_links_entry()
|
|
54
|
-
self.
|
|
70
|
+
self._toggle_json_export()
|
|
71
|
+
self._toggle_txt_export()
|
|
55
72
|
|
|
73
|
+
# In class PDFLinkCheckerApp:
|
|
74
|
+
|
|
75
|
+
def _copy_pdf_path(self):
    """Put the text of the PDF path field on the system clipboard.

    A warning dialog is shown when the field is empty, an info dialog on
    success, and an error dialog when Tk raises ``TclError`` while the
    clipboard is being written.
    """
    current_path = self.pdf_path.get()

    # Guard clause: there is nothing to copy.
    if not current_path:
        messagebox.showwarning("Copy Failed", "The PDF Path field is empty.")
        return

    try:
        # Drop the previous clipboard content, then store the path string.
        self.clipboard_clear()
        self.clipboard_append(current_path)
        messagebox.showinfo("Copied", "PDF Path copied to clipboard.")
    except tk.TclError as clipboard_error:
        messagebox.showerror("Copy Error", f"Failed to access the system clipboard: {clipboard_error}")
|
|
92
|
+
|
|
93
|
+
def _scroll_to_top(self):
|
|
94
|
+
"""Scrolls the output text widget to the top."""
|
|
95
|
+
self.output_text.see('1.0') # '1.0' is the index for the very first character
|
|
96
|
+
|
|
97
|
+
def _scroll_to_bottom(self):
|
|
98
|
+
"""Scrolls the output text widget to the bottom."""
|
|
99
|
+
self.output_text.see(tk.END) # tk.END is the index for the position just after the last character
|
|
56
100
|
|
|
57
101
|
def _show_license(self):
|
|
58
102
|
"""
|
|
@@ -97,69 +141,189 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
97
141
|
license_window.grab_set()
|
|
98
142
|
self.wait_window(license_window)
|
|
99
143
|
|
|
144
|
+
def _show_readme(self):
    """Show the packaged README.md in a modal, read-only window.

    The file is read through ``importlib.resources.files`` from the
    ``pdflinkcheck.data`` package and passed through
    ``sanitize_glyphs_for_tkinter`` before display. If the file is missing
    or cannot be read, an error dialog is shown and no window is opened.
    """
    try:
        # Read via the Traversable API rather than a filesystem path.
        readme_resource = files("pdflinkcheck.data") / "README.md"
        readme_text = readme_resource.read_text(encoding="utf-8")
        readme_text = sanitize_glyphs_for_tkinter(readme_text)
    except FileNotFoundError:
        messagebox.showerror(
            "Readme Error",
            "README.md file not found within the installation package (pdflinkcheck.data/README.md). Check build process."
        )
        return
    except Exception as read_error:
        messagebox.showerror("Read Error", f"Failed to read embedded README.md file: {read_error}")
        return

    # --- Build the viewer window ---
    viewer = tk.Toplevel(self)
    viewer.title("pdflinkcheck README.md")
    viewer.geometry("600x400")

    # Fill the text area, then lock it so the user cannot edit it.
    body = tk.Text(viewer, wrap=tk.WORD, font=('Monospace', 10), padx=10, pady=10)
    body.insert(tk.END, readme_text)
    body.config(state=tk.DISABLED)

    # Wire the vertical scrollbar and the text widget to each other.
    y_scroll = ttk.Scrollbar(viewer, command=body.yview)
    body['yscrollcommand'] = y_scroll.set

    # Pack the scrollbar first so it keeps the right edge.
    y_scroll.pack(side=tk.RIGHT, fill=tk.Y)
    body.pack(fill='both', expand=True)

    # Modal behaviour: tie to the main window, grab input, block until closed.
    viewer.transient(self)
    viewer.grab_set()
    self.wait_window(viewer)
|
|
187
|
+
|
|
100
188
|
def _create_widgets(self):
|
|
101
189
|
# --- Control Frame (Top) ---
|
|
102
190
|
control_frame = ttk.Frame(self, padding="10")
|
|
103
191
|
control_frame.pack(fill='x')
|
|
104
192
|
|
|
105
193
|
# Row 0: File Selection
|
|
106
|
-
ttk.Label(control_frame, text="PDF Path:").grid(row=0, column=0, padx=5, pady=5, sticky='w')
|
|
107
|
-
ttk.Entry(control_frame, textvariable=self.pdf_path, width=60).grid(row=0, column=1, padx=5, pady=5, sticky='ew')
|
|
108
|
-
ttk.Button(control_frame, text="Browse...", command=self._select_pdf).grid(row=0, column=2, padx=5, pady=5)
|
|
109
|
-
|
|
110
|
-
# Row 1: Remnants and Max Links Label/Entry
|
|
111
|
-
ttk.Checkbutton(
|
|
112
|
-
control_frame,
|
|
113
|
-
text="Check for Remnants (URLs/Emails)",
|
|
114
|
-
variable=self.check_remnants_var
|
|
115
|
-
).grid(row=1, column=0, padx=5, pady=5, sticky='w')
|
|
116
194
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
195
|
+
# === File Selection Frame (Row 0) ===
|
|
196
|
+
file_selection_frame = ttk.Frame(control_frame)
|
|
197
|
+
file_selection_frame.grid(row=0, column=0, columnspan=3, padx=0, pady=5, sticky='ew')
|
|
198
|
+
|
|
199
|
+
# Elements are now packed/gridded within file_selection_frame
|
|
200
|
+
|
|
201
|
+
# Label
|
|
202
|
+
ttk.Label(file_selection_frame, text="PDF Path:").pack(side=tk.LEFT, padx=(0, 5))
|
|
203
|
+
|
|
204
|
+
# Entry (Path Display)
|
|
205
|
+
ttk.Entry(file_selection_frame, textvariable=self.pdf_path, width=50).pack(side=tk.LEFT, fill='x', expand=True, padx=5)
|
|
206
|
+
# The Entry field (column 1) must expand horizontally within its frame
|
|
207
|
+
# Since we are using PACK for this frame, we use fill='x', expand=True on the Entry.
|
|
208
|
+
|
|
209
|
+
# Browse Button
|
|
210
|
+
ttk.Button(file_selection_frame, text="Browse...", command=self._select_pdf).pack(side=tk.LEFT, padx=(5, 5))
|
|
123
211
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
).pack(side=tk.LEFT, padx=(0, 5)) # Pack Checkbutton to the left with small internal padding
|
|
130
|
-
self.export_report_format = ttk.Combobox(
|
|
131
|
-
export_group_frame,
|
|
132
|
-
textvariable=self.export_report_format_var,
|
|
133
|
-
values=self.supported_export_formats,
|
|
134
|
-
state='readonly', # Prevents user from typing invalid values
|
|
135
|
-
width=5
|
|
136
|
-
)
|
|
137
|
-
self.export_report_format.set(self.supported_export_formats[0]) # Set default text
|
|
138
|
-
self.export_report_format.pack(side=tk.LEFT)
|
|
139
|
-
# Pack Entry tightly next to it
|
|
212
|
+
# Copy Button
|
|
213
|
+
# NOTE: Removed leading spaces from " Copy Path"
|
|
214
|
+
ttk.Button(file_selection_frame, text="Copy Path", command=self._copy_pdf_path).pack(side=tk.LEFT, padx=(0, 0))
|
|
215
|
+
|
|
216
|
+
# === END: File Selection Frame ===
|
|
140
217
|
|
|
218
|
+
# --- Report brevity options ----
|
|
219
|
+
report_brevity_frame = ttk.LabelFrame(control_frame, text="Report Brevity Options:")
|
|
220
|
+
#report_brevity_frame.grid(row=1, column=0, columnspan=2, padx=5, pady=1, sticky='nsew')
|
|
221
|
+
report_brevity_frame.grid(row=1, column=0, padx=5, pady=5, sticky='nsew')
|
|
222
|
+
#
|
|
141
223
|
ttk.Checkbutton(
|
|
142
|
-
|
|
143
|
-
text="Show All Links
|
|
224
|
+
report_brevity_frame,
|
|
225
|
+
text="Show All Links.",
|
|
144
226
|
variable=self.show_all_links_var,
|
|
145
227
|
command=self._toggle_max_links_entry
|
|
146
|
-
).
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
228
|
+
).pack(side='left', padx=5, pady=1)
|
|
229
|
+
|
|
230
|
+
ttk.Label(report_brevity_frame, text="Max Links to Display:").pack(side='left', padx=5, pady=1)
|
|
231
|
+
self.max_links_entry = ttk.Entry(report_brevity_frame, textvariable=self.max_links_var, width=4)
|
|
232
|
+
self.max_links_entry.pack(side='left', padx=5, pady=5)
|
|
233
|
+
|
|
234
|
+
# --- PDF Library Selection ---
|
|
235
|
+
# Create a labeled group for the PDF options
|
|
236
|
+
pdf_library_frame = ttk.LabelFrame(control_frame, text="Select PDF Library:")
|
|
237
|
+
pdf_library_frame.grid(row=1, column=1, padx=5, pady=5, sticky='nsew')
|
|
238
|
+
|
|
239
|
+
# Radio options inside the frame
|
|
240
|
+
ttk.Radiobutton(
|
|
241
|
+
pdf_library_frame,
|
|
242
|
+
text="PyMuPDF",
|
|
243
|
+
variable=self.pdf_library_var,
|
|
244
|
+
value="PyMuPDF",
|
|
245
|
+
|
|
246
|
+
).pack(side='left', padx=5, pady=1)
|
|
247
|
+
|
|
248
|
+
ttk.Radiobutton(
|
|
249
|
+
pdf_library_frame,
|
|
250
|
+
text="pypdf",
|
|
251
|
+
variable=self.pdf_library_var,
|
|
252
|
+
value="pypdf",
|
|
253
|
+
).pack(side='left', padx=5, pady=1)
|
|
254
|
+
|
|
255
|
+
export_group_frame = ttk.LabelFrame(control_frame, text="Export Format:")
|
|
256
|
+
#export_group_frame = ttk.LabelFrame(control_frame, text = "Export Filetype Selection:")
|
|
257
|
+
export_group_frame.grid(row=1, column=2, padx=5, pady=5, sticky='nseew') # Placed in the original Checkbutton's column
|
|
151
258
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
259
|
+
ttk.Checkbutton(
|
|
260
|
+
export_group_frame,
|
|
261
|
+
#text="Export Report",
|
|
262
|
+
text = "JSON" ,
|
|
263
|
+
variable=self.do_export_report_json_var
|
|
264
|
+
).pack(side=tk.LEFT, padx=(0, 5)) # Pack Checkbutton to the left with small internal padding
|
|
265
|
+
ttk.Checkbutton(
|
|
266
|
+
export_group_frame,
|
|
267
|
+
text = "TXT" ,
|
|
268
|
+
#state=tk.DISABLED,
|
|
269
|
+
variable=self.do_export_report_txt_var,
|
|
270
|
+
).pack(side=tk.LEFT, padx=(0, 5)) # Pack Checkbutton to the left with small internal padding
|
|
155
271
|
|
|
272
|
+
# Row 3: Run Button, Export Filetype selection, License Button, and readme button
|
|
273
|
+
# 1. Run Button (Spans columns 0 and 1)
|
|
274
|
+
run_analysis_btn = ttk.Button(control_frame, text="▶ Run Analysis", command=self._run_report_gui, style='Accent.TButton')
|
|
275
|
+
run_analysis_btn.grid(row=3, column=0, columnspan=2, pady=10, sticky='ew', padx=(0, 5))
|
|
276
|
+
|
|
277
|
+
run_validation_btn = ttk.Button(control_frame, text="▶ Run Validation", command=self._run_validation_gui, style='Accent.TButton')
|
|
278
|
+
run_validation_btn.grid(row=4, column=0, columnspan=2, pady=10, sticky='ew', padx=(0, 5))
|
|
279
|
+
# Ensure the run button frame expands to fill its column
|
|
280
|
+
#run_analysis_btn.grid_columnconfigure(0, weight=1)
|
|
281
|
+
|
|
282
|
+
# 2. Create a Frame to hold the two file link buttons (This frame goes into column 2)
|
|
283
|
+
info_btn_frame = ttk.Frame(control_frame)
|
|
284
|
+
info_btn_frame.grid(row=3, column=2, columnspan=1, pady=10, sticky='ew', padx=(5, 0))
|
|
285
|
+
# Ensure the info button frame expands to fill its column
|
|
286
|
+
info_btn_frame.grid_columnconfigure(0, weight=1)
|
|
287
|
+
info_btn_frame.grid_columnconfigure(1, weight=1)
|
|
288
|
+
|
|
289
|
+
# 3. Place License and Readme buttons inside the new frame
|
|
290
|
+
license_btn = ttk.Button(info_btn_frame, text="License", command=self._show_license)
|
|
291
|
+
# Use PACK or a 2-column GRID inside the info_btn_frame. GRID is cleaner here.
|
|
292
|
+
license_btn.grid(row=0, column=0, sticky='ew', padx=(0, 2)) # Left side of the frame
|
|
293
|
+
|
|
294
|
+
readme_btn = ttk.Button(info_btn_frame, text="Readme", command=self._show_readme)
|
|
295
|
+
readme_btn.grid(row=0, column=1, sticky='ew', padx=(2, 0)) # Right side of the frame
|
|
296
|
+
|
|
297
|
+
# Force the columns to distribute space evenly
|
|
298
|
+
control_frame.grid_columnconfigure(0, weight=2)
|
|
156
299
|
control_frame.grid_columnconfigure(1, weight=1)
|
|
300
|
+
control_frame.grid_columnconfigure(2, weight=1)
|
|
157
301
|
|
|
158
302
|
# --- Output Frame (Bottom) ---
|
|
159
|
-
output_frame = ttk.Frame(self, padding=
|
|
303
|
+
output_frame = ttk.Frame(self, padding=(10, 2, 10, 10)) # Left, Top, Right, Bottom
|
|
160
304
|
output_frame.pack(fill='both', expand=True)
|
|
161
305
|
|
|
162
|
-
|
|
306
|
+
output_header_frame = ttk.Frame(output_frame)
|
|
307
|
+
output_header_frame.pack(fill='x', pady=(0, 5))
|
|
308
|
+
|
|
309
|
+
# Label
|
|
310
|
+
ttk.Label(output_header_frame, text="Analysis Report Output:").pack(side=tk.LEFT, fill='x', expand=True)
|
|
311
|
+
|
|
312
|
+
# Scroll to Bottom Button # put this first so that it on the right when the Top button is added on the left.
|
|
313
|
+
bottom_btn = ttk.Button(output_header_frame, text="▼ Bottom", command=self._scroll_to_bottom, width=8)
|
|
314
|
+
bottom_btn.pack(side=tk.RIGHT, padx=(0, 5))
|
|
315
|
+
|
|
316
|
+
# Scroll to Top Button
|
|
317
|
+
top_btn = ttk.Button(output_header_frame, text="▲ Top", command=self._scroll_to_top, width=6)
|
|
318
|
+
top_btn.pack(side=tk.RIGHT, padx=(5, 5))
|
|
319
|
+
|
|
320
|
+
# Open Report Button
|
|
321
|
+
self.open_report_btn = ttk.Button(output_header_frame, text="Open Report", command=self._open_report_text)
|
|
322
|
+
self.open_report_btn.pack(side=tk.RIGHT, padx=(5, 5))
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
# ----------------------------------------------------
|
|
326
|
+
|
|
163
327
|
|
|
164
328
|
# Scrollable Text Widget for output
|
|
165
329
|
# Use an internal frame for text and scrollbar to ensure correct packing
|
|
@@ -186,7 +350,8 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
186
350
|
filetypes=[("PDF files", "*.pdf"), ("All files", "*.*")]
|
|
187
351
|
)
|
|
188
352
|
if file_path:
|
|
189
|
-
self.pdf_path.set(file_path)
|
|
353
|
+
self.pdf_path.set(get_friendly_path(file_path))
|
|
354
|
+
|
|
190
355
|
|
|
191
356
|
def _toggle_max_links_entry(self):
|
|
192
357
|
"""Disables/enables the max_links entry based on show_all_links_var."""
|
|
@@ -194,20 +359,41 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
194
359
|
self.max_links_entry.config(state=tk.DISABLED)
|
|
195
360
|
else:
|
|
196
361
|
self.max_links_entry.config(state=tk.NORMAL)
|
|
362
|
+
|
|
363
|
+
def _toggle_json_export(self):
|
|
364
|
+
"""Checkbox toggle for json filetype report."""
|
|
365
|
+
if self.do_export_report_json_var.get():
|
|
366
|
+
pass # placeholder # no side effects
|
|
367
|
+
|
|
368
|
+
def _toggle_txt_export(self):
|
|
369
|
+
"""Checkbox toggle for TXT filetype report."""
|
|
370
|
+
if self.do_export_report_txt_var.get():
|
|
371
|
+
pass # placeholder # no side effects
|
|
372
|
+
|
|
373
|
+
def _assess_pdf_path_str(self):
|
|
374
|
+
pdf_path_str = self.pdf_path.get().strip()
|
|
375
|
+
if not pdf_path_str:
|
|
376
|
+
pdf_path_str = get_first_pdf_in_cwd()
|
|
377
|
+
if not pdf_path_str:
|
|
378
|
+
self._display_error("Error: No PDF found in current directory.")
|
|
379
|
+
return
|
|
197
380
|
|
|
198
|
-
|
|
199
|
-
"""Enables/disables the report file export."""
|
|
200
|
-
if self.do_export_report_var.get():
|
|
201
|
-
self.export_report_format.config(state=tk.NORMAL)
|
|
202
|
-
else:
|
|
203
|
-
self.export_report_format.config(state=tk.DISABLED)
|
|
381
|
+
p = Path(pdf_path_str).expanduser().resolve()
|
|
204
382
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
if not Path(pdf_path_str).exists():
|
|
208
|
-
self._display_error("Error: PDF file not found or path is invalid.")
|
|
383
|
+
if not p.exists():
|
|
384
|
+
self._display_error(f"Error: PDF file not found at: {p}")
|
|
209
385
|
return
|
|
386
|
+
|
|
387
|
+
# Use the resolved string version for the rest of the function
|
|
388
|
+
pdf_path_str_assessed = str(p)
|
|
389
|
+
return pdf_path_str_assessed
|
|
390
|
+
|
|
391
|
+
def _run_report_gui(self):
|
|
210
392
|
|
|
393
|
+
pdf_path_str = self._assess_pdf_path_str()
|
|
394
|
+
if not pdf_path_str:
|
|
395
|
+
return
|
|
396
|
+
|
|
211
397
|
if self.show_all_links_var.get():
|
|
212
398
|
max_links_to_pass = 0
|
|
213
399
|
else:
|
|
@@ -220,10 +406,16 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
220
406
|
self._display_error("Error: Max Links must be an integer.")
|
|
221
407
|
return
|
|
222
408
|
|
|
223
|
-
export_format = None
|
|
224
|
-
|
|
225
|
-
|
|
409
|
+
export_format = None # default value, if selection is not made (if selection is not active)
|
|
410
|
+
export_format = ""
|
|
411
|
+
if self.do_export_report_json_var.get():
|
|
412
|
+
export_format = export_format + "JSON"
|
|
413
|
+
if self.do_export_report_txt_var.get():
|
|
414
|
+
export_format = export_format + "TXT"
|
|
226
415
|
|
|
416
|
+
pdf_library = self._discern_pdf_library()
|
|
417
|
+
|
|
418
|
+
|
|
227
419
|
# 1. Clear previous output and enable editing
|
|
228
420
|
self.output_text.config(state=tk.NORMAL)
|
|
229
421
|
self.output_text.delete('1.0', tk.END)
|
|
@@ -234,24 +426,81 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
234
426
|
|
|
235
427
|
try:
|
|
236
428
|
# 3. Call the core logic function
|
|
237
|
-
self.output_text.insert(tk.END, "--- Starting Analysis ---\n")
|
|
238
|
-
|
|
429
|
+
#self.output_text.insert(tk.END, "--- Starting Analysis ---\n")
|
|
430
|
+
report_results = run_report(
|
|
239
431
|
pdf_path=pdf_path_str,
|
|
240
|
-
check_remnants=self.check_remnants_var.get(),
|
|
241
432
|
max_links=max_links_to_pass,
|
|
242
|
-
export_format=export_format
|
|
433
|
+
export_format=export_format,
|
|
434
|
+
pdf_library = pdf_library,
|
|
243
435
|
)
|
|
244
|
-
self.
|
|
436
|
+
self.current_report_text = report_results.get("text", "")
|
|
437
|
+
self.current_report_data = report_results.get("data", {})
|
|
438
|
+
|
|
439
|
+
#self.output_text.insert(tk.END, "\n--- Analysis Complete ---\n")
|
|
245
440
|
|
|
246
441
|
except Exception as e:
|
|
247
|
-
|
|
442
|
+
# Inform the user in the GUI with a clean message
|
|
248
443
|
self._display_error(f"An unexpected error occurred during analysis: {e}")
|
|
249
444
|
|
|
250
445
|
finally:
|
|
251
446
|
# 4. Restore standard output and disable editing
|
|
252
447
|
sys.stdout = original_stdout
|
|
253
448
|
self.output_text.config(state=tk.DISABLED)
|
|
449
|
+
|
|
450
|
+
def _run_validation_gui(self):
    """Run link validation for the selected PDF and show the output in the GUI.

    Steps:
      1. Resolve the PDF path via ``_assess_pdf_path_str``; stop if unusable.
      2. If no report data is cached, run ``_run_report_gui`` first.
      3. Redirect ``sys.stdout`` to the output widget and call
         ``run_validation`` with the cached report data.
      4. Always restore ``sys.stdout`` and set the widget back to read-only.

    The cached ``current_report_text`` / ``current_report_data`` are left
    untouched by validation, so "Open Report" keeps working afterwards.
    """
    pdf_path_str = self._assess_pdf_path_str()
    if not pdf_path_str:
        return

    pdf_library = self._discern_pdf_library()

    # Produce report data BEFORE redirecting stdout here: _run_report_gui
    # installs and restores its own redirect and disables the output widget
    # in its `finally`, which would otherwise undo this method's setup.
    report_was_just_run = False
    if not self.current_report_data:
        self._run_report_gui()
        report_was_just_run = True
        if not self.current_report_data:
            self._display_error("Error: Validation requires report data, but the analysis produced none.")
            return
    # NOTE(review): cached data is not checked against pdf_path_str; if the
    # path was changed since the last analysis this may be stale - confirm.
    report_results = self.current_report_data

    # 1. Enable editing; keep a freshly printed report, otherwise start clean
    self.output_text.config(state=tk.NORMAL)
    if not report_was_just_run:
        self.output_text.delete('1.0', tk.END)

    # 2. Redirect standard output to the Text widget
    original_stdout = sys.stdout
    sys.stdout = RedirectText(self.output_text)

    try:
        # 3. Call the core logic function; its output arrives via stdout
        run_validation(
            report_results=report_results,
            pdf_path=pdf_path_str,
            pdf_library=pdf_library,
            export_json=True,
            print_bool=True
        )

    except Exception as e:
        # Inform the user in the GUI with a clean message
        self._display_error(f"An unexpected error occurred during validation: {e}")

    finally:
        # 4. Restore standard output and disable editing
        sys.stdout = original_stdout
        self.output_text.config(state=tk.DISABLED)
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _discern_pdf_library(self):
|
|
496
|
+
selected_lib = self.pdf_library_var.get().lower()
|
|
497
|
+
|
|
498
|
+
if selected_lib == "pymupdf":
|
|
499
|
+
print("Using high-speed PyMuPDF engine.")
|
|
500
|
+
elif selected_lib == "pypdf":
|
|
501
|
+
print("Using pure-python pypdf engine.")
|
|
502
|
+
return selected_lib
|
|
503
|
+
|
|
255
504
|
def _display_error(self, message):
|
|
256
505
|
# Ensure output is in normal state to write
|
|
257
506
|
original_state = self.output_text.cget('state')
|
|
@@ -261,10 +510,58 @@ class PDFLinkCheckerApp(tk.Tk):
|
|
|
261
510
|
#self.output_text.delete('1.0', tk.END)
|
|
262
511
|
self.output_text.insert(tk.END, f"[ERROR] {message}\n", 'error')
|
|
263
512
|
self.output_text.tag_config('error', foreground='red')
|
|
513
|
+
self.output_text.see(tk.END)
|
|
264
514
|
|
|
265
515
|
# Restore state
|
|
266
516
|
self.output_text.config(state=tk.DISABLED)
|
|
267
517
|
|
|
518
|
+
def _open_report_text(self):
    """Write the latest in-memory report text to a file and open it in an editor.

    The text comes from ``self.current_report_text``, independent of the
    export checkboxes. It is written to
    ``PDFLINKCHECK_HOME / "LAST_REPORT_<pdf stem>.txt"`` and that file is
    handed to ``pyhabitat.edit_textfile``. A warning dialog is shown when no
    report text is available; any failure while writing or launching is
    reported in an error dialog.
    """
    # Use the internal buffer, not the widget contents or an exported file.
    if not self.current_report_text:
        messagebox.showwarning("Open Failed", "No analysis data available. Please run an analysis first.")
        return

    try:
        # Name the viewing copy after the PDF, or "report" if the field is blank.
        entered_path = self.pdf_path.get()
        if entered_path:
            stem = Path(entered_path).stem
        else:
            stem = "report"
        viewing_file = PDFLINKCHECK_HOME / f"LAST_REPORT_{stem}.txt"

        # Dump the buffer to the viewing file, then open it with pyhabitat.
        viewing_file.write_text(self.current_report_text, encoding="utf-8")
        pyhabitat.edit_textfile(viewing_file)

    except Exception as launch_error:
        messagebox.showerror("View Error", f"Could not launch editor: {launch_error}")
|
|
539
|
+
|
|
540
|
+
"""
|
|
541
|
+
def toggle_theme():
|
|
542
|
+
try:
|
|
543
|
+
current = sv_ttk.get_theme()
|
|
544
|
+
sv_ttk.set_theme("dark" if current == "light" else "light")
|
|
545
|
+
except Exception:
|
|
546
|
+
pass
|
|
547
|
+
"""
|
|
548
|
+
def sanitize_glyphs_for_tkinter(text: str) -> str:
    """Reduce *text* to plain ASCII so Tkinter does not render 'empty squares'.

    Characters are decomposed with NFKD normalization so that accented
    letters keep their ASCII base letter; everything without an ASCII
    representation (emojis, symbols, combining marks) is dropped.

    Args:
        text: Arbitrary Unicode text.

    Returns:
        The ASCII-only text, with each double space replaced by a single
        space in one left-to-right pass.
    """
    # 1. NFKD splits composite characters into base character + marks.
    decomposed = unicodedata.normalize('NFKD', text)

    # 2. 'ignore' drops every code point that has no ASCII encoding.
    #    Decode with the same codec that was used to encode.
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')

    # 3. Removing a glyph that stood between two spaces leaves a double
    #    space behind; collapse those pairs. Note this is a single pass and
    #    applies to every double space in the text, so longer runs of
    #    spaces are halved rather than reduced to one.
    return ascii_only.replace('  ', ' ')
|
|
268
565
|
|
|
269
566
|
def auto_close_window(root, delay_ms:int = 0):
|
|
270
567
|
"""
|
|
@@ -290,4 +587,4 @@ def start_gui(time_auto_close:int=0):
|
|
|
290
587
|
print("pdflinkcheck: gui closed.")
|
|
291
588
|
|
|
292
589
|
if __name__ == "__main__":
|
|
293
|
-
start_gui()
|
|
590
|
+
start_gui()
|