pdflinkcheck 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. pdflinkcheck/__init__.py +88 -18
  2. pdflinkcheck/__main__.py +6 -0
  3. pdflinkcheck/analysis_pdfium.py +131 -0
  4. pdflinkcheck/{analyze_pymupdf.py → analysis_pymupdf.py} +99 -141
  5. pdflinkcheck/{analyze_pypdf.py → analysis_pypdf.py} +51 -39
  6. pdflinkcheck/cli.py +52 -48
  7. pdflinkcheck/data/LICENSE +18 -15
  8. pdflinkcheck/data/README.md +23 -25
  9. pdflinkcheck/data/pyproject.toml +17 -26
  10. pdflinkcheck/datacopy.py +16 -1
  11. pdflinkcheck/dev.py +2 -2
  12. pdflinkcheck/environment.py +14 -2
  13. pdflinkcheck/gui.py +346 -563
  14. pdflinkcheck/helpers.py +88 -0
  15. pdflinkcheck/io.py +24 -6
  16. pdflinkcheck/report.py +598 -97
  17. pdflinkcheck/security.py +189 -0
  18. pdflinkcheck/splash.py +38 -0
  19. pdflinkcheck/stdlib_server.py +7 -21
  20. pdflinkcheck/stdlib_server_alt.py +571 -0
  21. pdflinkcheck/tk_utils.py +188 -0
  22. pdflinkcheck/update_msix_version.py +2 -0
  23. pdflinkcheck/validate.py +104 -170
  24. pdflinkcheck/version_info.py +2 -2
  25. {pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/METADATA +41 -40
  26. {pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/RECORD +34 -27
  27. pdflinkcheck-1.2.29.dist-info/WHEEL +5 -0
  28. {pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/entry_points.txt +0 -1
  29. pdflinkcheck-1.2.29.dist-info/licenses/LICENSE +27 -0
  30. pdflinkcheck-1.2.29.dist-info/top_level.txt +1 -0
  31. pdflinkcheck/analyze_pypdf_v2.py +0 -217
  32. pdflinkcheck-1.1.94.dist-info/WHEEL +0 -4
  33. pdflinkcheck-1.1.94.dist-info/licenses/LICENSE +0 -24
  34. {pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/licenses/LICENSE-AGPL3 +0 -0
  35. {pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/licenses/LICENSE-MIT +0 -0
@@ -0,0 +1,188 @@
1
+ # src/pdflinkcheck/tk_utils.py
2
+ import tkinter as tk
3
+ import subprocess
4
+ import re
5
+ import platform
6
+
7
+
8
+
9
def get_primary_monitor_geometry():
    """
    Query ``xrandr`` for the primary monitor's dimensions and offsets.

    Parses output lines of the form ``DP-0 connected primary 1920x1080+1200+0``.

    Returns:
        tuple[int, int, int, int] | None: ``(width, height, x_offset, y_offset)``
        of the primary monitor, or ``None`` when xrandr is unavailable, fails,
        or no primary geometry can be parsed.

    Not used.
    """
    try:
        # Query xrandr for the primary monitor
        result = subprocess.run(['xrandr', '--query'], capture_output=True, text=True, check=True)
        # Look for a line like: "DP-0 connected primary 1920x1080+1200+0"
        primary_line = re.search(r'^.*primary.*$', result.stdout, re.M)
        if primary_line is None:
            # BUG FIX: the original chained .group() onto this possibly-None
            # result, relying on the blanket except to mask the AttributeError.
            return None
        match = re.search(r'(\d+)x(\d+)\+(\d+)\+(\d+)', primary_line.group())
        if match:
            # BUG FIX: the original returned a lazy ``map`` object, which is
            # always truthy and exhausted after one unpacking; return a
            # concrete tuple so callers can test and reuse it safely.
            return tuple(map(int, match.groups()))
        return None
    except Exception:
        # xrandr missing (e.g. Windows/macOS) or produced unexpected output.
        return None
25
+
26
def center_window_on_primary_stable(window: tk.Toplevel | tk.Tk, width: int, height: int):
    """
    Center *window* (``width`` x ``height``) on the primary monitor.

    Prefers XRandR geometry (best for WSL2); on failure falls back to the
    full reported screen, anchoring a virtual 1920x1080 box around the
    mouse pointer when the screen is a wide multi-monitor span.

    :param window: Toplevel or root window to position.
    :type window: tk.Toplevel | tk.Tk
    :param width: Desired window width in pixels.
    :type width: int
    :param height: Desired window height in pixels.
    :type height: int

    Not used.
    """
    window.update_idletasks()

    # Preferred source: XRandR primary-monitor geometry (best for WSL2).
    primary = get_primary_monitor_geometry()

    if primary:
        mon_w, mon_h, off_x, off_y = primary
        print(f"[DEBUG] XRandR Primary: {mon_w}x{mon_h} at +{off_x}+{off_y}")
    else:
        # No XRandR data: treat the whole reported screen as the target and
        # center where the user's attention is.
        mon_w = window.winfo_screenwidth()
        mon_h = window.winfo_screenheight()
        off_x = off_y = 0

        # A very wide screen is likely a multi-monitor span; anchor a
        # 1920x1080 "virtual primary" around the mouse pointer instead.
        if mon_w > 2500:
            off_x = window.winfo_pointerx() - 960
            off_y = window.winfo_pointery() - 540
            mon_w, mon_h = 1920, 1080

    # Center inside the chosen monitor box, then clamp so the window
    # cannot end up off-screen to the top/left.
    x = max(0, off_x + (mon_w // 2) - (width // 2))
    y = max(0, off_y + (mon_h // 2) - (height // 2))

    print(f"[DEBUG] Final Positioning: x={x}, y={y}")
    window.geometry(f"{width}x{height}+{int(x)}+{int(y)}")
71
+
72
+
73
def get_monitor_geometries():
    """
    Enumerate all connected monitors via ``xrandr``.

    Returns:
        list[dict]: one dict per connected monitor with keys ``'w'``,
        ``'h'``, ``'x'``, ``'y'`` and ``'is_primary'``; empty when xrandr
        is unavailable (e.g. native Windows) or the query fails.

    Essential for WSL2/WSLg multi-monitor accuracy.

    Active.
    """
    found = []

    # --- LINUX / WSL2 Logic ---
    if platform.system() == "Linux":
        try:
            query = subprocess.run(['xrandr', '--query'], capture_output=True, text=True, check=True)
            # Lines of interest look like:
            #   "DP-0 connected primary 1920x1080+1920+0 ..."
            for row in query.stdout.splitlines():
                if " connected " not in row:
                    continue
                geometry = re.search(r'(\d+)x(\d+)\+(\d+)\+(\d+)', row)
                if geometry:
                    w, h, x, y = (int(part) for part in geometry.groups())
                    found.append({
                        'w': w, 'h': h, 'x': x, 'y': y,
                        'is_primary': "primary" in row
                    })
        except Exception as e:
            print(f"[DEBUG] xrandr query failed: {e}")

    # --- WINDOWS Native Logic ---
    # No xrandr on native Windows; true multi-monitor detection would need
    # ctypes/win32api, so we intentionally report nothing here and let
    # callers fall back to Tk's own screen metrics.

    return found
115
+
116
def center_window_on_primary_goose(window: tk.Toplevel | tk.Tk, width: int, height: int):
    """
    Center *window* on the physical primary monitor, identifying its
    bounds and offsets via xrandr when possible.

    :param window: Toplevel or root window to position.
    :type window: tk.Toplevel | tk.Tk
    :param width: Desired window width in pixels.
    :type width: int
    :param height: Desired window height in pixels.
    :type height: int

    Active.
    """
    window.update_idletasks()

    detected = get_monitor_geometries()

    if detected:
        # Prefer the monitor explicitly flagged 'primary'; otherwise the
        # first one reported (usually the display at +0+0).
        chosen = next((mon for mon in detected if mon['is_primary']), detected[0])
        print(f"[DEBUG] Assessed Monitor: {chosen['w']}x{chosen['h']} at +{chosen['x']}+{chosen['y']}")
    else:
        print("[DEBUG] No monitors found via xrandr. Falling back to screenwidth.")
        # Total fallback: use Tk's screen metrics, but cap obviously
        # spanned virtual screens at 1080p to avoid the L-gap.
        span_w = window.winfo_screenwidth()
        span_h = window.winfo_screenheight()
        chosen = {
            'w': 1920 if span_w > 2500 else span_w,
            'h': 1080 if span_h > 2000 else span_h,
            'x': 0, 'y': 0
        }

    # Center relative to the identified monitor's geometry.
    x = chosen['x'] + (chosen['w'] // 2) - (width // 2)
    y = chosen['y'] + (chosen['h'] // 2) - (height // 2)

    print(f"[DEBUG] Final Positioning: x={x}, y={y}")
    window.geometry(f"{width}x{height}+{int(x)}+{int(y)}")
162
+
163
def center_window_on_primary(window: tk.Toplevel | tk.Tk, width: int, height: int):
    """
    Center *window* on the primary monitor.

    Uses xrandr geometry when available; otherwise falls back to
    ``wm_maxsize`` (a good proxy for the primary display on Windows/Mac),
    with a 1920x1080 safe-zone heuristic for huge virtual spans.

    :param window: Toplevel or root window to position.
    :type window: tk.Toplevel | tk.Tk
    :param width: Desired window width in pixels.
    :type width: int
    :param height: Desired window height in pixels.
    :type height: int
    """
    window.update_idletasks()
    monitors = get_monitor_geometries()

    # Pick the 'primary'-flagged monitor, else the first reported one.
    chosen = next((mon for mon in monitors if mon['is_primary']), monitors[0]) if monitors else None

    if chosen:
        # Use the precisely assessed hardware monitor.
        x = chosen['x'] + (chosen['w'] // 2) - (width // 2)
        y = chosen['y'] + (chosen['h'] // 2) - (height // 2)
    else:
        # Fallback for Windows/Mac where xrandr doesn't exist:
        # wm_maxsize is surprisingly accurate for the primary display.
        span_w, span_h = window.wm_maxsize()

        # If even maxsize reports a huge span (rare natively), apply the
        # 1920/1080 safe-zone heuristic.
        if span_w > 2500:
            span_w, span_h = 1920, 1080

        x = (span_w // 2) - (width // 2)
        y = (span_h // 2) - (height // 2)

    window.geometry(f"{width}x{height}+{int(x)}+{int(y)}")
@@ -1,3 +1,5 @@
1
+ # src/pdflinkcheck/update_msix_version.py
2
+ from __future__ import annotations
1
3
  from pathlib import Path
2
4
  from pdflinkcheck.version_info import get_version_from_pyproject
3
5
 
pdflinkcheck/validate.py CHANGED
@@ -1,29 +1,34 @@
1
1
  #!/usr/bin/env python3
2
2
  # SPDX-License-Identifier: MIT
3
3
  # src/pdflinkcheck/validate.py
4
-
4
+ from __future__ import annotations
5
5
  import sys
6
6
  from pathlib import Path
7
7
  from typing import Dict, Any
8
8
 
9
9
  from pdflinkcheck.io import get_friendly_path
10
- from pdflinkcheck.environment import pymupdf_is_available
10
+ from pdflinkcheck.helpers import PageRef # Importing the established helper
11
11
 
12
12
  SEP_COUNT=28
13
13
 
14
+ START_INDEX = 0
15
+ # Internal 0-based start
16
+ # Define the offset.
17
+ # The PDF engines are 0-based.
18
+ # We will add +1 only for the HUMAN REASON strings.
19
+
20
+
14
21
  def run_validation(
15
22
  report_results: Dict[str, Any],
16
23
  pdf_path: str,
17
- pdf_library: str = "pypdf",
18
24
  check_external: bool = False
19
25
  ) -> Dict[str, Any]:
20
26
  """
21
- Validates links during run_report() using a partial completion of the data dict.
27
+ Validates links during run_report_*() using a partial completion of the data dict.
22
28
 
23
29
  Args:
24
30
  report_results: The dict returned by run_report_and_call_exports()
25
31
  pdf_path: Path to the original PDF (needed for relative file checks and page count)
26
- pdf_library: Engine used ("pypdf" or "pymupdf")
27
32
  check_external: Whether to validate HTTP URLs (requires network + requests)
28
33
 
29
34
  Returns:
@@ -34,30 +39,17 @@ def run_validation(
34
39
 
35
40
  all_links = data.get("external_links", []) + data.get("internal_links", [])
36
41
  toc = data.get("toc", [])
42
+ total_pages = metadata.get("file_overview", {}).get("total_pages",None)
37
43
 
38
44
  if not all_links and not toc:
39
45
  print("No links or TOC to validate.")
40
46
  return {"summary-stats": {"valid": 0, "broken": 0}, "issues": []}
41
47
 
42
- # Get total page count (critical for internal validation)
43
- try:
44
- if pymupdf_is_available() and pdf_library == "pymupdf":
45
- import fitz
46
- doc = fitz.open(pdf_path)
47
- total_pages = doc.page_count
48
- doc.close()
49
- else:
50
- from pypdf import PdfReader
51
- reader = PdfReader(pdf_path)
52
- total_pages = len(reader.pages)
53
- except Exception as e:
54
- print(f"Could not determine page count: {e}")
55
- total_pages = None
56
48
 
57
49
  pdf_dir = Path(pdf_path).parent
58
50
 
59
51
  issues = []
60
- valid_count = 0
52
+ valid_count = 0 # add more granulaity for types of valid links
61
53
  file_found_count = 0
62
54
  broken_file_count = 0
63
55
  broken_page_count = 0
@@ -68,37 +60,56 @@ def run_validation(
68
60
 
69
61
  # Validate active links
70
62
  #print("DEBUG validate: entering loop with", len(all_links), "links")
71
- for i, link in enumerate(all_links):
63
+ for link in all_links:
72
64
  link_type = link.get("type")
73
65
  status = "valid"
74
66
  reason = None
67
+
75
68
  if link_type in ("Internal (GoTo/Dest)", "Internal (Resolved Action)"):
76
69
  dest_page_raw = link.get("destination_page")
77
- if dest_page_raw is None:
78
- status = "no-destinstion-page"
79
- reason = "No destination page resolved"
80
- else:
70
+
71
+ if dest_page_raw is not None:
72
+
81
73
  try:
82
- target_page = int(dest_page_raw)
83
- #target_page = int(link.get("destination_page"))
84
- if not isinstance(target_page, int):
74
+ # Use PageRef to handle translation
75
+ target_page_ref = PageRef.from_index(int(dest_page_raw))
76
+ #target_page = int(dest_page_raw)
77
+
78
+ # 1. Immediate Failure: Below 0
79
+ if target_page_ref.machine < START_INDEX:
85
80
  status = "broken-page"
86
- reason = f"Target page not a number: {target_page}"
87
- elif (1 <= target_page) and total_pages is None:
81
+ # We use target_page + 1 to show the user what they "saw"
82
+ reason = f"Target page {target_page_ref.human} is invalid (negative index)."
83
+
84
+ # 2. Case: We don't know the max page count
85
+ elif total_pages is None:
86
+ # If it's 0 or higher, we assume it might be okay but can't be sure
88
87
  status = "unknown-reasonableness"
89
- reason = "Total page count unavailable, but the page number is reasonable"
90
- elif (1 <= target_page <= total_pages):
91
- status = "valid"
92
- reason = f"Page {target_page} within range (1–{total_pages})"
93
- elif target_page < 1:
94
- status = "broken-page"
95
- reason = f"TOC targets page negative {target_page}."
96
- elif not (1 <= target_page <= total_pages):
88
+ reason = f"Page {target_page_ref.human} seems reasonable, but total page count is unavailable."
89
+
90
+ # 3. Case: Out of Upper Bounds
91
+ elif target_page_ref.machine >= total_pages:
97
92
  status = "broken-page"
98
- reason = f"Page {target_page} out of range (1–{total_pages})"
93
+ # User sees 1-based, e.g., "Page 101 out of range (1-100)"
94
+ reason = f"Page {target_page_ref.human} out of range (1–{total_pages})"
95
+
96
+ # 4. Case: Perfect Match
97
+ else:
98
+ status = "valid"
99
+ reason = f"Page {target_page_ref.human} within range (1–{total_pages})"
100
+
101
+ except (ValueError, TypeError):
102
+ status = "broken-page"
103
+ reason = f"Invalid page value: {dest_page_raw}"
104
+
99
105
  except (ValueError, TypeError):
100
106
  status = "broken-page"
101
107
  reason = f"Invalid page value: {dest_page_raw}"
108
+
109
+ elif dest_page_raw is None:
110
+ status = "no-destinstion-page"
111
+ reason = "No destination page resolved"
112
+
102
113
  elif link_type == "Remote (GoToR)":
103
114
  remote_file = link.get("remote_file")
104
115
  if not remote_file:
@@ -149,40 +160,62 @@ def run_validation(
149
160
  elif status == "no-destinstion-page":
150
161
  no_destination_page_count += 1
151
162
  issues.append(link_with_val)
163
+
152
164
  # Validate TOC entries
153
165
  for entry in toc:
154
- target_page = int(entry.get("target_page"))
155
- if isinstance(target_page, int):
156
- if (1 <= target_page) and total_pages is None:
157
- reason = "Page count unknown"
166
+ try:
167
+ # Coerce to int; we expect 0-based index from the engine
168
+ # In the context of the ing Map, -1 acts as a "Sentinel Value." It represents a state that is strictly outside the "Machine" range
169
+ target_page_raw = int(entry.get("target_page", -1))
170
+ target_page_ref = PageRef.from_index(int(target_page_raw))
171
+
172
+ status = "valid"
173
+ reason = ""
174
+
175
+ # 1. Check for negative indices (anything below our START_INDEX)
176
+ if target_page_ref.machine < START_INDEX:
177
+ status = "broken-page"
178
+ broken_page_count += 1
179
+ # User sees Page 0 or lower as the problem
180
+ reason = f"TOC targets invalid page number: {target_page_ref.human}"
181
+
182
+ # 2. Case: total_pages is unknown
183
+ elif total_pages is None:
158
184
  status = "unknown-reasonableness"
159
185
  unknown_reasonableness_count += 1
160
- elif target_page < 1:
186
+ reason = f"Page {target_page_ref.human} unknown (could not verify total pages)"
187
+
188
+ # 3. Case: Out of range (Upper Bound)
189
+ # Index 100 in a 100-page doc (total_pages=100) is out of bounds
190
+ elif target_page_ref.machine >= total_pages:
161
191
  status = "broken-page"
162
- broken_count += 1
163
- reason = f"TOC targets negative page: {target_page}."
164
- elif 1 <= target_page <= total_pages:
192
+ broken_page_count += 1
193
+ reason = f"TOC targets page {target_page_ref.human} (out of 1–{total_pages})"
194
+
195
+ # 4. Valid Case
196
+ else:
197
+ status = "valid"
165
198
  valid_count += 1
199
+ # We skip issues.append for valid TOC entries to keep the issues list clean
166
200
  continue
167
- else:
168
- status = "broken-page"
169
- reason = f"TOC targets page {target_page} (out of 1–{total_pages})"
170
- broken_count += 1
171
- else:
201
+
202
+ except (ValueError, TypeError):
172
203
  status = "broken-page"
173
- reason = f"Invalid page: {target_page}"
174
- broken_count += 1
204
+ broken_page_count += 1
205
+ reason = f"Invalid page reference: {entry.get('target_page')}"
175
206
 
207
+ # Only reaches here if status is not "valid" (because of 'continue' above)
176
208
  issues.append({
177
209
  "type": "TOC Entry",
178
- "title": entry["title"],
179
- "level": entry["level"],
180
- "target_page": target_page,
210
+ "title": entry.get("title", "Untitled"),
211
+ "level": entry.get("level", 0),
212
+ "target_page": target_page_ref.machine, # Stored as 0-indexed for data consistency
181
213
  "validation": {"status": status, "reason": reason}
182
214
  })
183
-
215
+
216
+ total_checked = metadata.get("link_counts",{}).get("total_links_count",0) + metadata.get("link_counts",{}).get("toc_entry_count",0)
184
217
  summary_stats = {
185
- "total_checked": len(all_links) + len(toc),
218
+ "total_checked": total_checked,
186
219
  "valid": valid_count,
187
220
  "file-found": file_found_count,
188
221
  "broken-page": broken_page_count,
@@ -190,8 +223,7 @@ def run_validation(
190
223
  "no_destination_page_count": no_destination_page_count,
191
224
  "unknown-web": unknown_web_count,
192
225
  "unknown-reasonableness": unknown_reasonableness_count,
193
- "unknown-link": unknown_link_count,
194
- #"unknown": len(all_links) + len(toc) - valid_count - broken_count # nah this is not granuar enough
226
+ "unknown-link": unknown_link_count
195
227
  }
196
228
 
197
229
 
@@ -211,7 +243,9 @@ def run_validation(
211
243
  log(f"PDF Path = {get_friendly_path(pdf_path)}")
212
244
  log(f"Total items checked: {summary_stats['total_checked']}")
213
245
  log(f"✅ Valid: {summary_stats['valid']}")
214
- log(f"🌐 Web Addresses (Not Checked): {summary_stats['unknown-web']}")
246
+ #log(f" Valid: {summary_stats['valid']}")
247
+ #log(f"✅ Valid: {summary_stats['valid']}")
248
+ log(f"🌐 Web Addresses (Ping Each: OFF): {summary_stats['unknown-web']}")
215
249
  log(f"⚠️ Unknown Page Reasonableness (Due to Missing Total Page Count): {summary_stats['unknown-reasonableness']}")
216
250
  log(f"⚠️ Unsupported PDF Links: {summary_stats['unknown-link']}")
217
251
  log(f"❌ Broken Page Reference (Page number beyond scope of availability): {summary_stats['broken-page']}")
@@ -230,6 +264,14 @@ def run_validation(
230
264
  log("{:<5} | {:<12} | {:<30} | {}".format(i, link_type, text, reason))
231
265
  if len(issues) > 25:
232
266
  log(f"... and {len(issues) - 25} more issues")
267
+
268
+ elif summary_stats.get('total_checked', 0) == 0:
269
+ # Check if this was a total crash or just an empty PDF
270
+ if summary_stats.get('is_error_fallback'):
271
+ log("\nStatus: Validation could not be performed due to a processing error.")
272
+ else:
273
+ log("\nStatus: No links or TOC entries were found to validate.")
274
+
233
275
  else:
234
276
  log("Success: No broken links or TOC issues!")
235
277
 
@@ -249,111 +291,3 @@ def run_validation(
249
291
  }
250
292
 
251
293
  return validation_results
252
-
253
-
254
- def run_validation_more_readable_slop(pdf_path: str = None, pdf_library: str = "pypdf", check_external_links:bool = False) -> Dict[str, Any]:
255
- """
256
- Experimental. Ignore for now.
257
-
258
- Extends the report logic by programmatically testing every extracted link.
259
- Validates Internal Jumps (page bounds), External URIs (HTTP status),
260
- and Launch actions (file existence).
261
- """
262
- if check_external_links:
263
- import requests
264
-
265
- # 1. Setup Library Engine (Reuse logic)
266
- pdf_library = pdf_library.lower()
267
- if pdf_library == "pypdf":
268
- from pdflinkcheck.analyze_pypdf import extract_links_pypdf as extract_links
269
- else:
270
- from pdflinkcheck.analyze_pymupdf import extract_links_pymupdf as extract_links
271
-
272
- if pdf_path is None:
273
- pdf_path = get_first_pdf_in_cwd()
274
-
275
- if not pdf_path:
276
- print("Error: No PDF found for validation.")
277
- return {}
278
-
279
- print(f"\nValidating links in {Path(pdf_path).name}...")
280
-
281
- # 2. Extract links and initialize validation counters
282
- links = extract_links(pdf_path)
283
- total_links = len(links)
284
- results = {"valid": [], "broken": [], "error": []}
285
-
286
- # 3. Validation Loop
287
- for i, link in enumerate(links, 1):
288
- # Progress indicator for long manuals
289
- sys.stdout.write(f"\rChecking link {i}/{total_links}...")
290
- sys.stdout.flush()
291
-
292
- link_type = link.get('type')
293
- status = {"is_valid": False, "reason": "Unknown Type"}
294
-
295
- # --- A. Validate Internal Jumps ---
296
- if "Internal" in link_type:
297
- target_page = link.get('destination_page')
298
- if isinstance(target_page, int) and target_page > 0:
299
- # In a real run, you'd compare against reader.pages_count
300
- status = {"is_valid": True, "reason": "Resolves"}
301
- else:
302
- status = {"is_valid": False, "reason": f"Invalid Page: {target_page}"}
303
-
304
- # --- B. Validate Web URIs ---
305
- elif link_type == 'External (URI)':
306
-
307
- url = link.get('url')
308
- if url and url.startswith("http") and check_external_links:
309
- try:
310
- # Use a short timeout and HEAD request to be polite/fast
311
- resp = requests.head(url, timeout=5, allow_redirects=True)
312
- if resp.status_code < 400:
313
- status = {"is_valid": True, "reason": f"HTTP {resp.status_code}"}
314
- else:
315
- status = {"is_valid": False, "reason": f"HTTP {resp.status_code}"}
316
- except Exception as e:
317
- status = {"is_valid": False, "reason": "Connection Failed"}
318
- else:
319
- status = {"is_valid": False, "reason": "Malformed URL"}
320
-
321
- # --- C. Validate Local File/Launch Links ---
322
- elif link_type == 'Launch' or 'remote_file' in link:
323
- file_path = link.get('remote_file') or link.get('url')
324
- if file_path:
325
- # Clean URI formatting
326
- clean_path = file_path.replace("file://", "").replace("%20", " ")
327
- # Check relative to the PDF's location
328
- abs_path = Path(pdf_path).parent / clean_path
329
- if abs_path.exists():
330
- status = {"is_valid": True, "reason": "File Exists"}
331
- else:
332
- status = {"is_valid": False, "reason": "File Missing"}
333
-
334
- # Append result
335
- link['validation'] = status
336
- if status['is_valid']:
337
- results['valid'].append(link)
338
- else:
339
- results['broken'].append(link)
340
-
341
- print("\n" + "=" * SEP_COUNT)
342
- print(f"--- Validation Summary Stats for {Path(pdf_path).name} ---")
343
- print(f"Total Checked: {total_links}")
344
- print(f"✅ Valid: {len(results['valid'])}")
345
- print(f"❌ Broken: {len(results['broken'])}")
346
- print("=" * SEP_COUNT)
347
-
348
- # 4. Print Detail Report for Broken Links
349
- if results['broken']:
350
- print("\n## ❌ Broken Links Found:")
351
- print("{:<5} | {:<5} | {:<30} | {}".format("Idx", "Page", "Reason", "Target"))
352
- print("-" * SEP_COUNT)
353
- for i, link in enumerate(results['broken'], 1):
354
- target = link.get('url') or link.get('destination_page') or link.get('remote_file')
355
- print("{:<5} | {:<5} | {:<30} | {}".format(
356
- i, link['page'], link['validation']['reason'], str(target)[:30]
357
- ))
358
-
359
- return results
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # SPDX-License-Identifier: MIT
3
3
  # src/pdflinkcheck/version_info.py
4
-
4
+ from __future__ import annotations
5
5
  import re
6
6
  from pathlib import Path
7
7
  import sys
@@ -83,4 +83,4 @@ def get_version_from_pyproject() -> str:
83
83
  match = re.search(r'version\s*=\s*["\']([^"\']+)["\']', poetry_section.group(1))
84
84
  if match: return match.group(1)
85
85
 
86
- return "0.0.0"
86
+ return "0.0.0"