pdflinkcheck 1.1.73__py3-none-any.whl → 1.2.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdflinkcheck/__init__.py +88 -21
- pdflinkcheck/__main__.py +6 -0
- pdflinkcheck/analysis_pdfium.py +131 -0
- pdflinkcheck/{analyze_pymupdf.py → analysis_pymupdf.py} +109 -145
- pdflinkcheck/{analyze_pypdf.py → analysis_pypdf.py} +67 -37
- pdflinkcheck/cli.py +111 -116
- pdflinkcheck/data/I Have Questions.md +51 -0
- pdflinkcheck/data/LICENSE +20 -654
- pdflinkcheck/data/README.md +65 -67
- pdflinkcheck/data/icons/BoxArt-1080x1080.png +0 -0
- pdflinkcheck/data/icons/Logo-150x150.png +0 -0
- pdflinkcheck/data/icons/Logo-300x300.png +0 -0
- pdflinkcheck/data/icons/Logo-71x71.png +0 -0
- pdflinkcheck/data/icons/PosterArt-720x1080.png +0 -0
- pdflinkcheck/data/icons/SmallLogo-44x44.png +0 -0
- pdflinkcheck/data/icons/SplashScreen-620x300.png +0 -0
- pdflinkcheck/data/icons/StoreLogo-50x50.png +0 -0
- pdflinkcheck/data/icons/WideLogo-310x150.png +0 -0
- pdflinkcheck/data/icons/red_pdf_512px.ico +0 -0
- pdflinkcheck/data/pyproject.toml +25 -37
- pdflinkcheck/data/themes/forest/forest-dark/border-accent-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/border-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/border-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/border-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/border-invalid.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/card.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-tri-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-tri-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-tri-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-unsel-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-unsel-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-unsel-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/check-unsel-pressed.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/combo-button-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/combo-button-focus.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/combo-button-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/down.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/empty.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/hor-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/hor-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/hor-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/notebook.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/off-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/off-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/off-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/on-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/on-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/on-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-tri-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-tri-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-tri-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-pressed.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/rect-accent-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/rect-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/rect-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/rect-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/right.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/scale-hor.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/scale-vert.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/separator.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/sizegrip.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-focus.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/spin-button-up.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/tab-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/tab-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/tab-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/tree-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/tree-pressed.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/up.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/vert-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/vert-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark/vert-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-dark.tcl +536 -0
- pdflinkcheck/data/themes/forest/forest-light/border-accent-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/border-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/border-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/border-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/border-invalid.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/card.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-tri-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-tri-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-tri-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-unsel-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-unsel-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-unsel-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/check-unsel-pressed.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/combo-button-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/combo-button-focus.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/combo-button-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/down-focus.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/down.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/empty.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/hor-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/hor-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/hor-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/notebook.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/off-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/off-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/off-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/on-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/on-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/on-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-tri-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-tri-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-tri-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-unsel-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-unsel-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-unsel-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/radio-unsel-pressed.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/rect-accent-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/rect-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/rect-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/rect-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/right-focus.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/right.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/scale-hor.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/scale-vert.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/separator.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/sizegrip.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/spin-button-down-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/spin-button-down-focus.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/spin-button-up.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/tab-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/tab-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/tab-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/thumb-hor-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/thumb-hor-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/thumb-hor-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/thumb-vert-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/thumb-vert-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/thumb-vert-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/tree-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/tree-pressed.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/up.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/vert-accent.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/vert-basic.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light/vert-hover.png +0 -0
- pdflinkcheck/data/themes/forest/forest-light.tcl +544 -0
- pdflinkcheck/datacopy.py +18 -1
- pdflinkcheck/dev.py +12 -25
- pdflinkcheck/environment.py +76 -0
- pdflinkcheck/gui.py +366 -457
- pdflinkcheck/helpers.py +88 -0
- pdflinkcheck/io.py +27 -23
- pdflinkcheck/report.py +692 -121
- pdflinkcheck/security.py +189 -0
- pdflinkcheck/splash.py +38 -0
- pdflinkcheck/stdlib_server.py +14 -20
- pdflinkcheck/stdlib_server_alt.py +571 -0
- pdflinkcheck/tk_utils.py +188 -0
- pdflinkcheck/update_msix_version.py +49 -0
- pdflinkcheck/validate.py +129 -218
- pdflinkcheck/version_info.py +6 -3
- {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.2.29.dist-info}/METADATA +84 -81
- pdflinkcheck-1.2.29.dist-info/RECORD +183 -0
- pdflinkcheck-1.2.29.dist-info/WHEEL +5 -0
- {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.2.29.dist-info}/entry_points.txt +0 -1
- pdflinkcheck-1.2.29.dist-info/licenses/LICENSE +27 -0
- pdflinkcheck-1.2.29.dist-info/licenses/LICENSE-MIT +9 -0
- pdflinkcheck-1.2.29.dist-info/top_level.txt +1 -0
- pdflinkcheck/analyze_pypdf_v2.py +0 -218
- pdflinkcheck-1.1.73.dist-info/RECORD +0 -21
- pdflinkcheck-1.1.73.dist-info/WHEEL +0 -4
- /pdflinkcheck-1.1.73.dist-info/licenses/LICENSE → /pdflinkcheck-1.2.29.dist-info/licenses/LICENSE-AGPL3 +0 -0
pdflinkcheck/analyze_pypdf_v2.py
DELETED
|
@@ -1,218 +0,0 @@
|
|
|
1
|
-
# src/pdflinkcheck/analyze_pypdf_v2.py
|
|
2
|
-
import sys
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
import logging
|
|
5
|
-
from typing import Dict, Any, List
|
|
6
|
-
|
|
7
|
-
from pypdf import PdfReader
|
|
8
|
-
from pypdf.generic import Destination, NameObject, IndirectObject
|
|
9
|
-
|
|
10
|
-
from pdflinkcheck.report import run_report
|
|
11
|
-
#from pdflinkcheck.validate import run_validation
|
|
12
|
-
|
|
13
|
-
"""
|
|
14
|
-
Inspect target PDF for both URI links and GoTo links, using only pypdf (no PyMuPDF/Fitz).
|
|
15
|
-
Fully fixed and improved version as of December 2025 (compatible with pypdf >= 4.0).
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
def get_anchor_text_pypdf(page, rect) -> str:
    """
    Return the visible text drawn inside (or just around) a link rectangle.

    The page text is walked with a visitor callback; every text run whose
    approximate centre falls inside the tolerance-padded rectangle is kept,
    in the order the visitor reports it.

    Args:
        page: Object exposing ``extract_text(visitor_text=callback)``. The
            callback is invoked as ``(text, cm, tm, font_dict, font_size)``
            and only ``text``, ``tm[4]``, ``tm[5]`` and ``font_size`` are read.
        rect: Sequence whose first four entries are the rectangle
            coordinates. Either corner order is accepted.

    Returns:
        The whitespace-normalised text found in the rectangle,
        ``"Graphic/Empty Link"`` when no text matched, or
        ``"N/A: Missing Rect"`` when ``rect`` is empty or cannot be read as
        four numbers.
    """
    if not rect:
        return "N/A: Missing Rect"

    # A truthy but malformed rect (too short, non-numeric entries) used to
    # raise here and abort the whole scan; treat it like a missing rect.
    try:
        xa, ya, xb, yb = (float(rect[i]) for i in range(4))
    except (IndexError, KeyError, TypeError, ValueError):
        return "N/A: Missing Rect"

    # Some PDF generators write the rect as [x_max, y_max, x_min, y_min],
    # so normalise to min/max before comparing.
    x_min, x_max = min(xa, xb), max(xa, xb)
    y_min, y_max = min(ya, yb), max(ya, yb)

    # Asymmetric tolerance: tight vertically (3pt) so neighbouring lines are
    # not picked up, wide horizontally (10pt) to absorb kerning/spacing.
    h_tol, v_tol = 10, 3
    left, right = x_min - h_tol, x_max + h_tol
    bottom, top = y_min - v_tol, y_max + v_tol

    parts = []

    def visitor_body(text, cm, tm, font_dict, font_size):
        # Fall back to 10 when the font size is missing or zero.
        size = font_size if font_size else 10
        # tm[4]/tm[5] is the text insertion point; nudge it right and up by a
        # fraction of the font size to approximate the glyph centre.
        centre_x = tm[4] + size / 4
        centre_y = tm[5] + size / 3
        inside = left <= centre_x <= right and bottom <= centre_y <= top
        if inside and text.strip():
            parts.append(text)

    page.extract_text(visitor_text=visitor_body)

    cleaned = " ".join("".join(parts).split())
    return cleaned if cleaned else "Graphic/Empty Link"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def resolve_pypdf_destination(reader: PdfReader, dest) -> str:
    """
    Translate a link destination into a 1-based page number string.

    Args:
        reader: The reader whose ``get_destination_page_number`` is used for
            the lookup.
        dest: The value taken from an annotation's ``/Dest`` or from an
            action's ``/D`` entry; may be ``None``.

    Returns:
        ``"N/A"`` when ``dest`` is ``None``, the page number (counted from 1)
        as a string when the lookup succeeds, or ``"Unknown/Error"`` when
        anything in the lookup raises.
    """
    if dest is None:
        return "N/A"

    try:
        # Indirect references (and name objects) are dereferenced first.
        if isinstance(dest, (IndirectObject, NameObject)):
            dest = dest.get_object()

        # NOTE(review): values that are not Destination objects (plain-string
        # named destinations, raw explicit arrays) presumably fail in the
        # reader and end up as "Unknown/Error" - confirm against pypdf.
        zero_based = reader.get_destination_page_number(dest)
        return str(zero_based + 1)
    except Exception:
        return "Unknown/Error"
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def extract_links_pypdf(pdf_path: Path | str) -> List[Dict[str, Any]]:
    """
    Collect every ``/Link`` annotation in a PDF as a list of plain dicts.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        One dict per link annotation, in page order. Every dict carries
        ``page`` (1-based), ``rect`` (list or ``None``), ``link_text``,
        ``type`` and ``target``. Depending on the link kind it also carries
        ``url`` (external URI), ``remote_file`` (remote GoToR) or
        ``destination_page`` (internal GoTo/Dest). Links matching none of
        these keep ``type="Other Action"`` and ``target="Unknown"``.

    Raises:
        Whatever ``PdfReader`` raises when the file cannot be opened or
        parsed; only per-annotation dereference errors are swallowed.
    """
    reader = PdfReader(pdf_path)
    all_links: List[Dict[str, Any]] = []

    for page_num, page in enumerate(reader.pages, start=1):
        if "/Annots" not in page:
            continue

        for annot_ref in page["/Annots"]:
            try:
                annot = annot_ref.get_object()
            except Exception:
                continue  # Corrupted annotation - skip

            if annot.get("/Subtype") != "/Link":
                continue

            rect = annot.get("/Rect")
            link_dict: Dict[str, Any] = {
                "page": page_num,
                "rect": list(rect) if rect else None,
                "link_text": get_anchor_text_pypdf(page, rect),
                "type": "Other Action",
                "target": "Unknown",
            }

            action = annot.get("/A")

            if action and action.get("/URI"):
                # External URI link
                uri = str(action["/URI"])
                link_dict.update({
                    "type": "External (URI)",
                    "url": uri,
                    "target": uri,
                })

            elif action and action.get("/S") == "/GoToR":
                # Remote GoToR (links to another PDF file). This must be
                # tested before the generic /D check below: a GoToR action
                # carries its own /D, which would otherwise be resolved
                # against this document and reported as an internal link.
                file_spec = action.get("/F")
                remote_file = str(file_spec) if file_spec else "Unknown File"
                remote_dest = action.get("/D")
                remote_target = f"File: {remote_file}"
                if remote_dest:
                    remote_target += f" → Dest: {remote_dest}"
                link_dict.update({
                    "type": "Remote (GoToR)",
                    "remote_file": remote_file,
                    "target": remote_target,
                })

            elif annot.get("/Dest") or (action and action.get("/D")):
                # Internal GoTo - can be /Dest directly or inside /A /D
                dest = annot.get("/Dest") or (action and action["/D"])
                target_page = resolve_pypdf_destination(reader, dest)
                link_dict.update({
                    "type": "Internal (GoTo/Dest)",
                    "destination_page": target_page,
                    "target": f"Page {target_page}",
                })

            all_links.append(link_dict)

    return all_links
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def extract_toc_pypdf(pdf_path: Path | str) -> List[Dict[str, Any]]:
    """
    Flatten the PDF outline (bookmarks) into a list of dicts.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        One dict per outline entry in document order, each with ``level``
        (1 for top-level entries, +1 per nested list), ``title``
        (``"(Untitled)"`` when empty) and ``target_page`` (1-based page
        number, or ``"N/A"`` when it cannot be resolved). An empty list is
        returned when there is no outline or when anything fails; in the
        failure case the error is printed to stderr.
    """
    try:
        reader = PdfReader(pdf_path)
        bookmarks = reader.outline
        if not bookmarks:
            return []

        def walk(entries, depth=1):
            # Yield (depth, bookmark) pairs depth-first; nested lists hold
            # the children of the preceding entry and sit one level deeper.
            for entry in entries:
                if isinstance(entry, Destination):
                    yield depth, entry
                elif isinstance(entry, list):
                    yield from walk(entry, depth + 1)

        rows: List[Dict[str, Any]] = []
        for depth, bookmark in walk(bookmarks):
            try:
                page_label = reader.get_destination_page_number(bookmark) + 1
            except Exception:
                page_label = "N/A"

            rows.append({
                "level": depth,
                "title": bookmark.title or "(Untitled)",
                "target_page": page_label,
            })

        return rows

    except Exception as e:
        print(f"TOC extraction error: {e}", file=sys.stderr)
        return []
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
def call_stable():
    """
    Run the report using the pypdf backend.

    Delegates to ``run_report`` with ``library_pdf="pypdf"``; this is what
    executes when the module is run as a script.
    """
    run_report(library_pdf="pypdf")
    # NOTE: the validation pass is intentionally left disabled here:
    # run_validation(library_pdf="pypdf")


if __name__ == "__main__":
    call_stable()
|
|
218
|
-
# pypdf version updates
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
pdflinkcheck/__init__.py,sha256=KyoFlScM3kPrp1HjcxHDFEf4YflsoYclVF99-rerl3E,2510
|
|
2
|
-
pdflinkcheck/analyze_pymupdf.py,sha256=Be17KJQnTX9OoAluoE2GzPXC3mDCo7VGCNuwc9ilosc,12452
|
|
3
|
-
pdflinkcheck/analyze_pypdf.py,sha256=gHF9o6EY4sie727vS6YjTCQSzw_XWZape4xEk-l4lRI,6397
|
|
4
|
-
pdflinkcheck/analyze_pypdf_v2.py,sha256=dAvq2OoiN1MjptWSgOrAlArg0A98Hvpr105BKXJBrjE,7563
|
|
5
|
-
pdflinkcheck/cli.py,sha256=8PTkbK4msbhYB2NUCkUv8DWU7lO2qYg8qQKT_cB2U6w,12634
|
|
6
|
-
pdflinkcheck/data/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
7
|
-
pdflinkcheck/data/README.md,sha256=9tM77vu5jTpFQplL2A-ysyVyOQg8QZISsmtcmEfQXZM,11650
|
|
8
|
-
pdflinkcheck/data/pyproject.toml,sha256=IKRstIH-yOU_cdZ-fThQikyWPtgdbrOoADxf6VGe958,2955
|
|
9
|
-
pdflinkcheck/datacopy.py,sha256=pZysPvfsvRe3qvA-du8XJvwZFxEOB_1ygEvhEj_Zj2Y,2503
|
|
10
|
-
pdflinkcheck/dev.py,sha256=e-0353spmVPPQGB2aJ_QbEDtJQGQFBSLrrfSccJGwII,4783
|
|
11
|
-
pdflinkcheck/gui.py,sha256=TYjP0vCDtuyRYMi6-c2JdCgif4FWNKyrwdye13FTv_8,24434
|
|
12
|
-
pdflinkcheck/io.py,sha256=ZdvKUumFIR8Ql89WToaVDqnosAo43H6sCRnbqwspE80,7943
|
|
13
|
-
pdflinkcheck/report.py,sha256=HynyhOD1E0VCbH3Z_fL3dxTz_4qdmMZUW8Ahe_2Nk-U,11896
|
|
14
|
-
pdflinkcheck/stdlib_server.py,sha256=NKDPi-cfrBnYtG7mIxSI1eR1XSt8bxyan9YpdDAwhEU,6138
|
|
15
|
-
pdflinkcheck/validate.py,sha256=EkxZqHsdTJVlilumiG4F3l6_0SrYNCOkHb-b3gtotEA,14455
|
|
16
|
-
pdflinkcheck/version_info.py,sha256=dRVbs9U97YKisB1cLqVC2IoNrHCYw3z9TG8aldqTVOk,3211
|
|
17
|
-
pdflinkcheck-1.1.73.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
18
|
-
pdflinkcheck-1.1.73.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
|
|
19
|
-
pdflinkcheck-1.1.73.dist-info/entry_points.txt,sha256=OJs4WkAziNGSoZ2KP0FgYOj2JdL6EW8UphJebWJnz3c,55
|
|
20
|
-
pdflinkcheck-1.1.73.dist-info/METADATA,sha256=piuZTNoUyZvhw6uvlQufU2pIG2jmF9D_7LhTi-r47Hc,13568
|
|
21
|
-
pdflinkcheck-1.1.73.dist-info/RECORD,,
|
|
File without changes
|