pdflinkcheck 1.1.72__py3-none-any.whl → 1.1.73__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- pdflinkcheck/data/pyproject.toml +1 -1
- pdflinkcheck/report.py +17 -15
- pdflinkcheck/validate.py +10 -8
- {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.73.dist-info}/METADATA +1 -1
- {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.73.dist-info}/RECORD +8 -8
- {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.73.dist-info}/WHEEL +0 -0
- {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.73.dist-info}/entry_points.txt +0 -0
- {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.73.dist-info}/licenses/LICENSE +0 -0
pdflinkcheck/data/pyproject.toml
CHANGED
pdflinkcheck/report.py
CHANGED
|
@@ -8,6 +8,8 @@ import pyhabitat
|
|
|
8
8
|
from pdflinkcheck.io import error_logger, export_report_json, export_report_txt, get_first_pdf_in_cwd, get_friendly_path, LOG_FILE_PATH
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
SEP_COUNT=28
|
|
12
|
+
|
|
11
13
|
def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "JSON", pdf_library: str = "pypdf", print_bool:bool=True) -> Dict[str, Any]:
|
|
12
14
|
"""
|
|
13
15
|
Core high-level PDF link analysis logic.
|
|
@@ -91,22 +93,22 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
|
|
|
91
93
|
uri_and_other = uri_links + other_links
|
|
92
94
|
|
|
93
95
|
# --- ANALYSIS SUMMARY (Using your print logic) ---
|
|
94
|
-
log("\n" + "=" *
|
|
96
|
+
log("\n" + "=" * SEP_COUNT)
|
|
95
97
|
log(f"--- Link Analysis Results for {Path(pdf_path).name} ---")
|
|
96
98
|
log(f"Total active links: {len(extracted_links)} (External: {len(uri_links)}, Internal Jumps: {total_internal_links}, Other: {len(other_links)})")
|
|
97
99
|
log(f"Total **structural TOC entries (bookmarks)** found: {toc_entry_count}")
|
|
98
|
-
log("=" *
|
|
100
|
+
log("=" * SEP_COUNT)
|
|
99
101
|
|
|
100
102
|
# --- Section 1: TOC ---
|
|
101
103
|
str_structural_toc = print_structural_toc(structural_toc)
|
|
102
104
|
log(str_structural_toc)
|
|
103
105
|
|
|
104
106
|
# --- Section 2: ACTIVE INTERNAL JUMPS ---
|
|
105
|
-
log("\n" + "=" *
|
|
107
|
+
log("\n" + "=" * SEP_COUNT)
|
|
106
108
|
log(f"## Active Internal Jumps (GoTo & Resolved Actions) - {total_internal_links} found")
|
|
107
|
-
log("=" *
|
|
109
|
+
log("=" * SEP_COUNT)
|
|
108
110
|
log("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Jumps To Page"))
|
|
109
|
-
log("-" *
|
|
111
|
+
log("-" * SEP_COUNT)
|
|
110
112
|
|
|
111
113
|
all_internal = goto_links + resolved_action_links
|
|
112
114
|
if total_internal_links > 0:
|
|
@@ -118,13 +120,13 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
|
|
|
118
120
|
log(f"... and {len(all_internal) - limit} more links (use --max-links 0 to show all).")
|
|
119
121
|
else:
|
|
120
122
|
log(" No internal GoTo or Resolved Action links found.")
|
|
121
|
-
log("-" *
|
|
123
|
+
log("-" * SEP_COUNT)
|
|
122
124
|
|
|
123
125
|
# --- Section 3: ACTIVE URI LINKS ---
|
|
124
|
-
log("\n" + "=" *
|
|
126
|
+
log("\n" + "=" * SEP_COUNT)
|
|
125
127
|
log(f"## Active URI Links (External & Other) - {len(uri_and_other)} found")
|
|
126
128
|
log("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Target URI/Action"))
|
|
127
|
-
log("=" *
|
|
129
|
+
log("=" * SEP_COUNT)
|
|
128
130
|
|
|
129
131
|
if uri_and_other:
|
|
130
132
|
for i, link in enumerate(uri_and_other[:limit], 1):
|
|
@@ -136,7 +138,7 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
|
|
|
136
138
|
|
|
137
139
|
else:
|
|
138
140
|
log(" No external or 'Other' links found.")
|
|
139
|
-
log("-" *
|
|
141
|
+
log("-" * SEP_COUNT)
|
|
140
142
|
|
|
141
143
|
log("\n--- Analysis Complete ---\n")
|
|
142
144
|
|
|
@@ -209,9 +211,9 @@ def print_structural_toc_print(structural_toc:dict)->str|None:
|
|
|
209
211
|
Args:
|
|
210
212
|
structural_toc: A list of TOC dictionaries.
|
|
211
213
|
"""
|
|
212
|
-
print("\n" + "=" *
|
|
214
|
+
print("\n" + "=" * SEP_COUNT)
|
|
213
215
|
print("## Structural Table of Contents (PDF Bookmarks/Outline)")
|
|
214
|
-
print("=" *
|
|
216
|
+
print("=" * SEP_COUNT)
|
|
215
217
|
if not structural_toc:
|
|
216
218
|
print("No structural TOC (bookmarks/outline) found.")
|
|
217
219
|
return
|
|
@@ -228,7 +230,7 @@ def print_structural_toc_print(structural_toc:dict)->str|None:
|
|
|
228
230
|
page_str = str(item['target_page']).rjust(page_width)
|
|
229
231
|
print(f"{indent}{item['title']} . . . page {page_str}")
|
|
230
232
|
|
|
231
|
-
print("-" *
|
|
233
|
+
print("-" * SEP_COUNT)
|
|
232
234
|
|
|
233
235
|
|
|
234
236
|
def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
|
|
@@ -243,9 +245,9 @@ def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
|
|
|
243
245
|
A formatted string of the structural TOC.
|
|
244
246
|
"""
|
|
245
247
|
lines = []
|
|
246
|
-
lines.append("\n" + "=" *
|
|
248
|
+
lines.append("\n" + "=" * SEP_COUNT)
|
|
247
249
|
lines.append("## Structural Table of Contents (PDF Bookmarks/Outline)")
|
|
248
|
-
lines.append("=" *
|
|
250
|
+
lines.append("=" * SEP_COUNT)
|
|
249
251
|
|
|
250
252
|
if not structural_toc:
|
|
251
253
|
msg = "No structural TOC (bookmarks/outline) found."
|
|
@@ -269,7 +271,7 @@ def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
|
|
|
269
271
|
|
|
270
272
|
lines.append(f"{indent}{item['title']} . . . page {page_str}")
|
|
271
273
|
|
|
272
|
-
lines.append("-" *
|
|
274
|
+
lines.append("-" * SEP_COUNT)
|
|
273
275
|
|
|
274
276
|
# Final aggregation
|
|
275
277
|
str_structural_toc = "\n".join(lines)
|
pdflinkcheck/validate.py
CHANGED
|
@@ -7,6 +7,8 @@ from typing import Dict, Any
|
|
|
7
7
|
from pdflinkcheck.report import run_report
|
|
8
8
|
from pdflinkcheck.io import get_friendly_path, export_validation_json
|
|
9
9
|
|
|
10
|
+
SEP_COUNT=28
|
|
11
|
+
|
|
10
12
|
def run_validation(
|
|
11
13
|
report_results: Dict[str, Any],
|
|
12
14
|
pdf_path: str,
|
|
@@ -192,9 +194,9 @@ def run_validation(
|
|
|
192
194
|
def log(msg: str):
|
|
193
195
|
validation_buffer.append(msg)
|
|
194
196
|
|
|
195
|
-
log("\n" + "=" *
|
|
197
|
+
log("\n" + "=" * SEP_COUNT)
|
|
196
198
|
log("## Validation Results")
|
|
197
|
-
log("=" *
|
|
199
|
+
log("=" * SEP_COUNT)
|
|
198
200
|
log(f"PDF Path = {get_friendly_path(pdf_path)}")
|
|
199
201
|
log(f"Total items checked: {summary_stats['total_checked']}")
|
|
200
202
|
log(f"✅ Valid: {summary_stats['valid']}")
|
|
@@ -203,12 +205,12 @@ def run_validation(
|
|
|
203
205
|
log(f"⚠️ Unsupported PDF Links: {summary_stats['unknown-link']}")
|
|
204
206
|
log(f"❌ Broken Page Reference: {summary_stats['broken-page']}")
|
|
205
207
|
log(f"❌ Broken File Reference: {summary_stats['broken-file']}")
|
|
206
|
-
log("=" *
|
|
208
|
+
log("=" * SEP_COUNT)
|
|
207
209
|
|
|
208
210
|
if issues:
|
|
209
211
|
log("\n## Issues Found")
|
|
210
212
|
log("{:<5} | {:<12} | {:<30} | {}".format("Idx", "Type", "Text", "Problem"))
|
|
211
|
-
log("-" *
|
|
213
|
+
log("-" * SEP_COUNT)
|
|
212
214
|
for i, issue in enumerate(issues[:25], 1):
|
|
213
215
|
link_type = issue.get("type", "Link")
|
|
214
216
|
text = issue.get("link_text", "") or issue.get("title", "") or "N/A"
|
|
@@ -330,18 +332,18 @@ def run_validation_more_readable_slop(pdf_path: str = None, pdf_library: str = "
|
|
|
330
332
|
else:
|
|
331
333
|
results['broken'].append(link)
|
|
332
334
|
|
|
333
|
-
print("\n" + "=" *
|
|
335
|
+
print("\n" + "=" * SEP_COUNT)
|
|
334
336
|
print(f"--- Validation Summary Stats for {Path(pdf_path).name} ---")
|
|
335
337
|
print(f"Total Checked: {total_links}")
|
|
336
338
|
print(f"✅ Valid: {len(results['valid'])}")
|
|
337
339
|
print(f"❌ Broken: {len(results['broken'])}")
|
|
338
|
-
print("=" *
|
|
340
|
+
print("=" * SEP_COUNT)
|
|
339
341
|
|
|
340
342
|
# 4. Print Detail Report for Broken Links
|
|
341
343
|
if results['broken']:
|
|
342
344
|
print("\n## ❌ Broken Links Found:")
|
|
343
345
|
print("{:<5} | {:<5} | {:<30} | {}".format("Idx", "Page", "Reason", "Target"))
|
|
344
|
-
print("-" *
|
|
346
|
+
print("-" * SEP_COUNT)
|
|
345
347
|
for i, link in enumerate(results['broken'], 1):
|
|
346
348
|
target = link.get('url') or link.get('destination_page') or link.get('remote_file')
|
|
347
349
|
print("{:<5} | {:<5} | {:<30} | {}".format(
|
|
@@ -377,4 +379,4 @@ if __name__ == "__main__":
|
|
|
377
379
|
print_bool=True
|
|
378
380
|
)
|
|
379
381
|
|
|
380
|
-
export_validation_results()
|
|
382
|
+
export_validation_results()
|
|
@@ -5,17 +5,17 @@ pdflinkcheck/analyze_pypdf_v2.py,sha256=dAvq2OoiN1MjptWSgOrAlArg0A98Hvpr105BKXJB
|
|
|
5
5
|
pdflinkcheck/cli.py,sha256=8PTkbK4msbhYB2NUCkUv8DWU7lO2qYg8qQKT_cB2U6w,12634
|
|
6
6
|
pdflinkcheck/data/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
7
7
|
pdflinkcheck/data/README.md,sha256=9tM77vu5jTpFQplL2A-ysyVyOQg8QZISsmtcmEfQXZM,11650
|
|
8
|
-
pdflinkcheck/data/pyproject.toml,sha256=
|
|
8
|
+
pdflinkcheck/data/pyproject.toml,sha256=IKRstIH-yOU_cdZ-fThQikyWPtgdbrOoADxf6VGe958,2955
|
|
9
9
|
pdflinkcheck/datacopy.py,sha256=pZysPvfsvRe3qvA-du8XJvwZFxEOB_1ygEvhEj_Zj2Y,2503
|
|
10
10
|
pdflinkcheck/dev.py,sha256=e-0353spmVPPQGB2aJ_QbEDtJQGQFBSLrrfSccJGwII,4783
|
|
11
11
|
pdflinkcheck/gui.py,sha256=TYjP0vCDtuyRYMi6-c2JdCgif4FWNKyrwdye13FTv_8,24434
|
|
12
12
|
pdflinkcheck/io.py,sha256=ZdvKUumFIR8Ql89WToaVDqnosAo43H6sCRnbqwspE80,7943
|
|
13
|
-
pdflinkcheck/report.py,sha256=
|
|
13
|
+
pdflinkcheck/report.py,sha256=HynyhOD1E0VCbH3Z_fL3dxTz_4qdmMZUW8Ahe_2Nk-U,11896
|
|
14
14
|
pdflinkcheck/stdlib_server.py,sha256=NKDPi-cfrBnYtG7mIxSI1eR1XSt8bxyan9YpdDAwhEU,6138
|
|
15
|
-
pdflinkcheck/validate.py,sha256=
|
|
15
|
+
pdflinkcheck/validate.py,sha256=EkxZqHsdTJVlilumiG4F3l6_0SrYNCOkHb-b3gtotEA,14455
|
|
16
16
|
pdflinkcheck/version_info.py,sha256=dRVbs9U97YKisB1cLqVC2IoNrHCYw3z9TG8aldqTVOk,3211
|
|
17
|
-
pdflinkcheck-1.1.
|
|
18
|
-
pdflinkcheck-1.1.
|
|
19
|
-
pdflinkcheck-1.1.
|
|
20
|
-
pdflinkcheck-1.1.
|
|
21
|
-
pdflinkcheck-1.1.
|
|
17
|
+
pdflinkcheck-1.1.73.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
18
|
+
pdflinkcheck-1.1.73.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
|
|
19
|
+
pdflinkcheck-1.1.73.dist-info/entry_points.txt,sha256=OJs4WkAziNGSoZ2KP0FgYOj2JdL6EW8UphJebWJnz3c,55
|
|
20
|
+
pdflinkcheck-1.1.73.dist-info/METADATA,sha256=piuZTNoUyZvhw6uvlQufU2pIG2jmF9D_7LhTi-r47Hc,13568
|
|
21
|
+
pdflinkcheck-1.1.73.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|