pdflinkcheck 1.1.72__py3-none-any.whl → 1.1.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. pdflinkcheck/__init__.py +2 -5
  2. pdflinkcheck/analyze_pymupdf.py +12 -6
  3. pdflinkcheck/analyze_pypdf.py +25 -7
  4. pdflinkcheck/analyze_pypdf_v2.py +5 -6
  5. pdflinkcheck/cli.py +82 -91
  6. pdflinkcheck/data/I Have Questions.md +51 -0
  7. pdflinkcheck/data/LICENSE +17 -654
  8. pdflinkcheck/data/README.md +49 -49
  9. pdflinkcheck/data/icons/BoxArt-1080x1080.png +0 -0
  10. pdflinkcheck/data/icons/Logo-150x150.png +0 -0
  11. pdflinkcheck/data/icons/Logo-300x300.png +0 -0
  12. pdflinkcheck/data/icons/Logo-71x71.png +0 -0
  13. pdflinkcheck/data/icons/PosterArt-720x1080.png +0 -0
  14. pdflinkcheck/data/icons/SmallLogo-44x44.png +0 -0
  15. pdflinkcheck/data/icons/SplashScreen-620x300.png +0 -0
  16. pdflinkcheck/data/icons/StoreLogo-50x50.png +0 -0
  17. pdflinkcheck/data/icons/WideLogo-310x150.png +0 -0
  18. pdflinkcheck/data/icons/red_pdf_512px.ico +0 -0
  19. pdflinkcheck/data/pyproject.toml +20 -23
  20. pdflinkcheck/data/themes/forest/forest-dark/border-accent-hover.png +0 -0
  21. pdflinkcheck/data/themes/forest/forest-dark/border-accent.png +0 -0
  22. pdflinkcheck/data/themes/forest/forest-dark/border-basic.png +0 -0
  23. pdflinkcheck/data/themes/forest/forest-dark/border-hover.png +0 -0
  24. pdflinkcheck/data/themes/forest/forest-dark/border-invalid.png +0 -0
  25. pdflinkcheck/data/themes/forest/forest-dark/card.png +0 -0
  26. pdflinkcheck/data/themes/forest/forest-dark/check-accent.png +0 -0
  27. pdflinkcheck/data/themes/forest/forest-dark/check-basic.png +0 -0
  28. pdflinkcheck/data/themes/forest/forest-dark/check-hover.png +0 -0
  29. pdflinkcheck/data/themes/forest/forest-dark/check-tri-accent.png +0 -0
  30. pdflinkcheck/data/themes/forest/forest-dark/check-tri-basic.png +0 -0
  31. pdflinkcheck/data/themes/forest/forest-dark/check-tri-hover.png +0 -0
  32. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-accent.png +0 -0
  33. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-basic.png +0 -0
  34. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-hover.png +0 -0
  35. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-pressed.png +0 -0
  36. pdflinkcheck/data/themes/forest/forest-dark/combo-button-basic.png +0 -0
  37. pdflinkcheck/data/themes/forest/forest-dark/combo-button-focus.png +0 -0
  38. pdflinkcheck/data/themes/forest/forest-dark/combo-button-hover.png +0 -0
  39. pdflinkcheck/data/themes/forest/forest-dark/down.png +0 -0
  40. pdflinkcheck/data/themes/forest/forest-dark/empty.png +0 -0
  41. pdflinkcheck/data/themes/forest/forest-dark/hor-accent.png +0 -0
  42. pdflinkcheck/data/themes/forest/forest-dark/hor-basic.png +0 -0
  43. pdflinkcheck/data/themes/forest/forest-dark/hor-hover.png +0 -0
  44. pdflinkcheck/data/themes/forest/forest-dark/notebook.png +0 -0
  45. pdflinkcheck/data/themes/forest/forest-dark/off-accent.png +0 -0
  46. pdflinkcheck/data/themes/forest/forest-dark/off-basic.png +0 -0
  47. pdflinkcheck/data/themes/forest/forest-dark/off-hover.png +0 -0
  48. pdflinkcheck/data/themes/forest/forest-dark/on-accent.png +0 -0
  49. pdflinkcheck/data/themes/forest/forest-dark/on-basic.png +0 -0
  50. pdflinkcheck/data/themes/forest/forest-dark/on-hover.png +0 -0
  51. pdflinkcheck/data/themes/forest/forest-dark/radio-accent.png +0 -0
  52. pdflinkcheck/data/themes/forest/forest-dark/radio-basic.png +0 -0
  53. pdflinkcheck/data/themes/forest/forest-dark/radio-hover.png +0 -0
  54. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-accent.png +0 -0
  55. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-basic.png +0 -0
  56. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-hover.png +0 -0
  57. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-accent.png +0 -0
  58. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-basic.png +0 -0
  59. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-hover.png +0 -0
  60. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-pressed.png +0 -0
  61. pdflinkcheck/data/themes/forest/forest-dark/rect-accent-hover.png +0 -0
  62. pdflinkcheck/data/themes/forest/forest-dark/rect-accent.png +0 -0
  63. pdflinkcheck/data/themes/forest/forest-dark/rect-basic.png +0 -0
  64. pdflinkcheck/data/themes/forest/forest-dark/rect-hover.png +0 -0
  65. pdflinkcheck/data/themes/forest/forest-dark/right.png +0 -0
  66. pdflinkcheck/data/themes/forest/forest-dark/scale-hor.png +0 -0
  67. pdflinkcheck/data/themes/forest/forest-dark/scale-vert.png +0 -0
  68. pdflinkcheck/data/themes/forest/forest-dark/separator.png +0 -0
  69. pdflinkcheck/data/themes/forest/forest-dark/sizegrip.png +0 -0
  70. pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-basic.png +0 -0
  71. pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-focus.png +0 -0
  72. pdflinkcheck/data/themes/forest/forest-dark/spin-button-up.png +0 -0
  73. pdflinkcheck/data/themes/forest/forest-dark/tab-accent.png +0 -0
  74. pdflinkcheck/data/themes/forest/forest-dark/tab-basic.png +0 -0
  75. pdflinkcheck/data/themes/forest/forest-dark/tab-hover.png +0 -0
  76. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-accent.png +0 -0
  77. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-basic.png +0 -0
  78. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-hover.png +0 -0
  79. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-accent.png +0 -0
  80. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-basic.png +0 -0
  81. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-hover.png +0 -0
  82. pdflinkcheck/data/themes/forest/forest-dark/tree-basic.png +0 -0
  83. pdflinkcheck/data/themes/forest/forest-dark/tree-pressed.png +0 -0
  84. pdflinkcheck/data/themes/forest/forest-dark/up.png +0 -0
  85. pdflinkcheck/data/themes/forest/forest-dark/vert-accent.png +0 -0
  86. pdflinkcheck/data/themes/forest/forest-dark/vert-basic.png +0 -0
  87. pdflinkcheck/data/themes/forest/forest-dark/vert-hover.png +0 -0
  88. pdflinkcheck/data/themes/forest/forest-dark.tcl +536 -0
  89. pdflinkcheck/data/themes/forest/forest-light/border-accent-hover.png +0 -0
  90. pdflinkcheck/data/themes/forest/forest-light/border-accent.png +0 -0
  91. pdflinkcheck/data/themes/forest/forest-light/border-basic.png +0 -0
  92. pdflinkcheck/data/themes/forest/forest-light/border-hover.png +0 -0
  93. pdflinkcheck/data/themes/forest/forest-light/border-invalid.png +0 -0
  94. pdflinkcheck/data/themes/forest/forest-light/card.png +0 -0
  95. pdflinkcheck/data/themes/forest/forest-light/check-accent.png +0 -0
  96. pdflinkcheck/data/themes/forest/forest-light/check-basic.png +0 -0
  97. pdflinkcheck/data/themes/forest/forest-light/check-hover.png +0 -0
  98. pdflinkcheck/data/themes/forest/forest-light/check-tri-accent.png +0 -0
  99. pdflinkcheck/data/themes/forest/forest-light/check-tri-basic.png +0 -0
  100. pdflinkcheck/data/themes/forest/forest-light/check-tri-hover.png +0 -0
  101. pdflinkcheck/data/themes/forest/forest-light/check-unsel-accent.png +0 -0
  102. pdflinkcheck/data/themes/forest/forest-light/check-unsel-basic.png +0 -0
  103. pdflinkcheck/data/themes/forest/forest-light/check-unsel-hover.png +0 -0
  104. pdflinkcheck/data/themes/forest/forest-light/check-unsel-pressed.png +0 -0
  105. pdflinkcheck/data/themes/forest/forest-light/combo-button-basic.png +0 -0
  106. pdflinkcheck/data/themes/forest/forest-light/combo-button-focus.png +0 -0
  107. pdflinkcheck/data/themes/forest/forest-light/combo-button-hover.png +0 -0
  108. pdflinkcheck/data/themes/forest/forest-light/down-focus.png +0 -0
  109. pdflinkcheck/data/themes/forest/forest-light/down.png +0 -0
  110. pdflinkcheck/data/themes/forest/forest-light/empty.png +0 -0
  111. pdflinkcheck/data/themes/forest/forest-light/hor-accent.png +0 -0
  112. pdflinkcheck/data/themes/forest/forest-light/hor-basic.png +0 -0
  113. pdflinkcheck/data/themes/forest/forest-light/hor-hover.png +0 -0
  114. pdflinkcheck/data/themes/forest/forest-light/notebook.png +0 -0
  115. pdflinkcheck/data/themes/forest/forest-light/off-accent.png +0 -0
  116. pdflinkcheck/data/themes/forest/forest-light/off-basic.png +0 -0
  117. pdflinkcheck/data/themes/forest/forest-light/off-hover.png +0 -0
  118. pdflinkcheck/data/themes/forest/forest-light/on-accent.png +0 -0
  119. pdflinkcheck/data/themes/forest/forest-light/on-basic.png +0 -0
  120. pdflinkcheck/data/themes/forest/forest-light/on-hover.png +0 -0
  121. pdflinkcheck/data/themes/forest/forest-light/radio-accent.png +0 -0
  122. pdflinkcheck/data/themes/forest/forest-light/radio-basic.png +0 -0
  123. pdflinkcheck/data/themes/forest/forest-light/radio-hover.png +0 -0
  124. pdflinkcheck/data/themes/forest/forest-light/radio-tri-accent.png +0 -0
  125. pdflinkcheck/data/themes/forest/forest-light/radio-tri-basic.png +0 -0
  126. pdflinkcheck/data/themes/forest/forest-light/radio-tri-hover.png +0 -0
  127. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-accent.png +0 -0
  128. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-basic.png +0 -0
  129. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-hover.png +0 -0
  130. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-pressed.png +0 -0
  131. pdflinkcheck/data/themes/forest/forest-light/rect-accent-hover.png +0 -0
  132. pdflinkcheck/data/themes/forest/forest-light/rect-accent.png +0 -0
  133. pdflinkcheck/data/themes/forest/forest-light/rect-basic.png +0 -0
  134. pdflinkcheck/data/themes/forest/forest-light/rect-hover.png +0 -0
  135. pdflinkcheck/data/themes/forest/forest-light/right-focus.png +0 -0
  136. pdflinkcheck/data/themes/forest/forest-light/right.png +0 -0
  137. pdflinkcheck/data/themes/forest/forest-light/scale-hor.png +0 -0
  138. pdflinkcheck/data/themes/forest/forest-light/scale-vert.png +0 -0
  139. pdflinkcheck/data/themes/forest/forest-light/separator.png +0 -0
  140. pdflinkcheck/data/themes/forest/forest-light/sizegrip.png +0 -0
  141. pdflinkcheck/data/themes/forest/forest-light/spin-button-down-basic.png +0 -0
  142. pdflinkcheck/data/themes/forest/forest-light/spin-button-down-focus.png +0 -0
  143. pdflinkcheck/data/themes/forest/forest-light/spin-button-up.png +0 -0
  144. pdflinkcheck/data/themes/forest/forest-light/tab-accent.png +0 -0
  145. pdflinkcheck/data/themes/forest/forest-light/tab-basic.png +0 -0
  146. pdflinkcheck/data/themes/forest/forest-light/tab-hover.png +0 -0
  147. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-accent.png +0 -0
  148. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-basic.png +0 -0
  149. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-hover.png +0 -0
  150. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-accent.png +0 -0
  151. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-basic.png +0 -0
  152. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-hover.png +0 -0
  153. pdflinkcheck/data/themes/forest/forest-light/tree-basic.png +0 -0
  154. pdflinkcheck/data/themes/forest/forest-light/tree-pressed.png +0 -0
  155. pdflinkcheck/data/themes/forest/forest-light/up.png +0 -0
  156. pdflinkcheck/data/themes/forest/forest-light/vert-accent.png +0 -0
  157. pdflinkcheck/data/themes/forest/forest-light/vert-basic.png +0 -0
  158. pdflinkcheck/data/themes/forest/forest-light/vert-hover.png +0 -0
  159. pdflinkcheck/data/themes/forest/forest-light.tcl +544 -0
  160. pdflinkcheck/datacopy.py +2 -0
  161. pdflinkcheck/dev.py +10 -23
  162. pdflinkcheck/environment.py +64 -0
  163. pdflinkcheck/gui.py +229 -103
  164. pdflinkcheck/io.py +4 -18
  165. pdflinkcheck/report.py +161 -89
  166. pdflinkcheck/stdlib_server.py +14 -6
  167. pdflinkcheck/update_msix_version.py +47 -0
  168. pdflinkcheck/validate.py +59 -80
  169. pdflinkcheck/version_info.py +5 -2
  170. {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.94.dist-info}/METADATA +54 -52
  171. pdflinkcheck-1.1.94.dist-info/RECORD +176 -0
  172. pdflinkcheck-1.1.94.dist-info/licenses/LICENSE +24 -0
  173. pdflinkcheck-1.1.94.dist-info/licenses/LICENSE-MIT +9 -0
  174. pdflinkcheck-1.1.72.dist-info/RECORD +0 -21
  175. {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.94.dist-info}/WHEEL +0 -0
  176. {pdflinkcheck-1.1.72.dist-info → pdflinkcheck-1.1.94.dist-info}/entry_points.txt +0 -0
  177. /pdflinkcheck-1.1.72.dist-info/licenses/LICENSE → /pdflinkcheck-1.1.94.dist-info/licenses/LICENSE-AGPL3 +0 -0
pdflinkcheck/report.py CHANGED
@@ -1,3 +1,5 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
1
3
  # pdflinkcheck/report.py
2
4
 
3
5
  import sys
@@ -6,9 +8,30 @@ from typing import Optional, Dict, Any
6
8
  import pyhabitat
7
9
 
8
10
  from pdflinkcheck.io import error_logger, export_report_json, export_report_txt, get_first_pdf_in_cwd, get_friendly_path, LOG_FILE_PATH
11
+ from pdflinkcheck.environment import pymupdf_is_available
12
+ from pdflinkcheck.validate import run_validation
9
13
 
14
+ SEP_COUNT=28
15
+
16
def run_report_and_call_exports(pdf_path: str = None, max_links: int = 0, export_format: str = "JSON", pdf_library: str = "pypdf", print_bool: bool = True) -> Dict[str, Any]:
    """
    Run the link/TOC analysis and validation, then export the results.

    Args:
        pdf_path: Path of the PDF to analyze.
        max_links: Maximum number of links listed per report section;
            0 lists all of them.
        export_format: Export selector. A value containing "JSON" and/or
            "TXT" (case-insensitive) triggers the matching export; an
            empty value skips exporting entirely.
        pdf_library: PDF engine name, "pypdf" or "pymupdf".
        print_bool: Forwarded to the analysis step; when False the report
            is only buffered and returned, not printed.

    Returns:
        The results dictionary produced by run_report_and_validtion, with
        the keys "data", "text" and "metadata".
    """
    # NOTE(review): str() turns a None path into the string "None", so the
    # callee's `pdf_path is None` guard cannot fire from here -- confirm
    # whether that is intended before changing it.
    report_results = run_report_and_validtion(
        pdf_path=str(pdf_path),
        max_links=max_links,
        pdf_library=pdf_library,
        # Previously dropped: callers asking for silence still got console output.
        print_bool=print_bool,
    )

    if export_format:
        fmt_upper = export_format.upper()

        if "JSON" in fmt_upper:
            export_report_json(report_results["data"], pdf_path, pdf_library)

        if "TXT" in fmt_upper:
            export_report_txt(report_results["text"], pdf_path, pdf_library)

    return report_results
32
+
10
33
 
11
- def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "JSON", pdf_library: str = "pypdf", print_bool:bool=True) -> Dict[str, Any]:
34
+ def run_report_and_validtion(pdf_path: str = None, max_links: int = 0, pdf_library: str = "pypdf", print_bool:bool=True) -> Dict[str, Any]:
12
35
  """
13
36
  Core high-level PDF link analysis logic.
14
37
 
@@ -34,8 +57,8 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
34
57
 
35
58
  # Helper to handle conditional printing and mandatory buffering
36
59
  def log(msg: str):
37
- if print_bool: # this should not be here
38
- print(msg) # this should not be here. esure elsewhere then remove
60
+ if print_bool:
61
+ print(msg)
39
62
  report_buffer.append(msg)
40
63
 
41
64
  # Expected: "pypdf" or "PyMuPDF"
@@ -44,26 +67,38 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
44
67
  if pdf_library in allowed_libraries and pdf_library == "pypdf":
45
68
  from pdflinkcheck.analyze_pypdf import (extract_links_pypdf as extract_links, extract_toc_pypdf as extract_toc)
46
69
  elif pdf_library in allowed_libraries and pdf_library == "pymupdf":
47
- try:
48
- import fitz
49
- except ImportError:
70
+ if not pymupdf_is_available():
50
71
  print("PyMuPDF was explicitly requested as the PDF Engine")
51
- print("Use pypdf instead, or install PyMuPDF. ")
72
+ print("Switch the PDF library to 'pypdf' instead, or install PyMuPDF. ")
52
73
  print("To install PyMuPDF locally, try: `uv sync --extra full` OR `pip install .[full]`")
53
74
  if pyhabitat.on_termux():
54
75
  print(f"pyhabitat.on_termux() = {pyhabitat.on_termux()}")
55
76
  print("PyMuPDF is not expected to work on Termux. Use pypdf.")
56
77
  print("\n")
57
- return
78
+ #return
79
+ raise ImportError(f"The 'fitz' module is required for this functionality. Original error: {e}") from e
58
80
  from pdflinkcheck.analyze_pymupdf import (extract_links_pymupdf as extract_links, extract_toc_pymupdf as extract_toc)
59
81
 
60
82
  log("\n--- Starting Analysis ... ---\n")
61
- if pdf_path is None:
62
- pdf_path = get_first_pdf_in_cwd()
63
83
  if pdf_path is None:
64
84
  log("pdf_path is None")
65
85
  log("Tip: Drop a PDF in the current folder or pass in a path arg.")
66
- return
86
+ empty_report = {
87
+ "data": {
88
+ "external_links": [],
89
+ "internal_links": [],
90
+ "toc": []
91
+ },
92
+ "text": "\n".join(report_buffer),
93
+ "metadata": {
94
+ "pdf_name": Path(pdf_path).name,
95
+ "library_used": pdf_library,
96
+ "total_links": 0
97
+ }
98
+ }
99
+
100
+ return empty_report
101
+
67
102
  try:
68
103
  log(f"Target file: {get_friendly_path(pdf_path)}")
69
104
  log(f"PDF Engine: {pdf_library}")
@@ -78,7 +113,21 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
78
113
  if not extracted_links and not structural_toc:
79
114
  log(f"\nNo hyperlinks or structural TOC found in {Path(pdf_path).name}.")
80
115
  log("(This is common for scanned/image-only PDFs.)")
81
- return {}
116
+
117
+ empty_result = {
118
+ "data": {
119
+ "external_links": [],
120
+ "internal_links": [],
121
+ "toc": []
122
+ },
123
+ "text": "\n".join(report_buffer),
124
+ "metadata": {
125
+ "pdf_name": Path(pdf_path).name,
126
+ "library_used": pdf_library,
127
+ "total_links": 0
128
+ }
129
+ }
130
+ return empty_result
82
131
 
83
132
  # 3. Separate the lists based on the 'type' key
84
133
  uri_links = [link for link in extracted_links if link['type'] == 'External (URI)']
@@ -89,24 +138,25 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
89
138
  total_internal_links = len(goto_links) + len(resolved_action_links)
90
139
  limit = max_links if max_links > 0 else None
91
140
  uri_and_other = uri_links + other_links
141
+
142
+ str_structural_toc = get_structural_toc(structural_toc)
92
143
 
93
144
  # --- ANALYSIS SUMMARY (Using your print logic) ---
94
- log("\n" + "=" * 70)
145
+ log("\n" + "=" * SEP_COUNT)
95
146
  log(f"--- Link Analysis Results for {Path(pdf_path).name} ---")
96
147
  log(f"Total active links: {len(extracted_links)} (External: {len(uri_links)}, Internal Jumps: {total_internal_links}, Other: {len(other_links)})")
97
148
  log(f"Total **structural TOC entries (bookmarks)** found: {toc_entry_count}")
98
- log("=" * 70)
149
+ log("=" * SEP_COUNT)
99
150
 
100
151
  # --- Section 1: TOC ---
101
- str_structural_toc = print_structural_toc(structural_toc)
102
152
  log(str_structural_toc)
103
153
 
104
154
  # --- Section 2: ACTIVE INTERNAL JUMPS ---
105
- log("\n" + "=" * 70)
155
+ log("\n" + "=" * SEP_COUNT)
106
156
  log(f"## Active Internal Jumps (GoTo & Resolved Actions) - {total_internal_links} found")
107
- log("=" * 70)
157
+ log("=" * SEP_COUNT)
108
158
  log("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Jumps To Page"))
109
- log("-" * 70)
159
+ log("-" * SEP_COUNT)
110
160
 
111
161
  all_internal = goto_links + resolved_action_links
112
162
  if total_internal_links > 0:
@@ -118,13 +168,13 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
118
168
  log(f"... and {len(all_internal) - limit} more links (use --max-links 0 to show all).")
119
169
  else:
120
170
  log(" No internal GoTo or Resolved Action links found.")
121
- log("-" * 70)
171
+ log("-" * SEP_COUNT)
122
172
 
123
173
  # --- Section 3: ACTIVE URI LINKS ---
124
- log("\n" + "=" * 70)
174
+ log("\n" + "=" * SEP_COUNT)
125
175
  log(f"## Active URI Links (External & Other) - {len(uri_and_other)} found")
126
176
  log("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Target URI/Action"))
127
- log("=" * 70)
177
+ log("=" * SEP_COUNT)
128
178
 
129
179
  if uri_and_other:
130
180
  for i, link in enumerate(uri_and_other[:limit], 1):
@@ -136,45 +186,53 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
136
186
 
137
187
  else:
138
188
  log(" No external or 'Other' links found.")
139
- log("-" * 70)
140
-
141
- log("\n--- Analysis Complete ---\n")
189
+ log("-" * SEP_COUNT)
142
190
 
143
- # Final aggregation of the buffer into one string
144
- report_buffer_str = "\n".join(report_buffer)
145
191
 
146
192
  # Return the collected data for potential future JSON/other output
147
- final_report_data_dict = {
193
+ report_data_dict = {
148
194
  "external_links": uri_links,
149
195
  "internal_links": all_internal,
150
- "toc": structural_toc
196
+ "toc": structural_toc,
197
+ "validation": {}
151
198
  }
152
199
 
153
- # 5. Export Report
154
- #if export_format:
155
- # # Assuming export_to will hold the output format string (e.g., "JSON")
156
- # export_report_data(final_report_data_dict, Path(pdf_path).name, export_format, pdf_library)
157
-
158
- if export_format:
159
- fmt_upper = export_format.upper()
160
-
161
- if "JSON" in fmt_upper:
162
- export_report_json(final_report_data_dict, pdf_path, pdf_library)
163
-
164
- if "TXT" in fmt_upper:
165
- export_report_txt(report_buffer_str, pdf_path, pdf_library)
166
-
167
- report_results = {
168
- "data": final_report_data_dict, # The structured JSON-ready dict
169
- "text": report_buffer_str, # The human-readable string
200
+ intermediate_report_results = {
201
+ "data": report_data_dict, # The structured JSON-ready dict
202
+ "text": "",
170
203
  "metadata": { # Helpful for the GUI/Logs
171
204
  "pdf_name": Path(pdf_path).name,
172
205
  "library_used": pdf_library,
173
206
  "total_links": len(extracted_links)
174
207
  }
175
208
  }
209
+
210
+ log("\n--- Analysis Complete ---")
211
+
212
+ validation_results = run_validation(report_results=intermediate_report_results,
213
+ pdf_path=pdf_path,
214
+ pdf_library=pdf_library)
215
+ log(validation_results.get("summary-txt",""))
216
+ report_results = intermediate_report_results
217
+
218
+ # Final aggregation of the buffer into one string, after the last call to log()
219
+ report_buffer_str = "\n".join(report_buffer)
220
+
221
+ report_results["data"]["validation"].update(validation_results)
222
+ #report_results["text"].update(report_buffer_str) # The human-readable string
223
+ report_results["text"] = report_buffer_str
224
+
225
+ # 5. Export Report
226
+ #if export_format:
227
+ # # Assuming export_to will hold the output format string (e.g., "JSON")
228
+ # export_report_data(report_data_dict, Path(pdf_path).name, export_format, pdf_library)
229
+
230
+ if print_bool:
231
+ print(report_buffer_str)
232
+
176
233
  # Return a clean results object
177
234
  return report_results
235
+
178
236
  except Exception as e:
179
237
  # Specific handling for common read failures
180
238
  if "invalid pdf header" in str(e).lower() or "EOF marker not found" in str(e) or "stream has ended unexpectedly" in str(e):
@@ -194,44 +252,36 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
194
252
  }
195
253
  }
196
254
 
255
+ #except Exception as e:
256
+ # # Log the critical failure
257
+ # error_logger.error(f"Critical failure during run_report for {pdf_path}: {e}", exc_info=True)
258
+ # log(f"FATAL: Analysis failed. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
259
+ # raise # Allow the exception to propagate or handle gracefully
197
260
  except Exception as e:
198
- # Log the critical failure
199
261
  error_logger.error(f"Critical failure during run_report for {pdf_path}: {e}", exc_info=True)
200
- log(f"FATAL: Analysis failed. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
201
- raise # Allow the exception to propagate or handle gracefully
202
-
203
-
204
- def print_structural_toc_print(structural_toc:dict)->str|None:
205
- """
206
- Prints the structural TOC data (bookmarks/outline) in a clean,
207
- hierarchical, and readable console format.
208
-
209
- Args:
210
- structural_toc: A list of TOC dictionaries.
211
- """
212
- print("\n" + "=" * 70)
213
- print("## Structural Table of Contents (PDF Bookmarks/Outline)")
214
- print("=" * 70)
215
- if not structural_toc:
216
- print("No structural TOC (bookmarks/outline) found.")
217
- return
218
-
219
- # Determine max page width for consistent alignment (optional but nice)
220
- max_page = max(item['target_page'] for item in structural_toc) if structural_toc else 1
221
- page_width = len(str(max_page))
222
-
223
- # Iterate and format
224
- for item in structural_toc:
225
- # Use level for indentation (e.g., Level 1 = 0 spaces, Level 2 = 4 spaces, Level 3 = 8 spaces)
226
- indent = " " * 4 * (item['level'] - 1)
227
- # Format the title and target page number
228
- page_str = str(item['target_page']).rjust(page_width)
229
- print(f"{indent}{item['title']} . . . page {page_str}")
230
-
231
- print("-" * 70)
232
-
233
-
234
- def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
262
+ log(f"FATAL: Analysis failed: {str(e)}. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
263
+
264
+ # Always return a safe empty result on error
265
+ return {
266
+ "data": {
267
+ "external_links": [],
268
+ "internal_links": [],
269
+ "toc": [],
270
+ "validation": {}
271
+ },
272
+ "text": "\n".join(report_buffer + [
273
+ "\n--- Analysis failed ---",
274
+ f"Error: {str(e)}",
275
+ "No links or TOC extracted."
276
+ ]),
277
+ "metadata": {
278
+ "pdf_name": Path(pdf_path).name,
279
+ "library_used": pdf_library,
280
+ "total_links": 0
281
+ }
282
+ }
283
+
284
+ def get_structural_toc(structural_toc: list) -> str:
235
285
  """
236
286
  Formats the structural TOC data into a hierarchical string and returns it.
237
287
 
@@ -243,16 +293,14 @@ def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
243
293
  A formatted string of the structural TOC.
244
294
  """
245
295
  lines = []
246
- lines.append("\n" + "=" * 70)
296
+ lines.append("\n" + "=" * SEP_COUNT)
247
297
  lines.append("## Structural Table of Contents (PDF Bookmarks/Outline)")
248
- lines.append("=" * 70)
298
+ lines.append("=" * SEP_COUNT)
249
299
 
250
300
  if not structural_toc:
251
301
  msg = "No structural TOC (bookmarks/outline) found."
252
302
  lines.append(msg)
253
303
  output = "\n".join(lines)
254
- if print_bool:
255
- print(output)
256
304
  return output
257
305
 
258
306
  # Determine max page width for consistent alignment
@@ -269,12 +317,36 @@ def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
269
317
 
270
318
  lines.append(f"{indent}{item['title']} . . . page {page_str}")
271
319
 
272
- lines.append("-" * 70)
320
+ lines.append("-" * SEP_COUNT)
273
321
 
274
322
  # Final aggregation
275
323
  str_structural_toc = "\n".join(lines)
276
-
277
- if print_bool:
278
- print(str_structural_toc)
279
324
 
280
325
  return str_structural_toc
326
+
327
if __name__ == "__main__":
    # Manual smoke test: analyze the path returned by get_first_pdf_in_cwd().
    from pdflinkcheck.io import get_first_pdf_in_cwd

    pdf_path = get_first_pdf_in_cwd()

    # Use PyMuPDF when it is available; otherwise fall back to pypdf.
    pdf_library = "pymupdf" if pymupdf_is_available() else "pypdf"

    # `run_report` no longer exists in this module; the exporting entry point
    # takes the same keyword arguments.
    report = run_report_and_call_exports(
        pdf_path=pdf_path,
        max_links=0,
        export_format="",  # empty string skips the JSON/TXT exports
        pdf_library=pdf_library,
        print_bool=True,  # echo the report to the console
    )

    if not report or not report.get("data"):
        print("No data extracted — nothing to validate.")
        sys.exit(1)

    else:
        print("Success!")
        print(f"list(report['data']) = {list(report['data'])}")
+
@@ -1,3 +1,5 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
1
3
  # src/pdflinkcheck/stdlib_server.py
2
4
  import http.server
3
5
  import socketserver
@@ -8,7 +10,7 @@ import os
8
10
  from pathlib import Path
9
11
  import email # This replaces cgi for multipart parsing
10
12
 
11
- from pdflinkcheck.report import run_report
13
+ from pdflinkcheck.report import run_report_and_call_exports
12
14
 
13
15
  PORT = 8000
14
16
 
@@ -17,8 +19,8 @@ HTML_FORM = """
17
19
  <html>
18
20
  <head><title>pdflinkcheck Stdlib Server</title></head>
19
21
  <body style="font-family: sans-serif; max-width: 800px; margin: 40px auto;">
20
- <h1>pdflinkcheck API (Pure Stdlib, without cgi)</h1>
21
- <p>Upload a PDF for link/TOC analysis. Zero third-party deps, future-proof.</p>
22
+ <h1>pdflinkcheck API (pure stdlib)</h1>
23
+ <p>Upload a PDF for link/TOC analysis.</p>
22
24
  <form action="/" method="post" enctype="multipart/form-data">
23
25
  <p><input type="file" name="file" accept=".pdf" required></p>
24
26
  <p>
@@ -33,9 +35,13 @@ HTML_FORM = """
33
35
  <input type="number" name="max_links" value="0" min="0">
34
36
  </p>
35
37
  <p><button type="submit">Analyze PDF</button></p>
38
+ <!--p>
39
+ <button type="submit" name="action" value="analyze">Analyze PDF</button>
40
+ <button type="submit" name="action" value="validate">Validate PDF</button>
41
+ </p-->
36
42
  </form>
37
43
  <hr>
38
- <p>Returns JSON. Works on Termux & Python 3.13+.</p>
44
+ <p>Returns JSON.</p>
39
45
  </body>
40
46
  </html>
41
47
  """
@@ -130,18 +136,20 @@ class PDFLinkCheckHandler(http.server.SimpleHTTPRequestHandler):
130
136
  tmp_file.write(file_item)
131
137
  tmp_path = tmp_file.name
132
138
 
133
- result = run_report(
139
+ result = run_report_and_call_exports(
134
140
  pdf_path=tmp_path,
135
141
  max_links=max_links if max_links > 0 else 0,
136
142
  export_format="",
137
143
  pdf_library=pdf_library,
138
144
  print_bool=False
139
145
  )
146
+ metadata = result.get("metadata", {"total_links": 0, "pdf_name": file_filename})
147
+ total_links = metadata.get("total_links", 0)
140
148
 
141
149
  response = {
142
150
  "filename": file_filename,
143
151
  "pdf_library_used": pdf_library,
144
- "total_links": result["metadata"]["total_links"],
152
+ "total_links": total_links,
145
153
  "data": result["data"],
146
154
  "text_report": result["text"]
147
155
  }
@@ -0,0 +1,47 @@
1
+ from pathlib import Path
2
+ from pdflinkcheck.version_info import get_version_from_pyproject
3
+
4
# Three directory levels above this file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent

# Template manifest (contains the placeholder) and the generated output.
UNVERSIONED_MANIFEST = PROJECT_ROOT / "msix" / "AppxManifest_unversioned.xml"
OUTPUT_MANIFEST = PROJECT_ROOT / "msix" / "AppxManifest.xml"


# Token inside the template's Version="..." attribute that gets replaced.
PLACEHOLDER = "@@VERSION_PLACEHOLDER@@"


def _to_msix_version(version):
    """Return *version* padded with "0" parts to the four-part a.b.c.d form."""
    parts = version.split(".")
    if len(parts) > 4:
        raise ValueError(f"Version has too many parts: {version}")
    # Reject empty or non-numeric parts (e.g. "", "1..2", "1.1.94rc1") instead
    # of silently writing them into the manifest.
    if not all(part.isascii() and part.isdigit() for part in parts):
        raise ValueError(f"Version must consist of numeric dot-separated parts: {version}")
    # Pad to four parts: 1 -> 1.0.0.0, 1.1 -> 1.1.0.0, 1.1.92 -> 1.1.92.0
    parts += ["0"] * (4 - len(parts))
    return ".".join(parts)


def generate_versioned_manifest(version, unversioned_manifest=None, output_manifest=None):
    """
    Write a manifest whose Version attribute is set from *version*.

    Args:
        version: Dotted numeric version string with one to four parts.
        unversioned_manifest: Template path; defaults to UNVERSIONED_MANIFEST.
        output_manifest: Destination path; defaults to OUTPUT_MANIFEST.

    Raises:
        ValueError: If the version has more than four parts or a non-numeric
            part, or the template lacks the Version placeholder.
        FileNotFoundError: If the template manifest does not exist.
    """
    source = UNVERSIONED_MANIFEST if unversioned_manifest is None else Path(unversioned_manifest)
    target = OUTPUT_MANIFEST if output_manifest is None else Path(output_manifest)

    msix_version = _to_msix_version(version)

    if not source.exists():
        raise FileNotFoundError(f"Unversioned manifest not found: {source}")

    text = source.read_text(encoding="utf-8")

    placeholder_full = f'Version="{PLACEHOLDER}"'

    if placeholder_full not in text:
        raise ValueError(f"Placeholder {placeholder_full} not found in the unversioned manifest!")

    updated_text = text.replace(placeholder_full, f'Version="{msix_version}"')

    # Ensure the directory exists and write the new manifest
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(updated_text, encoding="utf-8")

    print(f"Successfully generated AppxManifest.xml with version {msix_version}")


if __name__ == "__main__":
    version = get_version_from_pyproject()
    generate_versioned_manifest(version)