pdflinkcheck 1.1.73__py3-none-any.whl → 1.1.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. pdflinkcheck/__init__.py +2 -5
  2. pdflinkcheck/analyze_pymupdf.py +12 -6
  3. pdflinkcheck/analyze_pypdf.py +25 -7
  4. pdflinkcheck/analyze_pypdf_v2.py +5 -6
  5. pdflinkcheck/cli.py +82 -91
  6. pdflinkcheck/data/I Have Questions.md +51 -0
  7. pdflinkcheck/data/LICENSE +17 -654
  8. pdflinkcheck/data/README.md +49 -49
  9. pdflinkcheck/data/icons/BoxArt-1080x1080.png +0 -0
  10. pdflinkcheck/data/icons/Logo-150x150.png +0 -0
  11. pdflinkcheck/data/icons/Logo-300x300.png +0 -0
  12. pdflinkcheck/data/icons/Logo-71x71.png +0 -0
  13. pdflinkcheck/data/icons/PosterArt-720x1080.png +0 -0
  14. pdflinkcheck/data/icons/SmallLogo-44x44.png +0 -0
  15. pdflinkcheck/data/icons/SplashScreen-620x300.png +0 -0
  16. pdflinkcheck/data/icons/StoreLogo-50x50.png +0 -0
  17. pdflinkcheck/data/icons/WideLogo-310x150.png +0 -0
  18. pdflinkcheck/data/icons/red_pdf_512px.ico +0 -0
  19. pdflinkcheck/data/pyproject.toml +20 -23
  20. pdflinkcheck/data/themes/forest/forest-dark/border-accent-hover.png +0 -0
  21. pdflinkcheck/data/themes/forest/forest-dark/border-accent.png +0 -0
  22. pdflinkcheck/data/themes/forest/forest-dark/border-basic.png +0 -0
  23. pdflinkcheck/data/themes/forest/forest-dark/border-hover.png +0 -0
  24. pdflinkcheck/data/themes/forest/forest-dark/border-invalid.png +0 -0
  25. pdflinkcheck/data/themes/forest/forest-dark/card.png +0 -0
  26. pdflinkcheck/data/themes/forest/forest-dark/check-accent.png +0 -0
  27. pdflinkcheck/data/themes/forest/forest-dark/check-basic.png +0 -0
  28. pdflinkcheck/data/themes/forest/forest-dark/check-hover.png +0 -0
  29. pdflinkcheck/data/themes/forest/forest-dark/check-tri-accent.png +0 -0
  30. pdflinkcheck/data/themes/forest/forest-dark/check-tri-basic.png +0 -0
  31. pdflinkcheck/data/themes/forest/forest-dark/check-tri-hover.png +0 -0
  32. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-accent.png +0 -0
  33. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-basic.png +0 -0
  34. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-hover.png +0 -0
  35. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-pressed.png +0 -0
  36. pdflinkcheck/data/themes/forest/forest-dark/combo-button-basic.png +0 -0
  37. pdflinkcheck/data/themes/forest/forest-dark/combo-button-focus.png +0 -0
  38. pdflinkcheck/data/themes/forest/forest-dark/combo-button-hover.png +0 -0
  39. pdflinkcheck/data/themes/forest/forest-dark/down.png +0 -0
  40. pdflinkcheck/data/themes/forest/forest-dark/empty.png +0 -0
  41. pdflinkcheck/data/themes/forest/forest-dark/hor-accent.png +0 -0
  42. pdflinkcheck/data/themes/forest/forest-dark/hor-basic.png +0 -0
  43. pdflinkcheck/data/themes/forest/forest-dark/hor-hover.png +0 -0
  44. pdflinkcheck/data/themes/forest/forest-dark/notebook.png +0 -0
  45. pdflinkcheck/data/themes/forest/forest-dark/off-accent.png +0 -0
  46. pdflinkcheck/data/themes/forest/forest-dark/off-basic.png +0 -0
  47. pdflinkcheck/data/themes/forest/forest-dark/off-hover.png +0 -0
  48. pdflinkcheck/data/themes/forest/forest-dark/on-accent.png +0 -0
  49. pdflinkcheck/data/themes/forest/forest-dark/on-basic.png +0 -0
  50. pdflinkcheck/data/themes/forest/forest-dark/on-hover.png +0 -0
  51. pdflinkcheck/data/themes/forest/forest-dark/radio-accent.png +0 -0
  52. pdflinkcheck/data/themes/forest/forest-dark/radio-basic.png +0 -0
  53. pdflinkcheck/data/themes/forest/forest-dark/radio-hover.png +0 -0
  54. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-accent.png +0 -0
  55. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-basic.png +0 -0
  56. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-hover.png +0 -0
  57. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-accent.png +0 -0
  58. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-basic.png +0 -0
  59. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-hover.png +0 -0
  60. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-pressed.png +0 -0
  61. pdflinkcheck/data/themes/forest/forest-dark/rect-accent-hover.png +0 -0
  62. pdflinkcheck/data/themes/forest/forest-dark/rect-accent.png +0 -0
  63. pdflinkcheck/data/themes/forest/forest-dark/rect-basic.png +0 -0
  64. pdflinkcheck/data/themes/forest/forest-dark/rect-hover.png +0 -0
  65. pdflinkcheck/data/themes/forest/forest-dark/right.png +0 -0
  66. pdflinkcheck/data/themes/forest/forest-dark/scale-hor.png +0 -0
  67. pdflinkcheck/data/themes/forest/forest-dark/scale-vert.png +0 -0
  68. pdflinkcheck/data/themes/forest/forest-dark/separator.png +0 -0
  69. pdflinkcheck/data/themes/forest/forest-dark/sizegrip.png +0 -0
  70. pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-basic.png +0 -0
  71. pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-focus.png +0 -0
  72. pdflinkcheck/data/themes/forest/forest-dark/spin-button-up.png +0 -0
  73. pdflinkcheck/data/themes/forest/forest-dark/tab-accent.png +0 -0
  74. pdflinkcheck/data/themes/forest/forest-dark/tab-basic.png +0 -0
  75. pdflinkcheck/data/themes/forest/forest-dark/tab-hover.png +0 -0
  76. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-accent.png +0 -0
  77. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-basic.png +0 -0
  78. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-hover.png +0 -0
  79. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-accent.png +0 -0
  80. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-basic.png +0 -0
  81. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-hover.png +0 -0
  82. pdflinkcheck/data/themes/forest/forest-dark/tree-basic.png +0 -0
  83. pdflinkcheck/data/themes/forest/forest-dark/tree-pressed.png +0 -0
  84. pdflinkcheck/data/themes/forest/forest-dark/up.png +0 -0
  85. pdflinkcheck/data/themes/forest/forest-dark/vert-accent.png +0 -0
  86. pdflinkcheck/data/themes/forest/forest-dark/vert-basic.png +0 -0
  87. pdflinkcheck/data/themes/forest/forest-dark/vert-hover.png +0 -0
  88. pdflinkcheck/data/themes/forest/forest-dark.tcl +536 -0
  89. pdflinkcheck/data/themes/forest/forest-light/border-accent-hover.png +0 -0
  90. pdflinkcheck/data/themes/forest/forest-light/border-accent.png +0 -0
  91. pdflinkcheck/data/themes/forest/forest-light/border-basic.png +0 -0
  92. pdflinkcheck/data/themes/forest/forest-light/border-hover.png +0 -0
  93. pdflinkcheck/data/themes/forest/forest-light/border-invalid.png +0 -0
  94. pdflinkcheck/data/themes/forest/forest-light/card.png +0 -0
  95. pdflinkcheck/data/themes/forest/forest-light/check-accent.png +0 -0
  96. pdflinkcheck/data/themes/forest/forest-light/check-basic.png +0 -0
  97. pdflinkcheck/data/themes/forest/forest-light/check-hover.png +0 -0
  98. pdflinkcheck/data/themes/forest/forest-light/check-tri-accent.png +0 -0
  99. pdflinkcheck/data/themes/forest/forest-light/check-tri-basic.png +0 -0
  100. pdflinkcheck/data/themes/forest/forest-light/check-tri-hover.png +0 -0
  101. pdflinkcheck/data/themes/forest/forest-light/check-unsel-accent.png +0 -0
  102. pdflinkcheck/data/themes/forest/forest-light/check-unsel-basic.png +0 -0
  103. pdflinkcheck/data/themes/forest/forest-light/check-unsel-hover.png +0 -0
  104. pdflinkcheck/data/themes/forest/forest-light/check-unsel-pressed.png +0 -0
  105. pdflinkcheck/data/themes/forest/forest-light/combo-button-basic.png +0 -0
  106. pdflinkcheck/data/themes/forest/forest-light/combo-button-focus.png +0 -0
  107. pdflinkcheck/data/themes/forest/forest-light/combo-button-hover.png +0 -0
  108. pdflinkcheck/data/themes/forest/forest-light/down-focus.png +0 -0
  109. pdflinkcheck/data/themes/forest/forest-light/down.png +0 -0
  110. pdflinkcheck/data/themes/forest/forest-light/empty.png +0 -0
  111. pdflinkcheck/data/themes/forest/forest-light/hor-accent.png +0 -0
  112. pdflinkcheck/data/themes/forest/forest-light/hor-basic.png +0 -0
  113. pdflinkcheck/data/themes/forest/forest-light/hor-hover.png +0 -0
  114. pdflinkcheck/data/themes/forest/forest-light/notebook.png +0 -0
  115. pdflinkcheck/data/themes/forest/forest-light/off-accent.png +0 -0
  116. pdflinkcheck/data/themes/forest/forest-light/off-basic.png +0 -0
  117. pdflinkcheck/data/themes/forest/forest-light/off-hover.png +0 -0
  118. pdflinkcheck/data/themes/forest/forest-light/on-accent.png +0 -0
  119. pdflinkcheck/data/themes/forest/forest-light/on-basic.png +0 -0
  120. pdflinkcheck/data/themes/forest/forest-light/on-hover.png +0 -0
  121. pdflinkcheck/data/themes/forest/forest-light/radio-accent.png +0 -0
  122. pdflinkcheck/data/themes/forest/forest-light/radio-basic.png +0 -0
  123. pdflinkcheck/data/themes/forest/forest-light/radio-hover.png +0 -0
  124. pdflinkcheck/data/themes/forest/forest-light/radio-tri-accent.png +0 -0
  125. pdflinkcheck/data/themes/forest/forest-light/radio-tri-basic.png +0 -0
  126. pdflinkcheck/data/themes/forest/forest-light/radio-tri-hover.png +0 -0
  127. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-accent.png +0 -0
  128. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-basic.png +0 -0
  129. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-hover.png +0 -0
  130. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-pressed.png +0 -0
  131. pdflinkcheck/data/themes/forest/forest-light/rect-accent-hover.png +0 -0
  132. pdflinkcheck/data/themes/forest/forest-light/rect-accent.png +0 -0
  133. pdflinkcheck/data/themes/forest/forest-light/rect-basic.png +0 -0
  134. pdflinkcheck/data/themes/forest/forest-light/rect-hover.png +0 -0
  135. pdflinkcheck/data/themes/forest/forest-light/right-focus.png +0 -0
  136. pdflinkcheck/data/themes/forest/forest-light/right.png +0 -0
  137. pdflinkcheck/data/themes/forest/forest-light/scale-hor.png +0 -0
  138. pdflinkcheck/data/themes/forest/forest-light/scale-vert.png +0 -0
  139. pdflinkcheck/data/themes/forest/forest-light/separator.png +0 -0
  140. pdflinkcheck/data/themes/forest/forest-light/sizegrip.png +0 -0
  141. pdflinkcheck/data/themes/forest/forest-light/spin-button-down-basic.png +0 -0
  142. pdflinkcheck/data/themes/forest/forest-light/spin-button-down-focus.png +0 -0
  143. pdflinkcheck/data/themes/forest/forest-light/spin-button-up.png +0 -0
  144. pdflinkcheck/data/themes/forest/forest-light/tab-accent.png +0 -0
  145. pdflinkcheck/data/themes/forest/forest-light/tab-basic.png +0 -0
  146. pdflinkcheck/data/themes/forest/forest-light/tab-hover.png +0 -0
  147. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-accent.png +0 -0
  148. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-basic.png +0 -0
  149. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-hover.png +0 -0
  150. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-accent.png +0 -0
  151. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-basic.png +0 -0
  152. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-hover.png +0 -0
  153. pdflinkcheck/data/themes/forest/forest-light/tree-basic.png +0 -0
  154. pdflinkcheck/data/themes/forest/forest-light/tree-pressed.png +0 -0
  155. pdflinkcheck/data/themes/forest/forest-light/up.png +0 -0
  156. pdflinkcheck/data/themes/forest/forest-light/vert-accent.png +0 -0
  157. pdflinkcheck/data/themes/forest/forest-light/vert-basic.png +0 -0
  158. pdflinkcheck/data/themes/forest/forest-light/vert-hover.png +0 -0
  159. pdflinkcheck/data/themes/forest/forest-light.tcl +544 -0
  160. pdflinkcheck/datacopy.py +2 -0
  161. pdflinkcheck/dev.py +10 -23
  162. pdflinkcheck/environment.py +64 -0
  163. pdflinkcheck/gui.py +229 -103
  164. pdflinkcheck/io.py +4 -18
  165. pdflinkcheck/report.py +148 -78
  166. pdflinkcheck/stdlib_server.py +14 -6
  167. pdflinkcheck/update_msix_version.py +47 -0
  168. pdflinkcheck/validate.py +50 -73
  169. pdflinkcheck/version_info.py +5 -2
  170. {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.1.94.dist-info}/METADATA +54 -52
  171. pdflinkcheck-1.1.94.dist-info/RECORD +176 -0
  172. pdflinkcheck-1.1.94.dist-info/licenses/LICENSE +24 -0
  173. pdflinkcheck-1.1.94.dist-info/licenses/LICENSE-MIT +9 -0
  174. pdflinkcheck-1.1.73.dist-info/RECORD +0 -21
  175. {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.1.94.dist-info}/WHEEL +0 -0
  176. {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.1.94.dist-info}/entry_points.txt +0 -0
  177. /pdflinkcheck-1.1.73.dist-info/licenses/LICENSE → /pdflinkcheck-1.1.94.dist-info/licenses/LICENSE-AGPL3 +0 -0
pdflinkcheck/report.py CHANGED
@@ -1,3 +1,5 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
1
3
  # pdflinkcheck/report.py
2
4
 
3
5
  import sys
@@ -6,11 +8,30 @@ from typing import Optional, Dict, Any
6
8
  import pyhabitat
7
9
 
8
10
  from pdflinkcheck.io import error_logger, export_report_json, export_report_txt, get_first_pdf_in_cwd, get_friendly_path, LOG_FILE_PATH
9
-
11
+ from pdflinkcheck.environment import pymupdf_is_available
12
+ from pdflinkcheck.validate import run_validation
10
13
 
11
14
  SEP_COUNT=28
15
+
16
def run_report_and_call_exports(pdf_path: str = None, max_links: int = 0, export_format: str = "JSON", pdf_library: str = "pypdf", print_bool:bool=True) -> Dict[str, Any]:
    """
    Run the analysis/validation pass, then write any requested export files.

    Args:
        pdf_path: Path of the PDF to analyze. It is stringified before being
            handed to run_report_and_validtion; the export helpers receive
            the value exactly as passed in.
        max_links: Forwarded unchanged to run_report_and_validtion.
        export_format: Export selector, matched case-insensitively by
            substring. A value containing "JSON" triggers the JSON export and
            a value containing "TXT" triggers the text export (both may
            apply). An empty/falsy value disables exporting.
        pdf_library: Name of the PDF engine, forwarded to the analysis and to
            the export helpers.
        print_bool: Forwarded to run_report_and_validtion so that callers
            asking for a quiet run (print_bool=False) are honored.

    Returns:
        The results dict produced by run_report_and_validtion, unmodified.
    """
    # The meat and potatoes
    report_results = run_report_and_validtion(
        pdf_path=str(pdf_path),
        max_links=max_links,
        pdf_library=pdf_library,
        # Previously dropped here, which made the caller's choice a no-op.
        print_bool=print_bool,
    )

    if export_format:
        # Normalize once; both checks below are substring matches.
        fmt_upper = export_format.upper()

        if "JSON" in fmt_upper:
            export_report_json(report_results["data"], pdf_path, pdf_library)

        if "TXT" in fmt_upper:
            export_report_txt(report_results["text"], pdf_path, pdf_library)

    return report_results
32
+
12
33
 
13
- def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "JSON", pdf_library: str = "pypdf", print_bool:bool=True) -> Dict[str, Any]:
34
+ def run_report_and_validtion(pdf_path: str = None, max_links: int = 0, pdf_library: str = "pypdf", print_bool:bool=True) -> Dict[str, Any]:
14
35
  """
15
36
  Core high-level PDF link analysis logic.
16
37
 
@@ -36,8 +57,8 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
36
57
 
37
58
  # Helper to handle conditional printing and mandatory buffering
38
59
  def log(msg: str):
39
- if print_bool: # this should not be here
40
- print(msg) # this should not be here. esure elsewhere then remove
60
+ if print_bool:
61
+ print(msg)
41
62
  report_buffer.append(msg)
42
63
 
43
64
  # Expected: "pypdf" or "PyMuPDF"
@@ -46,26 +67,38 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
46
67
  if pdf_library in allowed_libraries and pdf_library == "pypdf":
47
68
  from pdflinkcheck.analyze_pypdf import (extract_links_pypdf as extract_links, extract_toc_pypdf as extract_toc)
48
69
  elif pdf_library in allowed_libraries and pdf_library == "pymupdf":
49
- try:
50
- import fitz
51
- except ImportError:
70
+ if not pymupdf_is_available():
52
71
  print("PyMuPDF was explicitly requested as the PDF Engine")
53
- print("Use pypdf instead, or install PyMuPDF. ")
72
+ print("Switch the PDF library to 'pypdf' instead, or install PyMuPDF. ")
54
73
  print("To install PyMuPDF locally, try: `uv sync --extra full` OR `pip install .[full]`")
55
74
  if pyhabitat.on_termux():
56
75
  print(f"pyhabitat.on_termux() = {pyhabitat.on_termux()}")
57
76
  print("PyMuPDF is not expected to work on Termux. Use pypdf.")
58
77
  print("\n")
59
- return
78
+ #return
79
+ raise ImportError(f"The 'fitz' module is required for this functionality. Original error: {e}") from e
60
80
  from pdflinkcheck.analyze_pymupdf import (extract_links_pymupdf as extract_links, extract_toc_pymupdf as extract_toc)
61
81
 
62
82
  log("\n--- Starting Analysis ... ---\n")
63
- if pdf_path is None:
64
- pdf_path = get_first_pdf_in_cwd()
65
83
  if pdf_path is None:
66
84
  log("pdf_path is None")
67
85
  log("Tip: Drop a PDF in the current folder or pass in a path arg.")
68
- return
86
+ empty_report = {
87
+ "data": {
88
+ "external_links": [],
89
+ "internal_links": [],
90
+ "toc": []
91
+ },
92
+ "text": "\n".join(report_buffer),
93
+ "metadata": {
94
+ "pdf_name": Path(pdf_path).name,
95
+ "library_used": pdf_library,
96
+ "total_links": 0
97
+ }
98
+ }
99
+
100
+ return empty_report
101
+
69
102
  try:
70
103
  log(f"Target file: {get_friendly_path(pdf_path)}")
71
104
  log(f"PDF Engine: {pdf_library}")
@@ -80,7 +113,21 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
80
113
  if not extracted_links and not structural_toc:
81
114
  log(f"\nNo hyperlinks or structural TOC found in {Path(pdf_path).name}.")
82
115
  log("(This is common for scanned/image-only PDFs.)")
83
- return {}
116
+
117
+ empty_result = {
118
+ "data": {
119
+ "external_links": [],
120
+ "internal_links": [],
121
+ "toc": []
122
+ },
123
+ "text": "\n".join(report_buffer),
124
+ "metadata": {
125
+ "pdf_name": Path(pdf_path).name,
126
+ "library_used": pdf_library,
127
+ "total_links": 0
128
+ }
129
+ }
130
+ return empty_result
84
131
 
85
132
  # 3. Separate the lists based on the 'type' key
86
133
  uri_links = [link for link in extracted_links if link['type'] == 'External (URI)']
@@ -91,6 +138,8 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
91
138
  total_internal_links = len(goto_links) + len(resolved_action_links)
92
139
  limit = max_links if max_links > 0 else None
93
140
  uri_and_other = uri_links + other_links
141
+
142
+ str_structural_toc = get_structural_toc(structural_toc)
94
143
 
95
144
  # --- ANALYSIS SUMMARY (Using your print logic) ---
96
145
  log("\n" + "=" * SEP_COUNT)
@@ -100,7 +149,6 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
100
149
  log("=" * SEP_COUNT)
101
150
 
102
151
  # --- Section 1: TOC ---
103
- str_structural_toc = print_structural_toc(structural_toc)
104
152
  log(str_structural_toc)
105
153
 
106
154
  # --- Section 2: ACTIVE INTERNAL JUMPS ---
@@ -140,43 +188,51 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
140
188
  log(" No external or 'Other' links found.")
141
189
  log("-" * SEP_COUNT)
142
190
 
143
- log("\n--- Analysis Complete ---\n")
144
-
145
- # Final aggregation of the buffer into one string
146
- report_buffer_str = "\n".join(report_buffer)
147
191
 
148
192
  # Return the collected data for potential future JSON/other output
149
- final_report_data_dict = {
193
+ report_data_dict = {
150
194
  "external_links": uri_links,
151
195
  "internal_links": all_internal,
152
- "toc": structural_toc
196
+ "toc": structural_toc,
197
+ "validation": {}
153
198
  }
154
199
 
155
- # 5. Export Report
156
- #if export_format:
157
- # # Assuming export_to will hold the output format string (e.g., "JSON")
158
- # export_report_data(final_report_data_dict, Path(pdf_path).name, export_format, pdf_library)
159
-
160
- if export_format:
161
- fmt_upper = export_format.upper()
162
-
163
- if "JSON" in fmt_upper:
164
- export_report_json(final_report_data_dict, pdf_path, pdf_library)
165
-
166
- if "TXT" in fmt_upper:
167
- export_report_txt(report_buffer_str, pdf_path, pdf_library)
168
-
169
- report_results = {
170
- "data": final_report_data_dict, # The structured JSON-ready dict
171
- "text": report_buffer_str, # The human-readable string
200
+ intermediate_report_results = {
201
+ "data": report_data_dict, # The structured JSON-ready dict
202
+ "text": "",
172
203
  "metadata": { # Helpful for the GUI/Logs
173
204
  "pdf_name": Path(pdf_path).name,
174
205
  "library_used": pdf_library,
175
206
  "total_links": len(extracted_links)
176
207
  }
177
208
  }
209
+
210
+ log("\n--- Analysis Complete ---")
211
+
212
+ validation_results = run_validation(report_results=intermediate_report_results,
213
+ pdf_path=pdf_path,
214
+ pdf_library=pdf_library)
215
+ log(validation_results.get("summary-txt",""))
216
+ report_results = intermediate_report_results
217
+
218
+ # Final aggregation of the buffer into one string, after the last call to log()
219
+ report_buffer_str = "\n".join(report_buffer)
220
+
221
+ report_results["data"]["validation"].update(validation_results)
222
+ #report_results["text"].update(report_buffer_str) # The human-readable string
223
+ report_results["text"] = report_buffer_str
224
+
225
+ # 5. Export Report
226
+ #if export_format:
227
+ # # Assuming export_to will hold the output format string (e.g., "JSON")
228
+ # export_report_data(report_data_dict, Path(pdf_path).name, export_format, pdf_library)
229
+
230
+ if print_bool:
231
+ print(report_buffer_str)
232
+
178
233
  # Return a clean results object
179
234
  return report_results
235
+
180
236
  except Exception as e:
181
237
  # Specific handling for common read failures
182
238
  if "invalid pdf header" in str(e).lower() or "EOF marker not found" in str(e) or "stream has ended unexpectedly" in str(e):
@@ -196,44 +252,36 @@ def run_report(pdf_path: str = None, max_links: int = 0, export_format: str = "
196
252
  }
197
253
  }
198
254
 
255
+ #except Exception as e:
256
+ # # Log the critical failure
257
+ # error_logger.error(f"Critical failure during run_report for {pdf_path}: {e}", exc_info=True)
258
+ # log(f"FATAL: Analysis failed. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
259
+ # raise # Allow the exception to propagate or handle gracefully
199
260
  except Exception as e:
200
- # Log the critical failure
201
261
  error_logger.error(f"Critical failure during run_report for {pdf_path}: {e}", exc_info=True)
202
- log(f"FATAL: Analysis failed. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
203
- raise # Allow the exception to propagate or handle gracefully
204
-
205
-
206
- def print_structural_toc_print(structural_toc:dict)->str|None:
207
- """
208
- Prints the structural TOC data (bookmarks/outline) in a clean,
209
- hierarchical, and readable console format.
210
-
211
- Args:
212
- structural_toc: A list of TOC dictionaries.
213
- """
214
- print("\n" + "=" * SEP_COUNT)
215
- print("## Structural Table of Contents (PDF Bookmarks/Outline)")
216
- print("=" * SEP_COUNT)
217
- if not structural_toc:
218
- print("No structural TOC (bookmarks/outline) found.")
219
- return
220
-
221
- # Determine max page width for consistent alignment (optional but nice)
222
- max_page = max(item['target_page'] for item in structural_toc) if structural_toc else 1
223
- page_width = len(str(max_page))
224
-
225
- # Iterate and format
226
- for item in structural_toc:
227
- # Use level for indentation (e.g., Level 1 = 0 spaces, Level 2 = 4 spaces, Level 3 = 8 spaces)
228
- indent = " " * 4 * (item['level'] - 1)
229
- # Format the title and target page number
230
- page_str = str(item['target_page']).rjust(page_width)
231
- print(f"{indent}{item['title']} . . . page {page_str}")
232
-
233
- print("-" * SEP_COUNT)
234
-
235
-
236
- def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
262
+ log(f"FATAL: Analysis failed: {str(e)}. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
263
+
264
+ # Always return a safe empty result on error
265
+ return {
266
+ "data": {
267
+ "external_links": [],
268
+ "internal_links": [],
269
+ "toc": [],
270
+ "validation": {}
271
+ },
272
+ "text": "\n".join(report_buffer + [
273
+ "\n--- Analysis failed ---",
274
+ f"Error: {str(e)}",
275
+ "No links or TOC extracted."
276
+ ]),
277
+ "metadata": {
278
+ "pdf_name": Path(pdf_path).name,
279
+ "library_used": pdf_library,
280
+ "total_links": 0
281
+ }
282
+ }
283
+
284
+ def get_structural_toc(structural_toc: list) -> str:
237
285
  """
238
286
  Formats the structural TOC data into a hierarchical string and returns it.
239
287
 
@@ -253,8 +301,6 @@ def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
253
301
  msg = "No structural TOC (bookmarks/outline) found."
254
302
  lines.append(msg)
255
303
  output = "\n".join(lines)
256
- if print_bool:
257
- print(output)
258
304
  return output
259
305
 
260
306
  # Determine max page width for consistent alignment
@@ -275,8 +321,32 @@ def print_structural_toc(structural_toc: list, print_bool: bool = False) -> str:
275
321
 
276
322
  # Final aggregation
277
323
  str_structural_toc = "\n".join(lines)
278
-
279
- if print_bool:
280
- print(str_structural_toc)
281
324
 
282
325
  return str_structural_toc
326
+
327
if __name__ == "__main__":
    # Manual smoke run: analyze the first PDF returned by get_first_pdf_in_cwd().
    from pdflinkcheck.io import get_first_pdf_in_cwd
    pdf_path = get_first_pdf_in_cwd()

    # Use PyMuPDF when pymupdf_is_available() reports it, otherwise pypdf.
    pdf_library = "pymupdf" if pymupdf_is_available() else "pypdf"

    # `run_report` is not defined in this module, so calling it here raised
    # NameError. run_report_and_call_exports accepts the same keyword
    # arguments; export_format="" is falsy, so no export file is written.
    report = run_report_and_call_exports(
        pdf_path=pdf_path,
        max_links=0,
        export_format="",
        pdf_library=pdf_library,
        print_bool=True,
    )

    if not report or not report.get("data"):
        print("No data extracted — nothing to validate.")
        sys.exit(1)

    else:
        print("Success!")
        print(f"list(report['data']) = {list(report['data'])}")
352
+
@@ -1,3 +1,5 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
1
3
  # src/pdflinkcheck/stdlib_server.py
2
4
  import http.server
3
5
  import socketserver
@@ -8,7 +10,7 @@ import os
8
10
  from pathlib import Path
9
11
  import email # This replaces cgi for multipart parsing
10
12
 
11
- from pdflinkcheck.report import run_report
13
+ from pdflinkcheck.report import run_report_and_call_exports
12
14
 
13
15
  PORT = 8000
14
16
 
@@ -17,8 +19,8 @@ HTML_FORM = """
17
19
  <html>
18
20
  <head><title>pdflinkcheck Stdlib Server</title></head>
19
21
  <body style="font-family: sans-serif; max-width: 800px; margin: 40px auto;">
20
- <h1>pdflinkcheck API (Pure Stdlib, without cgi)</h1>
21
- <p>Upload a PDF for link/TOC analysis. Zero third-party deps, future-proof.</p>
22
+ <h1>pdflinkcheck API (pure stdlib)</h1>
23
+ <p>Upload a PDF for link/TOC analysis.</p>
22
24
  <form action="/" method="post" enctype="multipart/form-data">
23
25
  <p><input type="file" name="file" accept=".pdf" required></p>
24
26
  <p>
@@ -33,9 +35,13 @@ HTML_FORM = """
33
35
  <input type="number" name="max_links" value="0" min="0">
34
36
  </p>
35
37
  <p><button type="submit">Analyze PDF</button></p>
38
+ <!--p>
39
+ <button type="submit" name="action" value="analyze">Analyze PDF</button>
40
+ <button type="submit" name="action" value="validate">Validate PDF</button>
41
+ </p-->
36
42
  </form>
37
43
  <hr>
38
- <p>Returns JSON. Works on Termux & Python 3.13+.</p>
44
+ <p>Returns JSON.</p>
39
45
  </body>
40
46
  </html>
41
47
  """
@@ -130,18 +136,20 @@ class PDFLinkCheckHandler(http.server.SimpleHTTPRequestHandler):
130
136
  tmp_file.write(file_item)
131
137
  tmp_path = tmp_file.name
132
138
 
133
- result = run_report(
139
+ result = run_report_and_call_exports(
134
140
  pdf_path=tmp_path,
135
141
  max_links=max_links if max_links > 0 else 0,
136
142
  export_format="",
137
143
  pdf_library=pdf_library,
138
144
  print_bool=False
139
145
  )
146
+ metadata = result.get("metadata", {"total_links": 0, "pdf_name": file_filename})
147
+ total_links = metadata.get("total_links", 0)
140
148
 
141
149
  response = {
142
150
  "filename": file_filename,
143
151
  "pdf_library_used": pdf_library,
144
- "total_links": result["metadata"]["total_links"],
152
+ "total_links": total_links,
145
153
  "data": result["data"],
146
154
  "text_report": result["text"]
147
155
  }
@@ -0,0 +1,47 @@
1
+ from pathlib import Path
2
+ from pdflinkcheck.version_info import get_version_from_pyproject
3
+
4
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
5
+
6
+ UNVERSIONED_MANIFEST = PROJECT_ROOT / "msix" / "AppxManifest_unversioned.xml"
7
+ OUTPUT_MANIFEST = PROJECT_ROOT / "msix" / "AppxManifest.xml"
8
+
9
+
10
+ PLACEHOLDER = "@@VERSION_PLACEHOLDER@@"
11
+
12
+
13
def generate_versioned_manifest(version):
    """
    Write OUTPUT_MANIFEST from UNVERSIONED_MANIFEST with a four-part version.

    The dotted ``version`` string is right-padded with "0" components until it
    has four parts (e.g. "1.1" -> "1.1.0.0", "1.1.92" -> "1.1.92.0"), and the
    text ``Version="@@VERSION_PLACEHOLDER@@"`` in the template is replaced by
    ``Version="<four-part version>"``.

    Args:
        version: Dotted version string with one to four numeric components.

    Raises:
        ValueError: If ``version`` has more than four components, if any
            component is empty or not made of ASCII digits, or if the
            placeholder is missing from the template.
        FileNotFoundError: If UNVERSIONED_MANIFEST does not exist.
    """
    parts = version.split(".")

    if len(parts) > 4:
        raise ValueError(f"Version has too many parts: {version}")

    # Reject empty or non-numeric components (e.g. "", "1.x", "1.1.94rc1")
    # up front instead of writing them into the manifest.
    if not all(part.isascii() and part.isdigit() for part in parts):
        raise ValueError(f"Version parts must be numeric: {version!r}")

    # Pad to four parts: 1 -> 1.0.0.0, 1.1 -> 1.1.0.0, 1.1.92 -> 1.1.92.0
    parts += ["0"] * (4 - len(parts))
    msix_version = ".".join(parts)

    if not UNVERSIONED_MANIFEST.exists():
        raise FileNotFoundError(f"Unversioned manifest not found: {UNVERSIONED_MANIFEST}")

    text = UNVERSIONED_MANIFEST.read_text(encoding="utf-8")

    # Match the whole attribute so a stray placeholder elsewhere is untouched.
    placeholder_full = f'Version="{PLACEHOLDER}"'

    if placeholder_full not in text:
        raise ValueError(f"Placeholder {placeholder_full} not found in the unversioned manifest!")

    updated_text = text.replace(placeholder_full, f'Version="{msix_version}"')

    # Ensure the directory exists and write the new manifest
    OUTPUT_MANIFEST.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_MANIFEST.write_text(updated_text, encoding="utf-8")

    print(f"Successfully generated AppxManifest.xml with version {msix_version}")
+
44
+
45
if __name__ == "__main__":
    # Script entry point: stamp the manifest with whatever version string
    # get_version_from_pyproject() returns.
    generate_versioned_manifest(get_version_from_pyproject())