pdflinkcheck 1.1.73__py3-none-any.whl → 1.2.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. pdflinkcheck/__init__.py +88 -21
  2. pdflinkcheck/__main__.py +6 -0
  3. pdflinkcheck/analysis_pdfium.py +131 -0
  4. pdflinkcheck/{analyze_pymupdf.py → analysis_pymupdf.py} +109 -145
  5. pdflinkcheck/{analyze_pypdf.py → analysis_pypdf.py} +67 -37
  6. pdflinkcheck/cli.py +111 -116
  7. pdflinkcheck/data/I Have Questions.md +51 -0
  8. pdflinkcheck/data/LICENSE +20 -654
  9. pdflinkcheck/data/README.md +65 -67
  10. pdflinkcheck/data/icons/BoxArt-1080x1080.png +0 -0
  11. pdflinkcheck/data/icons/Logo-150x150.png +0 -0
  12. pdflinkcheck/data/icons/Logo-300x300.png +0 -0
  13. pdflinkcheck/data/icons/Logo-71x71.png +0 -0
  14. pdflinkcheck/data/icons/PosterArt-720x1080.png +0 -0
  15. pdflinkcheck/data/icons/SmallLogo-44x44.png +0 -0
  16. pdflinkcheck/data/icons/SplashScreen-620x300.png +0 -0
  17. pdflinkcheck/data/icons/StoreLogo-50x50.png +0 -0
  18. pdflinkcheck/data/icons/WideLogo-310x150.png +0 -0
  19. pdflinkcheck/data/icons/red_pdf_512px.ico +0 -0
  20. pdflinkcheck/data/pyproject.toml +25 -37
  21. pdflinkcheck/data/themes/forest/forest-dark/border-accent-hover.png +0 -0
  22. pdflinkcheck/data/themes/forest/forest-dark/border-accent.png +0 -0
  23. pdflinkcheck/data/themes/forest/forest-dark/border-basic.png +0 -0
  24. pdflinkcheck/data/themes/forest/forest-dark/border-hover.png +0 -0
  25. pdflinkcheck/data/themes/forest/forest-dark/border-invalid.png +0 -0
  26. pdflinkcheck/data/themes/forest/forest-dark/card.png +0 -0
  27. pdflinkcheck/data/themes/forest/forest-dark/check-accent.png +0 -0
  28. pdflinkcheck/data/themes/forest/forest-dark/check-basic.png +0 -0
  29. pdflinkcheck/data/themes/forest/forest-dark/check-hover.png +0 -0
  30. pdflinkcheck/data/themes/forest/forest-dark/check-tri-accent.png +0 -0
  31. pdflinkcheck/data/themes/forest/forest-dark/check-tri-basic.png +0 -0
  32. pdflinkcheck/data/themes/forest/forest-dark/check-tri-hover.png +0 -0
  33. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-accent.png +0 -0
  34. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-basic.png +0 -0
  35. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-hover.png +0 -0
  36. pdflinkcheck/data/themes/forest/forest-dark/check-unsel-pressed.png +0 -0
  37. pdflinkcheck/data/themes/forest/forest-dark/combo-button-basic.png +0 -0
  38. pdflinkcheck/data/themes/forest/forest-dark/combo-button-focus.png +0 -0
  39. pdflinkcheck/data/themes/forest/forest-dark/combo-button-hover.png +0 -0
  40. pdflinkcheck/data/themes/forest/forest-dark/down.png +0 -0
  41. pdflinkcheck/data/themes/forest/forest-dark/empty.png +0 -0
  42. pdflinkcheck/data/themes/forest/forest-dark/hor-accent.png +0 -0
  43. pdflinkcheck/data/themes/forest/forest-dark/hor-basic.png +0 -0
  44. pdflinkcheck/data/themes/forest/forest-dark/hor-hover.png +0 -0
  45. pdflinkcheck/data/themes/forest/forest-dark/notebook.png +0 -0
  46. pdflinkcheck/data/themes/forest/forest-dark/off-accent.png +0 -0
  47. pdflinkcheck/data/themes/forest/forest-dark/off-basic.png +0 -0
  48. pdflinkcheck/data/themes/forest/forest-dark/off-hover.png +0 -0
  49. pdflinkcheck/data/themes/forest/forest-dark/on-accent.png +0 -0
  50. pdflinkcheck/data/themes/forest/forest-dark/on-basic.png +0 -0
  51. pdflinkcheck/data/themes/forest/forest-dark/on-hover.png +0 -0
  52. pdflinkcheck/data/themes/forest/forest-dark/radio-accent.png +0 -0
  53. pdflinkcheck/data/themes/forest/forest-dark/radio-basic.png +0 -0
  54. pdflinkcheck/data/themes/forest/forest-dark/radio-hover.png +0 -0
  55. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-accent.png +0 -0
  56. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-basic.png +0 -0
  57. pdflinkcheck/data/themes/forest/forest-dark/radio-tri-hover.png +0 -0
  58. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-accent.png +0 -0
  59. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-basic.png +0 -0
  60. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-hover.png +0 -0
  61. pdflinkcheck/data/themes/forest/forest-dark/radio-unsel-pressed.png +0 -0
  62. pdflinkcheck/data/themes/forest/forest-dark/rect-accent-hover.png +0 -0
  63. pdflinkcheck/data/themes/forest/forest-dark/rect-accent.png +0 -0
  64. pdflinkcheck/data/themes/forest/forest-dark/rect-basic.png +0 -0
  65. pdflinkcheck/data/themes/forest/forest-dark/rect-hover.png +0 -0
  66. pdflinkcheck/data/themes/forest/forest-dark/right.png +0 -0
  67. pdflinkcheck/data/themes/forest/forest-dark/scale-hor.png +0 -0
  68. pdflinkcheck/data/themes/forest/forest-dark/scale-vert.png +0 -0
  69. pdflinkcheck/data/themes/forest/forest-dark/separator.png +0 -0
  70. pdflinkcheck/data/themes/forest/forest-dark/sizegrip.png +0 -0
  71. pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-basic.png +0 -0
  72. pdflinkcheck/data/themes/forest/forest-dark/spin-button-down-focus.png +0 -0
  73. pdflinkcheck/data/themes/forest/forest-dark/spin-button-up.png +0 -0
  74. pdflinkcheck/data/themes/forest/forest-dark/tab-accent.png +0 -0
  75. pdflinkcheck/data/themes/forest/forest-dark/tab-basic.png +0 -0
  76. pdflinkcheck/data/themes/forest/forest-dark/tab-hover.png +0 -0
  77. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-accent.png +0 -0
  78. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-basic.png +0 -0
  79. pdflinkcheck/data/themes/forest/forest-dark/thumb-hor-hover.png +0 -0
  80. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-accent.png +0 -0
  81. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-basic.png +0 -0
  82. pdflinkcheck/data/themes/forest/forest-dark/thumb-vert-hover.png +0 -0
  83. pdflinkcheck/data/themes/forest/forest-dark/tree-basic.png +0 -0
  84. pdflinkcheck/data/themes/forest/forest-dark/tree-pressed.png +0 -0
  85. pdflinkcheck/data/themes/forest/forest-dark/up.png +0 -0
  86. pdflinkcheck/data/themes/forest/forest-dark/vert-accent.png +0 -0
  87. pdflinkcheck/data/themes/forest/forest-dark/vert-basic.png +0 -0
  88. pdflinkcheck/data/themes/forest/forest-dark/vert-hover.png +0 -0
  89. pdflinkcheck/data/themes/forest/forest-dark.tcl +536 -0
  90. pdflinkcheck/data/themes/forest/forest-light/border-accent-hover.png +0 -0
  91. pdflinkcheck/data/themes/forest/forest-light/border-accent.png +0 -0
  92. pdflinkcheck/data/themes/forest/forest-light/border-basic.png +0 -0
  93. pdflinkcheck/data/themes/forest/forest-light/border-hover.png +0 -0
  94. pdflinkcheck/data/themes/forest/forest-light/border-invalid.png +0 -0
  95. pdflinkcheck/data/themes/forest/forest-light/card.png +0 -0
  96. pdflinkcheck/data/themes/forest/forest-light/check-accent.png +0 -0
  97. pdflinkcheck/data/themes/forest/forest-light/check-basic.png +0 -0
  98. pdflinkcheck/data/themes/forest/forest-light/check-hover.png +0 -0
  99. pdflinkcheck/data/themes/forest/forest-light/check-tri-accent.png +0 -0
  100. pdflinkcheck/data/themes/forest/forest-light/check-tri-basic.png +0 -0
  101. pdflinkcheck/data/themes/forest/forest-light/check-tri-hover.png +0 -0
  102. pdflinkcheck/data/themes/forest/forest-light/check-unsel-accent.png +0 -0
  103. pdflinkcheck/data/themes/forest/forest-light/check-unsel-basic.png +0 -0
  104. pdflinkcheck/data/themes/forest/forest-light/check-unsel-hover.png +0 -0
  105. pdflinkcheck/data/themes/forest/forest-light/check-unsel-pressed.png +0 -0
  106. pdflinkcheck/data/themes/forest/forest-light/combo-button-basic.png +0 -0
  107. pdflinkcheck/data/themes/forest/forest-light/combo-button-focus.png +0 -0
  108. pdflinkcheck/data/themes/forest/forest-light/combo-button-hover.png +0 -0
  109. pdflinkcheck/data/themes/forest/forest-light/down-focus.png +0 -0
  110. pdflinkcheck/data/themes/forest/forest-light/down.png +0 -0
  111. pdflinkcheck/data/themes/forest/forest-light/empty.png +0 -0
  112. pdflinkcheck/data/themes/forest/forest-light/hor-accent.png +0 -0
  113. pdflinkcheck/data/themes/forest/forest-light/hor-basic.png +0 -0
  114. pdflinkcheck/data/themes/forest/forest-light/hor-hover.png +0 -0
  115. pdflinkcheck/data/themes/forest/forest-light/notebook.png +0 -0
  116. pdflinkcheck/data/themes/forest/forest-light/off-accent.png +0 -0
  117. pdflinkcheck/data/themes/forest/forest-light/off-basic.png +0 -0
  118. pdflinkcheck/data/themes/forest/forest-light/off-hover.png +0 -0
  119. pdflinkcheck/data/themes/forest/forest-light/on-accent.png +0 -0
  120. pdflinkcheck/data/themes/forest/forest-light/on-basic.png +0 -0
  121. pdflinkcheck/data/themes/forest/forest-light/on-hover.png +0 -0
  122. pdflinkcheck/data/themes/forest/forest-light/radio-accent.png +0 -0
  123. pdflinkcheck/data/themes/forest/forest-light/radio-basic.png +0 -0
  124. pdflinkcheck/data/themes/forest/forest-light/radio-hover.png +0 -0
  125. pdflinkcheck/data/themes/forest/forest-light/radio-tri-accent.png +0 -0
  126. pdflinkcheck/data/themes/forest/forest-light/radio-tri-basic.png +0 -0
  127. pdflinkcheck/data/themes/forest/forest-light/radio-tri-hover.png +0 -0
  128. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-accent.png +0 -0
  129. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-basic.png +0 -0
  130. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-hover.png +0 -0
  131. pdflinkcheck/data/themes/forest/forest-light/radio-unsel-pressed.png +0 -0
  132. pdflinkcheck/data/themes/forest/forest-light/rect-accent-hover.png +0 -0
  133. pdflinkcheck/data/themes/forest/forest-light/rect-accent.png +0 -0
  134. pdflinkcheck/data/themes/forest/forest-light/rect-basic.png +0 -0
  135. pdflinkcheck/data/themes/forest/forest-light/rect-hover.png +0 -0
  136. pdflinkcheck/data/themes/forest/forest-light/right-focus.png +0 -0
  137. pdflinkcheck/data/themes/forest/forest-light/right.png +0 -0
  138. pdflinkcheck/data/themes/forest/forest-light/scale-hor.png +0 -0
  139. pdflinkcheck/data/themes/forest/forest-light/scale-vert.png +0 -0
  140. pdflinkcheck/data/themes/forest/forest-light/separator.png +0 -0
  141. pdflinkcheck/data/themes/forest/forest-light/sizegrip.png +0 -0
  142. pdflinkcheck/data/themes/forest/forest-light/spin-button-down-basic.png +0 -0
  143. pdflinkcheck/data/themes/forest/forest-light/spin-button-down-focus.png +0 -0
  144. pdflinkcheck/data/themes/forest/forest-light/spin-button-up.png +0 -0
  145. pdflinkcheck/data/themes/forest/forest-light/tab-accent.png +0 -0
  146. pdflinkcheck/data/themes/forest/forest-light/tab-basic.png +0 -0
  147. pdflinkcheck/data/themes/forest/forest-light/tab-hover.png +0 -0
  148. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-accent.png +0 -0
  149. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-basic.png +0 -0
  150. pdflinkcheck/data/themes/forest/forest-light/thumb-hor-hover.png +0 -0
  151. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-accent.png +0 -0
  152. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-basic.png +0 -0
  153. pdflinkcheck/data/themes/forest/forest-light/thumb-vert-hover.png +0 -0
  154. pdflinkcheck/data/themes/forest/forest-light/tree-basic.png +0 -0
  155. pdflinkcheck/data/themes/forest/forest-light/tree-pressed.png +0 -0
  156. pdflinkcheck/data/themes/forest/forest-light/up.png +0 -0
  157. pdflinkcheck/data/themes/forest/forest-light/vert-accent.png +0 -0
  158. pdflinkcheck/data/themes/forest/forest-light/vert-basic.png +0 -0
  159. pdflinkcheck/data/themes/forest/forest-light/vert-hover.png +0 -0
  160. pdflinkcheck/data/themes/forest/forest-light.tcl +544 -0
  161. pdflinkcheck/datacopy.py +18 -1
  162. pdflinkcheck/dev.py +12 -25
  163. pdflinkcheck/environment.py +76 -0
  164. pdflinkcheck/gui.py +366 -457
  165. pdflinkcheck/helpers.py +88 -0
  166. pdflinkcheck/io.py +27 -23
  167. pdflinkcheck/report.py +692 -121
  168. pdflinkcheck/security.py +189 -0
  169. pdflinkcheck/splash.py +38 -0
  170. pdflinkcheck/stdlib_server.py +14 -20
  171. pdflinkcheck/stdlib_server_alt.py +571 -0
  172. pdflinkcheck/tk_utils.py +188 -0
  173. pdflinkcheck/update_msix_version.py +49 -0
  174. pdflinkcheck/validate.py +129 -218
  175. pdflinkcheck/version_info.py +6 -3
  176. {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.2.29.dist-info}/METADATA +84 -81
  177. pdflinkcheck-1.2.29.dist-info/RECORD +183 -0
  178. pdflinkcheck-1.2.29.dist-info/WHEEL +5 -0
  179. {pdflinkcheck-1.1.73.dist-info → pdflinkcheck-1.2.29.dist-info}/entry_points.txt +0 -1
  180. pdflinkcheck-1.2.29.dist-info/licenses/LICENSE +27 -0
  181. pdflinkcheck-1.2.29.dist-info/licenses/LICENSE-MIT +9 -0
  182. pdflinkcheck-1.2.29.dist-info/top_level.txt +1 -0
  183. pdflinkcheck/analyze_pypdf_v2.py +0 -218
  184. pdflinkcheck-1.1.73.dist-info/RECORD +0 -21
  185. pdflinkcheck-1.1.73.dist-info/WHEEL +0 -4
  186. /pdflinkcheck-1.1.73.dist-info/licenses/LICENSE → /pdflinkcheck-1.2.29.dist-info/licenses/LICENSE-AGPL3 +0 -0
@@ -0,0 +1,189 @@
1
+ """
2
+ pdflinkcheck.security
3
+
4
+ Offline, deterministic link‑risk scoring for PDF hyperlinks.
5
+
6
+ This module intentionally avoids any heuristics that depend on PDF text
7
+ extraction quality (e.g., anchor text analysis), because real‑world PDFs
8
+ often contain inconsistent OCR output, concatenated strings, or placeholder
9
+ text. Only URL‑structure‑based signals are used.
10
+
11
+ Stable, low‑maintenance, and fully offline.
12
+ """
13
+
14
+ from __future__ import annotations
15
+ from dataclasses import dataclass, asdict
16
+ from urllib.parse import urlparse, parse_qs
17
+ import ipaddress
18
+ from typing import List, Dict, Optional
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Static rule tables (embedded; no external files)
23
+ # ---------------------------------------------------------------------------
24
+
25
# Top-level domains (TLDs) treated as suspicious by the scorer.
# Entries are lower-case and written without the leading dot.
SUSPICIOUS_TLDS = {
    "cf",
    "click",
    "gq",
    "link",
    "ml",
    "rest",
    "tk",
    "top",
    "xyz",
}
29
+
30
# Tracking parameters
#
# Together these query parameters allow detailed attribution of website
# traffic and conversions:
#   - utm_* parameters are universal for tracking campaigns across all
#     traffic sources.
#   - fbclid and gclid are platform-specific identifiers for Facebook and
#     Google Ads.
#   - mc_eid is specific to email marketing, like Mailchimp campaigns.
TRACKING_PARAMS = {
    "utm_source",
    "utm_medium",
    "utm_campaign",
    "fbclid",
    "gclid",
    "mc_eid",
}
41
+
42
# Minimal homoglyph table (expandable)
#
# Each key is a non-Latin character that is visually near-identical to the
# Latin letter it maps to, but has a different Unicode code point. Such
# look-alikes are the classic ingredient of homoglyph attacks (phishing
# domains, e-mail spoofing, source-code obfuscation), where a character from
# another script is substituted to deceive users or systems.
HOMOGLYPHS = {
    "а": "a",  # U+0430 Cyrillic small letter a    (Latin a is U+0061)
    "е": "e",  # U+0435 Cyrillic small letter ie   (Latin e is U+0065)
    "і": "i",  # U+0456 Cyrillic small letter i    (Latin i is U+0069)
    "ο": "o",  # U+03BF Greek small letter omicron (Latin o is U+006F)
    "р": "p",  # U+0440 Cyrillic small letter er   (Latin p is U+0070)
    "ѕ": "s",  # U+0455 Cyrillic small letter dze  (Latin s is U+0073)
    "у": "y",  # U+0443 Cyrillic small letter u    (Latin y is U+0079)
}
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Data structures
70
+ # ---------------------------------------------------------------------------
71
+
72
@dataclass
class RiskReason:
    """One triggered rule that contributes to a link's risk score."""

    # Short machine-readable identifier of the rule, e.g. "ip_host".
    rule_id: str
    # Human-readable explanation of why the rule fired.
    description: str
    # Number of points this rule adds to the total score.
    weight: int
77
+
78
+
79
@dataclass
class LinkRiskResult:
    """Outcome of scoring a single URL."""

    # The URL exactly as it was passed to the scorer.
    url: str
    # Sum of the weights of all triggered rules.
    score: int
    # Coarse bucket derived from the score ("none", "low", "medium", "high").
    level: str
    # Every rule that fired for this URL.
    reasons: List[RiskReason]

    def to_dict(self) -> Dict[str, object]:
        """Return a plain-dict copy of this result.

        ``dataclasses.asdict`` recurses into dataclass instances held in
        lists, so the nested ``reasons`` are converted in the same pass and
        need no second conversion.
        """
        return asdict(self)
90
+
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # Helper functions
94
+ # ---------------------------------------------------------------------------
95
+
96
+ def _is_ip(host: str) -> bool:
97
+ try:
98
+ ipaddress.ip_address(host)
99
+ return True
100
+ except Exception:
101
+ return False
102
+
103
+
104
+ def _contains_homoglyphs(s: str) -> bool:
105
+ return any(ch in HOMOGLYPHS for ch in s)
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Core scoring function (URL‑structure‑based only)
110
+ # ---------------------------------------------------------------------------
111
+
112
def score_link(url: str) -> LinkRiskResult:
    """Score one URL using URL-structure signals only.

    Every rule that fires appends a ``RiskReason`` and adds its weight to the
    score. The score maps to a level: 0 is "none", 1-2 is "low", 3-6 is
    "medium", and anything above 6 is "high".

    Malformed input does not raise. A URL that ``urlparse`` rejects is
    reported with a ``malformed_url`` reason, and a port that is non-numeric
    or out of range is reported with an ``invalid_port`` reason; the
    remaining checks still run on whatever could be parsed.
    """
    reasons: List[RiskReason] = []
    score = 0

    host = ""
    path = ""
    query = ""
    port = None

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some inputs outright, e.g. unbalanced IPv6 brackets.
        reasons.append(RiskReason("malformed_url", "URL could not be parsed.", 3))
        score += 3
    else:
        host = parsed.hostname or ""
        path = parsed.path
        query = parsed.query or ""
        try:
            # The .port property raises ValueError when the port is not a
            # number or lies outside 0-65535.
            port = parsed.port
        except ValueError:
            reasons.append(RiskReason("invalid_port", "URL has an invalid port.", 2))
            score += 2

    # IP‑based URL
    if _is_ip(host):
        reasons.append(RiskReason("ip_host", "URL uses a raw IP address.", 3))
        score += 3

    # Suspicious TLD (a trailing dot of a fully-qualified name is ignored)
    bare_host = host.rstrip(".")
    if "." in bare_host:
        tld = bare_host.rsplit(".", 1)[-1].lower()
        if tld in SUSPICIOUS_TLDS:
            reasons.append(RiskReason("suspicious_tld", f"TLD '.{tld}' is commonly abused.", 2))
            score += 2

    # Non‑standard port
    if port not in (None, 80, 443):
        reasons.append(RiskReason("nonstandard_port", f"Non‑standard port {port}.", 2))
        score += 2

    # Long URL
    if len(url) > 200:
        reasons.append(RiskReason("long_url", "URL is unusually long.", 1))
        score += 1

    # Tracking parameters (counted once per distinct parameter name)
    params = parse_qs(query)
    tracking_hits = sum(1 for p in params if p.lower() in TRACKING_PARAMS)
    if tracking_hits:
        reasons.append(RiskReason("tracking_params", f"{tracking_hits} tracking parameters found.", 1))
        score += 1

    # Homoglyph detection over host and path
    if _contains_homoglyphs(host + path):
        reasons.append(RiskReason("homoglyph_suspected", "URL contains homoglyph characters.", 3))
        score += 3

    # Risk level mapping
    if score == 0:
        level = "none"
    elif score <= 2:
        level = "low"
    elif score <= 6:
        level = "medium"
    else:
        level = "high"

    return LinkRiskResult(url, score, level, reasons)
165
+
166
+
167
+ # ---------------------------------------------------------------------------
168
+ # Report‑level risk computation (mirrors validate.py)
169
+ # ---------------------------------------------------------------------------
170
+
171
def compute_risk(report: Dict[str, object]) -> Dict[str, object]:
    """Score every external link of a report and summarise the results.

    The links are read from ``report["data"]["external_links"]``. A missing
    key, or a key whose value is ``None``, is treated as "no links" instead
    of raising. For each link the first non-empty value among ``url``,
    ``remote_file`` and ``target`` is scored; links with none of them are
    counted in ``total_external`` but not in ``scored``.

    Returns a dict with ``risk_summary`` (counts per level) and
    ``risk_details`` (one ``to_dict()`` result per scored link).
    """
    # `or` also covers keys that are present but hold None.
    data = report.get("data") or {}
    external_links = data.get("external_links") or []

    results = []
    for link in external_links:
        url = link.get("url") or link.get("remote_file") or link.get("target")
        if url:
            results.append(score_link(url).to_dict())

    levels = [r["level"] for r in results]
    return {
        "risk_summary": {
            "total_external": len(external_links),
            "scored": len(results),
            "high_risk": levels.count("high"),
            "medium_risk": levels.count("medium"),
            "low_risk": levels.count("low"),
        },
        "risk_details": results,
    }
pdflinkcheck/splash.py ADDED
@@ -0,0 +1,38 @@
1
+ # src/pdflinkcheck/splash.py
2
+ import tkinter as tk
3
+ from tkinter import ttk
4
+ from pdflinkcheck.tk_utils import center_window_on_primary
5
+
6
class SplashFrame:
    """Small splash window with a title and an indeterminate progress bar.

    The window is created withdrawn, populated, positioned with
    ``center_window_on_primary`` and only then shown. Call ``teardown`` to
    stop the progress bar and destroy the window.
    """

    def __init__(self, parent):
        """Build and display the splash as a ``Toplevel`` of *parent*."""
        background = "#2b2b2b"
        width, height = 300, 80

        # Keep the window hidden while it is being assembled.
        window = tk.Toplevel(parent)
        self.top = window
        window.withdraw()
        window.overrideredirect(True)
        window.configure(bg=background)

        # UI components: title label above the progress bar.
        title = tk.Label(
            window,
            text="PDF LINK CHECK",
            fg="white",
            bg=background,
            font=("Arial", 12, "bold"),
        )
        title.pack(pady=(15, 5))

        bar = ttk.Progressbar(window, mode='indeterminate', length=250)
        self.progress = bar
        bar.pack(pady=10, padx=20)
        bar.start(15)

        # Flush pending idle tasks before the window is positioned.
        window.update_idletasks()

        # Center first, then reveal.
        center_window_on_primary(window, width, height)
        window.deiconify()

    def teardown(self):
        """Cleanly shut down the splash window."""
        self.progress.stop()
        self.top.destroy()
@@ -1,4 +1,7 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
1
3
  # src/pdflinkcheck/stdlib_server.py
4
+ from __future__ import annotations
2
5
  import http.server
3
6
  import socketserver
4
7
  import json
@@ -8,7 +11,7 @@ import os
8
11
  from pathlib import Path
9
12
  import email # This replaces cgi for multipart parsing
10
13
 
11
- from pdflinkcheck.report import run_report
14
+ from pdflinkcheck.report import run_report_and_call_exports
12
15
 
13
16
  PORT = 8000
14
17
 
@@ -17,25 +20,22 @@ HTML_FORM = """
17
20
  <html>
18
21
  <head><title>pdflinkcheck Stdlib Server</title></head>
19
22
  <body style="font-family: sans-serif; max-width: 800px; margin: 40px auto;">
20
- <h1>pdflinkcheck API (Pure Stdlib, without cgi)</h1>
21
- <p>Upload a PDF for link/TOC analysis. Zero third-party deps, future-proof.</p>
23
+ <h1>pdflinkcheck API (pure stdlib)</h1>
24
+ <p>Upload a PDF for link/TOC analysis.</p>
22
25
  <form action="/" method="post" enctype="multipart/form-data">
23
26
  <p><input type="file" name="file" accept=".pdf" required></p>
24
27
  <p>
25
28
  <label>Engine:</label>
26
29
  <select name="pdf_library">
27
30
  <option value="pypdf" selected>pypdf (pure Python, Termux-friendly)</option>
28
- <option value="pymupdf">pymupdf (faster, if installed)</option>
31
+ <option value="pymupdf">PyMyPPD (fast, AGPL3)</option>
32
+ <option value="pdfium">PDFium (fast, permissive)</option>
29
33
  </select>
30
34
  </p>
31
- <p>
32
- <label>Max links to show (0 = all):</label>
33
- <input type="number" name="max_links" value="0" min="0">
34
- </p>
35
35
  <p><button type="submit">Analyze PDF</button></p>
36
36
  </form>
37
37
  <hr>
38
- <p>Returns JSON. Works on Termux & Python 3.13+.</p>
38
+ <p>Returns JSON.</p>
39
39
  </body>
40
40
  </html>
41
41
  """
@@ -90,7 +90,6 @@ class PDFLinkCheckHandler(http.server.SimpleHTTPRequestHandler):
90
90
  # Extract parts
91
91
  file_item = None
92
92
  pdf_library = "pypdf"
93
- max_links = 0
94
93
 
95
94
  for part in msg.get_payload():
96
95
  disposition = part.get("Content-Disposition", "")
@@ -109,16 +108,10 @@ class PDFLinkCheckHandler(http.server.SimpleHTTPRequestHandler):
109
108
 
110
109
  elif name == "pdf_library":
111
110
  pdf_library = part.get_payload(decode=True).decode().lower()
112
- if pdf_library not in {"pypdf", "pymupdf"}:
111
+ if pdf_library not in {"pypdf", "pymupdf", "pdfium"}:
113
112
  self._send_json_error("Invalid pdf_library", 400)
114
113
  return
115
114
 
116
- elif name == "max_links":
117
- try:
118
- max_links = int(part.get_payload(decode=True).decode())
119
- except ValueError:
120
- max_links = 0
121
-
122
115
  if not file_item:
123
116
  self._send_json_error("No PDF file uploaded", 400)
124
117
  return
@@ -130,18 +123,19 @@ class PDFLinkCheckHandler(http.server.SimpleHTTPRequestHandler):
130
123
  tmp_file.write(file_item)
131
124
  tmp_path = tmp_file.name
132
125
 
133
- result = run_report(
126
+ result = run_report_and_call_exports(
134
127
  pdf_path=tmp_path,
135
- max_links=max_links if max_links > 0 else 0,
136
128
  export_format="",
137
129
  pdf_library=pdf_library,
138
130
  print_bool=False
139
131
  )
132
+
133
+ total_links_count = result.get("metadata",{}).get("link_counts",{}).get("total_links_count", 0)
140
134
 
141
135
  response = {
142
136
  "filename": file_filename,
143
137
  "pdf_library_used": pdf_library,
144
- "total_links": result["metadata"]["total_links"],
138
+ "total_links_count": total_links_count,
145
139
  "data": result["data"],
146
140
  "text_report": result["text"]
147
141
  }