pdflinkcheck 1.1.7__py3-none-any.whl → 1.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdflinkcheck/gui.py CHANGED
@@ -1,8 +1,10 @@
1
1
  # src/pdflinkcheck/gui.py
2
2
  import tkinter as tk
3
- from tkinter import filedialog, ttk
3
+ from tkinter import filedialog, ttk, messagebox # Added messagebox
4
4
  import sys
5
5
  from pathlib import Path
6
+ from typing import Optional # Added Optional
7
+ from importlib.resources import files
6
8
 
7
9
  # Import the core analysis function
8
10
  from pdflinkcheck.analyze import run_analysis
@@ -16,60 +18,140 @@ class RedirectText:
16
18
  """Insert the incoming string into the Text widget."""
17
19
  self.text_widget.insert(tk.END, string)
18
20
  self.text_widget.see(tk.END) # Scroll to the end
19
- self.text_widget.update_idletasks() # Refresh GUI
21
+ ## self.text_widget.update_idletasks() # Refresh GUI << Suppress: The mainloop will handle updates efficiently without forcing them.
20
22
 
21
- def flush(self):
23
+ def flush(self, *args):
22
24
  """Required for file-like objects, but does nothing here."""
23
25
  pass
24
26
 
25
27
  class PDFLinkCheckerApp(tk.Tk):
26
28
  def __init__(self):
27
29
  super().__init__()
28
- self.title("PDF Link Checker")
30
+ self.title("PDF Link Check")
29
31
  self.geometry("800x600")
30
32
 
31
33
  # Style for the application
32
34
  style = ttk.Style(self)
33
35
  style.theme_use('clam')
34
36
 
37
+ # --- 1. Initialize Variables ---
35
38
  self.pdf_path = tk.StringVar(value="")
36
- self.check_remnants_var = tk.BooleanVar(value=True)
39
+ self.check_remnants_var = tk.BooleanVar(value=True)
37
40
  self.max_links_var = tk.StringVar(value="50")
38
- self.show_all_links_var = tk.BooleanVar(value=False)
41
+ self.show_all_links_var = tk.BooleanVar(value=True)
42
+ self.export_report_format_var = tk.StringVar(value="JSON")
43
+ self.do_export_report_var = tk.BooleanVar(value=True)
44
+
45
+ self.supported_export_formats = ["JSON", "MD", "TXT"]
46
+ self.supported_export_formats = ["JSON"]
47
+
39
48
 
49
+ # --- 2. Create Widgets ---
40
50
  self._create_widgets()
51
+
52
+ # --- 3. Set Initial Dependent Widget States ---
53
+ self._toggle_max_links_entry()
54
+ self._toggle_export_report()
55
+
56
+
57
+ def _show_license(self):
58
+ """
59
+ Reads the embedded LICENSE file (AGPLv3) and displays its content in a new modal window.
60
+ """
61
+ try:
62
+ # CORRECT WAY: Use the Traversable object's read_text() method.
63
+ # This handles files located inside zip archives (.pyz, pipx venvs) correctly.
64
+ license_path_traversable = files("pdflinkcheck.data") / "LICENSE"
65
+ license_content = license_path_traversable.read_text(encoding="utf-8")
66
+
67
+ except FileNotFoundError:
68
+ messagebox.showerror(
69
+ "License Error",
70
+ "LICENSE file not found within the installation package (pdflinkcheck.data/LICENSE). Check build process."
71
+ )
72
+ return
73
+ except Exception as e:
74
+ messagebox.showerror("Read Error", f"Failed to read embedded LICENSE file: {e}")
75
+ return
76
+
77
+ # --- Display in a New Toplevel Window ---
78
+ license_window = tk.Toplevel(self)
79
+ license_window.title("Software License")
80
+ license_window.geometry("600x400")
81
+
82
+ # Text widget for content
83
+ text_widget = tk.Text(license_window, wrap=tk.WORD, font=('Monospace', 10), padx=10, pady=10)
84
+ text_widget.insert(tk.END, license_content)
85
+ text_widget.config(state=tk.DISABLED)
86
+
87
+ # Scrollbar
88
+ scrollbar = ttk.Scrollbar(license_window, command=text_widget.yview)
89
+ text_widget['yscrollcommand'] = scrollbar.set
90
+
91
+ # Layout
92
+ scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
93
+ text_widget.pack(fill='both', expand=True)
94
+
95
+ # Make the window modal (optional, but good practice for notices)
96
+ license_window.transient(self)
97
+ license_window.grab_set()
98
+ self.wait_window(license_window)
41
99
 
42
100
  def _create_widgets(self):
43
101
  # --- Control Frame (Top) ---
44
102
  control_frame = ttk.Frame(self, padding="10")
45
103
  control_frame.pack(fill='x')
46
104
 
47
- # File Selection
105
+ # Row 0: File Selection
48
106
  ttk.Label(control_frame, text="PDF Path:").grid(row=0, column=0, padx=5, pady=5, sticky='w')
49
107
  ttk.Entry(control_frame, textvariable=self.pdf_path, width=60).grid(row=0, column=1, padx=5, pady=5, sticky='ew')
50
108
  ttk.Button(control_frame, text="Browse...", command=self._select_pdf).grid(row=0, column=2, padx=5, pady=5)
51
109
 
52
- # Options
110
+ # Row 1: Remnants and Max Links Label/Entry
53
111
  ttk.Checkbutton(
54
112
  control_frame,
55
113
  text="Check for Remnants (URLs/Emails)",
56
114
  variable=self.check_remnants_var
57
115
  ).grid(row=1, column=0, padx=5, pady=5, sticky='w')
58
116
 
117
+ ttk.Label(control_frame, text="Max Links to Display:").grid(row=1, column=1, padx=5, pady=5, sticky='e')
118
+ self.max_links_entry = ttk.Entry(control_frame, textvariable=self.max_links_var, width=10)
119
+ self.max_links_entry.grid(row=1, column=2, padx=5, pady=5, sticky='w')
120
+
121
+ export_group_frame = ttk.Frame(control_frame)
122
+ export_group_frame.grid(row=2, column=0, padx=5, pady=5, sticky='w') # Placed in the original Checkbutton's column
123
+
124
+ ttk.Checkbutton(
125
+ export_group_frame,
126
+ text="Export Report",
127
+ variable=self.do_export_report_var,
128
+ command=self._toggle_export_report
129
+ ).pack(side=tk.LEFT, padx=(0, 5)) # Pack Checkbutton to the left with small internal padding
130
+ self.export_report_format = ttk.Combobox(
131
+ export_group_frame,
132
+ textvariable=self.export_report_format_var,
133
+ values=self.supported_export_formats,
134
+ state='readonly', # Prevents user from typing invalid values
135
+ width=5
136
+ )
137
+ self.export_report_format.set(self.supported_export_formats[0]) # Set default text
138
+ self.export_report_format.pack(side=tk.LEFT)
139
+ # Pack Entry tightly next to it
140
+
59
141
  ttk.Checkbutton(
60
142
  control_frame,
61
143
  text="Show All Links (Override Max)",
62
144
  variable=self.show_all_links_var,
63
- # Optional: Disable max_links entry when this is checked
64
145
  command=self._toggle_max_links_entry
65
- ).grid(row=2, column=0, padx=5, pady=5, sticky='w')
146
+ ).grid(row=2, column=2, padx=5, pady=5, sticky='w')
66
147
 
67
- ttk.Label(control_frame, text="Max Links to Display:").grid(row=1, column=1, padx=5, pady=5, sticky='e')
68
- self.max_links_entry = ttk.Entry(control_frame, textvariable=self.max_links_var, width=10)
69
- self.max_links_entry.grid(row=1, column=2, padx=5, pady=5, sticky='w')
148
+ # Row 3: Run Button and License Button
149
+ run_btn = ttk.Button(control_frame, text="▶ Run Analysis", command=self._run_analysis_gui, style='Accent.TButton')
150
+ run_btn.grid(row=3, column=0, columnspan=2, pady=10, sticky='ew', padx=(0, 5))
70
151
 
71
- # Run Button
72
- ttk.Button(control_frame, text="▶ Run Analysis", command=self._run_analysis_gui, style='Accent.TButton').grid(row=2, column=0, columnspan=3, pady=10)
152
+ license_btn = ttk.Button(control_frame, text="Show License", command=self._show_license)
153
+ license_btn.grid(row=3, column=2, columnspan=1, pady=10, sticky='ew', padx=(5, 0)) # Sticky 'ew' makes it fill
154
+
73
155
 
74
156
  control_frame.grid_columnconfigure(1, weight=1)
75
157
 
@@ -80,16 +162,26 @@ class PDFLinkCheckerApp(tk.Tk):
80
162
  ttk.Label(output_frame, text="Analysis Report Output:").pack(fill='x')
81
163
 
82
164
  # Scrollable Text Widget for output
83
- self.output_text = tk.Text(output_frame, wrap=tk.WORD, state=tk.DISABLED, bg='#333333', fg='white', font=('Monospace', 10))
84
- self.output_text.pack(fill='both', expand=True, padx=5, pady=5)
165
+ # Use an internal frame for text and scrollbar to ensure correct packing
166
+ text_scroll_frame = ttk.Frame(output_frame)
167
+ text_scroll_frame.pack(fill='both', expand=True, padx=5, pady=5)
85
168
 
86
- # Scrollbar
87
- scrollbar = ttk.Scrollbar(output_frame, command=self.output_text.yview)
88
- self.output_text['yscrollcommand'] = scrollbar.set
169
+ self.output_text = tk.Text(text_scroll_frame, wrap=tk.WORD, state=tk.DISABLED, bg='#333333', fg='white', font=('Monospace', 10))
170
+ self.output_text.pack(side=tk.LEFT, fill='both', expand=True) # Text fills and expands
171
+
172
+ # Scrollbar (Scrollbar must be packed AFTER the text widget)
173
+ scrollbar = ttk.Scrollbar(text_scroll_frame, command=self.output_text.yview)
89
174
  scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
175
+ self.output_text['yscrollcommand'] = scrollbar.set # Link text widget back to scrollbar
90
176
 
91
177
  def _select_pdf(self):
178
+ if self.pdf_path.get():
179
+ initialdir = str(Path(self.pdf_path.get()).parent)
180
+ else:
181
+ initialdir = str(Path.cwd())
182
+
92
183
  file_path = filedialog.askopenfilename(
184
+ initialdir=initialdir,
93
185
  defaultextension=".pdf",
94
186
  filetypes=[("PDF files", "*.pdf"), ("All files", "*.*")]
95
187
  )
@@ -103,6 +195,13 @@ class PDFLinkCheckerApp(tk.Tk):
103
195
  else:
104
196
  self.max_links_entry.config(state=tk.NORMAL)
105
197
 
198
+ def _toggle_export_report(self):
199
+ """Enables/disables the report file export."""
200
+ if self.do_export_report_var.get():
201
+ self.export_report_format.config(state=tk.NORMAL)
202
+ else:
203
+ self.export_report_format.config(state=tk.DISABLED)
204
+
106
205
  def _run_analysis_gui(self):
107
206
  pdf_path_str = self.pdf_path.get()
108
207
  if not Path(pdf_path_str).exists():
@@ -110,18 +209,21 @@ class PDFLinkCheckerApp(tk.Tk):
110
209
  return
111
210
 
112
211
  if self.show_all_links_var.get():
113
- # Pass 0 to the backend, which analyze.py interprets as "Show All"
114
212
  max_links_to_pass = 0
115
213
  else:
116
214
  try:
117
215
  max_links_to_pass = int(self.max_links_var.get())
118
- if max_links_to_pass <= 0:
216
+ if max_links_to_pass < 1:
119
217
  self._display_error("Error: Max Links must be a positive number (or use 'Show All').")
120
218
  return
121
219
  except ValueError:
122
220
  self._display_error("Error: Max Links must be an integer.")
123
221
  return
124
222
 
223
+ export_format = None
224
+ if self.do_export_report_var.get():
225
+ export_format = self.export_report_format_var.get().lower()
226
+
125
227
  # 1. Clear previous output and enable editing
126
228
  self.output_text.config(state=tk.NORMAL)
127
229
  self.output_text.delete('1.0', tk.END)
@@ -136,11 +238,13 @@ class PDFLinkCheckerApp(tk.Tk):
136
238
  run_analysis(
137
239
  pdf_path=pdf_path_str,
138
240
  check_remnants=self.check_remnants_var.get(),
139
- max_links=max_links_to_pass
241
+ max_links=max_links_to_pass,
242
+ export_format=export_format
140
243
  )
141
244
  self.output_text.insert(tk.END, "\n--- Analysis Complete ---\n")
142
245
 
143
246
  except Exception as e:
247
+ self.output_text.insert(tk.END, "\n")
144
248
  self._display_error(f"An unexpected error occurred during analysis: {e}")
145
249
 
146
250
  finally:
@@ -149,17 +253,41 @@ class PDFLinkCheckerApp(tk.Tk):
149
253
  self.output_text.config(state=tk.DISABLED)
150
254
 
151
255
  def _display_error(self, message):
152
- self.output_text.config(state=tk.NORMAL)
153
- self.output_text.delete('1.0', tk.END)
256
+ # Ensure output is in normal state to write
257
+ original_state = self.output_text.cget('state')
258
+ if original_state == tk.DISABLED:
259
+ self.output_text.config(state=tk.NORMAL)
260
+
261
+ #self.output_text.delete('1.0', tk.END)
154
262
  self.output_text.insert(tk.END, f"[ERROR] {message}\n", 'error')
155
263
  self.output_text.tag_config('error', foreground='red')
264
+
265
+ # Restore state
156
266
  self.output_text.config(state=tk.DISABLED)
157
267
 
158
268
 
159
- def start_gui():
160
- """Entry point function to launch the application."""
161
- app = PDFLinkCheckerApp()
162
- app.mainloop()
269
+ def auto_close_window(root, delay_ms:int = 0):
270
+ """
271
+ Schedules the Tkinter window to be destroyed after a specified delay.
272
+ """
273
+ if delay_ms > 0:
274
+ print(f"Window is set to automatically close in {delay_ms/1000} seconds.")
275
+ root.after(delay_ms, root.destroy)
276
+ else:
277
+ return
278
+
279
+
280
+ def start_gui(time_auto_close:int=0):
281
+ """
282
+ Entry point function to launch the application.
283
+ """
284
+ print("pdflinkcheck: start_gui ...")
285
+ tk_app = PDFLinkCheckerApp()
286
+
287
+ auto_close_window(tk_app, time_auto_close)
288
+
289
+ tk_app.mainloop()
290
+ print("pdflinkcheck: gui closed.")
163
291
 
164
292
  if __name__ == "__main__":
165
293
  start_gui()
pdflinkcheck/io.py ADDED
@@ -0,0 +1,106 @@
1
+ # src/pdflinkcheck/io.py
2
+ import logging
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Dict, Any, Union, List
6
+
7
+ # --- Configuration ---
8
+
9
+ # Define the base directory for pdflinkcheck data (~/.pdflinkcheck)
10
+ try:
11
+ # Use the home directory and append the tool's name
12
+ PDFLINKCHECK_HOME = Path.home() / ".pdflinkcheck"
13
+ except Exception:
14
+ # Fallback if Path.home() fails in certain environments (e.g., some CI runners)
15
+ PDFLINKCHECK_HOME = Path("/tmp/.pdflinkcheck_temp")
16
+
17
+ # Ensure the directory exists
18
+ PDFLINKCHECK_HOME.mkdir(parents=True, exist_ok=True)
19
+
20
+ # Define the log file path
21
+ LOG_FILE_PATH = PDFLINKCHECK_HOME / "pdflinkcheck_errors.log"
22
+
23
+ # --- Logging Setup ---
24
+
25
+ # Set up a basic logger for error tracking
26
+ def setup_error_logger():
27
+ """
28
+ Configures a basic logger that writes errors and warnings to a file
29
+ in the PDFLINKCHECK_HOME directory.
30
+ """
31
+ # Create the logger instance
32
+ logger = logging.getLogger('pdflinkcheck_logger')
33
+ logger.setLevel(logging.WARNING) # Log WARNING and above
34
+
35
+ # Prevent propagation to the root logger (which might print to console)
36
+ logger.propagate = False
37
+
38
+ # Create file handler
39
+ file_handler = logging.FileHandler(LOG_FILE_PATH, mode='a')
40
+ file_handler.setLevel(logging.WARNING)
41
+
42
+ # Create formatter
43
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
44
+ file_handler.setFormatter(formatter)
45
+
46
+ # Check if the handler is already added (prevents duplicate log entries)
47
+ if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
48
+ logger.addHandler(file_handler)
49
+
50
+ return logger
51
+
52
+ # Initialize the logger instance
53
+ error_logger = setup_error_logger()
54
+
55
+ # --- Export Functionality ---
56
+
57
+ def export_report_data(
58
+ report_data: Dict[str, Any],
59
+ pdf_filename: str,
60
+ export_format: str = "JSON"
61
+ ) -> Path:
62
+ """
63
+ Exports the structured analysis report data to a file in the
64
+ PDFLINKCHECK_HOME directory.
65
+
66
+ Args:
67
+ report_data: The dictionary containing the results from run_analysis.
68
+ pdf_filename: The base filename of the PDF being analyzed (used for the output file name).
69
+ export_format: The desired output format ('json' currently supported).
70
+
71
+ Returns:
72
+ The path object pointing to the successfully created report file.
73
+
74
+ Raises:
75
+ ValueError: If the export_format is not supported.
76
+ """
77
+ if export_format.upper() != "JSON":
78
+ error_logger.error(f"Unsupported export format requested: {export_format}")
79
+ raise ValueError("Only 'JSON' format is currently supported for report export.")
80
+
81
+ # Create an output file name based on the PDF name and a timestamp
82
+ base_name = Path(pdf_filename).stem
83
+ output_filename = f"{base_name}_report.json"
84
+ output_path = PDFLINKCHECK_HOME / output_filename
85
+
86
+ try:
87
+ with open(output_path, 'w', encoding='utf-8') as f:
88
+ # Use indent for readability
89
+ json.dump(report_data, f, indent=4)
90
+
91
+ print(f"\nReport successfully exported to: {output_path}")
92
+ return output_path
93
+
94
+ except Exception as e:
95
+ error_logger.error(f"Failed to export report to JSON: {e}", exc_info=True)
96
+ # Re-raise the exception after logging for caller to handle
97
+ raise RuntimeError(f"Report export failed due to an I/O error: {e}")
98
+
99
+ # Example of how an external module can log an error:
100
+ # from pdflinkcheck.io import error_logger
101
+ # try:
102
+ # ...
103
+ # except Exception as e:
104
+ # error_logger.exception("An exception occurred during link extraction.")
105
+
106
+
@@ -0,0 +1,266 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdflinkcheck
3
+ Version: 1.1.47
4
+ Summary: A purpose-built PDF link analysis and reporting tool with GUI and CLI.
5
+ Author-email: George Clayton Bennett <george.bennett@memphistn.gov>
6
+ Project-URL: Homepage, https://github.com/city-of-memphis-wastewater/pdflinkcheck
7
+ Project-URL: Repository, https://github.com/city-of-memphis-wastewater/pdflinkcheck
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3 :: Only
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Programming Language :: Python :: 3.14
15
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Intended Audience :: End Users/Desktop
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: Intended Audience :: Science/Research
20
+ Classifier: Intended Audience :: Other Audience
21
+ Classifier: Topic :: File Formats
22
+ Classifier: Topic :: Office/Business
23
+ Classifier: Topic :: Text Processing :: General
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Environment :: Console
26
+ Classifier: Environment :: MacOS X
27
+ Classifier: Environment :: Win32 (MS Windows)
28
+ Classifier: Typing :: Typed
29
+ Classifier: Development Status :: 4 - Beta
30
+ Requires-Python: >=3.10
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: pyhabitat>=1.0.53
34
+ Requires-Dist: pymupdf>=1.26.6
35
+ Requires-Dist: rich>=14.2.0
36
+ Requires-Dist: typer>=0.20.0
37
+ Provides-Extra: dev
38
+ Requires-Dist: ruff>=0.1.13; extra == "dev"
39
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
40
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
41
+ Dynamic: license-file
42
+
43
+ # pdflinkcheck
44
+
45
+ A purpose-built tool for comprehensive analysis of hyperlinks and link remnants within PDF documents, primarily using the PyMuPDF library. Use the CLI or the GUI.
46
+
47
+ -----
48
+
49
+ ![Screenshot of the pdflinkcheck GUI](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_gui_v1.1.32.png)
50
+
51
+ -----
52
+
53
+ ## 📥 Access and Installation
54
+
55
+ The recommended way to use `pdflinkcheck` is to either install the CLI with `pipx` or to download the appropriate latest binary for your system from [Releases](https://github.com/City-of-Memphis-Wastewater/pdflinkcheck/releases/).
56
+
57
+ ### 🚀 Recommended Access (Binary Files)
58
+
59
+ For the most user-typical experience, download the single-file binary matching your OS.
60
+
61
+ | **File Type** | **Primary Use Case** | **Recommended Launch Method** |
62
+ | :--- | :--- | :--- |
63
+ | **Executable (.exe, .elf, .pyz)** | **GUI (Double-Click)** | Double-click the file (use the accompanying `.bat` file on Windows). |
64
+ | **PYZ (Python Zip App)** | **CLI (Terminal)** | Run using your system's `python` command: `python pdflinkcheck-VERSION.pyz analyze ...` |
65
+
66
+ ### Installation via pipx
67
+
68
+ For an isolated environment where you can access `pdflinkcheck` from any terminal:
69
+
70
+ ```bash
71
+ # Ensure you have pipx installed first (if not, run: pip install pipx)
72
+ pipx install pdflinkcheck
73
+ ```
74
+
75
+ -----
76
+
77
+ ## 💻 Graphical User Interface (GUI)
78
+
79
+ The tool can be run as a simple cross-platform graphical interface (Tkinter).
80
+
81
+ ### Launching the GUI
82
+
83
+ There are three ways to launch the GUI:
84
+
85
+ 1. **Implicit Launch:** Run the main command with no arguments, subcommands, or flags (`pdflinkcheck`).
86
+ 2. **Explicit Command:** Use the dedicated GUI subcommand (`pdflinkcheck gui`).
87
+ 3. **Binary Double-Click:**
88
+ * **Windows:** Double-click the `pdflinkcheck-VERSION-gui.bat` file.
89
+ * **macOS/Linux:** Double-click the downloaded `.pyz` or `.elf` file.
90
+
91
+ ### Planned GUI Updates
92
+
93
+ We are actively working on the following enhancements:
94
+
95
+ * **Report Export:** Functionality to export the full analysis report to a plain text file.
96
+ * **License Visibility:** A dedicated "License Info" button within the GUI to display the terms of the AGPLv3+ license.
97
+
98
+ -----
99
+
100
+ ## 🚀 CLI Usage
101
+
102
+ The core functionality is accessed via the `analyze` command. All commands include the built-in `--help` flag for quick reference.
103
+
104
+ ### Available Commands
105
+
106
+ |**Command**|**Description**|
107
+ |---|---|
108
+ |`pdflinkcheck analyze`|Analyzes a PDF file for links and remnants.|
109
+ |`pdflinkcheck gui`|Explicitly launch the Graphical User Interface.|
110
+ |`pdflinkcheck license`|**Displays the full AGPLv3+ license text in the terminal.**|
111
+
112
+ ### `analyze` Command Options
113
+
114
+ |**Option**|**Description**|**Default**|
115
+ |---|---|---|
116
+ |`<PDF_PATH>`|**Required.** The path to the PDF file to analyze.|N/A|
117
+ |`--check-remnants / --no-check-remnants`|Toggle scanning the text layer for unlinked URLs/Emails.|`--check-remnants`|
118
+ |`--max-links INTEGER`|Maximum number of links/remnants to display in the detailed report sections. Use `0` to show all.|`0` (Show All)|
119
+ |`--export-format FORMAT`|Format for the exported report. If specified, the report is saved to a file named after the PDF. Currently supported: `JSON`.|`JSON`|
120
+ |`--help`|Show command help and exit.|N/A|
121
+
122
+ ### `gui` Command Options
123
+
124
+ | **Option** | **Description** | **Default** |
125
+ | ---------------------- | ------------------------------------------------------------------------------------------------------------- | -------------- |
126
+ | `--auto-close INTEGER` | **(For testing/automation only).** Delay in milliseconds after which the GUI window will automatically close. | `0` (Disabled) |
127
+ #### Example Runs
128
+
129
+
130
+
131
+ ```bash
132
+ # Analyze a document, show all links/remnants, and save the report as JSON
133
+ pdflinkcheck analyze "TE Maxson WWTF O&M Manual.pdf" --export-format JSON
134
+
135
+ # Analyze a document but skip the time-consuming remnant check
136
+ pdflinkcheck analyze "another_doc.pdf" --no-check-remnants
137
+
138
+ # Analyze a document but keep the print block short, showing only the first 10 links for each type
139
+ pdflinkcheck analyze "TE Maxson WWTF O&M Manual.pdf" --max-links 10
140
+
141
+ # Show the GUI for only a moment, like in a build check
142
+ pdflinkcheck gui --auto-close 3000
143
+ ```
144
+
145
+
146
+ -----
147
+
148
+ ## 📦 Library Access (Advanced)
149
+
150
+ For developers importing `pdflinkcheck` into other Python projects, the core analysis functions are exposed directly in the root namespace:
151
+
152
+ |**Function**|**Description**|
153
+ |---|---|
154
+ |`run_analysis()`|**(Primary function)** Performs the full analysis, prints to console, and handles file export.|
155
+ |`extract_links()`|Low-level function to retrieve all explicit links (URIs, GoTo, etc.) from a PDF path.|
156
+ |`extract_toc()`|Low-level function to extract the PDF's internal Table of Contents (bookmarks/outline).|
157
+
158
+ Python
159
+
160
+ ```python
161
+ from pdflinkcheck.analyze import run_analysis, extract_links, extract_toc
162
+ ```
163
+
164
+ -----
165
+
166
+ ## ✨ Features
167
+
168
+ * **Active Link Extraction:** Identifies and categorizes all programmed links (External URIs, Internal GoTo/Destinations, Remote Jumps).
169
+ * **Anchor Text Retrieval:** Extracts the visible text corresponding to each link's bounding box.
170
+ * **Remnant Detection:** Scans the document's text layer for unlinked URIs and email addresses that should potentially be converted into active links.
171
+ * **Structural TOC:** Extracts the PDF's internal Table of Contents (bookmarks/outline).
172
+
173
+ -----
174
+
175
+ ## 📜 License Implications (AGPLv3+)
176
+
177
+ **pdflinkcheck is licensed under the GNU Affero General Public License version 3 or later (AGPLv3+).**
178
+
179
+ This license has significant implications for **distribution and network use**, particularly for organizations:
180
+
181
+ * **Source Code Provision:** If you distribute this tool (modified or unmodified) to anyone, you **must** provide the full source code under the same license.
182
+ * **Network Interaction (Affero Clause):** If you modify this tool and make the modified version available to users over a computer network (e.g., as a web service or backend), you **must** also offer the source code to those network users.
183
+
184
+ > **Before deploying or modifying this tool for organizational use, especially for internal web services or distribution, please ensure compliance with the AGPLv3+ terms.**
185
+
186
+ -----
187
+
188
+ ## 🥚 Optional REPL‑Friendly GUI Access (Easter Egg)
189
+
190
+ For users who prefer exploring tools interactively—especially those coming from MATLAB or other REPL‑first environments—`pdflinkcheck` includes an optional Easter egg that exposes the GUI launcher directly in the library namespace.
191
+
192
+ This feature is **disabled by default** and has **no effect on normal imports**.
193
+
194
+ ### Enabling the Easter Egg
195
+
196
+ Set the environment variable before importing the library:
197
+
198
+ ```python
199
+ import os
200
+ os.environ["PDFLINKCHECK_GUI_EASTEREGG"] = "true"
201
+
202
+ import pdflinkcheck
203
+ pdflinkcheck.start_gui()
204
+ ```
205
+
206
+ Accepted values include: `true`, `1`, `yes`, `on` (case‑insensitive).
207
+
208
+ ### Purpose
209
+
210
+ This opt‑in behavior is designed to make the library feel welcoming to beginners who are experimenting in a Python REPL for the first time. When enabled, the `start_gui()` function becomes available at the top level:
211
+
212
+ ```python
213
+ pdflinkcheck.start_gui()
214
+ ```
215
+
216
+ If the `PDFLINKCHECK_GUI_EASTEREGG` environment variable is not set—or if GUI support is unavailable—`pdflinkcheck` behaves as a normal library with no GUI functions exposed.
217
+
218
+ -----
219
+
220
+ ## ⚠️ Compatibility Notes
221
+
222
+ ### Platform Compatibility:
223
+
224
+ This tool relies on the `PyMuPDF` library.
225
+ All attempts to run it in a **Termux (Android)** environment have failed due to underlying C/C++ library compilation issues with PyMuPDF.
226
+ It is recommended for use on standard Linux, macOS, or Windows operating systems.
227
+
228
+ #### Termux Compatibility as a Key Goal
229
+ A key goal of City-of-Memphis-Wastewater is to release all software as Termux-compatible. Unfortunately, that simply isn't possible with PyMuPDF as a dependency.
230
+ We tried alternative PDF libraries like `pdfminer`, `pdfplumber`, and `borb`, but none of these offered the level of detail concerning GoTo links.
231
+ Due to Termux compatibility goals, we do not generally make Tkinter-based interfaces, so that was a fun, minimalist opportunity on this project.
232
+
233
+ Termux compatibility is important in the modern age as Android devices are common among technicians, field engineers, and maintenance staff.
234
+ Android is the most common operating system in the Global South.
235
+ We aim to produce stable software that can do the most possible good.
236
+
237
+ We love web-stack GUIs served locally as a final product.
238
+ All that packaged up into a Termux-compatible ELF or PYZ - What could be better!
239
+
240
+ In the future we may find a work-around and be able to drop the PyMuPDF dependency.
241
+ This would have lots of implications:
242
+ - Reduced artifact size.
243
+ - Alpine-compatible Docker image.
244
+ - Web-stack GUI rather than Tkinter, to be compatible with Termux.
245
+ - A different license from the AGPLv3, if we choose at that time.
246
+
247
+ In the meantime, the standalone binaries and pipx installation provide excellent cross-platform support on Windows, macOS, and standard Linux desktops/laptops.
248
+
249
+ ### Document Compatibility:
250
+ While `pdflinkcheck` uses the robust PyMuPDF library, not all PDF files can be processed successfully. This tool is designed primarily for digitally generated (vector-based) PDFs.
251
+
252
+ Processing may fail or yield incomplete results for:
253
+ * **Scanned PDFs** (images of text) that lack an accessible text layer.
254
+ * **Encrypted or Password-Protected** documents.
255
+ * **Malformed or non-standard** PDF files.
256
+
257
+ -----
258
+
259
+ ## Run from Source (Developers)
260
+
261
+ ```bash
262
+ git clone http://github.com/city-of-memphis-wastewater/pdflinkcheck.git
263
+ cd pdflinkcheck
264
+ uv sync
265
+ uv run python src/pdflinkcheck/cli.py --help
266
+ ```