PyPI - pdflinkcheck - Versions diffs - 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl - Mend

pdflinkcheck 1.1.94py3-none-any.whl → 1.2.29py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

pdflinkcheck/__init__.py +88 -18
pdflinkcheck/__main__.py +6 -0
pdflinkcheck/analysis_pdfium.py +131 -0
pdflinkcheck/{analyze_pymupdf.py → analysis_pymupdf.py} +99 -141
pdflinkcheck/{analyze_pypdf.py → analysis_pypdf.py} +51 -39
pdflinkcheck/cli.py +52 -48
pdflinkcheck/data/LICENSE +18 -15
pdflinkcheck/data/README.md +23 -25
pdflinkcheck/data/pyproject.toml +17 -26
pdflinkcheck/datacopy.py +16 -1
pdflinkcheck/dev.py +2 -2
pdflinkcheck/environment.py +14 -2
pdflinkcheck/gui.py +346 -563
pdflinkcheck/helpers.py +88 -0
pdflinkcheck/io.py +24 -6
pdflinkcheck/report.py +598 -97
pdflinkcheck/security.py +189 -0
pdflinkcheck/splash.py +38 -0
pdflinkcheck/stdlib_server.py +7 -21
pdflinkcheck/stdlib_server_alt.py +571 -0
pdflinkcheck/tk_utils.py +188 -0
pdflinkcheck/update_msix_version.py +2 -0
pdflinkcheck/validate.py +104 -170
pdflinkcheck/version_info.py +2 -2
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/METADATA +41 -40
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/RECORD +34 -27
pdflinkcheck-1.2.29.dist-info/WHEEL +5 -0
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/entry_points.txt +0 -1
pdflinkcheck-1.2.29.dist-info/licenses/LICENSE +27 -0
pdflinkcheck-1.2.29.dist-info/top_level.txt +1 -0
pdflinkcheck/analyze_pypdf_v2.py +0 -217
pdflinkcheck-1.1.94.dist-info/WHEEL +0 -4
pdflinkcheck-1.1.94.dist-info/licenses/LICENSE +0 -24
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/licenses/LICENSE-AGPL3 +0 -0
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/licenses/LICENSE-MIT +0 -0

pdflinkcheck/{analyze_pypdf.py → analysis_pypdf.py} RENAMED Viewed

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: MIT
-# src/pdflinkcheck/analyze_pypdf.py
+# src/pdflinkcheck/analysis_pypdf.py
+from __future__ import annotations
 import sys
 from pathlib import Path
 import logging
@@ -8,6 +9,7 @@ from typing import Dict, Any, Optional, List
 from pypdf import PdfReader
 from pypdf.generic import Destination, NameObject, ArrayObject, IndirectObject
+from pdflinkcheck.helpers import PageRef
 from pdflinkcheck.io import error_logger, export_report_data, get_first_pdf_in_cwd, LOG_FILE_PATH
@@ -16,7 +18,28 @@ from pdflinkcheck.io import error_logger, export_report_data, get_first_pdf_in_c
 Inspect target PDF for both URI links and for GoTo links, using only pypdf, not Fitz
 """
-def get_anchor_text_pypdf(page, rect) -> str:
+def analyze_pdf(pdf_path: str):
+    data = {}
+    data["links"] = []
+    data["toc"] = []
+    data["file_ov"] = {}
+    try:
+        reader = PdfReader(pdf_path)
+    except Exception as e:
+        print(f"pypdf.PdfReader() failed: {e}")
+        return data
+    extracted_links = _extract_links_pypdf(reader)
+    structural_toc = _extract_toc_pypdf(reader)
+    page_count = len(reader.pages)
+    data["links"] = extracted_links
+    data["toc"] = structural_toc
+    data["file_ov"]["total_pages"] = page_count
+    return data
+def _get_anchor_text_pypdf(page, rect) -> str:
     """
     Extracts text within the link's bounding box using a visitor function.
     Reliable for finding text associated with a link without PyMuPDF.
@@ -33,7 +56,7 @@ def get_anchor_text_pypdf(page, rect) -> str:
     parts: List[str] = []
-    def visitor_body(text, cm, tm, font_dict, font_size):
+    def _visitor_body(text, cm, tm, font_dict, font_size):
         # tm[4], tm[5] are the current text insertion point coordinates (x, y)
         x, y = tm[4], tm[5]
@@ -44,17 +67,18 @@ def get_anchor_text_pypdf(page, rect) -> str:
             if text.strip():
                 parts.append(text)
-    page.extract_text(visitor_text=visitor_body)
+    page.extract_text(visitor_text=_visitor_body)
     raw_extracted = "".join(parts)
     cleaned = " ".join(raw_extracted.split()).strip()
     return cleaned if cleaned else "Graphic/Empty Link"
-def resolve_pypdf_destination(reader: PdfReader, dest, obj_id_to_page: dict) -> Optional[int]:
+def _resolve_pypdf_destination(reader: PdfReader, dest, obj_id_to_page: dict) -> Optional[int]:
     try:
         if isinstance(dest, Destination):
-            return dest.page_number + 1  # Return int directly
+            # .page_number in pypdf is already 0-indexed
+            return dest.page_number
         if isinstance(dest, IndirectObject):
             return obj_id_to_page.get(dest.idnum)
@@ -67,42 +91,25 @@ def resolve_pypdf_destination(reader: PdfReader, dest, obj_id_to_page: dict) ->
     except Exception:
         return None
-def resolve_pypdf_destination_(reader: PdfReader, dest, obj_id_to_page: dict) -> str:
-    """
-    Resolves a Destination object or IndirectObject to a 1-based page number string.
-    """
-    try:
-        if isinstance(dest, Destination):
-            return str(dest.page_number + 1)
-        if isinstance(dest, IndirectObject):
-            return str(obj_id_to_page.get(dest.idnum, "Unknown"))
-        if isinstance(dest, ArrayObject) and len(dest) > 0:
-            if isinstance(dest[0], IndirectObject):
-                return str(obj_id_to_page.get(dest[0].idnum, "Unknown"))
-        return "Unknown"
-    except Exception:
-        return "Error Resolving"
-def extract_links_pypdf(pdf_path):
+def _extract_links_pypdf(reader: PdfReader) -> List[Dict[str, Any]]:
     """
     Termux-compatible link extraction using pure-Python pypdf.
     Matches the reporting schema of the PyMuPDF version.
     """
-    reader = PdfReader(pdf_path)
     # Pre-map Object IDs to Page Numbers for fast internal link resolution
     obj_id_to_page = {
-        page.indirect_reference.idnum: i + 1
+        page.indirect_reference.idnum: i
         for i, page in enumerate(reader.pages)
     }
     all_links = []
     for i, page in enumerate(reader.pages):
-        page_num = i + 1
+        #page_num = i
+        # Use PageRef to stay consistent
+        page_source = PageRef.from_index(i)
         if "/Annots" not in page:
             continue
@@ -112,10 +119,10 @@ def extract_links_pypdf(pdf_path):
                 continue
             rect = obj.get("/Rect")
-            anchor_text = get_anchor_text_pypdf(page, rect)
+            anchor_text = _get_anchor_text_pypdf(page, rect)
             link_dict = {
-                'page': page_num,
+                'page': page_source.machine,
                 'rect': list(rect) if rect else None,
                 'link_text': anchor_text,
                 'type': 'Other Action',
@@ -134,13 +141,16 @@ def extract_links_pypdf(pdf_path):
             # Handle GoTo (Internal)
             elif "/Dest" in obj or ("/A" in obj and "/D" in obj["/A"]):
                 dest = obj.get("/Dest") or obj["/A"].get("/D")
-                target_page = resolve_pypdf_destination(reader, dest, obj_id_to_page)
+                target_page = _resolve_pypdf_destination(reader, dest, obj_id_to_page)
                 # print(f"DEBUG: resolved target_page = {target_page} (type: {type(target_page)})")
-                link_dict.update({
-                    'type': 'Internal (GoTo/Dest)',
-                    'destination_page': target_page,
-                    'target': f"Page {target_page}"
-                })
+                if target_page is not None:
+                    dest_page = PageRef.from_index(target_page)
+                    link_dict.update({
+                        'type': 'Internal (GoTo/Dest)',
+                        'destination_page': dest_page.machine,
+                        #'target': f"Page {target_page}"
+                        'target': dest_page.machine
+                    })
             # Handle Remote GoTo (GoToR)
             elif "/A" in obj and obj["/A"].get("/S") == "/GoToR":
@@ -156,9 +166,8 @@ def extract_links_pypdf(pdf_path):
     return all_links
-def extract_toc_pypdf(pdf_path: str) -> List[Dict[str, Any]]:
+def _extract_toc_pypdf(reader: PdfReader) -> List[Dict[str, Any]]:
     try:
-        reader = PdfReader(pdf_path)
         # Note: outline is a property, not a method.
         toc_tree = reader.outline
         toc_data = []
@@ -169,7 +178,10 @@ def extract_toc_pypdf(pdf_path: str) -> List[Dict[str, Any]]:
                     # Using the reader directly is the only way to avoid
                     # the 'Destination' object has no attribute error
                     try:
-                        page_num = reader.get_destination_page_number(item) + 1
+                        page_num_raw = reader.get_destination_page_number(item)
+                        # page_num_raw is 0-indexed. Use PageRef to store it.
+                        ref = PageRef.from_index(page_num_raw)
+                        page_num = ref.machine
                     except:
                         page_num = "N/A"

pdflinkcheck/cli.py CHANGED Viewed

@@ -1,12 +1,13 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: MIT
 # src/pdflinkcheck/cli.py
+from __future__ import annotations
 import typer
 from typing import Literal
 from typer.models import OptionInfo
 from rich.console import Console
 from pathlib import Path
-from pdflinkcheck.report import run_report_and_call_exports # Assuming core logic moves here
+from pdflinkcheck.report import run_report_and_call_exports # Assuming core logic moves here
 from typing import Dict, Optional, Union, List
 import pyhabitat
 import sys
@@ -20,18 +21,48 @@ from pdflinkcheck.io import get_first_pdf_in_cwd
 console = Console() # to be above the tkinter check, in case of console.print
+# Force Rich to always enable colors, even when running from a .pyz bundle
+os.environ["FORCE_COLOR"] = "1"
+# Optional but helpful for full terminal feature detection
+os.environ["TERM"] = "xterm-256color"
 app = typer.Typer(
     name="pdflinkcheck",
     help=f"A command-line tool for comprehensive PDF link analysis and reporting. (v{get_version_from_pyproject()})",
     add_completion=False,
     invoke_without_command = True,
     no_args_is_help = False,
+    context_settings={"ignore_unknown_options": True,
+                      "allow_extra_args": True,
+                      "help_option_names": ["-h", "--help"]},
 )
+def debug_callback(value: bool):
+#def debug_callback(ctx: typer.Context, value: bool):
+    if value:
+        # This runs IMMEDIATELY when --debug is parsed, even before --help
+         # 1. Access the list of all command-line arguments
+        full_command_list = sys.argv
+        # 2. Join the list into a single string to recreate the command
+        command_string = " ".join(full_command_list)
+        # 3. Print the command
+        typer.echo(f"command:\n{command_string}\n")
+    return value
+if "--show-command" in sys.argv or "--debug" in sys.argv:
+    debug_callback(True)
 @app.callback()
 def main(ctx: typer.Context,
     version: Optional[bool] = typer.Option(
     None, "--version", is_flag=True, help="Show the version."
+    ),
+    debug: bool = typer.Option(
+        False, "--debug", is_flag=True, help="Enable verbose debug logging and echo the full command string."
+    ),
+    show_command: bool = typer.Option(
+        False, "--show-command", is_flag=True, help="Echo the full command string to the console before execution."
     )
     ):
     """
@@ -44,13 +75,6 @@ def main(ctx: typer.Context,
     if ctx.invoked_subcommand is None:
         gui_command()
         raise typer.Exit(code=0)
-    # 1. Access the list of all command-line arguments
-    full_command_list = sys.argv
-    # 2. Join the list into a single string to recreate the command
-    command_string = " ".join(full_command_list)
-    # 3. Print the command
-    typer.echo(f"command:\n{command_string}\n")
 # help-tree() command: fragile, experimental, defaults to not being included.
@@ -89,7 +113,6 @@ def docs_command(
         try:
             license_path = files("pdflinkcheck.data") / "LICENSE"
             license_text = license_path.read_text(encoding="utf-8")
             console.print(f"\n[bold green]=== GNU AFFERO GENERAL PUBLIC LICENSE V3+ ===[/bold green]")
             console.print(license_text, highlight=False)
@@ -127,24 +150,6 @@ def tools_command(
     if clear_cache:
         clear_all_caches()
-"""
-def validate_pdf_commands(
-    pdf_path: Optional[Path] = typer.Argument(
-        None,
-        exists=True,
-        file_okay=True,
-        dir_okay=False,
-        readable=True,
-        resolve_path=True,
-        help="Path to the PDF file to validate. If omitted, searches current directory."
-    ),
-    pdf_library: Literal["pypdf", "pymupdf"] = typer.Option(
-        "pypdf",
-        "--library", "-l",
-        envvar="PDF_ENGINE",
-        help="PDF parsing engine: pypdf (pure Python) or pymupdf (faster, if available)"
-    ),
-    """
 @app.command(name="analyze") # Added a command name 'analyze' for clarity
 def analyze_pdf( # Renamed function for clarity
     pdf_path: Optional[Path] = typer.Argument(
@@ -158,22 +163,21 @@ def analyze_pdf( # Renamed function for clarity
     ),
     export_format: Optional[Literal["JSON", "TXT", "JSON,TXT", "NONE"]] = typer.Option(
         "JSON,TXT",
-        "--export-format","-e",
+        "--format","-f",
         case_sensitive=False,
         help="Export format. Use 'None' to suppress file export.",
     ),
-    max_links: int = typer.Option(
-        0,
-        "--max-links", "-m",
-        min=0,
-        help="Report brevity control. Use 0 to show all."
-    ),
-    pdf_library: Literal["pypdf", "pymupdf"] = typer.Option(
+    pdf_library: Literal["auto","pdfium","pypdf", "pymupdf"] = typer.Option(
         assess_default_pdf_library(),
-        "--pdf-library","-p",
+        "--engine","-e",
         envvar="PDF_ENGINE",
-        help="PDF parsing library. pypdf (pure Python) or pymupdf (faster, if available).",
+        help="PDF parsing library. pypdf (pure Python), pymupdf (fast, AGPL3+ licensed), pdfium (fast, BSD-3 licensed).",
+    ),
+    print_bool: bool = typer.Option(
+        True,
+        "--print/--quiet",
+        help="Print or do not print the analysis and validation report to console."
     )
 ):
     """
@@ -223,13 +227,14 @@ def analyze_pdf( # Renamed function for clarity
         if not valid and "NONE" not in requested_formats:
             typer.echo(f"Warning: No valid formats found in '{export_format}'. Supported: JSON, TXT.")
     # The meat and potatoes
     report_results = run_report_and_call_exports(
         pdf_path=str(pdf_path),
-        max_links=max_links,
         export_format = export_formats,
         pdf_library = pdf_library,
+        print_bool = print_bool,
     )
     if not report_results or not report_results.get("data"):
@@ -238,14 +243,14 @@ def analyze_pdf( # Renamed function for clarity
     validation_results = report_results["data"]["validation"]
     # Optional: fail on broken links
-    broken_count = validation_results["summary-stats"]["broken-page"] + validation_results["summary-stats"]["broken-file"]
+    broken_page_count = validation_results["summary-stats"]["broken-page"] + validation_results["summary-stats"]["broken-file"]
-    if broken_count > 0:
-        console.print(f"\n[bold yellow]Warning:[/bold yellow] {broken_count} broken link(s) found.")
-    else:
-        console.print(f"\n[bold green]Success:[/bold green] No broken links or TOC issues!\n")
+    if broken_page_count > 0:
+        console.print(f"\n[bold yellow]Warning:[/bold yellow] {broken_page_count} broken link(s) found.")
+    #else:
+    #    console.print(f"\n[bold green]Success:[/bold green] No broken links or TOC issues!\n")
-    raise typer.Exit(code=0 if broken_count == 0 else 1)
+    raise typer.Exit(code=0 if broken_page_count == 0 else 1)
 @app.command(name="serve")
 def serve(
@@ -265,7 +270,7 @@ def serve(
         console.print("   → [yellow]Reload mode enabled[/yellow]")
     # Import here to avoid slow imports on other commands
-    from pdflinkcheck.stdlib_server import ThreadedTCPServer, PDFLinkCheckHandler
+    from pdflinkcheck.stdlib_server_alt import ThreadedTCPServer, PDFLinkCheckHandler
     import socketserver
     try:
@@ -294,8 +299,6 @@ def gui_command(
     """
     Launch tkinter-based GUI.
     """
-    # --- START FIX ---
     assured_auto_close_value = 0
     if isinstance(auto_close, OptionInfo):
@@ -307,11 +310,12 @@ def gui_command(
         # Case 2: Called explicitly by Typer (pdflinkcheck gui -c 3000)
         # Typer has successfully converted the command line argument, and auto_close is an int.
         assured_auto_close_value = int(auto_close)
-    # --- END FIX ---
     if not pyhabitat.tkinter_is_available():
         _gui_failure_msg()
         return
+    #from pdflinkcheck.gui import start_gui
+    #from pdflinkcheck.gui_alt import start_gui
     from pdflinkcheck.gui import start_gui
     start_gui(time_auto_close = assured_auto_close_value)

pdflinkcheck/data/LICENSE CHANGED Viewed

@@ -1,24 +1,27 @@
-**Copyright © 2025 George Clayton Bennett**
-<https://github.com/City-of-Memphis-Wastewater/pdflinkcheck>
-Some distributed binaries of this project include the PyMuPDF library, which is licensed under **AGPL‑3.0‑or‑later**.
-Any binary that incorporates PyMuPDF is therefore distributed under **AGPL‑3.0‑or‑later**.
+Some distributed binaries of this project include the PyMuPDF library, which is licensed under **AGPL-3.0-or-later**.
+Any binary that incorporates PyMuPDF is therefore distributed under **AGPL-3.0-or-later**.
 Other binaries use only the `pypdf` library and do not include PyMuPDF; these binaries are distributed under the **MIT License**.
-For AGPL‑licensed binaries, the complete corresponding source code must be made available to anyone who possesses a copy, upon request.
+For AGPL-licensed binaries, the complete corresponding source code must be made available to anyone who possesses a copy, upon request.
 This obligation applies only to recipients of those binaries, and hosting the source code in GitHub Releases satisfies this requirement.
-A binary becomes AGPL‑licensed only when built with the optional `"full"` dependency group (as defined in `pyproject.toml` under `[project.optional-dependencies]`) or when PyMuPDF is otherwise included in the build environment.
-The **source code of pdflinkcheck itself** remains licensed under the **MIT License**; only the distributed binary becomes AGPL‑licensed when PyMuPDF is included.
+A binary becomes AGPL-licensed only when built with the optional `"full"` dependency group (as defined in `pyproject.toml` under `[project.optional-dependencies]`) or when PyMuPDF is otherwise included in the build environment.
+The **source code of pdflinkcheck itself** remains licensed under the **MIT License**; only the distributed binary becomes AGPL-licensed when PyMuPDF is included.
-Source code for each released version is available in the `pdflinkcheck‑VERSION.tar.gz` files on the project’s GitHub Releases page.
+Source code for each released version is available in the `pdflinkcheck-VERSION.tar.gz` files on the project's GitHub Releases page.
-Full‑text copies of **LICENSE‑MIT** and **LICENSE‑AGPL3** are included in the root of the repository.
+Full-text copies of **LICENSE-MIT** and **LICENSE-AGPL3** are included in the root of the repository.
 **Links:**
-- Project source code: https://github.com/City-of-Memphis-Wastewater/pdflinkcheck
-- PyMuPDF source code: https://github.com/pymupdf/PyMuPDF
-- pypdf source code: https://github.com/py-pdf/pypdf
-- AGPLv3 text (FSF): https://www.gnu.org/licenses/agpl-3.0.html
-- MIT License text: https://opensource.org/license/mit
+- Project source code: https://github.com/City-of-Memphis-Wastewater/pdflinkcheck
+- PyMuPDF source code: https://github.com/pymupdf/PyMuPDF
+- pypdfium2 source code: https://github.com/pypdfium2-team/pypdfium2
+- PDFium source code: https://pdfium.googlesource.com/pdfium/
+- pypdf source code: https://github.com/py-pdf/pypdf
+- AGPLv3 text (FSF): https://www.gnu.org/licenses/agpl-3.0.html
+- MIT License text: https://opensource.org/license/mit
+- BSD-3 License text: https://opensource.org/license/bsd-3-clause
+- Apache-v2 License text: https://opensource.org/license/apache-2-0
+Copyright 2025 George Clayton Bennett

pdflinkcheck/data/README.md CHANGED Viewed

@@ -4,7 +4,7 @@ A purpose-built tool for comprehensive analysis of hyperlinks and GoTo links wit
 -----
-![Screenshot of the pdflinkcheck GUI](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_gui_v1.1.92.png)
+![Screenshot of the pdflinkcheck GUI](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_gui_v1.1.97.png)
 -----
@@ -19,7 +19,7 @@ For the most user-typical experience, download the single-file binary matching y
 | **File Type** | **Primary Use Case** | **Recommended Launch Method** |
 | :--- | :--- | :--- |
 | **Executable (.exe, .elf)** | **GUI** | Double-click the file. |
-| **PYZ (Python Zip App)** | **CLI** or **GUI** | Run using your system's `python` command: `python pdflinkcheck-VERSION.pyz --help` |
+| **PYZ (Python Zip App)** | **CLI** or **GUI** | Run using your system's `python` command: `python pdflinkcheck-VERSION.pyz --help` |
 ### Installation via pipx
@@ -53,7 +53,7 @@ Ways to launch the GUI interface:
 The core functionality is accessed via the `analyze` command.
 `pdflinkcheck --help`:
-![Screenshot of the pdflinkcheck CLI Tree Help](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_cli_v1.1.92.png)
+![Screenshot of the pdflinkcheck CLI Tree Help](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_cli_v1.1.97.png)
 See the Help Tree by unlocking the help-tree CLI command, using the DEV_TYPER_HELP_TREE env var.
@@ -63,7 +63,7 @@ DEV_TYPER_HELP_TREE=1 pdflinkcheck help-tree` # bash
 $env:DEV_TYPER_HELP_TREE = "1"; pdflinkcheck help-tree` # PowerShell
 ```
-![Screenshot of the pdflinkcheck CLI Tree Help](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_cli_v1.1.92_tree_help.png)
+![Screenshot of the pdflinkcheck CLI Tree Help](https://raw.githubusercontent.com/City-of-Memphis-Wastewater/pdflinkcheck/main/assets/pdflinkcheck_cli_v1.1.97_tree_help.png)
@@ -84,7 +84,6 @@ $env:DEV_TYPER_HELP_TREE = "1"; pdflinkcheck help-tree` # PowerShell
 |`<PDF_PATH>`|**Required.** The path to the PDF file to analyze.|N/A|
 |`--pdf-library / -p`|Select engine: `pymupdf` or `pypdf`.|`pypdf`|
 |`--export-format / -e`|Export to `JSON`, `TXT`, or `None` to suppress file output.|`JSON`|
-|`--max-links / -m`|Maximum links to display per section. Use `0` for all.|`0`|
 ### `gui` Command Options
@@ -98,9 +97,6 @@ $env:DEV_TYPER_HELP_TREE = "1"; pdflinkcheck help-tree` # PowerShell
 # Analyze a document, show all links, and save the report as JSON and TXT
 pdflinkcheck analyze "TE Maxson WWTF O&M Manual.pdf" --export-format JSON,TXT
-# Analyze a document but keep the print block short, showing only the first 10 links for each type
-pdflinkcheck analyze "TE Maxson WWTF O&M Manual.pdf" --max-links 10
 # Show the GUI for only a moment, like in a build check
 pdflinkcheck gui --auto-close 3000
@@ -112,22 +108,23 @@ pdflinkcheck docs --license --readme
 ## 📦 Library Access (Advanced)
-For developers importing `pdflinkcheck` into other Python projects, the core analysis functions are exposed directly in the root namespace:
+For developers importing `pdflinkcheck` into other Python projects, the core analysis functions are exposed directly in the root namespace. The various `analysis_pdf_*` functions each use a different library to extract the target PDF's internal TOC, external links, and metadata.
-|**Function**|**Description**|
-|---|---|
-|`run_report()`|**(Primary function)** Performs the full analysis, prints to console, and handles file export.|
-|`extract_links_pynupdf()`|Function to retrieve all explicit links (URIs, GoTo, etc.) from a PDF path.|
-|`extract_toc_pymupdf()`|Function to extract the PDF's internal Table of Contents (bookmarks/outline).|
-|`extract_links_pynupdf()`|Function to retrieve all explicit links (URIs, GoTo, etc.) from a PDF path, using the pypdf library.|
-|`extract_toc_pymupdf()`|Function to extract the PDF's internal Table of Contents (bookmarks/outline), using the pypdf library.|
+|**Function**|**Library**|**Description**|
+|---|---|---|
+|`run_report()`|pdflinkcheck | **(Primary function)** Performs the full analysis, prints to console, and handles file export.|
+|`analyze_pdf_pdfium()`| pypdfium2 | Fast, ~10 mb, Permissively licensed |
+|`analyze_pdf_pymupdf()`| PyMuPDF | Fast, ~30 mb, AGPL3+ licensed |
+|`analyze_pdf_pypdf()`| pypdf library | Slow, ~2 mb, Permissively licensed |
 Example:
 ```python
-from pdflinkcheck.report import run_report
-from pdflinkcheck.analysis_pymupdf import extract_links_pymupdf, extract_toc_pymupdf                                                                          130 from pdflinkcheck.analysis_pymupdf import extract_links_pynupdf, extract_toc_pymupdf
-from pdflinkcheck.analysis_pypdf import extract_links_pypdf, extract_toc_pypdf
+from pdflinkcheck import ( run_report,
+                          analyze_pdf_pymupdf,
+                          analyze_pdf_pypdf,
+                          analyze_pdf_pdfium,
+                          )
 file = "document1.pdf"
 report_data = run_report(file)
@@ -194,24 +191,24 @@ Termux compatibility is important in the modern age, because Android devices are
 Android is the most common operating system in the Global South.
 We aim to produce stable software that can do the most possible good.
-Now `pdflinkcheck` can run on Termux by using the `pypdf` engine.
+Now `pdflinkcheck` can run on Termux by using the `pypdf` engine and the `pdfium` engine.
 Benefits:
 - `pypdf`-only artifacts, to reduce size to about 6% compared to artifacts that include `PyMuPDF`.
 - Web-stack GUI as an alternative to the Tkinter GUI, which can be run locally on Termux or as a web app.
 ### PDF Library Selection
-At long last, `PyMuPDF` is an optional dependency. All testing comparing `pyp df` and `PyMuPDF` has shown identical validation performance. However `PyMuPDF` is much faster. The benfit of `pypdf` is small size of packages and cross-platform compatibility.
+At long last, `PyMuPDF` is an optional dependency. All testing comparing `pypdf` and `PyMuPDF` has shown identical validation performance. However `PyMuPDF` is much faster. The benefit of `pypdf` is small size of packages and cross-platform compatibility. We have recently added a PDFium option, which circumvents the AGPL3+.
 Expect that all binaries and artifacts contain PyMuPDF, unless they are built on Android. The GUI and CLI interfaces both allow selection of the library; if PyMuPDF is selected but is not available, the user will be warned.
 To install the complete version use one of these options:
 ```bash
-pip install "pdflinkcheck[full]"
-pipx install "pdflinkcheck[full]"
-uv tool install "pdflinkcheck[full]"
-uv add "pdflinkcheck[full]"
+pip install "pdflinkcheck[mupdf]"
+pipx install "pdflinkcheck[pdfium]"
+uv tool install "pdflinkcheck[pdfium]"
+uv add "pdflinkcheck[pdfium]"
 ```
 ---
@@ -271,6 +268,7 @@ The source code of pdflinkcheck itself remains licensed under the **MIT License*
 Links:
 - Source code: https://github.com/City-of-Memphis-Wastewater/pdflinkcheck/
 - PyMuPDF source code: https://github.com/pymupdf/PyMuPDF/
+- pypdfium2 source code: https://github.com/pypdfium2-team/pypdfium2
 - pypdf source code: https://github.com/py-pdf/pypdf/
 - AGPLv3 text (FSF): https://www.gnu.org/licenses/agpl-3.0.html
 - MIT License text: https://opensource.org/license/mit

pdflinkcheck/data/pyproject.toml CHANGED Viewed

@@ -1,11 +1,11 @@
 [project]
 name = "pdflinkcheck"
-version = "1.1.94"
+version = "1.2.29"
 description = "A purpose-built PDF link analysis and reporting tool with GUI and CLI."
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.9"
 dependencies = [
-    "pyhabitat>=1.0.53",
+    "pyhabitat>=1.1.5",
     "pypdf>=6.4.2",
     "rich>=14.2.0",
     "typer>=0.20.0",
@@ -17,13 +17,12 @@ license-files = ["LICENSE", "LICENSE-MIT", "LICENSE-AGPL3"]
 classifiers=[
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
     "Programming Language :: Python :: 3.14",
-    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
-    "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Intended Audience :: End Users/Desktop",
     "Intended Audience :: Developers", # library and documentation
@@ -54,42 +53,34 @@ Repository = "https://github.com/city-of-memphis-wastewater/pdflinkcheck"
 [project.scripts]
 pdflinkcheck = "pdflinkcheck.cli:app"
 [project.optional-dependencies]
-# This allows users to do: pip install pdflinkcheck[full]
 # If you choose to include PyMuPDF, you must comply with the AGPL3
-full = [
-    #"pymupdf>=1.26.7 ; platform_system == 'Linux' and platform_machine != 'aarch64'" # to avoid on termux
-    "pymupdf>=1.26.7"  # let them try
-]
-#mobilegui = [
-#    "kivy>=2.3.1",
-#    "kivymd",
+mupdf = ["pymupdf>=1.24.0,<2.0.0"] # fails on termux
+pdfium = ["pypdfium2>=5.2.0,<6.0.0"]
+full = ["pymupdf>=1.24.0,<2.0.0", "pypdfium2>=5.2.0,<6.0.0"]
+#rust = [
+#    "pdflinkcheck-rust>=0.1.13"
 #]
 [dependency-groups]
 dev = [
     "build>=1.3.0",
-    "pyinstaller>=6.17.0",
+    "pyinstaller>=6.17.0 ; platform_system == 'Linux' and platform_machine != 'aarch64'", # to avoid on termux
     "shiv>=1.0.8",
     "ruff>=0.7.0 ; platform_system == 'Linux' and platform_machine != 'aarch64'", # to avoid on termux
     "pytest>=8.0.0",
     "pytest-cov>=4.1.0",
 ]
+#[build-system]
+#requires = ["uv_build"]
+#build-backend = "uv_build"
 [build-system]
-requires = ["uv_build"]
-build-backend = "uv_build"
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
 [tool.uv.sources]
 pdflinkcheck = { path = "src/pdflinkcheck" }
-kivymd = { git = "https://github.com/kivymd/KivyMD.git" }
-# https://docs.astral.sh/uv/concepts/preview/#available-preview-features
-#[tool.uv]
-#preview = true
-# uv handles the data path automatically

pdflinkcheck 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl

pdflinkcheck 1.1.94py3-none-any.whl → 1.2.29py3-none-any.whl