PyPI - pdflinkcheck - Versions diffs - 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl - Mend

pdflinkcheck 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

pdflinkcheck/__init__.py +88 -18
pdflinkcheck/__main__.py +6 -0
pdflinkcheck/analysis_pdfium.py +131 -0
pdflinkcheck/{analyze_pymupdf.py → analysis_pymupdf.py} +99 -141
pdflinkcheck/{analyze_pypdf.py → analysis_pypdf.py} +51 -39
pdflinkcheck/cli.py +52 -48
pdflinkcheck/data/LICENSE +18 -15
pdflinkcheck/data/README.md +23 -25
pdflinkcheck/data/pyproject.toml +17 -26
pdflinkcheck/datacopy.py +16 -1
pdflinkcheck/dev.py +2 -2
pdflinkcheck/environment.py +14 -2
pdflinkcheck/gui.py +346 -563
pdflinkcheck/helpers.py +88 -0
pdflinkcheck/io.py +24 -6
pdflinkcheck/report.py +598 -97
pdflinkcheck/security.py +189 -0
pdflinkcheck/splash.py +38 -0
pdflinkcheck/stdlib_server.py +7 -21
pdflinkcheck/stdlib_server_alt.py +571 -0
pdflinkcheck/tk_utils.py +188 -0
pdflinkcheck/update_msix_version.py +2 -0
pdflinkcheck/validate.py +104 -170
pdflinkcheck/version_info.py +2 -2
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/METADATA +41 -40
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/RECORD +34 -27
pdflinkcheck-1.2.29.dist-info/WHEEL +5 -0
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/entry_points.txt +0 -1
pdflinkcheck-1.2.29.dist-info/licenses/LICENSE +27 -0
pdflinkcheck-1.2.29.dist-info/top_level.txt +1 -0
pdflinkcheck/analyze_pypdf_v2.py +0 -217
pdflinkcheck-1.1.94.dist-info/WHEEL +0 -4
pdflinkcheck-1.1.94.dist-info/licenses/LICENSE +0 -24
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/licenses/LICENSE-AGPL3 +0 -0
{pdflinkcheck-1.1.94.dist-info → pdflinkcheck-1.2.29.dist-info}/licenses/LICENSE-MIT +0 -0

pdflinkcheck/helpers.py ADDED Viewed

@@ -0,0 +1,88 @@
+# src/pdflinkcheck/helpers.py
+from __future__ import annotations
+from pprint import pprint
+from typing import Any
+"""
+Helper functions
+"""
+def debug_head(label: str, data: Any, n: int = 3):
+    """Helper to cleanly print the first N items of a list or dict."""
+    print(f"\n--- [DEBUG: {label}] ---")
+    if isinstance(data, list):
+        pprint(data[:n], indent=2, compact=True, width=100)
+    elif isinstance(data, dict):
+        # Print first N keys
+        head_dict = {k: data[k] for k in list(data.keys())[:n]}
+        pprint(head_dict, indent=2, compact=True, width=100)
+    else:
+        print(data)
+class PageRef:
+    """
+    A simple translator to handle the 0-to-1 index conversion
+    without the 'Double Bump' risk.
+    """
+    def __init__(self, index: int):
+        self.index = index  # The 0-based physical index
+    @property
+    def human(self) -> int:
+        """The 1-based page number for humans."""
+        return self.index + 1
+    @property
+    def machine(self) -> int:
+        """Alias for index. The 0-based page number for machines."""
+        return self.index
+    @classmethod
+    def corrected_down(cls, human_num: int) -> "PageRef":
+        """Explicitly compensates for 1-based data (e.g., PyMuPDF TOC)."""
+        return cls.from_human(human_num)
+    @classmethod
+    def from_pymupdf_total_page_count(cls, total_pages: int) -> "PageRef":
+        """
+        Converts PyMuPDF's doc.page_count into a PageRef
+        representing the final valid machine-facing index.
+        """
+        return cls.from_human(total_pages)
+    @classmethod
+    def from_human(cls, human_num: int) -> "PageRef":
+        """Creates a PageRef from a 1-based human page number (e.g., from TOC)."""
+        return cls(human_num - 1)
+    @classmethod
+    def from_index(cls, physical_index: int) -> "PageRef":
+        """Creates a PageRef from a 0-based physical index (e.g., from links)."""
+        return cls(physical_index)
+    def __int__(self):
+        return self.index
+    def __str__(self):
+        return str(self.human)
+    def __repr__(self):
+        return f"PageRef(index={self.index}, human={self.human})"
+"""
+### Indexing Map: Physical (0) vs. Logical (1)
+| **File**              | **Context**      | **Index Rule**      | **Reasoning**                                                                                          |
+| --------------------- | ---------------- | ------------------- | ------------------------------------------------------------------------------------------------------ |
+| `ffi.py` (Rust bridge)| Data Extraction  | **0-indexing only** | Rust's `pdf-extract` and `lopdf` crates are 0-indexed. Data should stay raw.                           |
+| `analysis_pypdf.py`   | Data Extraction  | **0-indexing only** | `pypdf` is 0-indexed. The previous `+ 1` fixups have been removed.                                     |
+| `analysis_pymupdf.py` | Data Extraction  | **Mixed**           | **Internal:** 0-indexed. **TOC:** `get_toc()` is natively 1-indexed. Needs normalization.              |
+| `validate.py`         | Logic/Validation | **Mixed**           | **Logic:** Uses `START_INDEX=0` for boundary checks. **Strings:** Formats error messages as 1-indexed. |
+| `report.py`           | Output/Reporting | **Mixed**           | **Data:** Keeps dictionary values at 0. **Display:** Formats CLI tables as 1-indexed.                  |
+| `helpers.py`          | Translation      | **Mixed**           | The `PageRef` class acts as the "Border Control" between 0 and 1.                                      |
+| `__init__.py`         | API Surface      | **0-indexing only** | If exposing a library, users expect 0-indexed lists of pages/links.                                    |
+"""

pdflinkcheck/io.py CHANGED Viewed

@@ -1,11 +1,14 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: MIT
 # src/pdflinkcheck/io.py
+from __future__ import annotations
 import logging
 import json
 import sys
 from pathlib import Path
 from typing import Dict, Any, Union, List, Optional
+from datetime import datetime
+import time
 # --- Configuration ---
@@ -116,12 +119,13 @@ def export_report_json(
     """Exports structured dictionary results to a .json file."""
     base_name = Path(pdf_filename).stem
-    output_path = PDFLINKCHECK_HOME / f"{base_name}_{pdf_library}_report.json"
+    output_path = PDFLINKCHECK_HOME / f"{base_name}_{pdf_library}_{get_unique_unix_time()}_report.json"
+    print("For more details, explore the exported file(s).")
     try:
         with open(output_path, 'w', encoding='utf-8') as f:
             json.dump(report_data, f, indent=4)
-        print(f"\nJSON report exported: {get_friendly_path(output_path)}")
+        print(f"JSON report exported: {get_friendly_path(output_path)}")
         return output_path
     except Exception as e:
         error_logger.error(f"JSON export failed: {e}", exc_info=True)
@@ -133,13 +137,13 @@ def export_report_txt(
     pdf_library: str
 ) -> Path:
     """Exports the formatted string buffer to a .txt file."""
-    #pdf_filename = implement_non_redundant_naming(pdf_filename)
+    #pdf_filename = implement_non_redundant_naming(pdf_filename)
     base_name = Path(pdf_filename).stem
-    output_path = PDFLINKCHECK_HOME / f"{base_name}_{pdf_library}_report.txt"
+    output_path = PDFLINKCHECK_HOME / f"{base_name}_{pdf_library}_{get_unique_unix_time()}_report.txt"
     try:
         output_path.write_text(report_text, encoding='utf-8')
-        print(f"\nTXT report exported: {get_friendly_path(output_path)}")
+        print(f"TXT report exported: {get_friendly_path(output_path)}")
         return output_path
     except Exception as e:
         error_logger.error(f"TXT export failed: {e}", exc_info=True)
@@ -154,6 +158,20 @@ def get_friendly_path(full_path: str) -> str:
     except ValueError:
         return str(p)
+def get_unique_unix_time():
+        """
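+        Get the unix time for right now, truncated to whole seconds.
+        Purpose: When added to a filename, this makes overwrites of otherwise identical filenames unlikely.
+        Pros:
+        - cheap, easy, no collision check needed in normal interactive use
+        Cons:
+        - One-second resolution: two exports within the same second get the same value and can still overwrite each other
+        - Longer than YYYYMMDDalpha
+        - not human readable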
+        """
+        return int(time.mktime(datetime.now().timetuple()))
 def get_first_pdf_in_cwd() -> Optional[str]:
     """
     Scans the current working directory (CWD) for the first file ending
@@ -196,4 +214,4 @@ def get_first_pdf_in_cwd() -> Optional[str]:
     except Exception as e:
         # Handle potential permissions errors or other issues
         print(f"Error while searching for PDF in CWD: {e}", file=sys.stderr)
-        return None
+        return None

pdflinkcheck 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl

pdflinkcheck 1.1.94__py3-none-any.whl → 1.2.29__py3-none-any.whl