PyPI - pdflinkcheck - Versions diffs - 1.1.7__py3-none-any.whl → 1.1.47__py3-none-any.whl - Mend

pdflinkcheck 1.1.7__py3-none-any.whl → 1.1.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

pdflinkcheck/__init__.py +31 -0
pdflinkcheck/analyze.py +306 -128
pdflinkcheck/cli.py +97 -20
pdflinkcheck/data/LICENSE +680 -0
pdflinkcheck/gui.py +157 -29
pdflinkcheck/io.py +106 -0
pdflinkcheck-1.1.47.dist-info/METADATA +266 -0
pdflinkcheck-1.1.47.dist-info/RECORD +13 -0
{pdflinkcheck-1.1.7.dist-info → pdflinkcheck-1.1.47.dist-info}/entry_points.txt +0 -1
pdflinkcheck-1.1.47.dist-info/licenses/LICENSE +680 -0
pdflinkcheck-1.1.7.dist-info/METADATA +0 -109
pdflinkcheck-1.1.7.dist-info/RECORD +0 -10
{pdflinkcheck-1.1.7.dist-info → pdflinkcheck-1.1.47.dist-info}/WHEEL +0 -0
{pdflinkcheck-1.1.7.dist-info → pdflinkcheck-1.1.47.dist-info}/top_level.txt +0 -0

pdflinkcheck/__init__.py CHANGED Viewed

@@ -0,0 +1,31 @@
+# Library functions
+from pdflinkcheck.analyze import run_analysis, extract_links, extract_toc
+# For the kids. This is what I wanted when learning Python in a mysterious new REPL.
+# Is this Pythonic? No. Oh well. PEP 8, PEP 20.
+import os
+flag = os.environ.get('PDFLINKCHECK_GUI_EASTEREGG', '')
+pdflibkcheck_gui_lib_func_load = str(flag).strip().lower() in ('true', '1', 'yes', 'on')
+if pdflibkcheck_gui_lib_func_load:
+    try:
+        import pyhabitat # pyhabitat is a dependency of this package already
+        if pyhabitat.tkinter_is_available():
+            from pdflinkcheck.gui import start_gui
+    except ImportError:
+        # Optional: log or ignore silently
+        pass
+# Breadcrumbs, for stumbling upon.
+if pdflibkcheck_gui_lib_func_load:
+    __pdflinkcheck_gui_easteregg_enabled__ = True
+else:
+    __pdflinkcheck_gui_easteregg_enabled__ = False
+# Define __all__ such that the library functions are self documenting.
+__all__ = [
+    "run_analysis",
+    "extract_links",
+    "extract_toc",
+    "start_gui" if pdflibkcheck_gui_lib_func_load else None,
+]

pdflinkcheck/analyze.py CHANGED Viewed

@@ -1,23 +1,33 @@
 import sys
 from pathlib import Path
 import logging
-from typing import Dict, Any
+from typing import Dict, Any, Optional
+# ... other imports ...
 # Configure logging to suppress low-level pdfminer messages
 logging.getLogger("fitz").setLevel(logging.ERROR)
 import fitz # PyMuPDF
 from pdflinkcheck.remnants import find_link_remnants
+from pdflinkcheck.io import error_logger, export_report_data, LOG_FILE_PATH
 """
 Inspect target PDF for both URI links and for GoTo links.
 """
 # Helper function: Prioritize 'from'
 def get_link_rect(link_dict):
     """
-    Retrieves the bounding box for the link using the reliable 'from' key.
-    Returns the rect coordinates (tuple of 4 floats) or None.
+    Retrieves the bounding box for the link using the reliable 'from' key
+    provided by PyMuPDF's link dictionary.
+    Args:
+        link_dict: A dictionary representing a single link/annotation
+                   returned by `page.get_links()`.
+    Returns:
+        A tuple of four floats (x0, y0, x1, y1) representing the
+        rectangular coordinates of the link on the page, or None if the
+        bounding box data is missing.
     """
     # 1. Use the 'from' key, which returns a fitz.Rect object or None
     rect_obj = link_dict.get('from')
@@ -30,25 +40,19 @@ def get_link_rect(link_dict):
     # 3. Fallback to None if 'from' is missing
     return None
-def get_pdf_file():
-    example_path = f"/mnt/c/Users/george.bennett/Downloads/TE Maxson WWTF O&M Manual DRAFT - Sections 1-6 - April 2025 (3).pdf"
-    example_path = "TE Maxson WWTF O&M Manual.pdf"
-    print(f"example path = {example_path}")
-    pdf_file = input(f"Paste path to PDF file (or press Enter to accept example): ")
-    if not pdf_file:
-        pdf_file = example_path
-    if not Path(pdf_file).exists:
-        print("File not found!")
-        sys.exit(1)
-    return pdf_file
 def get_anchor_text(page, link_rect):
     """
-    Extracts text content using the link's bounding box.
-    Returns the cleaned text or a placeholder if no text is found.
+    Extracts text content using the link's bounding box coordinates.
+    The bounding box is slightly expanded to ensure full characters are captured.
+    Args:
+        page: The fitz.Page object where the link is located.
+        link_rect: A tuple of four floats (x0, y0, x1, y1) representing the
+                   link's bounding box.
+    Returns:
+        The cleaned, extracted text string, or a placeholder message
+        if no text is found or if an error occurs.
     """
     if not link_rect:
         return "N/A: Missing Rect"
@@ -90,7 +94,15 @@ def get_anchor_text(page, link_rect):
 def analyze_toc_fitz(doc):
     """
-    Extracts the structured Table of Contents (bookmarks/outline) from the PDF.
+    Extracts the structural Table of Contents (PDF Bookmarks/Outline)
+    from the PDF document using PyMuPDF's built-in functionality.
+    Args:
+        doc: The open fitz.Document object.
+    Returns:
+        A list of dictionaries, where each dictionary represents a TOC entry
+        with 'level', 'title', and 'target_page' (1-indexed).
     """
     toc = doc.get_toc()
     toc_data = []
@@ -107,12 +119,68 @@ def analyze_toc_fitz(doc):
 # 2. Updated Main Inspection Function to Include Text Extraction
-def inspect_pdf_hyperlinks_fitz(pdf_path):
-    links_data = []
+#def inspect_pdf_hyperlinks_fitz(pdf_path):
+def extract_toc(pdf_path):
+    """
+    Opens a PDF, iterates through all pages and extracts the structural table of contents (TOC/bookmarks).
+    Args:
+        pdf_path: The file system path (str) to the target PDF document.
+    Returns:
+        A list of dictionaries representing the structural TOC/bookmarks.
+    """
     try:
         doc = fitz.open(pdf_path)
         structural_toc = analyze_toc_fitz(doc)
+    except Exception as e:
+        print(f"An error occurred: {e}", file=sys.stderr)
+    return structural_toc
+def serialize_fitz_object(obj):
+    """Converts a fitz object (Point, Rect, Matrix) to a serializable type."""
+    # Meant to avoid known Point errors like: '[ERROR] An unexpected error occurred during analysis: Report export failed due to an I/O error: Object of type Point is not JSON serializable'
+    if obj is None:
+        return None
+    # 1. Handle fitz.Point (has x, y)
+    if hasattr(obj, 'x') and hasattr(obj, 'y') and not hasattr(obj, 'x0'):
+        return (obj.x, obj.y)
+    # 2. Handle fitz.Rect and fitz.IRect (has x0, y0)
+    if hasattr(obj, 'x0') and hasattr(obj, 'y0'):
+        return (obj.x0, obj.y0, obj.x1, obj.y1)
+    # 3. Handle fitz.Matrix (has a, b, c, d, e, f)
+    if hasattr(obj, 'a') and hasattr(obj, 'b') and hasattr(obj, 'c'):
+        return (obj.a, obj.b, obj.c, obj.d, obj.e, obj.f)
+    # 4. Fallback: If it's still not a primitive type, convert it to a string
+    if not isinstance(obj, (str, int, float, bool, list, tuple, dict)):
+        # Examples: hasattr(value, 'rect') and hasattr(value, 'point'):
+        # This handles Rect and Point objects that may slip through
+        return str(obj)
+    # Otherwise, return the object as is (it's already primitive)
+    return obj
+def extract_links(pdf_path):
+    """
+    Opens a PDF, iterates through all pages and extracts all link annotations.
+    It categorizes the links into External, Internal, or Other actions, and extracts the anchor text.
+    Args:
+        pdf_path: The file system path (str) to the target PDF document.
+    Returns:
+        A list of dictionaries, where each dictionary is a comprehensive
+           representation of an active hyperlink found in the PDF.
+    """
+    links_data = []
+    try:
+        doc = fitz.open(pdf_path)
         for page_num in range(doc.page_count):
             page = doc.load_page(page_num)
@@ -143,13 +211,22 @@ def inspect_pdf_hyperlinks_fitz(pdf_path):
                 link_dict = {
-                    'page': int(page_num) + 1,
+                    'page': int(page_num) + 1, # accurate for link location, add 1
                     'rect': link_rect,
                     'link_text': anchor_text,
                     'xref':xref
                 }
+                # A. Clean Geom. Objects: Use the helper function on 'to' / 'destination'
+                # Use the clean serialize_fitz_object() helper function on all keys that might contain objects
+                destination_view = serialize_fitz_object(link.get('to'))
+                # B. Correct Internal Link Page Numbering (The -1 correction hack)
+                # This will be skipped by URI, which is not expected to have a page key
+                target_page_num_reported = "N/A"
+                if link.get('page') is not None:
+                    target_page_num_reported = int(link.get('page')) # accurate for link target, don't add 1 (weird)
                 if link['kind'] == fitz.LINK_URI:
                     target =  link.get('uri', 'URI (Unknown Target)')
                     link_dict.update({
@@ -159,12 +236,11 @@ def inspect_pdf_hyperlinks_fitz(pdf_path):
                     })
                 elif link['kind'] == fitz.LINK_GOTO:
-                    target_page_num = link.get('page') + 1 # fitz pages are 0-indexed
-                    target = f"Page {target_page_num}"
+                    target = f"Page {target_page_num_reported}"
                     link_dict.update({
                         'type': 'Internal (GoTo/Dest)',
-                        'destination_page': int(link.get('page')) + 1,
-                        'destination_view': link.get('to'),
+                        'destination_page': target_page_num_reported,
+                        'destination_view': destination_view,
                         'target': target
                     })
@@ -172,15 +248,17 @@ def inspect_pdf_hyperlinks_fitz(pdf_path):
                     link_dict.update({
                         'type': 'Remote (GoToR)',
                         'remote_file': link.get('file'),
-                        'destination': link.get('to')
+                        'destination': destination_view
                     })
                 elif link.get('page') is not None and link['kind'] != fitz.LINK_GOTO:
+                    target = f"Page {target_page_num_reported}"
                     link_dict.update({
                         'type': 'Internal (Resolved Action)',
-                        'destination_page': int(link.get('page')) + 1,
-                        'destination_view': link.get('to'),
-                        'source_kind': link.get('kind')
+                        'destination_page': target_page_num_reported,
+                        'destination_view': destination_view,
+                        'source_kind': link.get('kind'),
+                        'target': target
                     })
                 else:
@@ -190,20 +268,32 @@ def inspect_pdf_hyperlinks_fitz(pdf_path):
                         'action_kind': link.get('kind'),
                         'target': target
                     })
+                ## --- General Serialization Cleaner ---
+                #for key, value in link_dict.items():
+                #    if hasattr(value, 'rect') and hasattr(value, 'point'):
+                #        # This handles Rect and Point objects that may slip through
+                #        link_dict[key] = str(value)
+                ## --- End Cleaner ---
                 links_data.append(link_dict)
         doc.close()
     except Exception as e:
         print(f"An error occurred: {e}", file=sys.stderr)
-    return links_data, structural_toc
+    return links_data
 def print_structural_toc(structural_toc):
     """
-    Prints the structural TOC data in a clean, hierarchical, and readable format.
+    Prints the structural TOC data (bookmarks/outline) in a clean,
+    hierarchical, and readable console format.
+    Args:
+        structural_toc: A list of TOC dictionaries returned by `analyze_toc_fitz`.
     """
-    print("\n## 📚 Structural Table of Contents (PDF Bookmarks/Outline)")
-    print("-" * 50)
+    print("\n" + "=" * 70)
+    print("## Structural Table of Contents (PDF Bookmarks/Outline)")
+    print("=" * 70)
     if not structural_toc:
         print("No structural TOC (bookmarks/outline) found.")
         return
@@ -220,108 +310,196 @@ def print_structural_toc(structural_toc):
         page_str = str(item['target_page']).rjust(page_width)
         print(f"{indent}{item['title']} . . . page {page_str}")
-    print("-" * 50)
+    print("-" * 70)
-def run_analysis(pdf_path: str, check_remnants: bool, max_links: int) -> Dict[str, Any]:
-    """
-    Core PDF analysis logic using PyMuPDF. Extracts links, remnants, and TOC.
-    The printing is done inside this function.
-    max_links: If <= 0, all links will be displayed.
+def get_first_pdf_in_cwd() -> Optional[str]:
     """
-    print(f"Running PyMuPDF analysis on {Path(pdf_path).name}...")
+    Scans the current working directory (CWD) for the first file ending
+    with a '.pdf' extension (case-insensitive).
-    # 1. Extract all active links and TOC
-    extracted_links, structural_toc = inspect_pdf_hyperlinks_fitz(pdf_path)
-    toc_entry_count = len(structural_toc)
+    This is intended as a convenience function for running the tool
+    without explicitly specifying a path.
+    Returns:
+        The absolute path (as a string) to the first PDF file found,
+        or None if no PDF files are present in the CWD.
+    """
+    # 1. Get the current working directory (CWD)
+    cwd = Path.cwd()
-    # 2. Find link remnants
-    remnants = []
-    if check_remnants:
-        remnants = find_link_remnants(pdf_path, extracted_links) # Pass active links to exclude them
-    if not extracted_links and not remnants and not structural_toc:
-         print(f"\nNo hyperlinks, remnants, or structural TOC found in {Path(pdf_path).name}.")
-         return {}
-    # 3. Separate the lists based on the 'type' key
-    uri_links = [link for link in extracted_links if link['type'] == 'External (URI)']
-    goto_links = [link for link in extracted_links if link['type'] == 'Internal (GoTo/Dest)']
-    resolved_action_links = [link for link in extracted_links if link['type'] == 'Internal (Resolved Action)']
-    other_links = [link for link in extracted_links if link['type'] not in ['External (URI)', 'Internal (GoTo/Dest)', 'Internal (Resolved Action)']]
-    total_internal_links = len(goto_links) + len(resolved_action_links)
+    # 2. Use Path.glob to find files matching the pattern.
+    #    We use '**/*.pdf' to also search nested directories if desired,
+    #    but typically for a single PDF in CWD, '*.pdf' is enough.
+    #    Let's stick to files directly in the CWD for simplicity.
-    # --- ANALYSIS SUMMARY (Using your print logic) ---
-    print(f"\n--- Link Analysis Results for {Path(pdf_path).name} ---")
-    print(f"Total active links: {len(extracted_links)} (External: {len(uri_links)}, Internal Jumps: {total_internal_links}, Other: {len(other_links)})")
-    print(f"Total **structural TOC entries (bookmarks)** found: {toc_entry_count}")
-    print(f"Total **potential missing links** found: {len(remnants)}")
-    print("-" * 50)
-    limit = max_links if max_links > 0 else None
-    uri_and_other = uri_links + other_links
+    # We use list comprehension with next() for efficiency, or a simple loop.
+    # Using Path.glob('*.pdf') to search the CWD for files ending in .pdf
+    # We make it case-insensitive by checking both '*.pdf' and '*.PDF'
-    # --- Section 1: ACTIVE URI LINKS ---
-    print(f"\n## 🔗 Active URI Links (External & Other) - {len(uri_and_other)} found")
-    print("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Target URI/Action"))
-    print("-" * 75)
+    # Note: On Unix systems, glob is case-sensitive by default.
+    # The most cross-platform safe way is to iterate and check the suffix.
-    if uri_and_other:
-        for i, link in enumerate(uri_and_other[:limit], 1):
-            target = link.get('url') or link.get('remote_file') or link.get('target')
-            link_text = link.get('link_text', 'N/A')
-            print("{:<5} | {:<5} | {:<40} | {}".format(i, link['page'], link_text[:40], target))
-        if limit is not None and len(uri_and_other) > limit:
-            print(f"... and {len(uri_and_other) - limit} more links (use --max-links to see all or --max-links 0 to show all).")
-    else:
-        print("  No external or 'Other' links found.")
-    # --- Section 2: ACTIVE INTERNAL JUMPS ---
-    print(f"\n## 🖱️ Active Internal Jumps (GoTo & Resolved Actions) - {total_internal_links} found")
-    print("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Jumps To Page"))
-    print("-" * 75)
+    try:
+        # Check for files in the current directory only
+        # Iterating over the generator stops as soon as the first match is found.
+        first_pdf_path = next(
+            p.resolve() for p in cwd.iterdir()
+            if p.is_file() and p.suffix.lower() == '.pdf'
+        )
+        return str(first_pdf_path)
+    except StopIteration:
+        # If the generator runs out of items, no PDF was found
+        return None
+    except Exception as e:
+        # Handle potential permissions errors or other issues
+        print(f"Error while searching for PDF in CWD: {e}", file=sys.stderr)
+        return None
+def run_analysis(pdf_path: str = None, check_remnants: bool = True, max_links: int = 0, export_format: Optional[str] = "JSON") -> Dict[str, Any]:
+    """
+    Core high-level PDF link analysis logic.
-    all_internal = goto_links + resolved_action_links
-    if total_internal_links > 0:
-        for i, link in enumerate(all_internal[:limit], 1):
-            link_text = link.get('link_text', 'N/A')
-            print("{:<5} | {:<5} | {:<40} | {}".format(i, link['page'], link_text[:40], link['destination_page']))
-        if limit is not None and len(all_internal) > limit:
-             print(f"... and {len(all_internal) - limit} more links (use --max-links to see all or --max-links 0 to show all).")
-    else:
-        print("  No internal GoTo or Resolved Action links found.")
+    This function orchestrates the extraction of active links and TOC
+    using PyMuPDF, finds link remnants (plain text URLs/emails), and
+    prints a comprehensive, user-friendly report to the console.
+    Args:
+        pdf_path: The file system path (str) to the target PDF document.
+        check_remnants: Boolean flag to enable/disable scanning for plain text
+                        links that are not active hyperlinks.
+        max_links: Maximum number of links/remnants to display in each console
+                   section. If <= 0, all links will be displayed.
+    Returns:
+        A dictionary containing the structured results of the analysis:
+        'external_links', 'internal_links', 'remnants', and 'toc'.
+    """
+    if pdf_path is None:
+        pdf_path = get_first_pdf_in_cwd()
+    if pdf_path is None:
+        print("pdf_path is None")
+        print("Tip: Drop a PDF in the current folder or pass in a path arg.")
+        return
+    try:
+        print(f"Running PyMuPDF analysis on {Path(pdf_path).name}...")
+        # 1. Extract all active links and TOC
+        extracted_links = extract_links(pdf_path)
+        structural_toc = extract_toc(pdf_path)
+        toc_entry_count = len(structural_toc)
-    # --- Section 3: REMNANTS ---
-    print("\n" + "=" * 70)
-    print(f"## ⚠️ Link Remnants (Potential Missing Links to Fix) - {len(remnants)} found")
-    print("=" * 70)
-    if remnants:
-        print("{:<5} | {:<5} | {:<15} | {}".format("Idx", "Page", "Remnant Type", "Text Found (Needs Hyperlink)"))
-        print("-" * 75)
-        for i, remnant in enumerate(remnants[:max_links], 1):
-            print("{:<5} | {:<5} | {:<15} | {}".format(i, remnant['page'], remnant['type'], remnant['text']))
-        if len(remnants) > max_links:
-             print(f"... and {len(remnants) - max_links} more remnants (use --max-links to see all).")
-    else:
-        print("  No URI or Email remnants found that are not already active links.")
+        # 2. Find link remnants
+        remnants = []
+        if check_remnants:
+            remnants = find_link_remnants(pdf_path, extracted_links) # Pass active links to exclude them
+        if not extracted_links and not remnants and not structural_toc:
+            print(f"\nNo hyperlinks, remnants, or structural TOC found in {Path(pdf_path).name}.")
+            return {}
+        # 3. Separate the lists based on the 'type' key
+        uri_links = [link for link in extracted_links if link['type'] == 'External (URI)']
+        goto_links = [link for link in extracted_links if link['type'] == 'Internal (GoTo/Dest)']
+        resolved_action_links = [link for link in extracted_links if link['type'] == 'Internal (Resolved Action)']
+        other_links = [link for link in extracted_links if link['type'] not in ['External (URI)', 'Internal (GoTo/Dest)', 'Internal (Resolved Action)']]
+        total_internal_links = len(goto_links) + len(resolved_action_links)
-    # --- Section 4: TOC ---
-    print_structural_toc(structural_toc)
-    # Return the collected data for potential future JSON/other output
-    return {
-        "external_links": uri_links,
-        "internal_links": all_internal,
-        "remnants": remnants,
-        "toc": structural_toc
-    }
+        # --- ANALYSIS SUMMARY (Using your print logic) ---
+        print("\n" + "✪" * 70)
+        print(f"--- Link Analysis Results for {Path(pdf_path).name} ---")
+        print(f"Total active links: {len(extracted_links)} (External: {len(uri_links)}, Internal Jumps: {total_internal_links}, Other: {len(other_links)})")
+        print(f"Total **structural TOC entries (bookmarks)** found: {toc_entry_count}")
+        print(f"Total **potential missing links** found: {len(remnants)}")
+        print("✪" * 70)
+        limit = max_links if max_links > 0 else None
+        uri_and_other = uri_links + other_links
+        # --- Section 1: ACTIVE URI LINKS ---
+        print("\n" + "=" * 70)
+        print(f"## Active URI Links (External & Other) - {len(uri_and_other)} found")
+        print("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Target URI/Action"))
+        print("=" * 70)
+        if uri_and_other:
+            for i, link in enumerate(uri_and_other[:limit], 1):
+                target = link.get('url') or link.get('remote_file') or link.get('target')
+                link_text = link.get('link_text', 'N/A')
+                print("{:<5} | {:<5} | {:<40} | {}".format(i, link['page'], link_text[:40], target))
+            if limit is not None and len(uri_and_other) > limit:
+                print(f"... and {len(uri_and_other) - limit} more links (use --max-links to see all or --max-links 0 to show all).")
+        else:
+            print(" No external or 'Other' links found.")
+        # --- Section 2: ACTIVE INTERNAL JUMPS ---
+        print("\n" + "=" * 70)
+        print(f"## Active Internal Jumps (GoTo & Resolved Actions) - {total_internal_links} found")
+        print("=" * 70)
+        print("{:<5} | {:<5} | {:<40} | {}".format("Idx", "Page", "Anchor Text", "Jumps To Page"))
+        print("-" * 70)
+        all_internal = goto_links + resolved_action_links
+        if total_internal_links > 0:
+            for i, link in enumerate(all_internal[:limit], 1):
+                link_text = link.get('link_text', 'N/A')
+                print("{:<5} | {:<5} | {:<40} | {}".format(i, link['page'], link_text[:40], link['destination_page']))
+            if limit is not None and len(all_internal) > limit:
+                print(f"... and {len(all_internal) - limit} more links (use --max-links to see all or --max-links 0 to show all).")
+        else:
+            print(" No internal GoTo or Resolved Action links found.")
+        # --- Section 3: REMNANTS ---
+        print("\n" + "=" * 70)
+        print(f"## ⚠️ Link Remnants (Potential Missing Links to Fix) - {len(remnants)} found")
+        print("=" * 70)
+        if remnants:
+            print("{:<5} | {:<5} | {:<15} | {}".format("Idx", "Page", "Remnant Type", "Text Found (Needs Hyperlink)"))
+            print("-" * 70)
+            for i, remnant in enumerate(remnants[:limit], 1):
+                print("{:<5} | {:<5} | {:<15} | {}".format(i, remnant['page'], remnant['type'], remnant['text']))
+            if max_links!=0 and len(remnants) > max_links:
+                print(f"... and {len(remnants) - max_links} more remnants (use --max-links to see all).")
+        else:
+            print(" No URI or Email remnants found that are not already active links.")
+        # --- Section 4: TOC ---
+        print_structural_toc(structural_toc)
+        # Return the collected data for potential future JSON/other output
+        final_report_data =  {
+            "external_links": uri_links,
+            "internal_links": all_internal,
+            "remnants": remnants,
+            "toc": structural_toc
+        }
+        # 5. Export Report
+        if export_format:
+            # Assuming export_to will hold the output format string (e.g., "JSON")
+            export_report_data(final_report_data, Path(pdf_path).name, export_format)
+        return final_report_data
+    except Exception as e:
+        # Log the critical failure
+        error_logger.error(f"Critical failure during run_analysis for {pdf_path}: {e}", exc_info=True)
+        print(f"FATAL: Analysis failed. Check logs at {LOG_FILE_PATH}", file=sys.stderr)
+        raise # Allow the exception to propagate or handle gracefully
 def call_stable():
+    """
+    Placeholder function for command-line execution (e.g., in __main__).
+    Note: This requires defining PROJECT_NAME, CLI_MAIN_FILE, etc., or
+    passing them as arguments to run_analysis.
+    """
     print("Begin analysis...")
     run_analysis()
     print("Analysis complete.")

pdflinkcheck 1.1.7__py3-none-any.whl → 1.1.47__py3-none-any.whl

pdflinkcheck 1.1.7__py3-none-any.whl → 1.1.47__py3-none-any.whl