PyPI - refcheck - Versions diffs - 0.1.0__py3-none-any.whl - Mend

refcheck 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

refcheck/__init__.py +0 -0
refcheck/log_conf.py +25 -0
refcheck/main.py +142 -0
refcheck/parsers.py +104 -0
refcheck/utils.py +91 -0
refcheck/validators.py +89 -0
refcheck-0.1.0.dist-info/LICENSE +21 -0
refcheck-0.1.0.dist-info/METADATA +93 -0
refcheck-0.1.0.dist-info/RECORD +10 -0
refcheck-0.1.0.dist-info/WHEEL +4 -0

refcheck/__init__.py ADDED Viewed

File without changes

refcheck/log_conf.py ADDED Viewed

@@ -0,0 +1,25 @@
+import logging
+def setup_logging(verbose=False):
+    # Get the root logger
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.DEBUG if verbose else logging.INFO)
+    # Clear any existing handlers
+    if root_logger.handlers:
+        for handler in root_logger.handlers:
+            root_logger.removeHandler(handler)
+    # Create a console handler
+    console_handler = logging.StreamHandler()
+    if verbose:
+        console_handler.setLevel(logging.INFO)
+        console_formatter = logging.Formatter("[%(levelname)s]  %(message)s")
+    else:
+        console_handler.setLevel(logging.CRITICAL)
+        console_formatter = logging.Formatter("%(asctime)s - %(message)s")
+    console_handler.setFormatter(console_formatter)
+    root_logger.addHandler(console_handler)

refcheck/main.py ADDED Viewed

@@ -0,0 +1,142 @@
+import os
+import sys
+import logging
+from typing import List, Tuple
+from dataclasses import dataclass
+from refcheck.log_conf import setup_logging
+from refcheck.parsers import parse_markdown_file, init_arg_parser
+from refcheck.validators import is_valid_remote_reference, file_exists, is_valid_markdown_reference
+from refcheck.utils import (
+    get_markdown_files_from_args,
+    print_green_background,
+    print_red_background,
+    print_red,
+    print_green,
+)
+# Root logger (no name is passed); its level and handlers are set by setup_logging().
+logger = logging.getLogger()
+@dataclass
+class Reference:
+    """A reference found in a Markdown file."""
+    file: str  # path of the Markdown file that contains the reference
+    ref: str  # the reference target as written (URL, file path or #anchor)
+    line_num: int  # 1-based line number of the reference inside `file`
+@dataclass
+class BrokenReference(Reference):
+    """A reference that failed validation."""
+    status: str  # status label shown for the failure ("BROKEN", ANSI-colored unless colors are off)
+class ReferenceChecker:
+    def __init__(self, no_color: bool):
+        self.no_color = no_color
+        self.broken_references: List[BrokenReference] = []
+    def check_remote_references(self, file: str, remote_refs: List[Tuple[str, int]]):
+        logger.info("Checking remote references...")
+        for url, line_num in remote_refs:
+            logger.info(f"Checking remote reference: {url}")
+            if is_valid_remote_reference(url):
+                status = print_green_background("OK", self.no_color)
+            else:
+                status = print_red_background("BROKEN", self.no_color)
+                self.broken_references.append(BrokenReference(file, url, line_num, status))
+            print(f"{file}:{line_num}: {url} - {status}")
+    def check_local_references(self, file: str, local_refs: List[Tuple[str, int]]):
+        for ref, line_num in local_refs:
+            logger.info(f"Checking local reference: {ref}")
+            if ".md" in ref or "#" in ref:
+                self.check_markdown_reference(file, ref, line_num)
+            else:
+                self.check_asset_reference(file, ref, line_num)
+    def check_markdown_reference(self, file: str, ref: str, line_num: int):
+        if is_valid_markdown_reference(ref, file):
+            status = print_green_background("OK", self.no_color)
+        else:
+            status = print_red_background("BROKEN", self.no_color)
+            self.broken_references.append(BrokenReference(file, ref, line_num, status))
+        print(f"{file}:{line_num}: {ref} - {status}")
+    def check_asset_reference(self, file: str, ref: str, line_num: int):
+        asset_path = os.path.join(os.path.dirname(file), ref)
+        if file_exists(asset_path):
+            status = print_green_background("OK", self.no_color)
+        else:
+            status = print_red_background("BROKEN", self.no_color)
+            self.broken_references.append(BrokenReference(file, ref, line_num, status))
+        print(f"{file}:{line_num}: {ref} - {status}")
+    def print_summary(self):
+        print("\nReference check complete.")
+        print("\n============================| Summary |=============================")
+        if self.broken_references:
+            print(print_red(f"[!] {len(self.broken_references)} broken references found:", self.no_color))
+            self.broken_references = sorted(self.broken_references, key=lambda x: (x.file, x.line_num))
+            for broken_ref in self.broken_references:
+                print(f"{broken_ref.file}:{broken_ref.line_num}: {broken_ref.ref}")
+        else:
+            print(print_green("\U0001F389 No broken references.", self.no_color))
+        print("====================================================================")
+def main() -> bool:
+    parser = init_arg_parser()
+    args = parser.parse_args()
+    # Check if the user has provided any files or directories
+    if not args.paths:
+        parser.print_help()
+        return False
+    setup_logging(verbose=args.verbose)  # Setup logging based on the --verbose flag
+    no_color = args.no_color
+    # Retrieve all markdown files specified by the user
+    markdown_files = get_markdown_files_from_args(args.paths, args.exclude)
+    if not markdown_files:
+        print("[!] No Markdown files specified or found.")
+        return False
+    print(f"[+] {len(markdown_files)} Markdown files to check.")
+    for file in markdown_files:
+        print(f"- {file}")
+    checker = ReferenceChecker(no_color)
+    for file in markdown_files:
+        print(f"\n[+] Checking {file}...")
+        references = parse_markdown_file(file)
+        remote_refs = (
+            references["http_links"] + references["inline_links"] + references["raw_links"] + references["html_links"]
+        )
+        local_refs = references["file_refs"] + references["html_images"]
+        if not remote_refs and not local_refs:
+            print("-> No references found.")
+            continue
+        if args.check_remote:
+            checker.check_remote_references(file, remote_refs)
+        else:
+            logger.warning("Skipping remote reference check. Enable with arg --check-remote.")
+        checker.check_local_references(file, local_refs)
+    checker.print_summary()
+    return not bool(checker.broken_references)
+if __name__ == "__main__":
+    if main():
+        sys.exit(0)
+    else:
+        sys.exit(1)

refcheck/parsers.py ADDED Viewed

@@ -0,0 +1,104 @@
+import re
+import argparse
+from re import Pattern
+# HTTP/HTTPS Links - inline, footnotes, and remote images
+HTTP_LINK_PATTERN = re.compile(r"\[(.*?)\]\((https?://.*?)\)")  # all links in []() and ![]()
+INLINE_LINK_PATTERN = re.compile(r"<(https?://\S+)>")  # <http://example.com>
+RAW_LINK_PATTERN = re.compile(r"(^| )(?:(https?://\S+))")  # all links that are surrounded by nothing or spaces
+HTML_LINK_PATTERN = re.compile(r"<a\s+(?:[^>]*?\s+)?href=([\"\'])(.*?)\1")  # <a href="http://example.com">
+# Local File References - scripts, markdown files, and local images
+FILE_PATTERN = re.compile(r"\[(.*?)\]\((?!http)(.*?)\)")  # all local files in []() and ![]()
+HTML_IMAGE_PATTERN = re.compile(r"<img\s+(?:[^>]*?\s+)?src=([\"\'])(.*?)\1")  # <img src="image.png">
+def parse_markdown_file(file_path: str) -> dict:
+    """Parse a markdown file to extract references."""
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            content = file.read()
+    except FileNotFoundError:
+        print(f"Error: The file {file_path} was not found.")
+        return {}
+    except IOError as e:
+        print(f"Error: An I/O error occurred while reading the file {file_path}: {e}")
+        return {}
+    http_links = _find_matches_with_line_numbers(HTTP_LINK_PATTERN, content, group=2)
+    inline_links = _find_matches_with_line_numbers(INLINE_LINK_PATTERN, content, group=1)
+    raw_links = _find_matches_with_line_numbers(RAW_LINK_PATTERN, content, group=2)
+    html_links = _find_matches_with_line_numbers(HTML_LINK_PATTERN, content, group=2)
+    file_refs = _find_matches_with_line_numbers(FILE_PATTERN, content, group=2)
+    html_images = _find_matches_with_line_numbers(HTML_IMAGE_PATTERN, content, group=2)
+    return {
+        "http_links": http_links,
+        "inline_links": inline_links,
+        "raw_links": raw_links,
+        "html_links": html_links,
+        "file_refs": file_refs,
+        "html_images": html_images,
+    }
+def _find_matches_with_line_numbers(pattern: Pattern[str], text: str, group: int = 0) -> list:
+    """Find regex matches along with their line numbers."""
+    matches_with_line_numbers = []
+    for match in re.finditer(pattern, text):
+        start_pos = match.start(group)
+        line_number = text.count("\n", 0, start_pos) + 1
+        matches_with_line_numbers.append((match.group(group), line_number))
+    return matches_with_line_numbers
+# ============================== ARGUMENT PARSER ===============================
+class CustomFormatter(argparse.HelpFormatter):
+    def _format_action_invocation(self, action):
+        if not action.option_strings:
+            (metavar,) = self._metavar_formatter(action, action.dest)(1)
+            return metavar
+        else:
+            parts = []
+            # if the Optional doesn't take a value, format is:
+            #    -s, --long
+            if action.nargs == 0:
+                parts.extend(action.option_strings)
+            # if the Optional takes a value, format is:
+            #    -s ARGS, --long ARGS
+            # change to
+            #    -s, --long ARGS
+            else:
+                default = action.dest.upper()
+                args_string = self._format_args(action, default)
+                for option_string in action.option_strings:
+                    # parts.append('%s %s' % (option_string, args_string))
+                    parts.append("%s" % option_string)
+                parts[-1] += " %s" % args_string
+            return ", ".join(parts)
+def init_arg_parser():
+    """Setup command line argument parser."""
+    parser = argparse.ArgumentParser(
+        prog="refcheck", usage="refcheck [OPTIONS] [PATH ...]", formatter_class=CustomFormatter
+    )
+    parser.add_argument(
+        "paths",
+        metavar="PATH",
+        type=str,
+        nargs="*",
+        help="Markdown files or directories to check",
+    )
+    parser.add_argument(
+        "-e", "--exclude", metavar="", type=str, nargs="*", default=[], help="Files or directories to exclude"
+    )
+    parser.add_argument(
+        "-cm", "--check-remote", action="store_true", help="Check remote references (HTTP/HTTPS links)"
+    )
+    parser.add_argument("-n", "--no-color", action="store_true", help="Turn off colored output")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
+    return parser

refcheck/utils.py ADDED Viewed

@@ -0,0 +1,91 @@
+import os
+import logging
+# Root logger (no name is passed to getLogger).
+logger = logging.getLogger()
+# Name of the optional ignore file; it is looked up relative to the current working directory.
+IGNORE_FILE = ".refcheckignore"
+# Exclusions used by load_exclusion_patterns() when the ignore file does not exist.
+CHECK_IGNORE_DEFAULTS = [
+    ".git",
+    ".vscode",
+    ".idea",
+    "__pycache__",
+    "node_modules",
+    "venv",
+    ".venv",
+    ".pytest_cache",
+]
+def load_exclusion_patterns() -> list:
+    """Read exclusions from the .refcheckignore file."""
+    if not os.path.isfile(IGNORE_FILE):
+        logger.warning(f"Could not find {IGNORE_FILE}. Using default exclusions.")
+        exclusions = CHECK_IGNORE_DEFAULTS
+    else:
+        logger.info(f"Reading exclusions from {IGNORE_FILE}...")
+        with open(IGNORE_FILE, "r", encoding="utf-8") as file:
+            exclusions = [line.strip() for line in file if line.strip()]
+    logger.info(f"Will skip these files and directories: {exclusions}")
+    return exclusions
+def get_markdown_files_from_dir(root_dir: str, exclude: list[str] = []) -> list:
+    """Traverse the directory to get all markdown files."""
+    print(f"[+] Searching for markdown files in {os.path.abspath(root_dir)} ...")
+    exclude_set = set(os.path.normpath(path) for path in exclude)
+    markdown_files = []
+    # Walk through the directory to get all markdown files
+    for subdir, _, files in os.walk(root_dir):
+        subdir_norm = os.path.normpath(subdir)
+        if any(subdir_norm.startswith(exclude_item) for exclude_item in exclude_set):
+            continue  # Skip excluded directories
+        for file in files:
+            file_path = os.path.join(subdir, file)
+            file_path_norm = os.path.normpath(file_path)
+            if file.endswith(".md") and file_path_norm not in exclude_set:
+                markdown_files.append(file_path_norm)
+    return markdown_files
+def get_markdown_files_from_args(paths: list[str], exclude: list[str] = []) -> list:
+    """Retrieve all markdown files specified by the user."""
+    # Read additional exclusions from the ignore file
+    exclude += load_exclusion_patterns()
+    exclude_set = set(os.path.normpath(path) for path in exclude)
+    markdown_files = set()
+    for path in paths:
+        norm_path = os.path.normpath(path)
+        if norm_path in exclude_set:
+            continue
+        if os.path.isdir(norm_path):
+            markdown_files.update(get_markdown_files_from_dir(norm_path, exclude))
+        elif os.path.isfile(norm_path):
+            if norm_path.endswith(".md"):
+                markdown_files.add(norm_path)
+        else:
+            print(f"[!] Warning: {path} is not a valid file or directory.")
+    return list(markdown_files)
+def print_green_background(text: str, no_color: bool = False) -> str:
+    return text if no_color else f"\033[42m{text}\033[0m"
+def print_red_background(text: str, no_color: bool = False) -> str:
+    return text if no_color else f"\033[41m{text}\033[0m"
+def print_red(text: str, no_color: bool = False) -> str:
+    return text if no_color else f"\033[31m{text}\033[0m"
+def print_green(text: str, no_color: bool = False) -> str:
+    return text if no_color else f"\033[32m{text}\033[0m"

refcheck/validators.py ADDED Viewed

@@ -0,0 +1,89 @@
+import os
+import re
+import logging
+import requests
+# Disable verify warnings for HTTPS requests
+# (is_valid_remote_reference() sends its requests with verify=False).
+requests.packages.urllib3.disable_warnings()  # type: ignore
+# Root logger (no name is passed to getLogger).
+logger = logging.getLogger()
+def is_valid_remote_reference(url: str) -> bool:
+    """Check if online references are reachable."""
+    try:
+        response = requests.head(url, timeout=5, verify=False)
+        if response.status_code >= 400:
+            return False
+    except Exception:
+        logger.exception(f"Exception occurred while checking URL: {url}")
+        return False
+    else:
+        return True
+def file_exists(file_path: str) -> bool:
+    """Check if local file exists."""
+    logger.info(f"Checking if file exists: {file_path}")
+    exists = os.path.exists(file_path)
+    if not exists:
+        logger.warning(f"File does not exist: {file_path}")
+    return exists
+def header_exists(file_path: str, header: str) -> bool:
+    """Check if Markdown header exists in the given file."""
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            content = file.read()
+            normalized_header = normalize_header(header)
+            normalized_headers = [normalize_header(h) for h in re.findall(r"^#{1,6}\s+(.*)", content, re.MULTILINE)]
+            if normalized_header in normalized_headers:
+                return True
+    except FileNotFoundError:
+        logger.error(f"File not found: {file_path}")
+    return False
+def normalize_header(header: str) -> str:
+    """Normalize header to match Markdown link format."""
+    return re.sub(r"[^a-zA-Z0-9 -]", "", header.strip().lower().replace(" ", "-"))
+def is_valid_markdown_reference(ref: str, file_path: str) -> bool:
+    """Check if markdown references are reachable.
+    Args:
+        ref: The reference to check, e.g. `file.md#header`, `#header`, `file.md`.
+        file_path: The path of the file where the reference was made in.
+    Returns:
+        bool: True if the reference is valid and reachable, False otherwise.
+    """
+    base_path = os.path.dirname(file_path)  # Directory of the file
+    if ref.startswith("#"):
+        logger.info("Reference is a header in the same Markdown file.")
+        referenced_header = ref[1:]  # Remove leading `#`
+        target_path = file_path
+    elif "#" in ref:
+        logger.info("Reference is a header in another Markdown file.")
+        referenced_file, referenced_header = ref.split("#", 1)
+        target_path = os.path.join(base_path, referenced_file)
+    else:
+        logger.info("Reference is to another Markdown file.")
+        referenced_file = ref
+        referenced_header = None
+        target_path = os.path.join(base_path, referenced_file)
+    # Check if the referenced file exists
+    if not file_exists(target_path):
+        logger.error(f"Referenced file does not exist: {target_path}")
+        return False
+    # Check if the referenced header exists
+    if referenced_header and not header_exists(target_path, referenced_header):
+        logger.error(f"Referenced header does not exist in {target_path}: {referenced_header}")
+        return False
+    return True

refcheck-0.1.0.dist-info/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 flumi3
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

refcheck-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,93 @@
+Metadata-Version: 2.1
+Name: refcheck
+Version: 0.1.0
+Summary: Tool for validating references in Markdown files.
+Author: Sebastian Flum
+Author-email: sebastian.flum.dev@gmail.com
+Requires-Python: >=3.12,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: requests (>=2.32.3,<3.0.0)
+Description-Content-Type: text/markdown
+# RefCheck
+RefCheck is a simple tool for validating markdown references and highlighting
+broken ones.
+```text
+usage: refcheck [OPTIONS] [PATH ...]
+positional arguments:
+  PATH                  Markdown files or directories to check
+options:
+  -h, --help            show this help message and exit
+  -e, --exclude [ ...]  Files or directories to exclude
+  -cm, --check-remote   Check remote references (HTTP/HTTPS links)
+  -n, --no-color        Turn off colored output
+  -v, --verbose         Enable verbose output
+```
+## Installation
+RefCheck is available on PyPI:
+```bash
+pip install refcheck
+```
+## Examples
+```text
+$ refcheck README.md
+[+] 1 Markdown files to check.
+- README.md
+[+] Checking README.md...
+README.md:3: #introduction - OK
+README.md:5: #installation - OK
+README.md:6: #getting-started - OK
+README.md:24: https://www.github.com - OK
+Reference check complete.
+============================| Summary |=============================
+🎉 No broken references.
+====================================================================
+```
+```text
+$ refcheck .
+[+] Searching for markdown files in /home/flumi3/github/refcheck ...
+[+] 2 Markdown files to check.
+- tests\sample_markdown.md
+- docs\Understanding-Markdown-References.md
+[+] Checking tests\sample_markdown.md...
+tests\sample_markdown.md:39: /img/image.png - BROKEN
+tests\sample_markdown.md:52: https://www.openai.com/logo.png - BROKEN
+[+] Checking docs\Understanding-Markdown-References.md...
+docs\Understanding-Markdown-References.md:42: #local-file-references - OK
+Reference check complete.
+============================| Summary |=============================
+[!] 2 broken references found:
+tests\sample_markdown.md:39: /img/image.png
+tests\sample_markdown.md:52: https://www.openai.com/logo.png
+====================================================================
+```
+## Features
+- Find and check various reference patterns in markdown files
+- Highlight broken references
+- Validate absolute and relative file paths to any file type
+- Support for checking remote references, such as \[Google\]\(https://www.google.com\)
+- User-friendly CLI
+- Easy CI pipeline integration

refcheck-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+refcheck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+refcheck/log_conf.py,sha256=UbM4ge0sL39PKxl9Wdk3nV4Xm3H-dXqb8UWrzaOVOz8,818
+refcheck/main.py,sha256=tFcp3YjoZbXN-PmYZYLpxaNIjXfKcfWjWx24kkDCec8,5151
+refcheck/parsers.py,sha256=IOV4k7QOf7WX8hObF29kGgCBpDGP4X2jTxzf7_P8xjs,4455
+refcheck/utils.py,sha256=FZmmXnIVXrF2u_iBVOaEzMZtyzmi3inIcNF4MrogNYM,3127
+refcheck/validators.py,sha256=uPYQKQAyN5qoU6v_gevBs8ShD9nh3jhu03ZiP4l7Od0,3195
+refcheck-0.1.0.dist-info/LICENSE,sha256=FQFAQyto4aY-Grcp1UHDY6NEcithmFh-j9u2GDEyo8I,1084
+refcheck-0.1.0.dist-info/METADATA,sha256=RPEVniyzYDG2kToEv1R7Vgvr6DIsX_7QE68K4LCnteA,2549
+refcheck-0.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+refcheck-0.1.0.dist-info/RECORD,,

refcheck-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: poetry-core 1.9.0
+Root-Is-Purelib: true
+Tag: py3-none-any