refcheck 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
refcheck-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 flumi3
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,93 @@
1
+ Metadata-Version: 2.1
2
+ Name: refcheck
3
+ Version: 0.1.0
4
+ Summary: Tool for validating references in Markdown files.
5
+ Author: Sebastian Flum
6
+ Author-email: sebastian.flum.dev@gmail.com
7
+ Requires-Python: >=3.12,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
11
+ Description-Content-Type: text/markdown
12
+
13
+ # RefCheck
14
+
15
+ RefCheck is a simple tool for validating markdown references and highlighting
16
+ broken ones.
17
+
18
+ ```text
19
+ usage: refcheck [OPTIONS] [PATH ...]
20
+
21
+ positional arguments:
22
+ PATH Markdown files or directories to check
23
+
24
+ options:
25
+ -h, --help show this help message and exit
26
+ -e, --exclude [ ...] Files or directories to exclude
27
+ -cm, --check-remote Check remote references (HTTP/HTTPS links)
28
+ -n, --no-color Turn off colored output
29
+ -v, --verbose Enable verbose output
30
+ ```
31
+
32
+ ## Installation
33
+
34
+ RefCheck is available on PyPI:
35
+
36
+ ```bash
37
+ pip install refcheck
38
+ ```
39
+
40
+ ## Examples
41
+
42
+ ```text
43
+ $ refcheck README.md
44
+
45
+ [+] 1 Markdown files to check.
46
+ - README.md
47
+
48
+ [+] Checking README.md...
49
+ README.md:3: #introduction - OK
50
+ README.md:5: #installation - OK
51
+ README.md:6: #getting-started - OK
52
+ README.md:24: https://www.github.com - OK
53
+
54
+ Reference check complete.
55
+
56
+ ============================| Summary |=============================
57
+ 🎉 No broken references.
58
+ ====================================================================
59
+ ```
60
+
61
+ ```text
62
+ $ refcheck .
63
+
64
+ [+] Searching for markdown files in /home/flumi3/github/refcheck ...
65
+ [+] 2 Markdown files to check.
66
+ - tests\sample_markdown.md
67
+ - docs\Understanding-Markdown-References.md
68
+
69
+ [+] Checking tests\sample_markdown.md...
70
+ tests\sample_markdown.md:39: /img/image.png - BROKEN
71
+ tests\sample_markdown.md:52: https://www.openai.com/logo.png - BROKEN
72
+
73
+ [+] Checking docs\Understanding-Markdown-References.md...
74
+ docs\Understanding-Markdown-References.md:42: #local-file-references - OK
75
+
76
+ Reference check complete.
77
+
78
+ ============================| Summary |=============================
79
+ [!] 2 broken references found:
80
+ tests\sample_markdown.md:39: /img/image.png
81
+ tests\sample_markdown.md:52: https://www.openai.com/logo.png
82
+ ====================================================================
83
+ ```
84
+
85
+ ## Features
86
+
87
+ - Find and check various reference patterns in markdown files
88
+ - Highlight broken references
89
+ - Validate absolute and relative file paths to any file type
90
+ - Support for checking remote references, such as \[Google\]\(https://www.google.com\)
91
+ - User-friendly CLI
92
+ - Easy CI pipeline integration
93
+
@@ -0,0 +1,80 @@
1
+ # RefCheck
2
+
3
+ RefCheck is a simple tool for validating markdown references and highlighting
4
+ broken ones.
5
+
6
+ ```text
7
+ usage: refcheck [OPTIONS] [PATH ...]
8
+
9
+ positional arguments:
10
+ PATH Markdown files or directories to check
11
+
12
+ options:
13
+ -h, --help show this help message and exit
14
+ -e, --exclude [ ...] Files or directories to exclude
15
+ -cm, --check-remote Check remote references (HTTP/HTTPS links)
16
+ -n, --no-color Turn off colored output
17
+ -v, --verbose Enable verbose output
18
+ ```
19
+
20
+ ## Installation
21
+
22
+ RefCheck is available on PyPI:
23
+
24
+ ```bash
25
+ pip install refcheck
26
+ ```
27
+
28
+ ## Examples
29
+
30
+ ```text
31
+ $ refcheck README.md
32
+
33
+ [+] 1 Markdown files to check.
34
+ - README.md
35
+
36
+ [+] Checking README.md...
37
+ README.md:3: #introduction - OK
38
+ README.md:5: #installation - OK
39
+ README.md:6: #getting-started - OK
40
+ README.md:24: https://www.github.com - OK
41
+
42
+ Reference check complete.
43
+
44
+ ============================| Summary |=============================
45
+ 🎉 No broken references.
46
+ ====================================================================
47
+ ```
48
+
49
+ ```text
50
+ $ refcheck .
51
+
52
+ [+] Searching for markdown files in /home/flumi3/github/refcheck ...
53
+ [+] 2 Markdown files to check.
54
+ - tests\sample_markdown.md
55
+ - docs\Understanding-Markdown-References.md
56
+
57
+ [+] Checking tests\sample_markdown.md...
58
+ tests\sample_markdown.md:39: /img/image.png - BROKEN
59
+ tests\sample_markdown.md:52: https://www.openai.com/logo.png - BROKEN
60
+
61
+ [+] Checking docs\Understanding-Markdown-References.md...
62
+ docs\Understanding-Markdown-References.md:42: #local-file-references - OK
63
+
64
+ Reference check complete.
65
+
66
+ ============================| Summary |=============================
67
+ [!] 2 broken references found:
68
+ tests\sample_markdown.md:39: /img/image.png
69
+ tests\sample_markdown.md:52: https://www.openai.com/logo.png
70
+ ====================================================================
71
+ ```
72
+
73
+ ## Features
74
+
75
+ - Find and check various reference patterns in markdown files
76
+ - Highlight broken references
77
+ - Validate absolute and relative file paths to any file type
78
+ - Support for checking remote references, such as \[Google\]\(https://www.google.com\)
79
+ - User-friendly CLI
80
+ - Easy CI pipeline integration
@@ -0,0 +1,17 @@
1
+ [tool.poetry]
2
+ name = "refcheck"
3
+ version = "0.1.0"
4
+ description = "Tool for validating references in Markdown files."
5
+ authors = ["Sebastian Flum <sebastian.flum.dev@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ requests = "^2.32.3"
11
+
12
+ [tool.poetry.group.dev-dependencies.dependencies]
13
+ pytest = "^8.3.3"
14
+
15
+ [build-system]
16
+ requires = ["poetry-core"]
17
+ build-backend = "poetry.core.masonry.api"
File without changes
@@ -0,0 +1,25 @@
1
+ import logging
2
+
3
+
4
def setup_logging(verbose=False):
    """Configure the root logger with a single console handler.

    Args:
        verbose: When True the root logger is set to DEBUG and the console
            handler emits INFO and above as ``[LEVEL] message``. Otherwise the
            root logger is set to INFO and the console handler only emits
            CRITICAL records, prefixed with a timestamp.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG if verbose else logging.INFO)

    # Iterate over a copy: removeHandler() mutates root_logger.handlers, and
    # removing while iterating the live list skips every other handler.
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)

    console_handler = logging.StreamHandler()

    if verbose:
        console_handler.setLevel(logging.INFO)
        console_formatter = logging.Formatter("[%(levelname)s] %(message)s")
    else:
        console_handler.setLevel(logging.CRITICAL)
        console_formatter = logging.Formatter("%(asctime)s - %(message)s")

    console_handler.setFormatter(console_formatter)
    root_logger.addHandler(console_handler)
@@ -0,0 +1,142 @@
1
+ import os
2
+ import sys
3
+ import logging
4
+ from typing import List, Tuple
5
+ from dataclasses import dataclass
6
+
7
+ from refcheck.log_conf import setup_logging
8
+ from refcheck.parsers import parse_markdown_file, init_arg_parser
9
+ from refcheck.validators import is_valid_remote_reference, file_exists, is_valid_markdown_reference
10
+ from refcheck.utils import (
11
+ get_markdown_files_from_args,
12
+ print_green_background,
13
+ print_red_background,
14
+ print_red,
15
+ print_green,
16
+ )
17
+
18
+ logger = logging.getLogger()
19
+
20
+
21
@dataclass
class Reference:
    """A single reference found in a Markdown file."""

    file: str  # path of the Markdown file that contains the reference
    ref: str  # the reference target as written (URL, file path or #header)
    line_num: int  # line of `file` on which the reference was found
26
+
27
+
28
@dataclass
class BrokenReference(Reference):
    """A reference that failed validation."""

    status: str  # status label that was printed for it (may contain ANSI color codes)
31
+
32
+
33
class ReferenceChecker:
    """Validate references of Markdown files and collect the broken ones.

    Every check prints one ``file:line: reference - STATUS`` line. References
    that fail validation are additionally stored in ``broken_references`` so
    that ``print_summary`` can list them at the end of the run.
    """

    def __init__(self, no_color: bool):
        # Passed through to the print_* helpers to switch ANSI colors off.
        self.no_color = no_color
        # Failed references, in the order in which they were reported.
        self.broken_references: List[BrokenReference] = []

    def _report(self, file: str, ref: str, line_num: int, is_ok: bool):
        """Print the result line for one reference and record it when broken."""
        if is_ok:
            status = print_green_background("OK", self.no_color)
        else:
            status = print_red_background("BROKEN", self.no_color)
            self.broken_references.append(BrokenReference(file, ref, line_num, status))
        print(f"{file}:{line_num}: {ref} - {status}")

    def check_remote_references(self, file: str, remote_refs: List[Tuple[str, int]]):
        """Validate every ``(url, line_number)`` pair found in *file*."""
        logger.info("Checking remote references...")
        for url, line_num in remote_refs:
            logger.info(f"Checking remote reference: {url}")
            self._report(file, url, line_num, is_valid_remote_reference(url))

    def check_local_references(self, file: str, local_refs: List[Tuple[str, int]]):
        """Validate every local ``(reference, line_number)`` pair found in *file*.

        References containing ``.md`` or ``#`` are handled as Markdown
        references; everything else is handled as a plain file on disk.
        """
        for ref, line_num in local_refs:
            logger.info(f"Checking local reference: {ref}")
            looks_like_markdown = ".md" in ref or "#" in ref
            check = self.check_markdown_reference if looks_like_markdown else self.check_asset_reference
            check(file, ref, line_num)

    def check_markdown_reference(self, file: str, ref: str, line_num: int):
        """Validate a reference to a Markdown file and/or header."""
        self._report(file, ref, line_num, is_valid_markdown_reference(ref, file))

    def check_asset_reference(self, file: str, ref: str, line_num: int):
        """Validate a reference to a non-Markdown file, resolved against the directory of *file*."""
        asset_path = os.path.join(os.path.dirname(file), ref)
        self._report(file, ref, line_num, file_exists(asset_path))

    def print_summary(self):
        """Print the closing summary, listing broken references sorted by file and line."""
        print("\nReference check complete.")
        print("\n============================| Summary |=============================")

        if not self.broken_references:
            print(print_green("\U0001F389 No broken references.", self.no_color))
        else:
            print(print_red(f"[!] {len(self.broken_references)} broken references found:", self.no_color))
            self.broken_references = sorted(self.broken_references, key=lambda x: (x.file, x.line_num))

            for entry in self.broken_references:
                print(f"{entry.file}:{entry.line_num}: {entry.ref}")

        print("====================================================================")
88
+
89
+
90
def main() -> bool:
    """Run the reference check for the paths given on the command line.

    Returns:
        bool: True when no broken reference was found. False when broken
        references were found, when no path was given (the help text is
        printed instead), or when no Markdown file could be found.
    """
    parser = init_arg_parser()
    args = parser.parse_args()

    # Check if the user has provided any files or directories
    if not args.paths:
        parser.print_help()
        return False

    setup_logging(verbose=args.verbose)  # Setup logging based on the --verbose flag
    no_color = args.no_color

    # Retrieve all markdown files specified by the user
    markdown_files = get_markdown_files_from_args(args.paths, args.exclude)
    if not markdown_files:
        print("[!] No Markdown files specified or found.")
        return False

    print(f"[+] {len(markdown_files)} Markdown files to check.")
    for file in markdown_files:
        print(f"- {file}")

    checker = ReferenceChecker(no_color)

    remote_keys = ("http_links", "inline_links", "raw_links", "html_links")
    local_keys = ("file_refs", "html_images")

    for file in markdown_files:
        print(f"\n[+] Checking {file}...")
        references = parse_markdown_file(file)

        # .get() keeps a file that could not be read (for which the parser may
        # return no keys at all) from aborting the whole run with a KeyError.
        remote_refs = [ref for key in remote_keys for ref in references.get(key, [])]
        local_refs = [ref for key in local_keys for ref in references.get(key, [])]

        if not remote_refs and not local_refs:
            print("-> No references found.")
            continue

        if args.check_remote:
            checker.check_remote_references(file, remote_refs)
        else:
            logger.warning("Skipping remote reference check. Enable with arg --check-remote.")

        checker.check_local_references(file, local_refs)

    checker.print_summary()
    return not checker.broken_references
136
+
137
+
138
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    sys.exit(0 if main() else 1)
@@ -0,0 +1,104 @@
1
+ import re
2
+ import argparse
3
+ from re import Pattern
4
+
5
# HTTP/HTTPS Links - inline, footnotes, and remote images
HTTP_LINK_PATTERN = re.compile(r"\[(.*?)\]\((https?://.*?)\)")  # all links in []() and ![]()
INLINE_LINK_PATTERN = re.compile(r"<(https?://\S+)>")  # <http://example.com>
# All links that are preceded by a space or start a line. re.MULTILINE is
# required so that "^" matches at the start of every line and not only at the
# very beginning of the file.
RAW_LINK_PATTERN = re.compile(r"(^| )(?:(https?://\S+))", re.MULTILINE)
HTML_LINK_PATTERN = re.compile(r"<a\s+(?:[^>]*?\s+)?href=([\"\'])(.*?)\1")  # <a href="http://example.com">

# Local File References - scripts, markdown files, and local images
# All local files in []() and ![](). Only real http:// and https:// targets are
# skipped, so local files whose name merely starts with "http" are still found.
FILE_PATTERN = re.compile(r"\[(.*?)\]\((?!https?://)(.*?)\)")
HTML_IMAGE_PATTERN = re.compile(r"<img\s+(?:[^>]*?\s+)?src=([\"\'])(.*?)\1")  # <img src="image.png">
14
+
15
+
16
def parse_markdown_file(file_path: str) -> dict:
    """Parse a markdown file to extract references.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        dict: Maps each of ``http_links``, ``inline_links``, ``raw_links``,
        ``html_links``, ``file_refs`` and ``html_images`` to a list of
        ``(reference, line_number)`` tuples. When the file cannot be read an
        error is printed and every list is empty, so callers can always index
        the result by these keys.
    """
    result_keys = ("http_links", "inline_links", "raw_links", "html_links", "file_refs", "html_images")

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        return {key: [] for key in result_keys}
    except IOError as e:
        print(f"Error: An I/O error occurred while reading the file {file_path}: {e}")
        return {key: [] for key in result_keys}
    except UnicodeDecodeError as e:
        # Not an OSError subclass; without this a single non-UTF-8 file aborts the run.
        print(f"Error: The file {file_path} could not be decoded as UTF-8: {e}")
        return {key: [] for key in result_keys}

    http_links = _find_matches_with_line_numbers(HTTP_LINK_PATTERN, content, group=2)
    inline_links = _find_matches_with_line_numbers(INLINE_LINK_PATTERN, content, group=1)
    raw_links = _find_matches_with_line_numbers(RAW_LINK_PATTERN, content, group=2)
    html_links = _find_matches_with_line_numbers(HTML_LINK_PATTERN, content, group=2)
    file_refs = _find_matches_with_line_numbers(FILE_PATTERN, content, group=2)
    html_images = _find_matches_with_line_numbers(HTML_IMAGE_PATTERN, content, group=2)

    return {
        "http_links": http_links,
        "inline_links": inline_links,
        "raw_links": raw_links,
        "html_links": html_links,
        "file_refs": file_refs,
        "html_images": html_images,
    }
43
+
44
+
45
+ def _find_matches_with_line_numbers(pattern: Pattern[str], text: str, group: int = 0) -> list:
46
+ """Find regex matches along with their line numbers."""
47
+ matches_with_line_numbers = []
48
+ for match in re.finditer(pattern, text):
49
+ start_pos = match.start(group)
50
+ line_number = text.count("\n", 0, start_pos) + 1
51
+ matches_with_line_numbers.append((match.group(group), line_number))
52
+ return matches_with_line_numbers
53
+
54
+
55
+ # ============================== ARGUMENT PARSER ===============================
56
+
57
+
58
class CustomFormatter(argparse.HelpFormatter):
    """Help formatter that prints an option's value placeholder only once.

    Options that take a value are rendered as ``-s, --long ARGS`` instead of
    ``-s ARGS, --long ARGS``.
    """

    def _format_action_invocation(self, action):
        flags = action.option_strings

        # Positional argument: just its metavar.
        if not flags:
            (metavar,) = self._metavar_formatter(action, action.dest)(1)
            return metavar

        # Option without a value: -s, --long
        if action.nargs == 0:
            return ", ".join(flags)

        # Option with a value: list all flags, then the placeholder once.
        placeholder = self._format_args(action, action.dest.upper())
        return f"{', '.join(flags)} {placeholder}"
82
+
83
+
84
def init_arg_parser():
    """Build the command line parser of the ``refcheck`` program.

    Returns:
        argparse.ArgumentParser: Parser with the positional ``paths`` argument,
        the ``--exclude`` list option and the ``--check-remote``,
        ``--no-color`` and ``--verbose`` switches.
    """
    parser = argparse.ArgumentParser(
        prog="refcheck",
        usage="refcheck [OPTIONS] [PATH ...]",
        formatter_class=CustomFormatter,
    )

    parser.add_argument("paths", metavar="PATH", type=str, nargs="*", help="Markdown files or directories to check")
    parser.add_argument(
        "-e",
        "--exclude",
        metavar="",
        type=str,
        nargs="*",
        default=[],
        help="Files or directories to exclude",
    )

    # Plain on/off switches, registered in the order they appear in the help.
    switches = (
        (("-cm", "--check-remote"), "Check remote references (HTTP/HTTPS links)"),
        (("-n", "--no-color"), "Turn off colored output"),
        (("-v", "--verbose"), "Enable verbose output"),
    )
    for flags, help_text in switches:
        parser.add_argument(*flags, action="store_true", help=help_text)

    return parser
@@ -0,0 +1,91 @@
1
+ import os
2
+ import logging
3
+
4
+ logger = logging.getLogger()
5
+
6
# Name of the optional ignore file. It is opened by this relative name, i.e. it
# is looked up in the current working directory.
IGNORE_FILE = ".refcheckignore"

# Exclusions that are used when the ignore file does not exist.
CHECK_IGNORE_DEFAULTS = [
    ".git",
    ".vscode",
    ".idea",
    "__pycache__",
    "node_modules",
    "venv",
    ".venv",
    ".pytest_cache",
]
18
+
19
+
20
def load_exclusion_patterns() -> list:
    """Read exclusions from the .refcheckignore file.

    Returns:
        list: The non-empty, whitespace-stripped lines of the ignore file, or a
        copy of ``CHECK_IGNORE_DEFAULTS`` when the ignore file does not exist.
    """
    if not os.path.isfile(IGNORE_FILE):
        logger.warning(f"Could not find {IGNORE_FILE}. Using default exclusions.")
        # Hand out a copy so that callers extending or editing the result
        # cannot modify the module-level defaults.
        exclusions = list(CHECK_IGNORE_DEFAULTS)
    else:
        logger.info(f"Reading exclusions from {IGNORE_FILE}...")
        with open(IGNORE_FILE, "r", encoding="utf-8") as file:
            exclusions = [line.strip() for line in file if line.strip()]

    logger.info(f"Will skip these files and directories: {exclusions}")
    return exclusions
32
+
33
+
34
+ def get_markdown_files_from_dir(root_dir: str, exclude: list[str] = []) -> list:
35
+ """Traverse the directory to get all markdown files."""
36
+ print(f"[+] Searching for markdown files in {os.path.abspath(root_dir)} ...")
37
+ exclude_set = set(os.path.normpath(path) for path in exclude)
38
+ markdown_files = []
39
+
40
+ # Walk through the directory to get all markdown files
41
+ for subdir, _, files in os.walk(root_dir):
42
+ subdir_norm = os.path.normpath(subdir)
43
+ if any(subdir_norm.startswith(exclude_item) for exclude_item in exclude_set):
44
+ continue # Skip excluded directories
45
+
46
+ for file in files:
47
+ file_path = os.path.join(subdir, file)
48
+ file_path_norm = os.path.normpath(file_path)
49
+ if file.endswith(".md") and file_path_norm not in exclude_set:
50
+ markdown_files.append(file_path_norm)
51
+
52
+ return markdown_files
53
+
54
+
55
def get_markdown_files_from_args(paths: list[str], exclude: list[str] | None = None) -> list:
    """Retrieve all markdown files specified by the user.

    Args:
        paths: Files and directories given on the command line. Directories
            are searched recursively; files are only used when they end in
            ``.md``.
        exclude: Additional files or directories to skip. The list is not
            modified.

    Returns:
        list: Sorted, de-duplicated, normalized paths of the Markdown files.
    """
    # Build a new list instead of using "+=": extending in place would modify
    # the caller's list and let exclusions pile up across calls.
    exclusions = list(exclude or []) + load_exclusion_patterns()

    exclude_set = {os.path.normpath(path) for path in exclusions}
    markdown_files = set()

    for path in paths:
        norm_path = os.path.normpath(path)
        if norm_path in exclude_set:
            continue
        if os.path.isdir(norm_path):
            markdown_files.update(get_markdown_files_from_dir(norm_path, exclusions))
        elif os.path.isfile(norm_path):
            if norm_path.endswith(".md"):
                markdown_files.add(norm_path)
        else:
            print(f"[!] Warning: {path} is not a valid file or directory.")

    # Sorted so that the files are listed and checked in a reproducible order.
    return sorted(markdown_files)
76
+
77
+
78
def print_green_background(text: str, no_color: bool = False) -> str:
    """Return *text* wrapped in the ANSI codes for a green background.

    Nothing is printed. With *no_color* set, *text* is returned unchanged.
    """
    if no_color:
        return text
    return f"\033[42m{text}\033[0m"
80
+
81
+
82
def print_red_background(text: str, no_color: bool = False) -> str:
    """Return *text* wrapped in the ANSI codes for a red background.

    Nothing is printed. With *no_color* set, *text* is returned unchanged.
    """
    if no_color:
        return text
    return f"\033[41m{text}\033[0m"
84
+
85
+
86
def print_red(text: str, no_color: bool = False) -> str:
    """Return *text* wrapped in the ANSI codes for red foreground color.

    Nothing is printed. With *no_color* set, *text* is returned unchanged.
    """
    if no_color:
        return text
    return f"\033[31m{text}\033[0m"
88
+
89
+
90
def print_green(text: str, no_color: bool = False) -> str:
    """Return *text* wrapped in the ANSI codes for green foreground color.

    Nothing is printed. With *no_color* set, *text* is returned unchanged.
    """
    if no_color:
        return text
    return f"\033[32m{text}\033[0m"
@@ -0,0 +1,89 @@
1
+ import os
2
+ import re
3
+ import logging
4
+ import requests
5
+
6
# Disable verify warnings for HTTPS requests
# NOTE(review): this runs at import time and applies to the whole process. It
# goes together with the verify=False request in is_valid_remote_reference;
# confirm that unverified TLS is really intended.
requests.packages.urllib3.disable_warnings()  # type: ignore
8
+
9
+ logger = logging.getLogger()
10
+
11
+
12
def is_valid_remote_reference(url: str) -> bool:
    """Check if online references are reachable.

    A HEAD request is sent first. Servers that answer it with 405 (Method Not
    Allowed) or 501 (Not Implemented) do not support HEAD, so the check is
    repeated with GET instead of reporting a working link as broken.

    Args:
        url: The HTTP/HTTPS URL to check.

    Returns:
        bool: True when the final response status is below 400. False for
        status 400 and above, or when the request raised an exception
        (the exception is logged).
    """
    try:
        # NOTE(review): verify=False skips TLS certificate verification.
        response = requests.head(url, timeout=5, verify=False)
        if response.status_code in (405, 501):
            # stream=True defers the body download; only the status is needed.
            response = requests.get(url, timeout=5, verify=False, stream=True)
            response.close()
    except Exception:
        # Deliberately broad: any failure to check a link counts as broken.
        logger.exception(f"Exception occurred while checking URL: {url}")
        return False

    return response.status_code < 400
23
+
24
+
25
def file_exists(file_path: str) -> bool:
    """Report whether *file_path* exists on disk, logging a warning when it does not."""
    logger.info(f"Checking if file exists: {file_path}")
    if os.path.exists(file_path):
        return True
    logger.warning(f"File does not exist: {file_path}")
    return False
32
+
33
+
34
def header_exists(file_path: str, header: str) -> bool:
    """Report whether the Markdown file contains a header matching *header*.

    Both *header* and every ``#`` to ``######`` heading of the file are passed
    through ``normalize_header`` before they are compared.

    Returns:
        bool: True when a matching heading exists. False otherwise, including
        when the file is not found (an error is logged in that case).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            text = handle.read()
    except FileNotFoundError:
        logger.error(f"File not found: {file_path}")
        return False

    wanted = normalize_header(header)
    titles = re.findall(r"^#{1,6}\s+(.*)", text, re.MULTILINE)
    return any(normalize_header(title) == wanted for title in titles)
46
+
47
+
48
def normalize_header(header: str) -> str:
    """Convert a header text into the form used by Markdown header links.

    The text is stripped and lower-cased, spaces become hyphens, and every
    character other than ASCII letters, digits, spaces and hyphens is removed.
    """
    slug = header.strip().lower()
    slug = slug.replace(" ", "-")
    return re.sub(r"[^a-zA-Z0-9 -]", "", slug)
51
+
52
+
53
def is_valid_markdown_reference(ref: str, file_path: str) -> bool:
    """Check whether a Markdown reference points to an existing file and header.

    Args:
        ref: The reference to check, e.g. `file.md#header`, `#header`, `file.md`.
        file_path: The path of the file where the reference was made in.
            References to other files are resolved against its directory.

    Returns:
        bool: True if the referenced file exists and, when a header is given,
        that header exists in it. False otherwise.
    """
    base_path = os.path.dirname(file_path)  # Directory of the file
    file_part, hash_sign, anchor = ref.partition("#")

    if hash_sign and not file_part:
        logger.info("Reference is a header in the same Markdown file.")
        target_path, referenced_header = file_path, anchor
    elif hash_sign:
        logger.info("Reference is a header in another Markdown file.")
        target_path, referenced_header = os.path.join(base_path, file_part), anchor
    else:
        logger.info("Reference is to another Markdown file.")
        target_path, referenced_header = os.path.join(base_path, ref), None

    if not file_exists(target_path):
        logger.error(f"Referenced file does not exist: {target_path}")
        return False

    # An absent or empty header means only the file itself had to exist.
    if not referenced_header:
        return True

    if header_exists(target_path, referenced_header):
        return True

    logger.error(f"Referenced header does not exist in {target_path}: {referenced_header}")
    return False
89
+ return True