markitecture-0.1.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. markitecture/__init__.py +41 -0
  2. markitecture/__main__.py +4 -0
  3. markitecture/cli/__init__.py +3 -0
  4. markitecture/cli/app.py +38 -0
  5. markitecture/cli/commands/__init__.py +21 -0
  6. markitecture/cli/commands/config.py +84 -0
  7. markitecture/cli/commands/links.py +146 -0
  8. markitecture/cli/commands/metrics.py +193 -0
  9. markitecture/cli/commands/mkdocs.py +39 -0
  10. markitecture/cli/commands/split.py +48 -0
  11. markitecture/errors.py +64 -0
  12. markitecture/generators/__init__.py +3 -0
  13. markitecture/generators/configs/__init__.py +0 -0
  14. markitecture/generators/configs/mintlify_json.py +0 -0
  15. markitecture/generators/configs/mkdocs_yaml.py +317 -0
  16. markitecture/metrics/__init__.py +9 -0
  17. markitecture/metrics/analyzer.py +109 -0
  18. markitecture/metrics/badges/__init__.py +28 -0
  19. markitecture/metrics/badges/base.py +7 -0
  20. markitecture/metrics/badges/compact.py +35 -0
  21. markitecture/metrics/badges/detailed.py +60 -0
  22. markitecture/metrics/badges/minimal.py +19 -0
  23. markitecture/metrics/badges/modern.py +45 -0
  24. markitecture/metrics/badges/retro.py +23 -0
  25. markitecture/metrics/badges/shields.py +124 -0
  26. markitecture/metrics/svg_generator.py +70 -0
  27. markitecture/processing/__init__.py +0 -0
  28. markitecture/processing/link_validator.py +133 -0
  29. markitecture/processing/reflink_converter.py +198 -0
  30. markitecture/processing/reflink_extractor.py +82 -0
  31. markitecture/processing/text_splitter.py +290 -0
  32. markitecture/settings/__init__.py +9 -0
  33. markitecture/settings/config.py +61 -0
  34. markitecture/settings/validators.py +26 -0
  35. markitecture/utils/__init__.py +5 -0
  36. markitecture/utils/file_handler.py +24 -0
  37. markitecture/utils/printer.py +195 -0
  38. markitecture/utils/sanitizer.py +78 -0
  39. markitecture-0.1.15.dist-info/METADATA +271 -0
  40. markitecture-0.1.15.dist-info/RECORD +43 -0
  41. markitecture-0.1.15.dist-info/WHEEL +4 -0
  42. markitecture-0.1.15.dist-info/entry_points.txt +2 -0
  43. markitecture-0.1.15.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,124 @@
+from typing import Dict
+
+from markitecture.metrics.analyzer import ReadabilityMetrics
+
+
+class ShieldsBadgeGenerator:
+    def __init__(self):
+        self.HEIGHT = 20
+        self.FONT_SIZE = 11
+        self.TEXT_MARGIN = 6
+        self.COLORS = {
+            "low": "#7934C5",  # Purple
+            "medium": "#00E5FF",  # Cyan
+            "high": "#FFD700",  # Gold
+        }
+        self.SHIELDS_BG = "#555555"
+
+    def _calculate_width(self, text: str) -> int:
+        return len(text) * 6 + self.TEXT_MARGIN * 2
+
+    def _get_status_color(self, score: float) -> str:
+        if score < 40:
+            return self.COLORS["low"]
+        elif score < 70:
+            return self.COLORS["medium"]
+        return self.COLORS["high"]
+
+    def generate_reading_time_badge(self, minutes: float) -> str:
+        label = "reading time"
+        status = f"{minutes} min"
+        label_width = self._calculate_width(label)
+        status_width = self._calculate_width(status)
+        total_width = label_width + status_width
+        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
+<linearGradient id="smooth" x2="0" y2="100%">
+<stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
+<stop offset="1" stop-opacity=".1"/>
+</linearGradient>
+<clipPath id="round">
+<rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
+</clipPath>
+<g clip-path="url(#round)">
+<rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
+<rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="#4c1"/>
+<rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
+</g>
+<g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
+<text x="{label_width / 2}" y="15" fill="#010101" fill-opacity=".3">{label}</text>
+<text x="{label_width / 2}" y="14">{label}</text>
+<text x="{label_width + status_width / 2}" y="15" fill="#010101" fill-opacity=".3">{status}</text>
+<text x="{label_width + status_width / 2}" y="14">{status}</text>
+</g>
+</svg>'''
+
+    def generate_complexity_badge(self, score: float) -> str:
+        label = "complexity"
+        status = f"{score}%"
+        color = self._get_status_color(score)
+        label_width = self._calculate_width(label)
+        status_width = self._calculate_width(status)
+        total_width = label_width + status_width
+        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
+<linearGradient id="smooth" x2="0" y2="100%">
+<stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
+<stop offset="1" stop-opacity=".1"/>
+</linearGradient>
+<clipPath id="round">
+<rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
+</clipPath>
+<g clip-path="url(#round)">
+<rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
+<rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="{color}"/>
+<rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
+</g>
+<g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
+<text x="{label_width / 2}" y="15" fill="#010101" fill-opacity=".3">{label}</text>
+<text x="{label_width / 2}" y="14">{label}</text>
+<text x="{label_width + status_width / 2}" y="15" fill="#010101" fill-opacity=".3">{status}</text>
+<text x="{label_width + status_width / 2}" y="14">{status}</text>
+</g>
+</svg>'''
+
+    def generate_stats_badge(self, count: int, label: str, color: str) -> str:
+        status = str(count)
+        label_width = self._calculate_width(label)
+        status_width = self._calculate_width(status)
+        total_width = label_width + status_width
+        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
+<linearGradient id="smooth" x2="0" y2="100%">
+<stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
+<stop offset="1" stop-opacity=".1"/>
+</linearGradient>
+<clipPath id="round">
+<rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
+</clipPath>
+<g clip-path="url(#round)">
+<rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
+<rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="{color}"/>
+<rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
+</g>
+<g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
+<text x="{label_width / 2}" y="15" fill="#010101" fill-opacity=".3">{label}</text>
+<text x="{label_width / 2}" y="14">{label}</text>
+<text x="{label_width + status_width / 2}" y="15" fill="#010101" fill-opacity=".3">{status}</text>
+<text x="{label_width + status_width / 2}" y="14">{status}</text>
+</g>
+</svg>'''
+
+    def generate_badges(self, metrics: ReadabilityMetrics) -> Dict[str, str]:
+        return {
+            "reading_time": self.generate_reading_time_badge(metrics.reading_time_mins),
+            "complexity": self.generate_complexity_badge(metrics.complexity_score),
+            "words": self.generate_stats_badge(metrics.word_count, "words", "#1E90FF"),
+            "headings": self.generate_stats_badge(
+                metrics.heading_count, "headings", "#9370DB"
+            ),
+            "code_blocks": self.generate_stats_badge(
+                metrics.code_block_count, "code blocks", "#FF6347"
+            ),
+            "links": self.generate_stats_badge(metrics.link_count, "links", "#20B2AA"),
+            "images": self.generate_stats_badge(
+                metrics.image_count, "images", "#DEB887"
+            ),
+        }
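
Each method of the class above returns a complete shields.io-style SVG document as a string. A minimal usage sketch; the import path follows the package layout listed above, and the argument values are illustrative:

from markitecture.metrics.badges import ShieldsBadgeGenerator

gen = ShieldsBadgeGenerator()
# Each call returns a standalone <svg> string.
reading_time_svg = gen.generate_reading_time_badge(3.5)
complexity_svg = gen.generate_complexity_badge(42.0)
words_svg = gen.generate_stats_badge(1200, "words", "#1E90FF")

with open("reading_time.svg", "w", encoding="utf-8") as f:
    f.write(reading_time_svg)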
@@ -0,0 +1,70 @@
+from typing import Callable, Dict, Tuple
+
+from markitecture.metrics.analyzer import ReadabilityMetrics
+from markitecture.metrics.badges import (
+    BadgeStyle,
+    CompactBadgeGenerator,
+    DetailedBadgeGenerator,
+    MinimalBadgeGenerator,
+    ModernBadgeGenerator,
+    RetroBadgeGenerator,
+    ShieldsBadgeGenerator,
+)
+
+
+class MetricsSvgGenerator:
+    def __init__(self):
+        self.dimensions: Dict[BadgeStyle, Tuple[int, int]] = {
+            BadgeStyle.MODERN: (560, 140),
+            BadgeStyle.COMPACT: (400, 40),
+            BadgeStyle.DETAILED: (600, 200),
+            BadgeStyle.MINIMAL: (300, 80),
+            BadgeStyle.RETRO: (480, 120),
+        }
+        self.generators: Dict[BadgeStyle, Callable[[ReadabilityMetrics], str]] = {
+            BadgeStyle.MODERN: ModernBadgeGenerator().generate,
+            BadgeStyle.COMPACT: CompactBadgeGenerator().generate,
+            BadgeStyle.DETAILED: DetailedBadgeGenerator().generate,
+            BadgeStyle.MINIMAL: MinimalBadgeGenerator().generate,
+            BadgeStyle.RETRO: RetroBadgeGenerator().generate,
+            BadgeStyle.SHIELDS: self._generate_shields_badge,
+        }
+
+    def _get_gradient_colors(self, score: float) -> Tuple[str, str]:
+        if score < 40:
+            return ("#7934C5", "#4158D0")
+        elif score < 70:
+            return ("#00E5FF", "#4158D0")
+        return ("#FFD700", "#FF00FF")
+
+    def _generate_shields_badge(
+        self, metrics: ReadabilityMetrics, color_start: str, color_end: str
+    ) -> str:
+        generator = ShieldsBadgeGenerator()
+        badges = generator.generate_badges(metrics)
+        width = max(self.dimensions.get(BadgeStyle.MODERN, (560,))[0], 560)
+        total_height = (len(badges) * 25) + 20
+        svg = f"""<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {total_height}">
+<defs>
+<style>
+@font-face {{
+font-family: "DejaVu Sans";
+src: url("https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSans.ttf");
+}}
+</style>
+</defs>"""
+        y_pos = 10
+        for badge_svg in badges.values():
+            content = badge_svg.split(">", 1)[1].rsplit("</svg>", 1)[0]
+            svg += f'\n <g transform="translate(10, {y_pos})">\n {content}\n </g>'
+            y_pos += 25
+        svg += "\n</svg>"
+        return svg
+
+    def generate_svg(self, metrics: ReadabilityMetrics, style: BadgeStyle) -> str:
+        if style not in self.generators:
+            raise ValueError(f"Style '{style}' not supported.")
+        if style == BadgeStyle.SHIELDS:
+            color_start, color_end = self._get_gradient_colors(metrics.complexity_score)
+            return self.generators[style](metrics, color_start, color_end)
+        return self.generators[style](metrics)
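
generate_svg dispatches on BadgeStyle and, for the shields style, stacks the individual badges into a single SVG. A hedged sketch of driving it; write_metrics_badge is a hypothetical helper, and metrics is a ReadabilityMetrics value produced elsewhere in the package:

from markitecture.metrics.badges import BadgeStyle
from markitecture.metrics.svg_generator import MetricsSvgGenerator


def write_metrics_badge(metrics, path: str = "metrics.svg") -> None:
    # `metrics` is a ReadabilityMetrics instance; only the attributes referenced
    # in the file above (e.g. complexity_score, word_count) are relied on here.
    generator = MetricsSvgGenerator()
    svg = generator.generate_svg(metrics, style=BadgeStyle.SHIELDS)
    with open(path, "w", encoding="utf-8") as f:
        f.write(svg)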
File without changes
@@ -0,0 +1,133 @@
+"""Scan documents for broken links in markdown files."""
+
+import os
+import re
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from typing import Dict, List, Tuple
+from urllib.parse import urlparse
+
+import requests
+
+
+class LinkValidator:
+    """
+    Check links in markdown files for accessibility.
+    """
+
+    def __init__(self, timeout: int = 10, max_workers: int = 5):
+        """
+        Initialize the link checker with configurable timeout and concurrency.
+
+        Args:
+            timeout (int): Seconds to wait for each HTTP request
+            max_workers (int): Maximum number of concurrent requests
+        """
+        self.timeout = timeout
+        self.max_workers = max_workers
+        self.inline_link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+        self.ref_link_pattern = re.compile(r"\[([^\]]+)\]:\s*(\S+)")
+
+    def extract_links(self, content: str) -> List[Tuple[str, str, int]]:
+        """
+        Extract inline and reference links from markdown content.
+
+        Args:
+            content (str): Markdown content
+
+        Returns:
+            List[Tuple[str, str, int]]: List of (text, url, line_number)
+        """
+        links = []
+
+        # Extract inline links
+        for line_num, line in enumerate(content.splitlines(), 1):
+            links.extend(
+                (match.group(1), match.group(2).strip(), line_num)
+                for match in self.inline_link_pattern.finditer(line)
+            )
+
+        # Extract reference links
+        links.extend(
+            (match.group(1), match.group(2).strip(), line_num)
+            for line_num, line in enumerate(content.splitlines(), 1)
+            for match in self.ref_link_pattern.finditer(line)
+        )
+
+        return links
+
+    def check_link(self, url: str) -> Dict:
+        """
+        Check if a link is accessible.
+
+        Args:
+            url (str): URL to check
+
+        Returns:
+            Dict: Dictionary with status and error information
+        """
+        result = {"url": url, "status": "unknown", "error": None}
+
+        if url.startswith("#"):  # Skip internal links
+            result["status"] = "internal"
+            return result
+
+        if not urlparse(url).scheme:  # Handle local file paths
+            if os.path.exists(url):
+                result["status"] = "ok"
+            else:
+                result["status"] = "error"
+                result["error"] = "File not found"
+            return result
+
+        try:
+            response = requests.head(url, timeout=self.timeout, allow_redirects=True)
+            if response.status_code == 405:
+                response = requests.get(url, timeout=self.timeout)
+
+            if response.status_code == 200:
+                result["status"] = "ok"
+            else:
+                result["status"] = "error"
+                result["error"] = f"HTTP {response.status_code}"
+        except requests.RequestException as e:
+            result["status"] = "error"
+            result["error"] = str(e)
+
+        return result
+
+    def check_markdown_file(self, filepath: str) -> List[Dict[str, str]]:
+        """
+        Check all links in a markdown file.
+
+        Args:
+            filepath (str): Path to the markdown file
+
+        Returns:
+            List[Dict]: List of results for each link check
+        """
+        try:
+            content = Path(filepath).read_text(encoding="utf-8")
+        except OSError as e:
+            return [{"error": f"Failed to read file: {e}"}]
+
+        links = self.extract_links(content)
+        results = []
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            futures = {
+                executor.submit(self.check_link, url): (text, url, line)
+                for text, url, line in links
+            }
+            for future in futures:
+                text, url, line = futures[future]
+                result = future.result()
+                results.append({
+                    "text": text,
+                    "url": url,
+                    "line": line,
+                    "status": result["status"],
+                    "error": result.get("error"),
+                })
+
+        return results
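
A short usage sketch for the validator above; the file path and constructor values are illustrative, and the result keys are the ones assembled in check_markdown_file:

from markitecture.processing.link_validator import LinkValidator

validator = LinkValidator(timeout=5, max_workers=10)
results = validator.check_markdown_file("README.md")

# Report only broken links, with the line they were found on.
for r in results:
    if r.get("status") == "error":
        print(f"line {r.get('line')}: {r.get('url')} -> {r.get('error')}")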
@@ -0,0 +1,198 @@
+"""Reference link handling with sectional placement support."""
+
+import re
+from dataclasses import dataclass
+from enum import StrEnum, auto
+from pathlib import Path
+from typing import Dict, List, Optional
+
+
+class ReferencePlacement(StrEnum):
+    """Controls where reference links are placed in the document."""
+
+    END = auto()
+    SECTION = auto()
+
+
+@dataclass
+class Section:
+    """Represents a markdown section with its references."""
+
+    content: str
+    level: int
+    references: Dict[str, str]
+    start: int
+    end: int
+
+
+class ReferenceLinkConverter:
+    """Converter for managing reference-style links with section support."""
+
+    def __init__(self) -> None:
+        """Initialize patterns for finding links and headers."""
+        self.link_pattern = r"\[([^\]]+)\]\(([^\)]+)\)"
+        self.header_pattern = r"^(#{1,6})\s+(.+?)(?:\s+<!--.*?-->)*\s*$"
+
+    def _extract_sections(self, content: str) -> List[Section]:
+        """Extract document sections based on headers."""
+        sections: List[Section] = []
+        lines = content.splitlines()
+        current_section: Optional[Section] = None
+
+        for i, line in enumerate(lines):
+            header_match = re.match(self.header_pattern, line)
+
+            if header_match:
+                # If we have a previous section, finalize it
+                if current_section:
+                    current_section.end = i
+                    sections.append(current_section)
+
+                # Start new section
+                level = len(header_match.group(1))
+                current_section = Section(
+                    content="", level=level, references={}, start=i, end=-1
+                )
+
+        # Handle the last section
+        if current_section:
+            current_section.end = len(lines)
+            sections.append(current_section)
+
+        # If no sections found, treat entire document as one section
+        if not sections:
+            sections = [
+                Section(
+                    content=content, level=0, references={}, start=0, end=len(lines)
+                )
+            ]
+
+        return sections
+
+    def _process_section_content(
+        self, content: str, section: Section, used_refs: Dict[str, str]
+    ) -> str:
+        """Process content for a single section, adding references if needed."""
+        lines = content.splitlines()
+        section_lines = lines[section.start : section.end]
+
+        # Find all link matches in this section
+        matches = list(re.finditer(self.link_pattern, "\n".join(section_lines)))
+        if not matches:
+            return content
+
+        # Convert links and track references for this section
+        modified_lines = section_lines.copy()
+        references = {}
+
+        for match in matches:
+            original = match.group(0)
+            text = match.group(1)
+            url = match.group(2)
+
+            # Generate reference ID
+            ref_id = self._generate_reference_id(text, used_refs)
+            used_refs[ref_id] = text
+            references[ref_id] = url
+
+            # Create reference style link
+            is_image = text.startswith("!")
+            ref_link = f"![{text[1:]}][{ref_id}]" if is_image else f"[{text}][{ref_id}]"
+
+            # Replace in content
+            for i, line in enumerate(modified_lines):
+                if original in line:
+                    modified_lines[i] = line.replace(original, ref_link)
+
+        # Store references for this section
+        section.references = references
+
+        # Update content with modified lines
+        lines[section.start : section.end] = modified_lines
+        return "\n".join(lines)
+
+    def _generate_reference_id(self, text: str, used_refs: Dict[str, str]) -> str:
+        """Generate a unique reference ID based on the link text."""
+        text = text.lstrip("!")
+        ref = re.sub(r"[^\w\s-]", "", text.lower())
+        ref = re.sub(r"[-\s]+", "-", ref).strip("-")
+
+        if not ref:
+            ref = "link"
+
+        base_ref = ref
+        counter = 1
+        while ref in used_refs and used_refs[ref] != text:
+            ref = f"{base_ref}-{counter}"
+            counter += 1
+
+        return ref
+
+    def convert_to_reflinks(
+        self, content: str, placement: ReferencePlacement = ReferencePlacement.END
+    ) -> str:
+        """Convert inline links to reference style with configurable placement."""
+        sections = self._extract_sections(content)
+        used_refs: Dict[str, str] = {}
+        processed_content = content
+
+        # Process each section
+        for section in sections:
+            processed_content = self._process_section_content(
+                processed_content, section, used_refs
+            )
+
+        # Add references based on placement preference
+        if placement == ReferencePlacement.END:
+            # Add all references at end of document
+            all_refs = {}
+            for section in sections:
+                all_refs.update(section.references)
+
+            if all_refs:
+                ref_section = "\n\n---\n\n<!-- REFERENCE LINKS -->\n"
+                for ref_id, url in sorted(all_refs.items()):
+                    ref_section += f"[{ref_id}]: {url}\n"
+                processed_content = processed_content.rstrip() + ref_section + "\n"
+
+        else:  # ReferencePlacement.SECTION
+            # Add references at the end of each section
+            lines = processed_content.splitlines()
+
+            for section in reversed(
+                sections
+            ):  # Process in reverse to maintain positions
+                reflink_comment = "REFERENCE LINKS"
+                header_match = re.match(self.header_pattern, lines[section.start])
+                if header_match:
+                    reflink_comment = (
+                        f"{header_match.group(2).upper()} {reflink_comment}"
+                    )
+                if section.references:
+                    ref_text = f"<!-- {reflink_comment} -->\n"
+                    for ref_id, url in sorted(section.references.items()):
+                        ref_text += f"[{ref_id}]: {url}\n"
+
+                    # Insert references at section end
+                    lines.insert(section.end, f"{ref_text}\n---\n")
+
+            processed_content = "\n".join(lines)
+
+        return processed_content
+
+    def process_file(
+        self,
+        input_path: str | Path,
+        output_path: str | Path | None = None,
+        placement: ReferencePlacement = ReferencePlacement.END,
+    ) -> None:
+        """Process a markdown file and save to a new file."""
+        input_path = Path(input_path)
+        if not input_path.exists():
+            raise FileNotFoundError(f"Input file not found: {input_path}")
+
+        content = input_path.read_text(encoding="utf-8")
+        modified_content = self.convert_to_reflinks(content, placement)
+
+        output_path = Path(output_path) if output_path else input_path
+        output_path.write_text(modified_content, encoding="utf-8")
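
A sketch of both entry points above; the sample markdown string and the README.md path are illustrative:

from markitecture.processing.reflink_converter import (
    ReferenceLinkConverter,
    ReferencePlacement,
)

converter = ReferenceLinkConverter()

# In-memory conversion, collecting all references at the end of the document.
text = "See the [docs](https://example.com/docs) and the [API](https://example.com/api)."
print(converter.convert_to_reflinks(text, placement=ReferencePlacement.END))

# Rewrite a file, placing references after each section instead
# (output_path defaults to the input path).
converter.process_file("README.md", placement=ReferencePlacement.SECTION)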
@@ -0,0 +1,82 @@
+"""Extract and manage reference-style links in Markdown content."""
+
+import re
+from typing import Dict
+
+
+class ReferenceLinkExtractor:
+    """
+    Handles extraction and management of reference-style links in Markdown.
+
+    This class provides functionality to extract reference-style links from markdown
+    content and track which references are actually used within specific sections.
+    """
+
+    def __init__(self, markdown_text: str) -> None:
+        """
+        Initialize the ReferenceLinkExtractor with the entire markdown content.
+
+        Args:
+            markdown_text: The full markdown content as a string.
+        """
+        self.markdown_text = markdown_text
+        self.references = self._extract_references()
+
+    def _extract_references(self) -> dict[str, str]:
+        """
+        Extract reference-style links from the markdown text.
+
+        A reference link follows the pattern:
+        [refname]: http://example.com
+
+        Returns:
+            Dictionary mapping reference names to their URLs.
+        """
+        # Extract references that appear after reference marker comments
+        ref_sections = re.split(r"<!--\s*REFERENCE\s+LINKS\s*-->", self.markdown_text)
+
+        references: dict[str, str] = {}
+        ref_pattern = re.compile(r"^\[([^\]]+)\]:\s*(.+?)\s*$", re.MULTILINE)
+
+        for section in ref_sections:
+            for match in ref_pattern.finditer(section):
+                ref_name = match.group(1).strip()
+                ref_link = match.group(2).strip()
+                references[ref_name] = ref_link
+
+        return references
+
+    def find_used_references(self, section_content: str) -> dict[str, str]:
+        """
+        Find which references are actually used within a given section.
+
+        A reference is considered used if it appears in the form [refname]
+        within the section content, excluding the reference definitions themselves.
+
+        Args:
+            section_content: The markdown content of a section to analyze.
+
+        Returns:
+            Dictionary of references that are actually used in the section,
+            mapping reference names to their URLs.
+        """
+        used_refs: Dict[str, str] = {}
+
+        # Remove any existing reference definitions from the content
+        content_without_refs = re.sub(
+            r"\n*<!--\s*REFERENCE\s+LINKS\s*-->\n*.*$",
+            "",
+            section_content,
+            flags=re.DOTALL,
+        )
+
+        # Find all reference usages, excluding image or link definitions
+        ref_usage_pattern = re.compile(r"\[([^\]]+)\](?!\(|\:)")
+        found = ref_usage_pattern.findall(content_without_refs)
+
+        # Only include references that exist and are actually used
+        for ref in found:
+            if ref in self.references:
+                used_refs[ref] = self.references[ref]
+
+        return used_refs
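
A short sketch of the extractor above; the sample document and section strings are illustrative:

from markitecture.processing.reflink_extractor import ReferenceLinkExtractor

document = """# Guide

See the [docs][docs-link] for details.

<!-- REFERENCE LINKS -->
[docs-link]: https://example.com/docs
[unused]: https://example.com/other
"""

extractor = ReferenceLinkExtractor(document)
section = "See the [docs][docs-link] for details."
print(extractor.find_used_references(section))
# -> {'docs-link': 'https://example.com/docs'}; [unused] is defined but never referenced.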