markitecture 0.1.15__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. markitecture/__init__.py +41 -0
  2. markitecture/__main__.py +4 -0
  3. markitecture/cli/__init__.py +3 -0
  4. markitecture/cli/app.py +38 -0
  5. markitecture/cli/commands/__init__.py +21 -0
  6. markitecture/cli/commands/config.py +84 -0
  7. markitecture/cli/commands/links.py +146 -0
  8. markitecture/cli/commands/metrics.py +193 -0
  9. markitecture/cli/commands/mkdocs.py +39 -0
  10. markitecture/cli/commands/split.py +48 -0
  11. markitecture/errors.py +64 -0
  12. markitecture/generators/__init__.py +3 -0
  13. markitecture/generators/configs/__init__.py +0 -0
  14. markitecture/generators/configs/mintlify_json.py +0 -0
  15. markitecture/generators/configs/mkdocs_yaml.py +317 -0
  16. markitecture/metrics/__init__.py +9 -0
  17. markitecture/metrics/analyzer.py +109 -0
  18. markitecture/metrics/badges/__init__.py +28 -0
  19. markitecture/metrics/badges/base.py +7 -0
  20. markitecture/metrics/badges/compact.py +35 -0
  21. markitecture/metrics/badges/detailed.py +60 -0
  22. markitecture/metrics/badges/minimal.py +19 -0
  23. markitecture/metrics/badges/modern.py +45 -0
  24. markitecture/metrics/badges/retro.py +23 -0
  25. markitecture/metrics/badges/shields.py +124 -0
  26. markitecture/metrics/svg_generator.py +70 -0
  27. markitecture/processing/__init__.py +0 -0
  28. markitecture/processing/link_validator.py +133 -0
  29. markitecture/processing/reflink_converter.py +198 -0
  30. markitecture/processing/reflink_extractor.py +82 -0
  31. markitecture/processing/text_splitter.py +290 -0
  32. markitecture/settings/__init__.py +9 -0
  33. markitecture/settings/config.py +61 -0
  34. markitecture/settings/validators.py +26 -0
  35. markitecture/utils/__init__.py +5 -0
  36. markitecture/utils/file_handler.py +24 -0
  37. markitecture/utils/printer.py +195 -0
  38. markitecture/utils/sanitizer.py +78 -0
  39. markitecture-0.1.15.dist-info/METADATA +271 -0
  40. markitecture-0.1.15.dist-info/RECORD +43 -0
  41. markitecture-0.1.15.dist-info/WHEEL +4 -0
  42. markitecture-0.1.15.dist-info/entry_points.txt +2 -0
  43. markitecture-0.1.15.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,124 @@
1
+ from typing import Dict
2
+
3
+ from markitecture.metrics.analyzer import ReadabilityMetrics
4
+
5
+
6
class ShieldsBadgeGenerator:
    """Render readability metrics as shields.io-style flat SVG badges.

    Every badge is a two-segment pill: a grey label segment on the left and a
    coloured status segment on the right. All three public ``generate_*_badge``
    methods share one template (``_render_badge``); they differ only in the
    label, the status text and the status colour.
    """

    def __init__(self):
        self.HEIGHT = 20
        self.FONT_SIZE = 11
        self.TEXT_MARGIN = 6
        self.COLORS = {
            "low": "#7934C5",  # Purple
            "medium": "#00E5FF",  # Cyan
            "high": "#FFD700",  # Gold
        }
        self.SHIELDS_BG = "#555555"

    def _calculate_width(self, text: str) -> int:
        """Estimate a segment width: 6 units per character plus both margins."""
        return len(text) * 6 + self.TEXT_MARGIN * 2

    def _get_status_color(self, score: float) -> str:
        """Map a score to a colour: below 40 low, below 70 medium, else high."""
        if score < 40:
            return self.COLORS["low"]
        if score < 70:
            return self.COLORS["medium"]
        return self.COLORS["high"]

    def _render_badge(self, label: str, status: str, color: str) -> str:
        """Build the SVG markup for one label/status badge.

        Args:
            label: Text shown in the left (grey) segment.
            status: Text shown in the right (coloured) segment.
            color: Fill colour of the right segment.

        Returns:
            A standalone ``<svg>`` document as a string.
        """
        # Function-scope import: the module's import block is left untouched.
        from xml.sax.saxutils import escape

        # Widths are computed from the raw text so escaping cannot inflate them.
        label_width = self._calculate_width(label)
        status_width = self._calculate_width(status)
        total_width = label_width + status_width
        label_mid = label_width / 2
        status_mid = label_width + status_width / 2

        # Label and colour can come from callers (generate_stats_badge), so
        # escape them to keep the output well-formed XML.
        label_text = escape(label)
        status_text = escape(status)
        fill = escape(color, {'"': "&quot;"})

        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
  <linearGradient id="smooth" x2="0" y2="100%">
    <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
    <stop offset="1" stop-opacity=".1"/>
  </linearGradient>
  <clipPath id="round">
    <rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
  </clipPath>
  <g clip-path="url(#round)">
    <rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
    <rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="{fill}"/>
    <rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
  </g>
  <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
    <text x="{label_mid}" y="15" fill="#010101" fill-opacity=".3">{label_text}</text>
    <text x="{label_mid}" y="14">{label_text}</text>
    <text x="{status_mid}" y="15" fill="#010101" fill-opacity=".3">{status_text}</text>
    <text x="{status_mid}" y="14">{status_text}</text>
  </g>
</svg>'''

    def generate_reading_time_badge(self, minutes: float) -> str:
        """Return a "reading time" badge showing ``"<minutes> min"`` on green."""
        return self._render_badge("reading time", f"{minutes} min", "#4c1")

    def generate_complexity_badge(self, score: float) -> str:
        """Return a "complexity" badge showing ``"<score>%"``.

        The status colour is chosen from the score by ``_get_status_color``.
        """
        return self._render_badge(
            "complexity", f"{score}%", self._get_status_color(score)
        )

    def generate_stats_badge(self, count: int, label: str, color: str) -> str:
        """Return a badge showing ``count`` next to ``label`` in ``color``."""
        return self._render_badge(label, str(count), color)

    def generate_badges(self, metrics: "ReadabilityMetrics") -> Dict[str, str]:
        """Render one badge per metric.

        Args:
            metrics: Object providing ``reading_time_mins``,
                ``complexity_score``, ``word_count``, ``heading_count``,
                ``code_block_count``, ``link_count`` and ``image_count``.

        Returns:
            Mapping of badge name to SVG markup, in a fixed order.
        """
        return {
            "reading_time": self.generate_reading_time_badge(metrics.reading_time_mins),
            "complexity": self.generate_complexity_badge(metrics.complexity_score),
            "words": self.generate_stats_badge(metrics.word_count, "words", "#1E90FF"),
            "headings": self.generate_stats_badge(
                metrics.heading_count, "headings", "#9370DB"
            ),
            "code_blocks": self.generate_stats_badge(
                metrics.code_block_count, "code blocks", "#FF6347"
            ),
            "links": self.generate_stats_badge(metrics.link_count, "links", "#20B2AA"),
            "images": self.generate_stats_badge(
                metrics.image_count, "images", "#DEB887"
            ),
        }
@@ -0,0 +1,70 @@
1
+ from typing import Callable, Dict, Tuple
2
+
3
+ from markitecture.metrics.analyzer import ReadabilityMetrics
4
+ from markitecture.metrics.badges import (
5
+ BadgeStyle,
6
+ CompactBadgeGenerator,
7
+ DetailedBadgeGenerator,
8
+ MinimalBadgeGenerator,
9
+ ModernBadgeGenerator,
10
+ RetroBadgeGenerator,
11
+ ShieldsBadgeGenerator,
12
+ )
13
+
14
+
15
class MetricsSvgGenerator:
    """Render readability metrics as an SVG in one of the supported styles.

    ``generators`` maps each ``BadgeStyle`` to a callable that takes the
    metrics and returns SVG markup. The SHIELDS style is produced by this
    class itself, by stacking the individual shields badges vertically.
    """

    def __init__(self):
        self.dimensions: Dict[BadgeStyle, Tuple[int, int]] = {
            BadgeStyle.MODERN: (560, 140),
            BadgeStyle.COMPACT: (400, 40),
            BadgeStyle.DETAILED: (600, 200),
            BadgeStyle.MINIMAL: (300, 80),
            BadgeStyle.RETRO: (480, 120),
        }
        self.generators: Dict[BadgeStyle, Callable[[ReadabilityMetrics], str]] = {
            BadgeStyle.MODERN: ModernBadgeGenerator().generate,
            BadgeStyle.COMPACT: CompactBadgeGenerator().generate,
            BadgeStyle.DETAILED: DetailedBadgeGenerator().generate,
            BadgeStyle.MINIMAL: MinimalBadgeGenerator().generate,
            BadgeStyle.RETRO: RetroBadgeGenerator().generate,
            BadgeStyle.SHIELDS: self._generate_shields_badge,
        }

    def _get_gradient_colors(self, score: float) -> Tuple[str, str]:
        """Return the (start, end) gradient colours for a complexity score."""
        if score < 40:
            return ("#7934C5", "#4158D0")
        if score < 70:
            return ("#00E5FF", "#4158D0")
        return ("#FFD700", "#FF00FF")

    @staticmethod
    def _namespace_ids(fragment: str, suffix: str) -> str:
        """Append ``-suffix`` to every ``id`` and ``url(#...)`` in *fragment*.

        Each shields badge defines the same ids. Once several badges share
        one document, every ``url(#round)`` would resolve to the first
        badge's clip path, so badges of a different width are clipped wrongly.
        """
        # Function-scope import: the module's import block is left untouched.
        import re

        def rename(match):
            if match.group(1) is not None:
                return f'id="{match.group(1)}-{suffix}"'
            return f"url(#{match.group(2)}-{suffix})"

        return re.sub(r'\bid="([^"]+)"|url\(#([^)]+)\)', rename, fragment)

    def _generate_shields_badge(
        self,
        metrics: "ReadabilityMetrics",
        color_start: str = "",
        color_end: str = "",
    ) -> str:
        """Stack all shields badges into one SVG, one badge per 25-unit row.

        Args:
            metrics: Metrics passed on to ``ShieldsBadgeGenerator``.
            color_start: Accepted for call compatibility; currently unused.
            color_end: Accepted for call compatibility; currently unused.

        Returns:
            The combined SVG document as a string.
        """
        generator = ShieldsBadgeGenerator()
        badges = generator.generate_badges(metrics)
        width = max(self.dimensions.get(BadgeStyle.MODERN, (560,))[0], 560)
        total_height = (len(badges) * 25) + 20
        parts = [
            f"""<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {total_height}">
  <defs>
    <style>
      @font-face {{
        font-family: "DejaVu Sans";
        src: url("https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSans.ttf");
      }}
    </style>
  </defs>"""
        ]
        for row, (name, badge_svg) in enumerate(badges.items()):
            # Drop the badge's own <svg ...> wrapper and keep only its children.
            content = badge_svg.split(">", 1)[1].rsplit("</svg>", 1)[0]
            content = self._namespace_ids(content, name)
            y_pos = 10 + row * 25
            parts.append(
                f'\n  <g transform="translate(10, {y_pos})">\n    {content}\n  </g>'
            )
        parts.append("\n</svg>")
        return "".join(parts)

    def generate_svg(self, metrics: "ReadabilityMetrics", style: "BadgeStyle") -> str:
        """Render *metrics* in the requested *style*.

        Raises:
            ValueError: If *style* has no registered generator.
        """
        if style not in self.generators:
            raise ValueError(f"Style '{style}' not supported.")
        if style == BadgeStyle.SHIELDS:
            # The shields renderer still receives the gradient colours so that
            # an override with the original three-argument signature works.
            color_start, color_end = self._get_gradient_colors(metrics.complexity_score)
            return self.generators[style](metrics, color_start, color_end)
        return self.generators[style](metrics)
File without changes
@@ -0,0 +1,133 @@
1
+ """Scan documents for broken links in markdown files."""
2
+
3
+ import os
4
+ import re
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from pathlib import Path
7
+ from typing import Dict, List, Tuple
8
+ from urllib.parse import urlparse
9
+
10
+ import requests
11
+
12
+
13
+ class LinkValidator:
14
+ """ "
15
+ Check links in markdown files for accessibility.
16
+ """
17
+
18
+ def __init__(self, timeout: int = 10, max_workers: int = 5):
19
+ """
20
+ Initialize the link checker with configurable timeout and concurrency.
21
+
22
+ Args:
23
+ timeout (int): Seconds to wait for each HTTP request
24
+ max_workers (int): Maximum number of concurrent requests
25
+ """
26
+ self.timeout = timeout
27
+ self.max_workers = max_workers
28
+ self.inline_link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
29
+ self.ref_link_pattern = re.compile(r"\[([^\]]+)\]:\s*(\S+)")
30
+
31
+ def extract_links(self, content: str) -> List[Tuple[str, str, int]]:
32
+ """
33
+ Extract inline and reference links from markdown content.
34
+
35
+ Args:
36
+ content (str): Markdown content
37
+
38
+ Returns:
39
+ List[Tuple[str, str, int]]: List of (text, url, line_number)
40
+ """
41
+ links = []
42
+
43
+ # Extract inline links
44
+ for line_num, line in enumerate(content.splitlines(), 1):
45
+ links.extend(
46
+ (match.group(1), match.group(2).strip(), line_num)
47
+ for match in self.inline_link_pattern.finditer(line)
48
+ )
49
+
50
+ # Extract reference links
51
+ links.extend(
52
+ (match.group(1), match.group(2).strip(), line_num)
53
+ for line_num, line in enumerate(content.splitlines(), 1)
54
+ for match in self.ref_link_pattern.finditer(line)
55
+ )
56
+
57
+ return links
58
+
59
+ def check_link(self, url: str) -> Dict:
60
+ """
61
+ Check if a link is accessible.
62
+
63
+ Args:
64
+ url (str): URL to check
65
+
66
+ Returns:
67
+ Dict: Dictionary with status and error information
68
+ """
69
+ result = {"url": url, "status": "unknown", "error": None}
70
+
71
+ if url.startswith("#"): # Skip internal links
72
+ result["status"] = "internal"
73
+ return result
74
+
75
+ if not urlparse(url).scheme: # Handle local file paths
76
+ if os.path.exists(url):
77
+ result["status"] = "ok"
78
+ else:
79
+ result["status"] = "error"
80
+ result["error"] = "File not found"
81
+ return result
82
+
83
+ try:
84
+ response = requests.head(url, timeout=self.timeout, allow_redirects=True)
85
+ if response.status_code == 405:
86
+ response = requests.get(url, timeout=self.timeout)
87
+
88
+ if response.status_code == 200:
89
+ result["status"] = "ok"
90
+ else:
91
+ result["status"] = "error"
92
+ result["error"] = f"HTTP {response.status_code}"
93
+ except requests.RequestException as e:
94
+ result["status"] = "error"
95
+ result["error"] = str(e)
96
+
97
+ return result
98
+
99
+ def check_markdown_file(self, filepath: str) -> List[Dict[str, str]]:
100
+ """
101
+ Check all links in a markdown file.
102
+
103
+ Args:
104
+ filepath (str): Path to the markdown file
105
+
106
+ Returns:
107
+ List[Dict]: List of results for each link check
108
+ """
109
+ try:
110
+ content = Path(filepath).read_text(encoding="utf-8")
111
+ except OSError as e:
112
+ return [{"error": f"Failed to read file: {e}"}]
113
+
114
+ links = self.extract_links(content)
115
+ results = []
116
+
117
+ with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
118
+ futures = {
119
+ executor.submit(self.check_link, url): (text, url, line)
120
+ for text, url, line in links
121
+ }
122
+ for future in futures:
123
+ text, url, line = futures[future]
124
+ result = future.result()
125
+ results.append({
126
+ "text": text,
127
+ "url": url,
128
+ "line": line,
129
+ "status": result["status"],
130
+ "error": result.get("error"),
131
+ })
132
+
133
+ return results
@@ -0,0 +1,198 @@
1
+ """Reference link handling with sectional placement support."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from enum import StrEnum, auto
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional
8
+
9
+
10
+ class ReferencePlacement(StrEnum):
11
+ """Controls where reference links are placed in the document."""
12
+
13
+ END = auto()
14
+ SECTION = auto()
15
+
16
+
17
+ @dataclass
18
+ class Section:
19
+ """Represents a markdown section with its references."""
20
+
21
+ content: str
22
+ level: int
23
+ references: Dict[str, str]
24
+ start: int
25
+ end: int
26
+
27
+
28
+ class ReferenceLinkConverter:
29
+ """converter for managing reference-style links with section support."""
30
+
31
+ def __init__(self) -> None:
32
+ """Initialize patterns for finding links and headers."""
33
+ self.link_pattern = r"\[([^\]]+)\]\(([^\)]+)\)"
34
+ self.header_pattern = r"^(#{1,6})\s+(.+?)(?:\s+<!--.*?-->)*\s*$"
35
+
36
+ def _extract_sections(self, content: str) -> List[Section]:
37
+ """Extract document sections based on headers."""
38
+ sections: List[Section] = []
39
+ lines = content.splitlines()
40
+ current_section: Optional[Section] = None
41
+
42
+ for i, line in enumerate(lines):
43
+ header_match = re.match(self.header_pattern, line)
44
+
45
+ if header_match:
46
+ # If we have a previous section, finalize it
47
+ if current_section:
48
+ current_section.end = i
49
+ sections.append(current_section)
50
+
51
+ # Start new section
52
+ level = len(header_match.group(1))
53
+ current_section = Section(
54
+ content="", level=level, references={}, start=i, end=-1
55
+ )
56
+
57
+ # Handle the last section
58
+ if current_section:
59
+ current_section.end = len(lines)
60
+ sections.append(current_section)
61
+
62
+ # If no sections found, treat entire document as one section
63
+ if not sections:
64
+ sections = [
65
+ Section(
66
+ content=content, level=0, references={}, start=0, end=len(lines)
67
+ )
68
+ ]
69
+
70
+ return sections
71
+
72
+ def _process_section_content(
73
+ self, content: str, section: Section, used_refs: Dict[str, str]
74
+ ) -> str:
75
+ """Process content for a single section, adding references if needed."""
76
+ lines = content.splitlines()
77
+ section_lines = lines[section.start : section.end]
78
+
79
+ # Find all link matches in this section
80
+ matches = list(re.finditer(self.link_pattern, "\n".join(section_lines)))
81
+ if not matches:
82
+ return content
83
+
84
+ # Convert links and track references for this section
85
+ modified_lines = section_lines.copy()
86
+ references = {}
87
+
88
+ for match in matches:
89
+ original = match.group(0)
90
+ text = match.group(1)
91
+ url = match.group(2)
92
+
93
+ # Generate reference ID
94
+ ref_id = self._generate_reference_id(text, used_refs)
95
+ used_refs[ref_id] = text
96
+ references[ref_id] = url
97
+
98
+ # Create reference style link
99
+ is_image = text.startswith("!")
100
+ ref_link = f"![{text[1:]}][{ref_id}]" if is_image else f"[{text}][{ref_id}]"
101
+
102
+ # Replace in content
103
+ for i, line in enumerate(modified_lines):
104
+ if original in line:
105
+ modified_lines[i] = line.replace(original, ref_link)
106
+
107
+ # Store references for this section
108
+ section.references = references
109
+
110
+ # Update content with modified lines
111
+ lines[section.start : section.end] = modified_lines
112
+ return "\n".join(lines)
113
+
114
+ def _generate_reference_id(self, text: str, used_refs: Dict[str, str]) -> str:
115
+ """Generate a unique reference ID based on the link text."""
116
+ text = text.lstrip("!")
117
+ ref = re.sub(r"[^\w\s-]", "", text.lower())
118
+ ref = re.sub(r"[-\s]+", "-", ref).strip("-")
119
+
120
+ if not ref:
121
+ ref = "link"
122
+
123
+ base_ref = ref
124
+ counter = 1
125
+ while ref in used_refs and used_refs[ref] != text:
126
+ ref = f"{base_ref}-{counter}"
127
+ counter += 1
128
+
129
+ return ref
130
+
131
+ def convert_to_reflinks(
132
+ self, content: str, placement: ReferencePlacement = ReferencePlacement.END
133
+ ) -> str:
134
+ """Convert inline links to reference style with configurable placement."""
135
+ sections = self._extract_sections(content)
136
+ used_refs: Dict[str, str] = {}
137
+ processed_content = content
138
+
139
+ # Process each section
140
+ for section in sections:
141
+ processed_content = self._process_section_content(
142
+ processed_content, section, used_refs
143
+ )
144
+
145
+ # Add references based on placement preference
146
+ if placement == ReferencePlacement.END:
147
+ # Add all references at end of document
148
+ all_refs = {}
149
+ for section in sections:
150
+ all_refs.update(section.references)
151
+
152
+ if all_refs:
153
+ ref_section = "\n\n---\n\n<!-- REFERENCE LINKS -->\n"
154
+ for ref_id, url in sorted(all_refs.items()):
155
+ ref_section += f"[{ref_id}]: {url}\n"
156
+ processed_content = processed_content.rstrip() + ref_section + "\n"
157
+
158
+ else: # ReferencePlacement.SECTION
159
+ # Add references at the end of each section
160
+ lines = processed_content.splitlines()
161
+
162
+ for section in reversed(
163
+ sections
164
+ ): # Process in reverse to maintain positions
165
+ reflink_comment = "REFERENCE LINKS"
166
+ header_match = re.match(self.header_pattern, lines[section.start])
167
+ if header_match:
168
+ reflink_comment = (
169
+ f"{header_match.group(2).upper()} {reflink_comment}"
170
+ )
171
+ if section.references:
172
+ ref_text = f"<!-- {reflink_comment} -->\n"
173
+ for ref_id, url in sorted(section.references.items()):
174
+ ref_text += f"[{ref_id}]: {url}\n"
175
+
176
+ # Insert references at section end
177
+ lines.insert(section.end, f"{ref_text}\n---\n")
178
+
179
+ processed_content = "\n".join(lines)
180
+
181
+ return processed_content
182
+
183
+ def process_file(
184
+ self,
185
+ input_path: str | Path,
186
+ output_path: str | Path | None = None,
187
+ placement: ReferencePlacement = ReferencePlacement.END,
188
+ ) -> None:
189
+ """Process a markdown file and save to a new file."""
190
+ input_path = Path(input_path)
191
+ if not input_path.exists():
192
+ raise FileNotFoundError(f"Input file not found: {input_path}")
193
+
194
+ content = input_path.read_text(encoding="utf-8")
195
+ modified_content = self.convert_to_reflinks(content, placement)
196
+
197
+ output_path = Path(output_path) if output_path else input_path
198
+ output_path.write_text(modified_content, encoding="utf-8")
@@ -0,0 +1,82 @@
1
+ """Extract and manage reference-style links in Markdown content."""
2
+
3
+ import re
4
+ from typing import Dict
5
+
6
+
7
class ReferenceLinkExtractor:
    """
    Handles extraction and management of reference-style links in Markdown.

    This class provides functionality to extract reference-style links from markdown
    content and track which references are actually used within specific sections.
    """

    def __init__(self, markdown_text: str) -> None:
        """
        Initialize the ReferenceLinkExtractor with the entire markdown content.

        Args:
            markdown_text: The full markdown content as a string.
        """
        self.markdown_text = markdown_text
        self.references = self._extract_references()

    @staticmethod
    def _normalize_label(label: str) -> str:
        """Return the form of *label* used for matching.

        Labels are compared case-insensitively with runs of whitespace
        collapsed, so ``[Foo  Bar]`` refers to a definition of ``[foo bar]``.
        """
        return " ".join(label.split()).casefold()

    def _extract_references(self) -> Dict[str, str]:
        """
        Extract reference-style links from the markdown text.

        A reference link follows the pattern:
        [refname]: http://example.com

        Definitions are collected from the whole text, on both sides of any
        ``<!-- REFERENCE LINKS -->`` marker. When a name is defined more than
        once, the first definition is kept.

        Returns:
            Dictionary mapping reference names to their URLs.
        """
        ref_sections = re.split(r"<!--\s*REFERENCE\s+LINKS\s*-->", self.markdown_text)

        references: Dict[str, str] = {}
        ref_pattern = re.compile(r"^\[([^\]]+)\]:\s*(.+?)\s*$", re.MULTILINE)

        for section in ref_sections:
            for match in ref_pattern.finditer(section):
                ref_name = match.group(1).strip()
                ref_link = match.group(2).strip()
                # First definition wins, as a markdown renderer would resolve it.
                references.setdefault(ref_name, ref_link)

        return references

    def find_used_references(self, section_content: str) -> Dict[str, str]:
        """
        Find which references are actually used within a given section.

        A reference is considered used if it appears in the form [refname]
        within the section content, excluding the reference definitions themselves.
        Names are matched case-insensitively.

        Args:
            section_content: The markdown content of a section to analyze.

        Returns:
            Dictionary of references that are actually used in the section,
            mapping reference names (as written in their definition) to
            their URLs, in order of first use.
        """
        used_refs: Dict[str, str] = {}

        # Drop everything from the first reference marker onwards so existing
        # definitions are not mistaken for usages.
        content_without_refs = re.sub(
            r"\n*<!--\s*REFERENCE\s+LINKS\s*-->\n*.*$",
            "",
            section_content,
            flags=re.DOTALL,
        )

        # [label] not followed by "(" (inline link) or ":" (definition).
        ref_usage_pattern = re.compile(r"\[([^\]]+)\](?!\(|\:)")
        found = ref_usage_pattern.findall(content_without_refs)

        # Normalized label -> defined name; the first definition wins.
        defined: Dict[str, str] = {}
        for name in self.references:
            defined.setdefault(self._normalize_label(name), name)

        for label in found:
            name = defined.get(self._normalize_label(label))
            if name is not None:
                used_refs[name] = self.references[name]

        return used_refs