markitecture 0.1.15__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- markitecture/__init__.py +41 -0
- markitecture/__main__.py +4 -0
- markitecture/cli/__init__.py +3 -0
- markitecture/cli/app.py +38 -0
- markitecture/cli/commands/__init__.py +21 -0
- markitecture/cli/commands/config.py +84 -0
- markitecture/cli/commands/links.py +146 -0
- markitecture/cli/commands/metrics.py +193 -0
- markitecture/cli/commands/mkdocs.py +39 -0
- markitecture/cli/commands/split.py +48 -0
- markitecture/errors.py +64 -0
- markitecture/generators/__init__.py +3 -0
- markitecture/generators/configs/__init__.py +0 -0
- markitecture/generators/configs/mintlify_json.py +0 -0
- markitecture/generators/configs/mkdocs_yaml.py +317 -0
- markitecture/metrics/__init__.py +9 -0
- markitecture/metrics/analyzer.py +109 -0
- markitecture/metrics/badges/__init__.py +28 -0
- markitecture/metrics/badges/base.py +7 -0
- markitecture/metrics/badges/compact.py +35 -0
- markitecture/metrics/badges/detailed.py +60 -0
- markitecture/metrics/badges/minimal.py +19 -0
- markitecture/metrics/badges/modern.py +45 -0
- markitecture/metrics/badges/retro.py +23 -0
- markitecture/metrics/badges/shields.py +124 -0
- markitecture/metrics/svg_generator.py +70 -0
- markitecture/processing/__init__.py +0 -0
- markitecture/processing/link_validator.py +133 -0
- markitecture/processing/reflink_converter.py +198 -0
- markitecture/processing/reflink_extractor.py +82 -0
- markitecture/processing/text_splitter.py +290 -0
- markitecture/settings/__init__.py +9 -0
- markitecture/settings/config.py +61 -0
- markitecture/settings/validators.py +26 -0
- markitecture/utils/__init__.py +5 -0
- markitecture/utils/file_handler.py +24 -0
- markitecture/utils/printer.py +195 -0
- markitecture/utils/sanitizer.py +78 -0
- markitecture-0.1.15.dist-info/METADATA +271 -0
- markitecture-0.1.15.dist-info/RECORD +43 -0
- markitecture-0.1.15.dist-info/WHEEL +4 -0
- markitecture-0.1.15.dist-info/entry_points.txt +2 -0
- markitecture-0.1.15.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
from typing import Dict
|
2
|
+
|
3
|
+
from markitecture.metrics.analyzer import ReadabilityMetrics
|
4
|
+
|
5
|
+
|
6
|
+
class ShieldsBadgeGenerator:
    """Render readability metrics as shields.io-style two-segment SVG badges.

    Every badge is a grey label segment followed by a coloured status
    segment. Segment widths are estimated from the character count of the
    text, not measured from real font metrics.
    """

    def __init__(self):
        """Set the layout constants and colour palette shared by all badges."""
        self.HEIGHT = 20
        self.FONT_SIZE = 11
        self.TEXT_MARGIN = 6
        self.COLORS = {
            "low": "#7934C5",  # Purple
            "medium": "#00E5FF",  # Cyan
            "high": "#FFD700",  # Gold
        }
        self.SHIELDS_BG = "#555555"

    def _calculate_width(self, text: str) -> int:
        """Estimate a segment width: 6 units per character plus both margins."""
        return len(text) * 6 + self.TEXT_MARGIN * 2

    def _get_status_color(self, score: float) -> str:
        """Map a score to a palette colour: <40 low, <70 medium, else high."""
        if score < 40:
            return self.COLORS["low"]
        if score < 70:
            return self.COLORS["medium"]
        return self.COLORS["high"]

    def _render_badge(self, label: str, status: str, status_color: str) -> str:
        """Build the SVG document for one label/status badge.

        Args:
            label: Text for the left (grey) segment.
            status: Text for the right (coloured) segment.
            status_color: Fill colour of the right segment.

        Returns:
            A complete ``<svg>`` document as a string. The gradient and clip
            path use the fixed ids ``smooth`` and ``round``.
        """
        # Local import: the module's top-level imports do not provide it.
        from xml.sax.saxutils import escape

        # Widths are computed from the raw text so escaping (e.g. "&amp;")
        # does not inflate the badge.
        label_width = self._calculate_width(label)
        status_width = self._calculate_width(status)
        total_width = label_width + status_width
        label_x = label_width / 2
        status_x = label_width + status_width / 2

        # Escape everything interpolated into markup so arbitrary labels
        # cannot produce malformed XML.
        safe_label = escape(label)
        safe_status = escape(status)
        safe_color = escape(status_color, {'"': "&quot;"})
        shadow = 'fill="#010101" fill-opacity=".3"'

        return "\n".join(
            [
                f'<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">',
                '  <linearGradient id="smooth" x2="0" y2="100%">',
                '    <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>',
                '    <stop offset="1" stop-opacity=".1"/>',
                "  </linearGradient>",
                '  <clipPath id="round">',
                f'    <rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>',
                "  </clipPath>",
                '  <g clip-path="url(#round)">',
                f'    <rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>',
                f'    <rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="{safe_color}"/>',
                f'    <rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>',
                "  </g>",
                f'  <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">',
                # Each text is drawn twice: a dark copy one unit lower acts
                # as a drop shadow under the white copy.
                f'    <text x="{label_x}" y="15" {shadow}>{safe_label}</text>',
                f'    <text x="{label_x}" y="14">{safe_label}</text>',
                f'    <text x="{status_x}" y="15" {shadow}>{safe_status}</text>',
                f'    <text x="{status_x}" y="14">{safe_status}</text>',
                "  </g>",
                "</svg>",
            ]
        )

    def generate_reading_time_badge(self, minutes: float) -> str:
        """Return a green "reading time" badge showing ``"<minutes> min"``."""
        return self._render_badge("reading time", f"{minutes} min", "#4c1")

    def generate_complexity_badge(self, score: float) -> str:
        """Return a "complexity" badge showing ``"<score>%"``.

        The status colour comes from :meth:`_get_status_color`.
        """
        return self._render_badge(
            "complexity", f"{score}%", self._get_status_color(score)
        )

    def generate_stats_badge(self, count: int, label: str, color: str) -> str:
        """Return a badge showing ``count`` under ``label`` in ``color``."""
        return self._render_badge(label, str(count), color)

    def generate_badges(self, metrics: "ReadabilityMetrics") -> Dict[str, str]:
        """Render one badge per metric.

        Args:
            metrics: Object exposing ``reading_time_mins``,
                ``complexity_score``, ``word_count``, ``heading_count``,
                ``code_block_count``, ``link_count`` and ``image_count``.

        Returns:
            Mapping of badge key to SVG document, in a fixed order.
        """
        return {
            "reading_time": self.generate_reading_time_badge(metrics.reading_time_mins),
            "complexity": self.generate_complexity_badge(metrics.complexity_score),
            "words": self.generate_stats_badge(metrics.word_count, "words", "#1E90FF"),
            "headings": self.generate_stats_badge(
                metrics.heading_count, "headings", "#9370DB"
            ),
            "code_blocks": self.generate_stats_badge(
                metrics.code_block_count, "code blocks", "#FF6347"
            ),
            "links": self.generate_stats_badge(metrics.link_count, "links", "#20B2AA"),
            "images": self.generate_stats_badge(
                metrics.image_count, "images", "#DEB887"
            ),
        }
|
@@ -0,0 +1,70 @@
|
|
1
|
+
from typing import Callable, Dict, Tuple
|
2
|
+
|
3
|
+
from markitecture.metrics.analyzer import ReadabilityMetrics
|
4
|
+
from markitecture.metrics.badges import (
|
5
|
+
BadgeStyle,
|
6
|
+
CompactBadgeGenerator,
|
7
|
+
DetailedBadgeGenerator,
|
8
|
+
MinimalBadgeGenerator,
|
9
|
+
ModernBadgeGenerator,
|
10
|
+
RetroBadgeGenerator,
|
11
|
+
ShieldsBadgeGenerator,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
class MetricsSvgGenerator:
    """Dispatch readability metrics to the badge renderer for a given style."""

    def __init__(self):
        """Register the per-style canvas sizes and render callables."""
        self.dimensions: Dict[BadgeStyle, Tuple[int, int]] = {
            BadgeStyle.MODERN: (560, 140),
            BadgeStyle.COMPACT: (400, 40),
            BadgeStyle.DETAILED: (600, 200),
            BadgeStyle.MINIMAL: (300, 80),
            BadgeStyle.RETRO: (480, 120),
        }
        # The SHIELDS renderer takes two extra colour arguments, so the
        # callables do not all share one signature.
        self.generators: Dict[BadgeStyle, Callable[..., str]] = {
            BadgeStyle.MODERN: ModernBadgeGenerator().generate,
            BadgeStyle.COMPACT: CompactBadgeGenerator().generate,
            BadgeStyle.DETAILED: DetailedBadgeGenerator().generate,
            BadgeStyle.MINIMAL: MinimalBadgeGenerator().generate,
            BadgeStyle.RETRO: RetroBadgeGenerator().generate,
            BadgeStyle.SHIELDS: self._generate_shields_badge,
        }

    def _get_gradient_colors(self, score: float) -> Tuple[str, str]:
        """Return a (start, end) colour pair: <40, <70, or otherwise."""
        if score < 40:
            return ("#7934C5", "#4158D0")
        if score < 70:
            return ("#00E5FF", "#4158D0")
        return ("#FFD700", "#FF00FF")

    @staticmethod
    def _namespace_ids(fragment: str, suffix: str) -> str:
        """Make every ``id`` in an SVG fragment unique by appending ``suffix``.

        Rewrites each ``id="name"`` to ``id="name-suffix"`` together with the
        matching ``url(#name)`` references. Needed because several badges
        that reuse the same ids are embedded in one document, where ids must
        be unique for clip paths and gradients to resolve per badge.
        """
        # Local import: this module has no top-level ``re`` import.
        import re

        for ident in dict.fromkeys(re.findall(r'\bid="([^"]+)"', fragment)):
            fragment = fragment.replace(f'id="{ident}"', f'id="{ident}-{suffix}"')
            fragment = fragment.replace(f"url(#{ident})", f"url(#{ident}-{suffix})")
        return fragment

    def _generate_shields_badge(
        self, metrics: "ReadabilityMetrics", color_start: str, color_end: str
    ) -> str:
        """Stack all shields badges vertically inside one SVG document.

        Args:
            metrics: Metrics passed to ``ShieldsBadgeGenerator.generate_badges``.
            color_start: Accepted for call compatibility; not used here.
            color_end: Accepted for call compatibility; not used here.

        Returns:
            One SVG document with each badge in its own translated ``<g>``,
            25 units apart, starting 10 units from the top.
        """
        generator = ShieldsBadgeGenerator()
        badges = generator.generate_badges(metrics)
        width = max(self.dimensions.get(BadgeStyle.MODERN, (560,))[0], 560)
        row_height = 25
        total_height = (len(badges) * row_height) + 20

        parts = [
            f'<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {total_height}">',
            "  <defs>",
            "    <style>",
            "      @font-face {",
            '        font-family: "DejaVu Sans";',
            '        src: url("https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSans.ttf");',
            "      }",
            "    </style>",
            "  </defs>",
        ]
        for index, badge_svg in enumerate(badges.values()):
            # Keep only what sits between the opening <svg ...> tag and the
            # closing </svg> so the badge can be nested in a <g>.
            content = badge_svg.split(">", 1)[1].rsplit("</svg>", 1)[0]
            content = self._namespace_ids(content, str(index))
            y_pos = 10 + index * row_height
            parts.append(
                f'  <g transform="translate(10, {y_pos})">\n    {content}\n  </g>'
            )
        parts.append("</svg>")
        return "\n".join(parts)

    def generate_svg(self, metrics: "ReadabilityMetrics", style: "BadgeStyle") -> str:
        """Render ``metrics`` in the requested ``style``.

        Raises:
            ValueError: If ``style`` has no registered generator.
        """
        if style not in self.generators:
            raise ValueError(f"Style '{style}' not supported.")
        render = self.generators[style]
        if style == BadgeStyle.SHIELDS:
            color_start, color_end = self._get_gradient_colors(metrics.complexity_score)
            return render(metrics, color_start, color_end)
        return render(metrics)
|
File without changes
|
@@ -0,0 +1,133 @@
|
|
1
|
+
"""Scan documents for broken links in markdown files."""
|
2
|
+
|
3
|
+
import os
|
4
|
+
import re
|
5
|
+
from concurrent.futures import ThreadPoolExecutor
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Dict, List, Tuple
|
8
|
+
from urllib.parse import urlparse
|
9
|
+
|
10
|
+
import requests
|
11
|
+
|
12
|
+
|
13
|
+
class LinkValidator:
|
14
|
+
""" "
|
15
|
+
Check links in markdown files for accessibility.
|
16
|
+
"""
|
17
|
+
|
18
|
+
def __init__(self, timeout: int = 10, max_workers: int = 5):
|
19
|
+
"""
|
20
|
+
Initialize the link checker with configurable timeout and concurrency.
|
21
|
+
|
22
|
+
Args:
|
23
|
+
timeout (int): Seconds to wait for each HTTP request
|
24
|
+
max_workers (int): Maximum number of concurrent requests
|
25
|
+
"""
|
26
|
+
self.timeout = timeout
|
27
|
+
self.max_workers = max_workers
|
28
|
+
self.inline_link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
|
29
|
+
self.ref_link_pattern = re.compile(r"\[([^\]]+)\]:\s*(\S+)")
|
30
|
+
|
31
|
+
def extract_links(self, content: str) -> List[Tuple[str, str, int]]:
|
32
|
+
"""
|
33
|
+
Extract inline and reference links from markdown content.
|
34
|
+
|
35
|
+
Args:
|
36
|
+
content (str): Markdown content
|
37
|
+
|
38
|
+
Returns:
|
39
|
+
List[Tuple[str, str, int]]: List of (text, url, line_number)
|
40
|
+
"""
|
41
|
+
links = []
|
42
|
+
|
43
|
+
# Extract inline links
|
44
|
+
for line_num, line in enumerate(content.splitlines(), 1):
|
45
|
+
links.extend(
|
46
|
+
(match.group(1), match.group(2).strip(), line_num)
|
47
|
+
for match in self.inline_link_pattern.finditer(line)
|
48
|
+
)
|
49
|
+
|
50
|
+
# Extract reference links
|
51
|
+
links.extend(
|
52
|
+
(match.group(1), match.group(2).strip(), line_num)
|
53
|
+
for line_num, line in enumerate(content.splitlines(), 1)
|
54
|
+
for match in self.ref_link_pattern.finditer(line)
|
55
|
+
)
|
56
|
+
|
57
|
+
return links
|
58
|
+
|
59
|
+
def check_link(self, url: str) -> Dict:
|
60
|
+
"""
|
61
|
+
Check if a link is accessible.
|
62
|
+
|
63
|
+
Args:
|
64
|
+
url (str): URL to check
|
65
|
+
|
66
|
+
Returns:
|
67
|
+
Dict: Dictionary with status and error information
|
68
|
+
"""
|
69
|
+
result = {"url": url, "status": "unknown", "error": None}
|
70
|
+
|
71
|
+
if url.startswith("#"): # Skip internal links
|
72
|
+
result["status"] = "internal"
|
73
|
+
return result
|
74
|
+
|
75
|
+
if not urlparse(url).scheme: # Handle local file paths
|
76
|
+
if os.path.exists(url):
|
77
|
+
result["status"] = "ok"
|
78
|
+
else:
|
79
|
+
result["status"] = "error"
|
80
|
+
result["error"] = "File not found"
|
81
|
+
return result
|
82
|
+
|
83
|
+
try:
|
84
|
+
response = requests.head(url, timeout=self.timeout, allow_redirects=True)
|
85
|
+
if response.status_code == 405:
|
86
|
+
response = requests.get(url, timeout=self.timeout)
|
87
|
+
|
88
|
+
if response.status_code == 200:
|
89
|
+
result["status"] = "ok"
|
90
|
+
else:
|
91
|
+
result["status"] = "error"
|
92
|
+
result["error"] = f"HTTP {response.status_code}"
|
93
|
+
except requests.RequestException as e:
|
94
|
+
result["status"] = "error"
|
95
|
+
result["error"] = str(e)
|
96
|
+
|
97
|
+
return result
|
98
|
+
|
99
|
+
def check_markdown_file(self, filepath: str) -> List[Dict[str, str]]:
|
100
|
+
"""
|
101
|
+
Check all links in a markdown file.
|
102
|
+
|
103
|
+
Args:
|
104
|
+
filepath (str): Path to the markdown file
|
105
|
+
|
106
|
+
Returns:
|
107
|
+
List[Dict]: List of results for each link check
|
108
|
+
"""
|
109
|
+
try:
|
110
|
+
content = Path(filepath).read_text(encoding="utf-8")
|
111
|
+
except OSError as e:
|
112
|
+
return [{"error": f"Failed to read file: {e}"}]
|
113
|
+
|
114
|
+
links = self.extract_links(content)
|
115
|
+
results = []
|
116
|
+
|
117
|
+
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
118
|
+
futures = {
|
119
|
+
executor.submit(self.check_link, url): (text, url, line)
|
120
|
+
for text, url, line in links
|
121
|
+
}
|
122
|
+
for future in futures:
|
123
|
+
text, url, line = futures[future]
|
124
|
+
result = future.result()
|
125
|
+
results.append({
|
126
|
+
"text": text,
|
127
|
+
"url": url,
|
128
|
+
"line": line,
|
129
|
+
"status": result["status"],
|
130
|
+
"error": result.get("error"),
|
131
|
+
})
|
132
|
+
|
133
|
+
return results
|
@@ -0,0 +1,198 @@
|
|
1
|
+
"""Reference link handling with sectional placement support."""
|
2
|
+
|
3
|
+
import re
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from enum import StrEnum, auto
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Dict, List, Optional
|
8
|
+
|
9
|
+
|
10
|
+
class ReferencePlacement(StrEnum):
|
11
|
+
"""Controls where reference links are placed in the document."""
|
12
|
+
|
13
|
+
END = auto()
|
14
|
+
SECTION = auto()
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
|
18
|
+
class Section:
|
19
|
+
"""Represents a markdown section with its references."""
|
20
|
+
|
21
|
+
content: str
|
22
|
+
level: int
|
23
|
+
references: Dict[str, str]
|
24
|
+
start: int
|
25
|
+
end: int
|
26
|
+
|
27
|
+
|
28
|
+
class ReferenceLinkConverter:
|
29
|
+
"""converter for managing reference-style links with section support."""
|
30
|
+
|
31
|
+
def __init__(self) -> None:
|
32
|
+
"""Initialize patterns for finding links and headers."""
|
33
|
+
self.link_pattern = r"\[([^\]]+)\]\(([^\)]+)\)"
|
34
|
+
self.header_pattern = r"^(#{1,6})\s+(.+?)(?:\s+<!--.*?-->)*\s*$"
|
35
|
+
|
36
|
+
def _extract_sections(self, content: str) -> List[Section]:
|
37
|
+
"""Extract document sections based on headers."""
|
38
|
+
sections: List[Section] = []
|
39
|
+
lines = content.splitlines()
|
40
|
+
current_section: Optional[Section] = None
|
41
|
+
|
42
|
+
for i, line in enumerate(lines):
|
43
|
+
header_match = re.match(self.header_pattern, line)
|
44
|
+
|
45
|
+
if header_match:
|
46
|
+
# If we have a previous section, finalize it
|
47
|
+
if current_section:
|
48
|
+
current_section.end = i
|
49
|
+
sections.append(current_section)
|
50
|
+
|
51
|
+
# Start new section
|
52
|
+
level = len(header_match.group(1))
|
53
|
+
current_section = Section(
|
54
|
+
content="", level=level, references={}, start=i, end=-1
|
55
|
+
)
|
56
|
+
|
57
|
+
# Handle the last section
|
58
|
+
if current_section:
|
59
|
+
current_section.end = len(lines)
|
60
|
+
sections.append(current_section)
|
61
|
+
|
62
|
+
# If no sections found, treat entire document as one section
|
63
|
+
if not sections:
|
64
|
+
sections = [
|
65
|
+
Section(
|
66
|
+
content=content, level=0, references={}, start=0, end=len(lines)
|
67
|
+
)
|
68
|
+
]
|
69
|
+
|
70
|
+
return sections
|
71
|
+
|
72
|
+
def _process_section_content(
|
73
|
+
self, content: str, section: Section, used_refs: Dict[str, str]
|
74
|
+
) -> str:
|
75
|
+
"""Process content for a single section, adding references if needed."""
|
76
|
+
lines = content.splitlines()
|
77
|
+
section_lines = lines[section.start : section.end]
|
78
|
+
|
79
|
+
# Find all link matches in this section
|
80
|
+
matches = list(re.finditer(self.link_pattern, "\n".join(section_lines)))
|
81
|
+
if not matches:
|
82
|
+
return content
|
83
|
+
|
84
|
+
# Convert links and track references for this section
|
85
|
+
modified_lines = section_lines.copy()
|
86
|
+
references = {}
|
87
|
+
|
88
|
+
for match in matches:
|
89
|
+
original = match.group(0)
|
90
|
+
text = match.group(1)
|
91
|
+
url = match.group(2)
|
92
|
+
|
93
|
+
# Generate reference ID
|
94
|
+
ref_id = self._generate_reference_id(text, used_refs)
|
95
|
+
used_refs[ref_id] = text
|
96
|
+
references[ref_id] = url
|
97
|
+
|
98
|
+
# Create reference style link
|
99
|
+
is_image = text.startswith("!")
|
100
|
+
ref_link = f"![{text[1:]}][{ref_id}]" if is_image else f"[{text}][{ref_id}]"
|
101
|
+
|
102
|
+
# Replace in content
|
103
|
+
for i, line in enumerate(modified_lines):
|
104
|
+
if original in line:
|
105
|
+
modified_lines[i] = line.replace(original, ref_link)
|
106
|
+
|
107
|
+
# Store references for this section
|
108
|
+
section.references = references
|
109
|
+
|
110
|
+
# Update content with modified lines
|
111
|
+
lines[section.start : section.end] = modified_lines
|
112
|
+
return "\n".join(lines)
|
113
|
+
|
114
|
+
def _generate_reference_id(self, text: str, used_refs: Dict[str, str]) -> str:
|
115
|
+
"""Generate a unique reference ID based on the link text."""
|
116
|
+
text = text.lstrip("!")
|
117
|
+
ref = re.sub(r"[^\w\s-]", "", text.lower())
|
118
|
+
ref = re.sub(r"[-\s]+", "-", ref).strip("-")
|
119
|
+
|
120
|
+
if not ref:
|
121
|
+
ref = "link"
|
122
|
+
|
123
|
+
base_ref = ref
|
124
|
+
counter = 1
|
125
|
+
while ref in used_refs and used_refs[ref] != text:
|
126
|
+
ref = f"{base_ref}-{counter}"
|
127
|
+
counter += 1
|
128
|
+
|
129
|
+
return ref
|
130
|
+
|
131
|
+
def convert_to_reflinks(
|
132
|
+
self, content: str, placement: ReferencePlacement = ReferencePlacement.END
|
133
|
+
) -> str:
|
134
|
+
"""Convert inline links to reference style with configurable placement."""
|
135
|
+
sections = self._extract_sections(content)
|
136
|
+
used_refs: Dict[str, str] = {}
|
137
|
+
processed_content = content
|
138
|
+
|
139
|
+
# Process each section
|
140
|
+
for section in sections:
|
141
|
+
processed_content = self._process_section_content(
|
142
|
+
processed_content, section, used_refs
|
143
|
+
)
|
144
|
+
|
145
|
+
# Add references based on placement preference
|
146
|
+
if placement == ReferencePlacement.END:
|
147
|
+
# Add all references at end of document
|
148
|
+
all_refs = {}
|
149
|
+
for section in sections:
|
150
|
+
all_refs.update(section.references)
|
151
|
+
|
152
|
+
if all_refs:
|
153
|
+
ref_section = "\n\n---\n\n<!-- REFERENCE LINKS -->\n"
|
154
|
+
for ref_id, url in sorted(all_refs.items()):
|
155
|
+
ref_section += f"[{ref_id}]: {url}\n"
|
156
|
+
processed_content = processed_content.rstrip() + ref_section + "\n"
|
157
|
+
|
158
|
+
else: # ReferencePlacement.SECTION
|
159
|
+
# Add references at the end of each section
|
160
|
+
lines = processed_content.splitlines()
|
161
|
+
|
162
|
+
for section in reversed(
|
163
|
+
sections
|
164
|
+
): # Process in reverse to maintain positions
|
165
|
+
reflink_comment = "REFERENCE LINKS"
|
166
|
+
header_match = re.match(self.header_pattern, lines[section.start])
|
167
|
+
if header_match:
|
168
|
+
reflink_comment = (
|
169
|
+
f"{header_match.group(2).upper()} {reflink_comment}"
|
170
|
+
)
|
171
|
+
if section.references:
|
172
|
+
ref_text = f"<!-- {reflink_comment} -->\n"
|
173
|
+
for ref_id, url in sorted(section.references.items()):
|
174
|
+
ref_text += f"[{ref_id}]: {url}\n"
|
175
|
+
|
176
|
+
# Insert references at section end
|
177
|
+
lines.insert(section.end, f"{ref_text}\n---\n")
|
178
|
+
|
179
|
+
processed_content = "\n".join(lines)
|
180
|
+
|
181
|
+
return processed_content
|
182
|
+
|
183
|
+
def process_file(
|
184
|
+
self,
|
185
|
+
input_path: str | Path,
|
186
|
+
output_path: str | Path | None = None,
|
187
|
+
placement: ReferencePlacement = ReferencePlacement.END,
|
188
|
+
) -> None:
|
189
|
+
"""Process a markdown file and save to a new file."""
|
190
|
+
input_path = Path(input_path)
|
191
|
+
if not input_path.exists():
|
192
|
+
raise FileNotFoundError(f"Input file not found: {input_path}")
|
193
|
+
|
194
|
+
content = input_path.read_text(encoding="utf-8")
|
195
|
+
modified_content = self.convert_to_reflinks(content, placement)
|
196
|
+
|
197
|
+
output_path = Path(output_path) if output_path else input_path
|
198
|
+
output_path.write_text(modified_content, encoding="utf-8")
|
@@ -0,0 +1,82 @@
|
|
1
|
+
"""Extract and manage reference-style links in Markdown content."""
|
2
|
+
|
3
|
+
import re
|
4
|
+
from typing import Dict
|
5
|
+
|
6
|
+
|
7
|
+
class ReferenceLinkExtractor:
    """
    Handles extraction and management of reference-style links in Markdown.

    This class provides functionality to extract reference-style links from markdown
    content and track which references are actually used within specific sections.
    """

    # "<!-- REFERENCE LINKS -->", optionally prefixed inside the comment,
    # e.g. "<!-- INSTALLATION REFERENCE LINKS -->".
    _MARKER = re.compile(r"<!--(?:(?!-->).)*?REFERENCE\s+LINKS\s*-->", re.DOTALL)
    # A marker comment and everything that follows it.
    _MARKER_TO_END = re.compile(
        r"\n*<!--(?:(?!-->).)*?REFERENCE\s+LINKS\s*-->\n*.*$", re.DOTALL
    )
    # A definition line: "[refname]: target".
    _DEFINITION = re.compile(r"^\[([^\]]+)\]:\s*(.+?)\s*$", re.MULTILINE)
    # "[label]" not followed by "(" (inline link) or ":" (definition).
    _USAGE = re.compile(r"\[([^\]]+)\](?!\(|\:)")

    def __init__(self, markdown_text: str) -> None:
        """
        Initialize the ReferenceLinkExtractor with the entire markdown content.

        Args:
            markdown_text: The full markdown content as a string.
        """
        self.markdown_text = markdown_text
        self.references = self._extract_references()

    @staticmethod
    def _normalize_label(label: str) -> str:
        """Return the label case-folded with whitespace runs collapsed."""
        return " ".join(label.split()).casefold()

    def _extract_references(self) -> dict[str, str]:
        """
        Extract reference-style links from the markdown text.

        A reference link follows the pattern:
            [refname]: http://example.com

        Definitions are collected from the whole text, split at the
        reference marker comments. When a name is defined more than once,
        the last definition wins.

        Returns:
            Dictionary mapping reference names to their URLs.
        """
        references: dict[str, str] = {}
        for chunk in self._MARKER.split(self.markdown_text):
            for match in self._DEFINITION.finditer(chunk):
                references[match.group(1).strip()] = match.group(2).strip()
        return references

    def find_used_references(self, section_content: str) -> dict[str, str]:
        """
        Find which references are actually used within a given section.

        A reference is considered used if it appears in the form [refname]
        within the section content, excluding the reference definitions themselves.
        Labels are compared ignoring case and whitespace differences, so
        ``[Docs]`` uses a definition named ``docs``.

        Args:
            section_content: The markdown content of a section to analyze.

        Returns:
            Dictionary of references that are actually used in the section,
            mapping reference names (as defined) to their URLs.
        """
        # Drop the definition block so bracketed text inside it is not
        # counted as a usage.
        body = self._MARKER_TO_END.sub("", section_content)

        # Built per call because ``self.references`` is a public attribute
        # that may have been changed since construction.
        by_label: dict[str, str] = {}
        for name in self.references:
            by_label.setdefault(self._normalize_label(name), name)

        used_refs: dict[str, str] = {}
        for label in self._USAGE.findall(body):
            if label in self.references:
                name = label
            else:
                name = by_label.get(self._normalize_label(label))
            if name is not None:
                used_refs[name] = self.references[name]

        return used_refs
|