markitecture-0.1.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markitecture/__init__.py +41 -0
- markitecture/__main__.py +4 -0
- markitecture/cli/__init__.py +3 -0
- markitecture/cli/app.py +38 -0
- markitecture/cli/commands/__init__.py +21 -0
- markitecture/cli/commands/config.py +84 -0
- markitecture/cli/commands/links.py +146 -0
- markitecture/cli/commands/metrics.py +193 -0
- markitecture/cli/commands/mkdocs.py +39 -0
- markitecture/cli/commands/split.py +48 -0
- markitecture/errors.py +64 -0
- markitecture/generators/__init__.py +3 -0
- markitecture/generators/configs/__init__.py +0 -0
- markitecture/generators/configs/mintlify_json.py +0 -0
- markitecture/generators/configs/mkdocs_yaml.py +317 -0
- markitecture/metrics/__init__.py +9 -0
- markitecture/metrics/analyzer.py +109 -0
- markitecture/metrics/badges/__init__.py +28 -0
- markitecture/metrics/badges/base.py +7 -0
- markitecture/metrics/badges/compact.py +35 -0
- markitecture/metrics/badges/detailed.py +60 -0
- markitecture/metrics/badges/minimal.py +19 -0
- markitecture/metrics/badges/modern.py +45 -0
- markitecture/metrics/badges/retro.py +23 -0
- markitecture/metrics/badges/shields.py +124 -0
- markitecture/metrics/svg_generator.py +70 -0
- markitecture/processing/__init__.py +0 -0
- markitecture/processing/link_validator.py +133 -0
- markitecture/processing/reflink_converter.py +198 -0
- markitecture/processing/reflink_extractor.py +82 -0
- markitecture/processing/text_splitter.py +290 -0
- markitecture/settings/__init__.py +9 -0
- markitecture/settings/config.py +61 -0
- markitecture/settings/validators.py +26 -0
- markitecture/utils/__init__.py +5 -0
- markitecture/utils/file_handler.py +24 -0
- markitecture/utils/printer.py +195 -0
- markitecture/utils/sanitizer.py +78 -0
- markitecture-0.1.15.dist-info/METADATA +271 -0
- markitecture-0.1.15.dist-info/RECORD +43 -0
- markitecture-0.1.15.dist-info/WHEEL +4 -0
- markitecture-0.1.15.dist-info/entry_points.txt +2 -0
- markitecture-0.1.15.dist-info/licenses/LICENSE +21 -0
markitecture/metrics/badges/shields.py
@@ -0,0 +1,124 @@
from typing import Dict

from markitecture.metrics.analyzer import ReadabilityMetrics


class ShieldsBadgeGenerator:
    def __init__(self):
        self.HEIGHT = 20
        self.FONT_SIZE = 11
        self.TEXT_MARGIN = 6
        self.COLORS = {
            "low": "#7934C5",  # Purple
            "medium": "#00E5FF",  # Cyan
            "high": "#FFD700",  # Gold
        }
        self.SHIELDS_BG = "#555555"

    def _calculate_width(self, text: str) -> int:
        return len(text) * 6 + self.TEXT_MARGIN * 2

    def _get_status_color(self, score: float) -> str:
        if score < 40:
            return self.COLORS["low"]
        elif score < 70:
            return self.COLORS["medium"]
        return self.COLORS["high"]

    def generate_reading_time_badge(self, minutes: float) -> str:
        label = "reading time"
        status = f"{minutes} min"
        label_width = self._calculate_width(label)
        status_width = self._calculate_width(status)
        total_width = label_width + status_width
        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
    <linearGradient id="smooth" x2="0" y2="100%">
        <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
        <stop offset="1" stop-opacity=".1"/>
    </linearGradient>
    <clipPath id="round">
        <rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
    </clipPath>
    <g clip-path="url(#round)">
        <rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
        <rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="#4c1"/>
        <rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
    </g>
    <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
        <text x="{label_width / 2}" y="15" fill="#010101" fill-opacity=".3">{label}</text>
        <text x="{label_width / 2}" y="14">{label}</text>
        <text x="{label_width + status_width / 2}" y="15" fill="#010101" fill-opacity=".3">{status}</text>
        <text x="{label_width + status_width / 2}" y="14">{status}</text>
    </g>
</svg>'''

    def generate_complexity_badge(self, score: float) -> str:
        label = "complexity"
        status = f"{score}%"
        color = self._get_status_color(score)
        label_width = self._calculate_width(label)
        status_width = self._calculate_width(status)
        total_width = label_width + status_width
        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
    <linearGradient id="smooth" x2="0" y2="100%">
        <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
        <stop offset="1" stop-opacity=".1"/>
    </linearGradient>
    <clipPath id="round">
        <rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
    </clipPath>
    <g clip-path="url(#round)">
        <rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
        <rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="{color}"/>
        <rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
    </g>
    <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
        <text x="{label_width / 2}" y="15" fill="#010101" fill-opacity=".3">{label}</text>
        <text x="{label_width / 2}" y="14">{label}</text>
        <text x="{label_width + status_width / 2}" y="15" fill="#010101" fill-opacity=".3">{status}</text>
        <text x="{label_width + status_width / 2}" y="14">{status}</text>
    </g>
</svg>'''

    def generate_stats_badge(self, count: int, label: str, color: str) -> str:
        status = str(count)
        label_width = self._calculate_width(label)
        status_width = self._calculate_width(status)
        total_width = label_width + status_width
        return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{total_width}" height="{self.HEIGHT}">
    <linearGradient id="smooth" x2="0" y2="100%">
        <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
        <stop offset="1" stop-opacity=".1"/>
    </linearGradient>
    <clipPath id="round">
        <rect width="{total_width}" height="{self.HEIGHT}" rx="3" fill="#fff"/>
    </clipPath>
    <g clip-path="url(#round)">
        <rect width="{label_width}" height="{self.HEIGHT}" fill="{self.SHIELDS_BG}"/>
        <rect x="{label_width}" width="{status_width}" height="{self.HEIGHT}" fill="{color}"/>
        <rect width="{total_width}" height="{self.HEIGHT}" fill="url(#smooth)"/>
    </g>
    <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="{self.FONT_SIZE}">
        <text x="{label_width / 2}" y="15" fill="#010101" fill-opacity=".3">{label}</text>
        <text x="{label_width / 2}" y="14">{label}</text>
        <text x="{label_width + status_width / 2}" y="15" fill="#010101" fill-opacity=".3">{status}</text>
        <text x="{label_width + status_width / 2}" y="14">{status}</text>
    </g>
</svg>'''

    def generate_badges(self, metrics: ReadabilityMetrics) -> Dict[str, str]:
        return {
            "reading_time": self.generate_reading_time_badge(metrics.reading_time_mins),
            "complexity": self.generate_complexity_badge(metrics.complexity_score),
            "words": self.generate_stats_badge(metrics.word_count, "words", "#1E90FF"),
            "headings": self.generate_stats_badge(
                metrics.heading_count, "headings", "#9370DB"
            ),
            "code_blocks": self.generate_stats_badge(
                metrics.code_block_count, "code blocks", "#FF6347"
            ),
            "links": self.generate_stats_badge(metrics.link_count, "links", "#20B2AA"),
            "images": self.generate_stats_badge(
                metrics.image_count, "images", "#DEB887"
            ),
        }
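As a quick orientation for readers of this diff, here is a minimal usage sketch (not part of the package) that renders two of the standalone shields-style badges above and writes them to disk. The output file names and metric values are illustrative; the import target matches the one used in `svg_generator.py`.

```python
from pathlib import Path

from markitecture.metrics.badges import ShieldsBadgeGenerator

generator = ShieldsBadgeGenerator()
# Render individual badges; values here are placeholders for illustration.
Path("reading_time.svg").write_text(generator.generate_reading_time_badge(3.5))
Path("complexity.svg").write_text(generator.generate_complexity_badge(42.0))
```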
markitecture/metrics/svg_generator.py
@@ -0,0 +1,70 @@
from typing import Callable, Dict, Tuple

from markitecture.metrics.analyzer import ReadabilityMetrics
from markitecture.metrics.badges import (
    BadgeStyle,
    CompactBadgeGenerator,
    DetailedBadgeGenerator,
    MinimalBadgeGenerator,
    ModernBadgeGenerator,
    RetroBadgeGenerator,
    ShieldsBadgeGenerator,
)


class MetricsSvgGenerator:
    def __init__(self):
        self.dimensions: Dict[BadgeStyle, Tuple[int, int]] = {
            BadgeStyle.MODERN: (560, 140),
            BadgeStyle.COMPACT: (400, 40),
            BadgeStyle.DETAILED: (600, 200),
            BadgeStyle.MINIMAL: (300, 80),
            BadgeStyle.RETRO: (480, 120),
        }
        self.generators: Dict[BadgeStyle, Callable[[ReadabilityMetrics], str]] = {
            BadgeStyle.MODERN: ModernBadgeGenerator().generate,
            BadgeStyle.COMPACT: CompactBadgeGenerator().generate,
            BadgeStyle.DETAILED: DetailedBadgeGenerator().generate,
            BadgeStyle.MINIMAL: MinimalBadgeGenerator().generate,
            BadgeStyle.RETRO: RetroBadgeGenerator().generate,
            BadgeStyle.SHIELDS: self._generate_shields_badge,
        }

    def _get_gradient_colors(self, score: float) -> Tuple[str, str]:
        if score < 40:
            return ("#7934C5", "#4158D0")
        elif score < 70:
            return ("#00E5FF", "#4158D0")
        return ("#FFD700", "#FF00FF")

    def _generate_shields_badge(
        self, metrics: ReadabilityMetrics, color_start: str, color_end: str
    ) -> str:
        generator = ShieldsBadgeGenerator()
        badges = generator.generate_badges(metrics)
        width = max(self.dimensions.get(BadgeStyle.MODERN, (560,))[0], 560)
        total_height = (len(badges) * 25) + 20
        svg = f"""<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 {width} {total_height}">
    <defs>
        <style>
            @font-face {{
                font-family: "DejaVu Sans";
                src: url("https://cdn.jsdelivr.net/npm/dejavu-fonts-ttf@2.37.3/ttf/DejaVuSans.ttf");
            }}
        </style>
    </defs>"""
        y_pos = 10
        for badge_svg in badges.values():
            content = badge_svg.split(">", 1)[1].rsplit("</svg>", 1)[0]
            svg += f'\n <g transform="translate(10, {y_pos})">\n {content}\n </g>'
            y_pos += 25
        svg += "\n</svg>"
        return svg

    def generate_svg(self, metrics: ReadabilityMetrics, style: BadgeStyle) -> str:
        if style not in self.generators:
            raise ValueError(f"Style '{style}' not supported.")
        if style == BadgeStyle.SHIELDS:
            color_start, color_end = self._get_gradient_colors(metrics.complexity_score)
            return self.generators[style](metrics, color_start, color_end)
        return self.generators[style](metrics)
File without changes
markitecture/processing/link_validator.py
@@ -0,0 +1,133 @@
"""Scan documents for broken links in markdown files."""

import os
import re
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Dict, List, Tuple
from urllib.parse import urlparse

import requests


class LinkValidator:
    """
    Check links in markdown files for accessibility.
    """

    def __init__(self, timeout: int = 10, max_workers: int = 5):
        """
        Initialize the link checker with configurable timeout and concurrency.

        Args:
            timeout (int): Seconds to wait for each HTTP request
            max_workers (int): Maximum number of concurrent requests
        """
        self.timeout = timeout
        self.max_workers = max_workers
        self.inline_link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
        self.ref_link_pattern = re.compile(r"\[([^\]]+)\]:\s*(\S+)")

    def extract_links(self, content: str) -> List[Tuple[str, str, int]]:
        """
        Extract inline and reference links from markdown content.

        Args:
            content (str): Markdown content

        Returns:
            List[Tuple[str, str, int]]: List of (text, url, line_number)
        """
        links = []

        # Extract inline links
        for line_num, line in enumerate(content.splitlines(), 1):
            links.extend(
                (match.group(1), match.group(2).strip(), line_num)
                for match in self.inline_link_pattern.finditer(line)
            )

        # Extract reference links
        links.extend(
            (match.group(1), match.group(2).strip(), line_num)
            for line_num, line in enumerate(content.splitlines(), 1)
            for match in self.ref_link_pattern.finditer(line)
        )

        return links

    def check_link(self, url: str) -> Dict:
        """
        Check if a link is accessible.

        Args:
            url (str): URL to check

        Returns:
            Dict: Dictionary with status and error information
        """
        result = {"url": url, "status": "unknown", "error": None}

        if url.startswith("#"):  # Skip internal links
            result["status"] = "internal"
            return result

        if not urlparse(url).scheme:  # Handle local file paths
            if os.path.exists(url):
                result["status"] = "ok"
            else:
                result["status"] = "error"
                result["error"] = "File not found"
            return result

        try:
            response = requests.head(url, timeout=self.timeout, allow_redirects=True)
            if response.status_code == 405:
                response = requests.get(url, timeout=self.timeout)

            if response.status_code == 200:
                result["status"] = "ok"
            else:
                result["status"] = "error"
                result["error"] = f"HTTP {response.status_code}"
        except requests.RequestException as e:
            result["status"] = "error"
            result["error"] = str(e)

        return result

    def check_markdown_file(self, filepath: str) -> List[Dict[str, str]]:
        """
        Check all links in a markdown file.

        Args:
            filepath (str): Path to the markdown file

        Returns:
            List[Dict]: List of results for each link check
        """
        try:
            content = Path(filepath).read_text(encoding="utf-8")
        except OSError as e:
            return [{"error": f"Failed to read file: {e}"}]

        links = self.extract_links(content)
        results = []

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = {
                executor.submit(self.check_link, url): (text, url, line)
                for text, url, line in links
            }
            for future in futures:
                text, url, line = futures[future]
                result = future.result()
                results.append({
                    "text": text,
                    "url": url,
                    "line": line,
                    "status": result["status"],
                    "error": result.get("error"),
                })

        return results
markitecture/processing/reflink_converter.py
@@ -0,0 +1,198 @@
"""Reference link handling with sectional placement support."""

import re
from dataclasses import dataclass
from enum import StrEnum, auto
from pathlib import Path
from typing import Dict, List, Optional


class ReferencePlacement(StrEnum):
    """Controls where reference links are placed in the document."""

    END = auto()
    SECTION = auto()


@dataclass
class Section:
    """Represents a markdown section with its references."""

    content: str
    level: int
    references: Dict[str, str]
    start: int
    end: int


class ReferenceLinkConverter:
    """converter for managing reference-style links with section support."""

    def __init__(self) -> None:
        """Initialize patterns for finding links and headers."""
        self.link_pattern = r"\[([^\]]+)\]\(([^\)]+)\)"
        self.header_pattern = r"^(#{1,6})\s+(.+?)(?:\s+<!--.*?-->)*\s*$"

    def _extract_sections(self, content: str) -> List[Section]:
        """Extract document sections based on headers."""
        sections: List[Section] = []
        lines = content.splitlines()
        current_section: Optional[Section] = None

        for i, line in enumerate(lines):
            header_match = re.match(self.header_pattern, line)

            if header_match:
                # If we have a previous section, finalize it
                if current_section:
                    current_section.end = i
                    sections.append(current_section)

                # Start new section
                level = len(header_match.group(1))
                current_section = Section(
                    content="", level=level, references={}, start=i, end=-1
                )

        # Handle the last section
        if current_section:
            current_section.end = len(lines)
            sections.append(current_section)

        # If no sections found, treat entire document as one section
        if not sections:
            sections = [
                Section(
                    content=content, level=0, references={}, start=0, end=len(lines)
                )
            ]

        return sections

    def _process_section_content(
        self, content: str, section: Section, used_refs: Dict[str, str]
    ) -> str:
        """Process content for a single section, adding references if needed."""
        lines = content.splitlines()
        section_lines = lines[section.start : section.end]

        # Find all link matches in this section
        matches = list(re.finditer(self.link_pattern, "\n".join(section_lines)))
        if not matches:
            return content

        # Convert links and track references for this section
        modified_lines = section_lines.copy()
        references = {}

        for match in matches:
            original = match.group(0)
            text = match.group(1)
            url = match.group(2)

            # Generate reference ID
            ref_id = self._generate_reference_id(text, used_refs)
            used_refs[ref_id] = text
            references[ref_id] = url

            # Create reference style link
            is_image = text.startswith("!")
            ref_link = f"![{text[1:]}][{ref_id}]" if is_image else f"[{text}][{ref_id}]"

            # Replace in content
            for i, line in enumerate(modified_lines):
                if original in line:
                    modified_lines[i] = line.replace(original, ref_link)

        # Store references for this section
        section.references = references

        # Update content with modified lines
        lines[section.start : section.end] = modified_lines
        return "\n".join(lines)

    def _generate_reference_id(self, text: str, used_refs: Dict[str, str]) -> str:
        """Generate a unique reference ID based on the link text."""
        text = text.lstrip("!")
        ref = re.sub(r"[^\w\s-]", "", text.lower())
        ref = re.sub(r"[-\s]+", "-", ref).strip("-")

        if not ref:
            ref = "link"

        base_ref = ref
        counter = 1
        while ref in used_refs and used_refs[ref] != text:
            ref = f"{base_ref}-{counter}"
            counter += 1

        return ref

    def convert_to_reflinks(
        self, content: str, placement: ReferencePlacement = ReferencePlacement.END
    ) -> str:
        """Convert inline links to reference style with configurable placement."""
        sections = self._extract_sections(content)
        used_refs: Dict[str, str] = {}
        processed_content = content

        # Process each section
        for section in sections:
            processed_content = self._process_section_content(
                processed_content, section, used_refs
            )

        # Add references based on placement preference
        if placement == ReferencePlacement.END:
            # Add all references at end of document
            all_refs = {}
            for section in sections:
                all_refs.update(section.references)

            if all_refs:
                ref_section = "\n\n---\n\n<!-- REFERENCE LINKS -->\n"
                for ref_id, url in sorted(all_refs.items()):
                    ref_section += f"[{ref_id}]: {url}\n"
                processed_content = processed_content.rstrip() + ref_section + "\n"

        else:  # ReferencePlacement.SECTION
            # Add references at the end of each section
            lines = processed_content.splitlines()

            for section in reversed(
                sections
            ):  # Process in reverse to maintain positions
                reflink_comment = "REFERENCE LINKS"
                header_match = re.match(self.header_pattern, lines[section.start])
                if header_match:
                    reflink_comment = (
                        f"{header_match.group(2).upper()} {reflink_comment}"
                    )
                if section.references:
                    ref_text = f"<!-- {reflink_comment} -->\n"
                    for ref_id, url in sorted(section.references.items()):
                        ref_text += f"[{ref_id}]: {url}\n"

                    # Insert references at section end
                    lines.insert(section.end, f"{ref_text}\n---\n")

            processed_content = "\n".join(lines)

        return processed_content

    def process_file(
        self,
        input_path: str | Path,
        output_path: str | Path | None = None,
        placement: ReferencePlacement = ReferencePlacement.END,
    ) -> None:
        """Process a markdown file and save to a new file."""
        input_path = Path(input_path)
        if not input_path.exists():
            raise FileNotFoundError(f"Input file not found: {input_path}")

        content = input_path.read_text(encoding="utf-8")
        modified_content = self.convert_to_reflinks(content, placement)

        output_path = Path(output_path) if output_path else input_path
        output_path.write_text(modified_content, encoding="utf-8")
markitecture/processing/reflink_extractor.py
@@ -0,0 +1,82 @@
"""Extract and manage reference-style links in Markdown content."""

import re
from typing import Dict


class ReferenceLinkExtractor:
    """
    Handles extraction and management of reference-style links in Markdown.

    This class provides functionality to extract reference-style links from markdown
    content and track which references are actually used within specific sections.
    """

    def __init__(self, markdown_text: str) -> None:
        """
        Initialize the ReferenceLinkExtractor with the entire markdown content.

        Args:
            markdown_text: The full markdown content as a string.
        """
        self.markdown_text = markdown_text
        self.references = self._extract_references()

    def _extract_references(self) -> dict[str, str]:
        """
        Extract reference-style links from the markdown text.

        A reference link follows the pattern:
            [refname]: http://example.com

        Returns:
            Dictionary mapping reference names to their URLs.
        """
        # Extract references that appear after reference marker comments
        ref_sections = re.split(r"<!--\s*REFERENCE\s+LINKS\s*-->", self.markdown_text)

        references: dict[str, str] = {}
        ref_pattern = re.compile(r"^\[([^\]]+)\]:\s*(.+?)\s*$", re.MULTILINE)

        for section in ref_sections:
            for match in ref_pattern.finditer(section):
                ref_name = match.group(1).strip()
                ref_link = match.group(2).strip()
                references[ref_name] = ref_link

        return references

    def find_used_references(self, section_content: str) -> dict[str, str]:
        """
        Find which references are actually used within a given section.

        A reference is considered used if it appears in the form [refname]
        within the section content, excluding the reference definitions themselves.

        Args:
            section_content: The markdown content of a section to analyze.

        Returns:
            Dictionary of references that are actually used in the section,
            mapping reference names to their URLs.
        """
        used_refs: Dict[str, str] = {}

        # Remove any existing reference definitions from the content
        content_without_refs = re.sub(
            r"\n*<!--\s*REFERENCE\s+LINKS\s*-->\n*.*$",
            "",
            section_content,
            flags=re.DOTALL,
        )

        # Find all reference usages, excluding image or link definitions
        ref_usage_pattern = re.compile(r"\[([^\]]+)\](?!\(|\:)")
        found = ref_usage_pattern.findall(content_without_refs)

        # Only include references that exist and are actually used
        for ref in found:
            if ref in self.references:
                used_refs[ref] = self.references[ref]

        return used_refs
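Finally, a minimal sketch (not part of the diff) showing the extractor above pruning unused reference definitions for a single section. The sample document and section text are made up for illustration.

```python
from markitecture.processing.reflink_extractor import ReferenceLinkExtractor

document = """# Guide

See the [docs][docs] for details.

<!-- REFERENCE LINKS -->
[docs]: https://example.com/docs
[unused]: https://example.com/unused
"""

extractor = ReferenceLinkExtractor(document)
section = "See the [docs][docs] for details."
# Only definitions actually referenced in the section are returned.
print(extractor.find_used_references(section))  # {'docs': 'https://example.com/docs'}
```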