markitecture 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. markitecture/__init__.py +41 -0
  2. markitecture/__main__.py +4 -0
  3. markitecture/cli/__init__.py +3 -0
  4. markitecture/cli/app.py +38 -0
  5. markitecture/cli/commands/__init__.py +21 -0
  6. markitecture/cli/commands/config.py +84 -0
  7. markitecture/cli/commands/links.py +146 -0
  8. markitecture/cli/commands/metrics.py +193 -0
  9. markitecture/cli/commands/mkdocs.py +39 -0
  10. markitecture/cli/commands/split.py +48 -0
  11. markitecture/errors.py +64 -0
  12. markitecture/generators/__init__.py +3 -0
  13. markitecture/generators/configs/__init__.py +0 -0
  14. markitecture/generators/configs/mintlify_json.py +0 -0
  15. markitecture/generators/configs/mkdocs_yaml.py +317 -0
  16. markitecture/metrics/__init__.py +9 -0
  17. markitecture/metrics/analyzer.py +109 -0
  18. markitecture/metrics/badges/__init__.py +28 -0
  19. markitecture/metrics/badges/base.py +7 -0
  20. markitecture/metrics/badges/compact.py +35 -0
  21. markitecture/metrics/badges/detailed.py +60 -0
  22. markitecture/metrics/badges/minimal.py +19 -0
  23. markitecture/metrics/badges/modern.py +45 -0
  24. markitecture/metrics/badges/retro.py +23 -0
  25. markitecture/metrics/badges/shields.py +124 -0
  26. markitecture/metrics/svg_generator.py +70 -0
  27. markitecture/processing/__init__.py +0 -0
  28. markitecture/processing/link_validator.py +133 -0
  29. markitecture/processing/reflink_converter.py +198 -0
  30. markitecture/processing/reflink_extractor.py +82 -0
  31. markitecture/processing/text_splitter.py +290 -0
  32. markitecture/settings/__init__.py +9 -0
  33. markitecture/settings/config.py +61 -0
  34. markitecture/settings/validators.py +26 -0
  35. markitecture/utils/__init__.py +5 -0
  36. markitecture/utils/file_handler.py +24 -0
  37. markitecture/utils/printer.py +195 -0
  38. markitecture/utils/sanitizer.py +78 -0
  39. markitecture-0.1.15.dist-info/METADATA +271 -0
  40. markitecture-0.1.15.dist-info/RECORD +43 -0
  41. markitecture-0.1.15.dist-info/WHEEL +4 -0
  42. markitecture-0.1.15.dist-info/entry_points.txt +2 -0
  43. markitecture-0.1.15.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,290 @@
1
+ """Text splitting methods for parsing markdown content into sections."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Dict, List
7
+
8
+ from markitecture.processing.reflink_extractor import ReferenceLinkExtractor
9
+ from markitecture.utils.printer import RichPrinter
10
+ from markitecture.utils.sanitizer import sanitize_filename
11
+
12
+ _printer = RichPrinter()
13
+
14
+
15
+ @dataclass
16
+ class Section:
17
+ """
18
+ Represents a split markdown section.
19
+ """
20
+
21
+ title: str
22
+ content: str
23
+ level: int
24
+ filename: Path
25
+ parent_context: str | None = None
26
+ references: dict[str, str] | None = None
27
+
28
+ def __post_init__(self) -> None:
29
+ """Initialize references as an empty dictionary if not provided."""
30
+ if self.references is None:
31
+ self.references = {}
32
+
33
+
34
class MarkdownTextSplitter:
    """
    Split markdown content into sections based on specified heading level.

    Headings that fall inside a detected code block are never used as split
    points.
    """

    # Opening fence, the rest of its line, a lazily matched body, then the
    # SAME fence marker again. The backreference prevents a ``` block from
    # being "closed" by a ~~~ that merely appears inside it (and vice versa).
    _FENCED_BLOCK = re.compile(r"(```|~~~)[^\n]*\n.*?\1", re.DOTALL)
    # A line that starts with four whitespace characters or a tab.
    _INDENTED_LINE = re.compile(r"^(?:\s{4}|\t).*$")
    # Any ATX heading (# .. ######), optionally followed by HTML comments.
    _ANY_HEADING = re.compile(
        r"^(#{1,6})\s+(.+?)(?:\s+<!--.*?-->)*\s*$", re.MULTILINE
    )
    # A thematic break (***, ---, ___ ...) sitting at the very end of the text.
    _TRAILING_RULE = re.compile(r"\n[*_-]{3,}\s*$")

    def __init__(self, settings: object = None) -> None:
        """
        Store the settings object and compile the heading/reference patterns.

        Args:
            settings: Settings object exposing ``model_dump()`` and a ``split``
                attribute. When falsy, a ``MarkitectureApp()`` is created.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from markitecture.cli.app import MarkitectureApp

        self.settings = settings or MarkitectureApp()
        self._compile_patterns()
        _printer.print_debug(
            f"MarkdownSplitter initialized with settings: {self.settings}"
        )

    def process_file(self, content: str) -> List["Section"]:
        """
        Split ``content`` and write each section into the output directory.

        The directory is taken from ``self.settings.split.output_dir`` and is
        created if missing. When the settings object provides a
        ``process_mkdocs`` callable, it is invoked with the sections.

        Args:
            content: Full markdown text to split.

        Returns:
            The sections that were written, in document order.
        """
        _printer.print_info("Processing markdown content...")
        sections = self.split(content)
        output_dir = Path(self.settings.split.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        _printer.print_debug(f"Verified output directory: {output_dir}")

        for section in sections:
            section_path = output_dir / section.filename
            _printer.print_debug(f"Writing section '{section.title}' to {section_path}")
            section_path.write_text(section.content, encoding="utf-8")

        if hasattr(self.settings, "process_mkdocs"):
            # The mkdocs settings are only logged. Look them up defensively so
            # a settings object without a nested ``.settings`` attribute cannot
            # abort the run before process_mkdocs() is reached.
            holder = getattr(self.settings, "settings", self.settings)
            mkdocs_settings = getattr(holder, "mkdocs", None)
            _printer.print_info(
                f"Processing mkdocs.yml with settings: {mkdocs_settings}"
            )
            self.settings.process_mkdocs(sections)

        _printer.print_info("File processing completed successfully")
        return sections

    def split(self, content: str) -> List["Section"]:
        """
        Split markdown content into sections based on specified heading level.

        A section starts at every heading of the configured level and runs up
        to the next heading of the same or a higher (shallower) level. Deeper
        headings stay inside the current section. Content that belongs only to
        a shallower heading is not emitted. Headings inside code blocks are
        ignored.

        Args:
            content: Full markdown text to split.

        Returns:
            The sections in document order. When the text has no headings at
            all, a single ``README`` section holding the whole text.
        """
        _printer.print_info("Executing text splitting...")

        ref_handler = ReferenceLinkExtractor(content)

        _printer.print_debug(
            f"Extracted {len(ref_handler.references)} references from content"
        )

        code_spans = self._find_code_block_spans(content)
        headings = [
            match
            for match in self._ANY_HEADING.finditer(content)
            if not any(start <= match.start() <= end for start, end in code_spans)
        ]

        if not headings:
            _printer.print_info("No headings found. Creating single README section.")
            section = self._create_section(
                title="README",
                content=content,
                level=0,
                references=ref_handler.references,
            )
            return [section]

        # Target heading level is determined by number of # in settings
        target_level = len(self.settings.model_dump()["split"]["heading_level"])
        sections = []

        # Start offset and title of the section currently being collected.
        open_start = None
        open_title = None

        for match in headings:
            heading_level = len(match.group(1))  # Number of # symbols

            if heading_level > target_level:
                # Deeper heading: part of the open section, or ignored when
                # no section is open.
                continue

            # Same or shallower level: whatever was open ends right here.
            if open_start is not None:
                sections.append(
                    self._build_section(
                        ref_handler,
                        open_title,
                        content[open_start : match.start()],
                        target_level,
                    )
                )

            if heading_level == target_level:
                open_start = match.start()
                open_title = match.group(2).strip()
            else:
                # Shallower heading: its own content is not collected.
                open_start = None
                open_title = None

        # Handle the last section if we were building one
        if open_start is not None:
            sections.append(
                self._build_section(
                    ref_handler, open_title, content[open_start:], target_level
                )
            )

        _printer.print_info(
            f"Successfully split document into {len(sections)} sections."
        )
        return sections

    @classmethod
    def _find_code_block_spans(cls, content: str) -> list[tuple[int, int]]:
        """
        Return ``(start, end)`` character spans of code blocks in ``content``.

        Covers fenced blocks (``` or ~~~, closed by the same marker) and runs
        of lines indented by four whitespace characters or a tab, including
        blank lines inside or directly after such a run.
        """
        spans = [(m.start(), m.end()) for m in cls._FENCED_BLOCK.finditer(content)]

        lines = content.split("\n")
        # prefix[i] == len("\n".join(lines[:i])), built in one pass instead of
        # re-joining the line list for every block.
        prefix = [0]
        for idx, line in enumerate(lines):
            prefix.append(prefix[-1] + len(line) + (1 if idx else 0))

        i = 0
        total = len(lines)
        while i < total:
            if cls._INDENTED_LINE.match(lines[i]):
                block_start = prefix[i]
                while i < total and (
                    cls._INDENTED_LINE.match(lines[i]) or lines[i].strip() == ""
                ):
                    i += 1
                spans.append((block_start, prefix[i]))
            i += 1
        return spans

    def _build_section(
        self, ref_handler, title: str, raw_content: str, level: int
    ) -> "Section":
        """Strip the raw slice, collect the references it uses, and wrap it in a Section."""
        section_content = raw_content.strip()
        section_refs = ref_handler.find_used_references(section_content)
        return self._create_section(
            title=title,
            content=self._format_section_content(section_content, section_refs),
            level=level,
            references=section_refs,
        )

    def _compile_patterns(self) -> None:
        """
        Compile regex patterns based on settings.

        Reads ``heading_level`` and ``case_sensitive`` from the ``split`` entry
        of ``self.settings.model_dump()`` and sets ``heading_pattern``,
        ``reference_pattern`` and ``reference_usage``.
        """
        # NOTE(review): this raises if the "split" entry is None - confirm
        # callers always configure it.
        split_cfg = self.settings.model_dump()["split"]
        flags = 0 if split_cfg["case_sensitive"] else re.IGNORECASE
        self.heading_pattern = re.compile(
            f"^({re.escape(split_cfg['heading_level'])})\\s+(.+?)(?:\\s+<!--.*?-->)*\\s*$",
            re.MULTILINE | flags,
        )
        self.reference_pattern = re.compile(r"^\[([^\]]+)\]:\s+(.+)$", re.MULTILINE)
        self.reference_usage = re.compile(r"\[([^\]]+)\](?!\()", re.MULTILINE)

    def _create_section(
        self, title: str, content: str, level: int, references: Dict[str, str]
    ) -> "Section":
        """
        Create a new Section object.

        The filename is derived from ``title`` via ``sanitize_filename``.
        """
        _printer.print_debug(f"Creating section with title: {title}, level: {level}")
        return Section(
            title=title,
            content=content,
            level=level,
            filename=sanitize_filename(text=title),
            references=references,
        )

    def _format_section_content(self, content: str, references: Dict[str, str]) -> str:
        """
        Format section content with references and ensure proper spacing.

        Args:
            content: The main content of the section
            references: Dictionary of reference names to their URLs that are
                actually used in this section

        Returns:
            Formatted content with thematic break, references, and proper
            spacing, or an empty string when ``content`` is empty.
        """
        if not content:
            return ""

        base_content = content.rstrip()

        # Add thematic break if the content does not already end with one
        if not self._TRAILING_RULE.search(base_content):
            base_content += "\n\n---"

        # Only add references if there are any used in this section
        if references:
            ref_lines = "".join(
                f"[{ref_name}]: {ref_url}\n"
                for ref_name, ref_url in sorted(references.items())
            )
            base_content += "\n\n<!-- REFERENCE LINKS -->\n" + ref_lines

        # Ensure the file ends with exactly one newline
        return base_content.rstrip() + "\n"
@@ -0,0 +1,9 @@
1
+ from .config import MarkitectureApp
2
+ from .validators import ExistingFilePath, convert_to_path, validate_path
3
+
4
+ __all__ = [
5
+ "ExistingFilePath",
6
+ "MarkitectureApp",
7
+ "convert_to_path",
8
+ "validate_path",
9
+ ]
@@ -0,0 +1,61 @@
1
+ """CLI settings implemented using Pydantic Settings Management."""
2
+
3
+ from pydantic import AliasChoices, Field
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+
6
+ from markitecture.cli.commands.config import ConfigCommand
7
+ from markitecture.cli.commands.links import CheckLinksCommand, ReferenceLinksCommand
8
+ from markitecture.cli.commands.metrics import MetricsCommand
9
+ from markitecture.cli.commands.mkdocs import MkDocsCommand
10
+ from markitecture.cli.commands.split import SplitCommand
11
+
12
+
13
def _cli_field(default, description: str, *aliases: str):
    """Build a settings field from its default, help text, and accepted aliases."""
    return Field(
        default=default,
        description=description,
        validation_alias=AliasChoices(*aliases),
    )


class MarkitectureApp(BaseSettings):
    """
    Main CLI interface for markitecture.
    """

    # Each sub-command is optional and defaults to None; every field accepts
    # a short alias as well as its long name.
    config: ConfigCommand | None = _cli_field(
        None, "Manage configuration settings", "c", "config"
    )
    check_links: CheckLinksCommand | None = _cli_field(
        None, "Validate links in a markdown file", "cl", "check-links"
    )
    reference_links: ReferenceLinksCommand | None = _cli_field(
        None, "Convert links to reference style", "rl", "reflinks"
    )
    split: SplitCommand | None = _cli_field(
        None, "Split a markdown file into sections", "s", "split"
    )
    metrics: MetricsCommand | None = _cli_field(
        None, "Generate document readability metrics", "m", "metrics"
    )
    mkdocs: MkDocsCommand | None = _cli_field(
        None, "Generate MkDocs configuration from a Markdown file", "mk", "mkdocs"
    )
    version: bool = _cli_field(False, "Display the version number", "v", "version")

    model_config = SettingsConfigDict(
        case_sensitive=False,
        cli_enforce_required=False,
        cli_implicit_flags=True,
        cli_parse_args=True,
        env_prefix="MARKITECTURE_",
        extra="allow",
    )
@@ -0,0 +1,26 @@
1
+ """Pydantic functions and type annotations to validate user input."""
2
+
3
+ from pathlib import Path
4
+ from typing import Annotated
5
+
6
+ from pydantic import AfterValidator
7
+
8
+ from markitecture.errors import InvalidPathError
9
+
10
+
11
def convert_to_path(v: str) -> Path:
    """Wrap the given path string in a ``Path`` and return it."""
    as_path = Path(v)
    return as_path
14
+
15
+
16
def validate_path(v: Path) -> Path:
    """
    Return ``v`` unchanged when it points at an existing regular file.

    Raises:
        InvalidPathError: If the path does not exist or is not a file.
    """
    if v.exists() and v.is_file():
        return v

    raise InvalidPathError(
        message="The provided path does not exist or is not a file.",
        path=str(v),
    )
24
+
25
+
26
+ ExistingFilePath = Annotated[Path, AfterValidator(validate_path)]
@@ -0,0 +1,5 @@
1
+ from .file_handler import FileHandler
2
+ from .printer import RichPrinter
3
+ from .sanitizer import sanitize_filename
4
+
5
+ __all__ = ["FileHandler", "RichPrinter", "sanitize_filename"]
@@ -0,0 +1,24 @@
1
+ """File handling utilities with error handling."""
2
+
3
+ from pathlib import Path
4
+ from typing import Union
5
+
6
+ from markitecture.errors import FileOperationError
7
+
8
+
9
class FileHandler:
    """Read and write UTF-8 text files, reporting failures as FileOperationError."""

    def write(self, file_path: Union[str, Path], content: str) -> None:
        """
        Write ``content`` to ``file_path`` as UTF-8 text.

        Raises:
            FileOperationError: If anything goes wrong; the original exception
                is chained as the cause.
        """
        try:
            target = Path(file_path)
            target.write_text(content, encoding="utf-8")
        except Exception as exc:
            raise FileOperationError(f"Failed to write to {file_path}: {exc}") from exc

    def read(self, file_path: Union[str, Path]) -> str:
        """
        Return the UTF-8 text stored at ``file_path``.

        Raises:
            FileOperationError: If anything goes wrong; the original exception
                is chained as the cause.
        """
        try:
            text = Path(file_path).read_text(encoding="utf-8")
        except Exception as exc:
            raise FileOperationError(f"Failed to read {file_path}: {exc}") from exc
        return text
@@ -0,0 +1,195 @@
1
+ """Enhanced terminal output formatting with integrated table titles."""
2
+
3
+ from typing import List, Optional
4
+
5
+ from rich.box import ROUNDED, SIMPLE
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+ from rich.theme import Theme
9
+
10
+
11
class RichPrinter:
    """
    Utility class for Rich-based printing with integrated table titles and clickable links.

    All output goes through ``self.console``. Messages are passed to the
    console inside markup tags.
    NOTE(review): square brackets in caller-supplied text are therefore
    presumably parsed as Rich markup - confirm this is intended.
    """

    def __init__(self) -> None:
        """Initialize the RichPrinter with a custom theme and console."""
        self.theme = Theme({
            "info": "cyan",
            "success": "bold green",
            "error": "bold red",
            "warning": "yellow",
            "header": "bold blue",
            "title": "bold magenta",
            "key": "bold white",
            "value": "dim",
            "table_title": "bold white on blue",
        })
        self.console = Console(theme=self.theme)

    # -------------------------------------------------------------------------
    # Basic text-level messages
    # -------------------------------------------------------------------------
    def _print_styled(self, style: str, message: str) -> None:
        """Print ``message`` wrapped in an opening and closing ``style`` tag."""
        self.console.print(f"[{style}]{message}[/{style}]")

    def print_debug(self, message: str) -> None:
        """Print a debug message."""
        self._print_styled("dim", message)

    def print_info(self, message: str) -> None:
        """Print an informational message."""
        self._print_styled("info", message)

    def print_success(self, message: str) -> None:
        """Print a success message."""
        self._print_styled("success", message)

    def print_error(self, message: str) -> None:
        """Print an error message."""
        self._print_styled("error", message)

    def print_warning(self, message: str) -> None:
        """Print a warning message."""
        self._print_styled("warning", message)

    def print_title(self, title: str) -> None:
        """Print a styled title."""
        self._print_styled("title", title)

    def print_version(self, version: str) -> None:
        """Print the top-level package name followed by ``version``."""
        # __package__ can be None when the module is not imported as part of
        # a package; fall back to the module name instead of crashing.
        package_name = (__package__ or __name__).split(".")[0]
        self.console.print(f"[bold green]{package_name}[/bold green] {version}")

    # -------------------------------------------------------------------------
    # Table printing methods
    # -------------------------------------------------------------------------
    def _print_with_title(self, title: str, content_table: "Table") -> None:
        """Stack a title row above ``content_table`` and print it between blank lines."""
        # Main container with no border
        main_table = Table(box=None, show_header=False, show_edge=False, padding=0)
        main_table.add_column("content", ratio=1)

        # Title sub-table
        title_table = Table(box=SIMPLE, show_header=False, padding=(0, 1))
        title_table.add_column("title", style="table_title", ratio=1)
        title_table.add_row(title)

        main_table.add_row(title_table)
        main_table.add_row(content_table)

        self.console.print()
        self.console.print(main_table)
        self.console.print()

    @staticmethod
    def _link_cell(column: str, value: object) -> str:
        """Render one link-table cell; http(s) values in the ``url`` column become clickable."""
        if column == "url" and isinstance(value, str) and value.startswith("http"):
            return f"[link={value}]{value}[/link]"
        return str(value)

    def print_key_value_table(self, title: str, data: dict[str, str]) -> None:
        """
        Print a table with integrated title and key-value pairs.

        Args:
            title: The title of the table
            data: A dictionary of key-value pairs to display
        """
        content_table = Table(box=ROUNDED, show_header=False, padding=(0, 1))
        content_table.add_column("Key", style="key", no_wrap=True)
        content_table.add_column("Value", style="value")

        for key, val in data.items():
            content_table.add_row(key, val)

        self._print_with_title(title, content_table)

    def print_table(
        self, title: str, headers: List[str], rows: List[List[str]]
    ) -> None:
        """
        Print a custom table with integrated title.

        Args:
            title: The title of the table
            headers: List of column headers
            rows: List of row data, each row being a list of strings
        """
        content_table = Table(
            box=ROUNDED, show_header=True, header_style="bold blue", padding=(0, 1)
        )

        for header in headers:
            content_table.add_column(header, style="key")

        for row in rows:
            content_table.add_row(*row)

        self._print_with_title(title, content_table)

    def print_link_table(
        self, title: str, link_rows: List[dict], columns: Optional[List[str]] = None
    ) -> None:
        """
        Print a table specifically for link data, allowing clickable URLs.

        Each element in link_rows is a dict; values are looked up by column
        name and a missing key renders as an empty cell.

        Args:
            title: The table title
            link_rows: A list of dicts representing link info.
            columns: Optional list of columns to display in table order.
                If None, uses ["line", "status", "url", "error"] by default.
        """
        if columns is None:
            columns = ["line", "status", "url", "error"]

        content_table = Table(
            box=ROUNDED,
            show_header=True,
            header_style="bold blue",
            padding=(0, 1),
            collapse_padding=True,
        )

        for col in columns:
            content_table.add_column(col.capitalize(), style="key")

        for row_data in link_rows:
            content_table.add_row(
                *(self._link_cell(col, row_data.get(col, "")) for col in columns)
            )

        self._print_with_title(title, content_table)
@@ -0,0 +1,78 @@
1
+ """Module for sanitizing markdown headers into safe filenames."""
2
+
3
+ import html
4
+ import re
5
+ from pathlib import Path
6
+
7
+
8
def sanitize_filename(text: str, extension: str = ".md") -> Path:
    """
    Convert a markdown header into a safe filename.

    Links and images are reduced to their visible text, in both reference
    style (``[text][ref]``) and inline style (``[text](url)``), so link
    targets never leak into the filename. HTML tags, ``{...}`` attribute
    blocks and emphasis/code markers are dropped. The result is lower-cased
    and hyphen-separated.

    Args:
        text: The header text to sanitize
        extension: File extension to append (defaults to .md)

    Returns:
        Path object with sanitized filename; ``unnamed-section`` is used as
        the stem when nothing usable is left.
    """
    # Decode HTML entities
    text = html.unescape(text)

    # Remove markdown heading markers
    text = re.sub(r"^#+\s*", "", text)

    # Reduce links and images to their label. Repeat until stable so nested
    # forms such as a badge inside a link, [![alt](img)](url), collapse to
    # "alt". Every substitution shortens the text, so the loop terminates.
    link_pattern = re.compile(r"!?\[([^\]]*)\](?:\[[^\]]*\]|\([^)]*\))")
    while True:
        reduced = link_pattern.sub(r"\1", text)
        if reduced == text:
            break
        text = reduced

    # Remove HTML tags and attributes (inline HTML)
    text = re.sub(r"<[^>]+>", "", text)

    # Remove markdown attributes in curly braces (e.g., {#custom-id}, {#})
    text = re.sub(r"\{[^}]*\}", "", text)

    # Remove any remaining markdown syntax
    text = re.sub(r"[*_`~]", "", text)

    # Handle special cases where text is empty
    if not text.strip():
        text = "unnamed-section"

    # Convert to lowercase and replace spaces/special chars with hyphens
    text = text.strip().lower()
    text = re.sub(r"[^\w\s-]", "", text)  # Remove special characters
    text = re.sub(r"[-\s]+", "-", text)  # Replace spaces and repeated hyphens

    # Remove leading/trailing hyphens
    text = text.strip("-")

    if not text:
        text = "unnamed-section"

    return Path(f"{text}{extension}")
54
+
55
+
56
def extract_image_alt_text(text: str) -> str:
    """Return the alt text of the first markdown image marker in ``text``.

    Args:
        text: Text containing markdown image references

    Returns:
        The text between ``![`` and the next ``]``, or an empty string when
        no image marker is present
    """
    found = re.search(r"!\[([^\]]*)\]", text)
    if found is None:
        return ""
    return found.group(1)
67
+
68
+
69
def strip_markdown_header(text: str) -> str:
    """Drop the leading run of ``#`` characters and the whitespace after it.

    Args:
        text: The header text containing markdown syntax

    Returns:
        Text with header markers removed but other formatting intact;
        unchanged when it does not start with ``#``
    """
    marker = re.match(r"#+\s*", text)
    if marker is None:
        return text
    return text[marker.end():]