PyPI - markitecture - Versions diffs - 0.1.15__py3-none-any.whl - Mend

markitecture 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

markitecture/__init__.py +41 -0
markitecture/__main__.py +4 -0
markitecture/cli/__init__.py +3 -0
markitecture/cli/app.py +38 -0
markitecture/cli/commands/__init__.py +21 -0
markitecture/cli/commands/config.py +84 -0
markitecture/cli/commands/links.py +146 -0
markitecture/cli/commands/metrics.py +193 -0
markitecture/cli/commands/mkdocs.py +39 -0
markitecture/cli/commands/split.py +48 -0
markitecture/errors.py +64 -0
markitecture/generators/__init__.py +3 -0
markitecture/generators/configs/__init__.py +0 -0
markitecture/generators/configs/mintlify_json.py +0 -0
markitecture/generators/configs/mkdocs_yaml.py +317 -0
markitecture/metrics/__init__.py +9 -0
markitecture/metrics/analyzer.py +109 -0
markitecture/metrics/badges/__init__.py +28 -0
markitecture/metrics/badges/base.py +7 -0
markitecture/metrics/badges/compact.py +35 -0
markitecture/metrics/badges/detailed.py +60 -0
markitecture/metrics/badges/minimal.py +19 -0
markitecture/metrics/badges/modern.py +45 -0
markitecture/metrics/badges/retro.py +23 -0
markitecture/metrics/badges/shields.py +124 -0
markitecture/metrics/svg_generator.py +70 -0
markitecture/processing/__init__.py +0 -0
markitecture/processing/link_validator.py +133 -0
markitecture/processing/reflink_converter.py +198 -0
markitecture/processing/reflink_extractor.py +82 -0
markitecture/processing/text_splitter.py +290 -0
markitecture/settings/__init__.py +9 -0
markitecture/settings/config.py +61 -0
markitecture/settings/validators.py +26 -0
markitecture/utils/__init__.py +5 -0
markitecture/utils/file_handler.py +24 -0
markitecture/utils/printer.py +195 -0
markitecture/utils/sanitizer.py +78 -0
markitecture-0.1.15.dist-info/METADATA +271 -0
markitecture-0.1.15.dist-info/RECORD +43 -0
markitecture-0.1.15.dist-info/WHEEL +4 -0
markitecture-0.1.15.dist-info/entry_points.txt +2 -0
markitecture-0.1.15.dist-info/licenses/LICENSE +21 -0

markitecture/processing/text_splitter.py ADDED Viewed

@@ -0,0 +1,290 @@
+"""Text splitting methods for parsing markdown content into sections."""
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List
+from markitecture.processing.reflink_extractor import ReferenceLinkExtractor
+from markitecture.utils.printer import RichPrinter
+from markitecture.utils.sanitizer import sanitize_filename
+# Module-level printer used by the splitter below for all of its info/debug output.
+_printer = RichPrinter()
+@dataclass
+class Section:
+    """
+    One piece of a markdown document produced by the splitter.
+    ``references`` is always a dictionary after construction: a missing
+    (``None``) value is replaced with a new empty dict.
+    """
+    title: str
+    content: str
+    level: int
+    filename: Path
+    parent_context: str | None = None
+    references: dict[str, str] | None = None
+    def __post_init__(self) -> None:
+        """Swap a missing references mapping for a new empty dictionary."""
+        self.references = {} if self.references is None else self.references
+class MarkdownTextSplitter:
+    """
+    Split markdown content into sections based on specified heading level.
+    The target level is the length of ``settings.split.heading_level`` (a run
+    of ``#`` characters). Each heading of exactly that level opens a section,
+    deeper headings stay inside the open section, and a shallower heading
+    closes it without opening a new one.
+    """
+    # Fenced code block: the opening fence must start a line (at most three
+    # spaces of indent) and is closed only by a fence of the same character
+    # that is at least as long. An unterminated fence runs to the end of the
+    # document, so trailing "# comment" lines are never mistaken for headings.
+    _FENCED_BLOCK = re.compile(
+        r"^ {0,3}(?P<fence>`{3,}|~{3,})[^\n]*\n.*?(?:^ {0,3}(?P=fence)[`~]*[ \t]*$|\Z)",
+        re.DOTALL | re.MULTILINE,
+    )
+    # Any ATX heading (# through ######); trailing HTML comments are not part of the title.
+    _ANY_HEADING = re.compile(
+        r"^(#{1,6})\s+(.+?)(?:\s+<!--.*?-->)*\s*$", re.MULTILINE
+    )
+    # A thematic break (three or more of * _ -) on the last line of a section.
+    _TRAILING_HR = re.compile(r"\n[*_-]{3,}\s*$")
+    def __init__(self, settings: object = None) -> None:
+        """
+        Store the settings object and pre-compile the settings-dependent patterns.
+        Args:
+            settings: Object exposing ``model_dump()`` and a ``split`` attribute;
+                a default ``MarkitectureApp`` is created when omitted or falsy.
+        """
+        # Imported here rather than at module level, as in the original code
+        # (presumably to avoid a circular import - TODO confirm).
+        from markitecture.cli.app import MarkitectureApp
+        self.settings = settings or MarkitectureApp()
+        self._compile_patterns()
+        _printer.print_debug(
+            f"MarkdownSplitter initialized with settings: {self.settings}"
+        )
+    def process_file(self, content: str) -> List[Section]:
+        """
+        Split ``content`` and write every section into the configured output directory.
+        Args:
+            content: Full markdown text to split.
+        Returns:
+            The sections produced by :meth:`split`, in document order.
+        """
+        _printer.print_info("Processing markdown content...")
+        sections = self.split(content)
+        output_dir = Path(self.settings.split.output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        _printer.print_debug(f"Verified output directory: {output_dir}")
+        # NOTE(review): filenames derive from titles only, so two sections with
+        # the same title write to the same path and the later one wins.
+        for section in sections:
+            section_path = output_dir / section.filename
+            _printer.print_debug(f"Writing section '{section.title}' to {section_path}")
+            section_path.write_text(section.content, encoding="utf-8")
+        if hasattr(self.settings, "process_mkdocs"):
+            # NOTE(review): the guard checks ``process_mkdocs`` but the message
+            # reads ``self.settings.settings.mkdocs`` - confirm that attribute exists.
+            _printer.print_info(
+                f"Processing mkdocs.yml with settings: {self.settings.settings.mkdocs}"
+            )
+            self.settings.process_mkdocs(sections)
+        _printer.print_info("File processing completed successfully")
+        return sections
+    def split(self, content: str) -> List[Section]:
+        """
+        Split markdown content into sections based on specified heading level.
+        Only headings of the target level start a section. Content that precedes
+        the first target-level heading, or that follows a shallower heading, is
+        not part of any section. Headings inside fenced code blocks are ignored.
+        Args:
+            content: Full markdown text to split.
+        Returns:
+            One ``Section`` per target-level heading, or a single "README"
+            section holding the whole content when there are no headings at all.
+        """
+        _printer.print_info("Executing text splitting...")
+        ref_handler = ReferenceLinkExtractor(content)
+        _printer.print_debug(
+            f"Extracted {len(ref_handler.references)} references from content"
+        )
+        headings = self._find_headings(content)
+        if not headings:
+            _printer.print_info("No headings found. Creating single README section.")
+            return [
+                self._create_section(
+                    title="README",
+                    content=content,
+                    level=0,
+                    references=ref_handler.references,
+                )
+            ]
+        # Target heading level is determined by number of # in settings
+        target_level = len(self.settings.model_dump()["split"]["heading_level"])
+        sections: List[Section] = []
+        # Start offset and title of the section currently being built, if any
+        open_start = None
+        open_title = None
+        for match in headings:
+            heading_level = len(match.group(1))  # Number of # symbols
+            if heading_level > target_level:
+                # Nested heading: stays inside the open section (or is skipped
+                # when no section is open yet).
+                continue
+            # A heading at or above the target level closes the open section.
+            if open_start is not None:
+                sections.append(
+                    self._build_section(
+                        open_title,
+                        content[open_start : match.start()],
+                        target_level,
+                        ref_handler,
+                    )
+                )
+                open_start = None
+                open_title = None
+            # Only a heading of exactly the target level opens a new section.
+            if heading_level == target_level:
+                open_start = match.start()
+                open_title = match.group(2).strip()
+        # Handle the last section if we were building one
+        if open_start is not None:
+            sections.append(
+                self._build_section(
+                    open_title, content[open_start:], target_level, ref_handler
+                )
+            )
+        _printer.print_info(
+            f"Successfully split document into {len(sections)} sections."
+        )
+        return sections
+    def _find_headings(self, content: str) -> List[re.Match]:
+        """Return every heading match that does not start inside a fenced code block."""
+        fenced_spans = [block.span() for block in self._FENCED_BLOCK.finditer(content)]
+        # Indented code blocks need no handling: the heading pattern requires
+        # "#" in column 0, so an indented line can never match it.
+        return [
+            match
+            for match in self._ANY_HEADING.finditer(content)
+            if not any(start <= match.start() < end for start, end in fenced_spans)
+        ]
+    def _build_section(
+        self, title: str, raw_content: str, level: int, ref_handler: object
+    ) -> Section:
+        """Strip a raw slice, collect the references it uses, and wrap it in a Section."""
+        section_content = raw_content.strip()
+        section_refs = ref_handler.find_used_references(section_content)
+        return self._create_section(
+            title=title,
+            content=self._format_section_content(section_content, section_refs),
+            level=level,
+            references=section_refs,
+        )
+    def _compile_patterns(self) -> None:
+        """Compile regex patterns based on settings."""
+        # Dump the settings once instead of once per lookup.
+        split_settings = self.settings.model_dump()["split"]
+        flags = 0 if split_settings["case_sensitive"] else re.IGNORECASE
+        self.heading_pattern = re.compile(
+            f"^({re.escape(split_settings['heading_level'])})\\s+(.+?)(?:\\s+<!--.*?-->)*\\s*$",
+            re.MULTILINE | flags,
+        )
+        self.reference_pattern = re.compile(r"^\[([^\]]+)\]:\s+(.+)$", re.MULTILINE)
+        self.reference_usage = re.compile(r"\[([^\]]+)\](?!\()", re.MULTILINE)
+    def _create_section(
+        self, title: str, content: str, level: int, references: Dict[str, str]
+    ) -> Section:
+        """Create a new Section object whose filename is derived from the title."""
+        _printer.print_debug(f"Creating section with title: {title}, level: {level}")
+        return Section(
+            title=title,
+            content=content,
+            level=level,
+            filename=sanitize_filename(text=title),
+            references=references,
+        )
+    def _format_section_content(self, content: str, references: Dict[str, str]) -> str:
+        """
+        Format section content with references and ensure proper spacing.
+        Args:
+            content: The main content of the section
+            references: Dictionary of reference names to their URLs that are
+                    actually used in this section
+        Returns:
+            Formatted content with thematic break, references, and proper spacing;
+            an empty string when ``content`` is empty
+        """
+        if not content:
+            return ""
+        # Prepare the base content by trimming trailing whitespace
+        base_content = content.rstrip()
+        # Add thematic break if the content does not already end with one
+        if not self._TRAILING_HR.search(base_content):
+            base_content += "\n\n---"
+        # Only add references if there are any used in this section
+        if references:
+            ref_lines = "".join(
+                f"[{ref_name}]: {ref_url}\n"
+                for ref_name, ref_url in sorted(references.items())
+            )
+            base_content += "\n\n<!-- REFERENCE LINKS -->\n" + ref_lines
+        # Ensure the file ends with exactly one newline
+        return base_content.rstrip() + "\n"

markitecture/settings/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .config import MarkitectureApp
+from .validators import ExistingFilePath, convert_to_path, validate_path
+# Names re-exported by this package for ``from ... import *``.
+__all__ = [
+    "ExistingFilePath",
+    "MarkitectureApp",
+    "convert_to_path",
+    "validate_path",
+]

markitecture/settings/config.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""CLI settings implemented using Pydantic Settings Management."""
+from pydantic import AliasChoices, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from markitecture.cli.commands.config import ConfigCommand
+from markitecture.cli.commands.links import CheckLinksCommand, ReferenceLinksCommand
+from markitecture.cli.commands.metrics import MetricsCommand
+from markitecture.cli.commands.mkdocs import MkDocsCommand
+from markitecture.cli.commands.split import SplitCommand
+class MarkitectureApp(BaseSettings):
+    """
+    Main CLI interface for markitecture.
+    Each optional field below holds one subcommand model and defaults to
+    ``None``; ``version`` is a plain boolean flag. Every field accepts a short
+    and a long alias through ``AliasChoices``.
+    """
+    # Subcommand: configuration management (aliases "c" / "config").
+    config: ConfigCommand | None = Field(
+        default=None,
+        description="Manage configuration settings",
+        validation_alias=AliasChoices("c", "config"),
+    )
+    # Subcommand: link validation (aliases "cl" / "check-links").
+    check_links: CheckLinksCommand | None = Field(
+        default=None,
+        description="Validate links in a markdown file",
+        validation_alias=AliasChoices("cl", "check-links"),
+    )
+    # Subcommand: reference-style link conversion (aliases "rl" / "reflinks").
+    reference_links: ReferenceLinksCommand | None = Field(
+        default=None,
+        description="Convert links to reference style",
+        validation_alias=AliasChoices("rl", "reflinks"),
+    )
+    # Subcommand: markdown splitting (aliases "s" / "split").
+    split: SplitCommand | None = Field(
+        default=None,
+        description="Split a markdown file into sections",
+        validation_alias=AliasChoices("s", "split"),
+    )
+    # Subcommand: readability metrics (aliases "m" / "metrics").
+    metrics: MetricsCommand | None = Field(
+        default=None,
+        description="Generate document readability metrics",
+        validation_alias=AliasChoices("m", "metrics"),
+    )
+    # Subcommand: MkDocs configuration generation (aliases "mk" / "mkdocs").
+    mkdocs: MkDocsCommand | None = Field(
+        default=None,
+        description="Generate MkDocs configuration from a Markdown file",
+        validation_alias=AliasChoices("mk", "mkdocs"),
+    )
+    # Flag: show the version number (aliases "v" / "version").
+    version: bool = Field(
+        default=False,
+        description="Display the version number",
+        validation_alias=AliasChoices("v", "version"),
+    )
+    # Settings behaviour: CLI parsing is enabled (``cli_parse_args``), environment
+    # variables use the "MARKITECTURE_" prefix, and unknown fields are allowed.
+    # NOTE(review): exact semantics of the cli_* options are defined by
+    # pydantic-settings - confirm against its documentation.
+    model_config = SettingsConfigDict(
+        case_sensitive=False,
+        cli_enforce_required=False,
+        cli_implicit_flags=True,
+        cli_parse_args=True,
+        env_prefix="MARKITECTURE_",
+        extra="allow",
+    )

markitecture/settings/validators.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""Pydantic functions and type annotations to validate user input."""
+from pathlib import Path
+from typing import Annotated
+from pydantic import AfterValidator
+from markitecture.errors import InvalidPathError
+def convert_to_path(v: str) -> Path:
+    """Wrap the given path string in a ``pathlib.Path``."""
+    as_path = Path(v)
+    return as_path
+def validate_path(v: Path) -> Path:
+    """Return ``v`` unchanged when it is an existing file, otherwise raise InvalidPathError."""
+    # Guard-clause form: the happy path returns first, the error is the fall-through.
+    if v.exists() and v.is_file():
+        return v
+    raise InvalidPathError(
+        message="The provided path does not exist or is not a file.",
+        path=str(v),
+    )
+# Annotated Path type: pydantic runs validate_path after its own Path validation.
+ExistingFilePath = Annotated[Path, AfterValidator(validate_path)]

markitecture/utils/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .file_handler import FileHandler
+from .printer import RichPrinter
+from .sanitizer import sanitize_filename
+# Names re-exported by this package for ``from ... import *``.
+__all__ = ["FileHandler", "RichPrinter", "sanitize_filename"]

markitecture/utils/file_handler.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""File handling utilities with error handling."""
+from pathlib import Path
+from typing import Union
+from markitecture.errors import FileOperationError
+class FileHandler:
+    """UTF-8 text file reader/writer that reports any failure as FileOperationError."""
+    def write(self, file_path: Union[str, Path], content: str) -> None:
+        """Write ``content`` to ``file_path`` as UTF-8, wrapping any failure."""
+        try:
+            target = Path(file_path)
+            target.write_text(content, encoding="utf-8")
+        except Exception as exc:
+            raise FileOperationError(f"Failed to write to {file_path}: {exc}") from exc
+    def read(self, file_path: Union[str, Path]) -> str:
+        """Return the UTF-8 text stored at ``file_path``, wrapping any failure."""
+        try:
+            text = Path(file_path).read_text(encoding="utf-8")
+        except Exception as exc:
+            raise FileOperationError(f"Failed to read {file_path}: {exc}") from exc
+        else:
+            return text

markitecture/utils/printer.py ADDED Viewed

@@ -0,0 +1,195 @@
+"""Enhanced terminal output formatting with integrated table titles."""
+from typing import List, Optional
+from rich.box import ROUNDED, SIMPLE
+from rich.console import Console
+from rich.table import Table
+from rich.theme import Theme
+class RichPrinter:
+    """
+    Utility class for Rich-based printing with integrated table titles and clickable links.
+    All output goes through one themed ``Console``. The three table printers
+    build only their content table and share one layout helper for the title.
+    """
+    def __init__(self) -> None:
+        """Initialize the RichPrinter with a custom theme and console."""
+        self.theme = Theme({
+            "info": "cyan",
+            "success": "bold green",
+            "error": "bold red",
+            "warning": "yellow",
+            "header": "bold blue",
+            "title": "bold magenta",
+            "key": "bold white",
+            "value": "dim",
+            "table_title": "bold white on blue",
+        })
+        self.console = Console(theme=self.theme)
+    # -------------------------------------------------------------------------
+    # Basic text-level messages
+    # -------------------------------------------------------------------------
+    def print_debug(self, message: str) -> None:
+        """Print a debug message."""
+        self.console.print(f"[dim]{message}[/dim]")
+    def print_info(self, message: str) -> None:
+        """Print an informational message."""
+        self.console.print(f"[info]{message}[/info]")
+    def print_success(self, message: str) -> None:
+        """Print a success message."""
+        self.console.print(f"[success]{message}[/success]")
+    def print_error(self, message: str) -> None:
+        """Print an error message."""
+        self.console.print(f"[error]{message}[/error]")
+    def print_warning(self, message: str) -> None:
+        """Print a warning message."""
+        self.console.print(f"[warning]{message}[/warning]")
+    def print_title(self, title: str) -> None:
+        """Print a styled title."""
+        self.console.print(f"[title]{title}[/title]")
+    def print_version(self, version: str) -> None:
+        """Print the top-level package name followed by the version number."""
+        # __package__ can be None or empty when this module is not loaded as
+        # part of a package; fall back to __name__ instead of raising.
+        package_name = (__package__ or __name__).split(".")[0]
+        self.console.print(f"[bold green]{package_name}[/bold green] {version}")
+    # -------------------------------------------------------------------------
+    # Table printing methods
+    # -------------------------------------------------------------------------
+    def _print_titled_table(self, title: str, content_table: Table) -> None:
+        """Stack a styled title row above ``content_table`` and print it between blank lines."""
+        # Borderless outer container that holds the title and the content
+        main_table = Table(box=None, show_header=False, show_edge=False, padding=0)
+        main_table.add_column("content", ratio=1)
+        # Title sub-table
+        title_table = Table(box=SIMPLE, show_header=False, padding=(0, 1))
+        title_table.add_column("title", style="table_title", ratio=1)
+        title_table.add_row(title)
+        main_table.add_row(title_table)
+        main_table.add_row(content_table)
+        self.console.print()
+        self.console.print(main_table)
+        self.console.print()
+    def print_key_value_table(self, title: str, data: dict[str, str]) -> None:
+        """
+        Print a table with integrated title and key-value pairs.
+        Args:
+            title: The title of the table
+            data: A dictionary of key-value pairs to display
+        """
+        content_table = Table(box=ROUNDED, show_header=False, padding=(0, 1))
+        content_table.add_column("Key", style="key", no_wrap=True)
+        content_table.add_column("Value", style="value")
+        for key, val in data.items():
+            content_table.add_row(key, val)
+        self._print_titled_table(title, content_table)
+    def print_table(
+        self, title: str, headers: List[str], rows: List[List[str]]
+    ) -> None:
+        """
+        Print a custom table with integrated title.
+        Args:
+            title: The title of the table
+            headers: List of column headers
+            rows: List of row data, each row being a list of strings
+        """
+        content_table = Table(
+            box=ROUNDED, show_header=True, header_style="bold blue", padding=(0, 1)
+        )
+        for header in headers:
+            content_table.add_column(header, style="key")
+        for row in rows:
+            content_table.add_row(*row)
+        self._print_titled_table(title, content_table)
+    def print_link_table(
+        self, title: str, link_rows: List[dict], columns: Optional[List[str]] = None
+    ) -> None:
+        """
+        Print a table specifically for link data, allowing clickable URLs.
+        Each element in link_rows is a dict; keys missing from a row are shown
+        as an empty cell.
+        Args:
+            title: The table title
+            link_rows: A list of dicts representing link info.
+            columns: Optional list of columns to display in table order.
+                     If None, uses ["line", "status", "url", "error"] by default.
+        """
+        if columns is None:
+            columns = ["line", "status", "url", "error"]
+        content_table = Table(
+            box=ROUNDED,
+            show_header=True,
+            header_style="bold blue",
+            padding=(0, 1),
+            collapse_padding=True,
+        )
+        for col in columns:
+            content_table.add_column(col.capitalize(), style="key")
+        for row_data in link_rows:
+            row_values = []
+            for col in columns:
+                val = row_data.get(col, "")
+                if col == "url" and isinstance(val, str) and val.startswith("http"):
+                    # Rich link markup makes the URL clickable in the terminal
+                    row_values.append(f"[link={val}]{val}[/link]")
+                else:
+                    row_values.append(str(val))
+            content_table.add_row(*row_values)
+        self._print_titled_table(title, content_table)

markitecture/utils/sanitizer.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Module for sanitizing markdown headers into safe filenames."""
+import html
+import re
+from pathlib import Path
+def sanitize_filename(text: str, extension: str = ".md") -> Path:
+    """
+    Convert a markdown header into a safe filename.
+    Links and images, both reference style (``[text][ref]``) and inline style
+    (``[text](url)``), are reduced to their visible text so that URLs never
+    end up in the filename.
+    Args:
+        text: The header text to sanitize
+        extension: File extension to append (defaults to .md)
+    Returns:
+        Path object with sanitized filename; "unnamed-section" plus the
+        extension when nothing usable remains
+    """
+    # Either link target form: [ref] or (url)
+    link_target = r"(?:\[[^\]]*\]|\([^)]*\))"
+    # Decode HTML entities
+    text = html.unescape(text)
+    # Remove markdown heading markers
+    text = re.sub(r"^#+\s*", "", text)
+    # Images are replaced before links so a linked image such as
+    # [![alt][img]][target] collapses to its alt text in two passes.
+    text = re.sub(r"!\[([^\]]*)\]" + link_target, r"\1", text)
+    text = re.sub(r"\[([^\]]*)\]" + link_target, r"\1", text)
+    # Remove HTML tags and attributes (inline HTML)
+    text = re.sub(r"<[^>]+>", "", text)
+    # Remove markdown attributes in curly braces (e.g., {#custom-id}, {#})
+    text = re.sub(r"\{[^}]*\}", "", text)
+    # Remove any remaining markdown syntax
+    text = re.sub(r"[*_`~]", "", text)
+    # Convert to lowercase and replace spaces/special chars with hyphens
+    text = text.strip().lower()
+    text = re.sub(r"[^\w\s-]", "", text)  # Remove special characters
+    text = re.sub(r"[-\s]+", "-", text)  # Replace spaces and repeated hyphens
+    # Remove leading/trailing hyphens
+    text = text.strip("-")
+    # A single check here covers headers that were empty from the start as
+    # well as ones that became empty during cleaning.
+    if not text:
+        text = "unnamed-section"
+    return Path(f"{text}{extension}")
+def extract_image_alt_text(text: str) -> str:
+    """Return the alt text of the first markdown image marker in ``text``.
+    Args:
+        text: Text containing markdown image references
+    Returns:
+        The text between ``![`` and ``]`` of the first image, or an empty
+        string when there is no image marker
+    """
+    found = re.search(r"!\[([^\]]*)\]", text)
+    if found is None:
+        return ""
+    return found.group(1)
+def strip_markdown_header(text: str) -> str:
+    """Drop the leading run of ``#`` markers, and the whitespace after it, from ``text``.
+    Args:
+        text: The header text containing markdown syntax
+    Returns:
+        The text without its leading header markers; all other formatting
+        is left untouched
+    """
+    leading_markers = re.compile(r"^#+\s*")
+    # The pattern is anchored at the start, so at most one replacement can happen.
+    return leading_markers.sub("", text, count=1)