PyPI - diary-docs - Versions diffs - 0.1.0__py3-none-any.whl - Mend

diary-docs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

diary/__init__.py +1 -0
diary/__main__.py +3 -0
diary/aimb/__init__.py +48 -0
diary/aimb/hasher.py +157 -0
diary/aimb/merge.py +252 -0
diary/aimb/parser.py +202 -0
diary/cli.py +999 -0
diary/git_utils.py +202 -0
diary/indexer/__init__.py +44 -0
diary/indexer/database.py +340 -0
diary/indexer/extractors.py +468 -0
diary/indexer/gitignore.py +62 -0
diary/indexer/indexer.py +511 -0
diary/indexer/reporter.py +137 -0
diary/indexer/scanner.py +65 -0
diary/sync/__init__.py +33 -0
diary/sync/detector.py +405 -0
diary/sync/engine.py +404 -0
diary/sync/protocol.py +176 -0
diary/templates.py +102 -0
diary_docs-0.1.0.dist-info/METADATA +228 -0
diary_docs-0.1.0.dist-info/RECORD +26 -0
diary_docs-0.1.0.dist-info/WHEEL +5 -0
diary_docs-0.1.0.dist-info/entry_points.txt +2 -0
diary_docs-0.1.0.dist-info/licenses/LICENSE +21 -0
diary_docs-0.1.0.dist-info/top_level.txt +1 -0

diary/git_utils.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""git_utils — Git operations for branch detection, diff, and detached HEAD handling.
+All functions use ``subprocess.run(['git', ...])`` with pure stdlib — no external
+git libraries. Every public function handles errors gracefully, returning sensible
+defaults instead of raising.
+"""
+from __future__ import annotations
+import subprocess
+import unicodedata
+from pathlib import Path
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def is_git_repo(workspace_path: Path) -> bool:
+    """Return ``True`` if *workspace_path* is inside a Git repository.
+    The directory itself and each of its ancestors are checked for a ``.git``
+    entry. The entry may be a directory (ordinary clone) or a regular file
+    (linked worktree or submodule, where ``.git`` holds a ``gitdir:`` pointer).
+    Parameters
+    ----------
+    workspace_path : Path
+        Repository root or any directory inside it.
+    Returns
+    -------
+    bool
+        ``False`` when no ``.git`` entry is found or the path cannot be inspected.
+    """
+    try:
+        start = Path(workspace_path).resolve()
+        # Walk upwards so that sub-directories of a checkout are recognised too.
+        return any((candidate / ".git").exists() for candidate in (start, *start.parents))
+    except (OSError, RuntimeError):
+        # Unreadable or unresolvable path (e.g. symlink loop) — not a usable repository.
+        return False
+def get_current_branch(workspace_path: Path) -> str:
+    """Look up the name of the branch that is currently checked out.
+    The name is obtained by running ``git rev-parse --abbrev-ref HEAD`` with
+    *workspace_path* as the working directory.
+    Parameters
+    ----------
+    workspace_path : Path
+        Path to the repository root (or any directory inside it).
+    Returns
+    -------
+    str
+        The branch name, ``"HEAD"`` when the checkout is detached, or an empty
+        string when the path is not a repository, git cannot be started, or
+        git exits with a non-zero status.
+    """
+    if not is_git_repo(workspace_path):
+        return ""
+    command = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
+    try:
+        proc = subprocess.run(command, capture_output=True, text=True, cwd=workspace_path)
+    except (FileNotFoundError, OSError):
+        # git is not installed or the working directory is unusable.
+        return ""
+    return proc.stdout.strip() if proc.returncode == 0 else ""
+def is_detached_head(workspace_path: Path) -> bool:
+    """Report whether the repository at *workspace_path* has a detached ``HEAD``.
+    This is the case exactly when :func:`get_current_branch` yields ``"HEAD"``;
+    any lookup failure (empty branch name) therefore counts as "not detached".
+    """
+    branch = get_current_branch(workspace_path)
+    return branch == "HEAD"
+def get_git_diff(workspace_path: Path, base_ref: str = "HEAD") -> list[dict]:
+    """Return a list of changed files compared to *base_ref*.
+    Runs ``git diff --name-status <base_ref> --`` and parses each output line.
+    Every entry is a dict with a ``"status"`` key holding git's status field
+    (typically ``"A"``, ``"M"`` or ``"D"``) and a ``"path"`` key. Renames and
+    copies are reported by git as ``R<score>``/``C<score>`` followed by two
+    paths; for those, ``"path"`` is the new path and an extra ``"old_path"``
+    key holds the original one.
+    Parameters
+    ----------
+    workspace_path : Path
+        Repository root.
+    base_ref : str
+        Git ref to diff against (default ``"HEAD"``).
+    Returns
+    -------
+    list[dict]
+        Empty list on failure, on an unusable *base_ref*, or when nothing changed.
+    """
+    # A valid ref never starts with "-"; refusing it keeps git from parsing it as an option.
+    if not base_ref or base_ref.startswith("-"):
+        return []
+    if not is_git_repo(workspace_path):
+        return []
+    try:
+        result = subprocess.run(
+            # The trailing "--" makes git read base_ref as a revision, never as a path.
+            ["git", "diff", "--name-status", base_ref, "--"],
+            capture_output=True,
+            text=True,
+            cwd=workspace_path,
+        )
+    except (FileNotFoundError, OSError):
+        return []
+    if result.returncode != 0:
+        return []
+    changes: list[dict] = []
+    for line in result.stdout.splitlines():
+        if not line.strip():
+            continue
+        # Typical output: "M\tsrc/foo.py"; renames/copies: "R100\told.py\tnew.py"
+        fields = line.split("\t")
+        if len(fields) < 2:
+            continue
+        entry = {"status": fields[0], "path": fields[-1]}
+        if len(fields) > 2:
+            entry["old_path"] = fields[1]
+        changes.append(entry)
+    return changes
+def get_file_diff(workspace_path: Path, file_path: str) -> str:
+    """Produce the unified diff of a single file relative to ``HEAD``.
+    The text is the standard output of ``git diff HEAD -- <file_path>`` executed
+    in *workspace_path*.
+    Parameters
+    ----------
+    workspace_path : Path
+        Repository root.
+    file_path : str
+        Path to the file, relative to the repository root.
+    Returns
+    -------
+    str
+        The diff text; an empty string when git reports no difference from
+        ``HEAD`` for the file, when the path is not a repository, or when git
+        fails to start or exits with a non-zero status.
+    """
+    if not is_git_repo(workspace_path):
+        return ""
+    command = ["git", "diff", "HEAD", "--", file_path]
+    try:
+        proc = subprocess.run(command, capture_output=True, text=True, cwd=workspace_path)
+    except (FileNotFoundError, OSError):
+        return ""
+    if proc.returncode == 0:
+        return proc.stdout
+    return ""
+def sanitize_branch_name(name: str) -> str:
+    """Normalize a branch name for use as a filename suffix.
+    Rules applied in order:
+    1. NFC-normalise (``unicodedata.normalize('NFC', ...)``).
+    2. Lowercase.
+    3. Replace ``/`` with ``_``.
+    4. Replace any non-alphanumeric character (except ``_``, ``.``, ``-``)
+       with ``_``.
+    5. Strip leading/trailing ``_``, ``.``, ``-``.
+    6. Truncate to 100 characters, then strip trailing ``_``, ``.``, ``-``
+       again so the cut never leaves a dangling separator.
+    Parameters
+    ----------
+    name : str
+        Raw branch name (e.g. ``"feature/my-branch"``).
+    Returns
+    -------
+    str
+        Sanitised, safe filename fragment; ``"default"`` when *name* is empty
+        or nothing survives sanitising.
+    """
+    if not name:
+        return "default"
+    normalized = unicodedata.normalize("NFC", name).lower()
+    # "/" is not alphanumeric, so the generic replacement also implements rule 3.
+    cleaned = "".join(
+        ch if ch.isalnum() or ch in ("_", "-", ".") else "_" for ch in normalized
+    )
+    # Truncation can expose a separator at the new end, hence the second strip.
+    cleaned = cleaned.strip("_.-")[:100].rstrip("_.-")
+    return cleaned or "default"
+def get_index_path(workspace_path: Path, branch: str) -> Path:
+    """Build the location of the knowledge index database for one branch.
+    The result has the form
+    ``<workspace>/docs/.index/knowledge-<sanitized-branch>.db``; nothing is
+    created on disk.
+    Parameters
+    ----------
+    workspace_path : Path
+        Repository root.
+    branch : str
+        Current branch name (raw — it is passed through
+        :func:`sanitize_branch_name` here).
+    Returns
+    -------
+    Path
+    """
+    db_name = f"knowledge-{sanitize_branch_name(branch)}.db"
+    index_dir = workspace_path / "docs" / ".index"
+    return index_dir / db_name

diary/indexer/__init__.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""diary indexer — file scanner, symbol extraction, and search database.
+Submodules are imported eagerly, but each import is guarded so the package
+can still be imported if individual
+submodule files do not yet exist (e.g. during initial scaffolding).
+"""
+# Public names re-exported by the package. Each one is bound below either to
+# the object imported from its submodule or, if that import fails, to ``None``.
+__all__ = [
+    "scan_files",
+    "extract_symbols",
+    "IndexDatabase",
+    "generate_report",
+    "ensure_gitignore",
+]
+# ---------------------------------------------------------------------------
+# Safe imports — each submodule is loaded inside a try/except so the package
+# works even when submodule files haven't been created yet.
+# NOTE(review): ``except ImportError`` also catches an ImportError raised
+# *inside* an existing submodule (e.g. one of its own imports failing), so a
+# genuine breakage surfaces later as a ``None`` name rather than at import time.
+# ---------------------------------------------------------------------------
+# File discovery.
+try:
+    from .scanner import scan_files  # type: ignore[import-untyped]
+except ImportError:
+    scan_files = None  # type: ignore[assignment]
+# Symbol extraction.
+try:
+    from .extractors import extract_symbols  # type: ignore[import-untyped]
+except ImportError:
+    extract_symbols = None  # type: ignore[assignment]
+# SQLite storage layer.
+try:
+    from .database import IndexDatabase  # type: ignore[import-untyped]
+except ImportError:
+    IndexDatabase = None  # type: ignore[assignment]
+# Report generation.
+try:
+    from .reporter import generate_report  # type: ignore[import-untyped]
+except ImportError:
+    generate_report = None  # type: ignore[assignment]
+# .gitignore maintenance.
+try:
+    from .gitignore import ensure_gitignore  # type: ignore[import-untyped]
+except ImportError:
+    ensure_gitignore = None  # type: ignore[assignment]

diary/indexer/database.py ADDED Viewed

@@ -0,0 +1,340 @@
+"""SQLite database layer for the knowledge indexer.
+Provides an IndexDatabase class with schema creation, CRUD operations,
+WAL mode, schema versioning, batch commits, and context manager support.
+Python stdlib only (sqlite3, pathlib).
+"""
+from __future__ import annotations
+import logging
+import sqlite3
+from pathlib import Path
+from typing import Optional
+# Module-level logger; used for schema-mismatch notices and failed inserts.
+logger = logging.getLogger(__name__)
+# Expected value of ``PRAGMA user_version``. ``IndexDatabase.create_tables``
+# drops and recreates every table when the stored value differs, so bump this
+# whenever the DDL below changes.
+SCHEMA_VERSION = 1
+# fmt: off
+# DDL statements executed in order by ``IndexDatabase.create_tables``.
+_CREATE_TABLES = [
+    # One row per indexed source file.
+    """CREATE TABLE IF NOT EXISTS files (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        path TEXT UNIQUE NOT NULL,
+        rel_path TEXT NOT NULL,
+        language TEXT NOT NULL,
+        sha256 TEXT NOT NULL,
+        size INTEGER NOT NULL,
+        modified REAL NOT NULL,
+        lines INTEGER NOT NULL
+    )""",
+    # Symbols found in a file; each row references its owning ``files`` row.
+    """CREATE TABLE IF NOT EXISTS symbols (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        file_id INTEGER NOT NULL REFERENCES files(id),
+        name TEXT NOT NULL,
+        fqn TEXT,
+        type TEXT NOT NULL,
+        parent TEXT,
+        namespace TEXT,
+        start_line INTEGER,
+        end_line INTEGER,
+        visibility TEXT DEFAULT 'public',
+        signature TEXT
+    )""",
+    # One row per indexed document, unique by path.
+    """CREATE TABLE IF NOT EXISTS documents (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        path TEXT UNIQUE NOT NULL,
+        title TEXT,
+        headings TEXT,
+        summary TEXT,
+        sha256 TEXT NOT NULL
+    )""",
+    # Links a document to a symbol (and optionally a file) with a confidence score.
+    """CREATE TABLE IF NOT EXISTS relations (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        doc_id INTEGER NOT NULL REFERENCES documents(id),
+        symbol_id INTEGER NOT NULL REFERENCES symbols(id),
+        file_id INTEGER REFERENCES files(id),
+        confidence REAL NOT NULL DEFAULT 1.0,
+        reason TEXT
+    )""",
+    # Outgoing dependencies of a file; the target is stored as a plain path string.
+    """CREATE TABLE IF NOT EXISTS dependencies (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        source_id INTEGER NOT NULL REFERENCES files(id),
+        target_path TEXT NOT NULL,
+        dep_type TEXT DEFAULT 'import'
+    )""",
+    # Summary text attached to a file and/or a document (both references are nullable).
+    """CREATE TABLE IF NOT EXISTS summaries (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        file_id INTEGER REFERENCES files(id),
+        doc_id INTEGER REFERENCES documents(id),
+        content TEXT NOT NULL,
+        summary_type TEXT DEFAULT 'concise'
+    )""",
+    # Path -> sha256 records, unique by path.
+    """CREATE TABLE IF NOT EXISTS hashes (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        path TEXT UNIQUE NOT NULL,
+        sha256 TEXT NOT NULL
+    )""",
+    # Free-form key/value store; left untouched by ``IndexDatabase.clear_all``.
+    """CREATE TABLE IF NOT EXISTS metadata (
+        key TEXT PRIMARY KEY,
+        value TEXT NOT NULL
+    )""",
+]
+# Secondary indexes created right after the tables.
+# NOTE(review): ``files.path`` and ``documents.path`` are declared UNIQUE, for
+# which SQLite already maintains an index — the two ``*_path`` indexes below
+# look redundant; confirm before removing.
+_INDEXES = [
+    "CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)",
+    "CREATE INDEX IF NOT EXISTS idx_symbols_file_id ON symbols(file_id)",
+    "CREATE INDEX IF NOT EXISTS idx_relations_doc_id ON relations(doc_id)",
+    "CREATE INDEX IF NOT EXISTS idx_relations_symbol_id ON relations(symbol_id)",
+    "CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)",
+    "CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path)",
+]
+# All data tables (everything except metadata) — for clear_all
+# The same list drives the DROP TABLE pass in ``create_tables``; its names are
+# interpolated into SQL, so it must only ever contain trusted identifiers.
+_DATA_TABLES = [
+    # Delete child rows first (FK-safe order)
+    "dependencies",
+    "relations",
+    "summaries",
+    "symbols",
+    "hashes",
+    "documents",
+    "files",
+]
+# fmt: on
+# ``IndexDatabase._maybe_commit`` commits once per this many successful inserts.
+_BATCH_SIZE = 100
+class IndexDatabase:
+    """SQLite-backed index database with WAL mode and schema versioning.
+    The connection is opened in the constructor. Inserts are committed in
+    batches of ``_BATCH_SIZE``; :meth:`close` (or leaving a ``with`` block)
+    commits whatever is still pending.
+    Parameters
+    ----------
+    db_path : Path
+        Path to the SQLite database file.
+    """
+    def __init__(self, db_path: Path) -> None:
+        self.db_path = db_path
+        # Number of successful inserts so far; drives the batch commits.
+        self._count = 0
+        self.conn: sqlite3.Connection = sqlite3.connect(str(db_path))
+        try:
+            self.conn.execute("PRAGMA journal_mode=WAL;")
+            self.conn.execute("PRAGMA foreign_keys=ON;")
+        except sqlite3.Error:
+            # Do not leak the handle when the file turns out to be unusable.
+            self.conn.close()
+            raise
+    def create_tables(self) -> None:
+        """Create all tables, indexes, and set schema version.
+        If the existing schema version (PRAGMA user_version) does not match
+        the expected SCHEMA_VERSION, all tables are dropped and recreated.
+        A stored version of 0 (never set) is handled the same way, only
+        without the log message.
+        """
+        cur = self.conn.execute("PRAGMA user_version")
+        existing_version = cur.fetchone()[0]
+        if existing_version != SCHEMA_VERSION:
+            if existing_version != 0:
+                logger.info(
+                    "Schema version mismatch (existing=%d, expected=%d) — recreating",
+                    existing_version,
+                    SCHEMA_VERSION,
+                )
+            # Drop all known tables so we start fresh
+            for table in _DATA_TABLES:
+                self.conn.execute(f"DROP TABLE IF EXISTS {table}")
+            self.conn.execute("DROP TABLE IF EXISTS metadata")
+        for ddl in _CREATE_TABLES:
+            self.conn.execute(ddl)
+        for idx in _INDEXES:
+            self.conn.execute(idx)
+        # PRAGMA statements cannot take bound parameters; the value is a module constant.
+        self.conn.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
+        self.conn.commit()
+    # ------------------------------------------------------------------
+    # Insert helpers
+    # ------------------------------------------------------------------
+    def _maybe_commit(self) -> None:
+        """Count one successful insert and commit on every ``_BATCH_SIZE``-th one."""
+        self._count += 1
+        if self._count % _BATCH_SIZE == 0:
+            self.conn.commit()
+    # ------------------------------------------------------------------
+    # CRUD
+    # ------------------------------------------------------------------
+    def insert_file(
+        self,
+        path: str,
+        rel_path: str,
+        language: str,
+        sha256: str,
+        size: int,
+        modified: float,
+        lines: int,
+    ) -> int:
+        """Insert a row into ``files`` and return its id.
+        If the insert violates a constraint and a row with the same *path*
+        already exists, that row's id is returned and the stored row is left
+        unchanged. Any other integrity failure is re-raised.
+        """
+        try:
+            cur = self.conn.execute(
+                "INSERT INTO files (path, rel_path, language, sha256, size, modified, lines) "
+                "VALUES (?, ?, ?, ?, ?, ?, ?)",
+                (path, rel_path, language, sha256, size, modified, lines),
+            )
+            self._maybe_commit()
+            return cur.lastrowid  # type: ignore[return-value]
+        except sqlite3.IntegrityError:
+            # File already indexed — return existing id
+            cur = self.conn.execute("SELECT id FROM files WHERE path = ?", (path,))
+            row = cur.fetchone()
+            if row is not None:
+                return row[0]
+            raise
+    def insert_symbol(
+        self,
+        file_id: int,
+        name: str,
+        fqn: Optional[str],
+        sym_type: str,
+        parent: Optional[str],
+        namespace: str,
+        start_line: int,
+        end_line: int,
+        visibility: str = "public",
+        signature: str = "",
+    ) -> Optional[int]:
+        """Insert a row into ``symbols``; return its id, or ``None`` on a logged SQLite error."""
+        try:
+            cur = self.conn.execute(
+                "INSERT INTO symbols (file_id, name, fqn, type, parent, namespace, "
+                "start_line, end_line, visibility, signature) "
+                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                (file_id, name, fqn, sym_type, parent, namespace, start_line, end_line, visibility, signature),
+            )
+            self._maybe_commit()
+            return cur.lastrowid  # type: ignore[return-value]
+        except sqlite3.Error as exc:
+            logger.warning("Failed to insert symbol %s: %s", name, exc)
+            return None
+    def insert_document(
+        self,
+        path: str,
+        title: str,
+        headings: str,
+        summary: str,
+        sha256: str,
+    ) -> int:
+        """Insert a row into ``documents`` and return its id.
+        If the insert violates a constraint and a row with the same *path*
+        already exists, that row's id is returned and the stored row is left
+        unchanged. Any other integrity failure is re-raised.
+        """
+        try:
+            cur = self.conn.execute(
+                "INSERT INTO documents (path, title, headings, summary, sha256) "
+                "VALUES (?, ?, ?, ?, ?)",
+                (path, title, headings, summary, sha256),
+            )
+            self._maybe_commit()
+            return cur.lastrowid  # type: ignore[return-value]
+        except sqlite3.IntegrityError:
+            # Document already indexed — return existing id
+            cur = self.conn.execute("SELECT id FROM documents WHERE path = ?", (path,))
+            row = cur.fetchone()
+            if row is not None:
+                return row[0]
+            raise
+    def insert_relation(
+        self,
+        doc_id: int,
+        symbol_id: int,
+        file_id: int,
+        confidence: float = 1.0,
+        reason: str = "",
+    ) -> Optional[int]:
+        """Insert a row into ``relations``; return its id, or ``None`` on a logged SQLite error."""
+        try:
+            cur = self.conn.execute(
+                "INSERT INTO relations (doc_id, symbol_id, file_id, confidence, reason) "
+                "VALUES (?, ?, ?, ?, ?)",
+                (doc_id, symbol_id, file_id, confidence, reason),
+            )
+            self._maybe_commit()
+            return cur.lastrowid  # type: ignore[return-value]
+        except sqlite3.Error as exc:
+            logger.warning("Failed to insert relation: %s", exc)
+            return None
+    def insert_dependency(
+        self,
+        source_id: int,
+        target_path: str,
+        dep_type: str = "import",
+    ) -> Optional[int]:
+        """Insert a row into ``dependencies``; return its id, or ``None`` on a logged SQLite error."""
+        try:
+            cur = self.conn.execute(
+                "INSERT INTO dependencies (source_id, target_path, dep_type) "
+                "VALUES (?, ?, ?)",
+                (source_id, target_path, dep_type),
+            )
+            self._maybe_commit()
+            return cur.lastrowid  # type: ignore[return-value]
+        except sqlite3.Error as exc:
+            logger.warning("Failed to insert dependency: %s", exc)
+            return None
+    def insert_summary(
+        self,
+        file_id: Optional[int],
+        doc_id: Optional[int],
+        content: str,
+        summary_type: str = "concise",
+    ) -> Optional[int]:
+        """Insert a row into ``summaries``; return its id, or ``None`` on a logged SQLite error."""
+        try:
+            cur = self.conn.execute(
+                "INSERT INTO summaries (file_id, doc_id, content, summary_type) "
+                "VALUES (?, ?, ?, ?)",
+                (file_id, doc_id, content, summary_type),
+            )
+            self._maybe_commit()
+            return cur.lastrowid  # type: ignore[return-value]
+        except sqlite3.Error as exc:
+            logger.warning("Failed to insert summary: %s", exc)
+            return None
+    # ------------------------------------------------------------------
+    # Bulk operations
+    # ------------------------------------------------------------------
+    def clear_all(self) -> None:
+        """Delete all rows from data tables (preserves metadata).
+        Also resets the AUTOINCREMENT counters by deleting from sqlite_sequence.
+        """
+        for table in _DATA_TABLES:
+            self.conn.execute(f"DELETE FROM {table}")
+        self.conn.execute("DELETE FROM sqlite_sequence")
+        self.conn.commit()
+        self._count = 0
+    # ------------------------------------------------------------------
+    # Lifecycle
+    # ------------------------------------------------------------------
+    def close(self) -> None:
+        """Commit pending changes and close the connection.
+        A failing final commit is logged instead of raised, and the connection
+        is closed regardless. Calling ``close`` again on an already-closed
+        database is harmless.
+        """
+        try:
+            self.conn.commit()
+        except sqlite3.ProgrammingError as exc:
+            # Raised when the connection is already closed — nothing left to commit.
+            logger.debug("Skipping commit on close: %s", exc)
+        except sqlite3.Error as exc:
+            # Pending rows could not be persisted; make the data loss visible.
+            logger.warning("Failed to commit pending changes on close: %s", exc)
+        finally:
+            self.conn.close()
+    # ------------------------------------------------------------------
+    # Context manager
+    # ------------------------------------------------------------------
+    def __enter__(self) -> "IndexDatabase":
+        return self
+    def __exit__(
+        self,
+        exc_type: Optional[type],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[object],
+    ) -> None:
+        # Returns None, so an exception raised inside the ``with`` body propagates.
+        self.close()