PyPI - ragtime-cli - Versions diffs - 0.2.6__tar.gz → 0.2.7__tar.gz - Mend

ragtime-cli 0.2.6 (tar.gz) → 0.2.7 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

{ragtime_cli-0.2.6/ragtime_cli.egg-info → ragtime_cli-0.2.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.6
+Version: 0.2.7
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ragtime-cli"
-version = "0.2.6"
+version = "0.2.7"
 description = "Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge"
 readme = "README.md"
 license = "MIT"

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7/ragtime_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.6
+Version: 0.2.7
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7}/src/cli.py RENAMED Viewed

@@ -169,7 +169,7 @@ def get_remote_branches_with_ragtime(path: Path) -> list[str]:
 @click.group()
-@click.version_option(version="0.2.6")
+@click.version_option(version="0.2.7")
 def main():
     """Ragtime - semantic search over code and documentation."""
     pass
@@ -258,12 +258,73 @@ Add your team's conventions above. Each rule should be:
         click.echo(f"  Install for enhanced workflow: npm install -g @bretwardjames/ghp-cli")
+# Batch size for ChromaDB upserts (embedding computation happens here)
+INDEX_BATCH_SIZE = 100
+def _upsert_entries(db, entries, entry_type: str = "docs", label: str = "  Embedding"):
+    """Upsert entries to ChromaDB in batches with progress bar."""
+    if not entries:
+        return
+    # Process in batches with progress feedback
+    batches = [entries[i:i + INDEX_BATCH_SIZE] for i in range(0, len(entries), INDEX_BATCH_SIZE)]
+    with click.progressbar(
+        batches,
+        label=label,
+        show_percent=True,
+        show_pos=True,
+        item_show_func=lambda b: f"{len(b)} items" if b else "",
+    ) as batch_iter:
+        for batch in batch_iter:
+            if entry_type == "code":
+                ids = [f"{e.file_path}:{e.line_number}:{e.symbol_name}" for e in batch]
+            else:
+                ids = [e.file_path for e in batch]
+            documents = [e.content for e in batch]
+            metadatas = [e.to_metadata() for e in batch]
+            db.upsert(ids=ids, documents=documents, metadatas=metadatas)
+def _get_files_to_process(
+    all_files: list[Path],
+    indexed_files: dict[str, float],
+) -> tuple[list[Path], list[str]]:
+    """
+    Compare files on disk with indexed files to determine what needs processing.
+    Returns:
+        (files_to_index, files_to_delete)
+    """
+    disk_files = {str(f): os.path.getmtime(f) for f in all_files}
+    to_index = []
+    for file_path in all_files:
+        path_str = str(file_path)
+        disk_mtime = disk_files[path_str]
+        indexed_mtime = indexed_files.get(path_str, 0.0)
+        # Index if new or modified (with 1-second tolerance for filesystem precision)
+        if disk_mtime > indexed_mtime + 1.0:
+            to_index.append(file_path)
+    # Find deleted files (in index but not on disk)
+    to_delete = [f for f in indexed_files.keys() if f not in disk_files]
+    return to_index, to_delete
 @main.command()
 @click.argument("path", type=click.Path(exists=True, path_type=Path), default=".")
 @click.option("--type", "index_type", type=click.Choice(["all", "docs", "code"]), default="all")
 @click.option("--clear", is_flag=True, help="Clear existing index before indexing")
 def index(path: Path, index_type: str, clear: bool):
-    """Index a project directory."""
+    """Index a project directory.
+    Without --clear, performs incremental indexing (only changed files).
+    """
     path = path.resolve()
     db = get_db(path)
     config = RagtimeConfig.load(path)
@@ -276,7 +337,10 @@ def index(path: Path, index_type: str, clear: bool):
             db.clear(type_filter=index_type)
     if index_type in ("all", "docs"):
-        # Discover all doc files first
+        # Get currently indexed docs
+        indexed_docs = {} if clear else db.get_indexed_files("docs")
+        # Discover all doc files
         all_doc_files = []
         for docs_path in config.docs.paths:
             docs_root = path / docs_path
@@ -290,39 +354,55 @@ def index(path: Path, index_type: str, clear: bool):
             )
             all_doc_files.extend(files)
-        if all_doc_files:
+        if all_doc_files or indexed_docs:
+            # Determine what needs processing
+            to_index, to_delete = _get_files_to_process(all_doc_files, indexed_docs)
             click.echo(f"Found {len(all_doc_files)} doc files")
-            total_entries = []
-            with click.progressbar(
-                all_doc_files,
-                label="  Processing",
-                show_percent=True,
-                show_pos=True,
-                item_show_func=lambda f: f.name[:30] if f else "",
-            ) as files:
-                for file_path in files:
-                    entry = index_doc_file(file_path)
-                    if entry:
-                        total_entries.append(entry)
-            if total_entries:
-                ids = [e.file_path for e in total_entries]
-                documents = [e.content for e in total_entries]
-                metadatas = [e.to_metadata() for e in total_entries]
-                db.upsert(ids=ids, documents=documents, metadatas=metadatas)
-                click.echo(f"  Indexed {len(total_entries)} documents")
-            else:
-                click.echo("  No valid documents found")
+            if not clear:
+                unchanged = len(all_doc_files) - len(to_index)
+                if unchanged > 0:
+                    click.echo(f"  {unchanged} unchanged, {len(to_index)} to index")
+                if to_delete:
+                    click.echo(f"  {len(to_delete)} to remove (deleted from disk)")
+            # Delete removed files
+            if to_delete:
+                db.delete_by_file(to_delete, "docs")
+            # Index new/changed files
+            if to_index:
+                entries = []
+                with click.progressbar(
+                    to_index,
+                    label="  Parsing",
+                    show_percent=True,
+                    show_pos=True,
+                    item_show_func=lambda f: f.name[:30] if f else "",
+                ) as files:
+                    for file_path in files:
+                        entry = index_doc_file(file_path)
+                        if entry:
+                            entries.append(entry)
+                if entries:
+                    _upsert_entries(db, entries, "docs")
+                    click.echo(f"  Indexed {len(entries)} documents")
+            elif not to_delete:
+                click.echo("  All docs up to date")
         else:
             click.echo("  No documents found")
     if index_type in ("all", "code"):
+        # Get currently indexed code files
+        indexed_code = {} if clear else db.get_indexed_files("code")
         # Build exclusion list for code
         code_exclude = list(config.code.exclude)
         for docs_path in config.docs.paths:
             code_exclude.append(f"**/{docs_path}/**")
-        # Discover all code files first
+        # Discover all code files
         all_code_files = []
         for code_path_str in config.code.paths:
             code_root = path / code_path_str
@@ -336,36 +416,47 @@ def index(path: Path, index_type: str, clear: bool):
             )
             all_code_files.extend(files)
-        if all_code_files:
+        if all_code_files or indexed_code:
+            # Determine what needs processing
+            to_index, to_delete = _get_files_to_process(all_code_files, indexed_code)
             click.echo(f"Found {len(all_code_files)} code files")
-            total_entries = []
-            with click.progressbar(
-                all_code_files,
-                label="  Processing",
-                show_percent=True,
-                show_pos=True,
-                item_show_func=lambda f: f.name[:30] if f else "",
-            ) as files:
-                for file_path in files:
-                    file_entries = index_code_file(file_path)
-                    total_entries.extend(file_entries)
-            if total_entries:
-                # Create unique IDs: file:line:symbol
-                ids = [f"{e.file_path}:{e.line_number}:{e.symbol_name}" for e in total_entries]
-                documents = [e.content for e in total_entries]
-                metadatas = [e.to_metadata() for e in total_entries]
-                db.upsert(ids=ids, documents=documents, metadatas=metadatas)
-                click.echo(f"  Indexed {len(total_entries)} code symbols")
-                # Show breakdown by type
+            if not clear:
+                unchanged = len(all_code_files) - len(to_index)
+                if unchanged > 0:
+                    click.echo(f"  {unchanged} unchanged, {len(to_index)} to index")
+                if to_delete:
+                    click.echo(f"  {len(to_delete)} to remove (deleted from disk)")
+            # Delete removed files
+            if to_delete:
+                db.delete_by_file(to_delete, "code")
+            # Index new/changed files
+            if to_index:
+                entries = []
                 by_type = {}
-                for e in total_entries:
-                    by_type[e.symbol_type] = by_type.get(e.symbol_type, 0) + 1
-                breakdown = ", ".join(f"{count} {typ}s" for typ, count in sorted(by_type.items()))
-                click.echo(f"    ({breakdown})")
-            else:
-                click.echo("  No code symbols found")
+                with click.progressbar(
+                    to_index,
+                    label="  Parsing",
+                    show_percent=True,
+                    show_pos=True,
+                    item_show_func=lambda f: f.name[:30] if f else "",
+                ) as files:
+                    for file_path in files:
+                        file_entries = index_code_file(file_path)
+                        for entry in file_entries:
+                            entries.append(entry)
+                            by_type[entry.symbol_type] = by_type.get(entry.symbol_type, 0) + 1
+                if entries:
+                    click.echo(f"  Found {len(entries)} symbols")
+                    _upsert_entries(db, entries, "code")
+                    click.echo(f"  Indexed {len(entries)} code symbols")
+                    breakdown = ", ".join(f"{count} {typ}s" for typ, count in sorted(by_type.items()))
+                    click.echo(f"    ({breakdown})")
+            elif not to_delete:
+                click.echo("  All code up to date")
         else:
             click.echo("  No code files found")
@@ -2026,7 +2117,7 @@ def update(check: bool):
     from urllib.request import urlopen
     from urllib.error import URLError
-    current = "0.2.6"
+    current = "0.2.7"
     click.echo(f"Current version: {current}")
     click.echo("Checking PyPI for updates...")

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7}/src/config.py RENAMED Viewed

@@ -33,6 +33,18 @@ class CodeConfig:
         "**/build/**",
         "**/dist/**",
         "**/.dart_tool/**",
+        # Generated code (Prisma, GraphQL, OpenAPI, etc.)
+        "**/generated/**",
+        "**/*.generated.*",
+        "**/*.g.dart",
+        # TypeScript declaration files (often auto-generated)
+        "**/*.d.ts",
+        # Test files (usually not needed in search)
+        "**/__tests__/**",
+        "**/*.test.*",
+        "**/*.spec.*",
+        # Python init files (rarely have searchable content)
+        "**/__init__.py",
     ])

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7}/src/db.py RENAMED Viewed

@@ -165,3 +165,54 @@ class RagtimeDB:
             "docs": docs_count,
             "code": code_count,
         }
+    def get_indexed_files(self, type_filter: str | None = None) -> dict[str, float]:
+        """
+        Get all indexed files and their modification times.
+        Args:
+            type_filter: "code" or "docs" (None = both)
+        Returns:
+            Dict mapping file paths to their indexed mtime
+        """
+        where = {"type": type_filter} if type_filter else None
+        results = self.collection.get(where=where, include=["metadatas"])
+        files: dict[str, float] = {}
+        for meta in results["metadatas"]:
+            file_path = meta.get("file", "")
+            mtime = meta.get("mtime", 0.0)
+            # For code files, multiple entries per file - keep max mtime
+            if file_path not in files or mtime > files[file_path]:
+                files[file_path] = mtime
+        return files
+    def delete_by_file(self, file_paths: list[str], type_filter: str | None = None) -> int:
+        """
+        Delete all entries for the given file paths.
+        Args:
+            file_paths: List of file paths to remove
+            type_filter: "code" or "docs" (None = both)
+        Returns:
+            Number of entries deleted
+        """
+        if not file_paths:
+            return 0
+        # Build where clause
+        where = {"file": {"$in": file_paths}}
+        if type_filter:
+            where = {"$and": [{"file": {"$in": file_paths}}, {"type": type_filter}]}
+        # Get IDs to delete
+        results = self.collection.get(where=where)
+        ids = results["ids"]
+        if ids:
+            self.collection.delete(ids=ids)
+        return len(ids)

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7}/src/indexers/code.py RENAMED Viewed

@@ -6,6 +6,7 @@ This allows searching for specific code constructs like "useAsyncState" or "JWTM
 """
 import ast
+import os
 import re
 from fnmatch import fnmatch
 from pathlib import Path
@@ -32,6 +33,7 @@ class CodeEntry:
     symbol_type: str       # function, class, interface, component, etc.
     line_number: int       # Line where symbol starts
     docstring: str | None = None  # Extracted docstring/JSDoc
+    mtime: float | None = None    # File modification time for incremental indexing
     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""
@@ -42,6 +44,7 @@ class CodeEntry:
             "symbol_name": self.symbol_name,
             "symbol_type": self.symbol_type,
             "line": self.line_number,
+            "mtime": self.mtime or 0.0,
         }
@@ -92,14 +95,21 @@ def discover_code_files(
                     rel_path = str(path)
                 for ex in exclude:
-                    # Handle ** patterns by checking if pattern appears in path
+                    # Handle ** patterns
                     if "**" in ex:
-                        # Convert glob to a simpler check: **/node_modules/** means
-                        # any path containing /node_modules/ segment
-                        core_pattern = ex.replace("**", "").strip("/")
-                        if core_pattern and f"/{core_pattern}/" in f"/{rel_path}/":
-                            skip = True
-                            break
+                        if ex.endswith("/**"):
+                            # Directory pattern: **/node_modules/** or **/generated/**
+                            # Extract the directory name to match as path segment
+                            dir_pattern = ex.replace("**/", "").replace("/**", "")
+                            if f"/{dir_pattern}/" in f"/{rel_path}/":
+                                skip = True
+                                break
+                        else:
+                            # File pattern: **/*.d.ts, **/*.test.*, **/*.generated.*
+                            file_pattern = ex.replace("**/", "")
+                            if fnmatch(path.name, file_pattern):
+                                skip = True
+                                break
                     elif fnmatch(rel_path, ex) or fnmatch(path.name, ex):
                         skip = True
                         break
@@ -432,7 +442,8 @@ def index_file(file_path: Path) -> list[CodeEntry]:
     """
     try:
         content = file_path.read_text(encoding='utf-8')
-    except (IOError, UnicodeDecodeError):
+        mtime = os.path.getmtime(file_path)
+    except (IOError, UnicodeDecodeError, OSError):
         return []
     # Skip empty files
@@ -442,15 +453,21 @@ def index_file(file_path: Path) -> list[CodeEntry]:
     suffix = file_path.suffix.lower()
     if suffix == ".py":
-        return index_python_file(file_path, content)
+        entries = index_python_file(file_path, content)
     elif suffix in [".ts", ".tsx", ".js", ".jsx"]:
-        return index_typescript_file(file_path, content)
+        entries = index_typescript_file(file_path, content)
     elif suffix == ".vue":
-        return index_vue_file(file_path, content)
+        entries = index_vue_file(file_path, content)
     elif suffix == ".dart":
-        return index_dart_file(file_path, content)
+        entries = index_dart_file(file_path, content)
+    else:
+        return []
-    return []
+    # Set mtime on all entries from this file
+    for entry in entries:
+        entry.mtime = mtime
+    return entries
 def index_directory(

{ragtime_cli-0.2.6 → ragtime_cli-0.2.7}/src/indexers/docs.py RENAMED Viewed

@@ -4,6 +4,7 @@ Docs indexer - parses markdown files with YAML frontmatter.
 Designed for .claude/memory/ style files but works with any markdown.
 """
+import os
 import re
 from pathlib import Path
 from dataclasses import dataclass
@@ -19,6 +20,7 @@ class DocEntry:
     category: str | None = None
     component: str | None = None
     title: str | None = None
+    mtime: float | None = None  # File modification time for incremental indexing
     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""
@@ -29,6 +31,7 @@ class DocEntry:
             "category": self.category or "",
             "component": self.component or "",
             "title": self.title or Path(self.file_path).stem,
+            "mtime": self.mtime or 0.0,
         }
@@ -61,7 +64,8 @@ def index_file(file_path: Path) -> DocEntry | None:
     """
     try:
         content = file_path.read_text(encoding='utf-8')
-    except (IOError, UnicodeDecodeError):
+        mtime = os.path.getmtime(file_path)
+    except (IOError, UnicodeDecodeError, OSError):
         return None
     metadata, body = parse_frontmatter(content)
@@ -77,6 +81,7 @@ def index_file(file_path: Path) -> DocEntry | None:
         category=metadata.get("category"),
         component=metadata.get("component"),
         title=metadata.get("title"),
+        mtime=mtime,
     )