PyPI - ragtime-cli - Versions diffs - 0.2.12__tar.gz → 0.2.14__tar.gz - Mend

ragtime-cli 0.2.12.tar.gz → 0.2.14.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

{ragtime_cli-0.2.12/ragtime_cli.egg-info → ragtime_cli-0.2.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.12
+Version: 0.2.14
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT

{ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ragtime-cli"
-version = "0.2.12"
+version = "0.2.14"
 description = "Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge"
 readme = "README.md"
 license = "MIT"

{ragtime_cli-0.2.12 → ragtime_cli-0.2.14/ragtime_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.12
+Version: 0.2.14
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT

{ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/cli.py RENAMED Viewed

@@ -470,17 +470,21 @@ def index(path: Path, index_type: str, clear: bool):
 @click.option("--type", "type_filter", type=click.Choice(["all", "docs", "code"]), default="all")
 @click.option("--namespace", "-n", help="Filter by namespace")
 @click.option("--require", "-r", "require_terms", multiple=True,
-              help="Terms that MUST appear in results (repeatable)")
+              help="Additional terms that MUST appear (usually auto-detected)")
+@click.option("--raw", is_flag=True, help="Disable auto-detection of qualifiers")
 @click.option("--include-archive", is_flag=True, help="Also search archived branches")
 @click.option("--limit", "-l", default=5, help="Max results")
 @click.option("--verbose", "-v", is_flag=True, help="Show full content")
 def search(query: str, path: Path, type_filter: str, namespace: str,
-           require_terms: tuple, include_archive: bool, limit: int, verbose: bool):
+           require_terms: tuple, raw: bool, include_archive: bool, limit: int, verbose: bool):
     """
-    Hybrid search: semantic similarity + keyword filtering.
+    Smart search: auto-detects qualifiers like 'mobile', 'auth', 'dart'.
-    Use --require/-r to ensure specific terms appear in results.
-    Example: ragtime search "error handling" -r mobile -r dart
+    \b
+    Examples:
+      ragtime search "error handling in mobile"  # auto-requires 'mobile'
+      ragtime search "auth flow"                 # auto-requires 'auth'
+      ragtime search "useAsyncState" --raw       # literal search, no extraction
     """
     path = Path(path).resolve()
     db = get_db(path)
@@ -493,6 +497,7 @@ def search(query: str, path: Path, type_filter: str, namespace: str,
         type_filter=type_arg,
         namespace=namespace,
         require_terms=list(require_terms) if require_terms else None,
+        auto_extract=not raw,
     )
     if not results:
@@ -736,47 +741,68 @@ def reindex(path: Path):
 @main.command()
 @click.option("--path", type=click.Path(exists=True, path_type=Path), default=".")
-@click.option("--dry-run", is_flag=True, help="Show duplicates without removing them")
+@click.option("--dry-run", is_flag=True, help="Show what would be removed")
 def dedupe(path: Path, dry_run: bool):
-    """Remove duplicate entries from the index.
+    """Clean up index: remove duplicates and orphaned entries.
-    Keeps one entry per unique file path, removing duplicates created
-    by older versions of reindex that generated random IDs.
+    - Removes duplicate entries (keeps one per file path)
+    - Removes orphaned entries (files that no longer exist on disk)
     """
     path = Path(path).resolve()
     db = get_db(path)
+    memory_dir = path / ".ragtime"
     # Get all entries with their file paths
     results = db.collection.get(include=["metadatas"])
-    # Group by file path
+    # Group by file path and track orphans
     by_file: dict[str, list[str]] = {}
+    orphans: list[str] = []
     for i, mem_id in enumerate(results["ids"]):
         file_path = results["metadatas"][i].get("file", "")
-        if file_path:
-            if file_path not in by_file:
-                by_file[file_path] = []
-            by_file[file_path].append(mem_id)
+        entry_type = results["metadatas"][i].get("type", "")
+        # Skip docs/code entries - only clean up memory entries
+        if entry_type in ("docs", "code"):
+            continue
+        if not file_path:
+            orphans.append(mem_id)
+            continue
-    # Find duplicates
-    duplicates_to_remove = []
+        # Check if file exists on disk
+        full_path = memory_dir / file_path
+        if not full_path.exists():
+            orphans.append(mem_id)
+            if dry_run:
+                click.echo(f"  Orphan: {file_path} (file missing)")
+            continue
+        if file_path not in by_file:
+            by_file[file_path] = []
+        by_file[file_path].append(mem_id)
+    # Find duplicates (keep first, remove rest)
+    duplicates: list[str] = []
     for file_path, ids in by_file.items():
         if len(ids) > 1:
-            # Keep the first one, remove the rest
-            duplicates_to_remove.extend(ids[1:])
+            duplicates.extend(ids[1:])
             if dry_run:
-                click.echo(f"  {file_path}: {len(ids)} copies (would remove {len(ids) - 1})")
+                click.echo(f"  Duplicate: {file_path} ({len(ids)} copies, removing {len(ids) - 1})")
+    to_remove = orphans + duplicates
-    if not duplicates_to_remove:
-        click.echo("✓ No duplicates found")
+    if not to_remove:
+        click.echo("✓ Index is clean (no duplicates or orphans)")
         return
     if dry_run:
-        click.echo(f"\nWould remove {len(duplicates_to_remove)} duplicate entries")
+        click.echo(f"\nWould remove {len(orphans)} orphans + {len(duplicates)} duplicates = {len(to_remove)} entries")
         click.echo("Run without --dry-run to remove them")
     else:
-        db.delete(duplicates_to_remove)
-        click.echo(f"✓ Removed {len(duplicates_to_remove)} duplicate entries")
+        db.delete(to_remove)
+        click.echo(f"✓ Removed {len(orphans)} orphans + {len(duplicates)} duplicates = {len(to_remove)} entries")
 @main.command("new-branch")

{ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/db.py RENAMED Viewed

@@ -4,12 +4,74 @@ ChromaDB wrapper for ragtime.
 Handles storage and retrieval of indexed documents and code.
 """
+import re
 from pathlib import Path
 from typing import Any
 import chromadb
 from chromadb.config import Settings
+def extract_query_hints(query: str, known_components: list[str] | None = None) -> tuple[str, list[str]]:
+    """
+    Extract component/scope hints from a query for hybrid search.
+    Detects patterns like "X in mobile", "mobile X", "X for auth" and extracts
+    the qualifier to use as require_terms. This prevents qualifiers from being
+    diluted in semantic search.
+    Args:
+        query: The natural language search query
+        known_components: Optional list of known component names to detect
+    Returns:
+        (cleaned_query, extracted_terms) - query with hints removed, terms to require
+    """
+    # Default known components/scopes (common patterns)
+    default_components = [
+        # Platforms
+        "mobile", "web", "desktop", "ios", "android", "flutter", "react", "vue",
+        # Languages
+        "dart", "python", "typescript", "javascript", "ts", "js", "py",
+        # Common components
+        "auth", "authentication", "api", "database", "db", "ui", "frontend", "backend",
+        "server", "client", "admin", "user", "payment", "billing", "notification",
+        "email", "cache", "queue", "worker", "scheduler", "logging", "metrics",
+    ]
+    components = set(c.lower() for c in (known_components or default_components))
+    extracted = []
+    cleaned = query
+    # Pattern 1: "X in/for/on {component}" - extract component
+    patterns = [
+        r'\b(?:in|for|on|from|using|with)\s+(?:the\s+)?(\w+)\s*(?:app|code|module|service|codebase)?(?:\s|$)',
+        r'\b(\w+)\s+(?:app|code|module|service|codebase)\b',
+    ]
+    for pattern in patterns:
+        for match in re.finditer(pattern, query, re.IGNORECASE):
+            word = match.group(1).lower()
+            if word in components:
+                extracted.append(word)
+                # Remove the matched phrase from query
+                cleaned = cleaned[:match.start()] + " " + cleaned[match.end():]
+    # Pattern 2: Check if any known component appears as standalone word
+    words = re.findall(r'\b\w+\b', query.lower())
+    for word in words:
+        if word in components and word not in extracted:
+            # Only extract if it looks like a qualifier (not the main subject)
+            # Heuristic: if query has other meaningful words, it's likely a qualifier
+            other_words = [w for w in words if w != word and len(w) > 3]
+            if len(other_words) >= 2:
+                extracted.append(word)
+    # Clean up extra whitespace
+    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+    return cleaned, list(set(extracted))
 class RagtimeDB:
     """Vector database for ragtime indexes."""
@@ -85,6 +147,7 @@ class RagtimeDB:
         type_filter: str | None = None,
         namespace: str | None = None,
         require_terms: list[str] | None = None,
+        auto_extract: bool = True,
         **filters,
     ) -> list[dict]:
         """
@@ -98,11 +161,26 @@ class RagtimeDB:
             require_terms: List of terms that MUST appear in results (case-insensitive).
                           Use for scoped queries like "error handling in mobile" with
                           require_terms=["mobile"] to ensure "mobile" isn't ignored.
+            auto_extract: If True (default), automatically detect component qualifiers
+                         in the query and add them to require_terms. Set to False
+                         for raw/literal search.
             **filters: Additional metadata filters (None values are ignored)
         Returns:
             List of dicts with 'content', 'metadata', 'distance'
         """
+        # Auto-extract component hints from query if enabled
+        search_query = query
+        all_require_terms = list(require_terms) if require_terms else []
+        if auto_extract:
+            cleaned_query, extracted = extract_query_hints(query)
+            if extracted:
+                # Use cleaned query for embedding (removes noise)
+                search_query = cleaned_query
+                # Add extracted terms to require_terms
+                all_require_terms.extend(extracted)
+                all_require_terms = list(set(all_require_terms))  # dedupe
         # Build list of filter conditions, excluding None values
         conditions = []
@@ -126,10 +204,10 @@ class RagtimeDB:
             where = {"$and": conditions}
         # When using require_terms, fetch more results since we'll filter some out
-        fetch_limit = limit * 5 if require_terms else limit
+        fetch_limit = limit * 5 if all_require_terms else limit
         results = self.collection.query(
-            query_texts=[query],
+            query_texts=[search_query],
             n_results=fetch_limit,
             where=where,
         )
@@ -139,13 +217,13 @@ class RagtimeDB:
         if results["documents"] and results["documents"][0]:
             for i, doc in enumerate(results["documents"][0]):
                 # Hybrid filtering: ensure required terms appear
-                if require_terms:
+                if all_require_terms:
                     doc_lower = doc.lower()
                     # Also check file path in metadata for code/file matches
                     file_path = (results["metadatas"][0][i].get("file", "") or "").lower()
                     combined_text = f"{doc_lower} {file_path}"
-                    if not all(term.lower() in combined_text for term in require_terms):
+                    if not all(term.lower() in combined_text for term in all_require_terms):
                         continue
                 output.append({

{ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/mcp_server.py RENAMED Viewed

@@ -132,13 +132,13 @@ class RagtimeMCPServer:
             },
             {
                 "name": "search",
-                "description": "Hybrid search over indexed code and docs (semantic + keyword). Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations before making code changes or decisions.",
+                "description": "Smart hybrid search over indexed code and docs. Auto-detects qualifiers like 'mobile', 'auth', 'dart' in your query and ensures they appear in results. Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations.",
                 "inputSchema": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": "Natural language search query"
+                            "description": "Natural language search query. Qualifiers like 'in mobile', 'for auth', 'dart' are auto-detected and used for filtering."
                         },
                         "namespace": {
                             "type": "string",
@@ -155,7 +155,12 @@ class RagtimeMCPServer:
                         "require_terms": {
                             "type": "array",
                             "items": {"type": "string"},
-                            "description": "Terms that MUST appear in results (case-insensitive). Use for scoped queries like 'error handling in mobile' with require_terms=['mobile'] to ensure the qualifier isn't lost in semantic search."
+                            "description": "Additional terms that MUST appear in results. Usually not needed since qualifiers are auto-detected from the query."
+                        },
+                        "auto_extract": {
+                            "type": "boolean",
+                            "default": True,
+                            "description": "Auto-detect component qualifiers from query (default: true). Set to false for literal/raw search."
                         },
                         "limit": {
                             "type": "integer",
@@ -338,7 +343,7 @@ class RagtimeMCPServer:
         }
     def _search(self, args: dict) -> dict:
-        """Search indexed content with hybrid semantic + keyword matching."""
+        """Search indexed content with smart query understanding."""
         results = self.db.search(
             query=args["query"],
             limit=args.get("limit", 10),
@@ -346,6 +351,7 @@ class RagtimeMCPServer:
             type_filter=args.get("type"),
             component=args.get("component"),
             require_terms=args.get("require_terms"),
+            auto_extract=args.get("auto_extract", True),
         )
         return {
@@ -493,7 +499,7 @@ class RagtimeMCPServer:
                         "protocolVersion": "2024-11-05",
                         "serverInfo": {
                             "name": "ragtime",
-                            "version": "0.2.12",
+                            "version": "0.2.14",
                         },
                         "capabilities": {
                             "tools": {},

{ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/memory.py RENAMED Viewed

@@ -110,35 +110,83 @@ class Memory:
         slug = re.sub(r'[-\s]+', '-', slug).strip('-')
         return slug[:40]  # Limit length
+    @classmethod
+    def _infer_metadata_from_path(cls, relative_path: str) -> dict:
+        """
+        Infer namespace, component, and type from folder structure.
+        Supports:
+          app/{component}/*.md → namespace=app, component={component}
+          app/*.md → namespace=app
+          team/*.md → namespace=team
+          users/{username}/*.md → namespace=user-{username}
+          branches/{branch}/*.md → namespace=branch-{branch}
+        """
+        parts = relative_path.replace("\\", "/").split("/")
+        metadata = {}
+        if len(parts) >= 1:
+            first = parts[0]
+            if first == "app":
+                metadata["namespace"] = "app"
+                if len(parts) >= 3:  # app/{component}/file.md
+                    metadata["component"] = parts[1]
+            elif first == "team":
+                metadata["namespace"] = "team"
+            elif first == "users" and len(parts) >= 2:
+                metadata["namespace"] = f"user-{parts[1]}"
+            elif first == "branches" and len(parts) >= 2:
+                metadata["namespace"] = f"branch-{parts[1]}"
+        return metadata
     @classmethod
     def from_file(cls, path: Path, relative_to: Optional[Path] = None) -> "Memory":
         """
         Parse a memory from a markdown file with YAML frontmatter.
+        If no frontmatter exists, infers metadata from folder structure.
         Args:
             path: Full path to the markdown file
             relative_to: Base directory to compute relative path from (for indexing)
         """
         text = path.read_text()
+        # Compute relative path for inference and indexing
+        file_path = None
+        if relative_to:
+            try:
+                file_path = str(path.relative_to(relative_to))
+            except ValueError:
+                pass
+        # Handle files without frontmatter - infer from path
         if not text.startswith("---"):
-            raise ValueError(f"No YAML frontmatter found in {path}")
+            inferred = cls._infer_metadata_from_path(file_path or str(path))
+            # Generate stable ID from path
+            memory_id = hashlib.sha256((file_path or str(path)).encode()).hexdigest()[:8]
+            return cls(
+                id=memory_id,
+                content=text.strip(),
+                namespace=inferred.get("namespace", "app"),
+                type=inferred.get("type", "note"),
+                component=inferred.get("component"),
+                source="file",
+                _file_path=file_path,
+            )
         # Split frontmatter and content
         parts = text.split("---", 2)
         if len(parts) < 3:
             raise ValueError(f"Invalid frontmatter format in {path}")
-        frontmatter = yaml.safe_load(parts[1])
+        frontmatter = yaml.safe_load(parts[1]) or {}
         content = parts[2].strip()
-        # Compute relative file path for indexing
-        file_path = None
-        if relative_to:
-            try:
-                file_path = str(path.relative_to(relative_to))
-            except ValueError:
-                pass  # path not relative to base, will regenerate
+        # Infer missing metadata from folder structure
+        inferred = cls._infer_metadata_from_path(file_path or str(path))
         # Use frontmatter ID if present, otherwise derive stable ID from file path
         # This ensures reindex is idempotent - same file always gets same ID
@@ -154,9 +202,10 @@ class Memory:
         return cls(
             id=memory_id,
             content=content,
-            namespace=frontmatter.get("namespace", "app"),
-            type=frontmatter.get("type", "unknown"),
-            component=frontmatter.get("component"),
+            # Use frontmatter if present, fall back to inferred, then defaults
+            namespace=frontmatter.get("namespace") or inferred.get("namespace", "app"),
+            type=frontmatter.get("type") or inferred.get("type", "note"),
+            component=frontmatter.get("component") or inferred.get("component"),
             confidence=frontmatter.get("confidence", "medium"),
             confidence_reason=frontmatter.get("confidence_reason"),
             source=frontmatter.get("source", "file"),
@@ -423,7 +472,9 @@ class MemoryStore:
         """
         Reindex all memory files.
-        Scans .ragtime/ and indexes any files not in ChromaDB.
+        Scans .ragtime/ and indexes files. Removes old entries for each file
+        before upserting to prevent duplicates from ID changes.
         Returns count of files indexed.
         """
         if not self.memory_dir.exists():
@@ -432,7 +483,13 @@ class MemoryStore:
         count = 0
         for md_file in self.memory_dir.rglob("*.md"):
             try:
-                # Pass memory_dir so the actual file path is stored, not regenerated
+                # Compute relative path for this file
+                rel_path = str(md_file.relative_to(self.memory_dir))
+                # Delete any existing entries for this file path (handles ID changes)
+                self.db.delete_by_file([rel_path])
+                # Parse and index with stable ID
                 memory = Memory.from_file(md_file, relative_to=self.memory_dir)
                 self.db.upsert(
                     ids=[memory.id],