ragtime-cli 0.2.13-py3-none-any.whl → 0.2.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/METADATA +1 -1
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/RECORD +11 -10
- src/cli.py +14 -9
- src/db.py +194 -4
- src/feedback.py +202 -0
- src/indexers/docs.py +197 -19
- src/mcp_server.py +133 -14
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/WHEEL +0 -0
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/entry_points.txt +0 -0
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/licenses/LICENSE +0 -0
- {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/top_level.txt +0 -0
{ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.13
+Version: 0.2.15
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT
{ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/RECORD
CHANGED

@@ -1,9 +1,10 @@
-ragtime_cli-0.2.13.dist-info/…
+ragtime_cli-0.2.15.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
 src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-src/cli.py,sha256=…
+src/cli.py,sha256=RmH3M9NvZcIO4sjYgrEJJrD-mn2mcK4dPyqaBxrhdeU,76984
 src/config.py,sha256=tQ6gPLr4ksn2bJPIUjtELFr-k01Eg4g-LDo3GNE6P0Q,4600
-src/db.py,sha256=…
-src/…
+src/db.py,sha256=eWqFGrg3O6hve67EzRJGcAsIpYxWJo4JlrAtlZUUA_s,15169
+src/feedback.py,sha256=cPw_lzusZZPvkgUxs_eV67NtV1FoCfTXUulBPnD78lo,6455
+src/mcp_server.py,sha256=QHU8jtPdA-kEzoXj88ZM0XhFvwhIngKD8Ow4plvHBfM,26498
 src/memory.py,sha256=UiHyudKbseMMY-sdcaDSfVBMGj6sFXXw1GxBsZ7nuBc,18450
 src/commands/audit.md,sha256=Xkucm-gfBIMalK9wf7NBbyejpsqBTUAGGlb7GxMtMPY,5137
 src/commands/create-pr.md,sha256=u6-jVkDP_6bJQp6ImK039eY9F6B9E2KlAVlvLY-WV6Q,9483

@@ -17,9 +18,9 @@ src/commands/save.md,sha256=7gTpW46AU9Y4l8XVZ8f4h1sEdBfVqIRA7hlidUxMAC4,251
 src/commands/start.md,sha256=qoqhkMgET74DBx8YPIT1-wqCiVBUDxlmevigsCinHSY,6506
 src/indexers/__init__.py,sha256=MYoCPZUpHakMX1s2vWnc9shjWfx_X1_0JzUhpKhnKUQ,454
 src/indexers/code.py,sha256=G2TbiKbWj0e7DV5KsU8-Ggw6ziDb4zTuZ4Bu3ryV4g8,18059
-src/indexers/docs.py,sha256=…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
-ragtime_cli-0.2.13.dist-info/…
+src/indexers/docs.py,sha256=Q8krHYw0bybUyZaq1sJ0r6Fv-I_6BjTufhqI1eg_25s,9992
+ragtime_cli-0.2.15.dist-info/METADATA,sha256=J0tETjffr7XYMo3VmUwtm6SqUUnsuPVkrNpw5VYcgd8,11269
+ragtime_cli-0.2.15.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ragtime_cli-0.2.15.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
+ragtime_cli-0.2.15.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
+ragtime_cli-0.2.15.dist-info/RECORD,,
src/cli.py
CHANGED

@@ -381,13 +381,13 @@ def index(path: Path, index_type: str, clear: bool):
             item_show_func=lambda f: f.name[:30] if f else "",
         ) as files:
             for file_path in files:
-…
+                # index_doc_file returns list (hierarchical chunks)
+                file_entries = index_doc_file(file_path)
+                entries.extend(file_entries)

         if entries:
             _upsert_entries(db, entries, "docs")
-            click.echo(f"  Indexed {len(entries)} …
+            click.echo(f"  Indexed {len(entries)} document chunks")
         elif not to_delete:
             click.echo("  All docs up to date")
         else:

@@ -470,17 +470,21 @@ def index(path: Path, index_type: str, clear: bool):
 @click.option("--type", "type_filter", type=click.Choice(["all", "docs", "code"]), default="all")
 @click.option("--namespace", "-n", help="Filter by namespace")
 @click.option("--require", "-r", "require_terms", multiple=True,
-              help="…
+              help="Additional terms that MUST appear (usually auto-detected)")
+@click.option("--raw", is_flag=True, help="Disable auto-detection of qualifiers")
 @click.option("--include-archive", is_flag=True, help="Also search archived branches")
 @click.option("--limit", "-l", default=5, help="Max results")
 @click.option("--verbose", "-v", is_flag=True, help="Show full content")
 def search(query: str, path: Path, type_filter: str, namespace: str,
-           require_terms: tuple, include_archive: bool, limit: int, verbose: bool):
+           require_terms: tuple, raw: bool, include_archive: bool, limit: int, verbose: bool):
     """
-…
+    Smart search: auto-detects qualifiers like 'mobile', 'auth', 'dart'.

-…
+    \b
+    Examples:
+        ragtime search "error handling in mobile"   # auto-requires 'mobile'
+        ragtime search "auth flow"                  # auto-requires 'auth'
+        ragtime search "useAsyncState" --raw        # literal search, no extraction
     """
     path = Path(path).resolve()
     db = get_db(path)

@@ -493,6 +497,7 @@ def search(query: str, path: Path, type_filter: str, namespace: str,
         type_filter=type_arg,
         namespace=namespace,
         require_terms=list(require_terms) if require_terms else None,
+        auto_extract=not raw,
     )

     if not results:
src/db.py
CHANGED

@@ -4,12 +4,74 @@ ChromaDB wrapper for ragtime.
 Handles storage and retrieval of indexed documents and code.
 """

+import re
 from pathlib import Path
 from typing import Any
 import chromadb
 from chromadb.config import Settings


+def extract_query_hints(query: str, known_components: list[str] | None = None) -> tuple[str, list[str]]:
+    """
+    Extract component/scope hints from a query for hybrid search.
+
+    Detects patterns like "X in mobile", "mobile X", "X for auth" and extracts
+    the qualifier to use as require_terms. This prevents qualifiers from being
+    diluted in semantic search.
+
+    Args:
+        query: The natural language search query
+        known_components: Optional list of known component names to detect
+
+    Returns:
+        (cleaned_query, extracted_terms) - query with hints removed, terms to require
+    """
+    # Default known components/scopes (common patterns)
+    default_components = [
+        # Platforms
+        "mobile", "web", "desktop", "ios", "android", "flutter", "react", "vue",
+        # Languages
+        "dart", "python", "typescript", "javascript", "ts", "js", "py",
+        # Common components
+        "auth", "authentication", "api", "database", "db", "ui", "frontend", "backend",
+        "server", "client", "admin", "user", "payment", "billing", "notification",
+        "email", "cache", "queue", "worker", "scheduler", "logging", "metrics",
+    ]
+
+    components = set(c.lower() for c in (known_components or default_components))
+    extracted = []
+    cleaned = query
+
+    # Pattern 1: "X in/for/on {component}" - extract component
+    patterns = [
+        r'\b(?:in|for|on|from|using|with)\s+(?:the\s+)?(\w+)\s*(?:app|code|module|service|codebase)?(?:\s|$)',
+        r'\b(\w+)\s+(?:app|code|module|service|codebase)\b',
+    ]
+
+    for pattern in patterns:
+        for match in re.finditer(pattern, query, re.IGNORECASE):
+            word = match.group(1).lower()
+            if word in components:
+                extracted.append(word)
+                # Remove the matched phrase from query
+                cleaned = cleaned[:match.start()] + " " + cleaned[match.end():]
+
+    # Pattern 2: Check if any known component appears as standalone word
+    words = re.findall(r'\b\w+\b', query.lower())
+    for word in words:
+        if word in components and word not in extracted:
+            # Only extract if it looks like a qualifier (not the main subject)
+            # Heuristic: if query has other meaningful words, it's likely a qualifier
+            other_words = [w for w in words if w != word and len(w) > 3]
+            if len(other_words) >= 2:
+                extracted.append(word)
+
+    # Clean up extra whitespace
+    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+
+    return cleaned, list(set(extracted))
+
+
 class RagtimeDB:
     """Vector database for ragtime indexes."""

@@ -85,6 +147,7 @@ class RagtimeDB:
         type_filter: str | None = None,
         namespace: str | None = None,
         require_terms: list[str] | None = None,
+        auto_extract: bool = True,
         **filters,
     ) -> list[dict]:
         """

@@ -98,11 +161,26 @@ class RagtimeDB:
             require_terms: List of terms that MUST appear in results (case-insensitive).
                            Use for scoped queries like "error handling in mobile" with
                            require_terms=["mobile"] to ensure "mobile" isn't ignored.
+            auto_extract: If True (default), automatically detect component qualifiers
+                          in the query and add them to require_terms. Set to False
+                          for raw/literal search.
             **filters: Additional metadata filters (None values are ignored)

         Returns:
             List of dicts with 'content', 'metadata', 'distance'
         """
+        # Auto-extract component hints from query if enabled
+        search_query = query
+        all_require_terms = list(require_terms) if require_terms else []
+
+        if auto_extract:
+            cleaned_query, extracted = extract_query_hints(query)
+            if extracted:
+                # Use cleaned query for embedding (removes noise)
+                search_query = cleaned_query
+                # Add extracted terms to require_terms
+                all_require_terms.extend(extracted)
+                all_require_terms = list(set(all_require_terms))  # dedupe
         # Build list of filter conditions, excluding None values
         conditions = []

@@ -126,10 +204,10 @@ class RagtimeDB:
             where = {"$and": conditions}

         # When using require_terms, fetch more results since we'll filter some out
-        fetch_limit = limit * 5 if …
+        fetch_limit = limit * 5 if all_require_terms else limit

         results = self.collection.query(
-            query_texts=[…
+            query_texts=[search_query],
             n_results=fetch_limit,
             where=where,
         )

@@ -139,13 +217,13 @@ class RagtimeDB:
         if results["documents"] and results["documents"][0]:
             for i, doc in enumerate(results["documents"][0]):
                 # Hybrid filtering: ensure required terms appear
-                if …
+                if all_require_terms:
                     doc_lower = doc.lower()
                     # Also check file path in metadata for code/file matches
                     file_path = (results["metadatas"][0][i].get("file", "") or "").lower()
                     combined_text = f"{doc_lower} {file_path}"

-                    if not all(term.lower() in combined_text for term in …
+                    if not all(term.lower() in combined_text for term in all_require_terms):
                         continue

                 output.append({

@@ -160,6 +238,118 @@ class RagtimeDB:
         return output

+    def search_tiered(
+        self,
+        query: str,
+        limit: int = 10,
+        namespace: str | None = None,
+        require_terms: list[str] | None = None,
+        auto_extract: bool = True,
+        **filters,
+    ) -> list[dict]:
+        """
+        Tiered search: prioritizes memories > docs > code.
+
+        Searches in priority order, filling up to limit:
+        1. Memories (curated, high-signal knowledge)
+        2. Documentation (indexed markdown)
+        3. Code (broadest, implementation details)
+
+        Args:
+            query: Natural language search query
+            limit: Max total results to return
+            namespace: Filter by namespace
+            require_terms: Terms that MUST appear in results
+            auto_extract: Auto-detect qualifiers from query
+            **filters: Additional metadata filters
+
+        Returns:
+            List of dicts with 'content', 'metadata', 'distance', 'tier'
+        """
+        results = []
+
+        # Tier 1: Memories (not docs or code)
+        memory_results = self._search_tier(
+            query=query,
+            tier_name="memory",
+            exclude_types=["docs", "code"],
+            limit=limit,
+            namespace=namespace,
+            require_terms=require_terms,
+            auto_extract=auto_extract,
+            **filters,
+        )
+        results.extend(memory_results)
+
+        # Tier 2: Documentation
+        if len(results) < limit:
+            doc_results = self._search_tier(
+                query=query,
+                tier_name="docs",
+                type_filter="docs",
+                limit=limit - len(results),
+                namespace=namespace,
+                require_terms=require_terms,
+                auto_extract=auto_extract,
+                **filters,
+            )
+            results.extend(doc_results)
+
+        # Tier 3: Code
+        if len(results) < limit:
+            code_results = self._search_tier(
+                query=query,
+                tier_name="code",
+                type_filter="code",
+                limit=limit - len(results),
+                namespace=namespace,
+                require_terms=require_terms,
+                auto_extract=auto_extract,
+                **filters,
+            )
+            results.extend(code_results)
+
+        return results
+
+    def _search_tier(
+        self,
+        query: str,
+        tier_name: str,
+        limit: int,
+        type_filter: str | None = None,
+        exclude_types: list[str] | None = None,
+        **kwargs,
+    ) -> list[dict]:
+        """Search a single tier and tag results."""
+        # Build where clause for exclusion if needed
+        if exclude_types:
+            # Search without type filter, then exclude in post-processing
+            results = self.search(
+                query=query,
+                limit=limit * 2,  # fetch more since we'll filter
+                type_filter=None,
+                **kwargs,
+            )
+            # Filter out excluded types
+            filtered = []
+            for r in results:
+                if r["metadata"].get("type") not in exclude_types:
+                    r["tier"] = tier_name
+                    filtered.append(r)
+                    if len(filtered) >= limit:
+                        break
+            return filtered
+        else:
+            results = self.search(
+                query=query,
+                limit=limit,
+                type_filter=type_filter,
+                **kwargs,
+            )
+            for r in results:
+                r["tier"] = tier_name
+            return results
+
     def delete(self, ids: list[str]) -> None:
         """Delete documents by ID."""
         self.collection.delete(ids=ids)
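Taken together, the hunks above change both what gets embedded and what gets enforced: the qualifier is stripped out of the embedding text and re-applied as a hard substring filter. A minimal sketch of the behaviour, runnable against the functions shown above (the `db` handle is assumed to be an already-populated RagtimeDB instance; its construction is not part of this diff):

    from src.db import extract_query_hints

    cleaned, terms = extract_query_hints("error handling in mobile")
    # cleaned == "error handling", terms == ["mobile"]: the qualifier moves
    # out of the semantic query and into require_terms.

    cleaned, terms = extract_query_hints("useAsyncState")
    # cleaned == "useAsyncState", terms == []: unknown identifiers pass
    # through untouched, which is what the CLI --raw flag guarantees.

    # Assumed pre-existing, populated RagtimeDB instance `db`:
    # search("error handling in mobile") now behaves roughly like
    # search("error handling", require_terms=["mobile"]).
    results = db.search("error handling in mobile", limit=5)

    # Tiered search fills the limit from memories first, then docs, then code.
    for r in db.search_tiered("error handling in mobile", limit=10):
        print(r["tier"], r["metadata"].get("file"), r["distance"])

One design consequence of the fill-in-order tiers: code results only surface when memories and docs together return fewer than `limit` hits, so conceptual queries favour curated knowledge by construction.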
src/feedback.py
ADDED

@@ -0,0 +1,202 @@
+"""
+Feedback loop for RAG result quality improvement.
+
+Tracks which search results are actually used/referenced by Claude,
+enabling re-ranking and quality improvements over time.
+"""
+
+import json
+from pathlib import Path
+from dataclasses import dataclass, field, asdict
+from datetime import datetime
+from typing import Optional
+
+
+@dataclass
+class SearchFeedback:
+    """Feedback for a single search result."""
+    query: str
+    result_id: str  # ChromaDB document ID
+    result_file: str  # File path for easier debugging
+    action: str  # "used", "referenced", "ignored", "helpful", "not_helpful"
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+    session_id: Optional[str] = None  # Group related searches
+    position: int = 0  # Position in results (1-indexed)
+    distance: float = 0.0  # Original semantic distance
+
+
+class FeedbackStore:
+    """
+    Simple file-based feedback storage.
+
+    Stores feedback as JSON lines for easy analysis.
+    Can be upgraded to SQLite or ChromaDB later.
+    """
+
+    def __init__(self, path: Path):
+        """
+        Initialize feedback store.
+
+        Args:
+            path: Directory to store feedback data
+        """
+        self.path = path
+        self.feedback_file = path / "feedback.jsonl"
+        self.stats_file = path / "feedback_stats.json"
+        path.mkdir(parents=True, exist_ok=True)
+
+    def record(self, feedback: SearchFeedback) -> None:
+        """Record a single feedback entry."""
+        with open(self.feedback_file, "a") as f:
+            f.write(json.dumps(asdict(feedback)) + "\n")
+
+    def record_usage(
+        self,
+        query: str,
+        result_id: str,
+        result_file: str,
+        position: int = 0,
+        distance: float = 0.0,
+        session_id: Optional[str] = None,
+    ) -> None:
+        """Convenience method to record when a result is used."""
+        self.record(SearchFeedback(
+            query=query,
+            result_id=result_id,
+            result_file=result_file,
+            action="used",
+            position=position,
+            distance=distance,
+            session_id=session_id,
+        ))
+
+    def record_batch(
+        self,
+        query: str,
+        used_ids: list[str],
+        all_results: list[dict],
+        session_id: Optional[str] = None,
+    ) -> None:
+        """
+        Record feedback for a batch of results.
+
+        Marks used_ids as "used" and others as "ignored".
+        """
+        used_set = set(used_ids)
+
+        for i, result in enumerate(all_results):
+            result_id = result.get("id", "")
+            result_file = result.get("metadata", {}).get("file", "")
+            distance = result.get("distance", 0.0)
+
+            action = "used" if result_id in used_set else "ignored"
+
+            self.record(SearchFeedback(
+                query=query,
+                result_id=result_id,
+                result_file=result_file,
+                action=action,
+                position=i + 1,
+                distance=distance,
+                session_id=session_id,
+            ))
+
+    def get_usage_stats(self) -> dict:
+        """
+        Get aggregated usage statistics.
+
+        Returns:
+            Dict with usage counts, popular files, etc.
+        """
+        if not self.feedback_file.exists():
+            return {"total": 0, "used": 0, "ignored": 0}
+
+        stats = {
+            "total": 0,
+            "used": 0,
+            "ignored": 0,
+            "helpful": 0,
+            "not_helpful": 0,
+            "files_used": {},  # file -> count
+            "avg_position_used": 0.0,
+        }
+
+        positions = []
+
+        with open(self.feedback_file) as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    entry = json.loads(line)
+                    stats["total"] += 1
+                    action = entry.get("action", "")
+
+                    if action == "used":
+                        stats["used"] += 1
+                        positions.append(entry.get("position", 0))
+                        file_path = entry.get("result_file", "")
+                        stats["files_used"][file_path] = stats["files_used"].get(file_path, 0) + 1
+                    elif action == "ignored":
+                        stats["ignored"] += 1
+                    elif action == "helpful":
+                        stats["helpful"] += 1
+                    elif action == "not_helpful":
+                        stats["not_helpful"] += 1
+                except json.JSONDecodeError:
+                    continue
+
+        if positions:
+            stats["avg_position_used"] = sum(positions) / len(positions)
+
+        return stats
+
+    def get_boost_scores(self) -> dict[str, float]:
+        """
+        Calculate boost scores for files based on historical usage.
+
+        Returns:
+            Dict mapping file paths to boost multipliers (1.0 = no boost).
+        """
+        stats = self.get_usage_stats()
+        files_used = stats.get("files_used", {})
+
+        if not files_used:
+            return {}
+
+        # Normalize to 0-1 range, then convert to boost multiplier
+        max_count = max(files_used.values())
+        boosts = {}
+
+        for file_path, count in files_used.items():
+            # Boost range: 1.0 (no boost) to 1.5 (50% boost for most-used)
+            normalized = count / max_count
+            boosts[file_path] = 1.0 + (normalized * 0.5)
+
+        return boosts
+
+    def apply_boosts(self, results: list[dict], boosts: dict[str, float]) -> list[dict]:
+        """
+        Apply historical boost scores to search results.
+
+        Adjusts distances based on historical usage patterns.
+        Lower distance = more relevant, so we divide by boost.
+        """
+        if not boosts:
+            return results
+
+        for result in results:
+            file_path = result.get("metadata", {}).get("file", "")
+            boost = boosts.get(file_path, 1.0)
+            if "distance" in result and result["distance"]:
+                # Reduce distance for frequently-used files
+                result["distance"] = result["distance"] / boost
+                result["boosted"] = boost > 1.0
+
+        # Re-sort by adjusted distance
+        return sorted(results, key=lambda r: r.get("distance", float("inf")))
+
+    def clear(self) -> None:
+        """Clear all feedback data."""
+        if self.feedback_file.exists():
+            self.feedback_file.unlink()
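The boost arithmetic in get_boost_scores is linear in relative usage: boost = 1.0 + 0.5 * (count / max_count), so the most-used file gets 1.5 and a file used half as often gets 1.25. A short usage sketch of the store (file paths, the query, and the distance are illustrative values only):

    from pathlib import Path
    from src.feedback import FeedbackStore

    store = FeedbackStore(Path(".ragtime/feedback"))

    # Record that the top-ranked hit for a query was actually used.
    store.record_usage(
        query="error handling in mobile",
        result_id="",                        # the MCP layer matches by file path
        result_file="docs/mobile-errors.md",
        position=1,
        distance=0.42,
    )

    # With, say, 4 recorded uses of docs/mobile-errors.md and 2 of docs/auth.md:
    #   boosts == {"docs/mobile-errors.md": 1.5, "docs/auth.md": 1.25}
    boosts = store.get_boost_scores()

apply_boosts then divides each result's distance by its file's boost and re-sorts ascending, so historically useful files float upward, capped at a 1.5x distance reduction.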
src/indexers/docs.py
CHANGED

@@ -21,6 +21,10 @@ class DocEntry:
     component: str | None = None
     title: str | None = None
     mtime: float | None = None  # File modification time for incremental indexing
+    # Hierarchical chunking fields
+    section_path: str | None = None  # e.g., "Installation > Configuration > Environment Variables"
+    section_level: int = 0  # Header depth (0=whole doc, 1=h1, 2=h2, etc.)
+    chunk_index: int = 0  # Position within file (for stable IDs)

     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""

@@ -32,6 +36,8 @@ class DocEntry:
             "component": self.component or "",
             "title": self.title or Path(self.file_path).stem,
             "mtime": self.mtime or 0.0,
+            "section_path": self.section_path or "",
+            "section_level": self.section_level,
         }

@@ -56,33 +62,200 @@ def parse_frontmatter(content: str) -> tuple[dict, str]:
     return {}, content


-…
+@dataclass
+class Section:
+    """A markdown section for hierarchical chunking."""
+    title: str
+    level: int  # 1-6 for h1-h6
+    content: str
+    line_start: int
+    parent_path: list[str]  # Parent headers for context
+
+
+def chunk_by_headers(
+    content: str,
+    min_chunk_size: int = 100,
+    max_chunk_size: int = 2000,
+) -> list[Section]:
+    """
+    Split markdown into sections by headers, preserving hierarchy.
+
+    Args:
+        content: Markdown body (without frontmatter)
+        min_chunk_size: Minimum chars to make a standalone section
+        max_chunk_size: Maximum chars before splitting further
+
+    Returns:
+        List of Section objects with hierarchical context
+    """
+    lines = content.split('\n')
+    sections: list[Section] = []
+    header_stack: list[tuple[int, str]] = []  # (level, title) for building paths
+
+    current_section_lines: list[str] = []
+    current_section_start = 0
+    current_title = ""
+    current_level = 0
+
+    def flush_section():
+        """Save accumulated lines as a section."""
+        nonlocal current_section_lines, current_section_start, current_title, current_level
+
+        text = '\n'.join(current_section_lines).strip()
+        if text:
+            # Build parent path from stack (excluding current)
+            parent_path = [h[1] for h in header_stack[:-1]] if header_stack else []
+
+            sections.append(Section(
+                title=current_title or "Introduction",
+                level=current_level,
+                content=text,
+                line_start=current_section_start,
+                parent_path=parent_path,
+            ))
+        current_section_lines = []
+
+    for i, line in enumerate(lines):
+        # Detect markdown headers
+        header_match = re.match(r'^(#{1,6})\s+(.+)$', line)
+
+        if header_match:
+            # Save previous section
+            flush_section()
+
+            level = len(header_match.group(1))
+            title = header_match.group(2).strip()
+
+            # Update header stack - pop headers at same or lower level
+            while header_stack and header_stack[-1][0] >= level:
+                header_stack.pop()
+            header_stack.append((level, title))
+
+            current_title = title
+            current_level = level
+            current_section_start = i
+            current_section_lines = [line]  # Include header in content
+        else:
+            current_section_lines.append(line)
+
+    # Don't forget the last section
+    flush_section()
+
+    # Post-process: merge tiny sections into parents, split huge ones
+    processed: list[Section] = []
+    for section in sections:
+        if len(section.content) < min_chunk_size and processed:
+            # Merge into previous section
+            processed[-1].content += '\n\n' + section.content
+        elif len(section.content) > max_chunk_size:
+            # Split by paragraphs
+            paragraphs = re.split(r'\n\n+', section.content)
+            current_chunk = ""
+            chunk_num = 0
+
+            for para in paragraphs:
+                if len(current_chunk) + len(para) > max_chunk_size and current_chunk:
+                    processed.append(Section(
+                        title=f"{section.title} (part {chunk_num + 1})",
+                        level=section.level,
+                        content=current_chunk.strip(),
+                        line_start=section.line_start,
+                        parent_path=section.parent_path,
+                    ))
+                    current_chunk = para
+                    chunk_num += 1
+                else:
+                    current_chunk += '\n\n' + para if current_chunk else para
+
+            if current_chunk.strip():
+                title = f"{section.title} (part {chunk_num + 1})" if chunk_num > 0 else section.title
+                processed.append(Section(
+                    title=title,
+                    level=section.level,
+                    content=current_chunk.strip(),
+                    line_start=section.line_start,
+                    parent_path=section.parent_path,
+                ))
+        else:
+            processed.append(section)
+
+    return processed
+
+
+def index_file(file_path: Path, hierarchical: bool = True) -> list[DocEntry]:
     """
-    Parse a single markdown file into …
+    Parse a single markdown file into DocEntry objects.
+
+    Args:
+        file_path: Path to the markdown file
+        hierarchical: If True, chunk by headers for better semantic search.
+                      If False, return whole file as single entry.

-    Returns…
+    Returns:
+        List of DocEntry objects (one per section if hierarchical, else one for whole file).
+        Empty list if file can't be parsed.
     """
     try:
         content = file_path.read_text(encoding='utf-8')
         mtime = os.path.getmtime(file_path)
     except (IOError, UnicodeDecodeError, OSError):
-        return
+        return []

     metadata, body = parse_frontmatter(content)

     # Skip empty documents
     if not body.strip():
-        return
+        return []

-…
+    # Base metadata from frontmatter
+    base_namespace = metadata.get("namespace")
+    base_category = metadata.get("category")
+    base_component = metadata.get("component")
+    base_title = metadata.get("title") or file_path.stem
+
+    # Short docs: return as single entry
+    if not hierarchical or len(body) < 500:
+        return [DocEntry(
+            content=body.strip(),
+            file_path=str(file_path),
+            namespace=base_namespace,
+            category=base_category,
+            component=base_component,
+            title=base_title,
+            mtime=mtime,
+            section_path="",
+            section_level=0,
+            chunk_index=0,
+        )]
+
+    # Hierarchical chunking for longer docs
+    sections = chunk_by_headers(body)
+    entries = []
+
+    for i, section in enumerate(sections):
+        # Build full section path: "Parent > Child > Current"
+        path_parts = section.parent_path + [section.title]
+        section_path = " > ".join(path_parts)
+
+        # Prepend context for better embeddings
+        context_prefix = f"# {base_title}\n"
+        if section.parent_path:
+            context_prefix += f"Section: {' > '.join(section.parent_path)}\n\n"
+
+        entries.append(DocEntry(
+            content=context_prefix + section.content,
+            file_path=str(file_path),
+            namespace=base_namespace,
+            category=base_category,
+            component=base_component,
+            title=section.title,
+            mtime=mtime,
+            section_path=section_path,
+            section_level=section.level,
+            chunk_index=i,
+        ))
+
+    return entries


 def discover_docs(

@@ -117,18 +290,23 @@ def discover_docs(
     return files


-def index_directory(root: Path, **kwargs) -> list[DocEntry]:
+def index_directory(root: Path, hierarchical: bool = True, **kwargs) -> list[DocEntry]:
     """
     Index all markdown files in a directory.

-…
+    Args:
+        root: Directory to search
+        hierarchical: If True, chunk long docs by headers
+        **kwargs: Passed to discover_docs (patterns, exclude)
+
+    Returns:
+        List of DocEntry objects ready for vector DB.
     """
     files = discover_docs(root, **kwargs)
     entries = []

     for file_path in files:
-…
-        entries.append(entry)
+        file_entries = index_file(file_path, hierarchical=hierarchical)
+        entries.extend(file_entries)

     return entries
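The chunker is easiest to see on a tiny document. A runnable sketch against chunk_by_headers as shown above (the import path assumes the wheel's top-level `src` package; both section bodies are deliberately padded past the 100-character min_chunk_size so post-processing does not merge them away):

    from src.indexers.docs import chunk_by_headers

    md = """# Guide
    An introduction long enough to survive post-processing: anything under the
    100-character min_chunk_size default is merged into the previous section.

    ## Install
    Run `pipx install ragtime-cli`. This body is also padded well past the
    minimum so it stays a standalone chunk after post-processing.
    """

    for s in chunk_by_headers(md):
        print(s.level, " > ".join(s.parent_path + [s.title]))
    # 1 Guide
    # 2 Guide > Install

index_file then prepends the document title (and the parent-section trail) to each chunk before embedding, so a hit on the Install section still carries its Guide context.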
src/mcp_server.py
CHANGED

@@ -13,6 +13,7 @@ from typing import Any

 from .db import RagtimeDB
 from .memory import Memory, MemoryStore
+from .feedback import FeedbackStore, SearchFeedback


 class RagtimeMCPServer:

@@ -28,6 +29,7 @@ class RagtimeMCPServer:
         self.project_path = project_path or Path.cwd()
         self._db = None
         self._store = None
+        self._feedback = None

     @property
     def db(self) -> RagtimeDB:

@@ -44,6 +46,14 @@ class RagtimeMCPServer:
             self._store = MemoryStore(self.project_path, self.db)
         return self._store

+    @property
+    def feedback(self) -> FeedbackStore:
+        """Lazy-load the feedback store."""
+        if self._feedback is None:
+            feedback_path = self.project_path / ".ragtime" / "feedback"
+            self._feedback = FeedbackStore(feedback_path)
+        return self._feedback
+
     def get_author(self) -> str:
         """Get the current developer's username."""
         try:

@@ -132,13 +142,18 @@ class RagtimeMCPServer:
             },
             {
                 "name": "search",
-                "description": "…
+                "description": "Smart hybrid search over indexed content. Auto-detects qualifiers like 'mobile', 'auth', 'dart' and ensures they appear in results. Use tiered=true for priority ordering (memories > docs > code). Returns summaries with file paths - use Read tool for full implementations.",
                 "inputSchema": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": "Natural language search query"
+                            "description": "Natural language search query. Qualifiers like 'in mobile', 'for auth', 'dart' are auto-detected."
+                        },
+                        "tiered": {
+                            "type": "boolean",
+                            "default": False,
+                            "description": "If true, search in priority order: memories (curated) > docs > code. Good for conceptual queries."
                         },
                         "namespace": {
                             "type": "string",

@@ -146,7 +161,7 @@ class RagtimeMCPServer:
                         },
                         "type": {
                             "type": "string",
-                            "description": "Filter by type (docs, code, architecture, etc.)"
+                            "description": "Filter by type (docs, code, architecture, etc.). Ignored if tiered=true."
                         },
                         "component": {
                             "type": "string",

@@ -155,7 +170,12 @@ class RagtimeMCPServer:
                         "require_terms": {
                             "type": "array",
                             "items": {"type": "string"},
-                            "description": "…
+                            "description": "Additional terms that MUST appear in results. Usually not needed since qualifiers are auto-detected."
+                        },
+                        "auto_extract": {
+                            "type": "boolean",
+                            "default": True,
+                            "description": "Auto-detect component qualifiers from query. Set to false for literal search."
                         },
                         "limit": {
                             "type": "integer",

@@ -287,6 +307,42 @@ class RagtimeMCPServer:
                     },
                     "required": ["memory_id", "status"]
                 }
+            },
+            {
+                "name": "record_feedback",
+                "description": "Record feedback when search results are used or referenced. Call this after using a search result to improve future rankings.",
+                "inputSchema": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The original search query"
+                        },
+                        "result_file": {
+                            "type": "string",
+                            "description": "File path of the result that was used"
+                        },
+                        "action": {
+                            "type": "string",
+                            "enum": ["used", "referenced", "helpful", "not_helpful"],
+                            "default": "used",
+                            "description": "What happened with this result"
+                        },
+                        "position": {
+                            "type": "integer",
+                            "description": "Position in search results (1-indexed)"
+                        }
+                    },
+                    "required": ["query", "result_file"]
+                }
+            },
+            {
+                "name": "feedback_stats",
+                "description": "Get statistics about search result usage patterns",
+                "inputSchema": {
+                    "type": "object",
+                    "properties": {}
+                }
             }
         ]

@@ -308,6 +364,10 @@ class RagtimeMCPServer:
             return self._graduate(arguments)
         elif name == "update_status":
             return self._update_status(arguments)
+        elif name == "record_feedback":
+            return self._record_feedback(arguments)
+        elif name == "feedback_stats":
+            return self._feedback_stats(arguments)
         else:
             raise ValueError(f"Unknown tool: {name}")

@@ -338,23 +398,43 @@ class RagtimeMCPServer:
         }

     def _search(self, args: dict) -> dict:
-        """Search indexed content with …
-…
+        """Search indexed content with smart query understanding."""
+        if args.get("tiered", False):
+            # Tiered search: memories > docs > code
+            results = self.db.search_tiered(
+                query=args["query"],
+                limit=args.get("limit", 10),
+                namespace=args.get("namespace"),
+                require_terms=args.get("require_terms"),
+                auto_extract=args.get("auto_extract", True),
+                component=args.get("component"),
+            )
+        else:
+            results = self.db.search(
+                query=args["query"],
+                limit=args.get("limit", 10),
+                namespace=args.get("namespace"),
+                type_filter=args.get("type"),
+                component=args.get("component"),
+                require_terms=args.get("require_terms"),
+                auto_extract=args.get("auto_extract", True),
+            )
+
+        # Apply feedback-based boosts
+        boosts = self.feedback.get_boost_scores()
+        if boosts:
+            results = self.feedback.apply_boosts(results, boosts)

         return {
             "count": len(results),
+            "query": args["query"],
             "results": [
                 {
                     "content": r["content"],
                     "metadata": r["metadata"],
                     "score": 1 - r["distance"] if r["distance"] else None,
+                    "boosted": r.get("boosted", False),
+                    "tier": r.get("tier"),  # For tiered search
                 }
                 for r in results
             ]

@@ -479,6 +559,45 @@ class RagtimeMCPServer:
             "status": args["status"],
         }

+    def _record_feedback(self, args: dict) -> dict:
+        """Record feedback for a search result."""
+        feedback = SearchFeedback(
+            query=args["query"],
+            result_id="",  # We match by file path
+            result_file=args["result_file"],
+            action=args.get("action", "used"),
+            position=args.get("position", 0),
+        )
+
+        self.feedback.record(feedback)
+
+        return {
+            "success": True,
+            "query": args["query"],
+            "result_file": args["result_file"],
+            "action": feedback.action,
+        }
+
+    def _feedback_stats(self, args: dict) -> dict:
+        """Get feedback statistics."""
+        stats = self.feedback.get_usage_stats()
+        boosts = self.feedback.get_boost_scores()
+
+        # Get top boosted files
+        top_files = sorted(boosts.items(), key=lambda x: x[1], reverse=True)[:10]
+
+        return {
+            "total_feedback": stats["total"],
+            "results_used": stats["used"],
+            "results_ignored": stats["ignored"],
+            "helpful_count": stats["helpful"],
+            "not_helpful_count": stats["not_helpful"],
+            "avg_position_used": round(stats["avg_position_used"], 2),
+            "top_boosted_files": [
+                {"file": f, "boost": round(b, 2)} for f, b in top_files
+            ],
+        }
+
     def handle_message(self, message: dict) -> dict:
         """Handle an incoming JSON-RPC message."""
         method = message.get("method")

@@ -493,7 +612,7 @@ class RagtimeMCPServer:
             "protocolVersion": "2024-11-05",
             "serverInfo": {
                 "name": "ragtime",
-                "version": "0.2.…
+                "version": "0.2.15",
             },
             "capabilities": {
                 "tools": {},
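End to end, the new tools close the loop inside a single server. A direct-invocation sketch that bypasses the JSON-RPC framing (the project_path keyword is inferred from the constructor default shown above; in production these calls arrive as tools/call messages through handle_message):

    from pathlib import Path
    from src.mcp_server import RagtimeMCPServer

    server = RagtimeMCPServer(project_path=Path("."))

    # Tiered smart search: memories > docs > code.
    hits = server._search({"query": "error handling in mobile",
                           "tiered": True, "limit": 5})

    # Report which hit was actually useful...
    if hits["results"]:
        server._record_feedback({
            "query": "error handling in mobile",
            "result_file": hits["results"][0]["metadata"].get("file", ""),
            "action": "used",
            "position": 1,
        })

    # ...so later searches boost that file, and the loop stays inspectable.
    print(server._feedback_stats({}))

One caveat visible in the code above: get_usage_stats short-circuits to {"total": 0, "used": 0, "ignored": 0} when no feedback file exists yet, which lacks the keys _feedback_stats reads, so stats are only meaningful once at least one feedback entry has been recorded.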
Files without changes: {ragtime_cli-0.2.13.dist-info → ragtime_cli-0.2.15.dist-info}/WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt.