ragtime-cli 0.2.12__tar.gz → 0.2.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragtime_cli-0.2.12/ragtime_cli.egg-info → ragtime_cli-0.2.14}/PKG-INFO +1 -1
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/pyproject.toml +1 -1
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14/ragtime_cli.egg-info}/PKG-INFO +1 -1
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/cli.py +50 -24
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/db.py +82 -4
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/mcp_server.py +11 -5
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/memory.py +71 -14
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/LICENSE +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/README.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/ragtime_cli.egg-info/SOURCES.txt +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/ragtime_cli.egg-info/dependency_links.txt +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/ragtime_cli.egg-info/entry_points.txt +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/ragtime_cli.egg-info/requires.txt +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/ragtime_cli.egg-info/top_level.txt +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/setup.cfg +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/__init__.py +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/audit.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/create-pr.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/generate-docs.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/handoff.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/import-docs.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/pr-graduate.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/recall.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/remember.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/save.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/commands/start.md +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/config.py +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/indexers/__init__.py +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/indexers/code.py +0 -0
- {ragtime_cli-0.2.12 → ragtime_cli-0.2.14}/src/indexers/docs.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragtime-cli
|
|
3
|
-
Version: 0.2.12
|
|
3
|
+
Version: 0.2.14
|
|
4
4
|
Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
|
|
5
5
|
Author-email: Bret Martineau <bretwardjames@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ragtime-cli
|
|
3
|
-
Version: 0.2.12
|
|
3
|
+
Version: 0.2.14
|
|
4
4
|
Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
|
|
5
5
|
Author-email: Bret Martineau <bretwardjames@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -470,17 +470,21 @@ def index(path: Path, index_type: str, clear: bool):
|
|
|
470
470
|
@click.option("--type", "type_filter", type=click.Choice(["all", "docs", "code"]), default="all")
|
|
471
471
|
@click.option("--namespace", "-n", help="Filter by namespace")
|
|
472
472
|
@click.option("--require", "-r", "require_terms", multiple=True,
|
|
473
|
-
help="
|
|
473
|
+
help="Additional terms that MUST appear (usually auto-detected)")
|
|
474
|
+
@click.option("--raw", is_flag=True, help="Disable auto-detection of qualifiers")
|
|
474
475
|
@click.option("--include-archive", is_flag=True, help="Also search archived branches")
|
|
475
476
|
@click.option("--limit", "-l", default=5, help="Max results")
|
|
476
477
|
@click.option("--verbose", "-v", is_flag=True, help="Show full content")
|
|
477
478
|
def search(query: str, path: Path, type_filter: str, namespace: str,
|
|
478
|
-
require_terms: tuple, include_archive: bool, limit: int, verbose: bool):
|
|
479
|
+
require_terms: tuple, raw: bool, include_archive: bool, limit: int, verbose: bool):
|
|
479
480
|
"""
|
|
480
|
-
|
|
481
|
+
Smart search: auto-detects qualifiers like 'mobile', 'auth', 'dart'.
|
|
481
482
|
|
|
482
|
-
|
|
483
|
-
|
|
483
|
+
\b
|
|
484
|
+
Examples:
|
|
485
|
+
ragtime search "error handling in mobile" # auto-requires 'mobile'
|
|
486
|
+
ragtime search "auth flow" # auto-requires 'auth'
|
|
487
|
+
ragtime search "useAsyncState" --raw # literal search, no extraction
|
|
484
488
|
"""
|
|
485
489
|
path = Path(path).resolve()
|
|
486
490
|
db = get_db(path)
|
|
@@ -493,6 +497,7 @@ def search(query: str, path: Path, type_filter: str, namespace: str,
|
|
|
493
497
|
type_filter=type_arg,
|
|
494
498
|
namespace=namespace,
|
|
495
499
|
require_terms=list(require_terms) if require_terms else None,
|
|
500
|
+
auto_extract=not raw,
|
|
496
501
|
)
|
|
497
502
|
|
|
498
503
|
if not results:
|
|
@@ -736,47 +741,68 @@ def reindex(path: Path):
|
|
|
736
741
|
|
|
737
742
|
@main.command()
|
|
738
743
|
@click.option("--path", type=click.Path(exists=True, path_type=Path), default=".")
|
|
739
|
-
@click.option("--dry-run", is_flag=True, help="Show
|
|
744
|
+
@click.option("--dry-run", is_flag=True, help="Show what would be removed")
|
|
740
745
|
def dedupe(path: Path, dry_run: bool):
|
|
741
|
-
"""
|
|
746
|
+
"""Clean up index: remove duplicates and orphaned entries.
|
|
742
747
|
|
|
743
|
-
|
|
744
|
-
|
|
748
|
+
- Removes duplicate entries (keeps one per file path)
|
|
749
|
+
- Removes orphaned entries (files that no longer exist on disk)
|
|
745
750
|
"""
|
|
746
751
|
path = Path(path).resolve()
|
|
747
752
|
db = get_db(path)
|
|
753
|
+
memory_dir = path / ".ragtime"
|
|
748
754
|
|
|
749
755
|
# Get all entries with their file paths
|
|
750
756
|
results = db.collection.get(include=["metadatas"])
|
|
751
757
|
|
|
752
|
-
# Group by file path
|
|
758
|
+
# Group by file path and track orphans
|
|
753
759
|
by_file: dict[str, list[str]] = {}
|
|
760
|
+
orphans: list[str] = []
|
|
761
|
+
|
|
754
762
|
for i, mem_id in enumerate(results["ids"]):
|
|
755
763
|
file_path = results["metadatas"][i].get("file", "")
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
764
|
+
entry_type = results["metadatas"][i].get("type", "")
|
|
765
|
+
|
|
766
|
+
# Skip docs/code entries - only clean up memory entries
|
|
767
|
+
if entry_type in ("docs", "code"):
|
|
768
|
+
continue
|
|
769
|
+
|
|
770
|
+
if not file_path:
|
|
771
|
+
orphans.append(mem_id)
|
|
772
|
+
continue
|
|
760
773
|
|
|
761
|
-
|
|
762
|
-
|
|
774
|
+
# Check if file exists on disk
|
|
775
|
+
full_path = memory_dir / file_path
|
|
776
|
+
if not full_path.exists():
|
|
777
|
+
orphans.append(mem_id)
|
|
778
|
+
if dry_run:
|
|
779
|
+
click.echo(f" Orphan: {file_path} (file missing)")
|
|
780
|
+
continue
|
|
781
|
+
|
|
782
|
+
if file_path not in by_file:
|
|
783
|
+
by_file[file_path] = []
|
|
784
|
+
by_file[file_path].append(mem_id)
|
|
785
|
+
|
|
786
|
+
# Find duplicates (keep first, remove rest)
|
|
787
|
+
duplicates: list[str] = []
|
|
763
788
|
for file_path, ids in by_file.items():
|
|
764
789
|
if len(ids) > 1:
|
|
765
|
-
|
|
766
|
-
duplicates_to_remove.extend(ids[1:])
|
|
790
|
+
duplicates.extend(ids[1:])
|
|
767
791
|
if dry_run:
|
|
768
|
-
click.echo(f" {file_path}
|
|
792
|
+
click.echo(f" Duplicate: {file_path} ({len(ids)} copies, removing {len(ids) - 1})")
|
|
793
|
+
|
|
794
|
+
to_remove = orphans + duplicates
|
|
769
795
|
|
|
770
|
-
if not duplicates_to_remove:
|
|
771
|
-
click.echo("✓
|
|
796
|
+
if not to_remove:
|
|
797
|
+
click.echo("✓ Index is clean (no duplicates or orphans)")
|
|
772
798
|
return
|
|
773
799
|
|
|
774
800
|
if dry_run:
|
|
775
|
-
click.echo(f"\nWould remove {len(
|
|
801
|
+
click.echo(f"\nWould remove {len(orphans)} orphans + {len(duplicates)} duplicates = {len(to_remove)} entries")
|
|
776
802
|
click.echo("Run without --dry-run to remove them")
|
|
777
803
|
else:
|
|
778
|
-
db.delete(duplicates_to_remove)
|
|
779
|
-
click.echo(f"✓ Removed {len(
|
|
804
|
+
db.delete(to_remove)
|
|
805
|
+
click.echo(f"✓ Removed {len(orphans)} orphans + {len(duplicates)} duplicates = {len(to_remove)} entries")
|
|
780
806
|
|
|
781
807
|
|
|
782
808
|
@main.command("new-branch")
|
|
@@ -4,12 +4,74 @@ ChromaDB wrapper for ragtime.
|
|
|
4
4
|
Handles storage and retrieval of indexed documents and code.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import re
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any
|
|
9
10
|
import chromadb
|
|
10
11
|
from chromadb.config import Settings
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
def extract_query_hints(query: str, known_components: list[str] | None = None) -> tuple[str, list[str]]:
|
|
15
|
+
"""
|
|
16
|
+
Extract component/scope hints from a query for hybrid search.
|
|
17
|
+
|
|
18
|
+
Detects patterns like "X in mobile", "mobile X", "X for auth" and extracts
|
|
19
|
+
the qualifier to use as require_terms. This prevents qualifiers from being
|
|
20
|
+
diluted in semantic search.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
query: The natural language search query
|
|
24
|
+
known_components: Optional list of known component names to detect
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
(cleaned_query, extracted_terms) - query with hints removed, terms to require
|
|
28
|
+
"""
|
|
29
|
+
# Default known components/scopes (common patterns)
|
|
30
|
+
default_components = [
|
|
31
|
+
# Platforms
|
|
32
|
+
"mobile", "web", "desktop", "ios", "android", "flutter", "react", "vue",
|
|
33
|
+
# Languages
|
|
34
|
+
"dart", "python", "typescript", "javascript", "ts", "js", "py",
|
|
35
|
+
# Common components
|
|
36
|
+
"auth", "authentication", "api", "database", "db", "ui", "frontend", "backend",
|
|
37
|
+
"server", "client", "admin", "user", "payment", "billing", "notification",
|
|
38
|
+
"email", "cache", "queue", "worker", "scheduler", "logging", "metrics",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
components = set(c.lower() for c in (known_components or default_components))
|
|
42
|
+
extracted = []
|
|
43
|
+
cleaned = query
|
|
44
|
+
|
|
45
|
+
# Pattern 1: "X in/for/on {component}" - extract component
|
|
46
|
+
patterns = [
|
|
47
|
+
r'\b(?:in|for|on|from|using|with)\s+(?:the\s+)?(\w+)\s*(?:app|code|module|service|codebase)?(?:\s|$)',
|
|
48
|
+
r'\b(\w+)\s+(?:app|code|module|service|codebase)\b',
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
for pattern in patterns:
|
|
52
|
+
for match in re.finditer(pattern, query, re.IGNORECASE):
|
|
53
|
+
word = match.group(1).lower()
|
|
54
|
+
if word in components:
|
|
55
|
+
extracted.append(word)
|
|
56
|
+
# Remove the matched phrase from query
|
|
57
|
+
cleaned = cleaned[:match.start()] + " " + cleaned[match.end():]
|
|
58
|
+
|
|
59
|
+
# Pattern 2: Check if any known component appears as standalone word
|
|
60
|
+
words = re.findall(r'\b\w+\b', query.lower())
|
|
61
|
+
for word in words:
|
|
62
|
+
if word in components and word not in extracted:
|
|
63
|
+
# Only extract if it looks like a qualifier (not the main subject)
|
|
64
|
+
# Heuristic: if query has other meaningful words, it's likely a qualifier
|
|
65
|
+
other_words = [w for w in words if w != word and len(w) > 3]
|
|
66
|
+
if len(other_words) >= 2:
|
|
67
|
+
extracted.append(word)
|
|
68
|
+
|
|
69
|
+
# Clean up extra whitespace
|
|
70
|
+
cleaned = re.sub(r'\s+', ' ', cleaned).strip()
|
|
71
|
+
|
|
72
|
+
return cleaned, list(set(extracted))
|
|
73
|
+
|
|
74
|
+
|
|
13
75
|
class RagtimeDB:
|
|
14
76
|
"""Vector database for ragtime indexes."""
|
|
15
77
|
|
|
@@ -85,6 +147,7 @@ class RagtimeDB:
|
|
|
85
147
|
type_filter: str | None = None,
|
|
86
148
|
namespace: str | None = None,
|
|
87
149
|
require_terms: list[str] | None = None,
|
|
150
|
+
auto_extract: bool = True,
|
|
88
151
|
**filters,
|
|
89
152
|
) -> list[dict]:
|
|
90
153
|
"""
|
|
@@ -98,11 +161,26 @@ class RagtimeDB:
|
|
|
98
161
|
require_terms: List of terms that MUST appear in results (case-insensitive).
|
|
99
162
|
Use for scoped queries like "error handling in mobile" with
|
|
100
163
|
require_terms=["mobile"] to ensure "mobile" isn't ignored.
|
|
164
|
+
auto_extract: If True (default), automatically detect component qualifiers
|
|
165
|
+
in the query and add them to require_terms. Set to False
|
|
166
|
+
for raw/literal search.
|
|
101
167
|
**filters: Additional metadata filters (None values are ignored)
|
|
102
168
|
|
|
103
169
|
Returns:
|
|
104
170
|
List of dicts with 'content', 'metadata', 'distance'
|
|
105
171
|
"""
|
|
172
|
+
# Auto-extract component hints from query if enabled
|
|
173
|
+
search_query = query
|
|
174
|
+
all_require_terms = list(require_terms) if require_terms else []
|
|
175
|
+
|
|
176
|
+
if auto_extract:
|
|
177
|
+
cleaned_query, extracted = extract_query_hints(query)
|
|
178
|
+
if extracted:
|
|
179
|
+
# Use cleaned query for embedding (removes noise)
|
|
180
|
+
search_query = cleaned_query
|
|
181
|
+
# Add extracted terms to require_terms
|
|
182
|
+
all_require_terms.extend(extracted)
|
|
183
|
+
all_require_terms = list(set(all_require_terms)) # dedupe
|
|
106
184
|
# Build list of filter conditions, excluding None values
|
|
107
185
|
conditions = []
|
|
108
186
|
|
|
@@ -126,10 +204,10 @@ class RagtimeDB:
|
|
|
126
204
|
where = {"$and": conditions}
|
|
127
205
|
|
|
128
206
|
# When using require_terms, fetch more results since we'll filter some out
|
|
129
|
-
fetch_limit = limit * 5 if require_terms else limit
|
|
207
|
+
fetch_limit = limit * 5 if all_require_terms else limit
|
|
130
208
|
|
|
131
209
|
results = self.collection.query(
|
|
132
|
-
query_texts=[query],
|
|
210
|
+
query_texts=[search_query],
|
|
133
211
|
n_results=fetch_limit,
|
|
134
212
|
where=where,
|
|
135
213
|
)
|
|
@@ -139,13 +217,13 @@ class RagtimeDB:
|
|
|
139
217
|
if results["documents"] and results["documents"][0]:
|
|
140
218
|
for i, doc in enumerate(results["documents"][0]):
|
|
141
219
|
# Hybrid filtering: ensure required terms appear
|
|
142
|
-
if require_terms:
|
|
220
|
+
if all_require_terms:
|
|
143
221
|
doc_lower = doc.lower()
|
|
144
222
|
# Also check file path in metadata for code/file matches
|
|
145
223
|
file_path = (results["metadatas"][0][i].get("file", "") or "").lower()
|
|
146
224
|
combined_text = f"{doc_lower} {file_path}"
|
|
147
225
|
|
|
148
|
-
if not all(term.lower() in combined_text for term in require_terms):
|
|
226
|
+
if not all(term.lower() in combined_text for term in all_require_terms):
|
|
149
227
|
continue
|
|
150
228
|
|
|
151
229
|
output.append({
|
|
@@ -132,13 +132,13 @@ class RagtimeMCPServer:
|
|
|
132
132
|
},
|
|
133
133
|
{
|
|
134
134
|
"name": "search",
|
|
135
|
-
"description": "
|
|
135
|
+
"description": "Smart hybrid search over indexed code and docs. Auto-detects qualifiers like 'mobile', 'auth', 'dart' in your query and ensures they appear in results. Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations.",
|
|
136
136
|
"inputSchema": {
|
|
137
137
|
"type": "object",
|
|
138
138
|
"properties": {
|
|
139
139
|
"query": {
|
|
140
140
|
"type": "string",
|
|
141
|
-
"description": "Natural language search query"
|
|
141
|
+
"description": "Natural language search query. Qualifiers like 'in mobile', 'for auth', 'dart' are auto-detected and used for filtering."
|
|
142
142
|
},
|
|
143
143
|
"namespace": {
|
|
144
144
|
"type": "string",
|
|
@@ -155,7 +155,12 @@ class RagtimeMCPServer:
|
|
|
155
155
|
"require_terms": {
|
|
156
156
|
"type": "array",
|
|
157
157
|
"items": {"type": "string"},
|
|
158
|
-
"description": "
|
|
158
|
+
"description": "Additional terms that MUST appear in results. Usually not needed since qualifiers are auto-detected from the query."
|
|
159
|
+
},
|
|
160
|
+
"auto_extract": {
|
|
161
|
+
"type": "boolean",
|
|
162
|
+
"default": True,
|
|
163
|
+
"description": "Auto-detect component qualifiers from query (default: true). Set to false for literal/raw search."
|
|
159
164
|
},
|
|
160
165
|
"limit": {
|
|
161
166
|
"type": "integer",
|
|
@@ -338,7 +343,7 @@ class RagtimeMCPServer:
|
|
|
338
343
|
}
|
|
339
344
|
|
|
340
345
|
def _search(self, args: dict) -> dict:
|
|
341
|
-
"""Search indexed content with
|
|
346
|
+
"""Search indexed content with smart query understanding."""
|
|
342
347
|
results = self.db.search(
|
|
343
348
|
query=args["query"],
|
|
344
349
|
limit=args.get("limit", 10),
|
|
@@ -346,6 +351,7 @@ class RagtimeMCPServer:
|
|
|
346
351
|
type_filter=args.get("type"),
|
|
347
352
|
component=args.get("component"),
|
|
348
353
|
require_terms=args.get("require_terms"),
|
|
354
|
+
auto_extract=args.get("auto_extract", True),
|
|
349
355
|
)
|
|
350
356
|
|
|
351
357
|
return {
|
|
@@ -493,7 +499,7 @@ class RagtimeMCPServer:
|
|
|
493
499
|
"protocolVersion": "2024-11-05",
|
|
494
500
|
"serverInfo": {
|
|
495
501
|
"name": "ragtime",
|
|
496
|
-
"version": "0.2.
|
|
502
|
+
"version": "0.2.14",
|
|
497
503
|
},
|
|
498
504
|
"capabilities": {
|
|
499
505
|
"tools": {},
|
|
@@ -110,35 +110,83 @@ class Memory:
|
|
|
110
110
|
slug = re.sub(r'[-\s]+', '-', slug).strip('-')
|
|
111
111
|
return slug[:40] # Limit length
|
|
112
112
|
|
|
113
|
+
@classmethod
|
|
114
|
+
def _infer_metadata_from_path(cls, relative_path: str) -> dict:
|
|
115
|
+
"""
|
|
116
|
+
Infer namespace, component, and type from folder structure.
|
|
117
|
+
|
|
118
|
+
Supports:
|
|
119
|
+
app/{component}/*.md → namespace=app, component={component}
|
|
120
|
+
app/*.md → namespace=app
|
|
121
|
+
team/*.md → namespace=team
|
|
122
|
+
users/{username}/*.md → namespace=user-{username}
|
|
123
|
+
branches/{branch}/*.md → namespace=branch-{branch}
|
|
124
|
+
"""
|
|
125
|
+
parts = relative_path.replace("\\", "/").split("/")
|
|
126
|
+
metadata = {}
|
|
127
|
+
|
|
128
|
+
if len(parts) >= 1:
|
|
129
|
+
first = parts[0]
|
|
130
|
+
if first == "app":
|
|
131
|
+
metadata["namespace"] = "app"
|
|
132
|
+
if len(parts) >= 3: # app/{component}/file.md
|
|
133
|
+
metadata["component"] = parts[1]
|
|
134
|
+
elif first == "team":
|
|
135
|
+
metadata["namespace"] = "team"
|
|
136
|
+
elif first == "users" and len(parts) >= 2:
|
|
137
|
+
metadata["namespace"] = f"user-{parts[1]}"
|
|
138
|
+
elif first == "branches" and len(parts) >= 2:
|
|
139
|
+
metadata["namespace"] = f"branch-{parts[1]}"
|
|
140
|
+
|
|
141
|
+
return metadata
|
|
142
|
+
|
|
113
143
|
@classmethod
|
|
114
144
|
def from_file(cls, path: Path, relative_to: Optional[Path] = None) -> "Memory":
|
|
115
145
|
"""
|
|
116
146
|
Parse a memory from a markdown file with YAML frontmatter.
|
|
117
147
|
|
|
148
|
+
If no frontmatter exists, infers metadata from folder structure.
|
|
149
|
+
|
|
118
150
|
Args:
|
|
119
151
|
path: Full path to the markdown file
|
|
120
152
|
relative_to: Base directory to compute relative path from (for indexing)
|
|
121
153
|
"""
|
|
122
154
|
text = path.read_text()
|
|
123
155
|
|
|
156
|
+
# Compute relative path for inference and indexing
|
|
157
|
+
file_path = None
|
|
158
|
+
if relative_to:
|
|
159
|
+
try:
|
|
160
|
+
file_path = str(path.relative_to(relative_to))
|
|
161
|
+
except ValueError:
|
|
162
|
+
pass
|
|
163
|
+
|
|
164
|
+
# Handle files without frontmatter - infer from path
|
|
124
165
|
if not text.startswith("---"):
|
|
125
|
-
|
|
166
|
+
inferred = cls._infer_metadata_from_path(file_path or str(path))
|
|
167
|
+
# Generate stable ID from path
|
|
168
|
+
memory_id = hashlib.sha256((file_path or str(path)).encode()).hexdigest()[:8]
|
|
169
|
+
|
|
170
|
+
return cls(
|
|
171
|
+
id=memory_id,
|
|
172
|
+
content=text.strip(),
|
|
173
|
+
namespace=inferred.get("namespace", "app"),
|
|
174
|
+
type=inferred.get("type", "note"),
|
|
175
|
+
component=inferred.get("component"),
|
|
176
|
+
source="file",
|
|
177
|
+
_file_path=file_path,
|
|
178
|
+
)
|
|
126
179
|
|
|
127
180
|
# Split frontmatter and content
|
|
128
181
|
parts = text.split("---", 2)
|
|
129
182
|
if len(parts) < 3:
|
|
130
183
|
raise ValueError(f"Invalid frontmatter format in {path}")
|
|
131
184
|
|
|
132
|
-
frontmatter = yaml.safe_load(parts[1])
|
|
185
|
+
frontmatter = yaml.safe_load(parts[1]) or {}
|
|
133
186
|
content = parts[2].strip()
|
|
134
187
|
|
|
135
|
-
#
|
|
136
|
-
|
|
137
|
-
if relative_to:
|
|
138
|
-
try:
|
|
139
|
-
file_path = str(path.relative_to(relative_to))
|
|
140
|
-
except ValueError:
|
|
141
|
-
pass # path not relative to base, will regenerate
|
|
188
|
+
# Infer missing metadata from folder structure
|
|
189
|
+
inferred = cls._infer_metadata_from_path(file_path or str(path))
|
|
142
190
|
|
|
143
191
|
# Use frontmatter ID if present, otherwise derive stable ID from file path
|
|
144
192
|
# This ensures reindex is idempotent - same file always gets same ID
|
|
@@ -154,9 +202,10 @@ class Memory:
|
|
|
154
202
|
return cls(
|
|
155
203
|
id=memory_id,
|
|
156
204
|
content=content,
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
205
|
+
# Use frontmatter if present, fall back to inferred, then defaults
|
|
206
|
+
namespace=frontmatter.get("namespace") or inferred.get("namespace", "app"),
|
|
207
|
+
type=frontmatter.get("type") or inferred.get("type", "note"),
|
|
208
|
+
component=frontmatter.get("component") or inferred.get("component"),
|
|
160
209
|
confidence=frontmatter.get("confidence", "medium"),
|
|
161
210
|
confidence_reason=frontmatter.get("confidence_reason"),
|
|
162
211
|
source=frontmatter.get("source", "file"),
|
|
@@ -423,7 +472,9 @@ class MemoryStore:
|
|
|
423
472
|
"""
|
|
424
473
|
Reindex all memory files.
|
|
425
474
|
|
|
426
|
-
Scans .ragtime/ and indexes
|
|
475
|
+
Scans .ragtime/ and indexes files. Removes old entries for each file
|
|
476
|
+
before upserting to prevent duplicates from ID changes.
|
|
477
|
+
|
|
427
478
|
Returns count of files indexed.
|
|
428
479
|
"""
|
|
429
480
|
if not self.memory_dir.exists():
|
|
@@ -432,7 +483,13 @@ class MemoryStore:
|
|
|
432
483
|
count = 0
|
|
433
484
|
for md_file in self.memory_dir.rglob("*.md"):
|
|
434
485
|
try:
|
|
435
|
-
#
|
|
486
|
+
# Compute relative path for this file
|
|
487
|
+
rel_path = str(md_file.relative_to(self.memory_dir))
|
|
488
|
+
|
|
489
|
+
# Delete any existing entries for this file path (handles ID changes)
|
|
490
|
+
self.db.delete_by_file([rel_path])
|
|
491
|
+
|
|
492
|
+
# Parse and index with stable ID
|
|
436
493
|
memory = Memory.from_file(md_file, relative_to=self.memory_dir)
|
|
437
494
|
self.db.upsert(
|
|
438
495
|
ids=[memory.id],
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|