ragtime-cli 0.2.9__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {ragtime_cli-0.2.9/ragtime_cli.egg-info → ragtime_cli-0.2.11}/PKG-INFO +38 -5
  2. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/README.md +37 -4
  3. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/pyproject.toml +1 -1
  4. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11/ragtime_cli.egg-info}/PKG-INFO +38 -5
  5. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/cli.py +10 -2
  6. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/config.py +4 -3
  7. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/db.py +40 -8
  8. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/mcp_server.py +9 -3
  9. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/memory.py +81 -27
  10. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/LICENSE +0 -0
  11. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/ragtime_cli.egg-info/SOURCES.txt +0 -0
  12. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/ragtime_cli.egg-info/dependency_links.txt +0 -0
  13. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/ragtime_cli.egg-info/entry_points.txt +0 -0
  14. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/ragtime_cli.egg-info/requires.txt +0 -0
  15. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/ragtime_cli.egg-info/top_level.txt +0 -0
  16. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/setup.cfg +0 -0
  17. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/__init__.py +0 -0
  18. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/audit.md +0 -0
  19. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/create-pr.md +0 -0
  20. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/generate-docs.md +0 -0
  21. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/handoff.md +0 -0
  22. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/import-docs.md +0 -0
  23. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/pr-graduate.md +0 -0
  24. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/recall.md +0 -0
  25. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/remember.md +0 -0
  26. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/save.md +0 -0
  27. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/commands/start.md +0 -0
  28. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/indexers/__init__.py +0 -0
  29. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/indexers/code.py +0 -0
  30. {ragtime_cli-0.2.9 → ragtime_cli-0.2.11}/src/indexers/docs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragtime-cli
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
5
5
  Author-email: Bret Martineau <bretwardjames@gmail.com>
6
6
  License-Expression: MIT
@@ -100,13 +100,16 @@ ragtime forget <memory-id>
100
100
  # Index everything (docs + code)
101
101
  ragtime index
102
102
 
103
+ # Incremental index (only changed files - fast!)
104
+ ragtime index # ~8 seconds vs ~5 minutes for unchanged codebases
105
+
103
106
  # Index only docs
104
107
  ragtime index --type docs
105
108
 
106
109
  # Index only code (functions, classes, composables)
107
110
  ragtime index --type code
108
111
 
109
- # Re-index with clear (removes old entries)
112
+ # Full re-index (removes old entries, recomputes all embeddings)
110
113
  ragtime index --clear
111
114
 
112
115
  # Semantic search across all content
@@ -118,6 +121,10 @@ ragtime search "useAsyncState" --type code
118
121
  # Search only docs
119
122
  ragtime search "authentication" --type docs --namespace app
120
123
 
124
+ # Hybrid search: semantic + keyword filtering
125
+ # Use -r/--require to ensure terms appear in results
126
+ ragtime search "error handling" -r mobile -r dart
127
+
121
128
  # Reindex memory files
122
129
  ragtime reindex
123
130
 
@@ -230,9 +237,9 @@ ragtime setup-ghp
230
237
 
231
238
  ```yaml
232
239
  docs:
233
- paths: ["docs", ".ragtime"]
240
+ paths: ["docs"]
234
241
  patterns: ["**/*.md"]
235
- exclude: ["**/node_modules/**"]
242
+ exclude: ["**/node_modules/**", "**/.ragtime/**"]
236
243
 
237
244
  code:
238
245
  paths: ["."]
@@ -244,6 +251,32 @@ conventions:
244
251
  also_search_memories: true
245
252
  ```
246
253
 
254
+ ## How Search Works
255
+
256
+ Search returns **summaries with locations**, not full code:
257
+
258
+ 1. **What you get**: Function signatures, docstrings, class definitions
259
+ 2. **What you don't get**: Full implementations
260
+ 3. **What to do**: Use the file path + line number to read the full code
261
+
262
+ This is intentional - embeddings work better on focused summaries than large code blocks. The search tells you *what exists and where*, then you read the file for details.
263
+
264
+ For Claude/MCP usage: The search tool description instructs Claude to read returned file paths for full implementations before making code changes.
265
+
266
+ ### Hybrid Search
267
+
268
+ Semantic search can lose qualifiers - "error handling in mobile app" might return web app results because "error handling" dominates the embedding. Use `require_terms` to ensure specific words appear:
269
+
270
+ ```bash
271
+ # CLI
272
+ ragtime search "error handling" -r mobile -r dart
273
+
274
+ # MCP
275
+ search(query="error handling", require_terms=["mobile", "dart"])
276
+ ```
277
+
278
+ This combines semantic similarity (finds conceptually related content) with keyword filtering (ensures qualifiers aren't ignored).
279
+
247
280
  ## Code Indexing
248
281
 
249
282
  The code indexer extracts meaningful symbols from your codebase:
@@ -251,7 +284,7 @@ The code indexer extracts meaningful symbols from your codebase:
251
284
  | Language | What Gets Indexed |
252
285
  |----------|-------------------|
253
286
  | Python | Classes, methods, functions (with docstrings) |
254
- | TypeScript/JS | Exported functions, classes, interfaces, types, constants |
287
+ | TypeScript/JS | Functions, classes, interfaces, types (exported and non-exported) |
255
288
  | Vue | Components, composable usage (useXxx calls) |
256
289
  | Dart | Classes, functions, mixins, extensions |
257
290
 
@@ -70,13 +70,16 @@ ragtime forget <memory-id>
70
70
  # Index everything (docs + code)
71
71
  ragtime index
72
72
 
73
+ # Incremental index (only changed files - fast!)
74
+ ragtime index # ~8 seconds vs ~5 minutes for unchanged codebases
75
+
73
76
  # Index only docs
74
77
  ragtime index --type docs
75
78
 
76
79
  # Index only code (functions, classes, composables)
77
80
  ragtime index --type code
78
81
 
79
- # Re-index with clear (removes old entries)
82
+ # Full re-index (removes old entries, recomputes all embeddings)
80
83
  ragtime index --clear
81
84
 
82
85
  # Semantic search across all content
@@ -88,6 +91,10 @@ ragtime search "useAsyncState" --type code
88
91
  # Search only docs
89
92
  ragtime search "authentication" --type docs --namespace app
90
93
 
94
+ # Hybrid search: semantic + keyword filtering
95
+ # Use -r/--require to ensure terms appear in results
96
+ ragtime search "error handling" -r mobile -r dart
97
+
91
98
  # Reindex memory files
92
99
  ragtime reindex
93
100
 
@@ -200,9 +207,9 @@ ragtime setup-ghp
200
207
 
201
208
  ```yaml
202
209
  docs:
203
- paths: ["docs", ".ragtime"]
210
+ paths: ["docs"]
204
211
  patterns: ["**/*.md"]
205
- exclude: ["**/node_modules/**"]
212
+ exclude: ["**/node_modules/**", "**/.ragtime/**"]
206
213
 
207
214
  code:
208
215
  paths: ["."]
@@ -214,6 +221,32 @@ conventions:
214
221
  also_search_memories: true
215
222
  ```
216
223
 
224
+ ## How Search Works
225
+
226
+ Search returns **summaries with locations**, not full code:
227
+
228
+ 1. **What you get**: Function signatures, docstrings, class definitions
229
+ 2. **What you don't get**: Full implementations
230
+ 3. **What to do**: Use the file path + line number to read the full code
231
+
232
+ This is intentional - embeddings work better on focused summaries than large code blocks. The search tells you *what exists and where*, then you read the file for details.
233
+
234
+ For Claude/MCP usage: The search tool description instructs Claude to read returned file paths for full implementations before making code changes.
235
+
236
+ ### Hybrid Search
237
+
238
+ Semantic search can lose qualifiers - "error handling in mobile app" might return web app results because "error handling" dominates the embedding. Use `require_terms` to ensure specific words appear:
239
+
240
+ ```bash
241
+ # CLI
242
+ ragtime search "error handling" -r mobile -r dart
243
+
244
+ # MCP
245
+ search(query="error handling", require_terms=["mobile", "dart"])
246
+ ```
247
+
248
+ This combines semantic similarity (finds conceptually related content) with keyword filtering (ensures qualifiers aren't ignored).
249
+
217
250
  ## Code Indexing
218
251
 
219
252
  The code indexer extracts meaningful symbols from your codebase:
@@ -221,7 +254,7 @@ The code indexer extracts meaningful symbols from your codebase:
221
254
  | Language | What Gets Indexed |
222
255
  |----------|-------------------|
223
256
  | Python | Classes, methods, functions (with docstrings) |
224
- | TypeScript/JS | Exported functions, classes, interfaces, types, constants |
257
+ | TypeScript/JS | Functions, classes, interfaces, types (exported and non-exported) |
225
258
  | Vue | Components, composable usage (useXxx calls) |
226
259
  | Dart | Classes, functions, mixins, extensions |
227
260
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ragtime-cli"
3
- version = "0.2.9"
3
+ version = "0.2.11"
4
4
  description = "Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge"
5
5
  readme = "README.md"
6
6
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragtime-cli
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
5
5
  Author-email: Bret Martineau <bretwardjames@gmail.com>
6
6
  License-Expression: MIT
@@ -100,13 +100,16 @@ ragtime forget <memory-id>
100
100
  # Index everything (docs + code)
101
101
  ragtime index
102
102
 
103
+ # Incremental index (only changed files - fast!)
104
+ ragtime index # ~8 seconds vs ~5 minutes for unchanged codebases
105
+
103
106
  # Index only docs
104
107
  ragtime index --type docs
105
108
 
106
109
  # Index only code (functions, classes, composables)
107
110
  ragtime index --type code
108
111
 
109
- # Re-index with clear (removes old entries)
112
+ # Full re-index (removes old entries, recomputes all embeddings)
110
113
  ragtime index --clear
111
114
 
112
115
  # Semantic search across all content
@@ -118,6 +121,10 @@ ragtime search "useAsyncState" --type code
118
121
  # Search only docs
119
122
  ragtime search "authentication" --type docs --namespace app
120
123
 
124
+ # Hybrid search: semantic + keyword filtering
125
+ # Use -r/--require to ensure terms appear in results
126
+ ragtime search "error handling" -r mobile -r dart
127
+
121
128
  # Reindex memory files
122
129
  ragtime reindex
123
130
 
@@ -230,9 +237,9 @@ ragtime setup-ghp
230
237
 
231
238
  ```yaml
232
239
  docs:
233
- paths: ["docs", ".ragtime"]
240
+ paths: ["docs"]
234
241
  patterns: ["**/*.md"]
235
- exclude: ["**/node_modules/**"]
242
+ exclude: ["**/node_modules/**", "**/.ragtime/**"]
236
243
 
237
244
  code:
238
245
  paths: ["."]
@@ -244,6 +251,32 @@ conventions:
244
251
  also_search_memories: true
245
252
  ```
246
253
 
254
+ ## How Search Works
255
+
256
+ Search returns **summaries with locations**, not full code:
257
+
258
+ 1. **What you get**: Function signatures, docstrings, class definitions
259
+ 2. **What you don't get**: Full implementations
260
+ 3. **What to do**: Use the file path + line number to read the full code
261
+
262
+ This is intentional - embeddings work better on focused summaries than large code blocks. The search tells you *what exists and where*, then you read the file for details.
263
+
264
+ For Claude/MCP usage: The search tool description instructs Claude to read returned file paths for full implementations before making code changes.
265
+
266
+ ### Hybrid Search
267
+
268
+ Semantic search can lose qualifiers - "error handling in mobile app" might return web app results because "error handling" dominates the embedding. Use `require_terms` to ensure specific words appear:
269
+
270
+ ```bash
271
+ # CLI
272
+ ragtime search "error handling" -r mobile -r dart
273
+
274
+ # MCP
275
+ search(query="error handling", require_terms=["mobile", "dart"])
276
+ ```
277
+
278
+ This combines semantic similarity (finds conceptually related content) with keyword filtering (ensures qualifiers aren't ignored).
279
+
247
280
  ## Code Indexing
248
281
 
249
282
  The code indexer extracts meaningful symbols from your codebase:
@@ -251,7 +284,7 @@ The code indexer extracts meaningful symbols from your codebase:
251
284
  | Language | What Gets Indexed |
252
285
  |----------|-------------------|
253
286
  | Python | Classes, methods, functions (with docstrings) |
254
- | TypeScript/JS | Exported functions, classes, interfaces, types, constants |
287
+ | TypeScript/JS | Functions, classes, interfaces, types (exported and non-exported) |
255
288
  | Vue | Components, composable usage (useXxx calls) |
256
289
  | Dart | Classes, functions, mixins, extensions |
257
290
 
@@ -469,12 +469,19 @@ def index(path: Path, index_type: str, clear: bool):
469
469
  @click.option("--path", type=click.Path(exists=True, path_type=Path), default=".")
470
470
  @click.option("--type", "type_filter", type=click.Choice(["all", "docs", "code"]), default="all")
471
471
  @click.option("--namespace", "-n", help="Filter by namespace")
472
+ @click.option("--require", "-r", "require_terms", multiple=True,
473
+ help="Terms that MUST appear in results (repeatable)")
472
474
  @click.option("--include-archive", is_flag=True, help="Also search archived branches")
473
475
  @click.option("--limit", "-l", default=5, help="Max results")
474
476
  @click.option("--verbose", "-v", is_flag=True, help="Show full content")
475
477
  def search(query: str, path: Path, type_filter: str, namespace: str,
476
- include_archive: bool, limit: int, verbose: bool):
477
- """Search indexed content."""
478
+ require_terms: tuple, include_archive: bool, limit: int, verbose: bool):
479
+ """
480
+ Hybrid search: semantic similarity + keyword filtering.
481
+
482
+ Use --require/-r to ensure specific terms appear in results.
483
+ Example: ragtime search "error handling" -r mobile -r dart
484
+ """
478
485
  path = Path(path).resolve()
479
486
  db = get_db(path)
480
487
 
@@ -485,6 +492,7 @@ def search(query: str, path: Path, type_filter: str, namespace: str,
485
492
  limit=limit,
486
493
  type_filter=type_arg,
487
494
  namespace=namespace,
495
+ require_terms=list(require_terms) if require_terms else None,
488
496
  )
489
497
 
490
498
  if not results:
@@ -12,13 +12,14 @@ import yaml
12
12
  @dataclass
13
13
  class DocsConfig:
14
14
  """Configuration for docs indexing."""
15
- paths: list[str] = field(default_factory=lambda: ["docs", ".ragtime"])
15
+ # Note: .ragtime/ is NOT included here - memories are indexed separately via 'reindex'
16
+ # to avoid duplicate entries (same file indexed as both doc and memory)
17
+ paths: list[str] = field(default_factory=lambda: ["docs"])
16
18
  patterns: list[str] = field(default_factory=lambda: ["**/*.md"])
17
19
  exclude: list[str] = field(default_factory=lambda: [
18
20
  "**/node_modules/**",
19
21
  "**/.git/**",
20
- "**/.ragtime/index/**",
21
- "**/.ragtime/branches/.*", # Exclude synced (dot-prefixed) branches
22
+ "**/.ragtime/**", # Memories indexed separately
22
23
  ])
23
24
 
24
25
 
@@ -84,48 +84,80 @@ class RagtimeDB:
84
84
  limit: int = 10,
85
85
  type_filter: str | None = None,
86
86
  namespace: str | None = None,
87
+ require_terms: list[str] | None = None,
87
88
  **filters,
88
89
  ) -> list[dict]:
89
90
  """
90
- Semantic search over indexed content.
91
+ Hybrid search: semantic similarity + keyword filtering.
91
92
 
92
93
  Args:
93
94
  query: Natural language search query
94
95
  limit: Max results to return
95
96
  type_filter: "code" or "docs" (None = both)
96
97
  namespace: Filter by namespace (for docs)
97
- **filters: Additional metadata filters
98
+ require_terms: List of terms that MUST appear in results (case-insensitive).
99
+ Use for scoped queries like "error handling in mobile" with
100
+ require_terms=["mobile"] to ensure "mobile" isn't ignored.
101
+ **filters: Additional metadata filters (None values are ignored)
98
102
 
99
103
  Returns:
100
104
  List of dicts with 'content', 'metadata', 'distance'
101
105
  """
102
- where = {}
106
+ # Build list of filter conditions, excluding None values
107
+ conditions = []
103
108
 
104
109
  if type_filter:
105
- where["type"] = type_filter
110
+ conditions.append({"type": type_filter})
106
111
 
107
112
  if namespace:
108
- where["namespace"] = namespace
113
+ conditions.append({"namespace": namespace})
109
114
 
115
+ # Add any additional filters, but skip None values
110
116
  for key, value in filters.items():
111
- where[key] = value
117
+ if value is not None:
118
+ conditions.append({key: value})
119
+
120
+ # ChromaDB requires $and for multiple conditions
121
+ if len(conditions) == 0:
122
+ where = None
123
+ elif len(conditions) == 1:
124
+ where = conditions[0]
125
+ else:
126
+ where = {"$and": conditions}
127
+
128
+ # When using require_terms, fetch more results since we'll filter some out
129
+ fetch_limit = limit * 5 if require_terms else limit
112
130
 
113
131
  results = self.collection.query(
114
132
  query_texts=[query],
115
- n_results=limit,
116
- where=where if where else None,
133
+ n_results=fetch_limit,
134
+ where=where,
117
135
  )
118
136
 
119
137
  # Flatten results into list of dicts
120
138
  output = []
121
139
  if results["documents"] and results["documents"][0]:
122
140
  for i, doc in enumerate(results["documents"][0]):
141
+ # Hybrid filtering: ensure required terms appear
142
+ if require_terms:
143
+ doc_lower = doc.lower()
144
+ # Also check file path in metadata for code/file matches
145
+ file_path = (results["metadatas"][0][i].get("file", "") or "").lower()
146
+ combined_text = f"{doc_lower} {file_path}"
147
+
148
+ if not all(term.lower() in combined_text for term in require_terms):
149
+ continue
150
+
123
151
  output.append({
124
152
  "content": doc,
125
153
  "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
126
154
  "distance": results["distances"][0][i] if results["distances"] else None,
127
155
  })
128
156
 
157
+ # Stop once we have enough
158
+ if len(output) >= limit:
159
+ break
160
+
129
161
  return output
130
162
 
131
163
  def delete(self, ids: list[str]) -> None:
@@ -132,7 +132,7 @@ class RagtimeMCPServer:
132
132
  },
133
133
  {
134
134
  "name": "search",
135
- "description": "Semantic search over indexed code and docs. Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations before making code changes or decisions.",
135
+ "description": "Hybrid search over indexed code and docs (semantic + keyword). Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations before making code changes or decisions.",
136
136
  "inputSchema": {
137
137
  "type": "object",
138
138
  "properties": {
@@ -152,6 +152,11 @@ class RagtimeMCPServer:
152
152
  "type": "string",
153
153
  "description": "Filter by component"
154
154
  },
155
+ "require_terms": {
156
+ "type": "array",
157
+ "items": {"type": "string"},
158
+ "description": "Terms that MUST appear in results (case-insensitive). Use for scoped queries like 'error handling in mobile' with require_terms=['mobile'] to ensure the qualifier isn't lost in semantic search."
159
+ },
155
160
  "limit": {
156
161
  "type": "integer",
157
162
  "default": 10,
@@ -333,13 +338,14 @@ class RagtimeMCPServer:
333
338
  }
334
339
 
335
340
  def _search(self, args: dict) -> dict:
336
- """Search indexed content."""
341
+ """Search indexed content with hybrid semantic + keyword matching."""
337
342
  results = self.db.search(
338
343
  query=args["query"],
339
344
  limit=args.get("limit", 10),
340
345
  namespace=args.get("namespace"),
341
346
  type_filter=args.get("type"),
342
347
  component=args.get("component"),
348
+ require_terms=args.get("require_terms"),
343
349
  )
344
350
 
345
351
  return {
@@ -487,7 +493,7 @@ class RagtimeMCPServer:
487
493
  "protocolVersion": "2024-11-05",
488
494
  "serverInfo": {
489
495
  "name": "ragtime",
490
- "version": "0.2.9",
496
+ "version": "0.2.11",
491
497
  },
492
498
  "capabilities": {
493
499
  "tools": {},
@@ -32,6 +32,8 @@ class Memory:
32
32
  epic: Optional[str] = None
33
33
  branch: Optional[str] = None
34
34
  supersedes: Optional[str] = None
35
+ # Internal: actual file path when loaded from disk (not serialized)
36
+ _file_path: Optional[str] = field(default=None, repr=False)
35
37
 
36
38
  def to_frontmatter(self) -> dict:
37
39
  """Convert to YAML frontmatter dict."""
@@ -71,7 +73,8 @@ class Memory:
71
73
  def to_metadata(self) -> dict:
72
74
  """Convert to metadata dict for ChromaDB."""
73
75
  meta = self.to_frontmatter()
74
- meta["file"] = self.get_relative_path()
76
+ # Use actual file path if loaded from disk, otherwise generate it
77
+ meta["file"] = self._file_path if self._file_path else self.get_relative_path()
75
78
  return meta
76
79
 
77
80
  def get_relative_path(self) -> str:
@@ -107,8 +110,14 @@ class Memory:
107
110
  return slug[:40] # Limit length
108
111
 
109
112
  @classmethod
110
- def from_file(cls, path: Path) -> "Memory":
111
- """Parse a memory from a markdown file with YAML frontmatter."""
113
+ def from_file(cls, path: Path, relative_to: Optional[Path] = None) -> "Memory":
114
+ """
115
+ Parse a memory from a markdown file with YAML frontmatter.
116
+
117
+ Args:
118
+ path: Full path to the markdown file
119
+ relative_to: Base directory to compute relative path from (for indexing)
120
+ """
112
121
  text = path.read_text()
113
122
 
114
123
  if not text.startswith("---"):
@@ -122,6 +131,14 @@ class Memory:
122
131
  frontmatter = yaml.safe_load(parts[1])
123
132
  content = parts[2].strip()
124
133
 
134
+ # Compute relative file path for indexing
135
+ file_path = None
136
+ if relative_to:
137
+ try:
138
+ file_path = str(path.relative_to(relative_to))
139
+ except ValueError:
140
+ pass # path not relative to base, will regenerate
141
+
125
142
  return cls(
126
143
  id=frontmatter.get("id", str(uuid.uuid4())[:8]),
127
144
  content=content,
@@ -138,6 +155,7 @@ class Memory:
138
155
  epic=frontmatter.get("epic"),
139
156
  branch=frontmatter.get("branch"),
140
157
  supersedes=frontmatter.get("supersedes"),
158
+ _file_path=file_path,
141
159
  )
142
160
 
143
161
 
@@ -189,24 +207,41 @@ class MemoryStore:
189
207
 
190
208
  def get(self, memory_id: str) -> Optional[Memory]:
191
209
  """Get a memory by ID."""
192
- # Search in ChromaDB to find the file
193
- results = self.db.collection.get(ids=[memory_id])
210
+ # Search in ChromaDB to find the memory
211
+ results = self.db.collection.get(ids=[memory_id], include=["documents", "metadatas"])
194
212
 
195
213
  if not results["ids"]:
196
214
  return None
197
215
 
198
216
  metadata = results["metadatas"][0]
217
+ content = results["documents"][0] if results["documents"] else ""
199
218
  file_rel_path = metadata.get("file", "")
200
219
 
201
- if not file_rel_path:
202
- return None
203
-
204
- file_path = self.memory_dir / file_rel_path
220
+ # Try to read from file first (has full frontmatter data)
221
+ if file_rel_path:
222
+ file_path = self.memory_dir / file_rel_path
223
+ if file_path.exists():
224
+ return Memory.from_file(file_path, relative_to=self.memory_dir)
205
225
 
206
- if file_path.exists():
207
- return Memory.from_file(file_path)
208
-
209
- return None
226
+ # Fall back to constructing from ChromaDB data
227
+ # This handles cases where file path is wrong or file was deleted
228
+ return Memory(
229
+ id=memory_id,
230
+ content=content,
231
+ namespace=metadata.get("namespace", "unknown"),
232
+ type=metadata.get("type", "unknown"),
233
+ component=metadata.get("component"),
234
+ confidence=metadata.get("confidence", "medium"),
235
+ confidence_reason=metadata.get("confidence_reason"),
236
+ source=metadata.get("source", "unknown"),
237
+ status=metadata.get("status", "active"),
238
+ added=metadata.get("added", ""),
239
+ author=metadata.get("author"),
240
+ issue=metadata.get("issue"),
241
+ epic=metadata.get("epic"),
242
+ branch=metadata.get("branch"),
243
+ _file_path=file_rel_path,
244
+ )
210
245
 
211
246
  def delete(self, memory_id: str) -> bool:
212
247
  """Delete a memory by ID."""
@@ -283,29 +318,44 @@ class MemoryStore:
283
318
  limit: int = 100,
284
319
  ) -> list[Memory]:
285
320
  """List memories with optional filters."""
286
- where = {}
321
+ # Build filter conditions
322
+ conditions = []
323
+ namespace_prefix = None
287
324
 
288
325
  if namespace:
289
326
  if namespace.endswith("*"):
290
- # Prefix match - ChromaDB doesn't support this directly
291
- # We'll filter in Python
292
- pass
327
+ # Prefix match - filter in Python after fetching
328
+ namespace_prefix = namespace[:-1]
293
329
  else:
294
- where["namespace"] = namespace
330
+ conditions.append({"namespace": namespace})
295
331
 
296
332
  if type_filter:
297
- where["type"] = type_filter
333
+ conditions.append({"type": type_filter})
298
334
 
299
335
  if status:
300
- where["status"] = status
336
+ conditions.append({"status": status})
301
337
 
302
338
  if component:
303
- where["component"] = component
339
+ conditions.append({"component": component})
340
+
341
+ # Exclude docs/code entries - they use type="docs" or type="code"
342
+ # while memories use types like "architecture", "feature", etc.
343
+ # This is especially important for wildcard queries
344
+ conditions.append({"type": {"$nin": ["docs", "code"]}})
345
+
346
+ # Build where clause with $and if multiple conditions
347
+ if len(conditions) == 1:
348
+ where = conditions[0]
349
+ else:
350
+ where = {"$and": conditions}
351
+
352
+ # When using prefix match, fetch more results since we'll filter some out
353
+ fetch_limit = limit * 5 if namespace_prefix else limit
304
354
 
305
355
  # Get from ChromaDB
306
356
  results = self.db.collection.get(
307
- where=where if where else None,
308
- limit=limit,
357
+ where=where,
358
+ limit=fetch_limit,
309
359
  )
310
360
 
311
361
  memories = []
@@ -314,9 +364,8 @@ class MemoryStore:
314
364
  content = results["documents"][i] if results["documents"] else ""
315
365
 
316
366
  # Handle namespace prefix filtering
317
- if namespace and namespace.endswith("*"):
318
- prefix = namespace[:-1]
319
- if not metadata.get("namespace", "").startswith(prefix):
367
+ if namespace_prefix:
368
+ if not metadata.get("namespace", "").startswith(namespace_prefix):
320
369
  continue
321
370
 
322
371
  memories.append(Memory(
@@ -332,6 +381,10 @@ class MemoryStore:
332
381
  author=metadata.get("author"),
333
382
  ))
334
383
 
384
+ # Stop once we have enough
385
+ if len(memories) >= limit:
386
+ break
387
+
335
388
  return memories
336
389
 
337
390
  def store_document(self, file_path: Path, namespace: str, doc_type: str = "handoff") -> Memory:
@@ -367,7 +420,8 @@ class MemoryStore:
367
420
  count = 0
368
421
  for md_file in self.memory_dir.rglob("*.md"):
369
422
  try:
370
- memory = Memory.from_file(md_file)
423
+ # Pass memory_dir so the actual file path is stored, not regenerated
424
+ memory = Memory.from_file(md_file, relative_to=self.memory_dir)
371
425
  self.db.upsert(
372
426
  ids=[memory.id],
373
427
  documents=[memory.content],
File without changes
File without changes