ragtime-cli 0.2.10__tar.gz → 0.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {ragtime_cli-0.2.10/ragtime_cli.egg-info → ragtime_cli-0.2.12}/PKG-INFO +21 -3
  2. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/README.md +20 -2
  3. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/pyproject.toml +1 -1
  4. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12/ragtime_cli.egg-info}/PKG-INFO +21 -3
  5. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/cli.py +55 -2
  6. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/config.py +4 -3
  7. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/db.py +23 -2
  8. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/mcp_server.py +9 -3
  9. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/memory.py +46 -15
  10. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/LICENSE +0 -0
  11. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/ragtime_cli.egg-info/SOURCES.txt +0 -0
  12. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/ragtime_cli.egg-info/dependency_links.txt +0 -0
  13. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/ragtime_cli.egg-info/entry_points.txt +0 -0
  14. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/ragtime_cli.egg-info/requires.txt +0 -0
  15. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/ragtime_cli.egg-info/top_level.txt +0 -0
  16. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/setup.cfg +0 -0
  17. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/__init__.py +0 -0
  18. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/audit.md +0 -0
  19. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/create-pr.md +0 -0
  20. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/generate-docs.md +0 -0
  21. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/handoff.md +0 -0
  22. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/import-docs.md +0 -0
  23. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/pr-graduate.md +0 -0
  24. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/recall.md +0 -0
  25. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/remember.md +0 -0
  26. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/save.md +0 -0
  27. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/commands/start.md +0 -0
  28. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/indexers/__init__.py +0 -0
  29. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/indexers/code.py +0 -0
  30. {ragtime_cli-0.2.10 → ragtime_cli-0.2.12}/src/indexers/docs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragtime-cli
3
- Version: 0.2.10
3
+ Version: 0.2.12
4
4
  Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
5
5
  Author-email: Bret Martineau <bretwardjames@gmail.com>
6
6
  License-Expression: MIT
@@ -121,6 +121,10 @@ ragtime search "useAsyncState" --type code
121
121
  # Search only docs
122
122
  ragtime search "authentication" --type docs --namespace app
123
123
 
124
+ # Hybrid search: semantic + keyword filtering
125
+ # Use -r/--require to ensure terms appear in results
126
+ ragtime search "error handling" -r mobile -r dart
127
+
124
128
  # Reindex memory files
125
129
  ragtime reindex
126
130
 
@@ -233,9 +237,9 @@ ragtime setup-ghp
233
237
 
234
238
  ```yaml
235
239
  docs:
236
- paths: ["docs", ".ragtime"]
240
+ paths: ["docs"]
237
241
  patterns: ["**/*.md"]
238
- exclude: ["**/node_modules/**"]
242
+ exclude: ["**/node_modules/**", "**/.ragtime/**"]
239
243
 
240
244
  code:
241
245
  paths: ["."]
@@ -259,6 +263,20 @@ This is intentional - embeddings work better on focused summaries than large cod
259
263
 
260
264
  For Claude/MCP usage: The search tool description instructs Claude to read returned file paths for full implementations before making code changes.
261
265
 
266
+ ### Hybrid Search
267
+
268
+ Semantic search can lose qualifiers - "error handling in mobile app" might return web app results because "error handling" dominates the embedding. Use `require_terms` to ensure specific words appear:
269
+
270
+ ```bash
271
+ # CLI
272
+ ragtime search "error handling" -r mobile -r dart
273
+
274
+ # MCP
275
+ search(query="error handling", require_terms=["mobile", "dart"])
276
+ ```
277
+
278
+ This combines semantic similarity (finds conceptually related content) with keyword filtering (ensures qualifiers aren't ignored).
279
+
262
280
  ## Code Indexing
263
281
 
264
282
  The code indexer extracts meaningful symbols from your codebase:
@@ -91,6 +91,10 @@ ragtime search "useAsyncState" --type code
91
91
  # Search only docs
92
92
  ragtime search "authentication" --type docs --namespace app
93
93
 
94
+ # Hybrid search: semantic + keyword filtering
95
+ # Use -r/--require to ensure terms appear in results
96
+ ragtime search "error handling" -r mobile -r dart
97
+
94
98
  # Reindex memory files
95
99
  ragtime reindex
96
100
 
@@ -203,9 +207,9 @@ ragtime setup-ghp
203
207
 
204
208
  ```yaml
205
209
  docs:
206
- paths: ["docs", ".ragtime"]
210
+ paths: ["docs"]
207
211
  patterns: ["**/*.md"]
208
- exclude: ["**/node_modules/**"]
212
+ exclude: ["**/node_modules/**", "**/.ragtime/**"]
209
213
 
210
214
  code:
211
215
  paths: ["."]
@@ -229,6 +233,20 @@ This is intentional - embeddings work better on focused summaries than large cod
229
233
 
230
234
  For Claude/MCP usage: The search tool description instructs Claude to read returned file paths for full implementations before making code changes.
231
235
 
236
+ ### Hybrid Search
237
+
238
+ Semantic search can lose qualifiers - "error handling in mobile app" might return web app results because "error handling" dominates the embedding. Use `require_terms` to ensure specific words appear:
239
+
240
+ ```bash
241
+ # CLI
242
+ ragtime search "error handling" -r mobile -r dart
243
+
244
+ # MCP
245
+ search(query="error handling", require_terms=["mobile", "dart"])
246
+ ```
247
+
248
+ This combines semantic similarity (finds conceptually related content) with keyword filtering (ensures qualifiers aren't ignored).
249
+
232
250
  ## Code Indexing
233
251
 
234
252
  The code indexer extracts meaningful symbols from your codebase:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ragtime-cli"
3
- version = "0.2.10"
3
+ version = "0.2.12"
4
4
  description = "Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge"
5
5
  readme = "README.md"
6
6
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragtime-cli
3
- Version: 0.2.10
3
+ Version: 0.2.12
4
4
  Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
5
5
  Author-email: Bret Martineau <bretwardjames@gmail.com>
6
6
  License-Expression: MIT
@@ -121,6 +121,10 @@ ragtime search "useAsyncState" --type code
121
121
  # Search only docs
122
122
  ragtime search "authentication" --type docs --namespace app
123
123
 
124
+ # Hybrid search: semantic + keyword filtering
125
+ # Use -r/--require to ensure terms appear in results
126
+ ragtime search "error handling" -r mobile -r dart
127
+
124
128
  # Reindex memory files
125
129
  ragtime reindex
126
130
 
@@ -233,9 +237,9 @@ ragtime setup-ghp
233
237
 
234
238
  ```yaml
235
239
  docs:
236
- paths: ["docs", ".ragtime"]
240
+ paths: ["docs"]
237
241
  patterns: ["**/*.md"]
238
- exclude: ["**/node_modules/**"]
242
+ exclude: ["**/node_modules/**", "**/.ragtime/**"]
239
243
 
240
244
  code:
241
245
  paths: ["."]
@@ -259,6 +263,20 @@ This is intentional - embeddings work better on focused summaries than large cod
259
263
 
260
264
  For Claude/MCP usage: The search tool description instructs Claude to read returned file paths for full implementations before making code changes.
261
265
 
266
+ ### Hybrid Search
267
+
268
+ Semantic search can lose qualifiers - "error handling in mobile app" might return web app results because "error handling" dominates the embedding. Use `require_terms` to ensure specific words appear:
269
+
270
+ ```bash
271
+ # CLI
272
+ ragtime search "error handling" -r mobile -r dart
273
+
274
+ # MCP
275
+ search(query="error handling", require_terms=["mobile", "dart"])
276
+ ```
277
+
278
+ This combines semantic similarity (finds conceptually related content) with keyword filtering (ensures qualifiers aren't ignored).
279
+
262
280
  ## Code Indexing
263
281
 
264
282
  The code indexer extracts meaningful symbols from your codebase:
@@ -469,12 +469,19 @@ def index(path: Path, index_type: str, clear: bool):
469
469
  @click.option("--path", type=click.Path(exists=True, path_type=Path), default=".")
470
470
  @click.option("--type", "type_filter", type=click.Choice(["all", "docs", "code"]), default="all")
471
471
  @click.option("--namespace", "-n", help="Filter by namespace")
472
+ @click.option("--require", "-r", "require_terms", multiple=True,
473
+ help="Terms that MUST appear in results (repeatable)")
472
474
  @click.option("--include-archive", is_flag=True, help="Also search archived branches")
473
475
  @click.option("--limit", "-l", default=5, help="Max results")
474
476
  @click.option("--verbose", "-v", is_flag=True, help="Show full content")
475
477
  def search(query: str, path: Path, type_filter: str, namespace: str,
476
- include_archive: bool, limit: int, verbose: bool):
477
- """Search indexed content."""
478
+ require_terms: tuple, include_archive: bool, limit: int, verbose: bool):
479
+ """
480
+ Hybrid search: semantic similarity + keyword filtering.
481
+
482
+ Use --require/-r to ensure specific terms appear in results.
483
+ Example: ragtime search "error handling" -r mobile -r dart
484
+ """
478
485
  path = Path(path).resolve()
479
486
  db = get_db(path)
480
487
 
@@ -485,6 +492,7 @@ def search(query: str, path: Path, type_filter: str, namespace: str,
485
492
  limit=limit,
486
493
  type_filter=type_arg,
487
494
  namespace=namespace,
495
+ require_terms=list(require_terms) if require_terms else None,
488
496
  )
489
497
 
490
498
  if not results:
@@ -726,6 +734,51 @@ def reindex(path: Path):
726
734
  click.echo(f"✓ Reindexed {count} memory files")
727
735
 
728
736
 
737
+ @main.command()
738
+ @click.option("--path", type=click.Path(exists=True, path_type=Path), default=".")
739
+ @click.option("--dry-run", is_flag=True, help="Show duplicates without removing them")
740
+ def dedupe(path: Path, dry_run: bool):
741
+ """Remove duplicate entries from the index.
742
+
743
+ Keeps one entry per unique file path, removing duplicates created
744
+ by older versions of reindex that generated random IDs.
745
+ """
746
+ path = Path(path).resolve()
747
+ db = get_db(path)
748
+
749
+ # Get all entries with their file paths
750
+ results = db.collection.get(include=["metadatas"])
751
+
752
+ # Group by file path
753
+ by_file: dict[str, list[str]] = {}
754
+ for i, mem_id in enumerate(results["ids"]):
755
+ file_path = results["metadatas"][i].get("file", "")
756
+ if file_path:
757
+ if file_path not in by_file:
758
+ by_file[file_path] = []
759
+ by_file[file_path].append(mem_id)
760
+
761
+ # Find duplicates
762
+ duplicates_to_remove = []
763
+ for file_path, ids in by_file.items():
764
+ if len(ids) > 1:
765
+ # Keep the first one, remove the rest
766
+ duplicates_to_remove.extend(ids[1:])
767
+ if dry_run:
768
+ click.echo(f" {file_path}: {len(ids)} copies (would remove {len(ids) - 1})")
769
+
770
+ if not duplicates_to_remove:
771
+ click.echo("✓ No duplicates found")
772
+ return
773
+
774
+ if dry_run:
775
+ click.echo(f"\nWould remove {len(duplicates_to_remove)} duplicate entries")
776
+ click.echo("Run without --dry-run to remove them")
777
+ else:
778
+ db.delete(duplicates_to_remove)
779
+ click.echo(f"✓ Removed {len(duplicates_to_remove)} duplicate entries")
780
+
781
+
729
782
  @main.command("new-branch")
730
783
  @click.argument("issue", type=int)
731
784
  @click.option("--path", type=click.Path(exists=True, path_type=Path), default=".")
@@ -12,13 +12,14 @@ import yaml
12
12
  @dataclass
13
13
  class DocsConfig:
14
14
  """Configuration for docs indexing."""
15
- paths: list[str] = field(default_factory=lambda: ["docs", ".ragtime"])
15
+ # Note: .ragtime/ is NOT included here - memories are indexed separately via 'reindex'
16
+ # to avoid duplicate entries (same file indexed as both doc and memory)
17
+ paths: list[str] = field(default_factory=lambda: ["docs"])
16
18
  patterns: list[str] = field(default_factory=lambda: ["**/*.md"])
17
19
  exclude: list[str] = field(default_factory=lambda: [
18
20
  "**/node_modules/**",
19
21
  "**/.git/**",
20
- "**/.ragtime/index/**",
21
- "**/.ragtime/branches/.*", # Exclude synced (dot-prefixed) branches
22
+ "**/.ragtime/**", # Memories indexed separately
22
23
  ])
23
24
 
24
25
 
@@ -84,16 +84,20 @@ class RagtimeDB:
84
84
  limit: int = 10,
85
85
  type_filter: str | None = None,
86
86
  namespace: str | None = None,
87
+ require_terms: list[str] | None = None,
87
88
  **filters,
88
89
  ) -> list[dict]:
89
90
  """
90
- Semantic search over indexed content.
91
+ Hybrid search: semantic similarity + keyword filtering.
91
92
 
92
93
  Args:
93
94
  query: Natural language search query
94
95
  limit: Max results to return
95
96
  type_filter: "code" or "docs" (None = both)
96
97
  namespace: Filter by namespace (for docs)
98
+ require_terms: List of terms that MUST appear in results (case-insensitive).
99
+ Use for scoped queries like "error handling in mobile" with
100
+ require_terms=["mobile"] to ensure "mobile" isn't ignored.
97
101
  **filters: Additional metadata filters (None values are ignored)
98
102
 
99
103
  Returns:
@@ -121,9 +125,12 @@ class RagtimeDB:
121
125
  else:
122
126
  where = {"$and": conditions}
123
127
 
128
+ # When using require_terms, fetch more results since we'll filter some out
129
+ fetch_limit = limit * 5 if require_terms else limit
130
+
124
131
  results = self.collection.query(
125
132
  query_texts=[query],
126
- n_results=limit,
133
+ n_results=fetch_limit,
127
134
  where=where,
128
135
  )
129
136
 
@@ -131,12 +138,26 @@ class RagtimeDB:
131
138
  output = []
132
139
  if results["documents"] and results["documents"][0]:
133
140
  for i, doc in enumerate(results["documents"][0]):
141
+ # Hybrid filtering: ensure required terms appear
142
+ if require_terms:
143
+ doc_lower = doc.lower()
144
+ # Also check file path in metadata for code/file matches
145
+ file_path = (results["metadatas"][0][i].get("file", "") or "").lower()
146
+ combined_text = f"{doc_lower} {file_path}"
147
+
148
+ if not all(term.lower() in combined_text for term in require_terms):
149
+ continue
150
+
134
151
  output.append({
135
152
  "content": doc,
136
153
  "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
137
154
  "distance": results["distances"][0][i] if results["distances"] else None,
138
155
  })
139
156
 
157
+ # Stop once we have enough
158
+ if len(output) >= limit:
159
+ break
160
+
140
161
  return output
141
162
 
142
163
  def delete(self, ids: list[str]) -> None:
@@ -132,7 +132,7 @@ class RagtimeMCPServer:
132
132
  },
133
133
  {
134
134
  "name": "search",
135
- "description": "Semantic search over indexed code and docs. Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations before making code changes or decisions.",
135
+ "description": "Hybrid search over indexed code and docs (semantic + keyword). Returns function signatures, class definitions, and doc summaries with file paths and line numbers. IMPORTANT: Results are summaries only - use the Read tool on returned file paths to see full implementations before making code changes or decisions.",
136
136
  "inputSchema": {
137
137
  "type": "object",
138
138
  "properties": {
@@ -152,6 +152,11 @@ class RagtimeMCPServer:
152
152
  "type": "string",
153
153
  "description": "Filter by component"
154
154
  },
155
+ "require_terms": {
156
+ "type": "array",
157
+ "items": {"type": "string"},
158
+ "description": "Terms that MUST appear in results (case-insensitive). Use for scoped queries like 'error handling in mobile' with require_terms=['mobile'] to ensure the qualifier isn't lost in semantic search."
159
+ },
155
160
  "limit": {
156
161
  "type": "integer",
157
162
  "default": 10,
@@ -333,13 +338,14 @@ class RagtimeMCPServer:
333
338
  }
334
339
 
335
340
  def _search(self, args: dict) -> dict:
336
- """Search indexed content."""
341
+ """Search indexed content with hybrid semantic + keyword matching."""
337
342
  results = self.db.search(
338
343
  query=args["query"],
339
344
  limit=args.get("limit", 10),
340
345
  namespace=args.get("namespace"),
341
346
  type_filter=args.get("type"),
342
347
  component=args.get("component"),
348
+ require_terms=args.get("require_terms"),
343
349
  )
344
350
 
345
351
  return {
@@ -487,7 +493,7 @@ class RagtimeMCPServer:
487
493
  "protocolVersion": "2024-11-05",
488
494
  "serverInfo": {
489
495
  "name": "ragtime",
490
- "version": "0.2.10",
496
+ "version": "0.2.12",
491
497
  },
492
498
  "capabilities": {
493
499
  "tools": {},
@@ -10,6 +10,7 @@ from dataclasses import dataclass, field
10
10
  from datetime import date
11
11
  from typing import Optional
12
12
  import uuid
13
+ import hashlib
13
14
  import re
14
15
  import yaml
15
16
 
@@ -139,8 +140,19 @@ class Memory:
139
140
  except ValueError:
140
141
  pass # path not relative to base, will regenerate
141
142
 
143
+ # Use frontmatter ID if present, otherwise derive stable ID from file path
144
+ # This ensures reindex is idempotent - same file always gets same ID
145
+ if "id" in frontmatter:
146
+ memory_id = frontmatter["id"]
147
+ elif file_path:
148
+ # Stable hash of relative path
149
+ memory_id = hashlib.sha256(file_path.encode()).hexdigest()[:8]
150
+ else:
151
+ # Fallback: hash of absolute path
152
+ memory_id = hashlib.sha256(str(path).encode()).hexdigest()[:8]
153
+
142
154
  return cls(
143
- id=frontmatter.get("id", str(uuid.uuid4())[:8]),
155
+ id=memory_id,
144
156
  content=content,
145
157
  namespace=frontmatter.get("namespace", "app"),
146
158
  type=frontmatter.get("type", "unknown"),
@@ -207,25 +219,41 @@ class MemoryStore:
207
219
 
208
220
  def get(self, memory_id: str) -> Optional[Memory]:
209
221
  """Get a memory by ID."""
210
- # Search in ChromaDB to find the file
211
- results = self.db.collection.get(ids=[memory_id])
222
+ # Search in ChromaDB to find the memory
223
+ results = self.db.collection.get(ids=[memory_id], include=["documents", "metadatas"])
212
224
 
213
225
  if not results["ids"]:
214
226
  return None
215
227
 
216
228
  metadata = results["metadatas"][0]
229
+ content = results["documents"][0] if results["documents"] else ""
217
230
  file_rel_path = metadata.get("file", "")
218
231
 
219
- if not file_rel_path:
220
- return None
232
+ # Try to read from file first (has full frontmatter data)
233
+ if file_rel_path:
234
+ file_path = self.memory_dir / file_rel_path
235
+ if file_path.exists():
236
+ return Memory.from_file(file_path, relative_to=self.memory_dir)
221
237
 
222
- file_path = self.memory_dir / file_rel_path
223
-
224
- if file_path.exists():
225
- # Pass relative_to so the memory preserves its actual file path
226
- return Memory.from_file(file_path, relative_to=self.memory_dir)
227
-
228
- return None
238
+ # Fall back to constructing from ChromaDB data
239
+ # This handles cases where file path is wrong or file was deleted
240
+ return Memory(
241
+ id=memory_id,
242
+ content=content,
243
+ namespace=metadata.get("namespace", "unknown"),
244
+ type=metadata.get("type", "unknown"),
245
+ component=metadata.get("component"),
246
+ confidence=metadata.get("confidence", "medium"),
247
+ confidence_reason=metadata.get("confidence_reason"),
248
+ source=metadata.get("source", "unknown"),
249
+ status=metadata.get("status", "active"),
250
+ added=metadata.get("added", ""),
251
+ author=metadata.get("author"),
252
+ issue=metadata.get("issue"),
253
+ epic=metadata.get("epic"),
254
+ branch=metadata.get("branch"),
255
+ _file_path=file_rel_path,
256
+ )
229
257
 
230
258
  def delete(self, memory_id: str) -> bool:
231
259
  """Delete a memory by ID."""
@@ -322,10 +350,13 @@ class MemoryStore:
322
350
  if component:
323
351
  conditions.append({"component": component})
324
352
 
353
+ # Exclude docs/code entries - they use type="docs" or type="code"
354
+ # while memories use types like "architecture", "feature", etc.
355
+ # This is especially important for wildcard queries
356
+ conditions.append({"type": {"$nin": ["docs", "code"]}})
357
+
325
358
  # Build where clause with $and if multiple conditions
326
- if len(conditions) == 0:
327
- where = None
328
- elif len(conditions) == 1:
359
+ if len(conditions) == 1:
329
360
  where = conditions[0]
330
361
  else:
331
362
  where = {"$and": conditions}
File without changes
File without changes