ragtime-cli 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ragtime-cli might be problematic (see the registry's advisory for details).
- {ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/METADATA +1 -1
- {ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/RECORD +13 -13
- src/cli.py +173 -62
- src/config.py +12 -0
- src/db.py +71 -9
- src/indexers/code.py +49 -16
- src/indexers/docs.py +6 -1
- src/mcp_server.py +13 -3
- src/memory.py +30 -9
- {ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/WHEEL +0 -0
- {ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/entry_points.txt +0 -0
- {ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/top_level.txt +0 -0
{ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.6
+Version: 0.2.8
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT
{ragtime_cli-0.2.6.dist-info → ragtime_cli-0.2.8.dist-info}/RECORD

@@ -1,10 +1,10 @@
-ragtime_cli-0.2.6.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
+ragtime_cli-0.2.8.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
 src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-src/cli.py,sha256=…
-src/config.py,sha256=…
-src/db.py,sha256=…
-src/mcp_server.py,sha256=…
-src/memory.py,sha256=…
+src/cli.py,sha256=qO7tQjzMF9wm5PmBpglCaskEzndyVMk5OMJ5tB69rCs,73978
+src/config.py,sha256=qyn5ADjPwvQlhwSJHwzat1v449b7MKEzIHr37Q5qhCc,4497
+src/db.py,sha256=Lm3zrj-6KBw4YlSCW17fu5GlaVqrK6AFw06xaW8E-Yw,6648
+src/mcp_server.py,sha256=Ta_v8SgjTI3qXvJvJ9laOEbRqYCdHKBJXB-NmFpvFuw,20791
+src/memory.py,sha256=byzjAu2YaL9u4vNm6MLmaxeOWrg6RqRn5eJlIwxkEzA,12990
 src/commands/audit.md,sha256=Xkucm-gfBIMalK9wf7NBbyejpsqBTUAGGlb7GxMtMPY,5137
 src/commands/create-pr.md,sha256=u6-jVkDP_6bJQp6ImK039eY9F6B9E2KlAVlvLY-WV6Q,9483
 src/commands/generate-docs.md,sha256=9W2Yy-PDyC3p5k39uEb31z5YAHkSKsQLg6gV3tLgSnQ,7015
@@ -16,10 +16,10 @@ src/commands/remember.md,sha256=nNewsUhIqF4wtD1jhVDZvmLZjdcmPN6NmUM43SdWepc,5368
 src/commands/save.md,sha256=7gTpW46AU9Y4l8XVZ8f4h1sEdBfVqIRA7hlidUxMAC4,251
 src/commands/start.md,sha256=qoqhkMgET74DBx8YPIT1-wqCiVBUDxlmevigsCinHSY,6506
 src/indexers/__init__.py,sha256=MYoCPZUpHakMX1s2vWnc9shjWfx_X1_0JzUhpKhnKUQ,454
-src/indexers/code.py,sha256=…
-src/indexers/docs.py,sha256=…
-ragtime_cli-0.2.6.dist-info/METADATA,sha256=…
-ragtime_cli-0.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-ragtime_cli-0.2.6.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
-ragtime_cli-0.2.6.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
-ragtime_cli-0.2.6.dist-info/RECORD,,
+src/indexers/code.py,sha256=G2TbiKbWj0e7DV5KsU8-Ggw6ziDb4zTuZ4Bu3ryV4g8,18059
+src/indexers/docs.py,sha256=nyewQ4Ug4SCuhne4TuLDlUDzz9GH2STInddj81ocz50,3555
+ragtime_cli-0.2.8.dist-info/METADATA,sha256=at8aPrC2FKRCn2MKj73K2aICXbXdqhreZb1w0M5DIwg,9875
+ragtime_cli-0.2.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ragtime_cli-0.2.8.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
+ragtime_cli-0.2.8.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
+ragtime_cli-0.2.8.dist-info/RECORD,,
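The `sha256=` values above follow the wheel RECORD convention: the raw digest encoded as urlsafe base64 with the `=` padding stripped, followed by the file size. A minimal sketch for recomputing an entry locally (the path is illustrative):

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a RECORD-style line: path,sha256=<urlsafe-b64-no-pad>,<size>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# Example (hypothetical local file):
# print(record_entry("src/db.py"))
```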
src/cli.py CHANGED

@@ -75,6 +75,7 @@ def check_ghp_installed() -> bool:
 
 def get_issue_from_ghp(issue_num: int, path: Path) -> dict | None:
     """Get issue details using ghp issue open."""
+    import json
     try:
         result = subprocess.run(
             ["ghp", "issue", "open", str(issue_num), "--json"],
@@ -84,15 +85,15 @@ def get_issue_from_ghp(issue_num: int, path: Path) -> dict | None:
             timeout=30,
         )
         if result.returncode == 0:
-            import json
             return json.loads(result.stdout)
-    except (subprocess.TimeoutExpired, FileNotFoundError, …
+    except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError):
        pass
     return None
 
 
 def get_issue_from_gh(issue_num: int, path: Path) -> dict | None:
     """Get issue details using gh CLI."""
+    import json
     try:
         result = subprocess.run(
             ["gh", "issue", "view", str(issue_num), "--json", "title,body,labels,number"],
@@ -102,9 +103,8 @@ def get_issue_from_gh(issue_num: int, path: Path) -> dict | None:
             timeout=30,
        )
         if result.returncode == 0:
-            import json
             return json.loads(result.stdout)
-    except (subprocess.TimeoutExpired, FileNotFoundError, …
+    except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError):
         pass
     return None
 
@@ -169,7 +169,7 @@ def get_remote_branches_with_ragtime(path: Path) -> list[str]:
 
 
 @click.group()
-@click.version_option(version="0.2.6")
+@click.version_option(version="0.2.8")
 def main():
     """Ragtime - semantic search over code and documentation."""
     pass
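The change in the first two hunks fixes a subtle ordering bug: the old code imported `json` inside the `if` branch, but the `except` tuple names `json.JSONDecodeError`, and Python only evaluates that tuple when an exception is actually raised. If `subprocess.run` failed before the import ran, the handler itself blew up with a `NameError`. A minimal repro of the pitfall, not taken from the package:

```python
import subprocess

def broken():
    try:
        result = subprocess.run(["no-such-binary"], capture_output=True)
        import json  # too late: the except clause below needs this name
        return json.loads(result.stdout)
    except (FileNotFoundError, json.JSONDecodeError):  # NameError here
        return None

# broken() raises NameError instead of returning None: FileNotFoundError is
# raised before `import json` ever executes, so evaluating the except tuple
# fails. Moving the import to the top of the function (as the diff does)
# makes the tuple safe to evaluate.
```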
@@ -258,12 +258,73 @@ Add your team's conventions above. Each rule should be:
         click.echo(f"  Install for enhanced workflow: npm install -g @bretwardjames/ghp-cli")
 
 
+# Batch size for ChromaDB upserts (embedding computation happens here)
+INDEX_BATCH_SIZE = 100
+
+
+def _upsert_entries(db, entries, entry_type: str = "docs", label: str = "  Embedding"):
+    """Upsert entries to ChromaDB in batches with progress bar."""
+    if not entries:
+        return
+
+    # Process in batches with progress feedback
+    batches = [entries[i:i + INDEX_BATCH_SIZE] for i in range(0, len(entries), INDEX_BATCH_SIZE)]
+
+    with click.progressbar(
+        batches,
+        label=label,
+        show_percent=True,
+        show_pos=True,
+        item_show_func=lambda b: f"{len(b)} items" if b else "",
+    ) as batch_iter:
+        for batch in batch_iter:
+            if entry_type == "code":
+                ids = [f"{e.file_path}:{e.line_number}:{e.symbol_name}" for e in batch]
+            else:
+                ids = [e.file_path for e in batch]
+
+            documents = [e.content for e in batch]
+            metadatas = [e.to_metadata() for e in batch]
+            db.upsert(ids=ids, documents=documents, metadatas=metadatas)
+
+
+def _get_files_to_process(
+    all_files: list[Path],
+    indexed_files: dict[str, float],
+) -> tuple[list[Path], list[str]]:
+    """
+    Compare files on disk with indexed files to determine what needs processing.
+
+    Returns:
+        (files_to_index, files_to_delete)
+    """
+    disk_files = {str(f): os.path.getmtime(f) for f in all_files}
+
+    to_index = []
+    for file_path in all_files:
+        path_str = str(file_path)
+        disk_mtime = disk_files[path_str]
+        indexed_mtime = indexed_files.get(path_str, 0.0)
+
+        # Index if new or modified (with 1-second tolerance for filesystem precision)
+        if disk_mtime > indexed_mtime + 1.0:
+            to_index.append(file_path)
+
+    # Find deleted files (in index but not on disk)
+    to_delete = [f for f in indexed_files.keys() if f not in disk_files]
+
+    return to_index, to_delete
+
+
 @main.command()
 @click.argument("path", type=click.Path(exists=True, path_type=Path), default=".")
 @click.option("--type", "index_type", type=click.Choice(["all", "docs", "code"]), default="all")
 @click.option("--clear", is_flag=True, help="Clear existing index before indexing")
 def index(path: Path, index_type: str, clear: bool):
-    """Index a project directory."""
+    """Index a project directory.
+
+    Without --clear, performs incremental indexing (only changed files).
+    """
     path = path.resolve()
     db = get_db(path)
     config = RagtimeConfig.load(path)
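The batching in `_upsert_entries` is the standard list-slice idiom; each `db.upsert` call is where ChromaDB computes embeddings, so the batch size bounds the work per call and keeps the progress bar honest. The slicing behaves like this (a standalone sketch):

```python
INDEX_BATCH_SIZE = 100

entries = list(range(250))  # stand-ins for parsed entries
batches = [entries[i:i + INDEX_BATCH_SIZE] for i in range(0, len(entries), INDEX_BATCH_SIZE)]

print([len(b) for b in batches])  # [100, 100, 50] - the tail batch is simply shorter
```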
@@ -276,7 +337,10 @@ def index(path: Path, index_type: str, clear: bool):
         db.clear(type_filter=index_type)
 
     if index_type in ("all", "docs"):
-        # Discover all doc files
+        # Get currently indexed docs
+        indexed_docs = {} if clear else db.get_indexed_files("docs")
+
+        # Discover all doc files
         all_doc_files = []
         for docs_path in config.docs.paths:
             docs_root = path / docs_path
@@ -290,39 +354,55 @@ def index(path: Path, index_type: str, clear: bool):
             )
             all_doc_files.extend(files)
 
-        if all_doc_files:
+        if all_doc_files or indexed_docs:
+            # Determine what needs processing
+            to_index, to_delete = _get_files_to_process(all_doc_files, indexed_docs)
+
             click.echo(f"Found {len(all_doc_files)} doc files")
-            … (21 lines of the old eager indexing block, not shown in this view)
+            if not clear:
+                unchanged = len(all_doc_files) - len(to_index)
+                if unchanged > 0:
+                    click.echo(f"  {unchanged} unchanged, {len(to_index)} to index")
+                if to_delete:
+                    click.echo(f"  {len(to_delete)} to remove (deleted from disk)")
+
+            # Delete removed files
+            if to_delete:
+                db.delete_by_file(to_delete, "docs")
+
+            # Index new/changed files
+            if to_index:
+                entries = []
+                with click.progressbar(
+                    to_index,
+                    label="  Parsing",
+                    show_percent=True,
+                    show_pos=True,
+                    item_show_func=lambda f: f.name[:30] if f else "",
+                ) as files:
+                    for file_path in files:
+                        entry = index_doc_file(file_path)
+                        if entry:
+                            entries.append(entry)
+
+                if entries:
+                    _upsert_entries(db, entries, "docs")
+                    click.echo(f"  Indexed {len(entries)} documents")
+            elif not to_delete:
+                click.echo("  All docs up to date")
         else:
             click.echo("  No documents found")
 
     if index_type in ("all", "code"):
+        # Get currently indexed code files
+        indexed_code = {} if clear else db.get_indexed_files("code")
+
         # Build exclusion list for code
         code_exclude = list(config.code.exclude)
         for docs_path in config.docs.paths:
             code_exclude.append(f"**/{docs_path}/**")
 
-        # Discover all code files
+        # Discover all code files
         all_code_files = []
         for code_path_str in config.code.paths:
             code_root = path / code_path_str
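Both the docs and code branches reuse the same `click.progressbar` shape; note that `item_show_func` also receives `None` once the bar finishes, which is why every lambda here guards with `if f else ""`. A minimal standalone sketch of the pattern (file names are made up):

```python
import time
import click

items = [f"file_{i}.md" for i in range(5)]

with click.progressbar(
    items,
    label="  Parsing",
    show_percent=True,
    show_pos=True,
    item_show_func=lambda f: f[:30] if f else "",  # f is None when the bar completes
) as bar:
    for name in bar:
        time.sleep(0.1)  # stand-in for parsing work
```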
@@ -336,36 +416,47 @@ def index(path: Path, index_type: str, clear: bool):
             )
             all_code_files.extend(files)
 
-        if all_code_files:
+        if all_code_files or indexed_code:
+            # Determine what needs processing
+            to_index, to_delete = _get_files_to_process(all_code_files, indexed_code)
+
             click.echo(f"Found {len(all_code_files)} code files")
-            … (12 lines of the old eager indexing block, not shown in this view)
-            if …
-
-            ids = [f"{e.file_path}:{e.line_number}:{e.symbol_name}" for e in total_entries]
-            documents = [e.content for e in total_entries]
-            metadatas = [e.to_metadata() for e in total_entries]
-            db.upsert(ids=ids, documents=documents, metadatas=metadatas)
-            click.echo(f"  Indexed {len(total_entries)} code symbols")
-
-            # Show breakdown by type
+            if not clear:
+                unchanged = len(all_code_files) - len(to_index)
+                if unchanged > 0:
+                    click.echo(f"  {unchanged} unchanged, {len(to_index)} to index")
+                if to_delete:
+                    click.echo(f"  {len(to_delete)} to remove (deleted from disk)")
+
+            # Delete removed files
+            if to_delete:
+                db.delete_by_file(to_delete, "code")
+
+            # Index new/changed files
+            if to_index:
+                entries = []
                by_type = {}
-            … (6 more lines of the old block, not shown in this view)
+                with click.progressbar(
+                    to_index,
+                    label="  Parsing",
+                    show_percent=True,
+                    show_pos=True,
+                    item_show_func=lambda f: f.name[:30] if f else "",
+                ) as files:
+                    for file_path in files:
+                        file_entries = index_code_file(file_path)
+                        for entry in file_entries:
+                            entries.append(entry)
+                            by_type[entry.symbol_type] = by_type.get(entry.symbol_type, 0) + 1
+
+                if entries:
+                    click.echo(f"  Found {len(entries)} symbols")
+                    _upsert_entries(db, entries, "code")
+                    click.echo(f"  Indexed {len(entries)} code symbols")
+                    breakdown = ", ".join(f"{count} {typ}s" for typ, count in sorted(by_type.items()))
+                    click.echo(f"  ({breakdown})")
+            elif not to_delete:
+                click.echo("  All code up to date")
         else:
             click.echo("  No code files found")
 
@@ -1163,7 +1254,6 @@ def daemon_start(path: Path, interval: str):
     pid_file.write_text(str(os.getpid()))
 
     # Redirect output to log file
-    # Note: log_fd is intentionally kept open for the lifetime of the daemon
     log_fd = open(log_file, "a")
     os.dup2(log_fd.fileno(), sys.stdout.fileno())
     os.dup2(log_fd.fileno(), sys.stderr.fileno())
@@ -1171,9 +1261,20 @@ def daemon_start(path: Path, interval: str):
     import time
     from datetime import datetime
 
+    # Set up signal handler for clean shutdown
+    running = True
+
+    def handle_shutdown(signum, frame):
+        nonlocal running
+        running = False
+        print(f"\n[{datetime.now().isoformat()}] Received signal {signum}, shutting down...")
+
+    signal.signal(signal.SIGTERM, handle_shutdown)
+    signal.signal(signal.SIGINT, handle_shutdown)
+
     print(f"\n[{datetime.now().isoformat()}] Daemon started (interval: {interval})")
 
-    while True:
+    while running:
         try:
             print(f"[{datetime.now().isoformat()}] Running sync...")
 
@@ -1196,7 +1297,17 @@ def daemon_start(path: Path, interval: str):
         except Exception as e:
             print(f"[{datetime.now().isoformat()}] Error: {e}")
 
-        time.sleep(interval_seconds)
+        # Sleep in small increments to respond to signals faster
+        for _ in range(interval_seconds):
+            if not running:
+                break
+            time.sleep(1)
+
+    # Clean up
+    print(f"[{datetime.now().isoformat()}] Daemon stopped")
+    log_fd.close()
+    if pid_file.exists():
+        pid_file.unlink()
 
 
 @daemon.command("stop")
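The daemon changes are the standard cooperative-shutdown pattern: a signal handler flips a flag, and the sleep is chopped into one-second ticks so SIGTERM is honored within about a second instead of a full sync interval. The same pattern in a self-contained script:

```python
import signal
import time

running = True

def handle_shutdown(signum, frame):
    global running
    running = False

signal.signal(signal.SIGTERM, handle_shutdown)
signal.signal(signal.SIGINT, handle_shutdown)

interval_seconds = 30
while running:
    # ... one unit of work here ...
    for _ in range(interval_seconds):  # sleep in 1s ticks so signals interrupt promptly
        if not running:
            break
        time.sleep(1)
print("stopped cleanly")
```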
@@ -2026,7 +2137,7 @@ def update(check: bool):
     from urllib.request import urlopen
     from urllib.error import URLError
 
-    current = "0.2.6"
+    current = "0.2.8"
 
     click.echo(f"Current version: {current}")
     click.echo("Checking PyPI for updates...")
src/config.py CHANGED

@@ -33,6 +33,18 @@ class CodeConfig:
         "**/build/**",
         "**/dist/**",
         "**/.dart_tool/**",
+        # Generated code (Prisma, GraphQL, OpenAPI, etc.)
+        "**/generated/**",
+        "**/*.generated.*",
+        "**/*.g.dart",
+        # TypeScript declaration files (often auto-generated)
+        "**/*.d.ts",
+        # Test files (usually not needed in search)
+        "**/__tests__/**",
+        "**/*.test.*",
+        "**/*.spec.*",
+        # Python init files (rarely have searchable content)
+        "**/__init__.py",
     ])
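These defaults are plain glob strings; whether `**/*.test.*` actually excludes a file depends on the matcher in `discover_code_files` (see the `src/indexers/code.py` diff below), which strips the `**/` prefix and applies the remainder to the basename. A quick sketch of how that filename half behaves with `fnmatch`:

```python
from fnmatch import fnmatch

patterns = ["*.test.*", "*.spec.*", "*.d.ts", "*.generated.*"]

for name in ["Button.test.tsx", "api.spec.ts", "types.d.ts", "schema.generated.py", "main.ts"]:
    excluded = any(fnmatch(name, p) for p in patterns)
    print(f"{name}: {'excluded' if excluded else 'kept'}")  # only main.ts is kept
```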
src/db.py CHANGED

@@ -154,14 +154,76 @@ class RagtimeDB:
 
     def stats(self) -> dict:
         """Get index statistics."""
-        count = self.collection.count()
+        try:
+            count = self.collection.count()
+
+            # Count by type - only retrieve IDs, not full documents
+            docs_result = self.collection.get(where={"type": "docs"}, include=[])
+            code_result = self.collection.get(where={"type": "code"}, include=[])
+
+            docs_count = len(docs_result["ids"])
+            code_count = len(code_result["ids"])
+
+            return {
+                "total": count,
+                "docs": docs_count,
+                "code": code_count,
+            }
+        except Exception:
+            # Return zeros if collection is corrupted or unavailable
+            return {
+                "total": 0,
+                "docs": 0,
+                "code": 0,
+            }
+
+    def get_indexed_files(self, type_filter: str | None = None) -> dict[str, float]:
+        """
+        Get all indexed files and their modification times.
+
+        Args:
+            type_filter: "code" or "docs" (None = both)
+
+        Returns:
+            Dict mapping file paths to their indexed mtime
+        """
+        where = {"type": type_filter} if type_filter else None
+        results = self.collection.get(where=where, include=["metadatas"])
+
+        files: dict[str, float] = {}
+        for meta in results["metadatas"]:
+            file_path = meta.get("file", "")
+            mtime = meta.get("mtime", 0.0)
+            # For code files, multiple entries per file - keep max mtime
+            if file_path not in files or mtime > files[file_path]:
+                files[file_path] = mtime
+
+        return files
+
+    def delete_by_file(self, file_paths: list[str], type_filter: str | None = None) -> int:
+        """
+        Delete all entries for the given file paths.
+
+        Args:
+            file_paths: List of file paths to remove
+            type_filter: "code" or "docs" (None = both)
+
+        Returns:
+            Number of entries deleted
+        """
+        if not file_paths:
+            return 0
+
+        # Build where clause
+        where = {"file": {"$in": file_paths}}
+        if type_filter:
+            where = {"$and": [{"file": {"$in": file_paths}}, {"type": type_filter}]}
+
+        # Get IDs to delete
+        results = self.collection.get(where=where)
+        ids = results["ids"]
 
-        # Count by type
-        docs_count = len(self.collection.get(where={"type": "docs"})["ids"])
-        code_count = len(self.collection.get(where={"type": "code"})["ids"])
+        if ids:
+            self.collection.delete(ids=ids)
 
-        return {
-            "total": count,
-            "docs": docs_count,
-            "code": code_count,
-        }
+        return len(ids)
src/indexers/code.py CHANGED

@@ -6,6 +6,7 @@ This allows searching for specific code constructs like "useAsyncState" or "JWTM
 """
 
 import ast
+import os
 import re
 from fnmatch import fnmatch
 from pathlib import Path
@@ -32,6 +33,7 @@ class CodeEntry:
     symbol_type: str  # function, class, interface, component, etc.
     line_number: int  # Line where symbol starts
     docstring: str | None = None  # Extracted docstring/JSDoc
+    mtime: float | None = None  # File modification time for incremental indexing
 
     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""
@@ -42,6 +44,7 @@ class CodeEntry:
             "symbol_name": self.symbol_name,
             "symbol_type": self.symbol_type,
             "line": self.line_number,
+            "mtime": self.mtime or 0.0,
         }
 
 
@@ -92,14 +95,21 @@ def discover_code_files(
         rel_path = str(path)
 
         for ex in exclude:
-            # Handle ** patterns
+            # Handle ** patterns
             if "**" in ex:
-                … (6 lines of the old ** handling, not shown in this view)
+                if ex.endswith("/**"):
+                    # Directory pattern: **/node_modules/** or **/generated/**
+                    # Extract the directory name to match as path segment
+                    dir_pattern = ex.replace("**/", "").replace("/**", "")
+                    if f"/{dir_pattern}/" in f"/{rel_path}/":
+                        skip = True
+                        break
+                else:
+                    # File pattern: **/*.d.ts, **/*.test.*, **/*.generated.*
+                    file_pattern = ex.replace("**/", "")
+                    if fnmatch(path.name, file_pattern):
+                        skip = True
+                        break
             elif fnmatch(rel_path, ex) or fnmatch(path.name, ex):
                 skip = True
                 break
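Wrapping both sides in slashes is what makes the directory check a whole-segment test: it stops `**/generated/**` from excluding a path that merely contains the substring. A quick check of the idiom:

```python
def in_excluded_dir(rel_path: str, dir_pattern: str) -> bool:
    # Wrap both sides in "/" so "generated" matches only a full path segment
    return f"/{dir_pattern}/" in f"/{rel_path}/"

print(in_excluded_dir("src/generated/client.ts", "generated"))    # True
print(in_excluded_dir("src/degenerated/client.ts", "generated"))  # False (substring only)
print(in_excluded_dir("generated/index.ts", "generated"))         # True (leading segment)
```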
@@ -239,23 +249,38 @@ def index_typescript_file(file_path: Path, content: str) -> list[CodeEntry]:
     entries = []
     lines = content.split("\n")
 
-    # Patterns for different constructs
+    # Patterns for different constructs (exported and non-exported)
     patterns = [
         # Exported functions
         (r'export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*(?:<[^>]+>)?\s*\(([^)]*)\)(?:\s*:\s*([^\{]+))?',
          "function"),
+        # Non-exported functions (top-level, not inside class/object)
+        (r'^(?:async\s+)?function\s+(\w+)\s*(?:<[^>]+>)?\s*\(',
+         "function"),
         # Arrow function exports
         (r'export\s+const\s+(\w+)\s*(?::\s*[^=]+)?\s*=\s*(?:async\s+)?\([^)]*\)\s*(?::\s*[^=]+)?\s*=>',
          "function"),
+        # Non-exported arrow functions (top-level const)
+        (r'^const\s+(\w+)\s*(?::\s*[^=]+)?\s*=\s*(?:async\s+)?\([^)]*\)\s*(?::\s*[^=]+)?\s*=>',
+         "function"),
         # Class exports
         (r'export\s+(?:default\s+)?class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([^{]+))?',
          "class"),
+        # Non-exported classes
+        (r'^class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([^{]+))?',
+         "class"),
         # Interface exports
         (r'export\s+(?:default\s+)?interface\s+(\w+)(?:<[^>]+>)?(?:\s+extends\s+([^{]+))?',
          "interface"),
+        # Non-exported interfaces
+        (r'^interface\s+(\w+)(?:<[^>]+>)?(?:\s+extends\s+([^{]+))?',
+         "interface"),
         # Type exports
         (r'export\s+type\s+(\w+)(?:<[^>]+>)?\s*=',
          "type"),
+        # Non-exported types
+        (r'^type\s+(\w+)(?:<[^>]+>)?\s*=',
+         "type"),
         # Const exports (useful for config objects, composables, etc.)
         (r'export\s+const\s+(\w+)\s*(?::\s*([^=]+))?\s*=\s*(?!.*=>)',
          "constant"),
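These are line-anchored heuristics, not a real TypeScript parser: the new `^` variants only fire on unindented top-level declarations, since indented lines begin with whitespace. For instance, using one pattern from the diff:

```python
import re

# The non-exported top-level function pattern from the diff above
pattern = re.compile(r'^(?:async\s+)?function\s+(\w+)\s*(?:<[^>]+>)?\s*\(')

print(pattern.match("function parseConfig(raw) {").group(1))    # parseConfig
print(pattern.match("async function load<T>(url) {").group(1))  # load
print(pattern.match("  function inner() {"))                    # None - indented, skipped
```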
@@ -380,8 +405,9 @@ def index_dart_file(file_path: Path, content: str) -> list[CodeEntry]:
         # Class definitions
         (r'(?:abstract\s+)?class\s+(\w+)(?:<[^>]+>)?(?:\s+extends\s+(\w+))?(?:\s+with\s+([^{]+))?(?:\s+implements\s+([^{]+))?',
          "class"),
-        # …
-        … (old function pattern, not shown in this view)
+        # Top-level function definitions - explicit return types to avoid matching variable declarations
+        # The re.match already anchors at start, \s* is prepended in loop
+        (r'(?:Future<[^>]+>|Stream<[^>]+>|void|int|String|bool|double|dynamic|List<[^>]+>|Map<[^>]+>|Set<[^>]+>)\s+(\w+)\s*(?:<[^>]+>)?\s*\(',
          "function"),
         # Mixins
         (r'mixin\s+(\w+)(?:\s+on\s+(\w+))?',
@@ -432,7 +458,8 @@ def index_file(file_path: Path) -> list[CodeEntry]:
     """
     try:
         content = file_path.read_text(encoding='utf-8')
-    except (IOError, UnicodeDecodeError):
+        mtime = os.path.getmtime(file_path)
+    except (IOError, UnicodeDecodeError, OSError):
         return []
 
     # Skip empty files
@@ -442,15 +469,21 @@ def index_file(file_path: Path) -> list[CodeEntry]:
     suffix = file_path.suffix.lower()
 
     if suffix == ".py":
-        return index_python_file(file_path, content)
+        entries = index_python_file(file_path, content)
     elif suffix in [".ts", ".tsx", ".js", ".jsx"]:
-        return index_typescript_file(file_path, content)
+        entries = index_typescript_file(file_path, content)
     elif suffix == ".vue":
-        return index_vue_file(file_path, content)
+        entries = index_vue_file(file_path, content)
     elif suffix == ".dart":
-        return index_dart_file(file_path, content)
+        entries = index_dart_file(file_path, content)
+    else:
+        return []
 
-    return []
+    # Set mtime on all entries from this file
+    for entry in entries:
+        entry.mtime = mtime
+
+    return entries
 
 
 def index_directory(
src/indexers/docs.py CHANGED

@@ -4,6 +4,7 @@ Docs indexer - parses markdown files with YAML frontmatter.
 Designed for .claude/memory/ style files but works with any markdown.
 """
 
+import os
 import re
 from pathlib import Path
 from dataclasses import dataclass
@@ -19,6 +20,7 @@ class DocEntry:
     category: str | None = None
     component: str | None = None
     title: str | None = None
+    mtime: float | None = None  # File modification time for incremental indexing
 
     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""
@@ -29,6 +31,7 @@ class DocEntry:
             "category": self.category or "",
             "component": self.component or "",
             "title": self.title or Path(self.file_path).stem,
+            "mtime": self.mtime or 0.0,
         }
 
 
@@ -61,7 +64,8 @@ def index_file(file_path: Path) -> DocEntry | None:
     """
     try:
         content = file_path.read_text(encoding='utf-8')
-    except (IOError, UnicodeDecodeError):
+        mtime = os.path.getmtime(file_path)
+    except (IOError, UnicodeDecodeError, OSError):
         return None
 
     metadata, body = parse_frontmatter(content)
@@ -77,6 +81,7 @@ def index_file(file_path: Path) -> DocEntry | None:
         category=metadata.get("category"),
         component=metadata.get("component"),
         title=metadata.get("title"),
+        mtime=mtime,
    )
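`parse_frontmatter` itself is outside this diff; a simplified sketch of the usual approach (a hypothetical stand-in, not the package's implementation, handling only flat `key: value` pairs):

```python
import re

def parse_frontmatter(content: str) -> tuple[dict, str]:
    """Split '---'-delimited YAML-ish frontmatter from a markdown body (simplified)."""
    m = re.match(r"^---\s*\n(.*?)\n---\s*\n(.*)$", content, re.DOTALL)
    if not m:
        return {}, content
    meta = {}
    for line in m.group(1).splitlines():
        if ":" in line:
            key, _, value = line.partition(":")
            meta[key.strip()] = value.strip()
    return meta, m.group(2)

meta, body = parse_frontmatter("---\ntitle: Auth flow\ncategory: security\n---\n# Notes\n")
print(meta["title"], "|", body.strip())  # Auth flow | # Notes
```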
src/mcp_server.py CHANGED

@@ -487,7 +487,7 @@ class RagtimeMCPServer:
             "protocolVersion": "2024-11-05",
             "serverInfo": {
                 "name": "ragtime",
-                "version": "0.2.6",
+                "version": "0.2.8",
             },
             "capabilities": {
                 "tools": {},
@@ -563,8 +563,18 @@ class RagtimeMCPServer:
                 sys.stdout.write(json.dumps(response) + "\n")
                 sys.stdout.flush()
 
-            except json.JSONDecodeError:
-                … (1 line not shown in this view)
+            except json.JSONDecodeError as e:
+                # Log error and send JSON-RPC error response
+                error_response = {
+                    "jsonrpc": "2.0",
+                    "id": None,
+                    "error": {
+                        "code": -32700,
+                        "message": f"Parse error: {e}",
+                    },
+                }
+                sys.stdout.write(json.dumps(error_response) + "\n")
+                sys.stdout.flush()
             except KeyboardInterrupt:
                 break
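Replying instead of silently swallowing the exception matches the JSON-RPC 2.0 spec: -32700 is the reserved "Parse error" code, and `id` must be null because the request id could never be read from the malformed input. A standalone sketch of the handler's behavior:

```python
import json

def handle_line(line: str) -> dict:
    try:
        request = json.loads(line)
        return {"jsonrpc": "2.0", "id": request.get("id"), "result": "ok"}
    except json.JSONDecodeError as e:
        # -32700 is the reserved JSON-RPC 2.0 code for unparseable input
        return {"jsonrpc": "2.0", "id": None,
                "error": {"code": -32700, "message": f"Parse error: {e}"}}

print(handle_line('{"jsonrpc": "2.0", "id": 1}'))
print(handle_line("not json at all"))
```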
src/memory.py CHANGED

@@ -80,18 +80,23 @@ class Memory:
 
         if self.namespace == "app":
             if self.component:
-                return f"app/{self.component}/{self.id}-{slug}.md"
+                # Sanitize component to prevent path traversal
+                safe_component = self._slugify(self.component)
+                return f"app/{safe_component}/{self.id}-{slug}.md"
             return f"app/{self.id}-{slug}.md"
         elif self.namespace == "team":
             return f"team/{self.id}-{slug}.md"
         elif self.namespace.startswith("user-"):
-            username = self.namespace.replace("user-", "")
+            # Sanitize username to prevent path traversal
+            username = self._slugify(self.namespace.replace("user-", ""))
             return f"users/{username}/{self.id}-{slug}.md"
         elif self.namespace.startswith("branch-"):
             branch_slug = self._slugify(self.namespace.replace("branch-", ""))
             return f"branches/{branch_slug}/{self.id}-{slug}.md"
         else:
-            return f"other/{self.namespace}/{self.id}-{slug}.md"
+            # Sanitize namespace to prevent path traversal
+            safe_namespace = self._slugify(self.namespace)
+            return f"other/{safe_namespace}/{self.id}-{slug}.md"
 
     @staticmethod
     def _slugify(text: str) -> str:
@@ -376,9 +381,25 @@ class MemoryStore:
 
     def _cleanup_empty_dirs(self, dir_path: Path) -> None:
         """Remove empty directories up to memory_dir."""
-        … (6 lines of the old cleanup loop, not shown in this view)
+        # Resolve paths to handle symlinks and ensure we stay within bounds
+        try:
+            dir_path = dir_path.resolve()
+            memory_dir_resolved = self.memory_dir.resolve()
+        except OSError:
+            return  # Can't resolve paths, bail out safely
+
+        # Ensure dir_path is actually under memory_dir
+        try:
+            dir_path.relative_to(memory_dir_resolved)
+        except ValueError:
+            return  # dir_path is not under memory_dir, bail out
+
+        while dir_path != memory_dir_resolved and dir_path.exists():
+            try:
+                if not any(dir_path.iterdir()):
+                    dir_path.rmdir()
+                    dir_path = dir_path.parent.resolve()
+                else:
+                    break
+            except OSError:
+                break  # Permission error or other issue, stop cleanup
|
|
File without changes
|
|
File without changes
|
|
File without changes
|