ragtime-cli 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/METADATA +1 -1
- {ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/RECORD +13 -13
- src/cli.py +162 -36
- src/config.py +12 -0
- src/db.py +51 -0
- src/indexers/__init__.py +8 -3
- src/indexers/code.py +30 -13
- src/indexers/docs.py +6 -1
- src/mcp_server.py +1 -1
- {ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/WHEEL +0 -0
- {ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/entry_points.txt +0 -0
- {ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/top_level.txt +0 -0
{ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragtime-cli
-Version: 0.2.5
+Version: 0.2.7
 Summary: Local-first memory and RAG system for Claude Code - semantic search over code, docs, and team knowledge
 Author-email: Bret Martineau <bretwardjames@gmail.com>
 License-Expression: MIT
{ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/RECORD
CHANGED

@@ -1,9 +1,9 @@
-ragtime_cli-0.2.5.dist-info/licenses/LICENSE,sha256=…
+ragtime_cli-0.2.7.dist-info/licenses/LICENSE,sha256=9A0wJs2PRDciGRH4F8JUJ-aMKYQyq_gVu2ixrXs-l5A,1070
 src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-src/cli.py,sha256=…
-src/config.py,sha256=…
-src/db.py,sha256=…
-src/mcp_server.py,sha256=…
+src/cli.py,sha256=Cjg4Dg4UGMwuh-uiCZjNP1mk9zmLLiCLcL7Lh3map4c,73400
+src/config.py,sha256=qyn5ADjPwvQlhwSJHwzat1v449b7MKEzIHr37Q5qhCc,4497
+src/db.py,sha256=Tz3DbkbHRwIEFv0FMOa5jDi1TF5BTS6xSPgZl-yLudE,6246
+src/mcp_server.py,sha256=uOixURyPK0sodt3OqZJJBlUHncas9PPC49N-OGGuSAg,20374
 src/memory.py,sha256=8kuHBLDTsZdSBumgA9FRJhHp_VNeoV78QFmiDcJs7YI,12033
 src/commands/audit.md,sha256=Xkucm-gfBIMalK9wf7NBbyejpsqBTUAGGlb7GxMtMPY,5137
 src/commands/create-pr.md,sha256=u6-jVkDP_6bJQp6ImK039eY9F6B9E2KlAVlvLY-WV6Q,9483
@@ -15,11 +15,11 @@ src/commands/recall.md,sha256=unQPWsmocKRoQR7jRtjrj8aVcMHverjGR6u5mYL8TLw,6008
 src/commands/remember.md,sha256=nNewsUhIqF4wtD1jhVDZvmLZjdcmPN6NmUM43SdWepc,5368
 src/commands/save.md,sha256=7gTpW46AU9Y4l8XVZ8f4h1sEdBfVqIRA7hlidUxMAC4,251
 src/commands/start.md,sha256=qoqhkMgET74DBx8YPIT1-wqCiVBUDxlmevigsCinHSY,6506
-src/indexers/__init__.py,sha256=…
-src/indexers/code.py,sha256=…
-src/indexers/docs.py,sha256=…
-ragtime_cli-0.2.5.dist-info/METADATA,sha256=…
-ragtime_cli-0.2.5.dist-info/WHEEL,sha256=…
-ragtime_cli-0.2.5.dist-info/entry_points.txt,sha256=…
-ragtime_cli-0.2.5.dist-info/top_level.txt,sha256=…
-ragtime_cli-0.2.5.dist-info/RECORD,,
+src/indexers/__init__.py,sha256=MYoCPZUpHakMX1s2vWnc9shjWfx_X1_0JzUhpKhnKUQ,454
+src/indexers/code.py,sha256=OIYJCcpUAMhmKlKBwbJ-VowpWoHDYPgUSP_U2ruJz8w,17163
+src/indexers/docs.py,sha256=nyewQ4Ug4SCuhne4TuLDlUDzz9GH2STInddj81ocz50,3555
+ragtime_cli-0.2.7.dist-info/METADATA,sha256=R0POwJReqRJE7EENLEeEAfAfl4c6SmTCePApE5pb-hE,9875
+ragtime_cli-0.2.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ragtime_cli-0.2.7.dist-info/entry_points.txt,sha256=cWLbeyMxZNbew-THS3bHXTpCRXt1EaUy5QUOXGXLjl4,75
+ragtime_cli-0.2.7.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
+ragtime_cli-0.2.7.dist-info/RECORD,,
src/cli.py
CHANGED

@@ -11,8 +11,10 @@ import sys

 from .db import RagtimeDB
 from .config import RagtimeConfig, init_config
-from .indexers import …
-…
+from .indexers import (
+    discover_docs, index_doc_file, DocEntry,
+    discover_code_files, index_code_file, CodeEntry,
+)
 from .memory import Memory, MemoryStore

@@ -167,7 +169,7 @@ def get_remote_branches_with_ragtime(path: Path) -> list[str]:


 @click.group()
-@click.version_option(version="0.2.5")
+@click.version_option(version="0.2.7")
 def main():
     """Ragtime - semantic search over code and documentation."""
     pass

@@ -256,12 +258,73 @@ Add your team's conventions above. Each rule should be:
     click.echo(f" Install for enhanced workflow: npm install -g @bretwardjames/ghp-cli")


+# Batch size for ChromaDB upserts (embedding computation happens here)
+INDEX_BATCH_SIZE = 100
+
+
+def _upsert_entries(db, entries, entry_type: str = "docs", label: str = " Embedding"):
+    """Upsert entries to ChromaDB in batches with progress bar."""
+    if not entries:
+        return
+
+    # Process in batches with progress feedback
+    batches = [entries[i:i + INDEX_BATCH_SIZE] for i in range(0, len(entries), INDEX_BATCH_SIZE)]
+
+    with click.progressbar(
+        batches,
+        label=label,
+        show_percent=True,
+        show_pos=True,
+        item_show_func=lambda b: f"{len(b)} items" if b else "",
+    ) as batch_iter:
+        for batch in batch_iter:
+            if entry_type == "code":
+                ids = [f"{e.file_path}:{e.line_number}:{e.symbol_name}" for e in batch]
+            else:
+                ids = [e.file_path for e in batch]
+
+            documents = [e.content for e in batch]
+            metadatas = [e.to_metadata() for e in batch]
+            db.upsert(ids=ids, documents=documents, metadatas=metadatas)
+
+
+def _get_files_to_process(
+    all_files: list[Path],
+    indexed_files: dict[str, float],
+) -> tuple[list[Path], list[str]]:
+    """
+    Compare files on disk with indexed files to determine what needs processing.
+
+    Returns:
+        (files_to_index, files_to_delete)
+    """
+    disk_files = {str(f): os.path.getmtime(f) for f in all_files}
+
+    to_index = []
+    for file_path in all_files:
+        path_str = str(file_path)
+        disk_mtime = disk_files[path_str]
+        indexed_mtime = indexed_files.get(path_str, 0.0)
+
+        # Index if new or modified (with 1-second tolerance for filesystem precision)
+        if disk_mtime > indexed_mtime + 1.0:
+            to_index.append(file_path)
+
+    # Find deleted files (in index but not on disk)
+    to_delete = [f for f in indexed_files.keys() if f not in disk_files]
+
+    return to_index, to_delete
+
+
 @main.command()
 @click.argument("path", type=click.Path(exists=True, path_type=Path), default=".")
 @click.option("--type", "index_type", type=click.Choice(["all", "docs", "code"]), default="all")
 @click.option("--clear", is_flag=True, help="Clear existing index before indexing")
 def index(path: Path, index_type: str, clear: bool):
-    """Index a project directory."""
+    """Index a project directory.
+
+    Without --clear, performs incremental indexing (only changed files).
+    """
     path = path.resolve()
     db = get_db(path)
     config = RagtimeConfig.load(path)

@@ -274,65 +337,128 @@ def index(path: Path, index_type: str, clear: bool):
         db.clear(type_filter=index_type)

     if index_type in ("all", "docs"):
-
+        # Get currently indexed docs
+        indexed_docs = {} if clear else db.get_indexed_files("docs")
+
+        # Discover all doc files
+        all_doc_files = []
         for docs_path in config.docs.paths:
             docs_root = path / docs_path
             if not docs_root.exists():
                 click.echo(f" Docs path {docs_root} not found, skipping...")
                 continue
-
-            entries = index_docs(
+            files = discover_docs(
                 docs_root,
                 patterns=config.docs.patterns,
                 exclude=config.docs.exclude,
             )
-
-            if …
-            …
+            all_doc_files.extend(files)
+
+        if all_doc_files or indexed_docs:
+            # Determine what needs processing
+            to_index, to_delete = _get_files_to_process(all_doc_files, indexed_docs)
+
+            click.echo(f"Found {len(all_doc_files)} doc files")
+            if not clear:
+                unchanged = len(all_doc_files) - len(to_index)
+                if unchanged > 0:
+                    click.echo(f" {unchanged} unchanged, {len(to_index)} to index")
+                if to_delete:
+                    click.echo(f" {len(to_delete)} to remove (deleted from disk)")
+
+            # Delete removed files
+            if to_delete:
+                db.delete_by_file(to_delete, "docs")
+
+            # Index new/changed files
+            if to_index:
+                entries = []
+                with click.progressbar(
+                    to_index,
+                    label=" Parsing",
+                    show_percent=True,
+                    show_pos=True,
+                    item_show_func=lambda f: f.name[:30] if f else "",
+                ) as files:
+                    for file_path in files:
+                        entry = index_doc_file(file_path)
+                        if entry:
+                            entries.append(entry)
+
+                if entries:
+                    _upsert_entries(db, entries, "docs")
+                    click.echo(f" Indexed {len(entries)} documents")
+            elif not to_delete:
+                click.echo(" All docs up to date")
         else:
             click.echo(" No documents found")

     if index_type in ("all", "code"):
+        # Get currently indexed code files
+        indexed_code = {} if clear else db.get_indexed_files("code")
+
         # Build exclusion list for code
         code_exclude = list(config.code.exclude)
         for docs_path in config.docs.paths:
             code_exclude.append(f"**/{docs_path}/**")

-
+        # Discover all code files
+        all_code_files = []
         for code_path_str in config.code.paths:
             code_root = path / code_path_str
             if not code_root.exists():
                 click.echo(f" Code path {code_root} not found, skipping...")
                 continue
-
-            entries = index_code(
+            files = discover_code_files(
                 code_root,
                 languages=config.code.languages,
                 exclude=code_exclude,
             )
-
-            if …
-            # …
-            …
+            all_code_files.extend(files)
+
+        if all_code_files or indexed_code:
+            # Determine what needs processing
+            to_index, to_delete = _get_files_to_process(all_code_files, indexed_code)
+
+            click.echo(f"Found {len(all_code_files)} code files")
+            if not clear:
+                unchanged = len(all_code_files) - len(to_index)
+                if unchanged > 0:
+                    click.echo(f" {unchanged} unchanged, {len(to_index)} to index")
+                if to_delete:
+                    click.echo(f" {len(to_delete)} to remove (deleted from disk)")
+
+            # Delete removed files
+            if to_delete:
+                db.delete_by_file(to_delete, "code")
+
+            # Index new/changed files
+            if to_index:
+                entries = []
+                by_type = {}
+                with click.progressbar(
+                    to_index,
+                    label=" Parsing",
+                    show_percent=True,
+                    show_pos=True,
+                    item_show_func=lambda f: f.name[:30] if f else "",
+                ) as files:
+                    for file_path in files:
+                        file_entries = index_code_file(file_path)
+                        for entry in file_entries:
+                            entries.append(entry)
+                            by_type[entry.symbol_type] = by_type.get(entry.symbol_type, 0) + 1
+
+                if entries:
+                    click.echo(f" Found {len(entries)} symbols")
+                    _upsert_entries(db, entries, "code")
+                    click.echo(f" Indexed {len(entries)} code symbols")
+                    breakdown = ", ".join(f"{count} {typ}s" for typ, count in sorted(by_type.items()))
+                    click.echo(f" ({breakdown})")
+            elif not to_delete:
+                click.echo(" All code up to date")
         else:
-            click.echo(" No code …
+            click.echo(" No code files found")

     stats = db.stats()
     click.echo(f"\nIndex stats: {stats['total']} total ({stats['docs']} docs, {stats['code']} code)")

@@ -1991,7 +2117,7 @@ def update(check: bool):
     from urllib.request import urlopen
     from urllib.error import URLError

-    current = "0.2.5"
+    current = "0.2.7"

     click.echo(f"Current version: {current}")
     click.echo("Checking PyPI for updates...")
src/config.py
CHANGED

@@ -33,6 +33,18 @@ class CodeConfig:
         "**/build/**",
         "**/dist/**",
         "**/.dart_tool/**",
+        # Generated code (Prisma, GraphQL, OpenAPI, etc.)
+        "**/generated/**",
+        "**/*.generated.*",
+        "**/*.g.dart",
+        # TypeScript declaration files (often auto-generated)
+        "**/*.d.ts",
+        # Test files (usually not needed in search)
+        "**/__tests__/**",
+        "**/*.test.*",
+        "**/*.spec.*",
+        # Python init files (rarely have searchable content)
+        "**/__init__.py",
     ])
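
To see what the new defaults actually skip, the check below mirrors the `**`-pattern handling that `discover_code_files` applies (see the src/indexers/code.py hunk further down); `excluded` is an illustrative helper, not part of the package:

```python
from fnmatch import fnmatch

def excluded(rel_path: str, patterns: list[str]) -> bool:
    # Directory patterns (**/name/**) match a path segment; file patterns
    # (**/*.d.ts, **/*.test.*) match the basename, as in discover_code_files.
    name = rel_path.rsplit("/", 1)[-1]
    for ex in patterns:
        if "**" in ex:
            if ex.endswith("/**"):
                dir_pattern = ex.replace("**/", "").replace("/**", "")
                if f"/{dir_pattern}/" in f"/{rel_path}/":
                    return True
            elif fnmatch(name, ex.replace("**/", "")):
                return True
        elif fnmatch(rel_path, ex) or fnmatch(name, ex):
            return True
    return False

new_defaults = [
    "**/generated/**", "**/*.generated.*", "**/*.g.dart", "**/*.d.ts",
    "**/__tests__/**", "**/*.test.*", "**/*.spec.*", "**/__init__.py",
]
assert excluded("api/generated/client.ts", new_defaults)
assert excluded("src/types.d.ts", new_defaults)
assert excluded("src/utils.test.ts", new_defaults)
assert not excluded("src/utils.ts", new_defaults)
```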
src/db.py
CHANGED

@@ -165,3 +165,54 @@ class RagtimeDB:
             "docs": docs_count,
             "code": code_count,
         }
+
+    def get_indexed_files(self, type_filter: str | None = None) -> dict[str, float]:
+        """
+        Get all indexed files and their modification times.
+
+        Args:
+            type_filter: "code" or "docs" (None = both)
+
+        Returns:
+            Dict mapping file paths to their indexed mtime
+        """
+        where = {"type": type_filter} if type_filter else None
+        results = self.collection.get(where=where, include=["metadatas"])
+
+        files: dict[str, float] = {}
+        for meta in results["metadatas"]:
+            file_path = meta.get("file", "")
+            mtime = meta.get("mtime", 0.0)
+            # For code files, multiple entries per file - keep max mtime
+            if file_path not in files or mtime > files[file_path]:
+                files[file_path] = mtime
+
+        return files
+
+    def delete_by_file(self, file_paths: list[str], type_filter: str | None = None) -> int:
+        """
+        Delete all entries for the given file paths.
+
+        Args:
+            file_paths: List of file paths to remove
+            type_filter: "code" or "docs" (None = both)
+
+        Returns:
+            Number of entries deleted
+        """
+        if not file_paths:
+            return 0
+
+        # Build where clause
+        where = {"file": {"$in": file_paths}}
+        if type_filter:
+            where = {"$and": [{"file": {"$in": file_paths}}, {"type": type_filter}]}
+
+        # Get IDs to delete
+        results = self.collection.get(where=where)
+        ids = results["ids"]
+
+        if ids:
+            self.collection.delete(ids=ids)
+
+        return len(ids)
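
Two details here are easy to miss: code files contribute one ChromaDB entry per symbol, so `get_indexed_files` reduces to the maximum mtime per file, and `delete_by_file` wraps its conditions in `$and` because Chroma composite `where` filters need an explicit operator. A standalone sketch of the mtime reduction, with made-up metadata:

```python
# One entry per symbol, all sharing the same "file" value.
metadatas = [
    {"file": "src/app.py", "mtime": 100.0, "symbol_name": "main"},
    {"file": "src/app.py", "mtime": 100.0, "symbol_name": "App"},
    {"file": "src/util.py", "mtime": 90.0, "symbol_name": "helper"},
]

files: dict[str, float] = {}
for meta in metadatas:
    file_path = meta.get("file", "")
    mtime = meta.get("mtime", 0.0)
    if file_path not in files or mtime > files[file_path]:
        files[file_path] = mtime  # keep the max mtime seen for each file

assert files == {"src/app.py": 100.0, "src/util.py": 90.0}
```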
src/indexers/__init__.py
CHANGED

@@ -1,6 +1,11 @@
 """Indexers for ragtime - parse different content types for vector search."""

-from .docs import index_directory as index_docs, DocEntry
-from .code import index_directory as index_code, CodeEntry
+from .docs import index_directory as index_docs, DocEntry, discover_docs, index_file as index_doc_file
+from .code import index_directory as index_code, CodeEntry, discover_code_files, index_file as index_code_file

-__all__ = […]
+__all__ = [
+    "index_docs", "index_code",
+    "DocEntry", "CodeEntry",
+    "discover_docs", "discover_code_files",
+    "index_doc_file", "index_code_file",
+]
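
The re-exports now split the public surface in two: the directory-level functions (`index_docs`, `index_code`) remain for full reindexes, while the per-file entry points feed the incremental path in cli.py. Assuming the wheel's top-level `src` package, the new imports look like:

```python
# Per-file entry points used by the incremental path (import path assumed
# from the wheel's top-level "src" package).
from src.indexers import (
    discover_docs, index_doc_file,          # docs: find files, parse one file
    discover_code_files, index_code_file,   # code: find files, extract symbols
)
```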
src/indexers/code.py
CHANGED

@@ -6,6 +6,7 @@ This allows searching for specific code constructs like "useAsyncState" or "JWTM…
 """

 import ast
+import os
 import re
 from fnmatch import fnmatch
 from pathlib import Path

@@ -32,6 +33,7 @@ class CodeEntry:
     symbol_type: str  # function, class, interface, component, etc.
     line_number: int  # Line where symbol starts
     docstring: str | None = None  # Extracted docstring/JSDoc
+    mtime: float | None = None  # File modification time for incremental indexing

     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""

@@ -42,6 +44,7 @@ class CodeEntry:
         "symbol_name": self.symbol_name,
         "symbol_type": self.symbol_type,
         "line": self.line_number,
+        "mtime": self.mtime or 0.0,
     }

@@ -92,14 +95,21 @@ def discover_code_files(
     rel_path = str(path)

     for ex in exclude:
-        # Handle ** patterns
+        # Handle ** patterns
         if "**" in ex:
-            …
+            if ex.endswith("/**"):
+                # Directory pattern: **/node_modules/** or **/generated/**
+                # Extract the directory name to match as path segment
+                dir_pattern = ex.replace("**/", "").replace("/**", "")
+                if f"/{dir_pattern}/" in f"/{rel_path}/":
+                    skip = True
+                    break
+            else:
+                # File pattern: **/*.d.ts, **/*.test.*, **/*.generated.*
+                file_pattern = ex.replace("**/", "")
+                if fnmatch(path.name, file_pattern):
+                    skip = True
+                    break
         elif fnmatch(rel_path, ex) or fnmatch(path.name, ex):
             skip = True
             break

@@ -432,7 +442,8 @@ def index_file(file_path: Path) -> list[CodeEntry]:
     """
     try:
         content = file_path.read_text(encoding='utf-8')
-        …
+        mtime = os.path.getmtime(file_path)
+    except (IOError, UnicodeDecodeError, OSError):
         return []

     # Skip empty files

@@ -442,15 +453,21 @@ def index_file(file_path: Path) -> list[CodeEntry]:
     suffix = file_path.suffix.lower()

     if suffix == ".py":
-        …
+        entries = index_python_file(file_path, content)
     elif suffix in [".ts", ".tsx", ".js", ".jsx"]:
-        …
+        entries = index_typescript_file(file_path, content)
     elif suffix == ".vue":
-        …
+        entries = index_vue_file(file_path, content)
     elif suffix == ".dart":
-        …
+        entries = index_dart_file(file_path, content)
+    else:
+        return []

-    …
+    # Set mtime on all entries from this file
+    for entry in entries:
+        entry.mtime = mtime
+
+    return entries


 def index_directory(
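
Taken together, `index_file` now stamps every `CodeEntry` with the file's mtime, `to_metadata` carries it into ChromaDB, and `get_indexed_files` reads it back on the next run. A sketch of that round trip; keyword arguments are used because the full dataclass field order is not shown in this diff, and all values are illustrative:

```python
from src.indexers.code import CodeEntry  # import path assumed

entry = CodeEntry(
    file_path="src/app.py",
    content="def main(): ...",
    symbol_name="main",
    symbol_type="function",
    line_number=1,
)
entry.mtime = 1_700_000_000.0  # set by index_file right after read_text()

meta = entry.to_metadata()
assert meta["mtime"] == 1_700_000_000.0  # what get_indexed_files will see
assert meta["line"] == 1
```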
src/indexers/docs.py
CHANGED

@@ -4,6 +4,7 @@ Docs indexer - parses markdown files with YAML frontmatter.
 Designed for .claude/memory/ style files but works with any markdown.
 """

+import os
 import re
 from pathlib import Path
 from dataclasses import dataclass

@@ -19,6 +20,7 @@ class DocEntry:
     category: str | None = None
     component: str | None = None
     title: str | None = None
+    mtime: float | None = None  # File modification time for incremental indexing

     def to_metadata(self) -> dict:
         """Convert to ChromaDB metadata dict."""

@@ -29,6 +31,7 @@ class DocEntry:
         "category": self.category or "",
         "component": self.component or "",
         "title": self.title or Path(self.file_path).stem,
+        "mtime": self.mtime or 0.0,
     }

@@ -61,7 +64,8 @@ def index_file(file_path: Path) -> DocEntry | None:
     """
     try:
         content = file_path.read_text(encoding='utf-8')
-        …
+        mtime = os.path.getmtime(file_path)
+    except (IOError, UnicodeDecodeError, OSError):
         return None

     metadata, body = parse_frontmatter(content)

@@ -77,6 +81,7 @@ def index_file(file_path: Path) -> DocEntry | None:
         category=metadata.get("category"),
         component=metadata.get("component"),
         title=metadata.get("title"),
+        mtime=mtime,
     )
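
The widened `except` clause is the subtle part of this hunk: `read_text()` can succeed and `os.path.getmtime()` still raise `OSError` if the file vanishes between the two calls (on Python 3, `IOError` is an alias of `OSError`, so listing both is harmless redundancy). The guarded pattern in isolation, as a sketch:

```python
import os
from pathlib import Path

def read_with_mtime(path: Path) -> tuple[str, float] | None:
    # Both the docs and code indexers use this shape: read the content,
    # then capture the mtime, and treat any filesystem/encoding failure
    # as "skip this file".
    try:
        content = path.read_text(encoding="utf-8")
        mtime = os.path.getmtime(path)  # can raise OSError independently
    except (IOError, UnicodeDecodeError, OSError):
        return None
    return content, mtime
```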
src/mcp_server.py
CHANGED
(+1 -1; hunk not preserved in this view)

{ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/WHEEL: File without changes
{ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/entry_points.txt: File without changes
{ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/licenses/LICENSE: File without changes
{ragtime_cli-0.2.5.dist-info → ragtime_cli-0.2.7.dist-info}/top_level.txt: File without changes