loki-mode 7.22.0 → 7.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/loki +78 -1
- package/autonomy/run.sh +88 -3
- package/dashboard/__init__.py +1 -1
- package/docs/INSTALLATION.md +1 -1
- package/loki-ts/dist/loki.js +2 -2
- package/mcp/__init__.py +1 -1
- package/mcp/server.py +79 -1
- package/package.json +1 -1
- package/references/mcp-integration.md +65 -0
- package/skills/00-index.md +2 -0
- package/skills/parallel-workflows.md +96 -0
- package/skills/quality-gates.md +18 -0
- package/tools/hybrid_search.py +451 -0
- package/tools/index-codebase.py +296 -0
package/tools/index-codebase.py
CHANGED
|
@@ -10,14 +10,27 @@ Usage:
|
|
|
10
10
|
python tools/index-codebase.py # Index everything
|
|
11
11
|
python tools/index-codebase.py --collection loki # Custom collection name
|
|
12
12
|
python tools/index-codebase.py --reset # Clear and re-index
|
|
13
|
+
python tools/index-codebase.py --changed # Incremental: only changed files
|
|
13
14
|
python tools/index-codebase.py --stats # Show index stats
|
|
14
15
|
|
|
15
16
|
Requires:
|
|
16
17
|
- ChromaDB running on localhost:8100 (docker)
|
|
17
18
|
- pip install chromadb
|
|
19
|
+
|
|
20
|
+
Incremental freshness (--changed):
|
|
21
|
+
Maintains a manifest at .loki/state/code-index-manifest.json that records,
|
|
22
|
+
per indexed file, its mtime, sha1, and the chunk IDs it produced. The
|
|
23
|
+
--changed mode re-chunks only files whose mtime OR sha1 differ from the
|
|
24
|
+
manifest, upserts the new chunks, deletes chunk IDs that disappeared for a
|
|
25
|
+
changed file (the orphan-chunk fix), and drops all chunks for files removed
|
|
26
|
+
from disk. The --reset and default full-index paths are unchanged in their
|
|
27
|
+
indexing behavior; they additionally write the manifest at the end so a
|
|
28
|
+
later --changed run has an accurate baseline.
|
|
18
29
|
"""
|
|
19
30
|
|
|
20
31
|
import argparse
|
|
32
|
+
import hashlib
|
|
33
|
+
import json
|
|
21
34
|
import os
|
|
22
35
|
import re
|
|
23
36
|
import sys
|
|
@@ -30,6 +43,11 @@ import chromadb
|
|
|
30
43
|
# Project root
|
|
31
44
|
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
|
32
45
|
|
|
46
|
+
# Manifest path (per-file freshness tracking for incremental indexing).
|
|
47
|
+
# Resolved relative to PROJECT_ROOT so the indexer and the MCP staleness
|
|
48
|
+
# check agree on a single location.
|
|
49
|
+
MANIFEST_PATH = PROJECT_ROOT / ".loki" / "state" / "code-index-manifest.json"
|
|
50
|
+
|
|
33
51
|
# ChromaDB connection
|
|
34
52
|
CHROMA_HOST = os.environ.get("LOKI_CHROMA_HOST", "localhost")
|
|
35
53
|
CHROMA_PORT = int(os.environ.get("LOKI_CHROMA_PORT", "8100"))
|
|
@@ -333,6 +351,267 @@ def collect_files() -> list[tuple[Path, str]]:
|
|
|
333
351
|
return files
|
|
334
352
|
|
|
335
353
|
|
|
354
|
+
# -----------------------------------------------------------------------------
|
|
355
|
+
# Manifest / incremental freshness (pure logic, no ChromaDB)
|
|
356
|
+
# -----------------------------------------------------------------------------
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def file_sha1(filepath: Path) -> str:
    """Return the hex sha1 of a file's bytes.

    The file is hashed in fixed-size blocks so a large file is never loaded
    into memory in one piece. The digest is the same as hashing the complete
    content in a single call.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    h = hashlib.sha1()
    with open(filepath, "rb") as fh:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for block in iter(lambda: fh.read(65536), b""):
            h.update(block)
    return h.hexdigest()
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def chunk_file(filepath: Path, file_type: str) -> list[dict]:
    """Chunk *filepath* with the chunker that matches *file_type*.

    "shell", "python" and "markdown" are routed to their dedicated chunkers;
    any other file type produces an empty list.
    """
    # Pick the chunker first, call it once at the end. Names are only looked
    # up on the branch that is taken.
    if file_type == "shell":
        chunker = chunk_shell_file
    elif file_type == "python":
        chunker = chunk_python_file
    elif file_type == "markdown":
        chunker = chunk_markdown_file
    else:
        return []
    return chunker(filepath)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def load_manifest(manifest_path: Path = MANIFEST_PATH) -> dict:
    """Load the freshness manifest, or return an empty one.

    Schema:
        {
          "version": 1,
          "collection": "<name>",
          "files": {
            "<rel_path>": {"mtime": <float>, "sha1": "<hex>", "chunk_ids": [...]}
          }
        }

    A manifest that is missing, unreadable, not valid UTF-8/JSON, or whose
    top level is not a dict with a dict under "files" is treated as absent:
    the empty manifest is returned instead of raising. Individual file
    entries are NOT validated here.
    """
    empty = {"version": 1, "collection": None, "files": {}}
    try:
        data = json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError. RecursionError:
        # pathologically nested JSON. Anything else is a real bug and
        # should surface instead of being swallowed.
        return empty
    if isinstance(data, dict) and isinstance(data.get("files"), dict):
        return data
    return empty
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def save_manifest(manifest: dict, manifest_path: Path = MANIFEST_PATH) -> None:
    """Persist the freshness manifest via write-to-temp-then-rename.

    The JSON is written to a sibling "<name>.tmp" file which is then renamed
    over *manifest_path*, so the target is replaced in one step rather than
    being rewritten in place. If serializing, writing, or renaming fails, the
    temp file is removed (best effort) and the original error is re-raised;
    the previous manifest, if any, is left untouched.

    Raises:
        OSError: if the directory cannot be created or the file cannot be
            written/renamed.
        TypeError / ValueError: if *manifest* is not JSON-serializable.
    """
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    tmp = manifest_path.with_suffix(manifest_path.suffix + ".tmp")
    try:
        tmp.write_text(
            json.dumps(manifest, indent=2, sort_keys=True),
            encoding="utf-8",
        )
        tmp.replace(manifest_path)
    except BaseException:
        # Do not leave a half-written temp file behind; it may not exist yet
        # (e.g. json.dumps failed), hence the guarded unlink.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def build_manifest_entry(filepath: Path, chunk_ids: list[str]) -> dict:
    """Describe one freshly chunked file for the manifest.

    The entry records the file's current modification time, the sha1 of its
    content, and a copy of the chunk IDs produced for it.
    """
    modified_at = os.path.getmtime(filepath)
    digest = file_sha1(filepath)
    # Copy the IDs so later mutation of the caller's list cannot alter the entry.
    return {"mtime": modified_at, "sha1": digest, "chunk_ids": [*chunk_ids]}
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def compute_manifest_diff(old_manifest: dict,
                          current_files: list[tuple[str, dict]],
                          present_rel_paths: set) -> dict:
    """Pure diff: decide what changed without touching ChromaDB or disk.

    Args:
        old_manifest: the previously saved manifest dict.
        current_files: list of (rel_path, entry) for files re-chunked THIS run,
            where entry is {"mtime", "sha1", "chunk_ids"}. Callers should only
            include files they actually re-chunked (i.e. changed/new files).
        present_rel_paths: the set of rel_paths that currently exist on disk and
            are in scope for indexing (collect_files results). Used to detect
            files removed from disk.

    Returns a dict:
        {
          "upsert_ids": [...],         # chunk IDs to (re)upsert this run, in
                                       # input order (not deduped)
          "delete_ids": [...],         # orphan chunk IDs to remove, sorted
                                       # and deduped
          "changed_files": [rel_path], # files re-chunked this run
          "removed_files": [rel_path], # files dropped from disk / scope
        }

    Malformed old entries (not a dict, or "chunk_ids" missing / not a list)
    are treated as having no chunk IDs instead of raising, because the
    manifest loader only validates the top-level shape.
    """
    old_files = old_manifest.get("files") or {}
    rechunked = {rel for rel, _ in current_files}

    upsert_ids: list[str] = []
    orphan_ids: set = set()
    changed_files: list[str] = []

    # Changed / new files: upsert their new chunk IDs, delete IDs that vanished.
    for rel, entry in current_files:
        changed_files.append(rel)
        new_ids = _entry_chunk_ids(entry)
        upsert_ids.extend(new_ids)
        orphan_ids.update(set(_entry_chunk_ids(old_files.get(rel))) - set(new_ids))

    # Files removed from disk (tracked before, not present now): delete all chunks.
    removed_files: list[str] = []
    for rel, old_entry in old_files.items():
        if rel in rechunked or rel in present_rel_paths:
            continue
        removed_files.append(rel)
        orphan_ids.update(_entry_chunk_ids(old_entry))

    return {
        "upsert_ids": upsert_ids,
        # Sorted so the delete batch is deterministic across runs / in tests.
        "delete_ids": sorted(orphan_ids),
        "changed_files": changed_files,
        "removed_files": removed_files,
    }


def _entry_chunk_ids(entry) -> list:
    """Return a manifest entry's chunk IDs, or [] if the entry is malformed."""
    if not isinstance(entry, dict):
        return []
    ids = entry.get("chunk_ids")
    return list(ids) if isinstance(ids, (list, tuple)) else []
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def file_is_changed(filepath: Path, rel: str, old_manifest: dict) -> bool:
    """Return True if a file differs from its manifest entry (mtime OR sha1).

    Decision order:
      1. No manifest entry for *rel*  -> new file, changed.
      2. mtime differs from the entry -> changed, without hashing (so a bare
         touch with identical content is still reported as changed).
      3. mtime is equal               -> the sha1 is compared, so an edit that
         preserved the mtime is still caught.

    Any OSError while stat-ing or reading the file (e.g. it disappeared or
    became unreadable mid-run) is reported as "changed" rather than raised,
    so one bad file cannot abort the whole incremental pass.
    """
    entry = old_manifest.get("files", {}).get(rel)
    if not entry:
        return True
    try:
        if os.path.getmtime(filepath) != entry.get("mtime"):
            return True
        # Same mtime: fall back to the content hash as the authoritative check.
        return file_sha1(filepath) != entry.get("sha1")
    except OSError:
        return True
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def check_staleness(manifest_path: Path = MANIFEST_PATH) -> dict:
    """Compare manifest mtimes against current files on disk.

    Mirrors the mtime-staleness pattern in memory/retrieval.py. Returns a dict
    {"stale": bool, "stale_files": int, "manifest_present": bool} computed from
    the manifest alone (no ChromaDB, no chunking, no hashing) so it is safe to
    call from the MCP server under any Python. A missing manifest degrades to
    not-stale; "manifest_present" is also False when the manifest exists but
    tracks no files.

    A tracked file counts as stale when it cannot be stat-ed (deleted,
    unreadable, invalid path) or when its mtime differs from the recorded
    one. A malformed (non-dict) entry is counted as stale as well.

    Note: a brand-new file that was never indexed will not appear in the
    manifest, so it is not counted here. This check detects edits and deletions
    of already-indexed files, which is what drives orphan/staleness signals.
    """
    manifest = load_manifest(manifest_path)
    files = manifest.get("files", {})
    if not files:
        return {"stale": False, "stale_files": 0, "manifest_present": False}

    stale = 0
    for rel, entry in files.items():
        recorded = entry.get("mtime") if isinstance(entry, dict) else None
        try:
            # One stat instead of exists()+getmtime(): no window between the
            # two calls, and stat errors cannot escape this function.
            current = os.path.getmtime(PROJECT_ROOT / rel)
        except (OSError, ValueError):
            stale += 1  # deleted from disk / unreadable -> orphan chunks remain
            continue
        if current != recorded:
            stale += 1
    return {"stale": stale > 0, "stale_files": stale, "manifest_present": True}
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def index_changed(collection):
    """Incremental index: re-chunk only changed files, fix orphan chunks.

    Steps:
      1. Load the manifest and collect the in-scope files.
      2. Re-chunk every file that file_is_changed() reports as changed.
      3. Delete orphan chunk IDs (vanished from a changed file, or belonging
         to a file removed from disk), then upsert the new chunks per file.
      4. Save a manifest that reflects only what was actually applied.

    Failure handling (each failure is printed to stderr, never raised):
      - A file that fails to chunk or stat is skipped and keeps its old
        manifest entry, so the next run retries it.
      - A file whose upsert fails keeps its old entry (or stays untracked if
        new), so the next run retries it.
      - If the delete batch fails, removed files and previously tracked
        changed files keep their old entries so their orphan IDs are
        recomputed and deleted on the next run.

    If the manifest was written for a different collection it is ignored and
    every file is treated as new, because its chunk IDs say nothing about the
    contents of *collection*.

    Args:
        collection: object exposing ``name``, ``delete(ids=...)`` and
            ``upsert(ids=..., documents=..., metadatas=...)`` -- presumably a
            chromadb Collection; confirm against the caller.

    Returns (changed_count, removed_count, upserted_chunks, deleted_chunks).
    deleted_chunks is 0 when the delete batch failed.
    """
    old_manifest = load_manifest(MANIFEST_PATH)
    recorded = old_manifest.get("collection")
    if recorded is not None and recorded != collection.name:
        print(f"  WARNING manifest tracks collection {recorded!r}, not "
              f"{collection.name!r}; ignoring it and treating all files as new",
              file=sys.stderr)
        old_manifest = {"version": 1, "collection": None, "files": {}}
    old_files = old_manifest.get("files", {})

    files = collect_files()
    present_rel_paths = {
        str(fp.relative_to(PROJECT_ROOT)) for fp, _ in files
    }

    # Re-chunk only files whose mtime or sha1 differs from the manifest.
    current_entries: list[tuple[str, dict]] = []
    chunks_by_rel: dict = {}
    for filepath, file_type in files:
        rel = str(filepath.relative_to(PROJECT_ROOT))
        if not file_is_changed(filepath, rel, old_manifest):
            continue
        try:
            chunks = chunk_file(filepath, file_type)
            # Inside the try: stat/hash can fail if the file vanished mid-run.
            entry = build_manifest_entry(filepath, [c["id"] for c in chunks])
        except Exception as e:
            print(f"  ERROR chunking {filepath}: {e}", file=sys.stderr)
            continue
        current_entries.append((rel, entry))
        chunks_by_rel[rel] = chunks

    diff = compute_manifest_diff(old_manifest, current_entries, present_rel_paths)

    # Apply deletes first (orphans + removed files), then upserts.
    delete_ok = True
    if diff["delete_ids"]:
        try:
            collection.delete(ids=diff["delete_ids"])
        except Exception as e:
            delete_ok = False
            print(f"  ERROR deleting orphan chunks: {e}", file=sys.stderr)

    upserted = 0
    failed_upserts: set = set()
    for rel in diff["changed_files"]:
        chunks = chunks_by_rel.get(rel, [])
        if not chunks:
            continue
        try:
            collection.upsert(
                ids=[c["id"] for c in chunks],
                documents=[c["content"] for c in chunks],
                metadatas=[c["metadata"] for c in chunks],
            )
            upserted += len(chunks)
            print(f"  upsert {rel}: {len(chunks)} chunks")
        except Exception as e:
            failed_upserts.add(rel)
            print(f"  ERROR upserting {rel}: {e}", file=sys.stderr)

    # Update the manifest: keep unchanged entries, refresh entries whose
    # changes were fully applied, drop removed files whose chunks were deleted.
    new_files = dict(old_files)
    if delete_ok:
        for rel in diff["removed_files"]:
            new_files.pop(rel, None)
    for rel, entry in current_entries:
        if rel in failed_upserts:
            continue  # not indexed; old/absent entry makes the next run retry
        if not delete_ok and rel in old_files:
            continue  # orphans may remain; stale entry makes the next run retry
        new_files[rel] = entry
    new_manifest = {
        "version": 1,
        "collection": collection.name,
        "files": new_files,
    }
    save_manifest(new_manifest, MANIFEST_PATH)

    deleted = len(diff["delete_ids"]) if delete_ok else 0
    return (len(diff["changed_files"]), len(diff["removed_files"]),
            upserted, deleted)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def write_manifest_for_full_index(collection):
    """Rebuild the manifest from a full pass over all in-scope files.

    Called at the end of --reset and default full-index so a later --changed run
    has an accurate baseline. This is additive persistence only: it does not
    change what was indexed, it re-chunks each file to record the chunk IDs it
    produces, together with the file's mtime and sha1.

    A file that cannot be chunked, stat-ed, or hashed is reported on stderr
    and left out of the manifest (a later --changed run will then treat it as
    new); it never prevents the manifest from being written for the rest.

    Args:
        collection: only ``collection.name`` is read here, to stamp the
            manifest with the collection it describes.

    Returns the number of files recorded in the manifest.
    """
    new_files: dict = {}
    for filepath, file_type in collect_files():
        try:
            chunks = chunk_file(filepath, file_type)
            # Inside the try: the file may vanish between collection and stat.
            entry = build_manifest_entry(filepath, [c["id"] for c in chunks])
        except Exception as e:
            print(f"  ERROR recording {filepath} in manifest: {e}",
                  file=sys.stderr)
            continue
        new_files[str(filepath.relative_to(PROJECT_ROOT))] = entry
    save_manifest({
        "version": 1,
        "collection": collection.name,
        "files": new_files,
    }, MANIFEST_PATH)
    return len(new_files)
|
|
613
|
+
|
|
614
|
+
|
|
336
615
|
def index_all(collection, reset: bool = False):
|
|
337
616
|
"""Index the entire codebase."""
|
|
338
617
|
files = collect_files()
|
|
@@ -429,6 +708,8 @@ def main():
|
|
|
429
708
|
parser = argparse.ArgumentParser(description="Index loki-mode codebase into ChromaDB")
|
|
430
709
|
parser.add_argument("--collection", default=COLLECTION_NAME, help="Collection name")
|
|
431
710
|
parser.add_argument("--reset", action="store_true", help="Clear and re-index")
|
|
711
|
+
parser.add_argument("--changed", action="store_true",
|
|
712
|
+
help="Incremental: re-index only changed files (uses manifest)")
|
|
432
713
|
parser.add_argument("--stats", action="store_true", help="Show index stats")
|
|
433
714
|
parser.add_argument("--search", type=str, help="Run a test search query")
|
|
434
715
|
parser.add_argument("--host", default=CHROMA_HOST, help="ChromaDB host")
|
|
@@ -457,11 +738,26 @@ def main():
|
|
|
457
738
|
test_search(collection, args.search)
|
|
458
739
|
return
|
|
459
740
|
|
|
741
|
+
if args.changed:
|
|
742
|
+
start = time.time()
|
|
743
|
+
changed, removed, upserted, deleted = index_changed(collection)
|
|
744
|
+
elapsed = time.time() - start
|
|
745
|
+
print(f"\nIncremental done: {changed} changed file(s), "
|
|
746
|
+
f"{removed} removed file(s), {upserted} chunks upserted, "
|
|
747
|
+
f"{deleted} orphan chunks deleted in {elapsed:.1f}s")
|
|
748
|
+
show_stats(collection)
|
|
749
|
+
return
|
|
750
|
+
|
|
460
751
|
start = time.time()
|
|
461
752
|
file_count, total_chunks = index_all(collection)
|
|
462
753
|
elapsed = time.time() - start
|
|
463
754
|
|
|
755
|
+
# Additive persistence: record the manifest so a later --changed run has an
|
|
756
|
+
# accurate baseline. Does not change what was indexed above.
|
|
757
|
+
manifest_files = write_manifest_for_full_index(collection)
|
|
464
758
|
print(f"\nDone: {total_chunks} chunks from {file_count} files in {elapsed:.1f}s")
|
|
759
|
+
print(f"Manifest: {manifest_files} files tracked at "
|
|
760
|
+
f"{MANIFEST_PATH.relative_to(PROJECT_ROOT)}")
|
|
465
761
|
show_stats(collection)
|
|
466
762
|
|
|
467
763
|
# Run a few test searches
|