codebase-index 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_index/__init__.py +7 -0
- codebase_index/__main__.py +3 -0
- codebase_index/cli.py +916 -0
- codebase_index/config.py +110 -0
- codebase_index/discovery/__init__.py +10 -0
- codebase_index/discovery/classify.py +151 -0
- codebase_index/discovery/ignore.py +58 -0
- codebase_index/discovery/walker.py +75 -0
- codebase_index/doctor.py +138 -0
- codebase_index/embeddings/__init__.py +2 -0
- codebase_index/embeddings/backend.py +67 -0
- codebase_index/embeddings/external.py +56 -0
- codebase_index/embeddings/local.py +41 -0
- codebase_index/embeddings/noop.py +15 -0
- codebase_index/graph/__init__.py +8 -0
- codebase_index/graph/analysis.py +468 -0
- codebase_index/graph/builder.py +160 -0
- codebase_index/graph/expand.py +136 -0
- codebase_index/graph/export.py +381 -0
- codebase_index/graph/navigate.py +201 -0
- codebase_index/indexer/__init__.py +8 -0
- codebase_index/indexer/doc_chunks.py +202 -0
- codebase_index/indexer/freshness.py +109 -0
- codebase_index/indexer/pipeline.py +423 -0
- codebase_index/mcp/__init__.py +2 -0
- codebase_index/mcp/server.py +354 -0
- codebase_index/models.py +145 -0
- codebase_index/output/__init__.py +6 -0
- codebase_index/output/json.py +13 -0
- codebase_index/output/markdown.py +316 -0
- codebase_index/output/redact.py +31 -0
- codebase_index/parsers/__init__.py +9 -0
- codebase_index/parsers/base.py +47 -0
- codebase_index/parsers/languages.py +290 -0
- codebase_index/parsers/line_chunker.py +39 -0
- codebase_index/parsers/symbol_chunks.py +62 -0
- codebase_index/parsers/treesitter.py +439 -0
- codebase_index/retrieval/__init__.py +9 -0
- codebase_index/retrieval/budget.py +82 -0
- codebase_index/retrieval/fusion.py +62 -0
- codebase_index/retrieval/intent.py +56 -0
- codebase_index/retrieval/pipeline.py +207 -0
- codebase_index/retrieval/rerank.py +69 -0
- codebase_index/retrieval/searchers.py +291 -0
- codebase_index/retrieval/skeleton.py +251 -0
- codebase_index/retrieval/types.py +79 -0
- codebase_index/scaffold.py +399 -0
- codebase_index/service.py +158 -0
- codebase_index/skill_template/SKILL.md +198 -0
- codebase_index/skill_template/examples/hooks/settings.json +16 -0
- codebase_index/skill_template/scripts/cbx +25 -0
- codebase_index/skill_template/scripts/cbx.ps1 +25 -0
- codebase_index/skill_update.py +150 -0
- codebase_index/storage/__init__.py +8 -0
- codebase_index/storage/db.py +116 -0
- codebase_index/storage/repo.py +701 -0
- codebase_index/storage/schema.sql +125 -0
- codebase_index/watch/__init__.py +5 -0
- codebase_index/watch/watcher.py +93 -0
- codebase_index-1.6.0.dist-info/METADATA +748 -0
- codebase_index-1.6.0.dist-info/RECORD +64 -0
- codebase_index-1.6.0.dist-info/WHEEL +4 -0
- codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
- codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
-- Canonical DDL for codebase-index. Mirrors docs/SCHEMA.md. Applied by storage/db.py.
|
|
2
|
+
-- Connection settings, issued before any of the DDL in this file.
PRAGMA journal_mode = WAL;      -- write-ahead-log journaling
PRAGMA synchronous  = NORMAL;
PRAGMA foreign_keys = ON;       -- SQLite enforces REFERENCES ... ON DELETE clauses only when this is on
PRAGMA temp_store   = MEMORY;   -- keep temporary tables/indices in memory
|
|
6
|
+
|
|
7
|
+
-- files: one row per path (path is UNIQUE). symbols, chunks and edges point
-- here through file_id with ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS files (
    id           INTEGER PRIMARY KEY,
    path         TEXT    NOT NULL UNIQUE,
    lang         TEXT,                          -- nullable
    size_bytes   INTEGER NOT NULL,
    sha256       TEXT    NOT NULL,
    mtime_ns     INTEGER NOT NULL,
    git_status   TEXT,                          -- nullable
    parser       TEXT    NOT NULL,
    indexed_at   TEXT    NOT NULL,
    is_generated INTEGER NOT NULL DEFAULT 0,    -- integer flag, 0 unless set
    summary      TEXT                           -- nullable
);
|
|
20
|
+
|
|
21
|
+
-- symbols: rows are removed together with their file (CASCADE). parent_id is a
-- self-reference that is reset to NULL when the parent symbol row is deleted.
CREATE TABLE IF NOT EXISTS symbols (
    id          INTEGER PRIMARY KEY,
    file_id     INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    name        TEXT    NOT NULL,
    qualified   TEXT,
    kind        TEXT    NOT NULL,
    line_start  INTEGER NOT NULL,
    line_end    INTEGER NOT NULL,
    signature   TEXT,
    parent_id   INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    docstring   TEXT,
    in_degree   INTEGER NOT NULL DEFAULT 0,
    out_degree  INTEGER NOT NULL DEFAULT 0
);
-- Lookup indexes: by owning file, by kind, and by bare name.
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
|
|
38
|
+
|
|
39
|
+
-- chunks: line ranges of a file together with their text. Rows are removed with
-- their file (CASCADE); symbol_id is reset to NULL when its symbol row is deleted.
CREATE TABLE IF NOT EXISTS chunks (
    id           INTEGER PRIMARY KEY,
    file_id      INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    line_start   INTEGER NOT NULL,
    line_end     INTEGER NOT NULL,
    kind         TEXT,
    symbol_id    INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    content      TEXT    NOT NULL,
    token_est    INTEGER NOT NULL,
    -- The chunk's symbol name, copied into the row when it is written rather
    -- than joined at query time. The FTS triggers on this table must replay
    -- exactly the value that was indexed when a row is deleted or updated; a
    -- subquery against symbols could hit a row that ON DELETE SET NULL has
    -- already detached, which would corrupt the full-text index.
    symbol_names TEXT    NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
|
|
55
|
+
|
|
56
|
+
-- edges: typed links from a source (src_kind, src_id) to a destination given by
-- (dst_kind, dst_id) and/or dst_name. Rows are removed with their file (CASCADE).
CREATE TABLE IF NOT EXISTS edges (
    id         INTEGER PRIMARY KEY,
    edge_type  TEXT    NOT NULL,
    src_kind   TEXT    NOT NULL,
    src_id     INTEGER NOT NULL,
    dst_kind   TEXT,
    dst_id     INTEGER,
    dst_name   TEXT,
    file_id    INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    line       INTEGER,
    resolved   INTEGER NOT NULL DEFAULT 0,
    -- Honesty audit trail (see docs/SCHEMA.md): how certain the edge's target is.
    --   'extracted' : exact match (a same-file symbol or a repo-unique name)
    --   'inferred'  : resolved by a heuristic (import path-suffix)
    --   'ambiguous' : a name/import that could not be pinned to a unique target
    -- Assigned at build time by the global graph pass; never guessed by an LLM
    -- (the index is fully local).
    confidence TEXT    NOT NULL DEFAULT 'extracted'
);
CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(edge_type);
CREATE INDEX IF NOT EXISTS idx_edges_name ON edges(dst_name);
CREATE INDEX IF NOT EXISTS idx_edges_dst  ON edges(dst_kind, dst_id);
CREATE INDEX IF NOT EXISTS idx_edges_src  ON edges(src_kind, src_id);
-- replace_edges deletes by file on every incremental update, and deleting a
-- files(id) row cascades into this table; lacking this index, both would scan
-- all of edges.
CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_id);
|
|
81
|
+
|
|
82
|
+
-- modules: one row per module path (path is UNIQUE), with per-module counters
-- that start at 0.
CREATE TABLE IF NOT EXISTS modules (
    id            INTEGER PRIMARY KEY,
    path          TEXT    NOT NULL UNIQUE,
    kind          TEXT    NOT NULL,
    summary       TEXT,
    file_count    INTEGER NOT NULL DEFAULT 0,
    symbol_count  INTEGER NOT NULL DEFAULT 0
);
|
|
90
|
+
|
|
91
|
+
-- meta: string key/value pairs, one row per key.
CREATE TABLE IF NOT EXISTS meta (
    key    TEXT PRIMARY KEY,
    value  TEXT NOT NULL
);
|
|
95
|
+
|
|
96
|
+
-- Full-text index (FTS5) over chunks, declared as an external-content table:
-- the text lives in chunks and is addressed by chunks.id. The triggers on
-- chunks keep this index in sync.
-- NOTE(review): chunks declares no `path` column although this table does;
-- reading column values through fts_chunks (or an FTS5 'rebuild') would
-- presumably look `path` up in chunks -- confirm no query depends on that.
CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
    content,
    symbol_names,
    path UNINDEXED,
    content='chunks',
    content_rowid='id',
    tokenize = "unicode61 remove_diacritics 2"
);
|
|
105
|
+
|
|
106
|
+
-- symbol_names mirrors new/old.symbol_names (the stored chunk column), NOT a live
|
|
107
|
+
-- join: external-content FTS requires the delete to replay the exact value that was
|
|
108
|
+
-- indexed, which a join could no longer reproduce after a symbol cascade. path is
|
|
109
|
+
-- UNINDEXED so its delete value is irrelevant.
|
|
110
|
+
-- After a chunk row is inserted, add it to the full-text index under the same
-- rowid; path is looked up from the owning files row.
CREATE TRIGGER IF NOT EXISTS chunks_ai
AFTER INSERT ON chunks
BEGIN
    INSERT INTO fts_chunks(rowid, content, symbol_names, path)
    VALUES (
        new.id,
        new.content,
        new.symbol_names,
        (SELECT path FROM files WHERE id = new.file_id)
    );
END;
|
|
114
|
+
-- After a chunk row is deleted, issue the FTS5 'delete' command with the old
-- row's stored values so the matching index entries are removed. path is
-- UNINDEXED, so an empty string is passed for it.
CREATE TRIGGER IF NOT EXISTS chunks_ad
AFTER DELETE ON chunks
BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content, symbol_names, path)
    VALUES (
        'delete',
        old.id,
        old.content,
        old.symbol_names,
        ''
    );
END;
|
|
118
|
+
-- After a chunk row is updated, re-index it in two steps: first remove the old
-- values with the FTS5 'delete' command, then insert the new values.
CREATE TRIGGER IF NOT EXISTS chunks_au
AFTER UPDATE ON chunks
BEGIN
    -- Step 1: drop the index entries built from the previous row values.
    INSERT INTO fts_chunks(fts_chunks, rowid, content, symbol_names, path)
    VALUES (
        'delete',
        old.id,
        old.content,
        old.symbol_names,
        ''
    );
    -- Step 2: index the row as it is now.
    INSERT INTO fts_chunks(rowid, content, symbol_names, path)
    VALUES (
        new.id,
        new.content,
        new.symbol_names,
        (SELECT path FROM files WHERE id = new.file_id)
    );
END;
|
|
124
|
+
|
|
125
|
+
-- vec_chunks (sqlite-vec) is created at runtime ONLY when embeddings.enabled = true.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Optional live indexing (extra: watch).
|
|
2
|
+
|
|
3
|
+
A burst of filesystem events is coalesced by `DebouncedIndexer` into a single incremental
|
|
4
|
+
`update` once edits go quiet for `window_s`, so we never block or thrash the edit loop.
|
|
5
|
+
`run_watch` wires that to a watchdog observer; watchdog is imported lazily so the base
|
|
6
|
+
install never depends on it.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Callable, Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DebouncedIndexer:
    """Collapse a burst of edit notifications into a single callback run.

    The class keeps only the time of the most recent `notify()`. `maybe_run()`
    fires the callback when an edit is pending and the clock shows that at
    least `window_s` has elapsed since that notification; the pending marker is
    cleared before the callback is invoked. The clock is injectable, so tests
    can drive time deterministically.
    """

    def __init__(
        self,
        callback: Callable[[], None],
        *,
        window_s: float,
        clock: Callable[[], float] = time.monotonic,
    ) -> None:
        # Timestamp of the latest notification; None means nothing is pending.
        self._last_event: Optional[float] = None
        self._clock = clock
        self._window_s = window_s
        self._callback = callback

    def notify(self) -> None:
        """Record that an edit happened now, restarting the quiet window."""
        self._last_event = self._clock()

    def maybe_run(self) -> bool:
        """Run the callback if the quiet window has elapsed.

        Returns True when the callback was invoked, False when nothing is
        pending or the window is still open.
        """
        if self._last_event is not None:
            quiet_for = self._clock() - self._last_event
            # The 1e-9 slack tolerates float rounding right at the boundary.
            if not quiet_for < self._window_s - 1e-9:
                # Clear first so a notify() arriving while the callback runs
                # is kept as new pending work.
                self._last_event = None
                self._callback()
                return True
        return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def run_watch(config, db_path, debounce_ms: int) -> None:  # pragma: no cover - exercised via CLI/manual QA
    """Watch the repo and run incremental `update` on debounced changes.

    Blocks until interrupted with Ctrl-C, then stops and joins the observer.

    Args:
        config: Project configuration; `config.root` is the directory that is
            watched recursively, and `config` is handed on to `update_index`.
        db_path: Location of the index database; it is opened afresh for every
            update run.
        debounce_ms: Quiet period, in milliseconds, that must follow the last
            edit before an update runs.

    Raises:
        RuntimeError: (not ImportError) with install guidance if watchdog is absent.
    """
    try:
        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer
    except ImportError as exc:
        raise RuntimeError(
            "watch mode requires the optional 'watchdog' dependency. "
            'Install it with: pip install "codebase-index[watch]"'
        ) from exc

    import os

    from ..indexer.pipeline import update_index
    from ..storage.db import Database

    root = Path(config.root).resolve()

    # The database file and SQLite's sidecar files. If they live under `root`,
    # every update would produce events for them and re-arm the debouncer,
    # so the watcher would keep re-indexing forever.
    db_file = os.path.normcase(str(Path(db_path).resolve()))
    index_files = frozenset(db_file + suffix for suffix in ("", "-wal", "-shm", "-journal"))

    # watchdog event types that report a file being read, not changed. Counting
    # them as edits lets the indexer (or any tool reading the tree) re-trigger
    # an update.
    read_only_events = frozenset({"opened", "closed_no_write"})

    def _is_index_file(raw_path) -> bool:
        # watchdog may report paths as bytes; normalise before comparing.
        return os.path.normcase(os.fsdecode(raw_path)) in index_files

    def _run_update() -> None:
        with Database(db_path) as db:
            stats = update_index(config, db, root=root)
        if stats.indexed or stats.deleted:
            print(f"[watch] updated {stats.indexed}, pruned {stats.deleted}", flush=True)

    debouncer = DebouncedIndexer(_run_update, window_s=debounce_ms / 1000.0)

    class _Handler(FileSystemEventHandler):
        def on_any_event(self, event) -> None:
            if event.is_directory or event.event_type in read_only_events:
                return
            # Move events also carry a destination path; check both ends.
            touched = [p for p in (event.src_path, getattr(event, "dest_path", None)) if p]
            if touched and all(_is_index_file(p) for p in touched):
                return
            debouncer.notify()

    observer = Observer()
    observer.schedule(_Handler(), str(root), recursive=True)
    observer.start()
    print(f"[watch] watching {root} (debounce {debounce_ms}ms). Ctrl-C to stop.", flush=True)

    # Poll at most every 250 ms, but never faster than every 10 ms: a zero
    # debounce would otherwise busy-spin and a negative one would make
    # time.sleep() raise ValueError.
    poll_s = min(0.25, max(debounce_ms / 1000.0, 0.01))
    try:
        while True:
            time.sleep(poll_s)
            debouncer.maybe_run()
    except KeyboardInterrupt:
        pass
    finally:
        observer.stop()
        observer.join()
|