codebase-index 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. codebase_index/__init__.py +7 -0
  2. codebase_index/__main__.py +3 -0
  3. codebase_index/cli.py +916 -0
  4. codebase_index/config.py +110 -0
  5. codebase_index/discovery/__init__.py +10 -0
  6. codebase_index/discovery/classify.py +151 -0
  7. codebase_index/discovery/ignore.py +58 -0
  8. codebase_index/discovery/walker.py +75 -0
  9. codebase_index/doctor.py +138 -0
  10. codebase_index/embeddings/__init__.py +2 -0
  11. codebase_index/embeddings/backend.py +67 -0
  12. codebase_index/embeddings/external.py +56 -0
  13. codebase_index/embeddings/local.py +41 -0
  14. codebase_index/embeddings/noop.py +15 -0
  15. codebase_index/graph/__init__.py +8 -0
  16. codebase_index/graph/analysis.py +468 -0
  17. codebase_index/graph/builder.py +160 -0
  18. codebase_index/graph/expand.py +136 -0
  19. codebase_index/graph/export.py +381 -0
  20. codebase_index/graph/navigate.py +201 -0
  21. codebase_index/indexer/__init__.py +8 -0
  22. codebase_index/indexer/doc_chunks.py +202 -0
  23. codebase_index/indexer/freshness.py +109 -0
  24. codebase_index/indexer/pipeline.py +423 -0
  25. codebase_index/mcp/__init__.py +2 -0
  26. codebase_index/mcp/server.py +354 -0
  27. codebase_index/models.py +145 -0
  28. codebase_index/output/__init__.py +6 -0
  29. codebase_index/output/json.py +13 -0
  30. codebase_index/output/markdown.py +316 -0
  31. codebase_index/output/redact.py +31 -0
  32. codebase_index/parsers/__init__.py +9 -0
  33. codebase_index/parsers/base.py +47 -0
  34. codebase_index/parsers/languages.py +290 -0
  35. codebase_index/parsers/line_chunker.py +39 -0
  36. codebase_index/parsers/symbol_chunks.py +62 -0
  37. codebase_index/parsers/treesitter.py +439 -0
  38. codebase_index/retrieval/__init__.py +9 -0
  39. codebase_index/retrieval/budget.py +82 -0
  40. codebase_index/retrieval/fusion.py +62 -0
  41. codebase_index/retrieval/intent.py +56 -0
  42. codebase_index/retrieval/pipeline.py +207 -0
  43. codebase_index/retrieval/rerank.py +69 -0
  44. codebase_index/retrieval/searchers.py +291 -0
  45. codebase_index/retrieval/skeleton.py +251 -0
  46. codebase_index/retrieval/types.py +79 -0
  47. codebase_index/scaffold.py +399 -0
  48. codebase_index/service.py +158 -0
  49. codebase_index/skill_template/SKILL.md +198 -0
  50. codebase_index/skill_template/examples/hooks/settings.json +16 -0
  51. codebase_index/skill_template/scripts/cbx +25 -0
  52. codebase_index/skill_template/scripts/cbx.ps1 +25 -0
  53. codebase_index/skill_update.py +150 -0
  54. codebase_index/storage/__init__.py +8 -0
  55. codebase_index/storage/db.py +116 -0
  56. codebase_index/storage/repo.py +701 -0
  57. codebase_index/storage/schema.sql +125 -0
  58. codebase_index/watch/__init__.py +5 -0
  59. codebase_index/watch/watcher.py +93 -0
  60. codebase_index-1.6.0.dist-info/METADATA +748 -0
  61. codebase_index-1.6.0.dist-info/RECORD +64 -0
  62. codebase_index-1.6.0.dist-info/WHEEL +4 -0
  63. codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
  64. codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,125 @@
-- Canonical DDL for codebase-index. Mirrors docs/SCHEMA.md. Applied by storage/db.py.
--
-- Every statement is idempotent (IF NOT EXISTS), so the script can be replayed
-- against an existing database.
PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
-- The ON DELETE CASCADE / SET NULL clauses below depend on foreign-key
-- enforcement being switched on.
PRAGMA foreign_keys = ON;
PRAGMA temp_store = MEMORY;

-- One row per indexed file.
CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    lang TEXT,
    size_bytes INTEGER NOT NULL,
    sha256 TEXT NOT NULL,
    mtime_ns INTEGER NOT NULL,
    git_status TEXT,
    parser TEXT NOT NULL,
    indexed_at TEXT NOT NULL,
    is_generated INTEGER NOT NULL DEFAULT 0,
    summary TEXT
);

-- Symbols belong to a file and are removed with it. parent_id is a
-- self-reference that is detached (SET NULL) when the parent symbol is deleted.
CREATE TABLE IF NOT EXISTS symbols (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    qualified TEXT,
    kind TEXT NOT NULL,
    line_start INTEGER NOT NULL,
    line_end INTEGER NOT NULL,
    signature TEXT,
    parent_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    docstring TEXT,
    in_degree INTEGER NOT NULL DEFAULT 0,
    out_degree INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
-- parent_id is a child key with ON DELETE SET NULL: each symbol delete (including
-- those cascaded from files) looks up rows by parent_id, which is a full symbols
-- scan without this index. Same reasoning as idx_edges_file below.
-- NOTE(review): add to docs/SCHEMA.md as well if that document lists indexes.
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);

-- Chunks belong to a file and are removed with it; the optional symbol link is
-- detached (SET NULL) when the symbol is deleted.
CREATE TABLE IF NOT EXISTS chunks (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    line_start INTEGER NOT NULL,
    line_end INTEGER NOT NULL,
    kind TEXT,
    symbol_id INTEGER REFERENCES symbols(id) ON DELETE SET NULL,
    content TEXT NOT NULL,
    token_est INTEGER NOT NULL,
    -- Denormalized copy of the chunk's symbol name, populated at write time.
    -- Stored (not a live join) so the FTS triggers below can replay the exact
    -- indexed value on delete/update; a subquery would read a symbol row that the
    -- ON DELETE SET NULL cascade may already have detached, corrupting the index.
    symbol_names TEXT NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
-- symbol_id is a child key with ON DELETE SET NULL: each symbol delete looks up
-- chunks by symbol_id, which is a full chunks scan without this index.
CREATE INDEX IF NOT EXISTS idx_chunks_symbol ON chunks(symbol_id);

CREATE TABLE IF NOT EXISTS edges (
    id INTEGER PRIMARY KEY,
    edge_type TEXT NOT NULL,
    src_kind TEXT NOT NULL,
    src_id INTEGER NOT NULL,
    dst_kind TEXT,
    dst_id INTEGER,
    dst_name TEXT,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    line INTEGER,
    resolved INTEGER NOT NULL DEFAULT 0,
    -- Honesty audit trail (see docs/SCHEMA.md). How sure are we this edge points
    -- where it claims? 'extracted' = exact match (same-file symbol or a repo-unique
    -- name); 'inferred' = a heuristic resolved it (import path-suffix); 'ambiguous'
    -- = a name/import we could not pin to a unique target. Set at build time by the
    -- global graph pass; never guessed by an LLM (the index is fully local).
    confidence TEXT NOT NULL DEFAULT 'extracted'
);
CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src_kind, src_id);
CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst_kind, dst_id);
CREATE INDEX IF NOT EXISTS idx_edges_name ON edges(dst_name);
CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(edge_type);
-- replace_edges deletes per file on every incremental update, and files(id)
-- deletions cascade here; without this index both are full edges scans.
CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_id);

CREATE TABLE IF NOT EXISTS modules (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    kind TEXT NOT NULL,
    summary TEXT,
    file_count INTEGER NOT NULL DEFAULT 0,
    symbol_count INTEGER NOT NULL DEFAULT 0
);

-- Free-form key/value store.
CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

-- FTS5 over chunks (external content). Triggers keep it in sync.
-- NOTE(review): `path` has no counterpart column in `chunks`. With an
-- external-content table FTS5 fetches column values from the content table by
-- column name, so reading fts_chunks.path (or running the 'rebuild' command)
-- presumably fails -- confirm callers only use rowid/rank and join back to
-- chunks/files for the path.
CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
    content,
    symbol_names,
    path UNINDEXED,
    content='chunks',
    content_rowid='id',
    tokenize = "unicode61 remove_diacritics 2"
);

-- symbol_names mirrors new/old.symbol_names (the stored chunk column), NOT a live
-- join: external-content FTS requires the delete to replay the exact value that was
-- indexed, which a join could no longer reproduce after a symbol cascade. path is
-- UNINDEXED so its delete value is irrelevant.
CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
    INSERT INTO fts_chunks(rowid, content, symbol_names, path)
    VALUES (new.id, new.content, new.symbol_names, (SELECT path FROM files WHERE id = new.file_id));
END;
CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content, symbol_names, path)
    VALUES ('delete', old.id, old.content, old.symbol_names, '');
END;
-- Update = remove the old indexed values, then index the new ones.
CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
    INSERT INTO fts_chunks(fts_chunks, rowid, content, symbol_names, path)
    VALUES ('delete', old.id, old.content, old.symbol_names, '');
    INSERT INTO fts_chunks(rowid, content, symbol_names, path)
    VALUES (new.id, new.content, new.symbol_names, (SELECT path FROM files WHERE id = new.file_id));
END;

-- vec_chunks (sqlite-vec) is created at runtime ONLY when embeddings.enabled = true.
@@ -0,0 +1,5 @@
1
+ """OPTIONAL live indexing (extra: watch).
2
+
3
+ watcher.py : watchdog-based observer that debounces filesystem events and runs the incremental
4
+ indexer once edits go quiet, outside the edit loop. Never required; `update` (manual or via hook) is the default.
5
+ """
@@ -0,0 +1,93 @@
1
+ """Optional live indexing (extra: watch).
2
+
3
+ A burst of filesystem events is coalesced by `DebouncedIndexer` into a single incremental
4
+ `update` once edits go quiet for `window_s`, so we never block or thrash the edit loop.
5
+ `run_watch` wires that to a watchdog observer; watchdog is imported lazily so the base
6
+ install never depends on it.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Callable, Optional
14
+
15
+
class DebouncedIndexer:
    """Collapse a burst of edit notifications into a single deferred callback run.

    The class keeps no timers or threads of its own: `notify()` only stamps the time of
    the latest edit using the injected clock, and `maybe_run()` must be polled by the
    caller. That makes it deterministic under test (pass a fake `clock`).

    `maybe_run()` fires the callback once when work is pending and at least `window_s`
    has elapsed since the most recent `notify()`; the pending state is cleared before
    the callback runs, so a later `notify()` arms it again.
    """

    def __init__(
        self,
        callback: Callable[[], None],
        *,
        window_s: float,
        clock: Callable[[], float] = time.monotonic,
    ) -> None:
        self._callback = callback
        self._window_s = window_s
        self._clock = clock
        # Clock reading of the latest notification; None means nothing is pending.
        self._last_event: Optional[float] = None

    def notify(self) -> None:
        """Record an edit, restarting the quiet window from now."""
        self._last_event = self._clock()

    def maybe_run(self) -> bool:
        """Run the callback if the quiet window has elapsed; return whether it ran."""
        if self._last_event is not None:
            quiet_for = self._clock() - self._last_event
            # The tiny epsilon absorbs float rounding when exactly `window_s` has passed.
            still_settling = quiet_for < self._window_s - 1e-9
            if not still_settling:
                # Clear first so notifications arriving during the callback re-arm us.
                self._last_event = None
                self._callback()
                return True
        return False
47
+
48
+
def run_watch(config, db_path, debounce_ms: int) -> None:  # pragma: no cover - exercised via CLI/manual QA
    """Watch the repo and run incremental `update` on debounced changes.

    Blocks, polling the debouncer, until interrupted with Ctrl-C; the observer is always
    stopped and joined on the way out.

    Args:
        config: project configuration. Only `config.root` is read here; the object is
            otherwise handed through to `update_index`.
        db_path: location of the index database. It is opened afresh for every update.
        debounce_ms: quiet period in milliseconds that must pass after the last
            filesystem event before an update runs.

    Raises:
        RuntimeError: (not ImportError) with install guidance if watchdog is absent.
    """
    try:
        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer
    except ImportError as exc:
        raise RuntimeError(
            "watch mode requires the optional 'watchdog' dependency. "
            'Install it with: pip install "codebase-index[watch]"'
        ) from exc

    import os

    from ..indexer.pipeline import update_index
    from ..storage.db import Database

    root = Path(config.root).resolve()

    # The index database and SQLite's sidecar files. If they live under `root`, every
    # update's own writes would otherwise re-arm the debouncer and loop forever.
    # NOTE(review): matched by exact path string; assumes no symlink sits between
    # `root` and the database file.
    db_file = str(Path(db_path).resolve())
    own_files = frozenset(db_file + suffix for suffix in ("", "-wal", "-shm", "-journal"))

    # Event types watchdog reports for reads, not changes. The indexer opening files
    # to hash/parse them emits these, which would also feed back into the debouncer.
    read_only_events = frozenset({"opened", "closed_no_write"})

    def _run_update() -> None:
        with Database(db_path) as db:
            stats = update_index(config, db, root=root)
        if stats.indexed or stats.deleted:
            print(f"[watch] updated {stats.indexed}, pruned {stats.deleted}", flush=True)

    debouncer = DebouncedIndexer(_run_update, window_s=debounce_ms / 1000.0)

    class _Handler(FileSystemEventHandler):
        def on_any_event(self, event) -> None:
            if event.is_directory:
                return
            if event.event_type in read_only_events:
                return
            # src_path may be bytes or str depending on how the watch was scheduled.
            if os.fsdecode(event.src_path) in own_files:
                return
            debouncer.notify()

    # Poll at most every 250 ms, but never with a zero/negative interval: sleep(0)
    # would spin a core and a negative value makes time.sleep raise.
    poll_s = max(0.01, min(0.25, debounce_ms / 1000.0))

    observer = Observer()
    observer.schedule(_Handler(), str(root), recursive=True)
    observer.start()
    print(f"[watch] watching {root} (debounce {debounce_ms}ms). Ctrl-C to stop.", flush=True)
    try:
        while True:
            time.sleep(poll_s)
            debouncer.maybe_run()
    except KeyboardInterrupt:
        pass
    finally:
        observer.stop()
        observer.join()