codebase-index 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_index/__init__.py +7 -0
- codebase_index/__main__.py +3 -0
- codebase_index/cli.py +916 -0
- codebase_index/config.py +110 -0
- codebase_index/discovery/__init__.py +10 -0
- codebase_index/discovery/classify.py +151 -0
- codebase_index/discovery/ignore.py +58 -0
- codebase_index/discovery/walker.py +75 -0
- codebase_index/doctor.py +138 -0
- codebase_index/embeddings/__init__.py +2 -0
- codebase_index/embeddings/backend.py +67 -0
- codebase_index/embeddings/external.py +56 -0
- codebase_index/embeddings/local.py +41 -0
- codebase_index/embeddings/noop.py +15 -0
- codebase_index/graph/__init__.py +8 -0
- codebase_index/graph/analysis.py +468 -0
- codebase_index/graph/builder.py +160 -0
- codebase_index/graph/expand.py +136 -0
- codebase_index/graph/export.py +381 -0
- codebase_index/graph/navigate.py +201 -0
- codebase_index/indexer/__init__.py +8 -0
- codebase_index/indexer/doc_chunks.py +202 -0
- codebase_index/indexer/freshness.py +109 -0
- codebase_index/indexer/pipeline.py +423 -0
- codebase_index/mcp/__init__.py +2 -0
- codebase_index/mcp/server.py +354 -0
- codebase_index/models.py +145 -0
- codebase_index/output/__init__.py +6 -0
- codebase_index/output/json.py +13 -0
- codebase_index/output/markdown.py +316 -0
- codebase_index/output/redact.py +31 -0
- codebase_index/parsers/__init__.py +9 -0
- codebase_index/parsers/base.py +47 -0
- codebase_index/parsers/languages.py +290 -0
- codebase_index/parsers/line_chunker.py +39 -0
- codebase_index/parsers/symbol_chunks.py +62 -0
- codebase_index/parsers/treesitter.py +439 -0
- codebase_index/retrieval/__init__.py +9 -0
- codebase_index/retrieval/budget.py +82 -0
- codebase_index/retrieval/fusion.py +62 -0
- codebase_index/retrieval/intent.py +56 -0
- codebase_index/retrieval/pipeline.py +207 -0
- codebase_index/retrieval/rerank.py +69 -0
- codebase_index/retrieval/searchers.py +291 -0
- codebase_index/retrieval/skeleton.py +251 -0
- codebase_index/retrieval/types.py +79 -0
- codebase_index/scaffold.py +399 -0
- codebase_index/service.py +158 -0
- codebase_index/skill_template/SKILL.md +198 -0
- codebase_index/skill_template/examples/hooks/settings.json +16 -0
- codebase_index/skill_template/scripts/cbx +25 -0
- codebase_index/skill_template/scripts/cbx.ps1 +25 -0
- codebase_index/skill_update.py +150 -0
- codebase_index/storage/__init__.py +8 -0
- codebase_index/storage/db.py +116 -0
- codebase_index/storage/repo.py +701 -0
- codebase_index/storage/schema.sql +125 -0
- codebase_index/watch/__init__.py +5 -0
- codebase_index/watch/watcher.py +93 -0
- codebase_index-1.6.0.dist-info/METADATA +748 -0
- codebase_index-1.6.0.dist-info/RECORD +64 -0
- codebase_index-1.6.0.dist-info/WHEEL +4 -0
- codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
- codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codebase-index
|
|
3
|
+
description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
|
|
4
|
+
allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Codebase Index
|
|
8
|
+
|
|
9
|
+
Use this skill first for codebase questions.
|
|
10
|
+
|
|
11
|
+
Never scan the entire repository before searching the index.
|
|
12
|
+
|
|
13
|
+
## When to use
|
|
14
|
+
|
|
15
|
+
Invoke this skill **before reading any files** when the user asks about this project's code:
|
|
16
|
+
|
|
17
|
+
- "where is X implemented" / "find X" / "locate the X function"
|
|
18
|
+
- "how does X work" / "explain the X flow"
|
|
19
|
+
- "what breaks if I change X" / "what depends on X" (impact analysis)
|
|
20
|
+
- "who calls X" / "references to X"
|
|
21
|
+
- "trace the data flow of X"
|
|
22
|
+
- "why is this error happening" (error/stack trace)
|
|
23
|
+
- "explain the architecture" / "give me an overview"
|
|
24
|
+
- Any question about symbols, files, dependencies, or refactoring scope
|
|
25
|
+
|
|
26
|
+
Do **not** use it for: editing files, running the application, or non-code questions.
|
|
27
|
+
|
|
28
|
+
## How to call the CLI
|
|
29
|
+
|
|
30
|
+
Use the `codebase-index` CLI directly, or the bundled `cbx` wrapper:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
codebase-index search "$QUERY" --json
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Pick the subcommand by intent:
|
|
37
|
+
|
|
38
|
+
| User intent | Command |
|
|
39
|
+
|---|---|
|
|
40
|
+
| "how does X work" / "explain X" / "walk me through" | `codebase-index explain "$QUERY" --json` |
|
|
41
|
+
| overview / architecture / "map the codebase" | `codebase-index architecture --json` |
|
|
42
|
+
| general / unsure | `codebase-index search "$QUERY" --json` |
|
|
43
|
+
| keyword / "where is" | `codebase-index search "$QUERY" --json` |
|
|
44
|
+
| a specific symbol name | `codebase-index symbol "<name>" --json` |
|
|
45
|
+
| "who calls / references" | `codebase-index refs "<name>" --json` |
|
|
46
|
+
| "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
|
|
47
|
+
| "how is X connected to Y" / dependency path | `codebase-index path "<A>" "<B>" --json` |
|
|
48
|
+
| "what is X" / describe a symbol's role | `codebase-index describe "<name>" --json` |
|
|
49
|
+
| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
|
|
50
|
+
|
|
51
|
+
`architecture` returns the codebase map computed at index time — detected modules
|
|
52
|
+
(communities), god nodes (most-connected symbols), surprising cross-module links,
|
|
53
|
+
and suggested questions. Reach for it on "give me an overview" / "where do I
|
|
54
|
+
start" questions instead of a broad `explain`.
|
|
55
|
+
|
|
56
|
+
`path "A" "B"` returns the shortest dependency/call chain between two symbols or
|
|
57
|
+
files; `describe "X"` returns a node card (definition, callers, callees,
|
|
58
|
+
in/out degree, module, god-node rank). Both annotate edges with a `confidence`
|
|
59
|
+
(`extracted` exact, `inferred` heuristic, `ambiguous` unresolved) — treat a path
|
|
60
|
+
or callee list that leans on `inferred`/`ambiguous` edges as less certain.
|
|
61
|
+
|
|
62
|
+
The `graph` command renders an HTML dependency graph for a person to look at —
|
|
63
|
+
it is not a retrieval packet. Use it only when the user explicitly wants a visual
|
|
64
|
+
graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless
|
|
65
|
+
session prefer `--output <path>` over `--open`. `--format graphml|dot|neo4j`
|
|
66
|
+
exports the graph for external tools (Gephi/yEd, Graphviz, Neo4j) instead of HTML.
|
|
67
|
+
|
|
68
|
+
`explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
|
|
69
|
+
|
|
70
|
+
For `search`, pick a `--mode` when the intent is clear:
|
|
71
|
+
- `--mode symbol` — pure symbol lookups (faster, no FTS noise)
|
|
72
|
+
- `--mode fts` — text/keyword queries where symbol names don't matter
|
|
73
|
+
- `--mode hybrid` — default; best for mixed queries
|
|
74
|
+
- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
|
|
75
|
+
requests" without the exact words). Requires opt-in embeddings; falls back with
|
|
76
|
+
a clear message when they are not enabled. `hybrid` already blends vectors in
|
|
77
|
+
when embeddings are on, so reach for `vector` only for pure-semantic recall.
|
|
78
|
+
|
|
79
|
+
Natural-language kind words such as `method`, `function`, `class`, `interface`,
|
|
80
|
+
`enum`, and `type` constrain the symbol retriever inside `search`.
|
|
81
|
+
|
|
82
|
+
Use `--json` for programmatic parsing; omit for human-readable output.
|
|
83
|
+
Search/read commands auto-build the index when it is missing; still check
|
|
84
|
+
freshness and run `update`/`index` when responses report stale data.
|
|
85
|
+
|
|
86
|
+
## Step-by-step workflow
|
|
87
|
+
|
|
88
|
+
1. **Query the index** using the appropriate subcommand for `$QUERY`.
|
|
89
|
+
2. **Check index freshness** in the response:
|
|
90
|
+
- `index.exists: false` → run `codebase-index index` first, then re-query.
|
|
91
|
+
- `index.stale: true`, `files_changed_since_build < 20` → run `codebase-index update`, then re-query.
|
|
92
|
+
- `index.stale: true`, `files_changed_since_build ≥ 20` → run `codebase-index index` (full rebuild).
|
|
93
|
+
- Otherwise proceed with results.
|
|
94
|
+
3. **Read ONLY the `recommended_reads`** — use the Read tool with `offset`/`limit` to read the exact line ranges returned. Do not open whole files.
|
|
95
|
+
4. **Answer** with file:line citations (e.g., `src/auth/token.py:88-134`).
|
|
96
|
+
5. **Fallback** only if confidence is low or results are empty (see below).
|
|
97
|
+
|
|
98
|
+
## Token-budgeted output interpretation
|
|
99
|
+
|
|
100
|
+
The index returns a **ranked retrieval packet** with:
|
|
101
|
+
|
|
102
|
+
- `rank` — result position (start with 1-3)
|
|
103
|
+
- `path` — file path
|
|
104
|
+
- `line_start` / `line_end` — exact line range to read
|
|
105
|
+
- `symbols` — symbols found in this range
|
|
106
|
+
- `score` — relevance score
|
|
107
|
+
- `reason` — why this result ranked (e.g., "exact symbol match, 4 callers")
|
|
108
|
+
- `snippet` — compact code excerpt (may already answer the question); `null` means budget was spent — read via `recommended_reads` instead
|
|
109
|
+
- `skeletonized` — when `true`, the `snippet` is a **focus skeleton**: import/signature/class lines and the line(s) matching your query are kept, while function bodies collapse to a marker like `... 24 lines elided (read 88-134)`. Read that line range (or the result's `line_start`/`line_end`) when you need a full body.
|
|
110
|
+
- `elided_lines` — how many source lines the skeleton folded away (`0` when not skeletonized).
|
|
111
|
+
|
|
112
|
+
Top-level fields:
|
|
113
|
+
|
|
114
|
+
- `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
|
|
115
|
+
- `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
|
|
116
|
+
- `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
|
|
117
|
+
- `intent` / `mode` — how the query was classified and which retrievers ran;
|
|
118
|
+
useful to sanity-check a weak result (e.g. a "how does X work" question that
|
|
119
|
+
resolved to a bare symbol lookup may need `explain` instead).
|
|
120
|
+
- `pagination` — present only when more results exist than fit the page. It
|
|
121
|
+
reports `has_more` and `next_offset`. To page, re-run `search` with
|
|
122
|
+
`--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`). Prefer
|
|
123
|
+
refining with a more specific subcommand or raising `--token-budget` first —
|
|
124
|
+
page only when the top results genuinely miss the answer.
|
|
125
|
+
- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
|
|
126
|
+
edges (imports/inheritance) are extracted only for fully supported languages.
|
|
127
|
+
When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
|
|
128
|
+
as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may
|
|
129
|
+
just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing
|
|
130
|
+
references this". `coverage.languages` lists the affected languages.
|
|
131
|
+
|
|
132
|
+
## Token efficiency rules
|
|
133
|
+
|
|
134
|
+
- Trust the index. Read the **fewest** files needed — start with rank 1-3 only.
|
|
135
|
+
- Read **line ranges**, not whole files. Use `line_start`/`line_end` with Read's `offset`/`limit`.
|
|
136
|
+
- The `snippet` may already answer the question — re-read only if you need more context.
|
|
137
|
+
- Prefer `search`/`symbol`/`refs`/`impact`/`explain` over manual Grep/Glob — Grep and Glob are expensive fallbacks, not step 1.
|
|
138
|
+
- Don't re-run the query with trivially reworded text; refine with a different subcommand instead.
|
|
139
|
+
- For broad questions (`confidence: low`, architecture, data-flow), raise the budget: `--token-budget 3000`.
|
|
140
|
+
- Test files are demoted in ranking by default. Include "test" in the query to surface them.
|
|
141
|
+
- Snippets are skeletonized by default to fit more results in the budget. The matched line is always preserved; pass `--raw` (CLI) or `raw: true` (MCP) on the rare occasion you need full bodies inline instead of reading the cited line range.
|
|
142
|
+
|
|
143
|
+
## Fallback behavior
|
|
144
|
+
|
|
145
|
+
Fall back to built-in search **only** when: results are empty, `confidence` is `low`, or the user asks for something the index clearly doesn't cover.
|
|
146
|
+
|
|
147
|
+
0. If confidence is consistently low across queries, run diagnostics first:
|
|
148
|
+
```bash
|
|
149
|
+
codebase-index stats --json # per-language file/symbol counts + graph tier
|
|
150
|
+
codebase-index doctor # surface config or security issues
|
|
151
|
+
```
|
|
152
|
+
Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
|
|
153
|
+
In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
|
|
154
|
+
`graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
|
|
155
|
+
import/inheritance edges for that language — treat empty results there as
|
|
156
|
+
inconclusive.
|
|
157
|
+
|
|
158
|
+
1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
|
|
159
|
+
2. If still nothing, Glob for likely paths, then Grep within them.
|
|
160
|
+
3. As a last resort, broaden the search — but tell the user the index was weak here (it may need a rebuild: `codebase-index index`).
|
|
161
|
+
|
|
162
|
+
Never start with a full-repo scan when the index exists and is fresh.
|
|
163
|
+
|
|
164
|
+
## Examples
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
# "how does the auth flow work?"
|
|
168
|
+
codebase-index explain "auth flow" --json
|
|
169
|
+
|
|
170
|
+
# "explain the overall architecture" / "where do I start" — modules, god nodes
|
|
171
|
+
codebase-index architecture --json
|
|
172
|
+
|
|
173
|
+
# "where is auth token refresh implemented?"
|
|
174
|
+
codebase-index search "auth token refresh" --json
|
|
175
|
+
|
|
176
|
+
# "what breaks if I change the User model?"
|
|
177
|
+
codebase-index impact "User" --json
|
|
178
|
+
|
|
179
|
+
# "who calls send_email?"
|
|
180
|
+
codebase-index refs "send_email" --json
|
|
181
|
+
|
|
182
|
+
# "find the AuthService class"
|
|
183
|
+
codebase-index symbol "AuthService" --json
|
|
184
|
+
|
|
185
|
+
# precise symbol search (faster, no FTS noise)
|
|
186
|
+
codebase-index search "AuthService" --mode symbol --json
|
|
187
|
+
|
|
188
|
+
# "how is the API layer connected to the database?"
|
|
189
|
+
codebase-index path "ApiController" "Database" --json
|
|
190
|
+
|
|
191
|
+
# "what is the Database class and how is it used?"
|
|
192
|
+
codebase-index describe "Database" --json
|
|
193
|
+
|
|
194
|
+
# generate and open an HTML graph around a file or symbol
|
|
195
|
+
codebase-index graph "User" --direction both --depth 2 --open
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Then Read only the returned line ranges and answer with citations.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Thin, safe wrapper around the installed `codebase-index` CLI.
# - Resolves the CLI: prefers `python -m codebase_index` (or `python3`) when the
#   package is importable; falls back to a `codebase-index` binary on PATH.
# - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch).
set -euo pipefail

# Read-only query commands plus the two index-maintenance commands.
ALLOWED="search explain architecture symbol refs impact path describe graph stats doctor update index"

sub="${1:-}"
allowed=0
case "$sub" in
  # An empty value, or one containing whitespace, must never be accepted: a value
  # such as "search explain" would otherwise match the space-delimited list below.
  "" | *[[:space:]]*) ;;
  *)
    case " $ALLOWED " in
      *" ${sub} "*) allowed=1 ;;
    esac
    ;;
esac

if [ "$allowed" -ne 1 ]; then
  echo "cbx: refusing subcommand '${sub}'. Allowed: ${ALLOWED}" >&2
  exit 2
fi

# Try each interpreter name in turn; many systems only ship `python3`.
for py in python python3; do
  if command -v "$py" >/dev/null 2>&1 && "$py" -c "import codebase_index" >/dev/null 2>&1; then
    exec "$py" -m codebase_index "$@"
  fi
done

if command -v codebase-index >/dev/null 2>&1; then
  exec codebase-index "$@"
fi

echo "cbx: codebase_index is not importable and codebase-index is not on PATH" >&2
exit 127
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Windows PowerShell wrapper around the installed `codebase-index` CLI.
# Mirrors scripts/cbx: whitelists safe subcommands, prefers `python -m codebase_index`
# when the package is importable, and falls back to a `codebase-index` command on PATH.
param(
    [Parameter(Mandatory = $true, Position = 0)]
    [string]$Subcommand,
    [Parameter(ValueFromRemainingArguments = $true)]
    [string[]]$Rest
)

$ErrorActionPreference = "Stop"
$allowed = @(
    "search", "explain", "architecture", "symbol", "refs", "impact",
    "path", "describe", "graph", "stats", "doctor", "update", "index"
)

if ($allowed -notcontains $Subcommand) {
    # Write-Error becomes a terminating error under ErrorActionPreference = "Stop",
    # which would skip the explicit exit code below; write to stderr directly.
    [Console]::Error.WriteLine("cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')")
    exit 2
}

# Probe python only when it exists: invoking a missing command raises, which
# would prevent the fallback to the `codebase-index` command from ever running.
$importable = $false
if (Get-Command python -ErrorAction SilentlyContinue) {
    try {
        & python -c "import codebase_index" 2>$null
        $importable = ($LASTEXITCODE -eq 0)
    } catch {
        # Treat any failure of the probe itself as "not importable".
        $importable = $false
    }
}

if ($importable) {
    & python -m codebase_index $Subcommand @Rest
    exit $LASTEXITCODE
}

$bin = Get-Command codebase-index -ErrorAction SilentlyContinue
if ($bin) {
    & $bin.Source $Subcommand @Rest
    exit $LASTEXITCODE
}

[Console]::Error.WriteLine("cbx: codebase_index is not importable and codebase-index is not on PATH")
exit 127
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Skill auto-update and rollback helpers.
|
|
2
|
+
|
|
3
|
+
Auto-update flow:
|
|
4
|
+
1. On any CLI invocation, compare the installed skill's .skill_version stamp
|
|
5
|
+
against the running package version.
|
|
6
|
+
2. If they differ, re-materialize the skill template silently and stamp the new
|
|
7
|
+
version. A backup is saved first so the user can roll back.
|
|
8
|
+
|
|
9
|
+
Manual commands exposed via cli.py:
|
|
10
|
+
codebase-index skill-update -- force refresh all/one installed targets
|
|
11
|
+
codebase-index skill-rollback -- restore the last backup
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import shutil
|
|
17
|
+
import sys
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
# Name of the stamp file kept inside an installed skill directory; the helpers
# below read it and write the package version into it.
VERSION_FILE = ".skill_version"
# Backup location, relative to the ``root`` path passed to the helpers; each
# target gets its own subdirectory underneath it.
_CACHE_BACKUP_REL = ".claude/cache/codebase-index/skill-backups"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# version helpers
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
def _package_version() -> str:
    """Return the installed ``codebase-index`` distribution version.

    Both the import and the metadata lookup are guarded: any failure yields the
    literal string ``"unknown"`` instead of raising.
    """
    try:
        import importlib.metadata as _metadata

        resolved = _metadata.version("codebase-index")
    except Exception:
        resolved = "unknown"
    return resolved
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _installed_version(skill_dir: Path) -> str:
    """Return the stripped version stamp stored in *skill_dir*, or ``""`` if absent."""
    stamp = skill_dir / VERSION_FILE
    if not stamp.exists():
        return ""
    contents = stamp.read_text(encoding="utf-8")
    return contents.strip()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _write_version(skill_dir: Path, ver: str) -> None:
    """Write *ver* plus a trailing newline to the version stamp in *skill_dir*."""
    stamp = skill_dir / VERSION_FILE
    payload = ver + "\n"
    stamp.write_text(payload, encoding="utf-8")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def needs_update(skill_dir: Path) -> bool:
    """Report whether the stamp in *skill_dir* disagrees with the running package version."""
    stamped = _installed_version(skill_dir)
    running = _package_version()
    return stamped != running
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
# backup helpers
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
def _backup_dir(root: Path, target: str) -> Path:
    """Return the backup directory for *target* under *root*.

    Backups are kept in the cache area, not beside the skill, so they do not
    pollute skill namespaces.
    """
    cache_base = root / _CACHE_BACKUP_REL
    return cache_base / target
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _make_backup(root: Path, skill_dir: Path, target: str) -> bool:
    """Snapshot *skill_dir* into the cache backup slot for *target*.

    Any backup already stored for the same target is removed first. Returns
    ``True`` when a copy was written and ``False`` when *skill_dir* does not exist.
    """
    if skill_dir.exists():
        destination = _backup_dir(root, target)
        if destination.exists():
            # Only one backup per target is kept; drop the previous one.
            shutil.rmtree(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copytree(skill_dir, destination)
        return True
    return False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# public API
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
def update_skill(root: Path, target: str, *, backup: bool = True) -> dict:
    """Re-materialize the bundled skill template for *target*.

    The skill directory is optionally backed up, the template is materialized
    with ``force=True``, and the directory is stamped with the running package
    version.

    Returns a result dict:
      - target      (str)
      - old_version (str)
      - new_version (str)
      - backed_up   (bool)
      - updated     (bool)
    """
    from . import scaffold

    skill_dir = root / scaffold.skill_rel_for_target(target)
    new_version = _package_version()
    previous_version = _installed_version(skill_dir)

    # Take the snapshot before anything is overwritten.
    backed_up = False
    if backup:
        backed_up = _make_backup(root, skill_dir, target)

    scaffold.materialize_skill(root, force=True, target=target)
    _write_version(skill_dir, new_version)

    return dict(
        target=target,
        old_version=previous_version,
        new_version=new_version,
        backed_up=backed_up,
        updated=True,
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def rollback_skill(root: Path, target: str) -> dict:
    """Restore the backed-up skill for *target*.

    The current skill directory (if any) is replaced by the backup copy. The
    restored directory is then stamped with the running package version so that
    the version comparison used by the auto-update path does not see a mismatch
    and immediately re-apply the update that was just rolled back.

    Returns a result dict:
      - target      (str)
      - rolled_back (bool)
      - reason      (str, only when rolled_back=False)
    """
    from . import scaffold

    skill_dir = root / scaffold.skill_rel_for_target(target)
    bak = _backup_dir(root, target)

    if not bak.exists():
        return {"target": target, "rolled_back": False, "reason": "no backup found"}

    if skill_dir.exists():
        shutil.rmtree(skill_dir)
    shutil.copytree(bak, skill_dir)
    # The backup carries whatever stamp it had when it was taken (older, or none).
    # Leaving that in place makes needs_update() true again, which would undo the
    # rollback on the next automatic check.
    _write_version(skill_dir, _package_version())
    return {"target": target, "rolled_back": True}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def auto_update_if_needed(root: Path, target: str) -> bool:
    """Silently refresh the *target* skill when its version stamp is outdated.

    Returns ``True`` only when an update was applied. Nothing is done when the
    skill directory does not exist or its stamp already matches. Never raises:
    any failure is reported on stderr and ``False`` is returned, because a
    broken auto-update must not crash the user's real command.
    """
    try:
        from . import scaffold

        skill_dir = root / scaffold.skill_rel_for_target(target)
        outdated = skill_dir.exists() and needs_update(skill_dir)
        if outdated:
            update_skill(root, target, backup=True)
        return outdated
    except Exception as exc:
        message = (
            f"[codebase-index] skill auto-update for '{target}' failed "
            f"({type(exc).__name__}: {exc}); run `codebase-index skill-update`."
        )
        print(message, file=sys.stderr)
        return False
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""SQLite persistence layer.
|
|
2
|
+
|
|
3
|
+
db.py : connection management, pragmas, applying schema.sql, migrations gated on
|
|
4
|
+
meta.schema_version.
|
|
5
|
+
schema.sql: canonical DDL (mirrors docs/SCHEMA.md).
|
|
6
|
+
repo.py : typed accessors (upsert_file, replace_chunks, insert_symbols, insert_edges, FTS query,
|
|
7
|
+
vector query, freshness read). All SQL lives here — no raw SQL elsewhere.
|
|
8
|
+
"""
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""SQLite connection management: pragmas, schema application, version guard."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sqlite3
|
|
6
|
+
from importlib import resources
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
# Schema version stamped into meta.schema_version when no version is recorded
# yet, and the newest version this code accepts on open. Version history:
# 2: chunks gained a denormalized `symbol_names` column (FTS symbol-name boost).
# 3: edges gained a `confidence` column (extracted/inferred/ambiguous audit trail).
SCHEMA_VERSION = 3
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Database:
    """Own one SQLite connection for one CLI invocation.

    Use ``open()`` / ``close()`` directly or as a context manager. Opening applies
    the connection pragmas, runs the bundled ``schema.sql`` and checks the stored
    schema version against ``SCHEMA_VERSION``.
    """

    def __init__(self, path: Path | str) -> None:
        self.path = Path(path)
        self._conn: Optional[sqlite3.Connection] = None

    def open(self) -> "Database":
        """Connect, configure and validate the database; return ``self``.

        The parent directory is created when missing. If any setup step raises,
        the half-initialised connection is closed before the error propagates.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._conn = sqlite3.connect(self.path)
        try:
            self._conn.row_factory = sqlite3.Row
            self._apply_pragmas()
            self._apply_schema()
            self._guard_version()
        except BaseException:
            # __exit__ is not called when __enter__ raises, so nothing else would
            # release this handle.
            conn, self._conn = self._conn, None
            conn.close()
            raise
        return self

    def close(self) -> None:
        """Commit pending work and close the connection; no-op when not open."""
        if self._conn is None:
            return
        conn, self._conn = self._conn, None
        try:
            conn.commit()
        finally:
            # Release the handle even if the final commit fails.
            conn.close()

    def __enter__(self) -> "Database":
        return self.open()

    def __exit__(self, *exc: object) -> None:
        # Closes (and therefore commits) whether or not the block raised.
        self.close()

    @property
    def conn(self) -> sqlite3.Connection:
        """The live connection.

        Raises:
            RuntimeError: if the database has not been opened.
        """
        if self._conn is None:
            raise RuntimeError("Database is not open")
        return self._conn

    def get_schema_version(self) -> int:
        """Return ``meta.schema_version`` as an int, or 0 when the key is absent."""
        row = self.conn.execute("SELECT value FROM meta WHERE key = 'schema_version'").fetchone()
        return int(row[0]) if row else 0

    def _apply_pragmas(self) -> None:
        self.conn.execute("PRAGMA journal_mode = WAL")
        self.conn.execute("PRAGMA synchronous = NORMAL")
        self.conn.execute("PRAGMA foreign_keys = ON")
        self.conn.execute("PRAGMA temp_store = MEMORY")

    def _apply_schema(self) -> None:
        """Execute the ``schema.sql`` packaged in ``codebase_index.storage``."""
        ddl = resources.files("codebase_index.storage").joinpath("schema.sql").read_text(
            encoding="utf-8"
        )
        self.conn.executescript(ddl)

    def _guard_version(self) -> None:
        """Stamp a fresh database, or refuse one written by a newer schema.

        Raises:
            RuntimeError: if the stored version is greater than ``SCHEMA_VERSION``.
        """
        current = self.get_schema_version()
        if current == 0:
            self.conn.execute(
                "INSERT OR REPLACE INTO meta(key, value) VALUES ('schema_version', ?)",
                (str(SCHEMA_VERSION),),
            )
            self.conn.commit()
        elif current > SCHEMA_VERSION:
            raise RuntimeError(
                f"Index schema_version {current} is newer than supported {SCHEMA_VERSION}; "
                "rebuild the index with an updated CLI."
            )
        # current < SCHEMA_VERSION is tolerated on open: queries never read the
        # added columns, so an older index is still safely *readable*. The build
        # commands (index/update) detect the mismatch via peek_schema_version and
        # rebuild from scratch, since there is no in-place migration framework and
        # schema.sql is applied with IF NOT EXISTS (old tables/triggers persist).

    def enable_vectors(self) -> None:
        """Load the sqlite-vec extension into this connection (optional extra).

        Raises:
            RuntimeError: if the ``sqlite_vec`` package is not installed.
        """
        try:
            import sqlite_vec  # type: ignore[import-untyped]
        except ImportError as exc:
            raise RuntimeError(
                "Vector search needs the optional extra: pip install codebase-index[embeddings]"
            ) from exc
        self.conn.enable_load_extension(True)
        try:
            sqlite_vec.load(self.conn)
        finally:
            # Never leave extension loading switched on, even when the load fails.
            self.conn.enable_load_extension(False)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def peek_schema_version(path: Path | str) -> int:
    """Read meta.schema_version without applying schema or running the guard.

    Returns 0 when the file, the meta table, or the key is absent/unreadable
    (including a NULL or non-numeric value), so callers can treat
    "0 < peek < SCHEMA_VERSION" (or a missing meta) as "rebuild".
    """
    p = Path(path)
    if not p.exists():
        return 0
    try:
        conn = sqlite3.connect(p)
        try:
            row = conn.execute(
                "SELECT value FROM meta WHERE key = 'schema_version'"
            ).fetchone()
        finally:
            conn.close()
        if row is None or row[0] is None:
            return 0
        return int(row[0])
    except (sqlite3.Error, ValueError, TypeError, OSError):
        # TypeError covers stored values int() cannot convert at all.
        return 0
|