@toolbaux/guardian 0.1.22 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/adapters/runner.js +72 -3
- package/dist/adapters/typescript-adapter.js +24 -10
- package/dist/benchmarking/metrics/context-coverage.js +82 -0
- package/dist/benchmarking/metrics/drift-score.js +104 -0
- package/dist/benchmarking/metrics/search-recall.js +207 -0
- package/dist/benchmarking/metrics/token-efficiency.js +79 -0
- package/dist/benchmarking/report.js +131 -0
- package/dist/benchmarking/runner.js +175 -0
- package/dist/benchmarking/types.js +13 -0
- package/dist/cli.js +53 -10
- package/dist/commands/benchmark.js +62 -0
- package/dist/commands/context.js +87 -29
- package/dist/commands/discrepancy.js +1 -1
- package/dist/commands/doc-generate.js +1 -1
- package/dist/commands/doc-html.js +1 -1
- package/dist/commands/extract.js +4 -1
- package/dist/commands/feature-context.js +1 -1
- package/dist/commands/generate.js +83 -10
- package/dist/commands/init.js +89 -56
- package/dist/commands/intel.js +70 -1
- package/dist/commands/mcp-serve.js +155 -316
- package/dist/commands/search.js +642 -14
- package/dist/config.js +1 -0
- package/dist/db/embeddings.js +113 -0
- package/dist/db/file-specs-store.js +174 -0
- package/dist/db/fts-builder.js +390 -0
- package/dist/db/index.js +55 -0
- package/dist/db/specs-store.js +13 -0
- package/dist/db/sqlite-specs-store.js +934 -0
- package/dist/extract/codebase-intel.js +31 -2
- package/dist/extract/compress.js +70 -3
- package/dist/extract/context-block.js +11 -2
- package/dist/extract/function-intel.js +5 -2
- package/dist/extract/index.js +1 -23
- package/dist/extract/writer.js +6 -0
- package/package.json +4 -1
package/dist/config.js
CHANGED
|
@@ -273,6 +273,7 @@ function normalizeConfig(input, configDir) {
|
|
|
273
273
|
}
|
|
274
274
|
function mergeConfig(base, override) {
|
|
275
275
|
return {
|
|
276
|
+
project_id: override.project_id ?? base.project_id,
|
|
276
277
|
project: {
|
|
277
278
|
root: override.project?.root ?? base.project?.root ?? "",
|
|
278
279
|
backendRoot: override.project?.backendRoot ?? base.project?.backendRoot ?? "",
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding generation for function-level semantic search.
|
|
3
|
+
*
|
|
4
|
+
* Strategy (local-first, no API key required):
|
|
5
|
+
* Default — @xenova/transformers running Xenova/all-MiniLM-L6-v2 on-device.
|
|
6
|
+
* Model downloads once (~23 MB) and is cached in ~/.cache/xenova.
|
|
7
|
+
* dim=384, pure JS/ONNX, no external service needed.
|
|
8
|
+
*
|
|
9
|
+
* Upgrade — OpenAI text-embedding-3-small when OPENAI_API_KEY is set.
|
|
10
|
+
* dim=256, higher quality, costs ~$0.02 per 1M tokens.
|
|
11
|
+
*
|
|
12
|
+
* Text per function (concise — name carries most semantic signal):
|
|
13
|
+
* "{name} {filename}: {top calls} {short literals}"
|
|
14
|
+
*/
|
|
15
|
+
// Local model loaded through @xenova/transformers, and the dimension reported in logs.
const LOCAL_MODEL = "Xenova/all-MiniLM-L6-v2";
const LOCAL_DIM = 384;
// OpenAI model; OPENAI_DIM is sent as the `dimensions` option of the embeddings request.
const OPENAI_MODEL = "text-embedding-3-small";
const OPENAI_DIM = 256;
// Number of functions embedded per batch.
const BATCH = 64; // safe for both local and OpenAI
|
|
20
|
+
/**
 * Build the short text that gets embedded for one function record.
 *
 * Shape: "{name} {filename}: {calls} {literals}" using at most the first 10
 * calls and the first 5 string literals (literal section capped at 100
 * characters). The result is trimmed and capped at 300 characters.
 *
 * @param {{name: string, file: string, calls?: string[], stringLiterals?: string[]}} fn
 * @returns {string} Text to embed for this function.
 */
function fnToText(fn) {
    // Split on both "/" and "\" so a Windows-style path also reduces to the
    // bare file name instead of putting the whole path into the embedded text.
    const filename = fn.file.split(/[\\/]/).pop() ?? fn.file;
    const callStr = (fn.calls ?? []).slice(0, 10).join(" ");
    const litStr = (fn.stringLiterals ?? []).slice(0, 5).join(" ").slice(0, 100);
    return `${fn.name} ${filename}: ${callStr} ${litStr}`.trim().slice(0, 300);
}
|
|
26
|
+
// ── Local embedder (no API key) ───────────────────────────────────────────────
|
|
27
|
+
/**
 * Embed texts one at a time with a feature-extraction pipeline.
 *
 * Each call asks the pipeline for mean pooling with normalisation and copies
 * the returned `data` into a fresh Float32Array. Calls are awaited in order,
 * so the output array lines up index-for-index with `texts`.
 *
 * @param {Iterable<string>} texts Texts to embed.
 * @param {Function} pipe Async callable `(text, options) => { data }`.
 * @returns {Promise<Float32Array[]>} One vector per input text.
 */
async function embedBatchLocal(texts, pipe) {
    const vectors = [];
    for (const text of texts) {
        const { data } = await pipe(text, { pooling: "mean", normalize: true });
        vectors.push(new Float32Array(data));
    }
    return vectors;
}
|
|
35
|
+
// ── OpenAI embedder (OPENAI_API_KEY required) ─────────────────────────────────
|
|
36
|
+
/**
 * Embed a batch of texts through the OpenAI embeddings endpoint.
 *
 * The `openai` package is imported inside the function, so it is only loaded
 * when this code path runs. A new client is constructed on every call.
 *
 * @param {string[]} texts Texts sent as the request `input`.
 * @param {string} apiKey OpenAI API key handed to the client constructor.
 * @returns {Promise<Float32Array[]>} One vector per item in the response `data` array.
 */
async function embedBatchOpenAI(texts, apiKey) {
    const openaiModule = await import("openai");
    const OpenAI = openaiModule.default;
    const client = new OpenAI({ apiKey });
    const request = {
        model: OPENAI_MODEL,
        input: texts,
        dimensions: OPENAI_DIM,
        encoding_format: "float",
    };
    const { data } = await client.embeddings.create(request);
    // NOTE(review): assumes response items come back in input order — confirm against the API docs.
    return data.map((item) => new Float32Array(item.embedding));
}
|
|
47
|
+
// ── Public API ────────────────────────────────────────────────────────────────
|
|
48
|
+
/**
 * Embed all functions and pass the resulting rows to `store.rebuildEmbeddings`.
 * Uses the local model by default; OpenAI when `apiKey` is truthy.
 *
 * Functions are processed in batches of BATCH. A batch that throws is logged
 * with console.warn and skipped, so the stored rows may cover only part of
 * `fns`. Returns immediately (without touching the store) when `fns` is empty.
 *
 * @param {{rebuildEmbeddings: Function}} store Receives `{file_path, name, line, vec}` rows.
 * @param {Array<{file: string, name: string, lines: number[]}>} fns Function records to embed.
 * @param {string} [apiKey] OpenAI API key; when falsy the local model is used.
 * @returns {Promise<void>}
 */
export async function embedFunctions(store, fns, apiKey) {
    if (fns.length === 0)
        return;
    const useOpenAI = !!apiKey;
    let pipe;
    if (!useOpenAI) {
        // Lazy-load local model (downloads once, then cached)
        const { pipeline } = await import("@xenova/transformers");
        console.log(`[guardian embed] loading local model ${LOCAL_MODEL}…`);
        pipe = await pipeline("feature-extraction", LOCAL_MODEL);
    }
    // Progress is reported each time the processed count crosses another
    // multiple of this step. Testing `i % 500 === 0` directly almost never
    // fires because `i` only advances in multiples of BATCH.
    const progressStep = 500;
    let nextProgressAt = progressStep;
    const rows = [];
    for (let i = 0; i < fns.length; i += BATCH) {
        const batch = fns.slice(i, i + BATCH);
        const texts = batch.map(fnToText);
        let vecs;
        try {
            vecs = useOpenAI
                ? await embedBatchOpenAI(texts, apiKey)
                : await embedBatchLocal(texts, pipe);
        }
        catch (err) {
            // Best effort: report the failed range and move on to the next batch.
            console.warn(`[guardian embed] batch ${i}–${i + batch.length - 1} failed: ${err?.message ?? err}`);
            continue;
        }
        for (let j = 0; j < batch.length; j++) {
            // Skip functions for which the embedder returned no vector.
            if (!vecs[j])
                continue;
            rows.push({
                file_path: batch[j].file,
                name: batch[j].name,
                line: batch[j].lines[0],
                vec: vecs[j],
            });
        }
        const processed = i + batch.length;
        if (processed >= nextProgressAt && processed < fns.length) {
            console.log(`[guardian embed] ${processed}/${fns.length} functions embedded`);
            nextProgressAt = (Math.floor(processed / progressStep) + 1) * progressStep;
        }
    }
    // NOTE(review): this is also reached with an empty or partial `rows` when
    // batches failed — confirm that rebuildEmbeddings replacing existing data
    // in that case is acceptable.
    store.rebuildEmbeddings(rows);
    const source = useOpenAI ? `OpenAI ${OPENAI_MODEL} dim=${OPENAI_DIM}` : `local ${LOCAL_MODEL} dim=${LOCAL_DIM}`;
    console.log(`[guardian embed] stored ${rows.length} embeddings (${source})`);
}
|
|
95
|
+
/**
 * Embed a single query string (first 300 characters) for hybrid search.
 *
 * With a truthy `apiKey` the OpenAI embedder is used; otherwise the local
 * model is loaded and used. Any error on either path is swallowed and
 * reported as `null`, so callers can degrade to BM25 + call-graph authority.
 *
 * @param {string} query Search text.
 * @param {string} [apiKey] OpenAI API key; when falsy the local model is used.
 * @returns {Promise<Float32Array|null>} The query vector, or null on failure.
 */
export async function embedQuery(query, apiKey) {
    try {
        let vectors;
        if (apiKey) {
            vectors = await embedBatchOpenAI([query.slice(0, 300)], apiKey);
        }
        else {
            const { pipeline } = await import("@xenova/transformers");
            const extractor = await pipeline("feature-extraction", LOCAL_MODEL);
            vectors = await embedBatchLocal([query.slice(0, 300)], extractor);
        }
        return vectors[0] ?? null;
    }
    catch {
        return null;
    }
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FileSpecsStore — filesystem implementation of SpecsStore.
|
|
3
|
+
*
|
|
4
|
+
* This is a drop-in wrapper around the existing file-based behavior.
|
|
5
|
+
* It maps each SpecsStore call to the exact same read/write the codebase
|
|
6
|
+
* did before the interface existed, so no existing behavior changes.
|
|
7
|
+
*
|
|
8
|
+
* File layout (unchanged):
|
|
9
|
+
* <machineDir>/
|
|
10
|
+
* architecture.snapshot.yaml
|
|
11
|
+
* ux.snapshot.yaml
|
|
12
|
+
* codebase-intelligence.json
|
|
13
|
+
* structural-intelligence.json
|
|
14
|
+
* function-intelligence.json
|
|
15
|
+
* mcp-metrics.jsonl
|
|
16
|
+
* <humanDir>/
|
|
17
|
+
* overview.md
|
|
18
|
+
* modules/
|
|
19
|
+
* src-extract.md
|
|
20
|
+
* ...
|
|
21
|
+
*/
|
|
22
|
+
import fs from "node:fs/promises";
|
|
23
|
+
import path from "node:path";
|
|
24
|
+
/** Maps spec name → filename on disk. */
const SPEC_FILENAMES = {
    "architecture.snapshot": "architecture.snapshot.yaml",
    "ux.snapshot": "ux.snapshot.yaml",
    "codebase-intelligence": "codebase-intelligence.json",
    "structural-intelligence": "structural-intelligence.json",
    "function-intelligence": "function-intelligence.json",
    "mcp-metrics": "mcp-metrics.jsonl",
};
/**
 * Resolve a spec name to the filename used on disk.
 *
 * Registered names map to their entry in SPEC_FILENAMES; any other name
 * falls back to `<name>.json`.
 *
 * @param {string} name Spec name.
 * @returns {string} Filename (no directory component added here).
 */
function nameToFilename(name) {
    // Own-property check: a bare `SPEC_FILENAMES[name]` also resolves inherited
    // keys such as "toString" or "constructor" and would return a function
    // instead of a filename.
    return Object.hasOwn(SPEC_FILENAMES, name) ? SPEC_FILENAMES[name] : `${name}.json`;
}
|
|
36
|
+
/**
 * Derive the spec format label from a filename suffix.
 *
 * @param {string} filename Filename to classify.
 * @returns {"yaml"|"jsonl"|"json"|"text"} "text" when no known suffix matches.
 */
function filenameToFormat(filename) {
    const suffixFormats = [
        [".yaml", "yaml"],
        [".jsonl", "jsonl"],
        [".json", "json"],
    ];
    const match = suffixFormats.find(([suffix]) => filename.endsWith(suffix));
    return match ? match[1] : "text";
}
|
|
45
|
+
/**
 * Filesystem implementation of the specs store.
 *
 * Spec blobs and the metrics log live under `machineDir`; human-readable
 * markdown docs live under `humanDir`. Every entry returned by this class
 * reports `tier: "free"`.
 */
export class FileSpecsStore {
    machineDir;
    humanDir;
    /**
     * @param {string} machineDir Directory for spec files and mcp-metrics.jsonl.
     * @param {string} humanDir Directory for markdown docs.
     */
    constructor(machineDir, humanDir) {
        this.machineDir = machineDir;
        this.humanDir = humanDir;
    }
    /** Create both directories (recursively) if they do not exist yet. */
    async init() {
        await fs.mkdir(this.machineDir, { recursive: true });
        await fs.mkdir(this.humanDir, { recursive: true });
    }
    async close() {
        // nothing to close for file IO
    }
    // ── Spec blobs ─────────────────────────────────────────────────────────────
    /**
     * Read one spec from `machineDir`.
     *
     * @param {string} name Spec name (resolved through nameToFilename).
     * @returns {Promise<{name: string, format: string, content: string, tier: string, updatedAt: number}|null>}
     *   The spec, or null when the file cannot be read or stat'ed for any reason.
     */
    async readSpec(name) {
        const filename = nameToFilename(name);
        const filePath = path.join(this.machineDir, filename);
        try {
            const content = await fs.readFile(filePath, "utf8");
            const stat = await fs.stat(filePath);
            return {
                name,
                format: filenameToFormat(filename),
                content,
                tier: "free",
                updatedAt: stat.mtimeMs,
            };
        }
        catch {
            return null;
        }
    }
    /**
     * Write one spec to `machineDir`.
     *
     * `format` and `tier` are accepted for interface compatibility but are not
     * used here: the filename comes from nameToFilename alone.
     *
     * @param {string} name Spec name.
     * @param {string} content File content, written as UTF-8.
     * @param {string} format Unused by this implementation.
     * @param {string} [tier="free"] Unused by this implementation.
     */
    async writeSpec(name, content, format, tier = "free") {
        const filename = nameToFilename(name);
        await fs.writeFile(path.join(this.machineDir, filename), content, "utf8");
    }
    /**
     * List the spec names present in `machineDir`.
     *
     * Registered filenames are reported under their spec name. Unregistered
     * `.json` files are reported without the extension, because that is the
     * name nameToFilename maps back to the same file; returning "foo.json"
     * would make readSpec/hasSpec look for "foo.json.json". Other unregistered
     * files are reported by filename.
     *
     * @returns {Promise<string[]>} Spec names, or [] when the directory cannot be read.
     */
    async listSpecs() {
        try {
            const entries = await fs.readdir(this.machineDir);
            const nameByFilename = new Map(Object.entries(SPEC_FILENAMES).map(([specName, file]) => [file, specName]));
            return entries
                .filter(e => e.endsWith(".json") || e.endsWith(".yaml") || e.endsWith(".jsonl"))
                .map(e => {
                    const registered = nameByFilename.get(e);
                    if (registered !== undefined)
                        return registered;
                    return e.endsWith(".json") ? e.slice(0, -".json".length) : e;
                });
        }
        catch {
            return [];
        }
    }
    /**
     * @param {string} name Spec name.
     * @returns {Promise<boolean>} True when the spec's file can be stat'ed.
     */
    async hasSpec(name) {
        const filename = nameToFilename(name);
        try {
            await fs.stat(path.join(this.machineDir, filename));
            return true;
        }
        catch {
            return false;
        }
    }
    // ── Human docs ─────────────────────────────────────────────────────────────
    /**
     * Read one markdown doc.
     *
     * @param {string} id Doc id; ":" and "/" both act as path separators.
     * @returns {Promise<{id: string, section: string, title: string, body: string, tier: string, updatedAt: number}|null>}
     *   The doc, or null when it cannot be read. `title` is the first "# " heading, else the id.
     */
    async readDoc(id) {
        const filePath = this._docPath(id);
        try {
            const body = await fs.readFile(filePath, "utf8");
            const stat = await fs.stat(filePath);
            const title = body.match(/^#\s+(.+)$/m)?.[1] ?? id;
            // Split on ":" and "/" so ids in either form ("modules:x", or the
            // "modules/x" form produced by listDocs) yield the same section.
            return { id, section: id.split(/[:/]/)[0], title, body, tier: "free", updatedAt: stat.mtimeMs };
        }
        catch {
            return null;
        }
    }
    /**
     * Write `entry.body` to the doc file for `entry.id`, creating parent directories.
     *
     * @param {{id: string, body: string}} entry
     */
    async writeDoc(entry) {
        const filePath = this._docPath(entry.id);
        await fs.mkdir(path.dirname(filePath), { recursive: true });
        await fs.writeFile(filePath, entry.body, "utf8");
    }
    /**
     * List all markdown docs under `humanDir`, optionally filtered by section.
     *
     * @param {string} [section] When given, only ids starting with this string are returned.
     * @returns {Promise<object[]>} Doc entries with "/"-separated ids.
     */
    async listDocs(section) {
        const results = [];
        await this._walkDocs(this.humanDir, results, section);
        return results;
    }
    /**
     * Recursively collect `.md` files under `dir` into `acc`.
     * Unreadable directories and entries that cannot be stat'ed are skipped.
     */
    async _walkDocs(dir, acc, section) {
        let entries;
        try {
            entries = await fs.readdir(dir);
        }
        catch {
            return;
        }
        for (const e of entries) {
            const full = path.join(dir, e);
            let stat;
            try {
                stat = await fs.stat(full);
            }
            catch {
                // e.g. a dangling symlink or a file removed mid-walk: skip it
                // rather than failing the whole listing.
                continue;
            }
            if (stat.isDirectory()) {
                await this._walkDocs(full, acc, section);
            }
            else if (e.endsWith(".md")) {
                const id = path.relative(this.humanDir, full).replace(/\.md$/, "").replace(/\\/g, "/");
                // NOTE(review): plain prefix match — section "mod" also matches
                // "modules/..."; confirm whether an exact section match is intended.
                if (section && !id.startsWith(section))
                    continue;
                const body = await fs.readFile(full, "utf8");
                const title = body.match(/^#\s+(.+)$/m)?.[1] ?? id;
                acc.push({ id, section: id.split("/")[0], title, body, tier: "free", updatedAt: stat.mtimeMs });
            }
        }
    }
    /** Map a doc id to its file path under `humanDir` (":" becomes "/", ".md" appended). */
    _docPath(id) {
        return path.join(this.humanDir, `${id.replace(/:/g, "/")}.md`);
    }
    // ── Metrics log ────────────────────────────────────────────────────────────
    /**
     * Append one `{ts, event, payload}` JSON line to mcp-metrics.jsonl.
     *
     * @param {string} event Event name.
     * @param {*} payload JSON-serialisable payload.
     */
    async appendMetric(event, payload) {
        const line = JSON.stringify({ ts: Date.now(), event, payload }) + "\n";
        await fs.appendFile(path.join(this.machineDir, "mcp-metrics.jsonl"), line, "utf8");
    }
    /**
     * Read the most recent metric records, oldest first.
     *
     * Blank and unparseable lines are skipped, so one corrupt line does not
     * discard the rest of the log. A limit that is not positive yields [].
     *
     * @param {number} [limit=1000] Maximum number of records to return.
     * @returns {Promise<object[]>} Parsed records, or [] when the log cannot be read.
     */
    async readMetrics(limit = 1000) {
        const max = Math.floor(limit);
        // `slice(-0)` would return the whole log, so handle non-positive limits up front.
        if (!(max > 0))
            return [];
        let raw;
        try {
            raw = await fs.readFile(path.join(this.machineDir, "mcp-metrics.jsonl"), "utf8");
        }
        catch {
            return [];
        }
        const lines = raw.split("\n");
        const recent = [];
        // Walk backwards so only as many lines as needed are parsed.
        for (let i = lines.length - 1; i >= 0 && recent.length < max; i--) {
            const line = lines[i].trim();
            if (!line)
                continue;
            try {
                recent.push(JSON.parse(line));
            }
            catch {
                // Skip a corrupt or partially written line.
            }
        }
        return recent.reverse();
    }
}
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FTS index builder — converts codebase-intelligence.json into FTS5 rows.
|
|
3
|
+
*
|
|
4
|
+
* One row per file: aggregates all endpoints, symbols, and descriptions
|
|
5
|
+
* associated with that file so BM25 can rank files, not individual records.
|
|
6
|
+
*
|
|
7
|
+
* This is what fixes cases like csharp-auth-001 where the current linear
|
|
8
|
+
* scorer misses Users/Login.cs because "auth" doesn't appear literally in
|
|
9
|
+
* the file path — BM25 + porter stemmer ranks it via "login" + "user" + module.
|
|
10
|
+
*/
|
|
11
|
+
import path from "node:path";
|
|
12
|
+
import { normPath } from "./sqlite-specs-store.js";
|
|
13
|
+
const SOURCE_EXTS = new Set([".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".java", ".cs", ".rb", ".rs", ".cpp", ".c", ".h", ".php", ".swift", ".kt"]);
// Only filter true noise — dependency trees and test-only dirs.
// examples/, docs/, fixtures/ may contain real source files.
const NOISE_RE = /(?:^|\/)(?:test|tests|spec|specs|__pycache__|node_modules|vendor|\.git|\.tox|\.venv|venv)\//i;
/**
 * Decide whether a path should be indexed.
 *
 * A path qualifies when its extension (case-insensitive) is in SOURCE_EXTS
 * and no directory segment matches NOISE_RE.
 *
 * @param {string} filePath Path using "/" or "\" separators.
 * @returns {boolean}
 */
function isSourceFile(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    if (!SOURCE_EXTS.has(ext))
        return false;
    // NOISE_RE only recognises "/" separators; convert "\" first so that
    // Windows-style paths such as "node_modules\pkg\index.js" are filtered too.
    return !NOISE_RE.test(filePath.replace(/\\/g, "/"));
}
|
|
21
|
+
/**
 * Build FTS rows from a raw codebase-intelligence object.
 * The intel object is the parsed JSON from codebase-intelligence.json —
 * no schema changes, same structure as today.
 *
 * One row is produced per indexable file (see isSourceFile), keyed by its
 * normalised path. Tokens from the API, model and enum registries, the
 * service map, frontend pages and background tasks are appended to the
 * row's space-separated `symbol_name`, `endpoint`, `body` and `module` fields.
 *
 * @param {object} intel Parsed codebase-intelligence object.
 * @returns {Array<{file_path: string, symbol_name: string, endpoint: string, body: string, module: string}>}
 */
export function buildFTSRows(intel) {
    // Per-file accumulators
    const files = new Map();
    // Fetch (or create) the row for a file; `module` only seeds a newly created row.
    function getRow(filePath, module = "") {
        if (!isSourceFile(filePath))
            return null;
        const normalised = normPath(filePath);
        if (!files.has(normalised)) {
            files.set(normalised, { file_path: normalised, symbol_name: "", endpoint: "", body: "", module });
        }
        return files.get(normalised);
    }
    // Append a token to a space-separated field; falsy tokens are ignored.
    function appendToken(row, field, token) {
        if (!token)
            return;
        row[field] += (row[field] ? " " : "") + token;
    }
    // ── API registry: endpoints → files ──────────────────────────────────────
    for (const [route, entry] of Object.entries(intel.api_registry ?? {})) {
        if (!entry.file)
            continue;
        const row = getRow(entry.file, entry.module ?? "");
        if (!row)
            continue;
        appendToken(row, "endpoint", route);
        appendToken(row, "symbol_name", entry.handler ?? "");
        if (entry.request_schema)
            appendToken(row, "body", entry.request_schema);
        if (entry.response_schema)
            appendToken(row, "body", entry.response_schema);
        for (const sc of entry.service_calls ?? [])
            appendToken(row, "body", sc);
    }
    // ── Model registry: ORM models → files ───────────────────────────────────
    for (const [name, entry] of Object.entries(intel.model_registry ?? {})) {
        if (!entry.file)
            continue;
        const row = getRow(entry.file, entry.module ?? "");
        if (!row)
            continue;
        appendToken(row, "symbol_name", name);
        for (const f of entry.fields ?? [])
            appendToken(row, "body", f);
        for (const r of entry.relationships ?? [])
            appendToken(row, "body", r);
    }
    // ── Enum registry ─────────────────────────────────────────────────────────
    for (const [name, entry] of Object.entries(intel.enum_registry ?? {})) {
        if (!entry.file)
            continue;
        const row = getRow(entry.file, "");
        if (!row)
            continue;
        appendToken(row, "symbol_name", name);
        for (const v of entry.values ?? [])
            appendToken(row, "body", v);
    }
    // ── Service map: module files ─────────────────────────────────────────────
    for (const svc of intel.service_map ?? []) {
        const svcName = svc.name ?? "";
        for (const filePath of svc.files ?? []) {
            const row = getRow(filePath, svcName);
            if (!row)
                continue;
            // getRow already seeds `module` with the service name for new rows,
            // and an earlier section may have set the same module; append only
            // when the name is not present yet, otherwise the field ends up as
            // e.g. "auth auth".
            if (!` ${row.module} `.includes(` ${svcName} `))
                appendToken(row, "module", svcName);
            for (const dep of svc.dependencies ?? [])
                appendToken(row, "body", dep);
        }
    }
    // ── Frontend pages ────────────────────────────────────────────────────────
    for (const page of intel.frontend_pages ?? []) {
        const filePath = page.file ?? page.component ?? page.path;
        if (!filePath)
            continue;
        const row = getRow(filePath, "frontend");
        if (!row)
            continue;
        appendToken(row, "endpoint", page.path ?? "");
        appendToken(row, "symbol_name", page.component ?? "");
        for (const api of page.api_calls ?? [])
            appendToken(row, "body", api);
        for (const c of page.components ?? [])
            appendToken(row, "body", c);
    }
    // ── Background tasks ──────────────────────────────────────────────────────
    for (const task of intel.background_tasks ?? []) {
        if (!task.file)
            continue;
        const row = getRow(task.file, task.module ?? "");
        if (!row)
            continue;
        appendToken(row, "symbol_name", task.name ?? "");
        appendToken(row, "body", task.queue ?? "");
    }
    return Array.from(files.values());
}
|
|
121
|
+
/**
 * Fold the contents of an architecture snapshot into an existing FTS row map.
 *
 * Reads `endpoints`, `data_models`, `enums`, `tasks`, `modules` (files and
 * exports) and `frontend_files` from `arch`. Rows are created on demand for
 * indexable files and mutated in place; nothing is returned.
 *
 * @param {Map<string, object>} rows Row map keyed by normalised file path.
 * @param {object} arch Parsed architecture snapshot.
 */
export function mergeArchitectureRows(rows, arch) {
    // Look up (or create) the row for a file; null when the path is empty or not indexable.
    const rowFor = (filePath, module = "") => {
        if (!filePath || !isSourceFile(filePath))
            return null;
        const key = normPath(filePath);
        if (!rows.has(key)) {
            rows.set(key, { file_path: key, symbol_name: "", endpoint: "", body: "", module });
        }
        return rows.get(key);
    };
    // Append one token to a space-separated field; falsy tokens are ignored.
    const put = (row, field, token) => {
        if (!token)
            return;
        const separator = row[field] ? " " : "";
        row[field] += separator + token;
    };
    const putAll = (row, field, tokens) => {
        for (const token of tokens ?? [])
            put(row, field, token);
    };

    // ── arch.endpoints[] ─────────────────────────────────────────────────────
    for (const endpoint of arch.endpoints ?? []) {
        const row = rowFor(endpoint.file, endpoint.module ?? "");
        if (row) {
            put(row, "endpoint", endpoint.path ?? "");
            put(row, "endpoint", endpoint.method ?? "");
            put(row, "symbol_name", endpoint.handler ?? "");
            putAll(row, "body", endpoint.service_calls);
        }
    }

    // ── arch.data_models[] ───────────────────────────────────────────────────
    for (const model of arch.data_models ?? []) {
        const row = rowFor(model.file, model.module ?? "");
        if (row) {
            put(row, "symbol_name", model.name ?? "");
            putAll(row, "body", model.fields);
            putAll(row, "body", model.relationships);
        }
    }

    // ── arch.enums[] ─────────────────────────────────────────────────────────
    for (const enumDef of arch.enums ?? []) {
        const row = rowFor(enumDef.file, "");
        if (row) {
            put(row, "symbol_name", enumDef.name ?? "");
            putAll(row, "body", enumDef.values);
        }
    }

    // ── arch.tasks[] (background tasks / celery / etc.) ──────────────────────
    for (const task of arch.tasks ?? []) {
        const row = rowFor(task.file, task.module ?? "");
        if (row) {
            put(row, "symbol_name", task.name ?? "");
            put(row, "body", task.queue ?? "");
        }
    }

    // ── arch.modules[].files + exports ────────────────────────────────────────
    // mod.exports is [{file, symbols: string[], exports: [...]}], not a flat string array
    for (const mod of arch.modules ?? []) {
        for (const filePath of mod.files ?? []) {
            const row = rowFor(filePath, mod.id ?? mod.name ?? "");
            // Back-fill the module id on rows that were created without one.
            if (row && mod.id && !row.module)
                row.module = mod.id;
        }
        for (const exported of mod.exports ?? []) {
            // An entry may be a string (old format) or {file, symbols} (new format).
            if (typeof exported === "string") {
                const row = rowFor(exported, mod.id ?? "");
                if (row)
                    put(row, "symbol_name", exported);
                continue;
            }
            if (!exported || typeof exported !== "object")
                continue;
            const row = rowFor(exported.file ?? "", mod.id ?? "");
            if (!row)
                continue;
            for (const symbol of exported.symbols ?? []) {
                if (typeof symbol === "string")
                    put(row, "symbol_name", symbol);
            }
        }
    }

    // ── arch.frontend_files[] ────────────────────────────────────────────────
    // Entries may be plain path strings or objects carrying a `file` property;
    // these only ensure a row exists, no tokens are added.
    for (const entry of arch.frontend_files ?? []) {
        const candidate = entry.file ?? entry;
        rowFor(typeof candidate === "string" ? candidate : "", "frontend");
    }
}
|
|
216
|
+
/**
 * Fold function-intelligence entries into the FTS row map.
 *
 * For each function on an indexable file, the name is appended to the row's
 * `symbol_name`, and the docstring, params and calls are appended to `body`.
 * Rows are created on demand and mutated in place; nothing is returned.
 *
 * @param {Map<string, object>} rows Row map keyed by normalised file path.
 * @param {{functions?: object[]}} funcIntel Parsed function-intelligence object.
 */
export function mergeFunctionIntelRows(rows, funcIntel) {
    // Look up (or create) the row for a file; null when the path is empty or not indexable.
    const rowFor = (filePath) => {
        if (!filePath || !isSourceFile(filePath))
            return null;
        const key = normPath(filePath);
        if (!rows.has(key)) {
            rows.set(key, { file_path: key, symbol_name: "", endpoint: "", body: "", module: "" });
        }
        return rows.get(key);
    };
    for (const fn of funcIntel.functions ?? []) {
        const row = rowFor(fn.file ?? "");
        if (!row)
            continue;
        if (fn.name) {
            const separator = row.symbol_name ? " " : "";
            row.symbol_name += separator + fn.name;
        }
        if (fn.docstring) {
            const separator = row.body ? " " : "";
            row.body += separator + fn.docstring;
        }
        // Params, then calls; each item is always prefixed with a space, even
        // when `body` is still empty.
        for (const list of [fn.params, fn.calls]) {
            for (const item of list ?? [])
                row.body += " " + item;
        }
    }
}
|
|
246
|
+
/**
 * Build import edges from `arch.dependencies.file_graph`.
 *
 * Two graph shapes are accepted: an array of edge objects (`from`/`to`, with
 * `file`/`imports` as fallbacks) or an object mapping a file to an entry
 * holding an `imports` (or `dependencies`) list. Only edges whose two ends
 * are both strings and both indexable files are kept.
 *
 * @param {object} [arch] Parsed architecture snapshot.
 * @returns {Array<{file: string, imports: string}>} Edges with normalised paths;
 *   empty when there is no file graph.
 */
export function buildDepEdges(arch) {
    const graph = arch?.dependencies?.file_graph;
    const edges = [];
    if (!graph)
        return edges;

    if (!Array.isArray(graph)) {
        // Old format: {file: {imports: [...]}}
        for (const [file, info] of Object.entries(graph)) {
            if (!isSourceFile(file))
                continue;
            const source = normPath(file);
            const targets = info.imports ?? info.dependencies ?? [];
            for (const target of targets) {
                if (typeof target === "string" && isSourceFile(target))
                    edges.push({ file: source, imports: normPath(target) });
            }
        }
        return edges;
    }

    // New format: [{from, to}, ...]
    for (const edge of graph) {
        const source = edge.from ?? edge.file;
        const target = edge.to ?? edge.imports;
        const bothStrings = typeof source === "string" && typeof target === "string";
        if (!bothStrings || !isSourceFile(source) || !isSourceFile(target))
            continue;
        edges.push({ file: normPath(source), imports: normPath(target) });
    }
    return edges;
}
|
|
285
|
+
/**
 * Merge arch.endpoints with intel.api_registry into one record per endpoint,
 * keyed by "METHOD::path" (method upper-cased for the key only).
 * arch supplies method/handler/file/module; intel overrides schemas and
 * service_calls on a matching key, or contributes a whole record otherwise.
 */
function mergeEndpointFacts(arch, intel) {
    const endpointMap = new Map();
    for (const ep of arch?.endpoints ?? []) {
        if (!ep.path)
            continue;
        const key = `${(ep.method ?? "").toUpperCase()}::${ep.path}`;
        endpointMap.set(key, {
            method: ep.method ?? "",
            path: ep.path,
            handler: ep.handler ?? "",
            file_path: ep.file ?? ep.file_path ?? "",
            module: ep.module ?? "",
            service_calls: ep.service_calls ?? [],
            request_schema: "",
            response_schema: "",
        });
    }
    for (const [route, entry] of Object.entries(intel?.api_registry ?? {})) {
        // route is like "GET /users" or "/users"
        const parts = route.trim().split(/\s+/);
        const method = parts.length >= 2 ? parts[0].toUpperCase() : "";
        const p = parts.length >= 2 ? parts[1] : parts[0];
        // Endpoints without a path are skipped for arch above; apply the same
        // rule here so a blank route does not produce an empty-path record.
        if (!p)
            continue;
        const key = `${method}::${p}`;
        const existing = endpointMap.get(key);
        if (existing) {
            if (entry.request_schema)
                existing.request_schema = entry.request_schema;
            if (entry.response_schema)
                existing.response_schema = entry.response_schema;
            if (entry.service_calls?.length)
                existing.service_calls = entry.service_calls;
        }
        else {
            endpointMap.set(key, {
                method,
                path: p,
                handler: entry.handler ?? "",
                file_path: entry.file ?? "",
                module: entry.module ?? "",
                service_calls: entry.service_calls ?? [],
                request_schema: entry.request_schema ?? "",
                response_schema: entry.response_schema ?? "",
            });
        }
    }
    return Array.from(endpointMap.values());
}
/**
 * Merge arch.data_models with intel.model_registry into one record per model name.
 * Non-empty fields/relationships from intel replace those taken from arch.
 */
function mergeModelFacts(arch, intel) {
    const modelMap = new Map();
    for (const m of arch?.data_models ?? []) {
        if (!m.name)
            continue;
        modelMap.set(m.name, {
            name: m.name,
            file_path: m.file ?? m.file_path ?? "",
            module: m.module ?? "",
            fields: m.fields ?? [],
            relationships: m.relationships ?? [],
        });
    }
    for (const [name, entry] of Object.entries(intel?.model_registry ?? {})) {
        const existing = modelMap.get(name);
        if (existing) {
            if (entry.fields?.length)
                existing.fields = entry.fields;
            if (entry.relationships?.length)
                existing.relationships = entry.relationships;
        }
        else {
            modelMap.set(name, {
                name,
                file_path: entry.file ?? "",
                module: entry.module ?? "",
                fields: entry.fields ?? [],
                relationships: entry.relationships ?? [],
            });
        }
    }
    return Array.from(modelMap.values());
}
/**
 * Populate the FTS5 search_fts table + file_deps graph from all extract output.
 * intel — parsed codebase-intelligence.json
 * arch — parsed architecture.snapshot.yaml (optional)
 * funcIntel — parsed function-intelligence.json (optional)
 *
 * Store calls, in order: rebuildSearchIndex (always), rebuildFunctionIndex
 * (only when funcIntel has functions), rebuildDeps (only when arch is given),
 * rebuildEndpointsRaw and rebuildModelsRaw (always).
 *
 * @param {object} store Target store exposing the rebuild* methods listed above.
 * @param {object} intel Parsed codebase-intelligence object.
 * @param {object} [arch] Parsed architecture snapshot.
 * @param {object} [funcIntel] Parsed function-intelligence object.
 */
export function populateFTSIndex(store, intel, arch, funcIntel) {
    const rowMap = new Map();
    // The fact-table merges below tolerate a missing `intel`; give the row
    // builder an empty object so it does too instead of throwing.
    for (const row of buildFTSRows(intel ?? {}))
        rowMap.set(row.file_path, row);
    if (arch)
        mergeArchitectureRows(rowMap, arch);
    if (funcIntel)
        mergeFunctionIntelRows(rowMap, funcIntel);
    store.rebuildSearchIndex(Array.from(rowMap.values()));
    // Per-function index — enables symbol-level search results with line numbers.
    if (funcIntel?.functions?.length) {
        store.rebuildFunctionIndex(funcIntel.functions);
    }
    // Build dependency graph
    if (arch) {
        const edges = buildDepEdges(arch);
        store.rebuildDeps(edges);
    }
    // ── Normalised fact tables ─────────────────────────────────────────────────
    // arch.endpoints is the richer source (has method + file); intel.api_registry adds
    // request/response schemas and service_calls that arch may not have.
    store.rebuildEndpointsRaw(mergeEndpointFacts(arch, intel));
    // Same merge for arch data_models + intel model_registry.
    store.rebuildModelsRaw(mergeModelFacts(arch, intel));
}
|