@dreb/coding-agent 1.18.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/tools/search.d.ts.map +1 -1
- package/dist/core/tools/search.js +14 -36
- package/dist/core/tools/search.js.map +1 -1
- package/package.json +2 -1
- package/dist/core/search/chunker.d.ts +0 -21
- package/dist/core/search/chunker.d.ts.map +0 -1
- package/dist/core/search/chunker.js +0 -51
- package/dist/core/search/chunker.js.map +0 -1
- package/dist/core/search/db.d.ts +0 -89
- package/dist/core/search/db.d.ts.map +0 -1
- package/dist/core/search/db.js +0 -406
- package/dist/core/search/db.js.map +0 -1
- package/dist/core/search/embedder.d.ts +0 -52
- package/dist/core/search/embedder.d.ts.map +0 -1
- package/dist/core/search/embedder.js +0 -158
- package/dist/core/search/embedder.js.map +0 -1
- package/dist/core/search/index-manager.d.ts +0 -55
- package/dist/core/search/index-manager.d.ts.map +0 -1
- package/dist/core/search/index-manager.js +0 -311
- package/dist/core/search/index-manager.js.map +0 -1
- package/dist/core/search/metrics/bm25.d.ts +0 -10
- package/dist/core/search/metrics/bm25.d.ts.map +0 -1
- package/dist/core/search/metrics/bm25.js +0 -32
- package/dist/core/search/metrics/bm25.js.map +0 -1
- package/dist/core/search/metrics/git-recency.d.ts +0 -14
- package/dist/core/search/metrics/git-recency.d.ts.map +0 -1
- package/dist/core/search/metrics/git-recency.js +0 -123
- package/dist/core/search/metrics/git-recency.js.map +0 -1
- package/dist/core/search/metrics/import-graph.d.ts +0 -15
- package/dist/core/search/metrics/import-graph.d.ts.map +0 -1
- package/dist/core/search/metrics/import-graph.js +0 -115
- package/dist/core/search/metrics/import-graph.js.map +0 -1
- package/dist/core/search/metrics/path-match.d.ts +0 -13
- package/dist/core/search/metrics/path-match.d.ts.map +0 -1
- package/dist/core/search/metrics/path-match.js +0 -54
- package/dist/core/search/metrics/path-match.js.map +0 -1
- package/dist/core/search/metrics/symbol-match.d.ts +0 -12
- package/dist/core/search/metrics/symbol-match.d.ts.map +0 -1
- package/dist/core/search/metrics/symbol-match.js +0 -62
- package/dist/core/search/metrics/symbol-match.js.map +0 -1
- package/dist/core/search/metrics/tokenize.d.ts +0 -12
- package/dist/core/search/metrics/tokenize.d.ts.map +0 -1
- package/dist/core/search/metrics/tokenize.js +0 -29
- package/dist/core/search/metrics/tokenize.js.map +0 -1
- package/dist/core/search/poem.d.ts +0 -38
- package/dist/core/search/poem.d.ts.map +0 -1
- package/dist/core/search/poem.js +0 -214
- package/dist/core/search/poem.js.map +0 -1
- package/dist/core/search/query-classifier.d.ts +0 -17
- package/dist/core/search/query-classifier.d.ts.map +0 -1
- package/dist/core/search/query-classifier.js +0 -54
- package/dist/core/search/query-classifier.js.map +0 -1
- package/dist/core/search/scanner.d.ts +0 -30
- package/dist/core/search/scanner.d.ts.map +0 -1
- package/dist/core/search/scanner.js +0 -344
- package/dist/core/search/scanner.js.map +0 -1
- package/dist/core/search/search.d.ts +0 -51
- package/dist/core/search/search.d.ts.map +0 -1
- package/dist/core/search/search.js +0 -381
- package/dist/core/search/search.js.map +0 -1
- package/dist/core/search/text-chunker.d.ts +0 -15
- package/dist/core/search/text-chunker.d.ts.map +0 -1
- package/dist/core/search/text-chunker.js +0 -580
- package/dist/core/search/text-chunker.js.map +0 -1
- package/dist/core/search/tree-sitter-chunker.d.ts +0 -25
- package/dist/core/search/tree-sitter-chunker.d.ts.map +0 -1
- package/dist/core/search/tree-sitter-chunker.js +0 -357
- package/dist/core/search/tree-sitter-chunker.js.map +0 -1
- package/dist/core/search/types.d.ts +0 -96
- package/dist/core/search/types.d.ts.map +0 -1
- package/dist/core/search/types.js +0 -6
- package/dist/core/search/types.js.map +0 -1
- package/dist/core/search/vector-store.d.ts +0 -43
- package/dist/core/search/vector-store.d.ts.map +0 -1
- package/dist/core/search/vector-store.js +0 -73
- package/dist/core/search/vector-store.js.map +0 -1
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Embedding pipeline using @huggingface/transformers.
|
|
3
|
-
*
|
|
4
|
-
* Generates normalized embeddings for semantic search. The embedding
|
|
5
|
-
* dimension is derived at runtime from the model's actual output.
|
|
6
|
-
* First use downloads the model (~23MB) to the configured cache directory.
|
|
7
|
-
*/
|
|
8
|
-
// ============================================================================
|
|
9
|
-
// Constants
|
|
10
|
-
// ============================================================================
|
|
11
|
-
const DEFAULT_MODEL_NAME = "Xenova/all-MiniLM-L6-v2";
|
|
12
|
-
const DEFAULT_BATCH_SIZE = 32;
|
|
13
|
-
const DEFAULT_DIMENSION = 384;
|
|
14
|
-
/**
|
|
15
|
-
* Model-specific prefixes for document vs query embeddings.
|
|
16
|
-
* nomic-embed-text-v1.5 requires these; most other models don't.
|
|
17
|
-
*/
|
|
18
|
-
const MODEL_PREFIXES = {
|
|
19
|
-
"nomic-ai/nomic-embed-text-v1.5": { document: "search_document: ", query: "search_query: " },
|
|
20
|
-
};
|
|
21
|
-
// ============================================================================
|
|
22
|
-
// Embedder
|
|
23
|
-
// ============================================================================
|
|
24
|
-
export class Embedder {
|
|
25
|
-
modelCacheDir;
|
|
26
|
-
modelName;
|
|
27
|
-
batchSize;
|
|
28
|
-
extractor = null;
|
|
29
|
-
initPromise = null;
|
|
30
|
-
resolvedDimension = null;
|
|
31
|
-
constructor(options) {
|
|
32
|
-
this.modelCacheDir = options.modelCacheDir;
|
|
33
|
-
this.modelName = options.modelName ?? DEFAULT_MODEL_NAME;
|
|
34
|
-
this.batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;
|
|
35
|
-
}
|
|
36
|
-
/**
|
|
37
|
-
* Initialize the model pipeline. Must be called before embedding.
|
|
38
|
-
*
|
|
39
|
-
* On first use this downloads the ONNX model to `modelCacheDir`.
|
|
40
|
-
* Subsequent calls reuse the cached model.
|
|
41
|
-
*/
|
|
42
|
-
async initialize() {
|
|
43
|
-
if (this.extractor)
|
|
44
|
-
return;
|
|
45
|
-
if (this.initPromise)
|
|
46
|
-
return this.initPromise;
|
|
47
|
-
this.initPromise = (async () => {
|
|
48
|
-
try {
|
|
49
|
-
// Dynamic import — @huggingface/transformers is a heavy dependency
|
|
50
|
-
const { pipeline, env } = await import("@huggingface/transformers");
|
|
51
|
-
// Direct the model cache to our managed directory
|
|
52
|
-
env.cacheDir = this.modelCacheDir;
|
|
53
|
-
// Suppress the onnxruntime native addon warning — WASM fallback is fine
|
|
54
|
-
// The library tries to load native onnxruntime first and logs a warning
|
|
55
|
-
// when it falls back to WASM. We suppress this to avoid confusing users.
|
|
56
|
-
const originalWarn = console.warn;
|
|
57
|
-
console.warn = (...args) => {
|
|
58
|
-
const msg = typeof args[0] === "string" ? args[0] : "";
|
|
59
|
-
if (msg.includes("onnxruntime") || msg.includes("ONNX"))
|
|
60
|
-
return;
|
|
61
|
-
originalWarn.apply(console, args);
|
|
62
|
-
};
|
|
63
|
-
try {
|
|
64
|
-
this.extractor = await pipeline("feature-extraction", this.modelName, {
|
|
65
|
-
dtype: "q8",
|
|
66
|
-
device: "cpu",
|
|
67
|
-
});
|
|
68
|
-
}
|
|
69
|
-
finally {
|
|
70
|
-
console.warn = originalWarn;
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
catch (err) {
|
|
74
|
-
// Reset so subsequent calls can retry instead of returning
|
|
75
|
-
// the same rejected promise forever
|
|
76
|
-
this.initPromise = null;
|
|
77
|
-
throw err;
|
|
78
|
-
}
|
|
79
|
-
})();
|
|
80
|
-
return this.initPromise;
|
|
81
|
-
}
|
|
82
|
-
/**
|
|
83
|
-
* Embed documents for indexing.
|
|
84
|
-
*
|
|
85
|
-
* Applies model-specific prefixes if required, then processes texts
|
|
86
|
-
* in batches of `batchSize` for memory efficiency.
|
|
87
|
-
*/
|
|
88
|
-
async embedDocuments(texts, onProgress) {
|
|
89
|
-
this.ensureInitialized();
|
|
90
|
-
const results = [];
|
|
91
|
-
const total = texts.length;
|
|
92
|
-
const prefix = MODEL_PREFIXES[this.modelName]?.document ?? "";
|
|
93
|
-
for (let i = 0; i < total; i += this.batchSize) {
|
|
94
|
-
const batch = texts.slice(i, i + this.batchSize);
|
|
95
|
-
const prefixed = prefix ? batch.map((t) => prefix + t) : batch;
|
|
96
|
-
const output = await this.extractor(prefixed, {
|
|
97
|
-
pooling: "mean",
|
|
98
|
-
normalize: true,
|
|
99
|
-
});
|
|
100
|
-
// output.data is a flat Float32Array of shape [batchLen, dim]
|
|
101
|
-
const data = output.data;
|
|
102
|
-
const dim = data.length / batch.length;
|
|
103
|
-
if (this.resolvedDimension === null) {
|
|
104
|
-
this.resolvedDimension = dim;
|
|
105
|
-
}
|
|
106
|
-
for (let j = 0; j < batch.length; j++) {
|
|
107
|
-
const start = j * dim;
|
|
108
|
-
results.push(data.slice(start, start + dim));
|
|
109
|
-
}
|
|
110
|
-
if (onProgress) {
|
|
111
|
-
onProgress("embedding", Math.min(i + batch.length, total), total);
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
return results;
|
|
115
|
-
}
|
|
116
|
-
/**
|
|
117
|
-
* Embed a query for search.
|
|
118
|
-
*
|
|
119
|
-
* Applies model-specific query prefix if required.
|
|
120
|
-
*/
|
|
121
|
-
async embedQuery(query) {
|
|
122
|
-
this.ensureInitialized();
|
|
123
|
-
const prefix = MODEL_PREFIXES[this.modelName]?.query ?? "";
|
|
124
|
-
const output = await this.extractor(prefix + query, {
|
|
125
|
-
pooling: "mean",
|
|
126
|
-
normalize: true,
|
|
127
|
-
});
|
|
128
|
-
// Single input — derive dimension and slice to exactly one vector
|
|
129
|
-
const data = output.data;
|
|
130
|
-
const dim = data.length;
|
|
131
|
-
if (this.resolvedDimension === null) {
|
|
132
|
-
this.resolvedDimension = dim;
|
|
133
|
-
}
|
|
134
|
-
return data.slice(0, dim);
|
|
135
|
-
}
|
|
136
|
-
/** Get the embedding dimension. Returns the model's actual dimension once known, or 384 as default. */
|
|
137
|
-
get dimension() {
|
|
138
|
-
return this.resolvedDimension ?? DEFAULT_DIMENSION;
|
|
139
|
-
}
|
|
140
|
-
/** Dispose the pipeline to free memory. */
|
|
141
|
-
dispose() {
|
|
142
|
-
if (this.extractor) {
|
|
143
|
-
// The pipeline object may have a dispose method depending on the version
|
|
144
|
-
if (typeof this.extractor.dispose === "function") {
|
|
145
|
-
this.extractor.dispose();
|
|
146
|
-
}
|
|
147
|
-
this.extractor = null;
|
|
148
|
-
}
|
|
149
|
-
this.initPromise = null;
|
|
150
|
-
}
|
|
151
|
-
/** Throw if initialize() hasn't been called yet. */
|
|
152
|
-
ensureInitialized() {
|
|
153
|
-
if (!this.extractor) {
|
|
154
|
-
throw new Error("Embedder not initialized. Call initialize() first.");
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
//# sourceMappingURL=embedder.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../../src/core/search/embedder.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAiBH,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,kBAAkB,GAAG,yBAAyB,CAAC;AACrD,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAC9B,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;GAGG;AACH,MAAM,cAAc,GAAwD;IAC3E,gCAAgC,EAAE,EAAE,QAAQ,EAAE,mBAAmB,EAAE,KAAK,EAAE,gBAAgB,EAAE;CAC5F,CAAC;AAEF,+EAA+E;AAC/E,WAAW;AACX,+EAA+E;AAE/E,MAAM,OAAO,QAAQ;IACH,aAAa,CAAS;IACtB,SAAS,CAAS;IAClB,SAAS,CAAS;IAC3B,SAAS,GAAe,IAAI,CAAC;IAC7B,WAAW,GAAyB,IAAI,CAAC;IACzC,iBAAiB,GAAkB,IAAI,CAAC;IAEhD,YAAY,OAAwB,EAAE;QACrC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC;QAC3C,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;QACzD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;IAAA,CACzD;IAED;;;;;OAKG;IACH,KAAK,CAAC,UAAU,GAAkB;QACjC,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO;QAC3B,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO,IAAI,CAAC,WAAW,CAAC;QAE9C,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;YAC/B,IAAI,CAAC;gBACJ,qEAAmE;gBACnE,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;gBAEpE,kDAAkD;gBAClD,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC;gBAElC,0EAAwE;gBACxE,wEAAwE;gBACxE,yEAAyE;gBACzE,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;gBAClC,OAAO,CAAC,IAAI,GAAG,CAAC,GAAG,IAAW,EAAE,EAAE,CAAC;oBAClC,MAAM,GAAG,GAAG,OAAO,IAAI,CAAC,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBACvD,IAAI,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;wBAAE,OAAO;oBAChE,YAAY,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;gBAAA,CAClC,CAAC;gBAEF,IAAI,CAAC;oBACJ,IAAI,CAAC,SAAS,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,SAAS,EAAE;wBACrE,KAAK,EAAE,IAAW;wBAClB,MAAM,EAAE,KAAY;qBACpB,CAAC,CAAC;gBACJ,CAAC;wBAAS,CAAC;oBACV,OAAO,CAAC,IAAI,GAAG,YAAY,CAAC;gBAC7B,CAAC;YACF,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACd,2DAA2D;gBAC3D,oCAAoC;gBACpC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;gBACxB,MAAM,GAAG,CAAC;YACX,CAAC;QAAA,CACD,CAAC,EAAE,CAAC;QAEL,OAAO,IAAI,CAAC,WAAW,CAAC;IAAA,CACxB;IAED;;;;;OAKG;IACH,KAAK,CAAC,cAAc,CAAC,KAAe,EAAE,UAAk
C,EAA2B;QAClG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAEzB,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;QAC3B,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,IAAI,EAAE,CAAC;QAE9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YAChD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;YAE/D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAU,CAAC,QAAQ,EAAE;gBAC9C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aACf,CAAC,CAAC;YAEH,8DAA8D;YAC9D,MAAM,IAAI,GAAiB,MAAM,CAAC,IAAI,CAAC;YACvC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACvC,IAAI,IAAI,CAAC,iBAAiB,KAAK,IAAI,EAAE,CAAC;gBACrC,IAAI,CAAC,iBAAiB,GAAG,GAAG,CAAC;YAC9B,CAAC;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;gBACtB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC;YAC9C,CAAC;YAED,IAAI,UAAU,EAAE,CAAC;gBAChB,UAAU,CAAC,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,CAAC,CAAC;YACnE,CAAC;QACF,CAAC;QAED,OAAO,OAAO,CAAC;IAAA,CACf;IAED;;;;OAIG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa,EAAyB;QACtD,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAEzB,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,IAAI,EAAE,CAAC;QAC3D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAU,CAAC,MAAM,GAAG,KAAK,EAAE;YACpD,OAAO,EAAE,MAAM;YACf,SAAS,EAAE,IAAI;SACf,CAAC,CAAC;QAEH,oEAAkE;QAClE,MAAM,IAAI,GAAiB,MAAM,CAAC,IAAI,CAAC;QACvC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACxB,IAAI,IAAI,CAAC,iBAAiB,KAAK,IAAI,EAAE,CAAC;YACrC,IAAI,CAAC,iBAAiB,GAAG,GAAG,CAAC;QAC9B,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAAA,CAC1B;IAED,uGAAuG;IACvG,IAAI,SAAS,GAAW;QACvB,OAAO,IAAI,CAAC,iBAAiB,IAAI,iBAAiB,CAAC;IAAA,CACnD;IAED,2CAA2C;IAC3C,OAAO,GAAS;QACf,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,yEAAyE;YACzE,IAAI,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,KAAK,UAAU,EAAE,CAAC;gBA
ClD,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;YAC1B,CAAC;YACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACvB,CAAC;QACD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAAA,CACxB;IAED,oDAAoD;IAC5C,iBAAiB,GAAS;QACjC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACvE,CAAC;IAAA,CACD;CACD","sourcesContent":["/**\n * Embedding pipeline using @huggingface/transformers.\n *\n * Generates normalized embeddings for semantic search. The embedding\n * dimension is derived at runtime from the model's actual output.\n * First use downloads the model (~23MB) to the configured cache directory.\n */\n\nimport type { IndexProgressCallback } from \"./types.js\";\n\n// ============================================================================\n// Types\n// ============================================================================\n\nexport interface EmbedderOptions {\n\t/** Absolute path to the model cache directory (e.g. ~/.dreb/agent/models/). */\n\tmodelCacheDir: string;\n\t/** HuggingFace model name. Default: 'Xenova/all-MiniLM-L6-v2'. */\n\tmodelName?: string;\n\t/** Number of texts to embed per batch. Default: 32. 
*/\n\tbatchSize?: number;\n}\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst DEFAULT_MODEL_NAME = \"Xenova/all-MiniLM-L6-v2\";\nconst DEFAULT_BATCH_SIZE = 32;\nconst DEFAULT_DIMENSION = 384;\n\n/**\n * Model-specific prefixes for document vs query embeddings.\n * nomic-embed-text-v1.5 requires these; most other models don't.\n */\nconst MODEL_PREFIXES: Record<string, { document: string; query: string }> = {\n\t\"nomic-ai/nomic-embed-text-v1.5\": { document: \"search_document: \", query: \"search_query: \" },\n};\n\n// ============================================================================\n// Embedder\n// ============================================================================\n\nexport class Embedder {\n\tprivate readonly modelCacheDir: string;\n\tprivate readonly modelName: string;\n\tprivate readonly batchSize: number;\n\tprivate extractor: any | null = null;\n\tprivate initPromise: Promise<void> | null = null;\n\tprivate resolvedDimension: number | null = null;\n\n\tconstructor(options: EmbedderOptions) {\n\t\tthis.modelCacheDir = options.modelCacheDir;\n\t\tthis.modelName = options.modelName ?? DEFAULT_MODEL_NAME;\n\t\tthis.batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;\n\t}\n\n\t/**\n\t * Initialize the model pipeline. 
Must be called before embedding.\n\t *\n\t * On first use this downloads the ONNX model to `modelCacheDir`.\n\t * Subsequent calls reuse the cached model.\n\t */\n\tasync initialize(): Promise<void> {\n\t\tif (this.extractor) return;\n\t\tif (this.initPromise) return this.initPromise;\n\n\t\tthis.initPromise = (async () => {\n\t\t\ttry {\n\t\t\t\t// Dynamic import — @huggingface/transformers is a heavy dependency\n\t\t\t\tconst { pipeline, env } = await import(\"@huggingface/transformers\");\n\n\t\t\t\t// Direct the model cache to our managed directory\n\t\t\t\tenv.cacheDir = this.modelCacheDir;\n\n\t\t\t\t// Suppress the onnxruntime native addon warning — WASM fallback is fine\n\t\t\t\t// The library tries to load native onnxruntime first and logs a warning\n\t\t\t\t// when it falls back to WASM. We suppress this to avoid confusing users.\n\t\t\t\tconst originalWarn = console.warn;\n\t\t\t\tconsole.warn = (...args: any[]) => {\n\t\t\t\t\tconst msg = typeof args[0] === \"string\" ? args[0] : \"\";\n\t\t\t\t\tif (msg.includes(\"onnxruntime\") || msg.includes(\"ONNX\")) return;\n\t\t\t\t\toriginalWarn.apply(console, args);\n\t\t\t\t};\n\n\t\t\t\ttry {\n\t\t\t\t\tthis.extractor = await pipeline(\"feature-extraction\", this.modelName, {\n\t\t\t\t\t\tdtype: \"q8\" as any,\n\t\t\t\t\t\tdevice: \"cpu\" as any,\n\t\t\t\t\t});\n\t\t\t\t} finally {\n\t\t\t\t\tconsole.warn = originalWarn;\n\t\t\t\t}\n\t\t\t} catch (err) {\n\t\t\t\t// Reset so subsequent calls can retry instead of returning\n\t\t\t\t// the same rejected promise forever\n\t\t\t\tthis.initPromise = null;\n\t\t\t\tthrow err;\n\t\t\t}\n\t\t})();\n\n\t\treturn this.initPromise;\n\t}\n\n\t/**\n\t * Embed documents for indexing.\n\t *\n\t * Applies model-specific prefixes if required, then processes texts\n\t * in batches of `batchSize` for memory efficiency.\n\t */\n\tasync embedDocuments(texts: string[], onProgress?: IndexProgressCallback): Promise<Float32Array[]> {\n\t\tthis.ensureInitialized();\n\n\t\tconst 
results: Float32Array[] = [];\n\t\tconst total = texts.length;\n\t\tconst prefix = MODEL_PREFIXES[this.modelName]?.document ?? \"\";\n\n\t\tfor (let i = 0; i < total; i += this.batchSize) {\n\t\t\tconst batch = texts.slice(i, i + this.batchSize);\n\t\t\tconst prefixed = prefix ? batch.map((t) => prefix + t) : batch;\n\n\t\t\tconst output = await this.extractor!(prefixed, {\n\t\t\t\tpooling: \"mean\",\n\t\t\t\tnormalize: true,\n\t\t\t});\n\n\t\t\t// output.data is a flat Float32Array of shape [batchLen, dim]\n\t\t\tconst data: Float32Array = output.data;\n\t\t\tconst dim = data.length / batch.length;\n\t\t\tif (this.resolvedDimension === null) {\n\t\t\t\tthis.resolvedDimension = dim;\n\t\t\t}\n\t\t\tfor (let j = 0; j < batch.length; j++) {\n\t\t\t\tconst start = j * dim;\n\t\t\t\tresults.push(data.slice(start, start + dim));\n\t\t\t}\n\n\t\t\tif (onProgress) {\n\t\t\t\tonProgress(\"embedding\", Math.min(i + batch.length, total), total);\n\t\t\t}\n\t\t}\n\n\t\treturn results;\n\t}\n\n\t/**\n\t * Embed a query for search.\n\t *\n\t * Applies model-specific query prefix if required.\n\t */\n\tasync embedQuery(query: string): Promise<Float32Array> {\n\t\tthis.ensureInitialized();\n\n\t\tconst prefix = MODEL_PREFIXES[this.modelName]?.query ?? \"\";\n\t\tconst output = await this.extractor!(prefix + query, {\n\t\t\tpooling: \"mean\",\n\t\t\tnormalize: true,\n\t\t});\n\n\t\t// Single input — derive dimension and slice to exactly one vector\n\t\tconst data: Float32Array = output.data;\n\t\tconst dim = data.length;\n\t\tif (this.resolvedDimension === null) {\n\t\t\tthis.resolvedDimension = dim;\n\t\t}\n\t\treturn data.slice(0, dim);\n\t}\n\n\t/** Get the embedding dimension. Returns the model's actual dimension once known, or 384 as default. */\n\tget dimension(): number {\n\t\treturn this.resolvedDimension ?? DEFAULT_DIMENSION;\n\t}\n\n\t/** Dispose the pipeline to free memory. 
*/\n\tdispose(): void {\n\t\tif (this.extractor) {\n\t\t\t// The pipeline object may have a dispose method depending on the version\n\t\t\tif (typeof this.extractor.dispose === \"function\") {\n\t\t\t\tthis.extractor.dispose();\n\t\t\t}\n\t\t\tthis.extractor = null;\n\t\t}\n\t\tthis.initPromise = null;\n\t}\n\n\t/** Throw if initialize() hasn't been called yet. */\n\tprivate ensureInitialized(): void {\n\t\tif (!this.extractor) {\n\t\t\tthrow new Error(\"Embedder not initialized. Call initialize() first.\");\n\t\t}\n\t}\n}\n"]}
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Index lifecycle manager.
|
|
3
|
-
*
|
|
4
|
-
* Orchestrates: file scanning → chunking → embedding → FTS indexing → import graph.
|
|
5
|
-
* Supports incremental updates via mtime comparison.
|
|
6
|
-
*/
|
|
7
|
-
import type { SearchDatabase } from "./db.js";
|
|
8
|
-
import type { Embedder } from "./embedder.js";
|
|
9
|
-
import type { IndexConfig, IndexProgressCallback } from "./types.js";
|
|
10
|
-
/**
 * Owns the on-disk search index for one project: opens the database,
 * keeps it in sync with the files on disk, and stores chunk embeddings.
 *
 * The manager does not create an embedder of its own. Supply one with
 * setEmbedder() before calling anything that needs to embed chunks.
 */
export declare class IndexManager {
    private readonly config;
    private db;
    private embedder;
    constructor(config: IndexConfig);
    /** Check if node:sqlite is available. */
    static isAvailable(): boolean;
    /**
     * Open or create the index database.
     *
     * Returns the already-open database when there is one; otherwise creates
     * the index directory if needed and opens the database file inside it.
     */
    open(): SearchDatabase;
    /**
     * Set the embedder instance used to embed chunks.
     *
     * The instance is shared with the caller, who remains responsible for
     * disposing it. Embedding chunks without an embedder set throws.
     */
    setEmbedder(embedder: Embedder): void;
    /**
     * Close the database and drop the reference to the embedder.
     *
     * The embedder itself is not disposed here; its lifecycle belongs to
     * whoever passed it to setEmbedder().
     */
    close(): void;
    /** Get the database, opening if needed. */
    getDb(): SearchDatabase;
    /**
     * Build or incrementally update the index.
     *
     * 1. Scan project for files
     * 2. Compare mtimes against stored records
     * 3. Remove deleted files
     * 4. Re-chunk changed/new files
     * 5. Embed the newly inserted chunks
     *
     * Files that fail to process are skipped and counted in `failed`;
     * errors whose message mentions "SQLITE" are re-thrown instead.
     * `added` and `updated` count the files selected for processing,
     * including any that ended up in `failed`.
     *
     * @param onProgress Optional callback reporting phase progress.
     * @returns Counts of added, updated, removed and failed files; all zero
     *   when nothing changed.
     * @throws If new chunks need embedding and no embedder has been set.
     */
    buildIndex(onProgress?: IndexProgressCallback): Promise<{
        added: number;
        updated: number;
        removed: number;
        failed: number;
    }>;
    /**
     * Ensure all chunks have embeddings for the configured model,
     * generating any missing ones.
     *
     * @returns The number of chunks that were missing an embedding.
     * @throws If embeddings are missing and no embedder has been set.
     */
    ensureEmbeddings(onProgress?: IndexProgressCallback): Promise<number>;
    private embedChunks;
    /** Check if the index database file exists on disk. */
    indexExists(): boolean;
    /**
     * Get index stats.
     *
     * @returns File and chunk counts, or `null` when no index exists yet.
     */
    getStats(): {
        files: number;
        chunks: number;
    } | null;
}
|
|
55
|
-
//# sourceMappingURL=index-manager.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index-manager.d.ts","sourceRoot":"","sources":["../../../src/core/search/index-manager.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAE9C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAE9C,OAAO,KAAK,EAAE,WAAW,EAAe,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAYlF,qBAAa,YAAY;IACxB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,EAAE,CAA+B;IACzC,OAAO,CAAC,QAAQ,CAAyB;IAEzC,YAAY,MAAM,EAAE,WAAW,EAE9B;IAED,yCAAyC;IACzC,MAAM,CAAC,WAAW,IAAI,OAAO,CAE5B;IAED,yCAAyC;IACzC,IAAI,IAAI,cAAc,CAOrB;IAED;;;OAGG;IACH,WAAW,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAEpC;IAED,gDAAgD;IAChD,KAAK,IAAI,IAAI,CAQZ;IAED,2CAA2C;IAC3C,KAAK,IAAI,cAAc,CAGtB;IAED;;;;;;;OAOG;IACG,UAAU,CACf,UAAU,CAAC,EAAE,qBAAqB,GAChC,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CA8H9E;IAED;;OAEG;IACG,gBAAgB,CAAC,UAAU,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,MAAM,CAAC,CAO1E;YAMa,WAAW;IA2BzB,iCAAiC;IACjC,WAAW,IAAI,OAAO,CAGrB;IAED,uBAAuB;IACvB,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAOnD;CACD","sourcesContent":["/**\n * Index lifecycle manager.\n *\n * Orchestrates: file scanning → chunking → embedding → FTS indexing → import graph.\n * Supports incremental updates via mtime comparison.\n */\n\nimport { existsSync, mkdirSync, readFileSync } from \"node:fs\";\nimport path from \"node:path\";\nimport { chunkFile } from \"./chunker.js\";\nimport type { SearchDatabase } from \"./db.js\";\nimport { isSqliteAvailable, SearchDatabase as SearchDatabaseClass } from \"./db.js\";\nimport type { Embedder } from \"./embedder.js\";\nimport { type ScannedFile, scanProject } from \"./scanner.js\";\nimport type { IndexConfig, IndexedFile, IndexProgressCallback } from \"./types.js\";\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst DB_FILENAME = \"search.db\";\n\n// 
============================================================================\n// Index Manager\n// ============================================================================\n\nexport class IndexManager {\n\tprivate readonly config: IndexConfig;\n\tprivate db: SearchDatabase | null = null;\n\tprivate embedder: Embedder | null = null;\n\n\tconstructor(config: IndexConfig) {\n\t\tthis.config = config;\n\t}\n\n\t/** Check if node:sqlite is available. */\n\tstatic isAvailable(): boolean {\n\t\treturn isSqliteAvailable();\n\t}\n\n\t/** Open or create the index database. */\n\topen(): SearchDatabase {\n\t\tif (this.db) return this.db;\n\n\t\tmkdirSync(this.config.indexDir, { recursive: true });\n\t\tconst dbPath = path.join(this.config.indexDir, DB_FILENAME);\n\t\tthis.db = new SearchDatabaseClass(dbPath);\n\t\treturn this.db;\n\t}\n\n\t/**\n\t * Set an external embedder instance to share with the caller.\n\t * When set, embedChunks() uses this instead of creating its own.\n\t */\n\tsetEmbedder(embedder: Embedder): void {\n\t\tthis.embedder = embedder;\n\t}\n\n\t/** Close the database and dispose resources. */\n\tclose(): void {\n\t\tif (this.db) {\n\t\t\tthis.db.close();\n\t\t\tthis.db = null;\n\t\t}\n\t\t// Only dispose the embedder if we own it (not shared externally).\n\t\t// The caller who set it via setEmbedder() is responsible for its lifecycle.\n\t\tthis.embedder = null;\n\t}\n\n\t/** Get the database, opening if needed. */\n\tgetDb(): SearchDatabase {\n\t\tif (!this.db) return this.open();\n\t\treturn this.db;\n\t}\n\n\t/**\n\t * Build or incrementally update the index.\n\t *\n\t * 1. Scan project for files\n\t * 2. Compare mtimes against stored records\n\t * 3. Re-chunk and re-embed only changed/new files\n\t * 4. 
Remove deleted files\n\t */\n\tasync buildIndex(\n\t\tonProgress?: IndexProgressCallback,\n\t): Promise<{ added: number; updated: number; removed: number; failed: number }> {\n\t\tconst db = this.getDb();\n\t\tconst config = this.config;\n\n\t\t// Phase 1: Scan\n\t\tonProgress?.(\"scanning\", 0, 1);\n\t\tconst scannedFiles = await scanProject(config.projectRoot, config.globalMemoryDir);\n\t\tonProgress?.(\"scanning\", 1, 1);\n\n\t\t// Phase 2: Diff against existing index\n\t\tconst existingFiles = db.getAllFiles();\n\t\tconst existingByPath = new Map<string, IndexedFile>();\n\t\tfor (const f of existingFiles) {\n\t\t\texistingByPath.set(f.filePath, f);\n\t\t}\n\n\t\tconst scannedByPath = new Map<string, ScannedFile>();\n\t\tfor (const f of scannedFiles) {\n\t\t\tscannedByPath.set(f.filePath, f);\n\t\t}\n\n\t\tconst toAdd: ScannedFile[] = [];\n\t\tconst toUpdate: ScannedFile[] = [];\n\t\tconst toRemove: IndexedFile[] = [];\n\n\t\t// Find new and changed files\n\t\tfor (const scanned of scannedFiles) {\n\t\t\tconst existing = existingByPath.get(scanned.filePath);\n\t\t\tif (!existing) {\n\t\t\t\ttoAdd.push(scanned);\n\t\t\t} else if (existing.mtime !== scanned.mtime) {\n\t\t\t\ttoUpdate.push(scanned);\n\t\t\t}\n\t\t}\n\n\t\t// Find deleted files\n\t\tfor (const existing of existingFiles) {\n\t\t\tif (!scannedByPath.has(existing.filePath)) {\n\t\t\t\ttoRemove.push(existing);\n\t\t\t}\n\t\t}\n\n\t\tconst totalWork = toAdd.length + toUpdate.length + toRemove.length;\n\t\tif (totalWork === 0) {\n\t\t\treturn { added: 0, updated: 0, removed: 0, failed: 0 };\n\t\t}\n\n\t\t// Phase 3: Remove deleted files\n\t\tfor (const file of toRemove) {\n\t\t\tdb.deleteFile(file.id);\n\t\t}\n\n\t\t// Phase 4: Process new and changed files\n\t\tconst filesToProcess = [...toAdd, ...toUpdate];\n\t\tconst allNewChunkIds: number[] = [];\n\t\tlet failed = 0;\n\n\t\tfor (let i = 0; i < filesToProcess.length; i++) {\n\t\t\tconst scanned = filesToProcess[i];\n\t\t\tonProgress?.(\"indexing\", i + 
1, filesToProcess.length);\n\n\t\t\ttry {\n\t\t\t\t// Read file content and chunk BEFORE the transaction so that\n\t\t\t\t// failures in I/O or chunking don't leave a committed mtime\n\t\t\t\t// with zero chunks (which would make the file permanently invisible).\n\t\t\t\tconst absPath = scanned.filePath.startsWith(\"~memory/\")\n\t\t\t\t\t? path.join(this.config.globalMemoryDir ?? \"\", scanned.filePath.replace(\"~memory/\", \"\"))\n\t\t\t\t\t: path.join(config.projectRoot, scanned.filePath);\n\n\t\t\t\tconst content = readFileSync(absPath, \"utf-8\");\n\t\t\t\tconst chunks = await chunkFile(content, scanned.filePath, scanned.fileType);\n\t\t\t\tconst imports = extractImports(content, scanned.filePath, scanned.fileType);\n\n\t\t\t\t// All DB mutations in a single transaction — atomic per file\n\t\t\t\tdb.transaction(() => {\n\t\t\t\t\tconst fileId = db.upsertFile(scanned.filePath, scanned.mtime, scanned.fileType);\n\n\t\t\t\t\t// Delete old chunks (for updates)\n\t\t\t\t\tconst existingFile = existingByPath.get(scanned.filePath);\n\t\t\t\t\tif (existingFile) {\n\t\t\t\t\t\tdb.deleteChunksForFile(existingFile.id);\n\t\t\t\t\t\tdb.deleteImportsForFile(existingFile.id);\n\t\t\t\t\t}\n\n\t\t\t\t\t// Insert chunks and symbols\n\t\t\t\t\tfor (const chunk of chunks) {\n\t\t\t\t\t\tconst chunkId = db.insertChunk(\n\t\t\t\t\t\t\tfileId,\n\t\t\t\t\t\t\tchunk.filePath,\n\t\t\t\t\t\t\tchunk.startLine,\n\t\t\t\t\t\t\tchunk.endLine,\n\t\t\t\t\t\t\tchunk.kind,\n\t\t\t\t\t\t\tchunk.name,\n\t\t\t\t\t\t\tchunk.content,\n\t\t\t\t\t\t\tchunk.fileType,\n\t\t\t\t\t\t);\n\t\t\t\t\t\tallNewChunkIds.push(chunkId);\n\n\t\t\t\t\t\tif (chunk.name) {\n\t\t\t\t\t\t\tdb.insertSymbol(chunkId, chunk.name, chunk.kind);\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\t// Store import edges\n\t\t\t\t\tfor (const imp of imports) {\n\t\t\t\t\t\tdb.insertImport(fileId, imp);\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t} catch (err: unknown) {\n\t\t\t\t// Re-throw DB-level errors (SQLITE_FULL, disk full, etc.) 
— these\n\t\t\t\t// indicate infrastructure problems, not per-file issues.\n\t\t\t\tconst message = err instanceof Error ? err.message : String(err);\n\t\t\t\tif (message.includes(\"SQLITE\")) {\n\t\t\t\t\tthrow err;\n\t\t\t\t}\n\t\t\t\t// Skip files that fail to process (permissions, encoding issues, etc.)\n\t\t\t\tfailed++;\n\t\t\t}\n\t\t}\n\n\t\t// Phase 5: Embed new chunks\n\t\tif (allNewChunkIds.length > 0) {\n\t\t\tawait this.embedChunks(db, allNewChunkIds, onProgress);\n\t\t}\n\n\t\treturn { added: toAdd.length, updated: toUpdate.length, removed: toRemove.length, failed };\n\t}\n\n\t/**\n\t * Ensure all chunks have embeddings, generating any missing ones.\n\t */\n\tasync ensureEmbeddings(onProgress?: IndexProgressCallback): Promise<number> {\n\t\tconst db = this.getDb();\n\t\tconst missingIds = db.getChunkIdsWithoutEmbedding(this.config.modelName);\n\t\tif (missingIds.length === 0) return 0;\n\n\t\tawait this.embedChunks(db, missingIds, onProgress);\n\t\treturn missingIds.length;\n\t}\n\n\t/**\n\t * Generate embeddings for specific chunks.\n\t * Requires an embedder to be set via setEmbedder() before calling.\n\t */\n\tprivate async embedChunks(\n\t\tdb: SearchDatabase,\n\t\tchunkIds: number[],\n\t\tonProgress?: IndexProgressCallback,\n\t): Promise<void> {\n\t\tif (chunkIds.length === 0) return;\n\n\t\tif (!this.embedder) {\n\t\t\tthrow new Error(\"IndexManager: embedder not set. Call setEmbedder() before embedding.\");\n\t\t}\n\n\t\t// Get chunk contents\n\t\tconst chunks = db.getChunksById(chunkIds);\n\t\tconst texts = chunks.map((c) => c.content);\n\n\t\t// Generate embeddings\n\t\tconst vectors = await this.embedder.embedDocuments(texts, onProgress);\n\n\t\t// Store embeddings\n\t\tconst items = chunks.map((chunk, i) => ({\n\t\t\tchunkId: chunk.id,\n\t\t\tmodelName: this.config.modelName,\n\t\t\tvector: vectors[i],\n\t\t}));\n\t\tdb.batchUpsertEmbeddings(items);\n\t}\n\n\t/** Check if the index exists. 
*/\n\tindexExists(): boolean {\n\t\tconst dbPath = path.join(this.config.indexDir, DB_FILENAME);\n\t\treturn existsSync(dbPath);\n\t}\n\n\t/** Get index stats. */\n\tgetStats(): { files: number; chunks: number } | null {\n\t\tif (!this.indexExists()) return null;\n\t\tconst db = this.getDb();\n\t\treturn {\n\t\t\tfiles: db.getFileCount(),\n\t\t\tchunks: db.getChunkCount(),\n\t\t};\n\t}\n}\n\n// ============================================================================\n// Import Extraction (simple regex-based)\n// ============================================================================\n\n/**\n * Extract import targets from source code.\n * Returns resolved relative paths (without extensions for JS/TS).\n */\nfunction extractImports(content: string, filePath: string, fileType: string): string[] {\n\tconst imports: string[] = [];\n\tconst dir = path.dirname(filePath);\n\n\tif (fileType === \"typescript\" || fileType === \"tsx\" || fileType === \"javascript\") {\n\t\t// ES6 imports: import ... from '...'\n\t\t// require(): require('...')\n\t\tconst importRe = /(?:import\\s+.*?\\s+from\\s+|import\\s*\\(|require\\s*\\()\\s*['\"]([^'\"]+)['\"]/g;\n\t\tfor (const match of content.matchAll(importRe)) {\n\t\t\tconst target = match[1];\n\t\t\tif (target.startsWith(\".\")) {\n\t\t\t\timports.push(resolveImportPath(dir, target));\n\t\t\t}\n\t\t}\n\t} else if (fileType === \"python\") {\n\t\t// from . 
import X, from .module import X\n\t\tconst fromRe = /from\\s+(\\.\\S*)\\s+import/g;\n\t\tfor (const match of content.matchAll(fromRe)) {\n\t\t\tconst resolved = resolvePythonImport(dir, match[1]);\n\t\t\tif (resolved) imports.push(resolved);\n\t\t}\n\t} else if (fileType === \"go\") {\n\t\t// import \"...\" or import ( \"...\" )\n\t\tconst importRe = /import\\s+(?:\\(\\s*(?:[\\s\\S]*?)\\s*\\)|\"([^\"]+)\")/g;\n\t\tconst pathRe = /\"([^\"]+)\"/g;\n\t\tfor (const match of content.matchAll(importRe)) {\n\t\t\tif (match[1]) {\n\t\t\t\timports.push(match[1]);\n\t\t\t} else {\n\t\t\t\t// Multi-line import block\n\t\t\t\tfor (const inner of match[0].matchAll(pathRe)) {\n\t\t\t\t\timports.push(inner[1]);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t} else if (fileType === \"rust\") {\n\t\t// use crate::..., use super::...,\n\t\tconst useRe = /use\\s+((?:crate|super|self)::[\\w:]+)/g;\n\t\tfor (const match of content.matchAll(useRe)) {\n\t\t\timports.push(match[1].replace(/::/g, \"/\"));\n\t\t}\n\t} else if (fileType === \"java\") {\n\t\t// import com.example.Foo;\n\t\tconst importRe = /import\\s+([\\w.]+)\\s*;/g;\n\t\tfor (const match of content.matchAll(importRe)) {\n\t\t\timports.push(match[1].replace(/\\./g, \"/\"));\n\t\t}\n\t} else if (fileType === \"c\" || fileType === \"cpp\") {\n\t\t// #include \"local.h\"\n\t\tconst includeRe = /#include\\s+\"([^\"]+)\"/g;\n\t\tfor (const match of content.matchAll(includeRe)) {\n\t\t\timports.push(path.posix.join(dir, match[1]));\n\t\t}\n\t}\n\n\treturn imports;\n}\n\n/** Resolve a relative JS/TS import to a file path. */\nfunction resolveImportPath(fromDir: string, importPath: string): string {\n\t// Strip .js/.ts extension if present (normalize)\n\tconst cleaned = importPath.replace(/\\.[jt]sx?$/, \"\");\n\treturn path.posix.join(fromDir, cleaned);\n}\n\n/** Resolve a Python relative import. 
*/\nfunction resolvePythonImport(fromDir: string, importPath: string): string | null {\n\tif (importPath === \".\") return fromDir;\n\tconst dots = importPath.match(/^(\\.+)/);\n\tif (!dots) return null;\n\tconst levels = dots[1].length - 1;\n\tconst remainder = importPath.slice(dots[1].length).replace(/\\./g, \"/\");\n\tlet base = fromDir;\n\tfor (let i = 0; i < levels; i++) {\n\t\tbase = path.posix.dirname(base);\n\t}\n\treturn remainder ? path.posix.join(base, remainder) : base;\n}\n"]}
|
|
@@ -1,311 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Index lifecycle manager.
|
|
3
|
-
*
|
|
4
|
-
* Orchestrates: file scanning → chunking → embedding → FTS indexing → import graph.
|
|
5
|
-
* Supports incremental updates via mtime comparison.
|
|
6
|
-
*/
|
|
7
|
-
import { existsSync, mkdirSync, readFileSync } from "node:fs";
|
|
8
|
-
import path from "node:path";
|
|
9
|
-
import { chunkFile } from "./chunker.js";
|
|
10
|
-
import { isSqliteAvailable, SearchDatabase as SearchDatabaseClass } from "./db.js";
|
|
11
|
-
import { scanProject } from "./scanner.js";
|
|
12
|
-
// ============================================================================
|
|
13
|
-
// Constants
|
|
14
|
-
// ============================================================================
|
|
15
|
-
const DB_FILENAME = "search.db";
|
|
16
|
-
// ============================================================================
|
|
17
|
-
// Index Manager
|
|
18
|
-
// ============================================================================
|
|
19
|
-
/**
 * Index lifecycle manager.
 *
 * Owns the index database handle and drives the indexing pipeline:
 * scan -> diff against stored mtimes -> chunk + import extraction -> embed.
 * Embedding requires an embedder supplied through setEmbedder().
 */
export class IndexManager {
    /** Index configuration; this class reads projectRoot, globalMemoryDir, indexDir and modelName. */
    config;
    /** Open database handle, or null until open()/getDb() is called. */
    db = null;
    /** Caller-supplied embedder, or null when none has been set. */
    embedder = null;

    /**
     * @param config Index configuration (see the `config` field).
     */
    constructor(config) {
        this.config = config;
    }

    /** Check if node:sqlite is available. */
    static isAvailable() {
        return isSqliteAvailable();
    }

    /**
     * Open or create the index database.
     *
     * Creates `config.indexDir` (recursively) if needed. Calling this again
     * while a handle is open returns the existing handle.
     *
     * @returns The open database handle.
     */
    open() {
        if (this.db) {
            return this.db;
        }
        mkdirSync(this.config.indexDir, { recursive: true });
        const dbPath = path.join(this.config.indexDir, DB_FILENAME);
        this.db = new SearchDatabaseClass(dbPath);
        return this.db;
    }

    /**
     * Set the embedder used by buildIndex() and ensureEmbeddings().
     *
     * The instance is shared with the caller; this class never creates one
     * itself, and embedding throws if none has been set.
     *
     * @param embedder Object exposing `embedDocuments(texts, onProgress)`.
     */
    setEmbedder(embedder) {
        this.embedder = embedder;
    }

    /** Close the database (if open) and drop the embedder reference. */
    close() {
        if (this.db) {
            this.db.close();
            this.db = null;
        }
        // The embedder is supplied by the caller via setEmbedder(), so it is
        // not disposed here; only our reference to it is released.
        this.embedder = null;
    }

    /** Get the database, opening it if needed. */
    getDb() {
        if (!this.db) {
            return this.open();
        }
        return this.db;
    }

    /**
     * Build or incrementally update the index.
     *
     * 1. Scan project for files
     * 2. Compare mtimes against stored records
     * 3. Remove deleted files
     * 4. Re-chunk changed/new files (one transaction per file)
     * 5. Embed the chunks that were written
     *
     * @param onProgress Optional callback invoked as `(phase, current, total)`
     *   with phases "scanning" and "indexing"; it is also forwarded to the embedder.
     * @returns Counts of files selected as added / updated / removed, plus the
     *   number of files skipped because processing them failed. `added` and
     *   `updated` include files that subsequently failed.
     * @throws Re-throws any per-file error whose message contains "SQLITE";
     *   also throws if chunks were written and no embedder is set.
     */
    async buildIndex(onProgress) {
        const db = this.getDb();
        const config = this.config;

        // Phase 1: Scan
        onProgress?.("scanning", 0, 1);
        const scannedFiles = await scanProject(config.projectRoot, config.globalMemoryDir);
        onProgress?.("scanning", 1, 1);

        // Phase 2: Diff against existing index
        const existingFiles = db.getAllFiles();
        const existingByPath = new Map();
        for (const f of existingFiles) {
            existingByPath.set(f.filePath, f);
        }
        const scannedByPath = new Map();
        for (const f of scannedFiles) {
            scannedByPath.set(f.filePath, f);
        }

        const toAdd = [];
        const toUpdate = [];
        const toRemove = [];

        // Find new and changed files
        for (const scanned of scannedFiles) {
            const existing = existingByPath.get(scanned.filePath);
            if (!existing) {
                toAdd.push(scanned);
            }
            else if (existing.mtime !== scanned.mtime) {
                toUpdate.push(scanned);
            }
        }

        // Find deleted files
        for (const existing of existingFiles) {
            if (!scannedByPath.has(existing.filePath)) {
                toRemove.push(existing);
            }
        }

        const totalWork = toAdd.length + toUpdate.length + toRemove.length;
        if (totalWork === 0) {
            return { added: 0, updated: 0, removed: 0, failed: 0 };
        }

        // Phase 3: Remove deleted files
        for (const file of toRemove) {
            db.deleteFile(file.id);
        }

        // Phase 4: Process new and changed files
        const filesToProcess = [...toAdd, ...toUpdate];
        const allNewChunkIds = [];
        let failed = 0;

        for (let i = 0; i < filesToProcess.length; i++) {
            const scanned = filesToProcess[i];
            onProgress?.("indexing", i + 1, filesToProcess.length);
            try {
                // Read file content and chunk BEFORE the transaction so that
                // failures in I/O or chunking don't leave a committed mtime
                // with zero chunks (which would make the file permanently invisible).
                const absPath = scanned.filePath.startsWith("~memory/")
                    ? path.join(this.config.globalMemoryDir ?? "", scanned.filePath.replace("~memory/", ""))
                    : path.join(config.projectRoot, scanned.filePath);
                const content = readFileSync(absPath, "utf-8");
                const chunks = await chunkFile(content, scanned.filePath, scanned.fileType);
                const imports = extractImports(content, scanned.filePath, scanned.fileType);

                // Chunk ids are collected per file and only queued for embedding
                // once the transaction has returned without throwing. Otherwise a
                // failure part-way through would leave ids of a failed file
                // (presumably rolled back - confirm against db.transaction) in
                // the embedding queue.
                const fileChunkIds = [];

                // All DB mutations in a single transaction - atomic per file
                db.transaction(() => {
                    const fileId = db.upsertFile(scanned.filePath, scanned.mtime, scanned.fileType);

                    // Delete old chunks (for updates)
                    const existingFile = existingByPath.get(scanned.filePath);
                    if (existingFile) {
                        db.deleteChunksForFile(existingFile.id);
                        db.deleteImportsForFile(existingFile.id);
                    }

                    // Insert chunks and symbols
                    for (const chunk of chunks) {
                        const chunkId = db.insertChunk(fileId, chunk.filePath, chunk.startLine, chunk.endLine, chunk.kind, chunk.name, chunk.content, chunk.fileType);
                        fileChunkIds.push(chunkId);
                        if (chunk.name) {
                            db.insertSymbol(chunkId, chunk.name, chunk.kind);
                        }
                    }

                    // Store import edges
                    for (const imp of imports) {
                        db.insertImport(fileId, imp);
                    }
                });

                for (const chunkId of fileChunkIds) {
                    allNewChunkIds.push(chunkId);
                }
            }
            catch (err) {
                // Re-throw DB-level errors (SQLITE_FULL, disk full, etc.) - these
                // indicate infrastructure problems, not per-file issues.
                const message = err instanceof Error ? err.message : String(err);
                if (message.includes("SQLITE")) {
                    throw err;
                }
                // Skip files that fail to process (permissions, encoding issues, etc.)
                failed++;
            }
        }

        // Phase 5: Embed new chunks
        if (allNewChunkIds.length > 0) {
            await this.embedChunks(db, allNewChunkIds, onProgress);
        }

        return { added: toAdd.length, updated: toUpdate.length, removed: toRemove.length, failed };
    }

    /**
     * Ensure all chunks have embeddings for `config.modelName`, generating
     * any missing ones.
     *
     * @param onProgress Optional progress callback forwarded to the embedder.
     * @returns Number of chunk ids that were missing an embedding.
     */
    async ensureEmbeddings(onProgress) {
        const db = this.getDb();
        const missingIds = db.getChunkIdsWithoutEmbedding(this.config.modelName);
        if (missingIds.length === 0) {
            return 0;
        }
        await this.embedChunks(db, missingIds, onProgress);
        return missingIds.length;
    }

    /**
     * Generate and store embeddings for specific chunks.
     * Requires an embedder to be set via setEmbedder() before calling.
     *
     * @param db Database to read chunk contents from and write embeddings to.
     * @param chunkIds Ids of the chunks to embed; an empty list is a no-op.
     * @param onProgress Optional progress callback forwarded to the embedder.
     * @throws Error if no embedder has been set.
     */
    async embedChunks(db, chunkIds, onProgress) {
        if (chunkIds.length === 0) {
            return;
        }
        if (!this.embedder) {
            throw new Error("IndexManager: embedder not set. Call setEmbedder() before embedding.");
        }

        // Get chunk contents
        const chunks = db.getChunksById(chunkIds);
        const texts = chunks.map((c) => c.content);

        // Generate embeddings
        const vectors = await this.embedder.embedDocuments(texts, onProgress);

        // Store embeddings; vectors are paired with chunks by position.
        const items = chunks.map((chunk, i) => ({
            chunkId: chunk.id,
            modelName: this.config.modelName,
            vector: vectors[i],
        }));
        db.batchUpsertEmbeddings(items);
    }

    /** Check if the index database file exists on disk. */
    indexExists() {
        const dbPath = path.join(this.config.indexDir, DB_FILENAME);
        return existsSync(dbPath);
    }

    /**
     * Get index stats.
     *
     * @returns File and chunk counts, or null when no index file exists.
     *   Opens the database if it is not already open.
     */
    getStats() {
        if (!this.indexExists()) {
            return null;
        }
        const db = this.getDb();
        return {
            files: db.getFileCount(),
            chunks: db.getChunkCount(),
        };
    }
}
|
|
221
|
-
// ============================================================================
|
|
222
|
-
// Import Extraction (simple regex-based)
|
|
223
|
-
// ============================================================================
|
|
224
|
-
/**
 * Extract import targets from source code using simple per-language regexes.
 *
 * - typescript / tsx / javascript: relative specifiers only (starting with
 *   "."), resolved against the file's directory with any JS/TS extension
 *   stripped. Covers static imports (including multi-line clauses),
 *   side-effect imports, re-exports (`export ... from`), dynamic `import()`
 *   and `require()`.
 * - python: relative `from .x import y` statements, resolved to paths.
 * - go: quoted import paths, single-line (optionally aliased) or grouped.
 * - rust: `use crate::` / `super::` / `self::` paths with "::" turned into "/".
 * - java: `import a.b.C;` with "." turned into "/".
 * - c / cpp: `#include "local.h"` joined onto the file's directory.
 *
 * @param content Full text of the source file.
 * @param filePath Path of the file; its directory is the base for relative targets.
 * @param fileType Language tag selecting the extraction rules; unknown tags yield [].
 * @returns Import targets in order of appearance (duplicates are kept).
 */
function extractImports(content, filePath, fileType) {
    const imports = [];
    const dir = path.dirname(filePath);

    if (fileType === "typescript" || fileType === "tsx" || fileType === "javascript") {
        // Alternatives, tried in order at each position:
        //   1. import/export <clause> from '...'  - the clause may span lines but
        //      may not contain quotes or ';', so it cannot run into a later statement
        //   2. import '...' (side-effect) and import('...') (dynamic)
        //   3. require('...')
        //   4. the original single-line form, kept so everything it matched
        //      (e.g. string-named bindings in the clause) still matches
        const importRe = /(?:\b(?:import|export)\b[^'";]*?\bfrom\s*|import\s*\(?\s*|require\s*\(\s*|import\s+.*?\s+from\s+)['"]([^'"]+)['"]/g;
        for (const match of content.matchAll(importRe)) {
            const target = match[1];
            if (target.startsWith(".")) {
                imports.push(resolveImportPath(dir, target));
            }
        }
    }
    else if (fileType === "python") {
        // from . import X, from .module import X
        const fromRe = /from\s+(\.\S*)\s+import/g;
        for (const match of content.matchAll(fromRe)) {
            const resolved = resolvePythonImport(dir, match[1]);
            if (resolved) {
                imports.push(resolved);
            }
        }
    }
    else if (fileType === "go") {
        // import "...", import alias "..." (alias may be a name, "_" or "."),
        // or a grouped block: import ( "..." alias "..." )
        const importRe = /import\s+(?:\(([\s\S]*?)\)|(?:[\w.]+\s+)?"([^"]+)")/g;
        const pathRe = /"([^"]+)"/g;
        for (const match of content.matchAll(importRe)) {
            const [, block, single] = match;
            if (single) {
                imports.push(single);
                continue;
            }
            // Grouped import block: collect every quoted path inside the parentheses.
            for (const inner of (block ?? "").matchAll(pathRe)) {
                imports.push(inner[1]);
            }
        }
    }
    else if (fileType === "rust") {
        // use crate::..., use super::..., use self::...
        const useRe = /use\s+((?:crate|super|self)::[\w:]+)/g;
        for (const match of content.matchAll(useRe)) {
            imports.push(match[1].replace(/::/g, "/"));
        }
    }
    else if (fileType === "java") {
        // import com.example.Foo;
        const importRe = /import\s+([\w.]+)\s*;/g;
        for (const match of content.matchAll(importRe)) {
            imports.push(match[1].replace(/\./g, "/"));
        }
    }
    else if (fileType === "c" || fileType === "cpp") {
        // #include "local.h" (angle-bracket system includes are ignored)
        const includeRe = /#include\s+"([^"]+)"/g;
        for (const match of content.matchAll(includeRe)) {
            imports.push(path.posix.join(dir, match[1]));
        }
    }

    return imports;
}
|
|
290
|
-
/**
 * Turn a relative JS/TS import specifier into a normalized path.
 *
 * @param fromDir Directory of the importing file.
 * @param importPath Relative specifier as written in the source.
 * @returns `importPath` joined onto `fromDir` with POSIX separators, minus
 *   any trailing .js/.jsx/.ts/.tsx extension.
 */
function resolveImportPath(fromDir, importPath) {
    // Drop the extension so "./foo", "./foo.js" and "./foo.ts" normalize alike.
    const extensionPattern = /\.[jt]sx?$/;
    const withoutExtension = importPath.replace(extensionPattern, "");
    return path.posix.join(fromDir, withoutExtension);
}
|
|
296
|
-
/**
 * Resolve the module part of a Python relative import (`from <module> import ...`).
 *
 * One leading dot means the importing file's own directory; each further
 * dot climbs one directory. Dots after the leading run separate package
 * segments and become "/".
 *
 * @param fromDir Directory of the importing file.
 * @param importPath Module text as written, e.g. ".", ".mod" or "..pkg.mod".
 * @returns The resolved path, or null when `importPath` has no leading dot.
 */
function resolvePythonImport(fromDir, importPath) {
    const leading = /^(\.+)/.exec(importPath);
    if (leading === null) {
        return null;
    }
    const dotCount = leading[1].length;

    // The first dot is the current package, so climb (dotCount - 1) levels.
    let base = fromDir;
    for (let remaining = dotCount - 1; remaining > 0; remaining--) {
        base = path.posix.dirname(base);
    }

    const modulePath = importPath.slice(dotCount).split(".").join("/");
    if (modulePath === "") {
        return base;
    }
    return path.posix.join(base, modulePath);
}
|
|
311
|
-
//# sourceMappingURL=index-manager.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index-manager.js","sourceRoot":"","sources":["../../../src/core/search/index-manager.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,iBAAiB,EAAE,cAAc,IAAI,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAEnF,OAAO,EAAoB,WAAW,EAAE,MAAM,cAAc,CAAC;AAG7D,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,WAAW,GAAG,WAAW,CAAC;AAEhC,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,MAAM,OAAO,YAAY;IACP,MAAM,CAAc;IAC7B,EAAE,GAA0B,IAAI,CAAC;IACjC,QAAQ,GAAoB,IAAI,CAAC;IAEzC,YAAY,MAAmB,EAAE;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IAAA,CACrB;IAED,yCAAyC;IACzC,MAAM,CAAC,WAAW,GAAY;QAC7B,OAAO,iBAAiB,EAAE,CAAC;IAAA,CAC3B;IAED,yCAAyC;IACzC,IAAI,GAAmB;QACtB,IAAI,IAAI,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC,EAAE,CAAC;QAE5B,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAC5D,IAAI,CAAC,EAAE,GAAG,IAAI,mBAAmB,CAAC,MAAM,CAAC,CAAC;QAC1C,OAAO,IAAI,CAAC,EAAE,CAAC;IAAA,CACf;IAED;;;OAGG;IACH,WAAW,CAAC,QAAkB,EAAQ;QACrC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAAA,CACzB;IAED,gDAAgD;IAChD,KAAK,GAAS;QACb,IAAI,IAAI,CAAC,EAAE,EAAE,CAAC;YACb,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;YAChB,IAAI,CAAC,EAAE,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,kEAAkE;QAClE,4EAA4E;QAC5E,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;IAAA,CACrB;IAED,2CAA2C;IAC3C,KAAK,GAAmB;QACvB,IAAI,CAAC,IAAI,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;QACjC,OAAO,IAAI,CAAC,EAAE,CAAC;IAAA,CACf;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACf,UAAkC,EAC6C;QAC/E,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAE3B,gBAAgB;QAChB,UAAU,EAAE,CAAC,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QAC/B,MAAM,YAAY,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,eAAe,CAAC,CAAC;QACnF,UAAU,EAAE,CAAC,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QAE/B,uCAAuC;QACvC,MAAM,aAAa,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;QACvC,MAAM,cAAc,GAAG,IAAI,GAAG,EAAuB,CAAC;QACtD,KAAK,MAAM,CAAC,IAAI,aAAa
,EAAE,CAAC;YAC/B,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,GAAG,EAAuB,CAAC;QACrD,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;YAC9B,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;QAED,MAAM,KAAK,GAAkB,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAkB,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAkB,EAAE,CAAC;QAEnC,6BAA6B;QAC7B,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACpC,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACtD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACf,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,CAAC;iBAAM,IAAI,QAAQ,CAAC,KAAK,KAAK,OAAO,CAAC,KAAK,EAAE,CAAC;gBAC7C,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;QACF,CAAC;QAED,qBAAqB;QACrB,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3C,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACzB,CAAC;QACF,CAAC;QAED,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;QACnE,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACrB,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;QACxD,CAAC;QAED,gCAAgC;QAChC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC7B,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACxB,CAAC;QAED,yCAAyC;QACzC,MAAM,cAAc,GAAG,CAAC,GAAG,KAAK,EAAE,GAAG,QAAQ,CAAC,CAAC;QAC/C,MAAM,cAAc,GAAa,EAAE,CAAC;QACpC,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;YAClC,UAAU,EAAE,CAAC,UAAU,EAAE,CAAC,GAAG,CAAC,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC;YAEvD,IAAI,CAAC;gBACJ,6DAA6D;gBAC7D,4DAA4D;gBAC5D,sEAAsE;gBACtE,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;oBACtD,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,eAAe,IAAI,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;oBACxF,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAEnD,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,CA
AC;gBAC5E,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAE5E,+DAA6D;gBAC7D,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC;oBACpB,MAAM,MAAM,GAAG,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;oBAEhF,kCAAkC;oBAClC,MAAM,YAAY,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;oBAC1D,IAAI,YAAY,EAAE,CAAC;wBAClB,EAAE,CAAC,mBAAmB,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;wBACxC,EAAE,CAAC,oBAAoB,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;oBAC1C,CAAC;oBAED,4BAA4B;oBAC5B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;wBAC5B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAC7B,MAAM,EACN,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,OAAO,EACb,KAAK,CAAC,QAAQ,CACd,CAAC;wBACF,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;wBAE7B,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;4BAChB,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;wBAClD,CAAC;oBACF,CAAC;oBAED,qBAAqB;oBACrB,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;wBAC3B,EAAE,CAAC,YAAY,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;oBAC9B,CAAC;gBAAA,CACD,CAAC,CAAC;YACJ,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACvB,oEAAkE;gBAClE,yDAAyD;gBACzD,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAChC,MAAM,GAAG,CAAC;gBACX,CAAC;gBACD,uEAAuE;gBACvE,MAAM,EAAE,CAAC;YACV,CAAC;QACF,CAAC;QAED,4BAA4B;QAC5B,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAAA,CAC3F;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CAAC,UAAkC,EAAmB;QAC3E,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;QACxB,MAAM,UAAU,GAAG,EAAE,CAAC,2BAA2B,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACzE,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEtC,MAAM,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;QACnD,OAAO,UAAU,CAAC,MAAM,CAAC;IAAA,CACzB;IAED;;;OAGG;IACK,KAAK,CAAC,WA
AW,CACxB,EAAkB,EAClB,QAAkB,EAClB,UAAkC,EAClB;QAChB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAElC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;QACzF,CAAC;QAED,qBAAqB;QACrB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAE3C,sBAAsB;QACtB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;QAEtE,mBAAmB;QACnB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACvC,OAAO,EAAE,KAAK,CAAC,EAAE;YACjB,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;SAClB,CAAC,CAAC,CAAC;QACJ,EAAE,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAAA,CAChC;IAED,iCAAiC;IACjC,WAAW,GAAY;QACtB,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAC5D,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC;IAAA,CAC1B;IAED,uBAAuB;IACvB,QAAQ,GAA6C;QACpD,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;YAAE,OAAO,IAAI,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;QACxB,OAAO;YACN,KAAK,EAAE,EAAE,CAAC,YAAY,EAAE;YACxB,MAAM,EAAE,EAAE,CAAC,aAAa,EAAE;SAC1B,CAAC;IAAA,CACF;CACD;AAED,+EAA+E;AAC/E,yCAAyC;AACzC,+EAA+E;AAE/E;;;GAGG;AACH,SAAS,cAAc,CAAC,OAAe,EAAE,QAAgB,EAAE,QAAgB,EAAY;IACtF,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAEnC,IAAI,QAAQ,KAAK,YAAY,IAAI,QAAQ,KAAK,KAAK,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAClF,qCAAqC;QACrC,4BAA4B;QAC5B,MAAM,QAAQ,GAAG,yEAAyE,CAAC;QAC3F,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChD,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACxB,IAAI,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;YAC9C,CAAC;QACF,CAAC;IACF,CAAC;SAAM,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAClC,yCAAyC;QACzC,MAAM,MAAM,GAAG,0BAA0B,CAAC;QAC1C,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC9C,MAAM,QAAQ,GAAG,mBAAmB,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACpD,IAAI,QAAQ;gBAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,CAAC;IACF,CAAC
;SAAM,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QAC9B,mCAAmC;QACnC,MAAM,QAAQ,GAAG,gDAAgD,CAAC;QAClE,MAAM,MAAM,GAAG,YAAY,CAAC;QAC5B,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChD,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACxB,CAAC;iBAAM,CAAC;gBACP,0BAA0B;gBAC1B,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC/C,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxB,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;SAAM,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QAChC,kCAAkC;QAClC,MAAM,KAAK,GAAG,uCAAuC,CAAC;QACtD,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;QAC5C,CAAC;IACF,CAAC;SAAM,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QAChC,0BAA0B;QAC1B,MAAM,QAAQ,GAAG,wBAAwB,CAAC;QAC1C,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;QAC5C,CAAC;IACF,CAAC;SAAM,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;QACnD,qBAAqB;QACrB,MAAM,SAAS,GAAG,uBAAuB,CAAC;QAC1C,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACjD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;IACF,CAAC;IAED,OAAO,OAAO,CAAC;AAAA,CACf;AAED,sDAAsD;AACtD,SAAS,iBAAiB,CAAC,OAAe,EAAE,UAAkB,EAAU;IACvE,iDAAiD;IACjD,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;IACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AAAA,CACzC;AAED,wCAAwC;AACxC,SAAS,mBAAmB,CAAC,OAAe,EAAE,UAAkB,EAAiB;IAChF,IAAI,UAAU,KAAK,GAAG;QAAE,OAAO,OAAO,CAAC;IACvC,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAClC,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACvE,IAAI,IAAI,GAAG,OAAO,CAAC;IACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,
MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAAA,CAC3D","sourcesContent":["/**\n * Index lifecycle manager.\n *\n * Orchestrates: file scanning → chunking → embedding → FTS indexing → import graph.\n * Supports incremental updates via mtime comparison.\n */\n\nimport { existsSync, mkdirSync, readFileSync } from \"node:fs\";\nimport path from \"node:path\";\nimport { chunkFile } from \"./chunker.js\";\nimport type { SearchDatabase } from \"./db.js\";\nimport { isSqliteAvailable, SearchDatabase as SearchDatabaseClass } from \"./db.js\";\nimport type { Embedder } from \"./embedder.js\";\nimport { type ScannedFile, scanProject } from \"./scanner.js\";\nimport type { IndexConfig, IndexedFile, IndexProgressCallback } from \"./types.js\";\n\n// ============================================================================\n// Constants\n// ============================================================================\n\nconst DB_FILENAME = \"search.db\";\n\n// ============================================================================\n// Index Manager\n// ============================================================================\n\nexport class IndexManager {\n\tprivate readonly config: IndexConfig;\n\tprivate db: SearchDatabase | null = null;\n\tprivate embedder: Embedder | null = null;\n\n\tconstructor(config: IndexConfig) {\n\t\tthis.config = config;\n\t}\n\n\t/** Check if node:sqlite is available. */\n\tstatic isAvailable(): boolean {\n\t\treturn isSqliteAvailable();\n\t}\n\n\t/** Open or create the index database. 
*/\n\topen(): SearchDatabase {\n\t\tif (this.db) return this.db;\n\n\t\tmkdirSync(this.config.indexDir, { recursive: true });\n\t\tconst dbPath = path.join(this.config.indexDir, DB_FILENAME);\n\t\tthis.db = new SearchDatabaseClass(dbPath);\n\t\treturn this.db;\n\t}\n\n\t/**\n\t * Set an external embedder instance to share with the caller.\n\t * When set, embedChunks() uses this instead of creating its own.\n\t */\n\tsetEmbedder(embedder: Embedder): void {\n\t\tthis.embedder = embedder;\n\t}\n\n\t/** Close the database and dispose resources. */\n\tclose(): void {\n\t\tif (this.db) {\n\t\t\tthis.db.close();\n\t\t\tthis.db = null;\n\t\t}\n\t\t// Only dispose the embedder if we own it (not shared externally).\n\t\t// The caller who set it via setEmbedder() is responsible for its lifecycle.\n\t\tthis.embedder = null;\n\t}\n\n\t/** Get the database, opening if needed. */\n\tgetDb(): SearchDatabase {\n\t\tif (!this.db) return this.open();\n\t\treturn this.db;\n\t}\n\n\t/**\n\t * Build or incrementally update the index.\n\t *\n\t * 1. Scan project for files\n\t * 2. Compare mtimes against stored records\n\t * 3. Re-chunk and re-embed only changed/new files\n\t * 4. 
Remove deleted files\n\t */\n\tasync buildIndex(\n\t\tonProgress?: IndexProgressCallback,\n\t): Promise<{ added: number; updated: number; removed: number; failed: number }> {\n\t\tconst db = this.getDb();\n\t\tconst config = this.config;\n\n\t\t// Phase 1: Scan\n\t\tonProgress?.(\"scanning\", 0, 1);\n\t\tconst scannedFiles = await scanProject(config.projectRoot, config.globalMemoryDir);\n\t\tonProgress?.(\"scanning\", 1, 1);\n\n\t\t// Phase 2: Diff against existing index\n\t\tconst existingFiles = db.getAllFiles();\n\t\tconst existingByPath = new Map<string, IndexedFile>();\n\t\tfor (const f of existingFiles) {\n\t\t\texistingByPath.set(f.filePath, f);\n\t\t}\n\n\t\tconst scannedByPath = new Map<string, ScannedFile>();\n\t\tfor (const f of scannedFiles) {\n\t\t\tscannedByPath.set(f.filePath, f);\n\t\t}\n\n\t\tconst toAdd: ScannedFile[] = [];\n\t\tconst toUpdate: ScannedFile[] = [];\n\t\tconst toRemove: IndexedFile[] = [];\n\n\t\t// Find new and changed files\n\t\tfor (const scanned of scannedFiles) {\n\t\t\tconst existing = existingByPath.get(scanned.filePath);\n\t\t\tif (!existing) {\n\t\t\t\ttoAdd.push(scanned);\n\t\t\t} else if (existing.mtime !== scanned.mtime) {\n\t\t\t\ttoUpdate.push(scanned);\n\t\t\t}\n\t\t}\n\n\t\t// Find deleted files\n\t\tfor (const existing of existingFiles) {\n\t\t\tif (!scannedByPath.has(existing.filePath)) {\n\t\t\t\ttoRemove.push(existing);\n\t\t\t}\n\t\t}\n\n\t\tconst totalWork = toAdd.length + toUpdate.length + toRemove.length;\n\t\tif (totalWork === 0) {\n\t\t\treturn { added: 0, updated: 0, removed: 0, failed: 0 };\n\t\t}\n\n\t\t// Phase 3: Remove deleted files\n\t\tfor (const file of toRemove) {\n\t\t\tdb.deleteFile(file.id);\n\t\t}\n\n\t\t// Phase 4: Process new and changed files\n\t\tconst filesToProcess = [...toAdd, ...toUpdate];\n\t\tconst allNewChunkIds: number[] = [];\n\t\tlet failed = 0;\n\n\t\tfor (let i = 0; i < filesToProcess.length; i++) {\n\t\t\tconst scanned = filesToProcess[i];\n\t\t\tonProgress?.(\"indexing\", i + 
1, filesToProcess.length);\n\n\t\t\ttry {\n\t\t\t\t// Read file content and chunk BEFORE the transaction so that\n\t\t\t\t// failures in I/O or chunking don't leave a committed mtime\n\t\t\t\t// with zero chunks (which would make the file permanently invisible).\n\t\t\t\tconst absPath = scanned.filePath.startsWith(\"~memory/\")\n\t\t\t\t\t? path.join(this.config.globalMemoryDir ?? \"\", scanned.filePath.replace(\"~memory/\", \"\"))\n\t\t\t\t\t: path.join(config.projectRoot, scanned.filePath);\n\n\t\t\t\tconst content = readFileSync(absPath, \"utf-8\");\n\t\t\t\tconst chunks = await chunkFile(content, scanned.filePath, scanned.fileType);\n\t\t\t\tconst imports = extractImports(content, scanned.filePath, scanned.fileType);\n\n\t\t\t\t// All DB mutations in a single transaction — atomic per file\n\t\t\t\tdb.transaction(() => {\n\t\t\t\t\tconst fileId = db.upsertFile(scanned.filePath, scanned.mtime, scanned.fileType);\n\n\t\t\t\t\t// Delete old chunks (for updates)\n\t\t\t\t\tconst existingFile = existingByPath.get(scanned.filePath);\n\t\t\t\t\tif (existingFile) {\n\t\t\t\t\t\tdb.deleteChunksForFile(existingFile.id);\n\t\t\t\t\t\tdb.deleteImportsForFile(existingFile.id);\n\t\t\t\t\t}\n\n\t\t\t\t\t// Insert chunks and symbols\n\t\t\t\t\tfor (const chunk of chunks) {\n\t\t\t\t\t\tconst chunkId = db.insertChunk(\n\t\t\t\t\t\t\tfileId,\n\t\t\t\t\t\t\tchunk.filePath,\n\t\t\t\t\t\t\tchunk.startLine,\n\t\t\t\t\t\t\tchunk.endLine,\n\t\t\t\t\t\t\tchunk.kind,\n\t\t\t\t\t\t\tchunk.name,\n\t\t\t\t\t\t\tchunk.content,\n\t\t\t\t\t\t\tchunk.fileType,\n\t\t\t\t\t\t);\n\t\t\t\t\t\tallNewChunkIds.push(chunkId);\n\n\t\t\t\t\t\tif (chunk.name) {\n\t\t\t\t\t\t\tdb.insertSymbol(chunkId, chunk.name, chunk.kind);\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\t// Store import edges\n\t\t\t\t\tfor (const imp of imports) {\n\t\t\t\t\t\tdb.insertImport(fileId, imp);\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t} catch (err: unknown) {\n\t\t\t\t// Re-throw DB-level errors (SQLITE_FULL, disk full, etc.) 
— these\n\t\t\t\t// indicate infrastructure problems, not per-file issues.\n\t\t\t\tconst message = err instanceof Error ? err.message : String(err);\n\t\t\t\tif (message.includes(\"SQLITE\")) {\n\t\t\t\t\tthrow err;\n\t\t\t\t}\n\t\t\t\t// Skip files that fail to process (permissions, encoding issues, etc.)\n\t\t\t\tfailed++;\n\t\t\t}\n\t\t}\n\n\t\t// Phase 5: Embed new chunks\n\t\tif (allNewChunkIds.length > 0) {\n\t\t\tawait this.embedChunks(db, allNewChunkIds, onProgress);\n\t\t}\n\n\t\treturn { added: toAdd.length, updated: toUpdate.length, removed: toRemove.length, failed };\n\t}\n\n\t/**\n\t * Ensure all chunks have embeddings, generating any missing ones.\n\t */\n\tasync ensureEmbeddings(onProgress?: IndexProgressCallback): Promise<number> {\n\t\tconst db = this.getDb();\n\t\tconst missingIds = db.getChunkIdsWithoutEmbedding(this.config.modelName);\n\t\tif (missingIds.length === 0) return 0;\n\n\t\tawait this.embedChunks(db, missingIds, onProgress);\n\t\treturn missingIds.length;\n\t}\n\n\t/**\n\t * Generate embeddings for specific chunks.\n\t * Requires an embedder to be set via setEmbedder() before calling.\n\t */\n\tprivate async embedChunks(\n\t\tdb: SearchDatabase,\n\t\tchunkIds: number[],\n\t\tonProgress?: IndexProgressCallback,\n\t): Promise<void> {\n\t\tif (chunkIds.length === 0) return;\n\n\t\tif (!this.embedder) {\n\t\t\tthrow new Error(\"IndexManager: embedder not set. Call setEmbedder() before embedding.\");\n\t\t}\n\n\t\t// Get chunk contents\n\t\tconst chunks = db.getChunksById(chunkIds);\n\t\tconst texts = chunks.map((c) => c.content);\n\n\t\t// Generate embeddings\n\t\tconst vectors = await this.embedder.embedDocuments(texts, onProgress);\n\n\t\t// Store embeddings\n\t\tconst items = chunks.map((chunk, i) => ({\n\t\t\tchunkId: chunk.id,\n\t\t\tmodelName: this.config.modelName,\n\t\t\tvector: vectors[i],\n\t\t}));\n\t\tdb.batchUpsertEmbeddings(items);\n\t}\n\n\t/** Check if the index exists. 
*/\n\tindexExists(): boolean {\n\t\tconst dbPath = path.join(this.config.indexDir, DB_FILENAME);\n\t\treturn existsSync(dbPath);\n\t}\n\n\t/** Get index stats. */\n\tgetStats(): { files: number; chunks: number } | null {\n\t\tif (!this.indexExists()) return null;\n\t\tconst db = this.getDb();\n\t\treturn {\n\t\t\tfiles: db.getFileCount(),\n\t\t\tchunks: db.getChunkCount(),\n\t\t};\n\t}\n}\n\n// ============================================================================\n// Import Extraction (simple regex-based)\n// ============================================================================\n\n/**\n * Extract import targets from source code.\n * Returns resolved relative paths (without extensions for JS/TS).\n */\nfunction extractImports(content: string, filePath: string, fileType: string): string[] {\n\tconst imports: string[] = [];\n\tconst dir = path.dirname(filePath);\n\n\tif (fileType === \"typescript\" || fileType === \"tsx\" || fileType === \"javascript\") {\n\t\t// ES6 imports: import ... from '...'\n\t\t// require(): require('...')\n\t\tconst importRe = /(?:import\\s+.*?\\s+from\\s+|import\\s*\\(|require\\s*\\()\\s*['\"]([^'\"]+)['\"]/g;\n\t\tfor (const match of content.matchAll(importRe)) {\n\t\t\tconst target = match[1];\n\t\t\tif (target.startsWith(\".\")) {\n\t\t\t\timports.push(resolveImportPath(dir, target));\n\t\t\t}\n\t\t}\n\t} else if (fileType === \"python\") {\n\t\t// from . 
import X, from .module import X\n\t\tconst fromRe = /from\\s+(\\.\\S*)\\s+import/g;\n\t\tfor (const match of content.matchAll(fromRe)) {\n\t\t\tconst resolved = resolvePythonImport(dir, match[1]);\n\t\t\tif (resolved) imports.push(resolved);\n\t\t}\n\t} else if (fileType === \"go\") {\n\t\t// import \"...\" or import ( \"...\" )\n\t\tconst importRe = /import\\s+(?:\\(\\s*(?:[\\s\\S]*?)\\s*\\)|\"([^\"]+)\")/g;\n\t\tconst pathRe = /\"([^\"]+)\"/g;\n\t\tfor (const match of content.matchAll(importRe)) {\n\t\t\tif (match[1]) {\n\t\t\t\timports.push(match[1]);\n\t\t\t} else {\n\t\t\t\t// Multi-line import block\n\t\t\t\tfor (const inner of match[0].matchAll(pathRe)) {\n\t\t\t\t\timports.push(inner[1]);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t} else if (fileType === \"rust\") {\n\t\t// use crate::..., use super::...,\n\t\tconst useRe = /use\\s+((?:crate|super|self)::[\\w:]+)/g;\n\t\tfor (const match of content.matchAll(useRe)) {\n\t\t\timports.push(match[1].replace(/::/g, \"/\"));\n\t\t}\n\t} else if (fileType === \"java\") {\n\t\t// import com.example.Foo;\n\t\tconst importRe = /import\\s+([\\w.]+)\\s*;/g;\n\t\tfor (const match of content.matchAll(importRe)) {\n\t\t\timports.push(match[1].replace(/\\./g, \"/\"));\n\t\t}\n\t} else if (fileType === \"c\" || fileType === \"cpp\") {\n\t\t// #include \"local.h\"\n\t\tconst includeRe = /#include\\s+\"([^\"]+)\"/g;\n\t\tfor (const match of content.matchAll(includeRe)) {\n\t\t\timports.push(path.posix.join(dir, match[1]));\n\t\t}\n\t}\n\n\treturn imports;\n}\n\n/** Resolve a relative JS/TS import to a file path. */\nfunction resolveImportPath(fromDir: string, importPath: string): string {\n\t// Strip .js/.ts extension if present (normalize)\n\tconst cleaned = importPath.replace(/\\.[jt]sx?$/, \"\");\n\treturn path.posix.join(fromDir, cleaned);\n}\n\n/** Resolve a Python relative import. 
*/\nfunction resolvePythonImport(fromDir: string, importPath: string): string | null {\n\tif (importPath === \".\") return fromDir;\n\tconst dots = importPath.match(/^(\\.+)/);\n\tif (!dots) return null;\n\tconst levels = dots[1].length - 1;\n\tconst remainder = importPath.slice(dots[1].length).replace(/\\./g, \"/\");\n\tlet base = fromDir;\n\tfor (let i = 0; i < levels; i++) {\n\t\tbase = path.posix.dirname(base);\n\t}\n\treturn remainder ? path.posix.join(base, remainder) : base;\n}\n"]}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* BM25 metric — full-text search scoring via FTS5.
|
|
3
|
-
*/
|
|
4
|
-
import type { SearchDatabase } from "../db.js";
|
|
5
|
-
/**
|
|
6
|
-
* Compute BM25 scores for a query using FTS5.
|
|
7
|
-
* Returns a Map of chunkId → normalized score (0-1, higher = more relevant).
|
|
8
|
-
*/
|
|
9
|
-
export declare function computeBm25Scores(db: SearchDatabase, query: string, limit: number): Map<number, number>;
|
|
10
|
-
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../../../src/core/search/metrics/bm25.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAE/C;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAwBvG","sourcesContent":["/**\n * BM25 metric — full-text search scoring via FTS5.\n */\n\nimport type { SearchDatabase } from \"../db.js\";\n\n/**\n * Compute BM25 scores for a query using FTS5.\n * Returns a Map of chunkId → normalized score (0-1, higher = more relevant).\n */\nexport function computeBm25Scores(db: SearchDatabase, query: string, limit: number): Map<number, number> {\n\tconst scores = new Map<number, number>();\n\n\ttry {\n\t\tconst results = db.ftsSearch(query, limit);\n\t\tif (results.length === 0) return scores;\n\n\t\t// Find the maximum score for normalization\n\t\tlet maxScore = 0;\n\t\tfor (const r of results) {\n\t\t\tif (r.score > maxScore) maxScore = r.score;\n\t\t}\n\n\t\t// Normalize: top result → 1.0, others proportional\n\t\tif (maxScore > 0) {\n\t\t\tfor (const r of results) {\n\t\t\t\tscores.set(r.chunkId, r.score / maxScore);\n\t\t\t}\n\t\t}\n\t} catch {\n\t\t// If FTS query fails, return empty map\n\t}\n\n\treturn scores;\n}\n"]}
|