@jafreck/lore 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -6
- package/dist/cli.js +56 -0
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +6 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -3
- package/dist/index.js.map +1 -1
- package/dist/indexer/embedder.d.ts +67 -30
- package/dist/indexer/embedder.d.ts.map +1 -1
- package/dist/indexer/embedder.js +151 -146
- package/dist/indexer/embedder.js.map +1 -1
- package/dist/indexer/graph-analysis.d.ts +115 -0
- package/dist/indexer/graph-analysis.d.ts.map +1 -0
- package/dist/indexer/graph-analysis.js +575 -0
- package/dist/indexer/graph-analysis.js.map +1 -0
- package/dist/indexer/stages/embedding.d.ts +9 -0
- package/dist/indexer/stages/embedding.d.ts.map +1 -1
- package/dist/indexer/stages/embedding.js +127 -83
- package/dist/indexer/stages/embedding.js.map +1 -1
- package/dist/lore-server/db.d.ts +74 -0
- package/dist/lore-server/db.d.ts.map +1 -1
- package/dist/lore-server/db.js +114 -0
- package/dist/lore-server/db.js.map +1 -1
- package/dist/lore-server/server.d.ts.map +1 -1
- package/dist/lore-server/server.js +7 -17
- package/dist/lore-server/server.js.map +1 -1
- package/dist/lore-server/tool-registry.d.ts.map +1 -1
- package/dist/lore-server/tool-registry.js +6 -1
- package/dist/lore-server/tool-registry.js.map +1 -1
- package/dist/lore-server/tools/graph-analysis.d.ts +64 -0
- package/dist/lore-server/tools/graph-analysis.d.ts.map +1 -0
- package/dist/lore-server/tools/graph-analysis.js +82 -0
- package/dist/lore-server/tools/graph-analysis.js.map +1 -0
- package/dist/runtime.d.ts.map +1 -1
- package/dist/runtime.js +4 -5
- package/dist/runtime.js.map +1 -1
- package/package.json +2 -1
- package/dist/indexer/ensure-python-deps.d.ts +0 -22
- package/dist/indexer/ensure-python-deps.d.ts.map +0 -1
- package/dist/indexer/ensure-python-deps.js +0 -47
- package/dist/indexer/ensure-python-deps.js.map +0 -1
package/dist/indexer/embedder.js
CHANGED
|
@@ -2,16 +2,14 @@
|
|
|
2
2
|
* @module indexer/embedder
|
|
3
3
|
*
|
|
4
4
|
* Provides an `EmbeddingProvider` abstraction for generating dense vector
|
|
5
|
-
* embeddings from text. The primary implementation (`
|
|
6
|
-
*
|
|
7
|
-
*
|
|
5
|
+
* embeddings from text. The primary implementation (`TransformersJsProvider`)
|
|
6
|
+
* uses the `@huggingface/transformers` library to run ONNX models natively
|
|
7
|
+
* in Node.js — no Python or external processes required.
|
|
8
8
|
*
|
|
9
|
-
* The model's embedding dimensionality is auto-detected at startup
|
|
10
|
-
*
|
|
9
|
+
* The model's embedding dimensionality is auto-detected at startup by
|
|
10
|
+
* embedding a probe sentence and inspecting the output shape.
|
|
11
11
|
*/
|
|
12
|
-
import {
|
|
13
|
-
import * as readline from 'node:readline';
|
|
14
|
-
import { trackProcess } from '../process-tracker.js';
|
|
12
|
+
import { createHash } from 'node:crypto';
|
|
15
13
|
export function buildStructuralEmbeddingText(input) {
|
|
16
14
|
const parts = [
|
|
17
15
|
input.signature?.trim() ?? '',
|
|
@@ -22,51 +20,79 @@ export function buildStructuralEmbeddingText(input) {
|
|
|
22
20
|
const uniqueParts = [...new Set(parts)];
|
|
23
21
|
return uniqueParts.join('\n');
|
|
24
22
|
}
|
|
25
|
-
|
|
23
|
+
/** SHA-256 hex hash of an embedding input text (used for skip-unchanged logic). */
|
|
24
|
+
export function hashEmbeddingText(text) {
|
|
25
|
+
return createHash('sha256').update(text).digest('hex');
|
|
26
|
+
}
|
|
27
|
+
// ─── Token-aware batching ─────────────────────────────────────────────────────
|
|
28
|
+
/**
|
|
29
|
+
* Approximate token count for a text string.
|
|
30
|
+
* Uses the ~4 chars/token heuristic (reasonable for code/English).
|
|
31
|
+
*/
|
|
32
|
+
function estimateTokens(text) {
|
|
33
|
+
return Math.ceil(text.length / 4);
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Maximum total tokens per embedding batch. Larger batches amortise
|
|
37
|
+
* call overhead but Transformers.js pads every item to the longest in the
|
|
38
|
+
* batch, so one very long text inflates memory for the whole batch.
|
|
39
|
+
*
|
|
40
|
+
* 32 768 tokens ≈ 128 KB of text — keeps peak memory reasonable while
|
|
41
|
+
* avoiding pathological padding waste.
|
|
42
|
+
*/
|
|
43
|
+
const MAX_BATCH_TOKENS = 32_768;
|
|
44
|
+
/** Absolute cap on items per batch (avoids degenerate cases with many tiny texts). */
|
|
45
|
+
const MAX_BATCH_ITEMS = 512;
|
|
26
46
|
/**
|
|
27
|
-
*
|
|
28
|
-
* 1. Loads the model.
|
|
29
|
-
* 2. Prints `{"dims": <N>}` on the first stdout line.
|
|
30
|
-
* 3. Enters an NDJSON request loop (stdin → stdout).
|
|
47
|
+
* Split `items` into token-budget-aware batches.
|
|
31
48
|
*
|
|
32
|
-
*
|
|
49
|
+
* Each batch stays within `MAX_BATCH_TOKENS` total estimated tokens and
|
|
50
|
+
* `MAX_BATCH_ITEMS` items. An individual item that exceeds the token
|
|
51
|
+
* budget gets its own single-item batch.
|
|
33
52
|
*/
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
53
|
+
export function tokenAwareBatch(items, getText) {
|
|
54
|
+
const batches = [];
|
|
55
|
+
let current = [];
|
|
56
|
+
let currentTokens = 0;
|
|
57
|
+
for (const item of items) {
|
|
58
|
+
const tokens = estimateTokens(getText(item));
|
|
59
|
+
if (current.length > 0 && (currentTokens + tokens > MAX_BATCH_TOKENS || current.length >= MAX_BATCH_ITEMS)) {
|
|
60
|
+
batches.push(current);
|
|
61
|
+
current = [];
|
|
62
|
+
currentTokens = 0;
|
|
63
|
+
}
|
|
64
|
+
current.push(item);
|
|
65
|
+
currentTokens += tokens;
|
|
66
|
+
}
|
|
67
|
+
if (current.length > 0)
|
|
68
|
+
batches.push(current);
|
|
69
|
+
return batches;
|
|
70
|
+
}
|
|
50
71
|
/**
|
|
51
|
-
*
|
|
52
|
-
*
|
|
72
|
+
* Generates embeddings using `@huggingface/transformers` (Transformers.js).
|
|
73
|
+
* Runs ONNX models natively in Node — zero Python dependency.
|
|
74
|
+
*
|
|
75
|
+
* Auto-detects the best available ONNX execution provider:
|
|
76
|
+
* - `webgpu` only when requested via `LORE_EMBED_DEVICE` (never auto-selected)
|
|
77
|
+
* - `coreml` on Apple Silicon (macOS arm64)
|
|
78
|
+
* - `cpu` everywhere else (always available)
|
|
79
|
+
*
|
|
80
|
+
* Override via `LORE_EMBED_DEVICE` env var (e.g. `cpu`, `coreml`, `webgpu`).
|
|
81
|
+
* Override quantization via `LORE_EMBED_DTYPE` env var (e.g. `q8`, `q4`, `fp16`).
|
|
53
82
|
*
|
|
54
|
-
* Call `init()` first to
|
|
55
|
-
* dimensionality.
|
|
56
|
-
* provider for efficiency.
|
|
83
|
+
* Call `init()` first to download/load the model and detect its embedding
|
|
84
|
+
* dimensionality. The model is kept in memory for the provider's lifetime.
|
|
57
85
|
*/
|
|
58
|
-
export class
|
|
86
|
+
export class TransformersJsProvider {
|
|
59
87
|
modelName;
|
|
88
|
+
dtype;
|
|
60
89
|
_dims = null;
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
pendingRequests = [];
|
|
64
|
-
pythonBin;
|
|
65
|
-
/** Whether the first stdout line (dims handshake) has been consumed. */
|
|
90
|
+
_device = null;
|
|
91
|
+
_pipeline = null;
|
|
66
92
|
initialized = false;
|
|
67
|
-
constructor(modelName,
|
|
93
|
+
constructor(modelName, dtype) {
|
|
68
94
|
this.modelName = modelName;
|
|
69
|
-
this.
|
|
95
|
+
this.dtype = dtype ?? process.env['LORE_EMBED_DTYPE'] ?? 'fp32';
|
|
70
96
|
}
|
|
71
97
|
/** Embedding dimensionality — available only after `init()`. */
|
|
72
98
|
get dims() {
|
|
@@ -75,129 +101,108 @@ export class SentenceTransformersProvider {
|
|
|
75
101
|
}
|
|
76
102
|
return this._dims;
|
|
77
103
|
}
|
|
104
|
+
/** ONNX execution provider selected during init (cpu/coreml/webgpu). */
|
|
105
|
+
get device() {
|
|
106
|
+
return this._device ?? 'unknown';
|
|
107
|
+
}
|
|
78
108
|
/**
|
|
79
|
-
*
|
|
80
|
-
*
|
|
109
|
+
* Load the model via Transformers.js and detect embedding dimensionality
|
|
110
|
+
* by running a single probe sentence.
|
|
81
111
|
*/
|
|
82
112
|
async init() {
|
|
83
113
|
if (this.initialized)
|
|
84
114
|
return;
|
|
85
|
-
this.
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if (typeof msg.dims === 'number') {
|
|
92
|
-
this._dims = msg.dims;
|
|
93
|
-
this.initialized = true;
|
|
94
|
-
resolve();
|
|
95
|
-
}
|
|
96
|
-
else {
|
|
97
|
-
reject(new Error(`Unexpected handshake from embedding subprocess: ${line}`));
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
catch (err) {
|
|
101
|
-
reject(new Error(`Failed to parse embedding handshake: ${line}`));
|
|
102
|
-
}
|
|
103
|
-
};
|
|
104
|
-
// Read exactly one line for the handshake, then re-wire for embed requests.
|
|
105
|
-
this.rl.once('line', onLine);
|
|
106
|
-
// If the process dies before the handshake, reject.
|
|
107
|
-
this.proc.once('exit', (code) => {
|
|
108
|
-
if (!this.initialized) {
|
|
109
|
-
reject(new Error(`Embedding subprocess exited with code ${code} before handshake`));
|
|
110
|
-
}
|
|
111
|
-
});
|
|
115
|
+
const device = this.detectDevice();
|
|
116
|
+
this._device = device;
|
|
117
|
+
const { pipeline } = await import('@huggingface/transformers');
|
|
118
|
+
this._pipeline = await pipeline('feature-extraction', this.modelName, {
|
|
119
|
+
device: device,
|
|
120
|
+
...(this.dtype !== 'fp32' && { dtype: this.dtype }),
|
|
112
121
|
});
|
|
122
|
+
// Probe the model to detect dimensionality.
|
|
123
|
+
const probe = await this._pipeline('dimensionality probe', { pooling: 'mean', normalize: true });
|
|
124
|
+
const probeList = probe.tolist();
|
|
125
|
+
this._dims = probeList[0].length;
|
|
126
|
+
this.initialized = true;
|
|
113
127
|
}
|
|
114
128
|
async embed(texts) {
|
|
115
129
|
if (texts.length === 0)
|
|
116
130
|
return [];
|
|
117
|
-
if (!this.initialized) {
|
|
131
|
+
if (!this.initialized || !this._pipeline) {
|
|
118
132
|
throw new Error('EmbeddingProvider not initialised — call init() first');
|
|
119
133
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
this.proc.stdin.write(JSON.stringify({ texts }) + '\n');
|
|
123
|
-
});
|
|
134
|
+
const output = await this._pipeline(texts, { pooling: 'mean', normalize: true });
|
|
135
|
+
return output.tolist();
|
|
124
136
|
}
|
|
125
137
|
async dispose() {
|
|
126
|
-
if (
|
|
127
|
-
return;
|
|
128
|
-
const proc = this.proc;
|
|
129
|
-
// Reject all pending requests before tearing down.
|
|
130
|
-
const pending = this.pendingRequests.splice(0);
|
|
131
|
-
const err = new Error('EmbeddingProvider disposed');
|
|
132
|
-
for (const r of pending)
|
|
133
|
-
r.reject(err);
|
|
134
|
-
this.proc = null;
|
|
135
|
-
this.rl?.close();
|
|
136
|
-
this.rl = null;
|
|
137
|
-
proc.stdin?.end();
|
|
138
|
-
await new Promise(resolve => {
|
|
139
|
-
const timeout = setTimeout(() => { proc.kill(); resolve(); }, 5_000);
|
|
140
|
-
proc.once('close', () => { clearTimeout(timeout); resolve(); });
|
|
141
|
-
});
|
|
142
|
-
}
|
|
143
|
-
/** Spawn the Python subprocess and wire up the readline interface. */
|
|
144
|
-
spawnProcess() {
|
|
145
|
-
if (this.proc)
|
|
146
|
-
return;
|
|
147
|
-
this.proc = spawn(this.pythonBin, ['-c', BOOTSTRAP_SCRIPT, this.modelName], {
|
|
148
|
-
stdio: ['pipe', 'pipe', 'inherit'],
|
|
149
|
-
});
|
|
150
|
-
trackProcess(this.proc);
|
|
151
|
-
this.rl = readline.createInterface({ input: this.proc.stdout });
|
|
152
|
-
// After init(), all subsequent lines are embed responses.
|
|
153
|
-
this.rl.on('line', (line) => {
|
|
154
|
-
// Skip lines until init handshake is done (handled by init's once listener).
|
|
155
|
-
if (!this.initialized)
|
|
156
|
-
return;
|
|
157
|
-
const pending = this.pendingRequests.shift();
|
|
158
|
-
if (!pending)
|
|
159
|
-
return;
|
|
138
|
+
if (this._pipeline) {
|
|
160
139
|
try {
|
|
161
|
-
|
|
162
|
-
pending.resolve(embeddings);
|
|
163
|
-
}
|
|
164
|
-
catch (err) {
|
|
165
|
-
pending.reject(err);
|
|
166
|
-
}
|
|
167
|
-
});
|
|
168
|
-
this.proc.on('error', (err) => {
|
|
169
|
-
const reqs = this.pendingRequests.splice(0);
|
|
170
|
-
this.proc = null;
|
|
171
|
-
this.rl = null;
|
|
172
|
-
for (const r of reqs)
|
|
173
|
-
r.reject(err);
|
|
174
|
-
});
|
|
175
|
-
this.proc.on('exit', (code) => {
|
|
176
|
-
if (code !== 0 && this.pendingRequests.length > 0) {
|
|
177
|
-
const err = new Error(`Embedding subprocess exited with code ${code}`);
|
|
178
|
-
const reqs = this.pendingRequests.splice(0);
|
|
179
|
-
for (const r of reqs)
|
|
180
|
-
r.reject(err);
|
|
140
|
+
await this._pipeline.dispose?.();
|
|
181
141
|
}
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
142
|
+
catch { /* best-effort cleanup */ }
|
|
143
|
+
this._pipeline = null;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Detect the best available ONNX execution provider.
|
|
148
|
+
* Respects `LORE_EMBED_DEVICE` env var for explicit override.
|
|
149
|
+
*/
|
|
150
|
+
detectDevice() {
|
|
151
|
+
const envDevice = process.env['LORE_EMBED_DEVICE'];
|
|
152
|
+
if (envDevice)
|
|
153
|
+
return envDevice;
|
|
154
|
+
// CoreML is available on Apple Silicon via onnxruntime-node
|
|
155
|
+
if (process.platform === 'darwin' && process.arch === 'arm64') {
|
|
156
|
+
return 'coreml';
|
|
157
|
+
}
|
|
158
|
+
return 'cpu';
|
|
187
159
|
}
|
|
188
160
|
}
|
|
189
|
-
// ───
|
|
190
|
-
/** Default embedding model used when no model is explicitly specified. */
|
|
191
|
-
export const DEFAULT_EMBEDDING_MODEL = 'Qwen/Qwen3-Embedding-4B';
|
|
161
|
+
// ─── Lazy embedding provider ──────────────────────────────────────────────────
|
|
192
162
|
/**
|
|
193
|
-
*
|
|
194
|
-
*
|
|
163
|
+
* Wraps a `TransformersJsProvider` with deferred initialisation.
|
|
164
|
+
*
|
|
165
|
+
* The model is only downloaded and loaded on the first call to `embed()` or
|
|
166
|
+
* an explicit `init()`. This allows `lore index` to complete faster when
|
|
167
|
+
* embeddings are configured but the user primarily uses structural search.
|
|
195
168
|
*
|
|
196
|
-
*
|
|
197
|
-
*
|
|
169
|
+
* The MCP server can pass a `LazyEmbeddingProvider` so semantic search
|
|
170
|
+
* triggers model loading on-demand rather than at startup.
|
|
198
171
|
*/
|
|
199
|
-
export
|
|
200
|
-
|
|
201
|
-
|
|
172
|
+
export class LazyEmbeddingProvider {
|
|
173
|
+
inner;
|
|
174
|
+
_initPromise = null;
|
|
175
|
+
constructor(modelName, dtype) {
|
|
176
|
+
this.inner = new TransformersJsProvider(modelName, dtype);
|
|
177
|
+
}
|
|
178
|
+
get modelName() { return this.inner.modelName; }
|
|
179
|
+
get dims() { return this.inner.dims; }
|
|
180
|
+
async init() {
|
|
181
|
+
if (!this._initPromise) {
|
|
182
|
+
this._initPromise = this.inner.init();
|
|
183
|
+
}
|
|
184
|
+
return this._initPromise;
|
|
185
|
+
}
|
|
186
|
+
async embed(texts) {
|
|
187
|
+
if (texts.length === 0)
|
|
188
|
+
return [];
|
|
189
|
+
await this.init();
|
|
190
|
+
return this.inner.embed(texts);
|
|
191
|
+
}
|
|
192
|
+
async dispose() {
|
|
193
|
+
if (this._initPromise) {
|
|
194
|
+
try {
|
|
195
|
+
await this._initPromise;
|
|
196
|
+
}
|
|
197
|
+
catch { /* init may have failed */ }
|
|
198
|
+
}
|
|
199
|
+
return this.inner.dispose();
|
|
200
|
+
}
|
|
202
201
|
}
|
|
202
|
+
// ─── Default model ────────────────────────────────────────────────────────────
|
|
203
|
+
/**
|
|
204
|
+
* Default embedding model — `Qwen/Qwen3-Embedding-0.6B` is a compact,
|
|
205
|
+
* 1024-dim model with strong multilingual and code understanding.
|
|
206
|
+
*/
|
|
207
|
+
export const DEFAULT_EMBEDDING_MODEL = 'Qwen/Qwen3-Embedding-0.6B';
|
|
203
208
|
//# sourceMappingURL=embedder.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/indexer/embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../src/indexer/embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA8BzC,MAAM,UAAU,4BAA4B,CAAC,KAA+B;IAC1E,MAAM,KAAK,GAAG;QACZ,KAAK,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE;QAC7B,KAAK,CAAC,qBAAqB,EAAE,IAAI,EAAE,IAAI,EAAE;QACzC,KAAK,CAAC,kBAAkB,EAAE,IAAI,EAAE,IAAI,EAAE;QACtC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE;KAClB,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACxC,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,mFAAmF;AACnF,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACzD,CAAC;AAED,iFAAiF;AAEjF;;;GAGG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC,sFAAsF;AACtF,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAI,KAAU,EAAE,OAA4B;IACzE,MAAM,OAAO,GAAU,EAAE,CAAC;IAC1B,IAAI,OAAO,GAAQ,EAAE,CAAC;IACtB,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC7C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,aAAa,GAAG,MAAM,GAAG,gBAAgB,IAAI,OAAO,CAAC,MAAM,IAAI,eAAe,CAAC,EAAE,CAAC;YAC3G,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,GAAG,EAAE,CAAC;YACb,aAAa,GAAG,CAAC,CAAC;QACpB,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,aAAa,IAAI,MAAM,CAAC;IAC1B,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC9C,OAAO,OAAO,CAAC;AACjB,CAAC;AAYD;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,sBAAsB;IACxB,SAAS,CAAS;IAClB,KAAK,CAAY;IAClB,KAAK,GAAkB,IAAI,CAAC;IAC5B,OAAO,GAAkB,IAAI,CAAC;IAC9B,SAAS,GAAqC,IAAI,CAAC;IACnD,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,KAAiB;QAC9C,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,KAAK,GAAG,KAAK,IAAK,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAA2B,IAAI,MAAM,CAAC;IAC7F,CAAC;IAED,gEAAgE;IAChE,IAAI,IAA
I;QACN,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAC3E,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,wEAAwE;IACxE,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,OAAO,IAAI,SAAS,CAAC;IACnC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACnC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;QAEtB,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;QAC/D,IAAI,CAAC,SAAS,GAAG,MAAO,QAAuB,CAC7C,oBAAoB,EACpB,IAAI,CAAC,SAAS,EACd;YACE,MAAM,EAAE,MAAe;YACvB,GAAG,CAAC,IAAI,CAAC,KAAK,KAAK,MAAM,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;SACpD,CACF,CAAC;QAEF,4CAA4C;QAC5C,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,sBAAsB,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACjG,MAAM,SAAS,GAAI,KAAkC,CAAC,MAAM,EAAE,CAAC;QAC/D,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC,MAAM,CAAC;QAClC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAClC,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAC3E,CAAC;QACD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACjF,OAAQ,MAAmC,CAAC,MAAM,EAAE,CAAC;IACvD,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,IAAI,CAAC;gBACH,MAAO,IAAI,CAAC,SAA0D,CAAC,OAAO,EAAE,EAAE,CAAC;YACrF,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YACrC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,YAAY;QAClB,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QACnD,IAAI,SAAS;YAAE,OAAO,SAAS,CAAC;QAEhC,4DAA4D;QAC5D,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC9D,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAED,iFAAiF;AAEjF;;;;;;;;;GASG;AACH,MAAM,OAAO,qBAAqB;IACf,KAAK,CAAyB;IACvC,YAAY,GAAyB,IAAI,CAAC;IAElD,YAAY,SAAiB,EAAE,KAAiB;QAC9C,IAAI,CAAC,KAAK,GAAG,IAAI,sBAAsB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,SAAS,KAAa,OA
AO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;IAExD,IAAI,IAAI,KAAa,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;IAE9C,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QACxC,CAAC;QACD,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAClC,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,IAAI,CAAC;gBAAC,MAAM,IAAI,CAAC,YAAY,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,0BAA0B,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;IAC9B,CAAC;CACF;AAED,iFAAiF;AAEjF;;;GAGG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,2BAA2B,CAAC"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module indexer/graph-analysis
|
|
3
|
+
*
|
|
4
|
+
* Higher-level graph analysis primitives operating on the SQLite
|
|
5
|
+
* knowledge-base:
|
|
6
|
+
*
|
|
7
|
+
* - `detectSymbolCycles(db, opts)` — Tarjan's SCC on the symbol adjacency
|
|
8
|
+
* graph (call_refs, type_refs, or both).
|
|
9
|
+
* - `findConnectedComponents(db, opts)` — union-find connected components
|
|
10
|
+
* at file or symbol scope.
|
|
11
|
+
* - `clusterSymbols(db, opts)` — partitions the call graph into bounded-
|
|
12
|
+
* size coherent chunks via SCC contraction, same-file merge, greedy
|
|
13
|
+
* edge-weight consolidation, and affinity folding.
|
|
14
|
+
* - `buildCodebaseSummary(db, opts)` — condensed dependency summary with
|
|
15
|
+
* per-module files, symbol counts, line spans, and SCCs.
|
|
16
|
+
*/
|
|
17
|
+
import type { Database } from './db.js';
|
|
18
|
+
import type { ResolutionMethod } from './resolution-method.js';
|
|
19
|
+
export type EdgeKind = 'call' | 'type' | 'both';
|
|
20
|
+
export interface GraphAnalysisOptions {
|
|
21
|
+
/** Which edge kinds to traverse. Default: 'both'. */
|
|
22
|
+
edgeKinds?: EdgeKind;
|
|
23
|
+
/** Resolution methods to include. Default: RESOLVED_METHODS. */
|
|
24
|
+
methods?: ResolutionMethod[];
|
|
25
|
+
/** Branch filter. */
|
|
26
|
+
branch?: string;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Detects strongly connected components (mutual recursion, circular type
|
|
30
|
+
* dependencies) in the **symbol** adjacency graph using Tarjan's algorithm.
|
|
31
|
+
*
|
|
32
|
+
* Returns arrays of symbol IDs where each SCC has 2+ members (or a single
|
|
33
|
+
* member with a self-edge).
|
|
34
|
+
*/
|
|
35
|
+
export declare function detectSymbolCycles(db: Database.Database, options?: GraphAnalysisOptions): number[][];
|
|
36
|
+
export interface ConnectedComponentsOptions extends GraphAnalysisOptions {
|
|
37
|
+
/** Scope of the analysis. Default: 'symbol'. */
|
|
38
|
+
scope?: 'file' | 'symbol';
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Finds connected components in the **undirected** graph of files or symbols
|
|
42
|
+
* using a union-find (disjoint set) data structure.
|
|
43
|
+
*
|
|
44
|
+
* Returns arrays of IDs where each component has 2+ members.
|
|
45
|
+
*/
|
|
46
|
+
export declare function findConnectedComponents(db: Database.Database, options?: ConnectedComponentsOptions): number[][];
|
|
47
|
+
export interface ClusterOptions extends GraphAnalysisOptions {
|
|
48
|
+
/** Maximum total line span per cluster. Default: 500. */
|
|
49
|
+
maxLinesPerCluster?: number;
|
|
50
|
+
}
|
|
51
|
+
export interface SymbolCluster {
|
|
52
|
+
/** Cluster index (0-based). */
|
|
53
|
+
id: number;
|
|
54
|
+
/** Symbol IDs belonging to this cluster. */
|
|
55
|
+
symbolIds: number[];
|
|
56
|
+
/** Total line count across all symbols. */
|
|
57
|
+
totalLines: number;
|
|
58
|
+
/** File IDs that have at least one symbol in this cluster. */
|
|
59
|
+
fileIds: number[];
|
|
60
|
+
/** Number of internal edges (edges within the cluster). */
|
|
61
|
+
internalEdges: number;
|
|
62
|
+
/** Number of external edges (edges crossing cluster boundaries). */
|
|
63
|
+
externalEdges: number;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Partition the symbol call graph into bounded-size coherent clusters.
|
|
67
|
+
*
|
|
68
|
+
* Algorithm:
|
|
69
|
+
* 1. Contract SCCs (mutually dependent symbols → same cluster)
|
|
70
|
+
* 2. Merge same-file symbols into one cluster per file
|
|
71
|
+
* 3. Greedy merge by edge weight (respecting maxLines)
|
|
72
|
+
* 4. Fold undersized clusters into their heaviest-edge neighbor
|
|
73
|
+
*/
|
|
74
|
+
export declare function clusterSymbols(db: Database.Database, options?: ClusterOptions): SymbolCluster[];
|
|
75
|
+
export interface CodebaseSummaryOptions extends GraphAnalysisOptions {
|
|
76
|
+
/** Maximum lines per module for clustering. Default: 500. */
|
|
77
|
+
maxLinesPerModule?: number;
|
|
78
|
+
}
|
|
79
|
+
export interface ModuleSummary {
|
|
80
|
+
/** Module index. */
|
|
81
|
+
id: number;
|
|
82
|
+
/** File paths in this module. */
|
|
83
|
+
files: string[];
|
|
84
|
+
/** Total symbol count. */
|
|
85
|
+
symbolCount: number;
|
|
86
|
+
/** Total line span. */
|
|
87
|
+
totalLines: number;
|
|
88
|
+
/** IDs of modules this module depends on. */
|
|
89
|
+
dependsOn: number[];
|
|
90
|
+
/** IDs of modules that depend on this module. */
|
|
91
|
+
dependedOnBy: number[];
|
|
92
|
+
}
|
|
93
|
+
export interface CodebaseSummary {
|
|
94
|
+
/** Total indexed files. */
|
|
95
|
+
totalFiles: number;
|
|
96
|
+
/** Total indexed symbols. */
|
|
97
|
+
totalSymbols: number;
|
|
98
|
+
/** Total resolved edges. */
|
|
99
|
+
totalEdges: number;
|
|
100
|
+
/** Modules grouped by clustering. */
|
|
101
|
+
modules: ModuleSummary[];
|
|
102
|
+
/** Connected component groups (module IDs). */
|
|
103
|
+
connectedComponents: number[][];
|
|
104
|
+
/** Strongly connected component groups (module IDs). */
|
|
105
|
+
cyclicGroups: number[][];
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Produces a condensed dependency summary of the codebase — the "30-second
|
|
109
|
+
* architecture overview."
|
|
110
|
+
*
|
|
111
|
+
* Combines symbol clustering with inter-module edge analysis and SCC/CC
|
|
112
|
+
* detection at the module level.
|
|
113
|
+
*/
|
|
114
|
+
export declare function buildCodebaseSummary(db: Database.Database, options?: CodebaseSummaryOptions): CodebaseSummary;
|
|
115
|
+
//# sourceMappingURL=graph-analysis.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"graph-analysis.d.ts","sourceRoot":"","sources":["../../src/indexer/graph-analysis.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAK/D,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC;AAEhD,MAAM,WAAW,oBAAoB;IACnC,qDAAqD;IACrD,SAAS,CAAC,EAAE,QAAQ,CAAC;IACrB,gEAAgE;IAChE,OAAO,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAC7B,qBAAqB;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAqED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,GAAE,oBAAyB,GACjC,MAAM,EAAE,EAAE,CAoEZ;AAID,MAAM,WAAW,0BAA2B,SAAQ,oBAAoB;IACtE,gDAAgD;IAChD,KAAK,CAAC,EAAE,MAAM,GAAG,QAAQ,CAAC;CAC3B;AAED;;;;;GAKG;AACH,wBAAgB,uBAAuB,CACrC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,GAAE,0BAA+B,GACvC,MAAM,EAAE,EAAE,CAOZ;AAkHD,MAAM,WAAW,cAAe,SAAQ,oBAAoB;IAC1D,yDAAyD;IACzD,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,4CAA4C;IAC5C,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,2CAA2C;IAC3C,UAAU,EAAE,MAAM,CAAC;IACnB,8DAA8D;IAC9D,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,2DAA2D;IAC3D,aAAa,EAAE,MAAM,CAAC;IACtB,oEAAoE;IACpE,aAAa,EAAE,MAAM,CAAC;CACvB;AAQD;;;;;;;;GAQG;AACH,wBAAgB,cAAc,CAC5B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,GAAE,cAAmB,GAC3B,aAAa,EAAE,CA+LjB;AAmBD,MAAM,WAAW,sBAAuB,SAAQ,oBAAoB;IAClE,6DAA6D;IAC7D,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,aAAa;IAC5B,oBAAoB;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,iCAAiC;IACjC,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,0BAA0B;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,6CAA6C;IAC7C,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,iDAAiD;IACjD,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,eAAe;IAC9B,2BAA2B;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,4BAA4B;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,+CAA+C;IAC/C,mBAAmB,EAAE,MAAM,EAAE,EAAE,CAAC;IAChC,wDAAwD;IACxD,YAAY,EAAE,MAAM,EAAE,EAAE,CAAC;CAC1B;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAClC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,GAAE,sBAA2B,GAC
nC,eAAe,CAqHjB"}
|