@danielblomma/cortex-mcp 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +203 -0
  2. package/bin/cortex.mjs +621 -0
  3. package/docs/MCP_MARKETPLACE.md +160 -0
  4. package/package.json +42 -0
  5. package/scaffold/.context/config.yaml +21 -0
  6. package/scaffold/.context/ontology.cypher +63 -0
  7. package/scaffold/.context/rules.yaml +25 -0
  8. package/scaffold/.githooks/_cortex-update-runner.sh +58 -0
  9. package/scaffold/.githooks/post-checkout +22 -0
  10. package/scaffold/.githooks/post-merge +14 -0
  11. package/scaffold/docs/architecture.md +22 -0
  12. package/scaffold/mcp/package-lock.json +2623 -0
  13. package/scaffold/mcp/package.json +29 -0
  14. package/scaffold/mcp/src/embed.ts +416 -0
  15. package/scaffold/mcp/src/embeddings.ts +192 -0
  16. package/scaffold/mcp/src/graph.ts +666 -0
  17. package/scaffold/mcp/src/loadGraph.ts +597 -0
  18. package/scaffold/mcp/src/paths.ts +33 -0
  19. package/scaffold/mcp/src/search.ts +412 -0
  20. package/scaffold/mcp/src/server.ts +98 -0
  21. package/scaffold/mcp/src/types.ts +109 -0
  22. package/scaffold/mcp/tests/server.test.mjs +60 -0
  23. package/scaffold/mcp/tsconfig.json +13 -0
  24. package/scaffold/scripts/bootstrap.sh +57 -0
  25. package/scaffold/scripts/capture-note.sh +55 -0
  26. package/scaffold/scripts/context.sh +109 -0
  27. package/scaffold/scripts/embed.sh +15 -0
  28. package/scaffold/scripts/ingest.mjs +1118 -0
  29. package/scaffold/scripts/ingest.sh +20 -0
  30. package/scaffold/scripts/install-git-hooks.sh +21 -0
  31. package/scaffold/scripts/load-kuzu.sh +6 -0
  32. package/scaffold/scripts/load-ryu.sh +18 -0
  33. package/scaffold/scripts/parsers/javascript.mjs +390 -0
  34. package/scaffold/scripts/parsers/package-lock.json +51 -0
  35. package/scaffold/scripts/parsers/package.json +17 -0
  36. package/scaffold/scripts/plan-state-engine.cjs +310 -0
  37. package/scaffold/scripts/plan-state.sh +71 -0
  38. package/scaffold/scripts/refresh.sh +9 -0
  39. package/scaffold/scripts/status.sh +282 -0
  40. package/scaffold/scripts/update-context.sh +18 -0
  41. package/scaffold/scripts/watch.sh +374 -0
@@ -0,0 +1,29 @@
1
+ {
2
+ "name": "cortex-mcp",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "build": "tsc -p tsconfig.json",
8
+ "embed": "npm run build --silent && node dist/embed.js",
9
+ "graph:load": "npm run build --silent && node dist/loadGraph.js",
10
+ "dev": "node --loader ts-node/esm src/server.ts",
11
+ "start": "node dist/server.js",
12
+ "test": "npm run build --silent && node --test tests/*.test.mjs"
13
+ },
14
+ "dependencies": {
15
+ "@modelcontextprotocol/sdk": "^1.27.1",
16
+ "@xenova/transformers": "^2.17.2",
17
+ "ryugraph": "^25.9.1",
18
+ "zod": "^3.24.1"
19
+ },
20
+ "overrides": {
21
+ "cmake-js": "^8.0.0",
22
+ "tar": "^7.5.9"
23
+ },
24
+ "devDependencies": {
25
+ "@types/node": "^22.10.1",
26
+ "ts-node": "^10.9.2",
27
+ "typescript": "^5.7.2"
28
+ }
29
+ }
@@ -0,0 +1,416 @@
1
+ import crypto from "node:crypto";
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import { env, pipeline } from "@xenova/transformers";
6
+
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = path.dirname(__filename);
9
+ const REPO_ROOT = path.resolve(__dirname, "../..");
10
+ const CONTEXT_DIR = path.join(REPO_ROOT, ".context");
11
+ const CACHE_DIR = path.join(CONTEXT_DIR, "cache");
12
+ const EMBEDDINGS_DIR = path.join(CONTEXT_DIR, "embeddings");
13
+ const EMBEDDINGS_PATH = path.join(EMBEDDINGS_DIR, "entities.jsonl");
14
+ const EMBEDDINGS_MANIFEST_PATH = path.join(EMBEDDINGS_DIR, "manifest.json");
15
+ const MODEL_CACHE_DIR = path.join(EMBEDDINGS_DIR, "models");
16
+
17
+ const DEFAULT_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
18
+ const DEFAULT_MAX_TEXT_CHARS = 7000;
19
+
20
+ type JsonValue = string | number | boolean | null | JsonObject | JsonValue[];
21
+ type JsonObject = { [key: string]: JsonValue };
22
+
23
+ type FileEntity = {
24
+ id: string;
25
+ type: "File";
26
+ kind: string;
27
+ label: string;
28
+ path: string;
29
+ status: string;
30
+ source_of_truth: boolean;
31
+ trust_level: number;
32
+ updated_at: string;
33
+ text: string;
34
+ signature: string;
35
+ };
36
+
37
+ type RuleEntity = {
38
+ id: string;
39
+ type: "Rule";
40
+ kind: "RULE";
41
+ label: string;
42
+ path: string;
43
+ status: string;
44
+ source_of_truth: boolean;
45
+ trust_level: number;
46
+ updated_at: string;
47
+ text: string;
48
+ signature: string;
49
+ };
50
+
51
+ type AdrEntity = {
52
+ id: string;
53
+ type: "ADR";
54
+ kind: "ADR";
55
+ label: string;
56
+ path: string;
57
+ status: string;
58
+ source_of_truth: boolean;
59
+ trust_level: number;
60
+ updated_at: string;
61
+ text: string;
62
+ signature: string;
63
+ };
64
+
65
+ type SearchEntity = FileEntity | RuleEntity | AdrEntity;
66
+
67
+ type EmbeddingRecord = {
68
+ id: string;
69
+ entity_type: string;
70
+ kind: string;
71
+ label: string;
72
+ path: string;
73
+ status: string;
74
+ source_of_truth: boolean;
75
+ trust_level: number;
76
+ updated_at: string;
77
+ signature: string;
78
+ model: string;
79
+ dimensions: number;
80
+ vector: number[];
81
+ };
82
+
83
/**
 * Reads the command-line flags that control the embedding run.
 *
 * @param argv Full argument vector; the first two entries are skipped before
 *   flags are inspected.
 * @returns `{ mode: "changed" }` when `--changed` is present, otherwise
 *   `{ mode: "full" }`.
 */
function parseArgs(argv: string[]): { mode: "full" | "changed" } {
  const flags = argv.slice(2);
  if (flags.includes("--changed")) {
    return { mode: "changed" };
  }
  return { mode: "full" };
}
89
+
90
/**
 * Returns `value` when it is a string, otherwise `fallback`.
 *
 * @param value Parsed JSON value (or `undefined` for a missing key).
 * @param fallback Value used for anything that is not a string; defaults to `""`.
 */
function asString(value: JsonValue | undefined, fallback = ""): string {
  if (typeof value !== "string") {
    return fallback;
  }
  return value;
}
93
+
94
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 *
 * @param value Parsed JSON value (or `undefined` for a missing key).
 * @param fallback Value used for non-numbers, `NaN` and infinities; defaults to `0`.
 */
function asNumber(value: JsonValue | undefined, fallback = 0): number {
  if (typeof value !== "number") {
    return fallback;
  }
  return Number.isFinite(value) ? value : fallback;
}
97
+
98
/**
 * Returns `value` when it is a boolean, otherwise `fallback`.
 *
 * @param value Parsed JSON value (or `undefined` for a missing key).
 * @param fallback Value used for anything that is not a boolean; defaults to `false`.
 */
function asBoolean(value: JsonValue | undefined, fallback = false): boolean {
  if (typeof value !== "boolean") {
    return fallback;
  }
  return value;
}
101
+
102
/**
 * Computes the SHA-256 digest of `value`.
 *
 * @param value Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
function hashText(value: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
105
+
106
/**
 * Collapses every run of whitespace into a single space and strips leading
 * and trailing whitespace.
 *
 * @param value Text to normalize.
 * @returns The normalized text; an all-whitespace input yields `""`.
 */
function normalizeText(value: string): string {
  const words = value.trim().split(/\s+/);
  return words.join(" ");
}
109
+
110
/**
 * Truncates `value` to at most `maxChars` UTF-16 code units.
 *
 * Unlike a bare `slice`, the cut never lands in the middle of a surrogate
 * pair: if the last kept unit would be a high surrogate whose low surrogate is
 * being dropped, the high surrogate is dropped too, so the result never ends
 * in a lone surrogate.
 *
 * @param value Text to truncate.
 * @param maxChars Maximum length in UTF-16 code units. Zero, negative and
 *   `NaN` limits yield `""`; fractional limits are floored.
 * @returns The truncated text, or `value` itself when it already fits.
 */
function clampText(value: string, maxChars: number): string {
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(maxChars > 0)) {
    return "";
  }

  const limit = Math.floor(maxChars);
  if (value.length <= limit) {
    return value;
  }

  const lastKept = value.charCodeAt(limit - 1);
  const firstDropped = value.charCodeAt(limit);
  const splitsSurrogatePair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;

  return value.slice(0, splitsSurrogatePair ? limit - 1 : limit);
}
113
+
114
/**
 * Reads a JSON Lines file into an array of parsed values.
 *
 * Lines are trimmed; blank lines, lines that fail to parse and lines that
 * parse to `null` are skipped without raising an error.
 *
 * @param filePath Path of the `.jsonl` file.
 * @returns Parsed entries in file order, or `[]` when the file does not exist.
 */
function readJsonl(filePath: string): JsonObject[] {
  const records: JsonObject[] = [];
  if (!fs.existsSync(filePath)) {
    return records;
  }

  const contents = fs.readFileSync(filePath, "utf8");
  for (const rawLine of contents.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "") {
      continue;
    }

    try {
      const parsed = JSON.parse(line) as JsonObject | null;
      if (parsed !== null) {
        records.push(parsed);
      }
    } catch {
      // Malformed line: ignored so one bad entry does not discard the file.
    }
  }

  return records;
}
133
+
134
/**
 * Serializes `records` as JSON Lines and writes them to `filePath`,
 * replacing any existing content.
 *
 * Each record becomes one line and the file ends with a newline; an empty
 * record list produces an empty file.
 *
 * @param filePath Destination path.
 * @param records Embedding records to persist.
 */
function writeJsonl(filePath: string, records: EmbeddingRecord[]): void {
  const joined = records.map((record) => JSON.stringify(record)).join("\n");
  const payload = joined.length === 0 ? "" : `${joined}\n`;
  fs.writeFileSync(filePath, payload, "utf8");
}
138
+
139
/**
 * Verifies that the cache files this script reads are present.
 *
 * Checks `documents.jsonl`, `entities.rule.jsonl` and `entities.adr.jsonl`
 * under `CACHE_DIR`, in that order.
 *
 * @throws Error naming the first file that is missing.
 */
function ensureRequiredFiles(): void {
  const requiredNames = ["documents.jsonl", "entities.rule.jsonl", "entities.adr.jsonl"];

  for (const name of requiredNames) {
    const candidate = path.join(CACHE_DIR, name);
    if (fs.existsSync(candidate)) {
      continue;
    }
    throw new Error(`Missing required cache file: ${candidate}`);
  }
}
152
+
153
/**
 * Converts raw document cache rows into `FileEntity` records.
 *
 * Rows without a string `id` or `path` are skipped. The embedded text is the
 * path, excerpt and content joined by newlines and clamped to `maxChars`.
 * The signature hashes the checksum (falling back to a hash of the content),
 * the `updated_at` value and a hash of the clamped text.
 *
 * @param raw Parsed rows of the document cache.
 * @param maxChars Maximum length of the text passed to the embedder.
 * @returns One entity per usable row, in input order.
 */
function parseFileEntities(raw: JsonObject[], maxChars: number): FileEntity[] {
  const entities: FileEntity[] = [];

  for (const item of raw) {
    const id = asString(item.id);
    const filePath = asString(item.path);
    if (id === "" || filePath === "") {
      continue;
    }

    const content = asString(item.content);
    const excerpt = asString(item.excerpt);
    const updatedAt = asString(item.updated_at);
    const checksum = asString(item.checksum, hashText(content));
    const text = clampText([filePath, excerpt, content].join("\n"), maxChars);
    const signature = hashText(["file", checksum, updatedAt, hashText(text)].join("|"));

    entities.push({
      id,
      type: "File",
      kind: asString(item.kind, "DOC"),
      label: filePath,
      path: filePath,
      status: asString(item.status, "active"),
      source_of_truth: asBoolean(item.source_of_truth, false),
      trust_level: asNumber(item.trust_level, 50),
      updated_at: updatedAt,
      text,
      signature
    });
  }

  return entities;
}
184
+
185
/**
 * Converts raw rule cache rows into `RuleEntity` records.
 *
 * Rows without a string `id` are skipped. The title falls back to the id,
 * the path is always empty, and the embedded text is the title and body
 * joined by a newline and clamped to `maxChars`. Rules default to
 * `source_of_truth: true` and `trust_level: 95`.
 *
 * @param raw Parsed rows of the rule cache.
 * @param maxChars Maximum length of the text passed to the embedder.
 * @returns One entity per usable row, in input order.
 */
function parseRuleEntities(raw: JsonObject[], maxChars: number): RuleEntity[] {
  const rules: RuleEntity[] = [];

  for (const item of raw) {
    const id = asString(item.id);
    if (id === "") {
      continue;
    }

    const title = asString(item.title, id);
    const body = asString(item.body);
    const updatedAt = asString(item.updated_at, "");
    const text = clampText([title, body].join("\n"), maxChars);
    const signature = hashText(["rule", id, updatedAt, hashText(text)].join("|"));

    rules.push({
      id,
      type: "Rule",
      kind: "RULE",
      label: title,
      path: "",
      status: asString(item.status, "active"),
      source_of_truth: asBoolean(item.source_of_truth, true),
      trust_level: asNumber(item.trust_level, 95),
      updated_at: updatedAt,
      text,
      signature
    });
  }

  return rules;
}
214
+
215
/**
 * Converts raw ADR cache rows into `AdrEntity` records.
 *
 * Rows without a string `id` are skipped. The title falls back to the id and
 * the row's `decision_date` is stored as `updated_at`. The embedded text is
 * the path, title and body joined by newlines and clamped to `maxChars`.
 * ADRs default to `source_of_truth: true` and `trust_level: 95`.
 *
 * @param raw Parsed rows of the ADR cache.
 * @param maxChars Maximum length of the text passed to the embedder.
 * @returns One entity per usable row, in input order.
 */
function parseAdrEntities(raw: JsonObject[], maxChars: number): AdrEntity[] {
  const adrs: AdrEntity[] = [];

  for (const item of raw) {
    const id = asString(item.id);
    if (id === "") {
      continue;
    }

    const title = asString(item.title, id);
    const body = asString(item.body);
    const adrPath = asString(item.path);
    const decisionDate = asString(item.decision_date, "");
    const text = clampText([adrPath, title, body].join("\n"), maxChars);
    const signature = hashText(["adr", id, decisionDate, hashText(text)].join("|"));

    adrs.push({
      id,
      type: "ADR",
      kind: "ADR",
      label: title,
      path: adrPath,
      status: asString(item.status, "active"),
      source_of_truth: asBoolean(item.source_of_truth, true),
      trust_level: asNumber(item.trust_level, 95),
      updated_at: decisionDate,
      text,
      signature
    });
  }

  return adrs;
}
245
+
246
/**
 * Indexes previously written embedding records by entity id so unchanged
 * entities can reuse their vectors.
 *
 * A record is skipped when it has no string `id`, when its `vector` is not a
 * non-empty array made up entirely of finite numbers, or when it names a model
 * other than `modelId`. Records that carry no model name are accepted and
 * stamped with `modelId`.
 *
 * A vector containing even one invalid component is rejected as a whole.
 * Dropping only the bad components would shift the remaining values into the
 * wrong positions and produce a shorter vector that still looks reusable.
 *
 * @param raw Parsed rows of the existing embeddings file.
 * @param modelId Model the current run embeds with.
 * @returns Map from entity id to its reusable record; a later row with the
 *   same id replaces an earlier one.
 */
function parseExistingEmbeddings(raw: JsonObject[], modelId: string): Map<string, EmbeddingRecord> {
  const index = new Map<string, EmbeddingRecord>();

  for (const item of raw) {
    const id = asString(item.id);
    if (!id) continue;

    const vectorRaw = item.vector;
    if (!Array.isArray(vectorRaw)) continue;

    const vector = vectorRaw.filter(
      (value): value is number => typeof value === "number" && Number.isFinite(value)
    );

    // A length mismatch means at least one component was invalid: discard the
    // record so the entity is re-embedded instead of reusing a corrupt vector.
    if (vector.length === 0 || vector.length !== vectorRaw.length) continue;

    const model = asString(item.model);
    if (model && model !== modelId) continue;

    index.set(id, {
      id,
      entity_type: asString(item.entity_type, "Unknown"),
      kind: asString(item.kind, "DOC"),
      label: asString(item.label, id),
      path: asString(item.path),
      status: asString(item.status, "active"),
      source_of_truth: asBoolean(item.source_of_truth, false),
      trust_level: asNumber(item.trust_level, 50),
      updated_at: asString(item.updated_at),
      signature: asString(item.signature),
      model: modelId,
      dimensions: asNumber(item.dimensions, vector.length),
      vector
    });
  }

  return index;
}
283
+
284
/**
 * Extracts the numeric vector from a feature-extraction result.
 *
 * The result is expected to be an object whose `data` property is array-like
 * (for example a typed array). Every component must convert to a finite
 * number; otherwise the vector is rejected so it is never persisted with
 * `NaN` entries, which `JSON.stringify` would write as `null`.
 *
 * @param output Value returned by the embedding pipeline.
 * @returns The components as a plain `number[]`.
 * @throws Error when `output` is not an object, when `data` is missing or not
 *   array-like, or when any component is not a finite number.
 */
function toEmbeddingVector(output: unknown): number[] {
  if (!output || typeof output !== "object") {
    throw new Error("Invalid embedding output type");
  }

  const data = (output as { data?: unknown }).data;
  if (!data || typeof (data as ArrayLike<number>).length !== "number") {
    throw new Error("Missing embedding data");
  }

  const vector = Array.from(data as ArrayLike<number>, (value) => Number(value));
  if (!vector.every((value) => Number.isFinite(value))) {
    throw new Error("Embedding contains non-finite values");
  }

  return vector;
}
296
+
297
/**
 * Rounds every component to six decimal places.
 *
 * @param values Vector components.
 * @returns A new array with the rounded components; `values` is not modified.
 */
function roundVector(values: number[]): number[] {
  const rounded: number[] = [];
  for (const component of values) {
    rounded.push(Number(component.toFixed(6)));
  }
  return rounded;
}
300
+
301
/**
 * Rebuilds the embedding index for all cached files, rules and ADRs.
 *
 * Steps, in order:
 * 1. Check the required cache files and create the output directories.
 * 2. Resolve the model id (`CORTEX_EMBED_MODEL`) and the text limit
 *    (`CORTEX_EMBED_MAX_CHARS`), falling back to the defaults.
 * 3. Load the entities, sort them by id, and load the existing embeddings.
 * 4. For each entity, reuse the stored vector when its signature is unchanged;
 *    otherwise embed the normalized text. An entity whose embedding fails is
 *    counted and left out of the output.
 * 5. Write the records to `EMBEDDINGS_PATH` and a summary manifest to
 *    `EMBEDDINGS_MANIFEST_PATH`, then log the totals.
 *
 * The parsed `mode` is only recorded in the manifest and log output.
 */
async function main(): Promise<void> {
  const { mode } = parseArgs(process.argv);
  ensureRequiredFiles();

  for (const directory of [EMBEDDINGS_DIR, MODEL_CACHE_DIR]) {
    fs.mkdirSync(directory, { recursive: true });
  }

  const requestedModel = (process.env.CORTEX_EMBED_MODEL ?? DEFAULT_MODEL_ID).trim();
  const modelId = requestedModel === "" ? DEFAULT_MODEL_ID : requestedModel;

  const requestedMaxChars = Number(process.env.CORTEX_EMBED_MAX_CHARS ?? DEFAULT_MAX_TEXT_CHARS);
  let maxTextChars = DEFAULT_MAX_TEXT_CHARS;
  if (Number.isFinite(requestedMaxChars) && requestedMaxChars > 0) {
    maxTextChars = Math.floor(requestedMaxChars);
  }

  const loadCache = (name: string): JsonObject[] => readJsonl(path.join(CACHE_DIR, name));
  const documents = parseFileEntities(loadCache("documents.jsonl"), maxTextChars);
  const rules = parseRuleEntities(loadCache("entities.rule.jsonl"), maxTextChars);
  const adrs = parseAdrEntities(loadCache("entities.adr.jsonl"), maxTextChars);
  const entities: SearchEntity[] = [...documents, ...rules, ...adrs];
  entities.sort((left, right) => left.id.localeCompare(right.id));

  const existing = parseExistingEmbeddings(readJsonl(EMBEDDINGS_PATH), modelId);

  env.cacheDir = MODEL_CACHE_DIR;
  const extractor = await pipeline("feature-extraction", modelId);

  // Builds the persisted record; metadata always comes from the current entity.
  const toRecord = (entity: SearchEntity, vector: number[]): EmbeddingRecord => ({
    id: entity.id,
    entity_type: entity.type,
    kind: entity.kind,
    label: entity.label,
    path: entity.path,
    status: entity.status,
    source_of_truth: entity.source_of_truth,
    trust_level: entity.trust_level,
    updated_at: entity.updated_at,
    signature: entity.signature,
    model: modelId,
    dimensions: vector.length,
    vector
  });

  const output: EmbeddingRecord[] = [];
  const failures: string[] = [];
  let reused = 0;
  let embedded = 0;
  let dimensions = 0;

  for (const entity of entities) {
    const previous = existing.get(entity.id);
    const canReuse =
      previous !== undefined &&
      previous.signature === entity.signature &&
      previous.vector.length > 0;

    if (previous !== undefined && canReuse) {
      reused += 1;
      if (dimensions === 0) {
        dimensions = previous.vector.length;
      }
      output.push(toRecord(entity, previous.vector));
      continue;
    }

    try {
      const embeddingOutput = await extractor(normalizeText(entity.text), {
        pooling: "mean",
        normalize: true
      });
      const vector = roundVector(toEmbeddingVector(embeddingOutput));
      if (vector.length === 0) {
        throw new Error("Empty embedding vector");
      }

      embedded += 1;
      if (dimensions === 0) {
        dimensions = vector.length;
      }
      output.push(toRecord(entity, vector));
    } catch (error) {
      const reason = error instanceof Error ? error.message : "embedding generation failed";
      failures.push(`${entity.id}: ${reason}`);
    }
  }

  const failed = failures.length;

  writeJsonl(EMBEDDINGS_PATH, output);

  const manifest = {
    generated_at: new Date().toISOString(),
    mode,
    model: modelId,
    dimensions,
    counts: {
      entities: entities.length,
      output: output.length,
      embedded,
      reused,
      failed
    },
    // Only the first 50 failure messages are kept in the manifest.
    failures: failures.slice(0, 50)
  };

  fs.writeFileSync(EMBEDDINGS_MANIFEST_PATH, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

  console.log(`[embed] mode=${mode} model=${modelId} dim=${dimensions}`);
  console.log(
    `[embed] entities=${entities.length} embedded=${embedded} reused=${reused} failed=${failed}`
  );
  console.log(`[embed] wrote ${EMBEDDINGS_PATH}`);
  console.log(`[embed] manifest ${EMBEDDINGS_MANIFEST_PATH}`);
}

// Entry point: report any unhandled failure on stderr and exit with status 1.
main().catch((reason: unknown) => {
  const message = reason instanceof Error ? reason.message : "Embedding error";
  process.stderr.write(`${message}\n`);
  process.exit(1);
});
@@ -0,0 +1,192 @@
1
+ import fs from "node:fs";
2
+ import { env, pipeline } from "@xenova/transformers";
3
+ import { PATHS } from "./paths.js";
4
+ import type { EmbeddingIndex, JsonObject } from "./types.js";
5
+
6
+ const EMBEDDING_INIT_RETRY_INTERVAL_MS = 5000;
7
+
8
+ let embeddingsCacheKey = "";
9
+ let embeddingsCache: EmbeddingIndex = { model: null, vectors: new Map() };
10
+ let embeddingExtractorModel: string | null = null;
11
+ let embeddingExtractorPromise: Promise<unknown | null> | null = null;
12
+ let embeddingLastInitAttemptAt = 0;
13
+ let embeddingRuntimeWarning: string | null = null;
14
+
15
/**
 * Returns `value` when it is a string, otherwise `fallback`.
 *
 * @param value Any value, typically a field of a parsed JSON row.
 * @param fallback Value used for anything that is not a string; defaults to `""`.
 */
function asString(value: unknown, fallback = ""): string {
  if (typeof value !== "string") {
    return fallback;
  }
  return value;
}
18
+
19
/**
 * Reads a JSON Lines file into an array of parsed values.
 *
 * Lines are trimmed; blank lines, lines that fail to parse and lines that
 * parse to `null` are dropped without raising an error.
 *
 * @param filePath Path of the `.jsonl` file.
 * @returns Parsed entries in file order, or `[]` when the file does not exist.
 */
function readJsonl(filePath: string): JsonObject[] {
  if (!fs.existsSync(filePath)) {
    return [];
  }

  const lines = fs.readFileSync(filePath, "utf8").split(/\r?\n/);
  return lines.flatMap((entry): JsonObject[] => {
    const line = entry.trim();
    if (line.length === 0) {
      return [];
    }

    try {
      const parsed = JSON.parse(line) as JsonObject | null;
      return parsed === null ? [] : [parsed];
    } catch {
      // Malformed line: contributes nothing to the result.
      return [];
    }
  });
}
38
+
39
/**
 * Extracts the numeric vector from a feature-extraction result.
 *
 * The result is expected to be an object whose `data` property is array-like
 * (for example a typed array). If any component does not convert to a finite
 * number the whole result is rejected: removing only the bad components would
 * shorten the vector and shift the remaining values out of position.
 *
 * @param output Value returned by the embedding pipeline.
 * @returns The components as a plain `number[]` (possibly empty), or `null`
 *   when `output` is not an object, `data` is missing or not array-like, or
 *   any component is not a finite number.
 */
function toVector(output: unknown): number[] | null {
  if (!output || typeof output !== "object") {
    return null;
  }

  const data = (output as { data?: unknown }).data;
  if (!data || typeof (data as ArrayLike<number>).length !== "number") {
    return null;
  }

  const vector = Array.from(data as ArrayLike<number>, (value) => Number(value));
  return vector.every((value) => Number.isFinite(value)) ? vector : null;
}
53
+
54
/**
 * Produces a change token for a file from its modification time and size.
 *
 * @param filePath File to inspect.
 * @returns `"<mtime in ms, rounded>:<size in bytes>"`, or `"none"` when the
 *   file does not exist or cannot be stat'ed.
 */
function readFileVersion(filePath: string): string {
  let stats: fs.Stats;
  try {
    stats = fs.statSync(filePath);
  } catch {
    // Covers both a missing file and any other stat failure.
    return "none";
  }

  const modifiedMs = Math.round(stats.mtimeMs);
  return `${modifiedMs}:${stats.size}`;
}
65
+
66
/**
 * Builds the in-memory embedding index from parsed JSONL rows.
 *
 * A row is skipped when it has no string `id` or when its `vector` is not a
 * non-empty array made up entirely of finite numbers. A vector with even one
 * invalid component is rejected as a whole, because dropping only the bad
 * components would shift the remaining values into the wrong positions.
 *
 * The index model is the first non-empty `model` found on an accepted row.
 *
 * @param raw Parsed rows of the embeddings file.
 * @returns The model name (or `null` when no accepted row names one) and a map
 *   from entity id to vector; a later row with the same id replaces an
 *   earlier one.
 */
function parseEmbeddingIndex(raw: JsonObject[]): EmbeddingIndex {
  const vectors = new Map<string, number[]>();
  let model: string | null = null;

  for (const item of raw) {
    const id = asString(item.id);
    if (!id) continue;

    const vectorRaw = item.vector;
    if (!Array.isArray(vectorRaw)) continue;

    const vector = vectorRaw.filter(
      (value): value is number => typeof value === "number" && Number.isFinite(value)
    );

    // A length mismatch means at least one component was invalid.
    if (vector.length === 0 || vector.length !== vectorRaw.length) continue;
    vectors.set(id, vector);

    const nextModel = asString(item.model);
    if (nextModel && !model) {
      model = nextModel;
    }
  }

  return { model, vectors };
}
92
+
93
/**
 * Returns the embedding index, re-reading it from disk only when the manifest
 * or entities file has changed since the last call.
 *
 * Change detection uses the version tokens of both files. When the entities
 * file is missing, or contains no usable vectors, the returned index carries a
 * `warning` describing the situation.
 *
 * @returns The cached or freshly parsed index.
 */
export function loadEmbeddingIndex(): EmbeddingIndex {
  const manifestVersion = readFileVersion(PATHS.embeddingsManifest);
  const entitiesVersion = readFileVersion(PATHS.embeddingsEntities);
  const key = `${manifestVersion}|${entitiesVersion}`;
  if (key === embeddingsCacheKey) {
    return embeddingsCache;
  }

  let next: EmbeddingIndex;
  if (fs.existsSync(PATHS.embeddingsEntities)) {
    const parsed = parseEmbeddingIndex(readJsonl(PATHS.embeddingsEntities));
    next =
      parsed.vectors.size > 0
        ? parsed
        : { ...parsed, warning: "Embedding index is empty; using lexical fallback." };
  } else {
    next = {
      model: null,
      vectors: new Map(),
      warning: "Embedding index missing (run: ./scripts/context.sh embed)"
    };
  }

  // The cache is updated only after a successful load.
  embeddingsCacheKey = key;
  embeddingsCache = next;
  return next;
}
117
+
118
/**
 * Returns the feature-extraction pipeline for `modelId`, creating it on first
 * use and sharing the pending initialization between callers.
 *
 * Requesting a different model discards the cached pipeline. A failed
 * initialization resolves to `null` and stores its message as the runtime
 * warning; further attempts are refused until
 * `EMBEDDING_INIT_RETRY_INTERVAL_MS` has elapsed since the last attempt.
 *
 * @param modelId Model to load; an empty id yields `null`.
 * @returns The pipeline, or `null` when it is unavailable.
 */
async function getEmbeddingExtractor(modelId: string): Promise<unknown | null> {
  if (!modelId) {
    return null;
  }

  if (embeddingExtractorModel !== modelId) {
    // Model switch: forget the old pipeline and lift the retry cooldown.
    embeddingExtractorModel = modelId;
    embeddingExtractorPromise = null;
    embeddingLastInitAttemptAt = 0;
  }

  const withinCooldown = (): boolean =>
    Date.now() - embeddingLastInitAttemptAt < EMBEDDING_INIT_RETRY_INTERVAL_MS;

  if (embeddingExtractorPromise) {
    const ready = await embeddingExtractorPromise;
    if (ready) {
      return ready;
    }

    if (withinCooldown()) {
      return null;
    }

    // The earlier attempt failed and the cooldown is over: allow a new one.
    embeddingExtractorPromise = null;
  }

  // Checked again because another caller may have started a new attempt
  // while this one was awaiting the previous promise.
  if (withinCooldown()) {
    return null;
  }

  const initialize = async (): Promise<unknown | null> => {
    try {
      fs.mkdirSync(PATHS.embeddingsModelCache, { recursive: true });
      env.cacheDir = PATHS.embeddingsModelCache;
      const loaded = await pipeline("feature-extraction", modelId);
      embeddingRuntimeWarning = null;
      return loaded;
    } catch (error) {
      embeddingRuntimeWarning =
        error instanceof Error ? error.message : "Failed to load embedding model";
      return null;
    }
  };

  embeddingLastInitAttemptAt = Date.now();
  embeddingExtractorPromise = initialize();

  return embeddingExtractorPromise;
}
164
+
165
/**
 * Embeds a search query with the given model.
 *
 * The pipeline is invoked with mean pooling and normalization enabled. On
 * success the runtime warning is cleared; on failure it is set to the error
 * message, or to a generic message when no usable vector was produced.
 *
 * @param query Query text.
 * @param modelId Model to embed with.
 * @returns The query vector, or `null` when the pipeline is unavailable, the
 *   call throws, or the result is empty or unusable.
 */
export async function embedQuery(query: string, modelId: string): Promise<number[] | null> {
  const extractor = await getEmbeddingExtractor(modelId);
  if (!extractor) {
    return null;
  }

  type Extractor = (text: string, options: unknown) => Promise<unknown>;
  const run = extractor as Extractor;

  let vector: number[] | null;
  try {
    vector = toVector(await run(query, { pooling: "mean", normalize: true }));
  } catch (error) {
    embeddingRuntimeWarning = error instanceof Error ? error.message : "Failed to embed query text";
    return null;
  }

  if (vector === null || vector.length === 0) {
    embeddingRuntimeWarning = "Failed to embed query text";
    return null;
  }

  embeddingRuntimeWarning = null;
  return vector;
}
189
+
190
/**
 * Returns the most recent warning recorded while loading the embedding model
 * or embedding a query.
 *
 * @returns The warning message, or `null` when none is currently set.
 */
export function getEmbeddingRuntimeWarning(): string | null {
  const latestWarning = embeddingRuntimeWarning;
  return latestWarning;
}