@minhpnq1807/contextos 0.5.42 → 0.5.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,10 +15,6 @@ const IMPORTANT_WORDS = [
15
15
  "luon", "khong bao gio", "bat buoc", "quan trong"
16
16
  ];
17
17
 
18
- const IGNORE_DIRS = new Set([
19
- ".git", ".next", ".turbo", "coverage", "dist", "build", "node_modules", "vendor"
20
- ]);
21
-
22
18
  const SEMANTIC_ALIASES = {
23
19
  duyet: ["moderation", "moderate", "review", "approve", "approval", "approved", "reject", "rejected"],
24
20
  kiem: ["check", "verify", "validation", "validate"],
@@ -49,8 +45,6 @@ const SEMANTIC_ALIASES = {
49
45
  recheck: ["check", "verify", "review"]
50
46
  };
51
47
 
52
- const MODERATION_TOKENS = new Set(["moderation", "moderate", "content-moderation", "approval", "approved", "reject", "rejected", "needs_review"]);
53
-
54
48
  const SYSTEM_USER_RULE_PATTERNS = [
55
49
  /\ball\s+shell\s+commands?\s+must\s+run\s+as\b/i,
56
50
  /\bcommands?\s+must\s+run\s+as\b/i,
@@ -287,50 +281,31 @@ export async function findRelevantFiles({
287
281
  fileEmbeddingTimeoutMs,
288
282
  fileEmbeddingOptions = {}
289
283
  } = {}) {
290
- const rawTaskTokens = new Set(tokenize(task));
291
- if (!rawTaskTokens.size) return [];
292
-
293
- const candidates = [];
294
- walkFiles(cwd, (filePath) => {
295
- const rel = path.relative(cwd, filePath);
296
- const fileTokens = new Set(tokenize(rel));
297
- const match = scoreFileTokens({ rawTaskTokens, fileTokens });
298
- if (match.score > 0) {
299
- candidates.push({
300
- path: rel,
301
- score: match.score,
302
- reasons: match.reasons
303
- });
304
- }
305
- });
284
+ if (!String(task || "").trim()) return [];
306
285
 
307
- const heuristicFiles = candidates
308
- .sort((a, b) => b.score - a.score || a.path.localeCompare(b.path))
309
- .slice(0, Math.max(limit * 2, 6));
310
- const hasHighConfidenceHeuristics =
311
- heuristicFiles.length >= limit &&
312
- Number(heuristicFiles[0]?.score || 0) >= 8;
313
- const embeddingFiles = hasHighConfidenceHeuristics
314
- ? []
315
- : await embeddingFileFinder({
316
- cwd,
317
- task,
318
- dataDir,
319
- timeoutMs: fileEmbeddingTimeoutMs,
320
- embeddingOptions: fileEmbeddingOptions,
321
- limit: Math.max(limit * 2, 6)
322
- });
286
+ const retrievalTask = expandFileRetrievalTask(task);
287
+ const explicitFiles = findExplicitPromptFiles({ cwd, task, limit: Math.max(limit * 2, 6) });
288
+ const manifestFiles = findProjectManifestFiles({ cwd, task, limit: Math.max(limit * 2, 6) });
289
+ const embeddingFiles = await embeddingFileFinder({
290
+ cwd,
291
+ task: retrievalTask,
292
+ dataDir,
293
+ timeoutMs: fileEmbeddingTimeoutMs,
294
+ embeddingOptions: fileEmbeddingOptions,
295
+ limit: Math.max(limit * 2, 6)
296
+ });
323
297
  const importGraphFiles = expandImportGraph({
324
298
  cwd,
325
- seedFiles: mergeLocalFileCandidates([...heuristicFiles, ...embeddingFiles]).slice(0, limit),
299
+ seedFiles: [...explicitFiles, ...manifestFiles, ...embeddingFiles].slice(0, limit),
300
+ dataDir,
326
301
  limit: Math.max(limit * 2, 6)
327
302
  });
328
- const seedFiles = mergeLocalFileCandidates([...heuristicFiles, ...embeddingFiles, ...importGraphFiles])
303
+ const seedFiles = mergeLocalFileCandidates([...explicitFiles, ...manifestFiles, ...embeddingFiles, ...importGraphFiles])
329
304
  .slice(0, Math.max(limit * 3, 9));
330
305
 
331
306
  const graphFiles = findGraphRelevantFiles({
332
307
  cwd,
333
- task,
308
+ task: retrievalTask,
334
309
  rules,
335
310
  seedFiles,
336
311
  limit: Math.max(limit * 2, 6)
@@ -339,70 +314,197 @@ export async function findRelevantFiles({
339
314
  return mergeRelevantFiles({ graphFiles, heuristicFiles: seedFiles, limit });
340
315
  }
341
316
 
342
- function mergeLocalFileCandidates(files) {
343
- const byPath = new Map();
344
- for (const file of files) {
345
- const existing = byPath.get(file.path);
346
- byPath.set(file.path, {
347
- ...existing,
348
- ...file,
349
- score: Number(existing?.score || 0) + Number(file.score || 0),
350
- reasons: [...new Set([...(existing?.reasons || []), ...(file.reasons || [])])],
351
- source: existing?.source === "import-graph" || file.source === "import-graph" ? "import-graph" : file.source
352
- });
353
- }
354
- return [...byPath.values()].sort((a, b) => b.score - a.score || a.path.localeCompare(b.path));
317
/**
 * Proposes package manifests as relevant files for tasks that ask to
 * run/build/install a project.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Workspace root to search from.
 * @param {string} [options.task] - Prompt text; tokenized to detect intent.
 * @param {number} [options.limit] - Maximum number of manifests returned.
 * @returns {Array<{path: string, score: number, source: string, reasons: string[]}>}
 *   Empty when the task tokens do not pass isManifestRelevantTask.
 */
export function findProjectManifestFiles({ cwd = process.cwd(), task = "", limit = 6 } = {}) {
  const taskTokens = new Set(tokenize(task));
  if (!isManifestRelevantTask(taskTokens)) {
    return [];
  }

  const selected = workspacePackageManifests(cwd, taskTokens).slice(0, limit);
  const results = [];
  selected.forEach((manifestPath, position) => {
    results.push({
      path: manifestPath,
      score: manifestScore(manifestPath, taskTokens, position),
      source: "manifest",
      reasons: ["project-manifest"]
    });
  });
  return results;
}
356
328
 
357
- function scoreFileTokens({ rawTaskTokens, fileTokens }) {
358
- let score = 0;
359
- const reasons = new Set();
360
- const hasModerationIntent = rawTaskTokens.has("kiem-duyet") || rawTaskTokens.has("kiemduyet") || rawTaskTokens.has("duyet");
361
- const hasUploadIntent = rawTaskTokens.has("upload") || rawTaskTokens.has("tai-len") || rawTaskTokens.has("tailen");
329
/**
 * Scores a manifest path: the root manifest gets 50; workspace manifests get
 * 35 when their parent directory name is a task token, otherwise 20, minus a
 * small per-position penalty so earlier entries rank first.
 */
function manifestScore(manifest, taskTokens, index) {
  const isRootManifest = manifest === "package.json";
  if (isRootManifest) {
    return 50;
  }
  // The workspace name is the directory that directly contains package.json.
  const segments = manifest.split(/[\\/]+/).filter(Boolean);
  const workspaceName = segments.at(-2);
  const baseScore = taskTokens.has(workspaceName) ? 35 : 20;
  return baseScore - index * 0.01;
}
362
335
 
363
- for (const token of rawTaskTokens) {
364
- if (fileTokens.has(token)) {
365
- score += 3;
366
- reasons.add(token);
367
- }
368
- for (const alias of SEMANTIC_ALIASES[token] || []) {
369
- if (fileTokens.has(alias)) {
370
- score += 2;
371
- reasons.add(`${token}->${alias}`);
372
- }
373
- }
374
- }
336
/**
 * True when the task tokens contain at least one "run"-style word AND at
 * least one "project"-style word.
 */
function isManifestRelevantTask(tokens) {
  const runWords = ["run", "start", "connect", "qr", "install", "build", "script", "scripts"];
  const projectWords = ["webapp", "frontend", "expo", "native", "app", "package", "workspace"];
  const mentionsAny = (words) => words.some((word) => tokens.has(word));
  if (!mentionsAny(runWords)) {
    return false;
  }
  return mentionsAny(projectWords);
}
375
341
 
376
- if (hasModerationIntent && [...fileTokens].some((token) => MODERATION_TOKENS.has(token))) {
377
- score += 6;
378
- reasons.add("domain:moderation");
342
/**
 * Lists manifest paths (relative to cwd) for the root package and every
 * workspace declared in the root package.json, de-duplicated and ordered by
 * manifestPriority (descending) and then alphabetically.
 */
function workspacePackageManifests(cwd, taskTokens = new Set()) {
  const rootManifest = path.join(cwd, "package.json");
  const found = new Set();
  if (fs.existsSync(rootManifest)) {
    found.add("package.json");
  }

  const declaredPatterns = workspacePatterns(readJson(rootManifest)?.workspaces);
  for (const pattern of declaredPatterns) {
    for (const manifest of expandWorkspacePattern({ cwd, pattern })) {
      found.add(path.relative(cwd, manifest));
    }
  }

  const byPriorityThenName = (left, right) => {
    const priorityGap = manifestPriority(right, taskTokens) - manifestPriority(left, taskTokens);
    return priorityGap || left.localeCompare(right);
  };
  return [...found].sort(byPriorityThenName);
}
380
354
 
381
- if (hasUploadIntent && (fileTokens.has("upload") || fileTokens.has("uploaded") || fileTokens.has("resource"))) {
382
- score += 2;
383
- reasons.add("domain:upload");
384
- }
355
/**
 * Sort weight for a manifest: 100 for the root package.json, 80 when the
 * containing directory name appears in the task tokens, otherwise 0.
 */
function manifestPriority(manifest, taskTokens) {
  if (manifest === "package.json") {
    return 100;
  }
  const segments = manifest.split(/[\\/]+/).filter(Boolean);
  const workspaceName = segments.at(-2);
  if (taskTokens.has(workspaceName)) {
    return 80;
  }
  return 0;
}
385
361
 
386
- return { score, reasons: [...reasons] };
362
/**
 * Normalizes the package.json "workspaces" field to a list of string
 * patterns. Accepts either an array or an object with a "packages" array;
 * anything else yields an empty list. Non-string entries are dropped.
 */
function workspacePatterns(workspaces) {
  const declared = Array.isArray(workspaces) ? workspaces : workspaces?.packages;
  if (!Array.isArray(declared)) {
    return [];
  }
  return declared.filter((entry) => typeof entry === "string");
}
388
367
 
389
- function walkFiles(directory, onFile, depth = 0) {
390
- if (depth > 6) return;
368
/**
 * Expands one workspace pattern into absolute package.json paths that exist.
 *
 * Supports a literal directory ("apps/web") or a pattern with a single "*"
 * path segment ("packages/*"). Patterns containing "**", absolute patterns,
 * and patterns starting with ".." yield no results.
 *
 * Fix: ".." segments anywhere in the pattern (not just at the start) could
 * resolve to a manifest outside cwd; every result is now required to stay
 * inside cwd.
 *
 * @param {{cwd: string, pattern: string}} options
 * @returns {string[]} Absolute manifest paths located inside cwd.
 */
function expandWorkspacePattern({ cwd, pattern }) {
  const normalized = String(pattern || "").replace(/\\/g, "/").replace(/\/+$/g, "");
  if (!normalized || normalized.startsWith("..") || path.isAbsolute(normalized)) return [];

  const root = path.resolve(cwd);
  // Containment check on the resolved location, so embedded ".." cannot escape.
  const staysInsideRoot = (target) => {
    const relative = path.relative(root, path.resolve(target));
    return relative !== ".." && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative);
  };

  if (!normalized.includes("*")) {
    const manifest = path.join(cwd, normalized, "package.json");
    return staysInsideRoot(manifest) && fs.existsSync(manifest) ? [manifest] : [];
  }

  const parts = normalized.split("/");
  const starIndex = parts.indexOf("*");
  // Only a whole-segment "*" is supported; "**" and partial globs are ignored.
  if (starIndex < 0 || parts.includes("**")) return [];
  const baseDir = path.join(cwd, ...parts.slice(0, starIndex));
  const suffix = parts.slice(starIndex + 1);
  if (!staysInsideRoot(baseDir)) return [];

  let entries = [];
  try {
    entries = fs.readdirSync(baseDir, { withFileTypes: true });
  } catch {
    // Missing or unreadable base directory: nothing to expand.
    return [];
  }
  return entries
    .filter((entry) => entry.isDirectory() && !entry.name.startsWith("."))
    .map((entry) => path.join(baseDir, entry.name, ...suffix, "package.json"))
    .filter((manifest) => staysInsideRoot(manifest) && fs.existsSync(manifest));
}
391
+
392
/**
 * Reads and parses a JSON file. Returns null when the file cannot be read
 * or its content is not valid JSON.
 */
function readJson(filePath) {
  let parsed = null;
  try {
    const raw = fs.readFileSync(filePath, "utf8");
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
399
+
400
/**
 * Appends domain "retrieval hints" to the task text when its tokens match
 * one of the trigger groups below. Returns the task unchanged when no group
 * matches. Hints are de-duplicated and keep first-seen order.
 */
function expandFileRetrievalTask(task) {
  const taskTokens = new Set(tokenize(task));
  // Each rule: [trigger tokens, hints added when any trigger is present].
  const hintRules = [
    [
      ["purchase", "purchased", "buy", "buyer", "seller", "payment", "pay", "checkout"],
      [
        "purchase", "payment", "checkout", "billing", "wallet", "balance", "top up",
        "transaction", "order", "invoice"
      ]
    ],
    [
      ["wallet", "balance", "topup", "top", "funded"],
      ["wallet", "balance", "top up", "billing"]
    ],
    [
      ["library", "access", "permissions", "permission", "resources", "tutorials", "collections"],
      [
        "content access", "content-access-service", "access permissions", "library",
        "resource", "resources", "tutorial", "tutorials", "collections"
      ]
    ],
    [
      ["notification", "notifications", "notify", "buyer", "seller"],
      ["notification", "notifications", "notify", "buyer", "seller"]
    ]
  ];

  const hints = new Set();
  for (const [triggers, expansions] of hintRules) {
    if (hasAny(taskTokens, triggers)) {
      addAll(hints, expansions);
    }
  }

  return hints.size
    ? `${task}\n\nContextOS retrieval hints: ${[...hints].join(", ")}`
    : task;
}
424
+
425
/** True when at least one of `values` is a member of the `tokens` set. */
function hasAny(tokens, values) {
  for (const candidate of values) {
    if (tokens.has(candidate)) {
      return true;
    }
  }
  return false;
}
428
+
429
/** Adds every entry of `values` to the `target` set (mutates `target`). */
function addAll(target, values) {
  [...values].forEach((entry) => {
    target.add(entry);
  });
}
432
+
433
/**
 * Finds slash-separated path mentions in the prompt and resolves them to
 * files under cwd via resolvePromptPathCandidates.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Workspace root.
 * @param {string} [options.task] - Prompt text to scan.
 * @param {number} [options.limit] - Collection stops once this many files are found.
 * @returns {Array<{path: string, score: number, source: string, reasons: string[]}>}
 */
export function findExplicitPromptFiles({ cwd = process.cwd(), task = "", limit = 6 } = {}) {
  const pathMention = /[A-Za-z0-9_.()[\]@~:-]+(?:\/[A-Za-z0-9_.()[\]@~:-]+)+/g;
  // Re-join paths that were split by whitespace after a slash ("src/ foo").
  const text = String(task || "").replace(/\/\s+/g, "/");
  const found = new Set();

  collect: for (const mention of text.match(pathMention) ?? []) {
    // Drop trailing sentence punctuation captured by the character class.
    const promptPath = mention.replace(/[),.;:]+$/g, "");
    for (const resolved of resolvePromptPathCandidates({ cwd, promptPath })) {
      found.add(resolved);
      if (found.size >= limit) break collect;
    }
    if (found.size >= limit) break collect;
  }

  const results = [];
  let position = 0;
  for (const filePath of found) {
    results.push({
      path: filePath,
      score: 12 - position * 0.01,
      source: "prompt-path",
      reasons: ["prompt-path"]
    });
    position += 1;
  }
  return results;
}
452
+
453
/**
 * Resolves one path mentioned in a prompt to existing files, returned
 * relative to cwd. Tries, in order: the path itself, well-known entry files
 * when the path is a directory, and the path with common extensions appended
 * when it has no extension. URLs and paths outside cwd yield no results.
 */
function resolvePromptPathCandidates({ cwd, promptPath }) {
  if (!promptPath || promptPath.includes("://")) return [];
  const relative = promptPath.replace(/^\.?\//, "");
  if (relative.startsWith("..")) return [];
  const absolute = path.resolve(cwd, relative);
  if (!isInsidePath(absolute, cwd)) return [];

  const directoryEntryFiles = ["page.tsx", "page.ts", "page.jsx", "page.js", "layout.tsx", "index.tsx", "index.ts"];
  const impliedExtensions = [".tsx", ".ts", ".jsx", ".js", ".md", ".json"];

  const probes = [absolute];
  if (isDirectory(absolute)) {
    probes.push(...directoryEntryFiles.map((fileName) => path.join(absolute, fileName)));
  }
  if (!path.extname(relative)) {
    probes.push(...impliedExtensions.map((extension) => `${absolute}${extension}`));
  }

  return probes
    .filter((candidate) => isSourceFile(candidate))
    .map((candidate) => path.relative(cwd, candidate));
}
475
+
476
/**
 * Reports whether filePath is strictly inside parentPath (the parent itself
 * does not count).
 *
 * Fixes: always returns a boolean (previously "" for equal paths), and only
 * treats a leading ".." path SEGMENT as escaping, so a child directory whose
 * name merely begins with ".." is no longer rejected.
 *
 * @param {string} filePath
 * @param {string} parentPath
 * @returns {boolean}
 */
function isInsidePath(filePath, parentPath) {
  const relative = path.relative(path.resolve(parentPath), path.resolve(filePath));
  if (relative === "" || path.isAbsolute(relative)) return false;
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
480
+
481
/** True when filePath exists and is a directory; false on any stat failure. */
function isDirectory(filePath) {
  let stats;
  try {
    stats = fs.statSync(filePath);
  } catch {
    return false;
  }
  return stats.isDirectory();
}
488
+
489
/** True when filePath exists and is a regular file; false on any stat failure. */
function isSourceFile(filePath) {
  let stats;
  try {
    stats = fs.statSync(filePath);
  } catch {
    return false;
  }
  return stats.isFile();
}
496
+
497
/**
 * Merges candidate entries that share a path: scores are summed, reasons are
 * unioned (first-seen order), later fields override earlier ones, and the
 * source stays "import-graph" once any contributor had that source.
 * The result is sorted by score (descending), then path.
 */
function mergeLocalFileCandidates(files) {
  const merged = new Map();
  for (const candidate of files) {
    const previous = merged.get(candidate.path);
    const touchesImportGraph =
      previous?.source === "import-graph" || candidate.source === "import-graph";

    const reasons = new Set(previous?.reasons || []);
    for (const reason of candidate.reasons || []) {
      reasons.add(reason);
    }

    merged.set(candidate.path, {
      ...previous,
      ...candidate,
      score: Number(previous?.score || 0) + Number(candidate.score || 0),
      reasons: [...reasons],
      source: touchesImportGraph ? "import-graph" : candidate.source
    });
  }

  const byScoreThenPath = (left, right) =>
    right.score - left.score || left.path.localeCompare(right.path);
  return Array.from(merged.values()).sort(byScoreThenPath);
}
@@ -0,0 +1,74 @@
1
+ import { spawn } from "node:child_process";
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ const DEFAULT_COOLDOWN_MS = 15 * 60 * 1000;
7
+
8
/**
 * Starts a detached background "autowarm" run when a prompt produced no
 * usable context, rate-limited by a marker file in dataDir.
 *
 * Fixes:
 * - A non-numeric cooldown (e.g. a malformed CONTEXTOS_AUTO_WARM_COOLDOWN_MS)
 *   produced NaN, which made every cooldown comparison false and disabled the
 *   rate limit; the default cooldown is used instead.
 * - spawnProcess can throw synchronously (e.g. on invalid arguments); that
 *   no longer propagates to the caller, and the marker is removed so the
 *   failed attempt does not start a cooldown.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child process.
 * @param {string} [options.prompt] - Prompt forwarded to the child; first 300 chars are stored in the marker.
 * @param {string} [options.dataDir] - Directory holding auto-warm.json.
 * @param {string} [options.reason] - Why context was empty; must pass shouldAutoWarm.
 * @param {number} [options.now] - Current time in epoch milliseconds.
 * @param {Function} [options.spawnProcess] - spawn-compatible function (injectable for tests).
 * @param {number} [options.cooldownMs] - Minimum time between warm runs.
 * @returns {{status: string, reason?: string, markerPath?: string, ageMs?: number, pid?: number}}
 */
export function maybeAutoWarmWorkspace({
  cwd = process.cwd(),
  prompt = "",
  dataDir,
  reason,
  now = Date.now(),
  spawnProcess = spawn,
  cooldownMs = Number(process.env.CONTEXTOS_AUTO_WARM_COOLDOWN_MS || DEFAULT_COOLDOWN_MS)
} = {}) {
  if (process.env.CONTEXTOS_AUTO_WARM === "0") return { status: "disabled" };
  if (!dataDir) return { status: "skipped", reason: "missing-data-dir" };
  if (!String(prompt || "").trim()) return { status: "skipped", reason: "missing-prompt" };
  if (!shouldAutoWarm(reason)) return { status: "skipped", reason: "not-actionable" };

  // NaN would make `ageMs < cooldownMs` always false and silently disable the cooldown.
  const effectiveCooldownMs = Number.isFinite(cooldownMs) ? cooldownMs : DEFAULT_COOLDOWN_MS;

  const markerPath = path.join(dataDir, "auto-warm.json");
  const existing = readJson(markerPath);
  if (existing?.startedAt) {
    const ageMs = now - Date.parse(existing.startedAt);
    if (Number.isFinite(ageMs) && ageMs >= 0 && ageMs < effectiveCooldownMs) {
      return { status: "cooldown", markerPath, ageMs };
    }
  }

  // Write the marker before spawning so concurrent callers observe the cooldown.
  try {
    fs.mkdirSync(dataDir, { recursive: true });
    fs.writeFileSync(markerPath, `${JSON.stringify({
      startedAt: new Date(now).toISOString(),
      cwd,
      reason,
      prompt: String(prompt).slice(0, 300)
    }, null, 2)}\n`, "utf8");
  } catch {
    return { status: "skipped", reason: "marker-write-failed" };
  }

  let child;
  try {
    child = spawnProcess(process.execPath, [ctxBinPath(), "autowarm", "--", prompt], {
      cwd,
      detached: true,
      stdio: "ignore",
      env: {
        ...process.env,
        CONTEXTOS_AUTO_WARM_CHILD: "1"
      }
    });
  } catch {
    // Nothing was started: drop the marker so the next prompt can retry.
    try {
      fs.rmSync(markerPath, { force: true });
    } catch {
      // Best effort; a stale marker only delays the next attempt.
    }
    return { status: "skipped", reason: "spawn-failed" };
  }
  // Asynchronous spawn errors are deliberately ignored: warming is best-effort.
  child.on?.("error", () => {});
  child.unref?.();
  return { status: "started", pid: child.pid, markerPath };
}
56
+
57
/** True only for the reasons that indicate an empty context worth warming for. */
function shouldAutoWarm(reason) {
  const actionableReasons = [
    "no-context-candidates",
    "enabled-sections-empty-after-formatting"
  ];
  return actionableReasons.includes(reason);
}
62
+
63
/** Absolute path of bin/ctx.js, located three directories above this module's directory. */
function ctxBinPath() {
  const moduleFile = fileURLToPath(import.meta.url);
  const moduleDir = path.dirname(moduleFile);
  return path.resolve(moduleDir, "..", "..", "..", "bin", "ctx.js");
}
67
+
68
/**
 * Best-effort JSON file reader: returns the parsed value, or null when the
 * file is missing, unreadable, or not valid JSON.
 */
function readJson(filePath) {
  let contents;
  try {
    contents = fs.readFileSync(filePath, "utf8");
    return JSON.parse(contents);
  } catch {
    return null;
  }
}
@@ -4,50 +4,99 @@ import path from "node:path";
4
4
 
5
5
  import { defaultDataRoot } from "./workspace-data.js";
6
6
 
7
- const DEFAULT_TIMEOUT_MS = 1000;
7
+ const DEFAULT_TIMEOUT_MS = 2000;
8
+ const DEFAULT_CONNECT_TIMEOUT_MS = 100;
9
+ export const CTX_MCP_BRIDGE_REVISION = 2;
8
10
 
9
11
  export function ctxMcpSocketPath(dataDir = defaultDataDir()) {
10
12
  return path.join(dataDir, "ctx-mcp.sock");
11
13
  }
12
14
 
15
/**
 * Removes the bridge socket file for dataDir.
 *
 * @returns {boolean} true when a socket file existed and was removed;
 *   false when it did not exist or removal failed.
 */
export function invalidateCtxMcpSocket(dataDir = defaultDataDir()) {
  const socketPath = ctxMcpSocketPath(dataDir);
  let removed = false;
  if (fs.existsSync(socketPath)) {
    try {
      fs.rmSync(socketPath, { force: true });
      removed = true;
    } catch {
      removed = false;
    }
  }
  return removed;
}
25
+
13
26
  export async function callCtxScoreContext(payload, {
14
27
  dataDir = defaultDataDir(),
15
- timeoutMs = Number(process.env.CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS || DEFAULT_TIMEOUT_MS)
28
+ timeoutMs = Number(process.env.CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS || DEFAULT_TIMEOUT_MS),
29
+ connectTimeoutMs = Number(process.env.CONTEXTOS_MCP_CONNECT_TIMEOUT_MS || DEFAULT_CONNECT_TIMEOUT_MS),
30
+ createConnection = net.createConnection
16
31
  } = {}) {
17
32
  const socketPath = ctxMcpSocketPath(dataDir);
18
33
  if (!fs.existsSync(socketPath)) {
19
34
  throw new Error(`ctx-mcp bridge socket not found: ${socketPath}`);
20
35
  }
36
+ const socketIdentity = statIdentity(socketPath);
21
37
 
22
38
  return new Promise((resolve, reject) => {
23
- const client = net.createConnection(socketPath);
39
+ const client = createConnection(socketPath);
24
40
  let raw = "";
25
- const timer = setTimeout(() => {
41
+ let responseTimer;
42
+ const connectTimer = setTimeout(() => {
26
43
  client.destroy();
27
- reject(new Error(`ctx-mcp bridge timed out after ${timeoutMs}ms`));
28
- }, timeoutMs);
44
+ reject(new Error(`ctx-mcp bridge connect timed out after ${connectTimeoutMs}ms`));
45
+ }, connectTimeoutMs);
29
46
 
30
47
  client.on("connect", () => {
48
+ clearTimeout(connectTimer);
49
+ responseTimer = setTimeout(() => {
50
+ client.destroy();
51
+ reject(new Error(`ctx-mcp bridge timed out after ${timeoutMs}ms`));
52
+ }, timeoutMs);
31
53
  client.write(`${JSON.stringify(payload)}\n`);
32
54
  });
33
55
  client.on("data", (chunk) => {
34
56
  raw += chunk.toString("utf8");
35
57
  });
36
58
  client.on("end", () => {
37
- clearTimeout(timer);
59
+ clearTimeout(connectTimer);
60
+ clearTimeout(responseTimer);
38
61
  try {
39
- resolve(JSON.parse(raw || "{}"));
62
+ const response = JSON.parse(raw || "{}");
63
+ if (response.bridgeRevision !== CTX_MCP_BRIDGE_REVISION) {
64
+ invalidateSocketIfUnchanged(socketPath, socketIdentity);
65
+ reject(new Error(`ctx-mcp bridge revision mismatch: expected ${CTX_MCP_BRIDGE_REVISION}, received ${response.bridgeRevision || "missing"}`));
66
+ return;
67
+ }
68
+ resolve(response);
40
69
  } catch (error) {
41
70
  reject(error);
42
71
  }
43
72
  });
44
73
  client.on("error", (error) => {
45
- clearTimeout(timer);
74
+ clearTimeout(connectTimer);
75
+ clearTimeout(responseTimer);
46
76
  reject(error);
47
77
  });
48
78
  });
49
79
  }
50
80
 
81
/**
 * Removes the socket file only if it is still the same file (per
 * statIdentity) that was observed earlier, so a socket recreated in the
 * meantime is left alone.
 *
 * @returns {boolean} true when the file was removed.
 */
function invalidateSocketIfUnchanged(socketPath, expectedIdentity) {
  const isSameSocket = Boolean(expectedIdentity) && statIdentity(socketPath) === expectedIdentity;
  if (!isSameSocket) {
    return false;
  }
  let removed = true;
  try {
    fs.rmSync(socketPath, { force: true });
  } catch {
    removed = false;
  }
  return removed;
}
90
+
91
/** Returns "<dev>:<ino>" from stat for filePath, or null when stat fails. */
function statIdentity(filePath) {
  let info;
  try {
    info = fs.statSync(filePath);
  } catch {
    return null;
  }
  return [info.dev, info.ino].join(":");
}
99
+
51
100
  function defaultDataDir() {
52
101
  return defaultDataRoot();
53
102
  }
@@ -74,6 +74,54 @@ export async function warmRuleEmbeddings({
74
74
  return { count: texts.length, cachePath: cache.path };
75
75
  }
76
76
 
77
/**
 * Scores the stored index entries of `kind` against the task, with guards.
 *
 * Result statuses produced here: "disabled" (embeddings off, or kind/task
 * missing), "cold-cache" (no embeddings.db and remote use not allowed) and
 * "fallback" (search threw or timed out; the message is in `error`).
 * Otherwise the result of searchIndexed is returned as-is.
 *
 * @returns {Promise<{items: Array, status: string, cachePath?: string, error?: string}>}
 */
export async function searchIndexedEmbeddings({
  kind,
  task = "",
  dataDir = defaultDataRoot(),
  timeoutMs = Number(process.env.CONTEXTOS_EMBEDDING_TIMEOUT_MS || DEFAULT_TIMEOUT_MS),
  allowRemote = process.env.CONTEXTOS_EMBEDDING_ALLOW_REMOTE === "1",
  enabled = process.env.CONTEXTOS_EMBEDDINGS !== "0"
} = {}) {
  const hasTask = Boolean(String(task || "").trim());
  if (!(enabled && kind && hasTask)) {
    return { items: [], status: "disabled" };
  }

  const cachePath = path.join(dataDir, "embeddings.db");
  const cacheIsCold = !allowRemote && !fs.existsSync(cachePath);
  if (cacheIsCold) {
    return { items: [], status: "cold-cache", cachePath };
  }

  try {
    const pendingSearch = searchIndexed({ kind, task, dataDir, allowRemote });
    const outcome = await withTimeout(pendingSearch, timeoutMs);
    return outcome;
  } catch (error) {
    const message = error?.message || String(error);
    return { items: [], status: "fallback", error: message };
  }
}
95
+
96
/**
 * Embeds every item and replaces the stored index for `kind` with the result.
 * Items without an id or with blank text are skipped. When a task is given
 * its embedding is computed (and cached) as well.
 *
 * Fix: the cache handle is now closed in a `finally`, so a failure while
 * loading the extractor or embedding an item no longer leaks the open cache.
 *
 * @param {object} [options]
 * @param {string} [options.kind] - Index namespace to replace.
 * @param {Array<{id: string, text: string}>} [options.items] - Entries to embed.
 * @param {string} [options.task] - Optional task text to pre-embed.
 * @param {string} [options.dataDir] - Directory holding embeddings.db.
 * @param {Array} [options.sources] - Forwarded to getCachedEmbedding.
 * @param {boolean} [options.allowRemote] - Forwarded to getExtractor; when false, a ready model cache is required.
 * @returns {Promise<{count: number, cachePath: string, status?: string}>}
 */
export async function warmIndexedEmbeddings({
  kind,
  items = [],
  task = "",
  dataDir = defaultDataRoot(),
  sources = [],
  allowRemote = true
} = {}) {
  if (!kind || !items.length) return { count: 0, cachePath: path.join(dataDir, "embeddings.db") };
  if (!allowRemote && !isModelCacheReady(dataDir)) {
    return { count: 0, cachePath: path.join(dataDir, "embeddings.db"), status: "missing-model" };
  }

  const cache = await openEmbeddingCache(dataDir);
  try {
    const embedder = await getExtractor({ allowRemote, dataDir });
    if (String(task || "").trim()) await getCachedEmbedding({ cache, embedder, text: task, sources });

    const indexed = [];
    for (const item of items) {
      const text = String(item.text || "");
      if (!item.id || !text.trim()) continue;
      const vector = await getCachedEmbedding({ cache, embedder, text, sources });
      indexed.push({ id: item.id, text, vector });
    }
    cache.replaceIndex(kind, indexed);
    return { count: indexed.length, cachePath: cache.path };
  } finally {
    // Always release the cache handle, including on failure.
    cache.close();
  }
}
124
+
77
125
  async function enhanceRuleScores(rules, task, { dataDir, sources, allowRemote }) {
78
126
  const cache = await openEmbeddingCache(dataDir);
79
127
  const embedder = await getExtractor({ allowRemote, dataDir });
@@ -113,6 +161,20 @@ async function enhanceRuleScores(rules, task, { dataDir, sources, allowRemote })
113
161
  };
114
162
  }
115
163
 
164
/**
 * Embeds the task and scores every stored index entry of `kind` against it.
 * Items are returned best-first (ties broken by id), each with an
 * `embeddingScore` rounded to three decimals.
 *
 * Fix: the cache handle is now closed in a `finally`, so a failure in the
 * extractor, the embedding, or reading the index no longer leaks the open cache.
 *
 * @returns {Promise<{items: Array, status: string, model: string, cachePath: string}>}
 */
async function searchIndexed({ kind, task, dataDir, allowRemote }) {
  const cache = await openEmbeddingCache(dataDir);
  try {
    const embedder = await getExtractor({ allowRemote, dataDir });
    const taskEmbedding = await getCachedEmbedding({ cache, embedder, text: task, sources: [] });
    const items = cache.listIndexed(kind)
      .map((item) => ({
        ...item,
        embeddingScore: Number(similarityToScore(cosine(taskEmbedding, item.vector)).toFixed(3))
      }))
      .sort((a, b) => b.embeddingScore - a.embeddingScore || a.id.localeCompare(b.id));
    return { items, status: "enabled", model: DEFAULT_MODEL, cachePath: cache.path };
  } finally {
    // Always release the cache handle, including on failure.
    cache.close();
  }
}
177
+
116
178
  async function getExtractor({ allowRemote, dataDir }) {
117
179
  const cacheDir = modelCacheDir(dataDir);
118
180
  const key = `${allowRemote ? "remote" : "local"}:${cacheDir}`;
@@ -183,6 +245,30 @@ export async function openEmbeddingCache(dataDir) {
183
245
  );
184
246
  writeDatabaseAtomically(cachePath, db);
185
247
  },
248
+ listIndexed(kind) {
249
+ const stmt = db.prepare("SELECT id, text, vector FROM embedding_index WHERE kind = ? AND model = ?");
250
+ const items = [];
251
+ try {
252
+ stmt.bind([kind, DEFAULT_MODEL]);
253
+ while (stmt.step()) {
254
+ const row = stmt.getAsObject();
255
+ items.push({ id: row.id, text: row.text, vector: JSON.parse(row.vector) });
256
+ }
257
+ } finally {
258
+ stmt.free();
259
+ }
260
+ return items;
261
+ },
262
+ replaceIndex(kind, items) {
263
+ db.run("DELETE FROM embedding_index WHERE kind = ? AND model = ?", [kind, DEFAULT_MODEL]);
264
+ for (const item of items) {
265
+ db.run(
266
+ "INSERT INTO embedding_index (kind, id, text, model, vector, updated_at) VALUES (?, ?, ?, ?, ?, ?)",
267
+ [kind, item.id, item.text, DEFAULT_MODEL, JSON.stringify(item.vector), new Date().toISOString()]
268
+ );
269
+ }
270
+ writeDatabaseAtomically(cachePath, db);
271
+ },
186
272
  close() {
187
273
  writeDatabaseAtomically(cachePath, db);
188
274
  db.close();
@@ -218,6 +304,17 @@ function ensureEmbeddingSchema(db) {
218
304
  updated_at TEXT NOT NULL
219
305
  )
220
306
  `);
307
+ db.run(`
308
+ CREATE TABLE IF NOT EXISTS embedding_index (
309
+ kind TEXT NOT NULL,
310
+ id TEXT NOT NULL,
311
+ text TEXT NOT NULL,
312
+ model TEXT NOT NULL,
313
+ vector TEXT NOT NULL,
314
+ updated_at TEXT NOT NULL,
315
+ PRIMARY KEY (kind, id, model)
316
+ )
317
+ `);
221
318
  }
222
319
 
223
320
  function openSqlDatabase(SQL, cachePath) {
@@ -316,11 +413,16 @@ function similarityToScore(similarity) {
316
413
  return Math.max(0, Math.min(1, (similarity + 1) / 2));
317
414
  }
318
415
 
319
/**
 * Settles with `promise`, or rejects with a timeout error if it has not
 * settled within timeoutMs. The timer is cleared once the race settles.
 */
async function withTimeout(promise, timeoutMs) {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`embedding scorer timed out after ${timeoutMs}ms`)), timeoutMs);
  });
  return Promise.race([promise, deadline]).finally(() => {
    clearTimeout(timer);
  });
}