@minhpnq1807/contextos 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,321 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { findGraphRelevantFiles, mergeRelevantFiles } from "./graph-retriever.js";
4
+ import { expandImportGraph } from "./import-graph.js";
5
+ import { findEmbeddingRelevantFiles } from "./file-embedding-retriever.js";
6
+
7
// Low-signal words dropped by tokenize(). Vietnamese entries are stored in
// their ASCII-folded (diacritic-free) form because tokenize() folds its input
// before consulting this set.
const STOP_WORDS = new Set([
  "a", "an", "and", "are", "as",
  "at", "be", "by", "cho", "co",
  "cua", "do", "fix", "for", "from",
  "in", "is", "it", "la", "of",
  "on", "or", "sua", "task", "the",
  "to", "trong", "tra", "va", "with"
]);
12
+
13
// Phrases that mark a rule as imperative; scoreRules() substring-matches them
// against the rule text. The Vietnamese phrases are written without diacritics.
const IMPORTANT_WORDS = [
  // English
  "always",
  "never",
  "must",
  "required",
  "important",
  "strictly",
  "mandatory",
  // Vietnamese (ASCII-folded)
  "luon",
  "khong bao gio",
  "bat buoc",
  "quan trong"
];
17
+
18
// Directory names that walkFiles() never descends into.
const IGNORE_DIRS = new Set([
  ".git",
  ".next",
  ".turbo",
  "coverage",
  "dist",
  "build",
  "node_modules",
  "vendor"
]);
21
+
22
// Maps a task token to related tokens (Vietnamese <-> English synonyms) that
// should also count as a match, at a lower weight than an exact hit.
//
// The table is created WITHOUT a prototype on purpose: it is indexed with
// arbitrary user-supplied tokens, and on an ordinary object literal a token
// such as "constructor" or "__proto__" resolves to an inherited, non-iterable
// value. Callers do `for (const alias of SEMANTIC_ALIASES[token] || [])`, so
// that inherited value would raise "is not iterable" for any task mentioning
// a constructor. With a null prototype, unknown tokens are simply undefined.
const SEMANTIC_ALIASES = Object.assign(Object.create(null), {
  duyet: ["moderation", "moderate", "review", "approve", "approval", "approved", "reject", "rejected"],
  kiem: ["check", "verify", "validation", "validate"],
  "kiem-duyet": ["moderation", "moderate", "review", "approve", "approval", "reject"],
  kiemduyet: ["moderation", "moderate", "review", "approve", "approval", "reject"],
  moderation: ["duyet", "kiemduyet", "review", "approval", "reject"],
  moderate: ["duyet", "kiemduyet", "review", "approval", "reject"],
  review: ["duyet", "moderation", "moderate"],
  approve: ["duyet", "approval", "approved"],
  approval: ["duyet", "approve", "approved"],
  reject: ["duyet", "rejected", "rejection"],
  flow: ["workflow", "pipeline", "process"],
  workflow: ["flow", "pipeline", "process"],
  tai: ["upload", "uploaded", "resource"],
  "tai-len": ["upload", "uploaded", "resource"],
  tailen: ["upload", "uploaded", "resource"],
  upload: ["tai", "tailen", "resource", "uploaded"],
  xac: ["confirm", "verify", "verification"],
  nhan: ["confirm", "confirmation"],
  "xac-nhan": ["confirm", "confirmation", "verify", "verification"],
  xacnhan: ["confirm", "confirmation", "verify", "verification"],
  thong: ["notification", "notify", "message"],
  bao: ["notification", "notify", "message"],
  "thong-bao": ["notification", "notify", "message"],
  thongbao: ["notification", "notify", "message"]
});
48
+
49
+ const MODERATION_TOKENS = new Set(["moderation", "moderate", "content-moderation", "approval", "approved", "reject", "rejected", "needs_review"]); // file-path tokens that earn the "domain:moderation" bonus in scoreFileTokens()
50
+
51
/**
 * Split free text (task descriptions, rule text, file paths) into lower-case,
 * ASCII-folded search tokens.
 *
 * Steps: lower-case, strip combining diacritics, fold the Vietnamese letter
 * "đ" to "d", join a few known two-word Vietnamese phrases with a hyphen so
 * they survive as a single token, split on anything that is not
 * `[a-z0-9_.-]`, expand compound tokens into their parts, then drop
 * one-character tokens and stop words.
 *
 * @param {*} value - Text to tokenize; falsy values are treated as "".
 * @returns {string[]} Tokens in encounter order (duplicates are kept).
 */
export function tokenize(value) {
  const normalized = String(value || "")
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    // "đ" (U+0111) has no canonical decomposition, so NFD leaves it intact and
    // the split below would otherwise cut every word containing it in two.
    .replace(/\u0111/g, "d")
    .replace(/kiem\s+duyet/g, "kiem-duyet")
    .replace(/tai\s+len/g, "tai-len")
    .replace(/xac\s+nhan/g, "xac-nhan")
    .replace(/thong\s+bao/g, "thong-bao");

  return normalized
    .split(/[^a-z0-9_.-]+/g)
    .flatMap(splitCompoundToken)
    .filter((word) => word.length > 1 && !STOP_WORDS.has(word));
}
66
+
67
/**
 * Expand a token into itself plus the pieces obtained by splitting on runs of
 * `_`, `.` and `-`.
 *
 * The original token always comes first. Pieces are appended whenever
 * splitting changes anything, which includes tokens that only carry leading
 * or trailing separators: "bug." yields ["bug.", "bug"] and ".github" yields
 * [".github", "github"], so sentence-final words and dot-directories can still
 * match their bare form.
 *
 * @param {*} token - Raw token.
 * @returns {Array} `[token]` when there is nothing to split, otherwise
 *   `[token, ...parts]`.
 */
function splitCompoundToken(token) {
  const text = String(token || "");
  const parts = text.split(/[_.-]+/g).filter(Boolean);
  const isPlainWord = parts.length === 1 && parts[0] === text;
  // No separators at all, or nothing but separators: there is nothing to add.
  if (parts.length === 0 || isPlainWord) return [token];
  return [token, ...parts];
}
71
+
72
/**
 * Return the given tokens together with every alias that SEMANTIC_ALIASES
 * lists for them.
 *
 * @param {Iterable<string>} tokens - Tokens to expand.
 * @returns {Set<string>} The original tokens plus their aliases.
 */
function expandSemanticTokens(tokens) {
  const result = new Set(tokens);
  for (const token of tokens) {
    const aliases = SEMANTIC_ALIASES[token] || [];
    for (const alias of aliases) {
      result.add(alias);
    }
  }
  return result;
}
79
+
80
/**
 * Extract the path from a `## Source: <path>` marker line.
 *
 * @param {string} line - A single (already trimmed) markdown line.
 * @returns {string|null} The trimmed path, or null when the line is not a
 *   source marker.
 */
function sourceFromLine(line) {
  const found = line.match(/^## Source:\s+(.+)$/);
  if (!found) return null;
  const [, rawPath] = found;
  return rawPath.trim();
}
84
+
85
/**
 * Strip leading markdown decoration from a line: a bullet marker, then an
 * ordered-list marker, then heading hashes, and finally surrounding
 * whitespace.
 *
 * @param {string} line - Raw markdown line.
 * @returns {string} The line's text without list or heading markers.
 */
function cleanRuleLine(line) {
  const withoutBullet = line.replace(/^\s{0,3}[-*+]\s+/, "");
  const withoutNumber = withoutBullet.replace(/^\s{0,3}\d+[.)]\s+/, "");
  const withoutHeading = withoutNumber.replace(/^#+\s+/, "");
  return withoutHeading.trim();
}
92
+
93
/**
 * Parse a concatenated markdown document into individual rules.
 *
 * - A `## Source: <path>` line switches the source path recorded on the rules
 *   that follow (initially "unknown").
 * - Every list item and heading becomes its own rule when its cleaned text is
 *   at least 4 characters long.
 * - Consecutive plain lines are joined into one paragraph rule, kept only when
 *   it is at least 20 characters long. Blank lines and `---` rules end a
 *   paragraph.
 *
 * @param {*} markdown - Markdown text; falsy values are treated as "".
 * @returns {Array<{id: string, sourcePath: string, content: string, originalOrder: number}>}
 *   De-duplicated rules in document order.
 */
export function parseRules(markdown) {
  const rules = [];
  let sourcePath = "unknown";
  let pendingLines = [];

  // Append a rule, numbering it by its position in the list so far.
  const addRule = (content) => {
    const position = rules.length;
    rules.push({
      id: `r${position + 1}`,
      sourcePath,
      content,
      originalOrder: position
    });
  };

  // Emit the buffered paragraph (if it is long enough) and reset the buffer.
  const flushParagraph = () => {
    const joined = pendingLines.join(" ").replace(/\s+/g, " ");
    pendingLines = [];
    const content = cleanRuleLine(joined);
    if (content.length >= 20) addRule(content);
  };

  // List and heading detection deliberately looks at the untrimmed line, so
  // the indentation limit in the pattern is measured from the real line start.
  const startsListItemOrHeading = (rawLine) =>
    /^\s{0,3}([-*+]|\d+[.)])\s+/.test(rawLine) || /^#{1,6}\s+/.test(rawLine);

  const lines = String(markdown || "").split(/\r?\n/);
  for (const rawLine of lines) {
    const line = rawLine.trim();

    const nextSource = sourceFromLine(line);
    if (nextSource) {
      flushParagraph();
      sourcePath = nextSource;
    } else if (!line || /^-{3,}$/.test(line)) {
      flushParagraph();
    } else if (startsListItemOrHeading(rawLine)) {
      flushParagraph();
      const content = cleanRuleLine(rawLine);
      if (content.length >= 4) addRule(content);
    } else {
      pendingLines.push(line);
    }
  }

  flushParagraph();
  return dedupeRules(rules);
}
140
+
141
/**
 * Drop rules whose source path and case-insensitive content repeat an earlier
 * rule, then renumber the survivors (`id` = r1, r2, ... and `originalOrder` =
 * 0, 1, ...).
 *
 * @param {Array<{sourcePath: string, content: string}>} rules - Parsed rules.
 * @returns {Array<object>} New rule objects; the inputs are not mutated.
 */
function dedupeRules(rules) {
  const seenKeys = new Set();
  const unique = [];
  for (const rule of rules) {
    const key = `${rule.sourcePath}:${rule.content.toLowerCase()}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    const position = unique.length;
    unique.push({ ...rule, id: `r${position + 1}`, originalOrder: position });
  }
  return unique;
}
150
+
151
/**
 * Score each rule's relevance to a task and sort the rules best-first.
 *
 * The score (clamped to [0, 1], rounded to 3 decimals) is built from:
 * - token overlap: (exact matches + 0.5 * alias matches) / task token count;
 * - +0.4 when the rule contains an imperative keyword from IMPORTANT_WORDS;
 * - +0.2 when the rule mentions a file-like token found in the open files.
 *
 * Ties are broken by the rule's original order.
 *
 * @param {Array<{content: string, originalOrder: number}>} rules - Parsed rules.
 * @param {string} task - Task description.
 * @param {string[]|string} [openFiles=[]] - Paths of currently open files.
 * @returns {Array<object>} Copies of the rules with `score` and `reasons` added.
 */
export function scoreRules(rules, task, openFiles = []) {
  const rawTaskTokens = new Set(tokenize(task));
  const openFileText = Array.isArray(openFiles) ? openFiles.join(" ") : String(openFiles || "");
  const openFileTokens = new Set(tokenize(openFileText));
  // Rule tokens are lower-cased by tokenize(), so the substring fallback has to
  // search lower-cased text or paths such as "src/Foo.js" can never match.
  const lowerOpenFileText = openFileText.toLowerCase();

  return rules.map((rule) => {
    const ruleTokens = new Set(tokenize(rule.content));
    const exactOverlap = [...rawTaskTokens].filter((token) => ruleTokens.has(token));
    const semanticOverlap = [];
    for (const token of rawTaskTokens) {
      for (const alias of SEMANTIC_ALIASES[token] || []) {
        // Aliases the task already names are counted as exact matches instead.
        if (!rawTaskTokens.has(alias) && ruleTokens.has(alias)) semanticOverlap.push(`${token}->${alias}`);
      }
    }
    const reasons = [];
    let score = rawTaskTokens.size
      ? (exactOverlap.length + semanticOverlap.length * 0.5) / Math.max(rawTaskTokens.size, 1)
      : 0;

    if (exactOverlap.length) reasons.push(`task:${exactOverlap.join("/")}`);
    if (semanticOverlap.length) reasons.push(`semantic:${semanticOverlap.join("/")}`);

    // IMPORTANT_WORDS stores its Vietnamese phrases without diacritics, so the
    // rule text must be folded the same way before the substring test.
    const foldedRule = foldForKeywordMatch(rule.content);
    if (IMPORTANT_WORDS.some((word) => foldedRule.includes(word))) {
      score += 0.4;
      reasons.push("imperative");
    }

    const fileMentions = [...ruleTokens].filter((token) => /[./]/.test(token));
    if (fileMentions.some((token) => openFileTokens.has(token) || lowerOpenFileText.includes(token))) {
      score += 0.2;
      reasons.push("open-file");
    }

    return {
      ...rule,
      score: Math.max(0, Math.min(1, Number(score.toFixed(3)))),
      reasons
    };
  }).sort((a, b) => b.score - a.score || a.originalOrder - b.originalOrder);
}

// Lower-case the text and strip combining diacritics ("bắt buộc" -> "bat buoc").
function foldForKeywordMatch(text) {
  return String(text || "")
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "");
}
192
+
193
/**
 * Find the files under `cwd` most relevant to a task.
 *
 * Pipeline: score every walked file path against the task tokens; unless the
 * path heuristics are already confident, ask the embedding finder for more
 * candidates; expand the best seeds through the import graph; pass the merged
 * seeds to the graph retriever; and merge the graph and seed results.
 *
 * @param {object} [options]
 * @param {string} [options.cwd=process.cwd()] - Root directory to search.
 * @param {string} [options.task=""] - Task description.
 * @param {Array} [options.rules=[]] - Rules forwarded to the graph retriever.
 * @param {string} [options.dataDir] - Data directory forwarded to the embedding finder.
 * @param {number} [options.limit=3] - Limit passed to the final merge.
 * @param {Function} [options.embeddingFileFinder] - Embedding-based finder (injectable).
 * @param {number} [options.fileEmbeddingTimeoutMs] - Forwarded as `timeoutMs`.
 * @param {object} [options.fileEmbeddingOptions={}] - Forwarded as `embeddingOptions`.
 * @returns {Promise<Array>} Result of `mergeRelevantFiles`, or `[]` when the
 *   task yields no tokens.
 */
export async function findRelevantFiles({
  cwd = process.cwd(),
  task = "",
  rules = [],
  dataDir,
  limit = 3,
  embeddingFileFinder = findEmbeddingRelevantFiles,
  fileEmbeddingTimeoutMs,
  fileEmbeddingOptions = {}
} = {}) {
  const rawTaskTokens = new Set(tokenize(task));
  if (rawTaskTokens.size === 0) return [];

  // Intermediate stages keep a wider pool than the final limit.
  const wideLimit = Math.max(limit * 2, 6);

  const candidates = [];
  walkFiles(cwd, (filePath) => {
    const rel = path.relative(cwd, filePath);
    const { score, reasons } = scoreFileTokens({
      rawTaskTokens,
      fileTokens: new Set(tokenize(rel))
    });
    if (score > 0) candidates.push({ path: rel, score, reasons });
  });

  candidates.sort((a, b) => b.score - a.score || a.path.localeCompare(b.path));
  const heuristicFiles = candidates.slice(0, wideLimit);

  // Skip the embedding lookup when path matching alone already produced enough
  // files and a strong top hit.
  const topHeuristicScore = Number(heuristicFiles[0]?.score || 0);
  const hasHighConfidenceHeuristics = heuristicFiles.length >= limit && topHeuristicScore >= 8;

  let embeddingFiles = [];
  if (!hasHighConfidenceHeuristics) {
    embeddingFiles = await embeddingFileFinder({
      cwd,
      task,
      dataDir,
      timeoutMs: fileEmbeddingTimeoutMs,
      embeddingOptions: fileEmbeddingOptions,
      limit: wideLimit
    });
  }

  const importSeeds = mergeLocalFileCandidates([...heuristicFiles, ...embeddingFiles]).slice(0, limit);
  const importGraphFiles = expandImportGraph({
    cwd,
    seedFiles: importSeeds,
    limit: wideLimit
  });

  const allLocalFiles = [...heuristicFiles, ...embeddingFiles, ...importGraphFiles];
  const seedFiles = mergeLocalFileCandidates(allLocalFiles).slice(0, Math.max(limit * 3, 9));

  const graphFiles = findGraphRelevantFiles({
    cwd,
    task,
    rules,
    seedFiles,
    limit: wideLimit
  });

  return mergeRelevantFiles({ graphFiles, heuristicFiles: seedFiles, limit });
}
254
+
255
/**
 * Merge candidate entries that share a path.
 *
 * Scores are summed, reasons are unioned in first-seen order, later entries
 * override other properties of earlier ones, and `source` stays
 * "import-graph" once any entry for that path carried it.
 *
 * @param {Iterable<{path: string, score?: number, reasons?: string[], source?: string}>} files
 * @returns {Array<object>} Merged entries sorted by score (descending), then path.
 */
function mergeLocalFileCandidates(files) {
  const merged = new Map();

  for (const file of files) {
    const previous = merged.get(file.path);
    const fromImportGraph = previous?.source === "import-graph" || file.source === "import-graph";
    const combinedReasons = new Set([...(previous?.reasons || []), ...(file.reasons || [])]);
    const combinedScore = Number(previous?.score || 0) + Number(file.score || 0);

    merged.set(file.path, {
      ...previous,
      ...file,
      score: combinedScore,
      reasons: [...combinedReasons],
      source: fromImportGraph ? "import-graph" : file.source
    });
  }

  const byScoreThenPath = (a, b) => b.score - a.score || a.path.localeCompare(b.path);
  return [...merged.values()].sort(byScoreThenPath);
}
269
+
270
/**
 * Score how well a file path's tokens match the task tokens.
 *
 * Points: +3 for each task token present in the path, +2 for each alias of a
 * task token present in the path, +6 when the task asks for moderation
 * ("kiem-duyet", "kiemduyet" or "duyet") and the path has a moderation token,
 * and +2 when the task asks for upload ("upload", "tai-len" or "tailen") and
 * the path has "upload", "uploaded" or "resource".
 *
 * @param {object} args
 * @param {Set<string>} args.rawTaskTokens - Tokens of the task text.
 * @param {Set<string>} args.fileTokens - Tokens of the relative file path.
 * @returns {{score: number, reasons: string[]}} Total score and unique reasons.
 */
function scoreFileTokens({ rawTaskTokens, fileTokens }) {
  const reasons = new Set();
  let score = 0;
  const award = (points, reason) => {
    score += points;
    reasons.add(reason);
  };
  const taskHasAny = (...tokens) => tokens.some((token) => rawTaskTokens.has(token));

  for (const token of rawTaskTokens) {
    if (fileTokens.has(token)) award(3, token);
    for (const alias of SEMANTIC_ALIASES[token] || []) {
      if (fileTokens.has(alias)) award(2, `${token}->${alias}`);
    }
  }

  const hasModerationIntent = taskHasAny("kiem-duyet", "kiemduyet", "duyet");
  if (hasModerationIntent && [...fileTokens].some((token) => MODERATION_TOKENS.has(token))) {
    award(6, "domain:moderation");
  }

  const hasUploadIntent = taskHasAny("upload", "tai-len", "tailen");
  const fileLooksLikeUpload = ["upload", "uploaded", "resource"].some((token) => fileTokens.has(token));
  if (hasUploadIntent && fileLooksLikeUpload) {
    award(2, "domain:upload");
  }

  return { score, reasons: [...reasons] };
}
301
+
302
/**
 * Recursively visit the regular files below a directory.
 *
 * Entries starting with "." are skipped, except ".github" and ".codex".
 * Directories named in IGNORE_DIRS are not entered, recursion stops once
 * `depth` exceeds 6, and unreadable directories are skipped silently.
 *
 * @param {string} directory - Directory to scan.
 * @param {(filePath: string) => void} onFile - Called with each file's path.
 * @param {number} [depth=0] - Current recursion depth.
 */
function walkFiles(directory, onFile, depth = 0) {
  if (depth > 6) return;

  let entries;
  try {
    entries = fs.readdirSync(directory, { withFileTypes: true });
  } catch {
    // Best effort: a missing or unreadable directory contributes no files.
    return;
  }

  for (const entry of entries) {
    const { name } = entry;
    const isAllowedDotEntry = name === ".github" || name === ".codex";
    if (name.startsWith(".") && !isAllowedDotEntry) continue;

    const fullPath = path.join(directory, name);
    if (entry.isDirectory()) {
      if (IGNORE_DIRS.has(name)) continue;
      walkFiles(fullPath, onFile, depth + 1);
    } else if (entry.isFile()) {
      onFile(fullPath);
    }
  }
}
@@ -0,0 +1,52 @@
1
+ import fs from "node:fs";
2
+ import net from "node:net";
3
+ import os from "node:os";
4
+ import path from "node:path";
5
+
6
+ const DEFAULT_TIMEOUT_MS = 1000; // bridge timeout in ms used when CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS is not set
7
+
8
/**
 * Path of the ctx-mcp bridge socket inside a data directory.
 *
 * @param {string} [dataDir=defaultDataDir()] - ContextOS data directory.
 * @returns {string} `<dataDir>/ctx-mcp.sock`.
 */
export function ctxMcpSocketPath(dataDir = defaultDataDir()) {
  const socketFile = "ctx-mcp.sock";
  return path.join(dataDir, socketFile);
}
11
+
12
/**
 * Send one JSON request to the ctx-mcp bridge socket and return its reply.
 *
 * The payload is written as a single newline-terminated JSON line. The reply
 * is everything the peer sends before it ends the connection, parsed as JSON
 * (an empty reply parses as `{}`).
 *
 * @param {*} payload - JSON-serialisable request.
 * @param {object} [options]
 * @param {string} [options.dataDir=defaultDataDir()] - Directory holding the socket.
 * @param {number} [options.timeoutMs] - Time budget in ms; defaults to
 *   CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS or DEFAULT_TIMEOUT_MS.
 * @returns {Promise<*>} The parsed reply.
 * @throws {Error} When the socket file is missing, the connection fails, the
 *   reply is not valid JSON, or no complete reply arrives in time.
 */
export async function callCtxScoreContext(payload, {
  dataDir = defaultDataDir(),
  timeoutMs = Number(process.env.CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS || DEFAULT_TIMEOUT_MS)
} = {}) {
  const socketPath = ctxMcpSocketPath(dataDir);
  if (!fs.existsSync(socketPath)) {
    throw new Error(`ctx-mcp bridge socket not found: ${socketPath}`);
  }

  // A non-numeric env value becomes NaN, which setTimeout would coerce to a
  // 1 ms delay and make every call time out; fall back to the default instead.
  const requestedTimeoutMs = Number(timeoutMs);
  const effectiveTimeoutMs = Number.isFinite(requestedTimeoutMs) ? requestedTimeoutMs : DEFAULT_TIMEOUT_MS;

  return new Promise((resolve, reject) => {
    const client = net.createConnection(socketPath);
    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is reassembled instead of turning into replacement characters.
    client.setEncoding("utf8");
    let raw = "";
    const timer = setTimeout(() => {
      client.destroy();
      reject(new Error(`ctx-mcp bridge timed out after ${effectiveTimeoutMs}ms`));
    }, effectiveTimeoutMs);

    client.on("connect", () => {
      client.write(`${JSON.stringify(payload)}\n`);
    });
    client.on("data", (chunk) => {
      raw += chunk;
    });
    client.on("end", () => {
      clearTimeout(timer);
      // The reply is complete; release our side of the connection as well.
      client.destroy();
      try {
        resolve(JSON.parse(raw || "{}"));
      } catch (error) {
        reject(error);
      }
    });
    client.on("error", (error) => {
      clearTimeout(timer);
      reject(error);
    });
  });
}
49
+
50
/**
 * Resolve the ContextOS data directory from the environment.
 *
 * @returns {string} `PLUGIN_DATA` when set; otherwise `contextos` inside
 *   `CODEX_HOME`, which itself falls back to `~/.codex`.
 */
function defaultDataDir() {
  const { PLUGIN_DATA: pluginData, CODEX_HOME: codexHomeEnv } = process.env;
  if (pluginData) return pluginData;

  const codexHome = codexHomeEnv || path.join(os.homedir(), ".codex");
  return path.join(codexHome, "contextos");
}
@@ -0,0 +1,248 @@
1
+ import crypto from "node:crypto";
2
+ import fs from "node:fs";
3
+ import os from "node:os";
4
+ import path from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+
7
+ const DEFAULT_MODEL = "Xenova/all-MiniLM-L6-v2"; // model id given to the feature-extraction pipeline and stored with every cached vector
8
+ const DEFAULT_TIMEOUT_MS = 800; // embedding time budget in ms when CONTEXTOS_EMBEDDING_TIMEOUT_MS is not set
9
+ const SEMANTIC_HIGH_THRESHOLD = 0.5; // minimum embedding score that is allowed to raise a rule's base score
10
+
11
+ const extractorPromises = new Map(); // memoised extractor promises, keyed by "<remote|local>:<model cache dir>"
12
+ let sqlPromise = null; // memoised sql.js initialisation promise; created on first use by getSql()
13
+
14
+ const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this module file
15
+ const repoRoot = path.resolve(__dirname, "..", "..", ".."); // three directories above this module; getSql() looks for node_modules/sql.js here
16
+
17
/**
 * Re-score rules with sentence embeddings, never failing the caller.
 *
 * Returns the rules untouched with status:
 * - "disabled" when embeddings are switched off, the task is blank, or there
 *   are no rules;
 * - "cold-cache" when remote model loading is not allowed and no
 *   `embeddings.db` exists in `dataDir` yet;
 * - "fallback" (plus an `error` message) when scoring throws or exceeds
 *   `timeoutMs`.
 * Otherwise it returns whatever the embedding scorer produced.
 *
 * @param {Array<object>} rules - Rules, typically already keyword-scored.
 * @param {string} task - Task description.
 * @param {object} [options]
 * @param {string} [options.dataDir] - Directory holding the cache and models.
 * @param {string[]} [options.sources=[]] - Source files mixed into cache keys.
 * @param {number} [options.timeoutMs] - Time budget in ms.
 * @param {boolean} [options.allowRemote] - Allow loading the model remotely.
 * @param {boolean} [options.enabled] - Master switch.
 * @returns {Promise<{rules: Array<object>, status: string}>}
 */
export async function enhanceRuleScoresWithEmbeddings(
  rules,
  task,
  {
    dataDir = path.join(os.homedir(), ".codex", "contextos"),
    sources = [],
    timeoutMs = Number(process.env.CONTEXTOS_EMBEDDING_TIMEOUT_MS || DEFAULT_TIMEOUT_MS),
    allowRemote = process.env.CONTEXTOS_EMBEDDING_ALLOW_REMOTE === "1",
    enabled = process.env.CONTEXTOS_EMBEDDINGS !== "0"
  } = {}
) {
  const hasTask = Boolean(String(task || "").trim());
  const hasRules = Boolean(rules?.length);
  if (!(enabled && hasTask && hasRules)) {
    return { rules, status: "disabled" };
  }

  const cachePath = path.join(dataDir, "embeddings.db");
  const cacheIsCold = !fs.existsSync(cachePath);
  if (!allowRemote && cacheIsCold) {
    return { rules, status: "cold-cache", cachePath };
  }

  try {
    const scoring = enhanceRuleScores(rules, task, { dataDir, sources, allowRemote });
    return await withTimeout(scoring, timeoutMs);
  } catch (error) {
    // Embeddings are an optional refinement: report the failure and hand back
    // the caller's original scores.
    const message = error?.message || String(error);
    return { rules, status: "fallback", error: message };
  }
}
49
+
50
/**
 * Pre-compute and cache embeddings for a task and a set of rules.
 *
 * Blank texts are skipped and duplicate texts are embedded once.
 *
 * @param {object} [options]
 * @param {Array<{content?: string}>} [options.rules=[]] - Rules to embed.
 * @param {string} [options.task=""] - Task text to embed.
 * @param {string} [options.dataDir] - Directory holding the cache and models.
 * @param {string[]} [options.sources=[]] - Source files mixed into cache keys.
 * @param {boolean} [options.allowRemote=true] - Allow loading the model remotely.
 * @returns {Promise<{count: number, cachePath: string}>} Number of distinct
 *   texts processed and the cache file location.
 */
export async function warmRuleEmbeddings({
  rules = [],
  task = "",
  dataDir = path.join(os.homedir(), ".codex", "contextos"),
  sources = [],
  allowRemote = true
} = {}) {
  const candidateTexts = [task, ...rules.map((rule) => rule.content || "")];
  const nonBlankTexts = candidateTexts.filter((text) => String(text).trim());
  const texts = [...new Set(nonBlankTexts)];

  const cache = await openEmbeddingCache(dataDir);
  const embedder = await getExtractor({ allowRemote, dataDir });

  // Sequential on purpose of fidelity: one embedding request at a time.
  for (const text of texts) {
    await getCachedEmbedding({ cache, embedder, text, sources });
  }

  cache.close();
  return { count: texts.length, cachePath: cache.path };
}
70
+
71
/**
 * Blend embedding similarity into rule scores.
 *
 * For each rule, the cosine similarity between the task and rule embeddings is
 * mapped to [0, 1]. When that semantic score reaches SEMANTIC_HIGH_THRESHOLD
 * the rule's score becomes the larger of its base score and the semantic
 * score; otherwise the base score is kept. An `embedding:<score>` reason is
 * added from 0.45 upward.
 *
 * NOTE(review): similarityToScore maps a cosine of 0 to 0.5, which equals the
 * threshold, so any non-negative similarity lifts a rule to at least 0.5 —
 * confirm this is the intended calibration.
 *
 * @param {Array<object>} rules - Rules with optional `score` and `reasons`.
 * @param {string} task - Task description.
 * @param {{dataDir: string, sources: string[], allowRemote: boolean}} options
 * @returns {Promise<{rules: Array<object>, status: string, model: string, cachePath: string}>}
 */
async function enhanceRuleScores(rules, task, { dataDir, sources, allowRemote }) {
  const cache = await openEmbeddingCache(dataDir);
  const embedder = await getExtractor({ allowRemote, dataDir });
  const embed = (text) => getCachedEmbedding({ cache, embedder, text, sources });

  const taskEmbedding = await embed(task);

  const enhanced = [];
  for (const rule of rules) {
    const ruleEmbedding = await embed(rule.content || "");
    const semanticScore = similarityToScore(cosine(taskEmbedding, ruleEmbedding));
    const baseScore = Number(rule.score || 0);

    let score = baseScore;
    if (semanticScore >= SEMANTIC_HIGH_THRESHOLD) {
      score = Math.max(baseScore, semanticScore);
    }

    let reasons = rule.reasons;
    if (semanticScore >= 0.45) {
      reasons = [...new Set([...(rule.reasons || []), `embedding:${semanticScore.toFixed(2)}`])];
    }

    const roundedScore = Number(score.toFixed(3));
    enhanced.push({
      ...rule,
      score: Math.max(0, Math.min(1, roundedScore)),
      embeddingScore: Number(semanticScore.toFixed(3)),
      reasons
    });
  }

  cache.close();

  enhanced.sort((a, b) => b.score - a.score || a.originalOrder - b.originalOrder);
  return {
    rules: enhanced,
    status: "enabled",
    model: DEFAULT_MODEL,
    cachePath: cache.path
  };
}
109
+
110
/**
 * Load (once per configuration) the feature-extraction pipeline.
 *
 * Pipelines are memoised per "<remote|local>:<model cache dir>" key so that
 * concurrent and repeated callers share a single load. A failed load is
 * removed from the memo table, so a later call can retry instead of receiving
 * the same rejection for the rest of the process lifetime.
 *
 * @param {object} options
 * @param {boolean} options.allowRemote - Allow loading the model remotely.
 * @param {string} options.dataDir - Data directory containing the model cache.
 * @returns {Promise<Function>} Resolves to the pipeline returned by
 *   `@xenova/transformers`.
 */
async function getExtractor({ allowRemote, dataDir }) {
  const cacheDir = modelCacheDir(dataDir);
  const key = `${allowRemote ? "remote" : "local"}:${cacheDir}`;

  let pending = extractorPromises.get(key);
  if (!pending) {
    pending = (async () => {
      const transformers = await import("@xenova/transformers");
      transformers.env.allowRemoteModels = Boolean(allowRemote);
      transformers.env.allowLocalModels = true;
      transformers.env.cacheDir = cacheDir;
      return transformers.pipeline("feature-extraction", DEFAULT_MODEL, {
        quantized: true
      });
    })();
    extractorPromises.set(key, pending);

    const inFlight = pending;
    // Evict on failure. The rejection itself still reaches the caller through
    // the returned promise; this handler only clears the memo entry, and only
    // if it has not been replaced in the meantime.
    inFlight.catch(() => {
      if (extractorPromises.get(key) === inFlight) extractorPromises.delete(key);
    });
  }
  return pending;
}
126
+
127
/**
 * Directory in which downloaded model files are cached.
 *
 * @param {string} [dataDir] - ContextOS data directory; defaults to
 *   `~/.codex/contextos`.
 * @returns {string} `<dataDir>/models`.
 */
export function modelCacheDir(dataDir = path.join(os.homedir(), ".codex", "contextos")) {
  const modelsFolder = "models";
  return path.join(dataDir, modelsFolder);
}
130
+
131
/**
 * Return the embedding for a text, computing and caching it on a cache miss.
 *
 * @param {object} args
 * @param {{get: Function, set: Function}} args.cache - Embedding cache.
 * @param {Function} args.embedder - Feature-extraction pipeline.
 * @param {string} args.text - Text to embed.
 * @param {string[]} args.sources - Source files mixed into the cache key.
 * @returns {Promise<number[]>} The embedding vector.
 */
async function getCachedEmbedding({ cache, embedder, text, sources }) {
  const key = cacheKey(text, sources);

  const cached = cache.get(key);
  if (cached) return cached;

  const pipelineOptions = { pooling: "mean", normalize: true };
  const output = await embedder(String(text || ""), pipelineOptions);
  const vector = Array.from(output.data || []);
  cache.set(key, vector);
  return vector;
}
144
+
145
/**
 * Open the sql.js-backed embedding cache stored at `<dataDir>/embeddings.db`.
 *
 * The database file is loaded into memory when it exists and is non-empty;
 * otherwise a fresh database is created. The whole database is written back
 * to disk after every `set` and again on `close`.
 *
 * @param {string} dataDir - Directory holding the cache file (created if missing).
 * @returns {Promise<{path: string, get: Function, set: Function, close: Function}>}
 */
async function openEmbeddingCache(dataDir) {
  fs.mkdirSync(dataDir, { recursive: true });
  const cachePath = path.join(dataDir, "embeddings.db");
  const SQL = await getSql();

  const existingBytes = fs.existsSync(cachePath) ? fs.readFileSync(cachePath) : null;
  const db = existingBytes?.length ? new SQL.Database(existingBytes) : new SQL.Database();

  db.run(`
    CREATE TABLE IF NOT EXISTS embeddings (
      key TEXT PRIMARY KEY,
      model TEXT NOT NULL,
      vector TEXT NOT NULL,
      updated_at TEXT NOT NULL
    )
  `);

  // Serialise the in-memory database and overwrite the cache file.
  const persist = () => {
    fs.writeFileSync(cachePath, Buffer.from(db.export()));
  };

  // Look up the vector stored under `key` for the current model, or null.
  const get = (key) => {
    const stmt = db.prepare("SELECT vector FROM embeddings WHERE key = ? AND model = ?");
    try {
      stmt.bind([key, DEFAULT_MODEL]);
      const found = stmt.step();
      if (!found) return null;
      const row = stmt.getAsObject();
      return JSON.parse(row.vector);
    } finally {
      stmt.free();
    }
  };

  // Insert or replace the vector for `key`, then persist immediately.
  const set = (key, vector) => {
    const updatedAt = new Date().toISOString();
    db.run(
      "INSERT OR REPLACE INTO embeddings (key, model, vector, updated_at) VALUES (?, ?, ?, ?)",
      [key, DEFAULT_MODEL, JSON.stringify(vector), updatedAt]
    );
    persist();
  };

  // Persist one last time and release the database.
  const close = () => {
    persist();
    db.close();
  };

  return { path: cachePath, get, set, close };
}
186
+
187
/**
 * Initialise sql.js once and share the result between callers.
 *
 * The initialisation promise is memoised in `sqlPromise`. If initialisation
 * fails, the memo is cleared so that a later call can retry rather than being
 * handed the same rejection forever.
 *
 * @returns {Promise<object>} The initialised sql.js module.
 */
async function getSql() {
  if (!sqlPromise) {
    const pending = (async () => {
      const initSqlJs = (await import("sql.js")).default;
      return initSqlJs({
        // sql.js asks for its support files by name; resolve them from the
        // package's dist folder under the repository's node_modules.
        locateFile: (file) => path.join(repoRoot, "node_modules", "sql.js", "dist", file)
      });
    })();
    sqlPromise = pending;
    // The rejection still reaches callers through the returned promise; this
    // handler only resets the memo, and only if it still points at this attempt.
    pending.catch(() => {
      if (sqlPromise === pending) sqlPromise = null;
    });
  }
  return sqlPromise;
}
198
+
199
/**
 * Build the cache key for a text: the SHA-256 hex digest of the model id, the
 * text, and the fingerprint of the source files, separated by NUL characters.
 *
 * @param {string} text - Text being embedded.
 * @param {string[]} sources - Source files whose state is part of the key.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function cacheKey(text, sources) {
  const segments = [
    DEFAULT_MODEL,
    "\0",
    String(text || ""),
    "\0",
    sourceFingerprint(sources)
  ];
  const hash = crypto.createHash("sha256");
  for (const segment of segments) {
    hash.update(segment);
  }
  return hash.digest("hex");
}
209
+
210
/**
 * Summarise the state of a list of source files as one string.
 *
 * Each file contributes `<path>:<mtimeMs>:<size>`; a path that cannot be
 * stat-ed contributes just the path. Entries are joined with "|".
 *
 * @param {Iterable<string>|null|undefined} sources - Source file paths.
 * @returns {string} The joined fingerprint ("" when there are no sources).
 */
function sourceFingerprint(sources) {
  const describe = (source) => {
    try {
      const { mtimeMs, size } = fs.statSync(source);
      return `${source}:${mtimeMs}:${size}`;
    } catch {
      // Missing or unreadable file: fall back to the bare path.
      return String(source);
    }
  };
  return [...(sources || [])].map(describe).join("|");
}
222
+
223
/**
 * Cosine similarity of two numeric vectors.
 *
 * Only the first `min(a.length, b.length)` components are compared. Returns 0
 * when either vector is missing, empty, or has no magnitude over that range.
 *
 * @param {ArrayLike<number>|null|undefined} a - First vector.
 * @param {ArrayLike<number>|null|undefined} b - Second vector.
 * @returns {number} Similarity in [-1, 1], or 0 when undefined.
 */
function cosine(a, b) {
  const size = Math.min(a?.length || 0, b?.length || 0);

  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  let i = 0;
  while (i < size) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    squaredA += x * x;
    squaredB += y * y;
    i += 1;
  }

  const hasMagnitude = squaredA && squaredB;
  if (!hasMagnitude) return 0;
  return dot / (Math.sqrt(squaredA) * Math.sqrt(squaredB));
}
236
+
237
/**
 * Map a cosine similarity from [-1, 1] onto a score in [0, 1], clamping
 * out-of-range inputs.
 *
 * @param {number} similarity - Cosine similarity.
 * @returns {number} `(similarity + 1) / 2`, clamped to [0, 1].
 */
function similarityToScore(similarity) {
  const shifted = (similarity + 1) / 2;
  const cappedAtOne = Math.min(1, shifted);
  return Math.max(0, cappedAtOne);
}
240
+
241
/**
 * Settle with the given promise, or reject if it takes longer than `timeoutMs`.
 *
 * The timer is cancelled as soon as the race settles. Without that, the
 * pending timeout would keep the Node.js event loop alive for up to
 * `timeoutMs` after the work had already finished.
 *
 * The wrapped promise is not cancelled on timeout; it keeps running and its
 * eventual result is ignored.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} timeoutMs - Time budget in milliseconds.
 * @returns {Promise<*>} Resolves or rejects like `promise`, or rejects with an
 *   Error reading "embedding scorer timed out after <timeoutMs>ms".
 */
function withTimeout(promise, timeoutMs) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`embedding scorer timed out after ${timeoutMs}ms`)),
      timeoutMs
    );
  });
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}