@kontourai/flow-agents 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/.github/workflows/kit-gates-demo.yml +171 -0
  2. package/.github/workflows/release-please.yml +13 -1
  3. package/AGENTS.md +8 -1
  4. package/CHANGELOG.md +53 -0
  5. package/CONTEXT.md +1 -1
  6. package/README.md +13 -2
  7. package/build/src/cli/flow-kit.js +41 -2
  8. package/build/src/flow-kit/validate.js +98 -0
  9. package/build/src/tools/validate-source-tree.js +2 -1
  10. package/context/scripts/hooks/config-protection.js +217 -15
  11. package/docs/fixture-ownership.md +1 -0
  12. package/docs/index.md +9 -1
  13. package/docs/kit-authoring-guide.md +126 -0
  14. package/docs/knowledge-kit.md +69 -0
  15. package/docs/vision.md +22 -0
  16. package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +26 -0
  17. package/evals/fixtures/kit-conformance-levels/k0-flows-only/kit.json +13 -0
  18. package/evals/fixtures/kit-conformance-levels/k1-agent-extension/docs/README.md +3 -0
  19. package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +26 -0
  20. package/evals/fixtures/kit-conformance-levels/k1-agent-extension/kit.json +20 -0
  21. package/evals/fixtures/kit-conformance-levels/k2-with-evals/docs/README.md +3 -0
  22. package/evals/fixtures/kit-conformance-levels/k2-with-evals/eval-suites/contract-suite/suite.test.js +1 -0
  23. package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +26 -0
  24. package/evals/fixtures/kit-conformance-levels/k2-with-evals/kit.json +27 -0
  25. package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +26 -0
  26. package/evals/fixtures/kit-conformance-levels/third-party-extension/kit.json +19 -0
  27. package/evals/integration/test_fixture_retirement_audit.sh +2 -2
  28. package/evals/integration/test_hook_category_behaviors.sh +51 -0
  29. package/evals/integration/test_kit_conformance_levels.sh +209 -0
  30. package/evals/run.sh +2 -0
  31. package/evals/static/test_universal_bundles.sh +10 -0
  32. package/kits/catalog.json +6 -0
  33. package/kits/knowledge/adapters/default-store/index.js +95 -14
  34. package/kits/knowledge/adapters/flow-runner/entity-extractor.js +194 -0
  35. package/kits/knowledge/adapters/flow-runner/index.js +639 -0
  36. package/kits/knowledge/adapters/obsidian-store/README.md +141 -0
  37. package/kits/knowledge/adapters/obsidian-store/demo.js +181 -0
  38. package/kits/knowledge/adapters/obsidian-store/index.js +868 -0
  39. package/kits/knowledge/adapters/shared/codec.js +325 -0
  40. package/kits/knowledge/adapters/similarity-vector/index.js +284 -0
  41. package/kits/knowledge/docs/README.md +193 -0
  42. package/kits/knowledge/docs/store-contract.md +196 -0
  43. package/kits/knowledge/evals/contract-suite/suite.test.js +10 -5
  44. package/kits/knowledge/evals/entities/demo-acme.js +125 -0
  45. package/kits/knowledge/evals/entities/suite.test.js +722 -0
  46. package/kits/knowledge/evals/retirement/suite.test.js +1173 -0
  47. package/kits/knowledge/evals/similarity-vector/suite.test.js +685 -0
  48. package/kits/knowledge/evals/synthesis/suite.test.js +10 -3
  49. package/kits/knowledge/flows/retire.flow.json +77 -0
  50. package/kits/knowledge/kit.json +31 -1
  51. package/kits/release-evidence/fixtures/claims/README.md +14 -0
  52. package/kits/release-evidence/fixtures/claims/fail-rejected-release.trust.json +22 -0
  53. package/kits/release-evidence/fixtures/claims/pass-trusted-release.trust.json +22 -0
  54. package/kits/release-evidence/flows/release-evidence.flow.json +38 -0
  55. package/kits/release-evidence/kit.json +13 -0
  56. package/package.json +1 -1
  57. package/packaging/conformance/fixtures/config-protection--allow-no-verify-in-string.json +20 -0
  58. package/packaging/conformance/fixtures/config-protection--block-git-no-verify.json +23 -0
  59. package/scripts/hooks/config-protection.js +217 -15
  60. package/src/cli/flow-kit.ts +40 -2
  61. package/src/flow-kit/validate.ts +127 -0
  62. package/src/tools/validate-source-tree.ts +2 -1
@@ -0,0 +1,325 @@
1
+ /**
2
+ * Knowledge Kit — Shared Codec
3
+ *
4
+ * Utility functions shared between Knowledge Kit store adapters:
5
+ * - Error helpers (MISSING_EVIDENCE, NOT_FOUND)
6
+ * - YAML frontmatter codec (zero-dep subset)
7
+ * - Wikilink parser / indexer
8
+ * - Graph index helpers
9
+ * - Validation constants and helpers
10
+ *
11
+ * @module adapters/shared/codec
12
+ */
13
+
14
+ import * as fs from "node:fs";
15
+ import * as path from "node:path";
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Error helpers
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Build (without throwing) an Error tagged with code "MISSING_EVIDENCE".
 *
 * @param {string} message - Human-readable description used as the error message.
 * @returns {Error} A new Error whose `code` property is "MISSING_EVIDENCE".
 */
export function missingEvidenceError(message) {
  return Object.assign(new Error(message), { code: "MISSING_EVIDENCE" });
}
26
+
27
/**
 * Build (without throwing) an Error tagged with code "NOT_FOUND".
 *
 * @param {string} id - Identifier of the record that could not be found.
 * @returns {Error} A new Error with message "Record not found: <id>" and
 *   `code` set to "NOT_FOUND".
 */
export function notFoundError(id) {
  const message = `Record not found: ${id}`;
  return Object.assign(new Error(message), { code: "NOT_FOUND" });
}
32
+
33
+ // ---------------------------------------------------------------------------
34
+ // YAML frontmatter codec (no external deps — handles the subset we need)
35
+ // ---------------------------------------------------------------------------
36
+
37
/**
 * Parse a markdown document that may begin with a YAML frontmatter block.
 *
 * A frontmatter block is recognised when the text starts with a "---" line and
 * a later line consisting solely of "---" closes it. The closing fence may be
 * followed by a newline or may be the very end of the text. An empty block
 * ("---" immediately followed by "---") is accepted and yields empty metadata.
 *
 * When no complete block is found the whole text is returned as the body.
 *
 * @param {string} text - Full file contents (LF line endings).
 * @returns {{ meta: object, body: string }} Parsed frontmatter (via parseYaml)
 *   and the remaining body with leading blank lines removed.
 */
export function parseMarkdown(text) {
  const OPEN_FENCE = "---\n";
  const CLOSE_FENCE = "\n---\n";

  if (!text.startsWith(OPEN_FENCE)) {
    return { meta: {}, body: text };
  }

  // Start at index 3 (the newline that ends the opening fence) so that an
  // empty frontmatter block "---\n---\n" is matched as well.
  let close = text.indexOf(CLOSE_FENCE, 3);
  let bodyStart = close + CLOSE_FENCE.length;

  // Accept a closing fence that is the last line of the file with no
  // trailing newline.
  if (close === -1 && text.endsWith("\n---")) {
    close = text.length - 4;
    bodyStart = text.length;
  }

  if (close === -1) {
    return { meta: {}, body: text };
  }

  // For an empty block `close` is 3, so slice(4, 3) yields "".
  const yaml = text.slice(OPEN_FENCE.length, close);
  const body = text.slice(bodyStart).replace(/^\n+/, "");
  return { meta: parseYaml(yaml), body };
}
53
+
54
/**
 * Parse YAML-ish text into a plain object.
 *
 * Thin entry point: splits the text on "\n" and hands the lines to
 * parseYamlLines starting at line 0 with a base indent of 0. Only the subset
 * understood by parseYamlLines is supported; this is not a general YAML parser.
 *
 * @param {string} yaml - Frontmatter text without the surrounding fences.
 * @returns {object} The parsed top-level mapping.
 */
export function parseYaml(yaml) {
  const { value } = parseYamlLines(yaml.split("\n"), 0, 0);
  return value;
}
62
+
63
/**
 * Split the inside of an inline array ("a, "b, c", d") on commas that are not
 * inside a quoted item. A quote only opens at the start of an item, so an
 * apostrophe in the middle of a bare word is treated as ordinary text.
 */
function splitInlineItems(inner) {
  const items = [];
  let current = "";
  let quote = null;
  for (let k = 0; k < inner.length; k++) {
    const ch = inner[k];
    if (quote !== null) {
      current += ch;
      if (ch === "\\" && quote === '"' && k + 1 < inner.length) {
        // Keep the escaped character attached so an escaped quote does not
        // terminate the item.
        k++;
        current += inner[k];
      } else if (ch === quote) {
        quote = null;
      }
    } else if ((ch === '"' || ch === "'") && current.trim() === "") {
      quote = ch;
      current += ch;
    } else if (ch === ",") {
      items.push(current);
      current = "";
    } else {
      current += ch;
    }
  }
  items.push(current);
  return items;
}

/** True when the text is one complete single- or double-quoted scalar. */
function isQuotedScalar(text) {
  return /^"(?:[^"\\]|\\.)*"$/.test(text) || /^'[^']*'$/.test(text);
}

/**
 * Parse one mapping level of the YAML subset used for frontmatter.
 *
 * Reads `lines` from index `start`, consuming `key: value` entries whose
 * indentation equals `baseIndent`. Stops at the first non-blank line indented
 * less than `baseIndent`. Blank lines, "#" comment lines, lines without a colon
 * and lines indented deeper than `baseIndent` (outside a recognised block) are
 * skipped.
 *
 * Supported value forms:
 *   - `key: scalar`            -> string (quotes removed via unquote)
 *   - `key: [a, "b, c"]`       -> array of strings; commas inside quotes are
 *                                 kept and a quoted empty string is preserved
 *   - `key:` + indented "- "   -> array; items are scalars or mappings
 *   - `key:` + indented keys   -> nested mapping (recursive call)
 *   - `key:` with nothing else -> null
 *
 * All scalars are returned as strings; no number/boolean coercion is done.
 *
 * @param {string[]} lines - Lines of YAML text.
 * @param {number} start - Index of the first line to examine.
 * @param {number} baseIndent - Column at which this mapping's keys start.
 * @returns {{ value: object, next: number }} The parsed mapping and the index
 *   of the first line that was not consumed.
 */
export function parseYamlLines(lines, start, baseIndent) {
  const obj = {};
  let i = start;

  while (i < lines.length) {
    const line = lines[i];
    const trimmed = line.trim();
    if (trimmed === "" || trimmed.startsWith("#")) { i++; continue; }

    const indent = line.search(/\S/);
    if (indent < baseIndent) break;
    if (indent > baseIndent) { i++; continue; }

    // key: value   OR   key:
    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) { i++; continue; }
    const key = line.slice(indent, colonIdx).trim();
    const rest = line.slice(colonIdx + 1).trim();

    if (rest.startsWith("[")) {
      // Inline array. Empty *raw* tokens are dropped (e.g. "[a, , b]"), but a
      // quoted empty string ("") is a real value and is kept.
      const inner = rest.slice(1, rest.lastIndexOf("]"));
      obj[key] = splitInlineItems(inner)
        .map((token) => token.trim())
        .filter((token) => token !== "")
        .map((token) => unquote(token));
      i++;
      continue;
    }

    if (rest !== "") {
      obj[key] = unquote(rest);
      i++;
      continue;
    }

    // "key:" with nothing after the colon — peek at the next line to decide
    // between a block sequence, a nested mapping, or null.
    i++;
    if (i >= lines.length) {
      obj[key] = null;
      continue;
    }

    const nextLine = lines[i];
    const nextIndent = nextLine.search(/\S/);

    if (nextIndent > baseIndent && nextLine.trimStart().startsWith("- ")) {
      // Block sequence
      const arr = [];
      while (i < lines.length) {
        const l = lines[i];
        if (l.trim() === "") { i++; continue; }
        const ind = l.search(/\S/);
        if (ind < nextIndent) break;
        if (!l.trimStart().startsWith("- ")) break;

        const itemText = l.trimStart().slice(2).trim();

        if (itemText === "{}") {
          // Empty mapping item, as written by the serializer for objects
          // with no non-null entries.
          arr.push({});
          i++;
          continue;
        }

        // A fully quoted item is always a scalar, even if the quoted text
        // happens to contain ": ".
        const looksLikeMapping =
          !isQuotedScalar(itemText) &&
          (itemText.includes(": ") ||
            (i + 1 < lines.length && lines[i + 1].search(/\S/) > ind + 1));

        if (looksLikeMapping) {
          // Re-indent the first key to where the item's other keys sit
          // (two columns right of the dash) and parse the item as a mapping.
          const childLines = [" ".repeat(ind + 2) + itemText];
          i++;
          while (i < lines.length) {
            const cl = lines[i];
            const ci = cl.search(/\S/);
            if (cl.trim() === "" || ci <= ind) break;
            childLines.push(cl);
            i++;
          }
          arr.push(parseYamlLines(childLines, 0, ind + 2).value);
        } else {
          arr.push(unquote(itemText));
          i++;
        }
      }
      obj[key] = arr;
    } else if (nextIndent > baseIndent) {
      // Nested mapping
      const result = parseYamlLines(lines, i, nextIndent);
      obj[key] = result.value;
      i = result.next;
    } else {
      obj[key] = null;
    }
  }

  return { value: obj, next: i };
}
139
+
140
/**
 * Remove one layer of surrounding quotes from a YAML scalar.
 *
 * - Double-quoted: the quotes are removed and the escapes `\\`, `\n`, `\r`
 *   and `\"` are decoded.
 * - Single-quoted: the quotes are removed; the content is returned verbatim.
 * - Anything else, including a lone quote character, is returned unchanged.
 *
 * @param {string} s - Trimmed scalar text.
 * @returns {string} The unquoted value.
 */
export function unquote(s) {
  // A quoted scalar needs an opening AND a closing quote. Without the length
  // guard a single `"` or `'` matches both startsWith and endsWith and would
  // be turned into an empty string.
  if (s.length < 2) return s;

  if (s.startsWith('"') && s.endsWith('"')) {
    const decoded = { "\\": "\\", n: "\n", r: "\r", '"': '"' };
    return s.slice(1, -1).replace(/\\(\\|n|r|")/g, (_, c) => decoded[c]);
  }
  if (s.startsWith("'") && s.endsWith("'")) {
    return s.slice(1, -1);
  }
  return s;
}
155
+
156
/**
 * Serialize an object to YAML-ish text suitable for frontmatter.
 *
 * Handles strings, numbers, arrays of primitives, arrays containing objects,
 * and nested objects. Entries whose value is `undefined` or `null` are omitted.
 *
 * Layout (relative to `indent` columns of padding):
 *   key: scalar
 *   key: [a, b]            arrays whose elements are all non-objects
 *   key:                   arrays containing at least one object
 *     - first: 1           dash at indent + 2
 *       second: 2          remaining keys at indent + 4, under the first key
 *       nested:
 *         deep: x          nested mapping of an item at indent + 6
 *   key:
 *     child: value         nested mapping at indent + 2
 *
 * Keeping the dash at indent + 2 and the continuation keys at indent + 4 is
 * what lets parseYamlLines read an item's later keys back: it collects lines
 * indented deeper than the dash and parses them at (dash column + 2).
 *
 * Limitation: an array stored as a property of a sequence item is written
 * through yamlScalar (i.e. stringified), not as a nested sequence.
 *
 * @param {object} obj - Mapping to serialize.
 * @param {number} [indent=0] - Number of spaces to prefix each top-level key.
 * @returns {string} Lines joined with "\n" (no trailing newline).
 */
export function serializeYaml(obj, indent = 0) {
  const pad = " ".repeat(indent);
  const dashPad = `${pad}  `; // column of the "- " marker
  const contPad = `${pad}    `; // column of an item's keys after the first
  const lines = [];

  const isMapping = (v) => typeof v === "object" && v !== null && !Array.isArray(v);

  // Emit one `key: value` entry of a sequence item behind the given prefix.
  const pushItemEntry = (prefix, k, v) => {
    if (isMapping(v)) {
      lines.push(`${prefix}${k}:`);
      lines.push(serializeYaml(v, indent + 6));
    } else {
      lines.push(`${prefix}${k}: ${yamlScalar(v)}`);
    }
  };

  for (const [key, value] of Object.entries(obj)) {
    if (value === undefined || value === null) continue;

    if (Array.isArray(value)) {
      if (value.length === 0) {
        lines.push(`${pad}${key}: []`);
      } else if (value.every((v) => typeof v !== "object")) {
        lines.push(`${pad}${key}: [${value.map(yamlScalar).join(", ")}]`);
      } else {
        lines.push(`${pad}${key}:`);
        for (const item of value) {
          if (typeof item !== "object" || item === null) {
            lines.push(`${dashPad}- ${yamlScalar(item)}`);
            continue;
          }
          const entries = Object.entries(item).filter(([, v]) => v !== undefined && v !== null);
          if (entries.length === 0) {
            lines.push(`${dashPad}- {}`);
            continue;
          }
          const [[firstKey, firstVal], ...others] = entries;
          pushItemEntry(`${dashPad}- `, firstKey, firstVal);
          for (const [k, v] of others) {
            pushItemEntry(contPad, k, v);
          }
        }
      }
    } else if (typeof value === "object") {
      lines.push(`${pad}${key}:`);
      lines.push(serializeYaml(value, indent + 2));
    } else {
      lines.push(`${pad}${key}: ${yamlScalar(value)}`);
    }
  }

  return lines.join("\n");
}
205
+
206
/**
 * Render a single value as a YAML scalar.
 *
 * Non-strings are converted with String(). A string is wrapped in double
 * quotes when it is empty, has leading/trailing whitespace, or contains any
 * of the characters : # [ ] { } , & * ? | < > = ! % @ ` " ' or a newline /
 * carriage return. Inside quotes, backslash, double quote, newline and
 * carriage return are escaped as \\ \" \n \r.
 *
 * @param {*} v - Value to render.
 * @returns {string} The scalar text.
 */
export function yamlScalar(v) {
  if (typeof v !== "string") {
    return String(v);
  }

  const hasSpecialChar = /[:#\[\]{},&*?|<>=!%@`"'\n\r]/.test(v);
  const hasEdgeWhitespace = v.trim() !== v;
  if (!hasSpecialChar && !hasEdgeWhitespace && v !== "") {
    return v;
  }

  // Backslashes must be doubled first so the escapes added afterwards are
  // not themselves re-escaped.
  const escaped = v
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r");
  return `"${escaped}"`;
}
216
+
217
/**
 * Compose a markdown document from frontmatter metadata and a body.
 *
 * Output shape: a "---" fence, the YAML produced by serializeYaml(meta), a
 * closing "---" fence, one blank line, then the body verbatim.
 *
 * @param {object} meta - Frontmatter mapping.
 * @param {string} body - Markdown body text.
 * @returns {string} The complete document text.
 */
export function serializeMarkdown(meta, body) {
  const frontmatter = serializeYaml(meta);
  const header = `---\n${frontmatter}\n---\n`;
  return `${header}\n${body}`;
}
220
+
221
+ // ---------------------------------------------------------------------------
222
+ // Wikilink parser / indexer
223
+ // ---------------------------------------------------------------------------
224
+
225
// Matches [[target]] and [[target|label]]: group 1 is the target (no "]" or
// "|"), optional group 2 is the label (no "]").
const WIKILINK_RE = /\[\[([^\]|]+)(?:\|([^\]]+))?\]\]/g;

/**
 * Collect every wikilink found in a body of text.
 *
 * Each match becomes a link object with the trimmed target as `target_id`,
 * `kind` fixed to "related", and `label` set to the trimmed label text or
 * `undefined` when the link has no "|label" part.
 *
 * @param {string} body - Text to scan.
 * @returns {{ target_id: string, kind: string, label: (string|undefined) }[]}
 *   Links in order of appearance (duplicates are not removed).
 */
export function extractWikilinks(body) {
  return Array.from(body.matchAll(WIKILINK_RE), (match) => {
    const target = match[1];
    const label = match[2];
    return { target_id: target.trim(), kind: "related", label: label?.trim() };
  });
}
238
+
239
/**
 * Combine explicitly declared links with links derived from wikilinks.
 *
 * The result starts with every explicit link (unchanged, in order) followed by
 * each wikilink whose (target_id, kind) pair has not been seen yet. Explicit
 * links therefore win on conflict, and repeated wikilinks are added only once.
 * Neither input array is modified.
 *
 * @param {object[]} explicit - Links declared explicitly.
 * @param {object[]} wikilinks - Links extracted from body text.
 * @returns {object[]} A new array of merged links.
 */
export function mergeLinks(explicit, wikilinks) {
  const identityOf = (link) => `${link.target_id}::${link.kind}`;

  const seen = new Set();
  for (const link of explicit) {
    seen.add(identityOf(link));
  }

  const merged = explicit.slice();
  wikilinks.forEach((candidate) => {
    const identity = identityOf(candidate);
    if (seen.has(identity)) return;
    seen.add(identity);
    merged.push(candidate);
  });
  return merged;
}
255
+
256
+ // ---------------------------------------------------------------------------
257
+ // Graph index
258
+ // ---------------------------------------------------------------------------
259
+
260
// Version string written into every graph index created by emptyGraph().
export const GRAPH_SCHEMA_VERSION = "1.0";

/**
 * Create a fresh, empty graph index.
 *
 * @returns {{ schema_version: string, forward: object, reverse: object }}
 *   A new object on every call, with empty `forward` and `reverse` maps.
 */
export function emptyGraph() {
  const graph = { schema_version: GRAPH_SCHEMA_VERSION };
  graph.forward = {};
  graph.reverse = {};
  return graph;
}
265
+
266
/**
 * Load a graph index from disk, falling back to an empty graph.
 *
 * An empty graph (see emptyGraph) is returned when the file is missing or
 * unreadable, when it is not valid JSON, or when the parsed value does not
 * have the expected shape (an object with object-valued `forward` and
 * `reverse` maps). The shape check prevents a damaged index file from
 * surfacing later as a TypeError inside the graph helpers that index into
 * `graph.forward` / `graph.reverse`.
 *
 * @param {string} graphPath - Path of the JSON graph file.
 * @returns {object} The parsed graph, or a new empty graph.
 */
export function loadGraph(graphPath) {
  const isMap = (v) => v !== null && typeof v === "object" && !Array.isArray(v);

  let parsed;
  try {
    // Reading directly (instead of checking existence first) avoids a race
    // between the check and the read; a missing file lands in the catch.
    parsed = JSON.parse(fs.readFileSync(graphPath, "utf8"));
  } catch {
    return emptyGraph();
  }

  if (!isMap(parsed) || !isMap(parsed.forward) || !isMap(parsed.reverse)) {
    return emptyGraph();
  }
  return parsed;
}
274
+
275
/**
 * Write a graph index to disk as pretty-printed JSON.
 *
 * The graph is serialized with 2-space indentation, terminated with a single
 * newline, and written synchronously as UTF-8, replacing any existing file.
 *
 * @param {string} graphPath - Destination file path.
 * @param {object} graph - Graph index to persist.
 * @returns {void}
 */
export function saveGraph(graphPath, graph) {
  const serialized = `${JSON.stringify(graph, null, 2)}\n`;
  fs.writeFileSync(graphPath, serialized, "utf8");
}
278
+
279
/**
 * Record outgoing links for a source record in the graph index (in place).
 *
 * Ensures `graph.forward[sourceId]` exists, then for each link that is not
 * already present (same `target_id` and `kind`) appends a forward entry and a
 * matching `{ source_id, kind }` entry under `graph.reverse[target_id]`.
 * A `label` is stored on the forward entry only when it is truthy. Links that
 * are already present are skipped, so repeating a call adds nothing.
 *
 * @param {{ forward: object, reverse: object }} graph - Graph index to mutate.
 * @param {string} sourceId - Id of the record the links originate from.
 * @param {{ target_id: string, kind: string, label?: string }[]} links
 * @returns {void}
 */
export function addLinksToGraph(graph, sourceId, links) {
  const outgoing = graph.forward[sourceId] || (graph.forward[sourceId] = []);

  for (const { target_id, kind, label } of links) {
    const alreadyRecorded = outgoing.some(
      (entry) => entry.target_id === target_id && entry.kind === kind
    );
    if (alreadyRecorded) continue;

    outgoing.push(label ? { target_id, kind, label } : { target_id, kind });

    const incoming = graph.reverse[target_id] || (graph.reverse[target_id] = []);
    incoming.push({ source_id: sourceId, kind });
  }
}
296
+
297
/**
 * Remove every outgoing link of a source record from the graph index (in place).
 *
 * For each target the source pointed at, all reverse entries coming from
 * `sourceId` are dropped; a target left with no reverse entries loses its key
 * entirely. Finally the source's forward list is deleted. Calling this for a
 * source with no forward entry leaves the graph unchanged.
 *
 * @param {{ forward: object, reverse: object }} graph - Graph index to mutate.
 * @param {string} sourceId - Id of the record whose links are removed.
 * @returns {void}
 */
export function removeLinksFromGraph(graph, sourceId) {
  const outgoing = graph.forward[sourceId] || [];

  outgoing.forEach(({ target_id }) => {
    const incoming = graph.reverse[target_id] || [];
    const remaining = incoming.filter((entry) => entry.source_id !== sourceId);
    if (remaining.length === 0) {
      delete graph.reverse[target_id];
    } else {
      graph.reverse[target_id] = remaining;
    }
  });

  delete graph.forward[sourceId];
}
306
+
307
+ // ---------------------------------------------------------------------------
308
+ // Validation helpers
309
+ // ---------------------------------------------------------------------------
310
+
311
// Record types accepted by the store adapters.
export const VALID_TYPES = new Set(["raw", "compiled", "concept", "snapshot", "person"]);

// Lifecycle statuses a record may carry.
export const VALID_STATUSES = new Set(["active", "implemented", "retired"]);

// One dot-separated category segment: lowercase letters, digits, "_" and "-".
const CATEGORY_SEGMENT_RE = /^[a-z0-9_-]+$/;

// Status transition table: current status -> set of statuses it may move to.
export const VALID_STATUS_TRANSITIONS = {
  active: new Set(["implemented", "retired"]),
  implemented: new Set(["retired"]),
  retired: new Set(), // terminal — no further transitions
};

/**
 * Check whether a category path is well formed.
 *
 * A valid category is a non-empty string of one or more "."-separated
 * segments, each consisting only of lowercase letters, digits, "_" or "-".
 * Empty segments (leading, trailing or doubled dots) make it invalid.
 *
 * @param {*} cat - Candidate category value.
 * @returns {boolean} True when `cat` is a valid category path.
 */
export function validateCategory(cat) {
  if (typeof cat !== "string" || cat === "") {
    return false;
  }
  for (const segment of cat.split(".")) {
    if (!CATEGORY_SEGMENT_RE.test(segment)) {
      return false;
    }
  }
  return true;
}
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Knowledge Kit — Vector Similarity Adapter
3
+ *
4
+ * Provides a drop-in SimilarityDetector implementation backed by dense vector
5
+ * embeddings (cosine similarity) instead of the default category-prefix /
6
+ * link-overlap heuristic.
7
+ *
8
+ * SimilarityDetector interface (from adapters/flow-runner/index.js):
9
+ * async (concept: Record, candidates: Record[], store: KnowledgeStoreAdapter) => string[]
10
+ *
11
+ * Usage:
12
+ * import { createVectorSimilarityDetector } from './adapters/similarity-vector/index.js';
13
+ *
14
+ * // Ollama (default):
15
+ * const detector = createVectorSimilarityDetector();
16
+ *
17
+ * // Ollama with non-default model/host:
18
+ * const detector = createVectorSimilarityDetector({
19
+ * host: 'http://localhost:11434',
20
+ * model: 'nomic-embed-text',
21
+ * threshold: 0.60,
22
+ * });
23
+ *
24
+ * // Injectable embed fn (for tests / custom providers):
25
+ * const detector = createVectorSimilarityDetector({
26
+ * embed: async (texts) => texts.map(() => [0.1, 0.9, 0.0]),
27
+ * threshold: 0.60,
28
+ * });
29
+ *
30
+ * // Pass to synthesize:
31
+ * await runner.synthesize(conceptId, {
32
+ * proposedBody: '...',
33
+ * rationale: '...',
34
+ * similarityDetector: detector,
35
+ * });
36
+ *
37
+ * Zero npm dependencies — uses Node.js built-in fetch (Node >= 18).
38
+ *
39
+ * Fail-closed policy:
40
+ * If the embedding call fails (network error, non-200, malformed response),
41
+ * the detector throws an Error with code="EMBED_FAILURE". This is intentional:
42
+ * silently returning [] would look identical to "no similar records found" and
43
+ * mask infrastructure failures as legitimate empty clusters, blocking synthesis
44
+ * with a misleading MISSING_EVIDENCE rather than a clear infrastructure error.
45
+ *
46
+ * @module adapters/similarity-vector
47
+ */
48
+
49
+ // ---------------------------------------------------------------------------
50
+ // Pure cosine similarity (exported for tests)
51
+ // ---------------------------------------------------------------------------
52
+
53
/**
 * Cosine similarity of two numeric vectors.
 *
 * Computed as dot(a, b) / (|a| * |b|): 1 for the same direction, 0 for
 * orthogonal vectors, -1 for opposite directions.
 *
 * Returns 0 (no similarity signal) when:
 *   - either argument is not an array,
 *   - the arrays differ in length or are empty,
 *   - either vector has zero magnitude.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
export function cosineSimilarity(a, b) {
  const comparable =
    Array.isArray(a) && Array.isArray(b) && a.length === b.length && a.length > 0;
  if (!comparable) {
    return 0;
  }

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (const [idx, x] of a.entries()) {
    const y = b[idx];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const norm = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return norm === 0 ? 0 : dotProduct / norm;
}
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // Ollama embed call
90
+ // ---------------------------------------------------------------------------
91
+
92
/**
 * Build an Error tagged with code "EMBED_FAILURE".
 * The message is prefixed with "EMBED_FAILURE: "; `cause` is attached when given.
 */
function embedFailure(detail, cause) {
  const err = new Error(`EMBED_FAILURE: ${detail}`);
  err.code = "EMBED_FAILURE";
  if (cause !== undefined) err.cause = cause;
  return err;
}

/**
 * Request embeddings from an Ollama server's /api/embed endpoint.
 *
 * Sends a POST with JSON body `{ model, input: texts }` and expects a JSON
 * response of the form `{ embeddings: number[][] }` with exactly one vector
 * per input text.
 *
 * Every failure is reported as an Error with code "EMBED_FAILURE":
 *   - the request itself fails (original error attached as `cause`),
 *   - the response status is not OK (status and body included in the message),
 *   - the response body is not valid JSON (original error attached as `cause`),
 *   - the payload is not an object with an `embeddings` array (this includes a
 *     JSON `null` body),
 *   - the number of embeddings differs from the number of texts.
 *
 * @param {string} host - Server base URL; trailing slashes are ignored.
 * @param {string} model - Embedding model name.
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<number[][]>} One embedding vector per input text.
 * @throws {Error} With `code === "EMBED_FAILURE"` on any failure.
 */
async function ollamaEmbed(host, model, texts) {
  // Strip trailing slashes so "http://host:11434/" does not produce "//api/embed".
  const url = `${String(host).replace(/\/+$/, "")}/api/embed`;

  let response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model, input: texts }),
    });
  } catch (cause) {
    // `cause` may be any thrown value, not necessarily an Error.
    const reason = cause?.message ?? String(cause);
    throw embedFailure(`embedding call to ${url} failed — ${reason}`, cause);
  }

  if (!response.ok) {
    const body = await response.text().catch(() => "(unreadable)");
    throw embedFailure(
      `embedding call to ${url} returned HTTP ${response.status}: ${body}`
    );
  }

  let data;
  try {
    data = await response.json();
  } catch (cause) {
    const reason = cause?.message ?? String(cause);
    throw embedFailure(
      `embedding response from ${url} was not valid JSON — ${reason}`,
      cause
    );
  }

  // ollama /api/embed returns { embeddings: number[][] }. Check `data` itself
  // first: a JSON `null` body must be reported as EMBED_FAILURE, not surface as
  // a TypeError from reading a property of null.
  const hasEmbeddings =
    data !== null && typeof data === "object" && Array.isArray(data.embeddings);
  if (!hasEmbeddings) {
    throw embedFailure(
      `embedding response missing .embeddings array (got: ${JSON.stringify(Object.keys(data || {}))})`
    );
  }

  if (data.embeddings.length !== texts.length) {
    throw embedFailure(
      `expected ${texts.length} embedding(s), got ${data.embeddings.length}`
    );
  }

  return data.embeddings;
}
160
+
161
+ // ---------------------------------------------------------------------------
162
+ // createVectorSimilarityDetector
163
+ // ---------------------------------------------------------------------------
164
+
165
/**
 * Build a SimilarityDetector that ranks candidates by embedding similarity.
 *
 * The returned async function has the shape
 *   (concept, candidates, store) => Promise<string[]>
 * and resolves to the ids of the non-retired candidates whose cosine
 * similarity to the concept's embedding is at least `threshold`.
 *
 * @param {object} [options]
 * @param {((texts: string[]) => Promise<number[][]>)} [options.embed]
 *   Custom embedding function; must return one vector per input text. When
 *   supplied, `host` and `model` are not used.
 * @param {string} [options.host="http://localhost:11434"]
 *   Ollama base URL, used only when `embed` is not supplied.
 * @param {string} [options.model="nomic-embed-text"]
 *   Ollama embedding model, used only when `embed` is not supplied.
 * @param {number} [options.threshold=0.60]
 *   Minimum cosine similarity (inclusive) for a candidate to be returned.
 * @param {((record: object) => string)} [options.text]
 *   Maps a record to the text that gets embedded. Defaults to the module's
 *   title-plus-body extractor.
 * @returns {(concept: object, candidates: object[], store: object) => Promise<string[]>}
 */
export function createVectorSimilarityDetector(options = {}) {
  const {
    embed: injectEmbed = null,
    host = "http://localhost:11434",
    model = "nomic-embed-text",
    threshold = 0.60,
    text: extractText = defaultTextExtractor,
  } = options;

  // Pick the embedding backend once, at construction time.
  const embedFn = injectEmbed || ((texts) => ollamaEmbed(host, model, texts));

  /**
   * SimilarityDetector implementation.
   *
   * Resolves to [] without embedding anything when there are no candidates or
   * all of them are retired. Errors from the embedding function propagate to
   * the caller, and a wrong number of returned vectors is raised as
   * EMBED_FAILURE, so a broken backend is never mistaken for "nothing similar".
   *
   * @param {object} concept
   * @param {object[]} candidates
   * @param {object} _store - Unused; present for interface compatibility.
   * @returns {Promise<string[]>}
   */
  return async function vectorSimilarityDetector(concept, candidates, _store) {
    if (!candidates || candidates.length === 0) {
      return [];
    }

    // Retired records are excluded from the working set (Addendum B — R3).
    // A record without a status counts as "active".
    const live = candidates.filter((c) => (c.status || "active") !== "retired");
    if (live.length === 0) {
      return [];
    }

    // One batch, concept first, so a single embed call covers everything.
    const conceptText = extractText(concept);
    const allTexts = [conceptText, ...live.map(extractText)];

    const embeddings = await embedFn(allTexts);

    // A count mismatch would misalign vectors and candidates and quietly
    // drop matches, so treat it as an embedding failure.
    const countMatches = Array.isArray(embeddings) && embeddings.length === allTexts.length;
    if (!countMatches) {
      const err = new Error(
        `EMBED_FAILURE: embed function returned ${Array.isArray(embeddings) ? embeddings.length : typeof embeddings} vector(s) but expected ${allTexts.length} (1 concept + ${live.length} active candidates)`
      );
      err.code = "EMBED_FAILURE";
      throw err;
    }

    const conceptVec = embeddings[0];
    const similar = [];
    live.forEach((candidate, idx) => {
      // Candidate vectors follow the concept vector, hence the +1 offset.
      if (cosineSimilarity(conceptVec, embeddings[idx + 1]) >= threshold) {
        similar.push(candidate.id);
      }
    });
    return similar;
  };
}
266
+
267
+ // ---------------------------------------------------------------------------
268
+ // Helpers
269
+ // ---------------------------------------------------------------------------
270
+
271
/**
 * Default record-to-text mapping: the title, a newline, then the body.
 *
 * A missing record, or a missing/falsy `title` or `body`, contributes an empty
 * string, so the result always contains exactly one joining newline.
 *
 * @param {object} record
 * @returns {string}
 */
function defaultTextExtractor(record) {
  const parts = [record?.title || "", record?.body || ""];
  return parts.join("\n");
}
283
+
284
+ export default createVectorSimilarityDetector;