@kontourai/flow-agents 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release-please.yml +13 -1
- package/.github/workflows/runtime-compat.yml +1 -1
- package/AGENTS.md +8 -1
- package/CHANGELOG.md +41 -0
- package/README.md +38 -19
- package/build/src/cli/flow-kit.js +9 -4
- package/build/src/cli/runtime-adapter.js +9 -5
- package/build/src/cli/telemetry-doctor.js +4 -1
- package/build/src/runtime-adapters.js +34 -0
- package/build/src/tools/build-universal-bundles.js +18 -1
- package/console.telemetry.json +115 -20
- package/docs/_layouts/default.html +2 -0
- package/docs/index.md +8 -0
- package/docs/integrations/index.md +4 -0
- package/docs/integrations/knowledge-kit-live.md +211 -0
- package/docs/kit-authoring-guide.md +169 -0
- package/docs/spec/runtime-hook-surface.md +56 -3
- package/evals/acceptance/run.sh +10 -1
- package/evals/acceptance/test_knowledge_kit_live.sh +221 -0
- package/evals/acceptance/test_pi_harness.sh +15 -0
- package/evals/integration/test_runtime_adapter_activation.sh +113 -1
- package/evals/static/test_universal_bundles.sh +10 -0
- package/integrations/strands/examples/knowledge_kit_live.py +461 -0
- package/integrations/strands/flow_agents_strands/steering.py +54 -1
- package/integrations/strands/tests/test_hooks.py +88 -0
- package/integrations/strands-ts/src/hooks.ts +104 -0
- package/integrations/strands-ts/test/test-steering.ts +159 -0
- package/kits/catalog.json +6 -0
- package/kits/knowledge/adapters/default-store/index.js +902 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1469 -0
- package/kits/knowledge/adapters/flow-runner/telemetry.js +174 -0
- package/kits/knowledge/adapters/similarity-vector/index.js +284 -0
- package/kits/knowledge/docs/README.md +328 -0
- package/kits/knowledge/docs/store-contract.md +650 -0
- package/kits/knowledge/evals/consolidation/suite.test.js +1234 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +675 -0
- package/kits/knowledge/evals/ingest-compile/suite.test.js +574 -0
- package/kits/knowledge/evals/retirement/suite.test.js +1173 -0
- package/kits/knowledge/evals/similarity-vector/suite.test.js +685 -0
- package/kits/knowledge/evals/synthesis/suite.test.js +916 -0
- package/kits/knowledge/flows/compile.flow.json +60 -0
- package/kits/knowledge/flows/consolidate.flow.json +77 -0
- package/kits/knowledge/flows/ingest.flow.json +60 -0
- package/kits/knowledge/flows/retire.flow.json +77 -0
- package/kits/knowledge/flows/store-contract.flow.json +48 -0
- package/kits/knowledge/flows/synthesize.flow.json +77 -0
- package/kits/knowledge/kit.json +98 -0
- package/package.json +1 -1
- package/src/cli/flow-kit.ts +10 -4
- package/src/cli/runtime-adapter.ts +10 -5
- package/src/cli/telemetry-doctor.ts +4 -1
- package/src/runtime-adapters.ts +35 -0
- package/src/tools/build-universal-bundles.ts +18 -1
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Kit — Telemetry Helper
|
|
3
|
+
*
|
|
4
|
+
* Emits canonical Flow Agents telemetry events (schema v0.3.0) to a JSONL
|
|
5
|
+
* sink file. Matches the event shape produced by telemetry.sh and the Python
|
|
6
|
+
* TelemetrySink in integrations/strands/flow_agents_strands/telemetry.py.
|
|
7
|
+
*
|
|
8
|
+
* Zero runtime dependencies beyond Node.js built-ins.
|
|
9
|
+
* Fails open: telemetry errors never block kit operations.
|
|
10
|
+
*
|
|
11
|
+
* Sink path: <workspace>/.telemetry/full.jsonl
|
|
12
|
+
* The workspace is resolved from FLOW_AGENTS_WORKSPACE env var, falling back
|
|
13
|
+
* to process.cwd().
|
|
14
|
+
*
|
|
15
|
+
* @module adapters/flow-runner/telemetry
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import * as fs from "node:fs";
|
|
19
|
+
import * as path from "node:path";
|
|
20
|
+
import * as crypto from "node:crypto";
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Schema constants
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
const SCHEMA_VERSION = "0.3.0";

// Canonical event name → schema event_type (mirrors telemetry.sh schema_event_type())
const CANONICAL_TO_SCHEMA = {
  agentSpawn: "session.start",
  userPromptSubmit: "turn.user",
  preToolUse: "tool.invoke",
  permissionRequest: "tool.permission_request",
  postToolUse: "tool.result",
  stop: "session.end",
  subagentStart: "agent.delegate",
  subagentStop: "agent.delegate",
};

/**
 * Translate a canonical hook event name into its schema `event_type`.
 *
 * Only keys declared directly on CANONICAL_TO_SCHEMA are honoured. A plain
 * `map[key] || "unknown"` lookup would walk the prototype chain, so names
 * such as "constructor", "toString" or "__proto__" would yield functions or
 * objects instead of a string event type.
 *
 * @param {string} canonical - canonical event name (e.g. "preToolUse")
 * @returns {string} the mapped schema event type, or "unknown" when unmapped
 */
function schemaEventType(canonical) {
  return Object.hasOwn(CANONICAL_TO_SCHEMA, canonical)
    ? CANONICAL_TO_SCHEMA[canonical]
    : "unknown";
}
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Sink resolution
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
/**
 * Work out where telemetry events are appended.
 *
 * The workspace root is taken from the first truthy value of: the explicit
 * argument, the FLOW_AGENTS_WORKSPACE environment variable, the current
 * working directory.
 *
 * @param {string} [workspace] - explicit workspace root
 * @returns {string} path to `<workspace>/.telemetry/full.jsonl`
 */
function resolveSinkPath(workspace) {
  let root = workspace;
  if (!root) {
    root = process.env.FLOW_AGENTS_WORKSPACE;
  }
  if (!root) {
    root = process.cwd();
  }
  return path.join(root, ".telemetry", "full.jsonl");
}
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// KnowledgeTelemetry
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/**
 * JSONL telemetry writer for the Knowledge Kit flow runner.
 *
 * Each call appends one JSON object per line to the sink file. Write errors
 * are swallowed on purpose so telemetry can never block kit operations.
 *
 * Usage:
 *   const tel = new KnowledgeTelemetry({ workspace: "/path/to/workspace" });
 *   tel.emitGate("knowledge.ingest", "classify-gate", { category: "research", record_id: id });
 */
export class KnowledgeTelemetry {
  /**
   * @param {{ workspace?: string, agentName?: string, sessionId?: string }} options
   *   `workspace` is handed to resolveSinkPath(); `agentName` falls back to
   *   "knowledge-kit"; `sessionId` falls back to a freshly generated UUID.
   */
  constructor({ workspace, agentName, sessionId } = {}) {
    this._sinkPath = resolveSinkPath(workspace);
    this._agentName = agentName ? agentName : "knowledge-kit";
    this._sessionId = sessionId ? sessionId : crypto.randomUUID();

    // Best effort: create the sink directory up front. A failure here is
    // ignored; later appends will simply fail (and be ignored) as well.
    try {
      const sinkDir = path.dirname(this._sinkPath);
      fs.mkdirSync(sinkDir, { recursive: true });
    } catch {
      // fail open
    }
  }

  // -------------------------------------------------------------------------
  // Core emit
  // -------------------------------------------------------------------------

  /**
   * Assemble a canonical telemetry event and append it to the JSONL sink.
   *
   * The event object is returned whether or not the write succeeded, which
   * makes it convenient to inspect in tests.
   *
   * @param {string} canonicalEvent - canonical event name (e.g. "preToolUse")
   * @param {object} [extra] - own enumerable fields copied onto the event;
   *   fields with the same name as an envelope field replace it
   * @returns {object} the emitted event
   */
  emit(canonicalEvent, extra) {
    const eventType = schemaEventType(canonicalEvent);

    const agent = {
      name: this._agentName,
      runtime: "knowledge-kit",
      version: "unknown",
    };

    const hook = {
      event_name: canonicalEvent,
      runtime_session_id: "",
      turn_id: "",
      transcript_path: "",
      model: "",
      source: "knowledge-kit",
      stop_hook_active: null,
      last_assistant_message: "",
      raw_input: null,
    };

    const record = {
      schema_version: SCHEMA_VERSION,
      timestamp: String(Date.now()),
      session_id: this._sessionId,
      event_id: crypto.randomUUID(),
      event_type: eventType,
      agent,
      hook,
    };

    const hasExtra = Boolean(extra) && typeof extra === "object";
    if (hasExtra) {
      Object.assign(record, extra);
    }

    try {
      // Serialisation happens inside the try so an unserialisable payload
      // is treated like any other write failure.
      const line = `${JSON.stringify(record)}\n`;
      fs.appendFileSync(this._sinkPath, line, "utf8");
    } catch {
      // fail open — telemetry must never block kit operations
    }

    return record;
  }

  // -------------------------------------------------------------------------
  // Semantic helpers
  // -------------------------------------------------------------------------

  /**
   * Record that a flow gate is being entered (emitted as "preToolUse").
   *
   * @param {string} flowId - e.g. "knowledge.ingest"
   * @param {string} gateId - e.g. "classify-gate"
   * @param {object} [context] - gate-specific context payload; null when falsy
   * @returns {object} the emitted event
   */
  emitGate(flowId, gateId, context) {
    const tool = {
      name: `${flowId}.${gateId}`,
      normalized_name: "flow.gate",
      input: context || null,
    };
    return this.emit("preToolUse", { tool });
  }

  /**
   * Record the outcome of a flow gate (emitted as "postToolUse").
   *
   * @param {string} flowId - e.g. "knowledge.ingest"
   * @param {string} gateId - e.g. "classify-gate"
   * @param {object} [result] - gate result payload; null when falsy
   * @returns {object} the emitted event
   */
  emitGateResult(flowId, gateId, result) {
    const tool = {
      name: `${flowId}.${gateId}`,
      normalized_name: "flow.gate",
      output: result || null,
    };
    return this.emit("postToolUse", { tool });
  }
}

export default KnowledgeTelemetry;
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Kit — Vector Similarity Adapter
|
|
3
|
+
*
|
|
4
|
+
* Provides a drop-in SimilarityDetector implementation backed by dense vector
|
|
5
|
+
* embeddings (cosine similarity) instead of the default category-prefix /
|
|
6
|
+
* link-overlap heuristic.
|
|
7
|
+
*
|
|
8
|
+
* SimilarityDetector interface (from adapters/flow-runner/index.js):
|
|
9
|
+
* async (concept: Record, candidates: Record[], store: KnowledgeStoreAdapter) => string[]
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* import { createVectorSimilarityDetector } from './adapters/similarity-vector/index.js';
|
|
13
|
+
*
|
|
14
|
+
* // Ollama (default):
|
|
15
|
+
* const detector = createVectorSimilarityDetector();
|
|
16
|
+
*
|
|
17
|
+
* // Ollama with non-default model/host:
|
|
18
|
+
* const detector = createVectorSimilarityDetector({
|
|
19
|
+
* host: 'http://localhost:11434',
|
|
20
|
+
* model: 'nomic-embed-text',
|
|
21
|
+
* threshold: 0.60,
|
|
22
|
+
* });
|
|
23
|
+
*
|
|
24
|
+
* // Injectable embed fn (for tests / custom providers):
|
|
25
|
+
* const detector = createVectorSimilarityDetector({
|
|
26
|
+
* embed: async (texts) => texts.map(() => [0.1, 0.9, 0.0]),
|
|
27
|
+
* threshold: 0.60,
|
|
28
|
+
* });
|
|
29
|
+
*
|
|
30
|
+
* // Pass to synthesize:
|
|
31
|
+
* await runner.synthesize(conceptId, {
|
|
32
|
+
* proposedBody: '...',
|
|
33
|
+
* rationale: '...',
|
|
34
|
+
* similarityDetector: detector,
|
|
35
|
+
* });
|
|
36
|
+
*
|
|
37
|
+
* Zero npm dependencies — uses Node.js built-in fetch (Node >= 18).
|
|
38
|
+
*
|
|
39
|
+
* Fail-closed policy:
|
|
40
|
+
* If the embedding call fails (network error, non-200, malformed response),
|
|
41
|
+
* the detector throws an Error with code="EMBED_FAILURE". This is intentional:
|
|
42
|
+
* silently returning [] would look identical to "no similar records found" and
|
|
43
|
+
* mask infrastructure failures as legitimate empty clusters, blocking synthesis
|
|
44
|
+
* with a misleading MISSING_EVIDENCE rather than a clear infrastructure error.
|
|
45
|
+
*
|
|
46
|
+
* @module adapters/similarity-vector
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Pure cosine similarity (exported for tests)
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
/**
 * Compute the cosine similarity between two equal-length numeric vectors.
 *
 * Returns a value in [-1, 1]:
 *    1.0 — identical direction
 *    0.0 — orthogonal
 *   -1.0 — opposite direction
 *
 * Edge cases:
 *   - Non-array argument(s): returns 0.
 *   - Zero-magnitude vector(s): returns 0 (no similarity signal).
 *   - Empty or unequal-length vectors: returns 0.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
export function cosineSimilarity(a, b) {
  if (!Array.isArray(a) || !Array.isArray(b)) return 0;
  if (a.length !== b.length || a.length === 0) return 0;

  let dot = 0;
  let magA = 0;
  let magB = 0;

  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }

  const denom = Math.sqrt(magA) * Math.sqrt(magB);
  if (denom === 0) return 0;

  // Floating-point rounding can push the quotient a hair outside [-1, 1]
  // (e.g. [1, 1, 1] against itself gives 1.0000000000000002). Clamp so the
  // documented range always holds.
  return Math.max(-1, Math.min(1, dot / denom));
}
|
|
87
|
+
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// Ollama embed call
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
/**
 * Call ollama's /api/embed endpoint and return one embedding per input text.
 *
 * Every failure path — transport error, non-OK HTTP status, a body that is
 * not JSON, a body without an `embeddings` array (including a literal JSON
 * `null`), or an embedding count that differs from `texts.length` — throws
 * an Error whose `code` is "EMBED_FAILURE".
 *
 * @param {string} host   Ollama base URL; trailing slashes are tolerated.
 * @param {string} model  Embedding model name sent in the request body.
 * @param {string[]} texts  Texts to embed, sent as the `input` field.
 * @returns {Promise<number[][]>} the `embeddings` array from the response
 * @throws {Error} with `code === "EMBED_FAILURE"` on any failure
 */
async function ollamaEmbed(host, model, texts) {
  // Single place that builds the tagged error, so every failure path carries
  // the same prefix and code.
  const embedFailure = (detail, cause) => {
    const err = new Error(`EMBED_FAILURE: ${detail}`);
    err.code = "EMBED_FAILURE";
    if (cause !== undefined) {
      err.cause = cause;
    }
    return err;
  };

  // Drop trailing slashes so "http://host/" does not become "//api/embed".
  let base = String(host);
  while (base.endsWith("/")) {
    base = base.slice(0, -1);
  }
  const url = `${base}/api/embed`;

  let response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model, input: texts }),
    });
  } catch (cause) {
    throw embedFailure(
      `embedding call to ${url} failed — ${cause?.message ?? cause}`,
      cause
    );
  }

  if (!response.ok) {
    const body = await response.text().catch(() => "(unreadable)");
    throw embedFailure(
      `embedding call to ${url} returned HTTP ${response.status}: ${body}`
    );
  }

  let data;
  try {
    data = await response.json();
  } catch (cause) {
    throw embedFailure(
      `embedding response from ${url} was not valid JSON — ${cause?.message ?? cause}`,
      cause
    );
  }

  // ollama /api/embed returns { embeddings: number[][] }.
  // `data` may legitimately parse to null or a primitive, so the property
  // access must be null-safe or the caller would see a raw TypeError.
  if (!Array.isArray(data?.embeddings)) {
    throw embedFailure(
      `embedding response missing .embeddings array (got: ${JSON.stringify(Object.keys(data || {}))})`
    );
  }

  if (data.embeddings.length !== texts.length) {
    throw embedFailure(
      `expected ${texts.length} embedding(s), got ${data.embeddings.length}`
    );
  }

  return data.embeddings;
}
|
|
160
|
+
|
|
161
|
+
// ---------------------------------------------------------------------------
|
|
162
|
+
// createVectorSimilarityDetector
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
/**
 * Build a SimilarityDetector that ranks candidates by embedding similarity.
 *
 * The returned function has the SimilarityDetector shape:
 *   async (concept, candidates, store) => string[]
 *
 * @param {object} [options]
 * @param {((texts: string[]) => Promise<number[][]>)} [options.embed]
 *   Custom embedding function. When supplied, `host` and `model` are not
 *   used. It must resolve to one vector per input text.
 * @param {string} [options.host="http://localhost:11434"]
 *   Ollama server base URL; only used when `embed` is absent.
 * @param {string} [options.model="nomic-embed-text"]
 *   Embedding model name passed to ollama; only used when `embed` is absent.
 * @param {number} [options.threshold=0.60]
 *   Minimum cosine similarity (inclusive) for a candidate to be returned.
 * @param {((record: object) => string)} [options.text]
 *   Turns a record into the text that gets embedded.
 *   Default: `record.title + "\n" + record.body`.
 * @returns {(concept: object, candidates: object[], store: object) => Promise<string[]>}
 */
export function createVectorSimilarityDetector(options = {}) {
  const {
    embed: customEmbed = null,
    host: ollamaHost = "http://localhost:11434",
    model: ollamaModel = "nomic-embed-text",
    threshold: minScore = 0.60,
    text: toText = defaultTextExtractor,
  } = options;

  // Decide once which embedding backend this detector uses.
  const embedBatch =
    customEmbed || ((texts) => ollamaEmbed(ollamaHost, ollamaModel, texts));

  /**
   * Return the ids of non-retired candidates whose cosine similarity to the
   * concept is at least the configured threshold.
   *
   * Errors raised by the embedding function propagate to the caller, and a
   * wrong number of returned vectors raises an error with
   * code "EMBED_FAILURE"; nothing is silently turned into [].
   *
   * @param {object} concept
   * @param {object[]} candidates
   * @param {object} _store  unused here; present to match the interface
   * @returns {Promise<string[]>}
   */
  async function vectorSimilarityDetector(concept, candidates, _store) {
    const nothingToCompare = !candidates || candidates.length === 0;
    if (nothingToCompare) return [];

    // Retired records never take part in similarity (Addendum B — R3).
    // A missing status counts as "active".
    const live = candidates.filter((candidate) => {
      const status = candidate.status || "active";
      return status !== "retired";
    });
    if (live.length === 0) return [];

    // Single batch, concept at index 0 and candidate k at index k + 1,
    // so only one embedding round-trip is needed.
    const conceptText = toText(concept);
    const batch = [conceptText, ...live.map(toText)];

    const vectors = await embedBatch(batch);

    // One vector per text is required; otherwise missing vectors would
    // score 0 and quietly drop candidates.
    const countMatches = Array.isArray(vectors) && vectors.length === batch.length;
    if (!countMatches) {
      const got = Array.isArray(vectors) ? vectors.length : typeof vectors;
      const failure = new Error(
        `EMBED_FAILURE: embed function returned ${got} vector(s) but expected ${batch.length} (1 concept + ${live.length} active candidates)`
      );
      failure.code = 'EMBED_FAILURE';
      throw failure;
    }

    const anchor = vectors[0];
    const matches = [];
    live.forEach((candidate, index) => {
      const score = cosineSimilarity(anchor, vectors[index + 1]);
      if (score >= minScore) {
        matches.push(candidate.id);
      }
    });
    return matches;
  }

  return vectorSimilarityDetector;
}
|
|
266
|
+
|
|
267
|
+
// ---------------------------------------------------------------------------
|
|
268
|
+
// Helpers
|
|
269
|
+
// ---------------------------------------------------------------------------
|
|
270
|
+
|
|
271
|
+
/**
 * Default record-to-text conversion: the title, a newline, then the body.
 * A missing record, or a missing/falsy title or body, contributes an empty
 * string for that part.
 *
 * @param {object} record
 * @returns {string}
 */
function defaultTextExtractor(record) {
  const pieces = [record?.title, record?.body].map((part) => part || "");
  return pieces.join("\n");
}
|
|
283
|
+
|
|
284
|
+
export default createVectorSimilarityDetector;
|