@kontourai/flow-agents 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release-please.yml +13 -1
- package/AGENTS.md +8 -1
- package/CHANGELOG.md +18 -0
- package/evals/static/test_universal_bundles.sh +10 -0
- package/kits/knowledge/adapters/default-store/index.js +93 -12
- package/kits/knowledge/adapters/flow-runner/index.js +290 -0
- package/kits/knowledge/adapters/similarity-vector/index.js +284 -0
- package/kits/knowledge/docs/README.md +193 -0
- package/kits/knowledge/docs/store-contract.md +124 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +10 -5
- package/kits/knowledge/evals/retirement/suite.test.js +1173 -0
- package/kits/knowledge/evals/similarity-vector/suite.test.js +685 -0
- package/kits/knowledge/evals/synthesis/suite.test.js +10 -3
- package/kits/knowledge/flows/retire.flow.json +77 -0
- package/kits/knowledge/kit.json +21 -1
- package/package.json +1 -1
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Kit — Vector Similarity Adapter
|
|
3
|
+
*
|
|
4
|
+
* Provides a drop-in SimilarityDetector implementation backed by dense vector
|
|
5
|
+
* embeddings (cosine similarity) instead of the default category-prefix /
|
|
6
|
+
* link-overlap heuristic.
|
|
7
|
+
*
|
|
8
|
+
* SimilarityDetector interface (from adapters/flow-runner/index.js):
|
|
9
|
+
* async (concept: Record, candidates: Record[], store: KnowledgeStoreAdapter) => string[]
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* import { createVectorSimilarityDetector } from './adapters/similarity-vector/index.js';
|
|
13
|
+
*
|
|
14
|
+
* // Ollama (default):
|
|
15
|
+
* const detector = createVectorSimilarityDetector();
|
|
16
|
+
*
|
|
17
|
+
* // Ollama with non-default model/host:
|
|
18
|
+
* const detector = createVectorSimilarityDetector({
|
|
19
|
+
* host: 'http://localhost:11434',
|
|
20
|
+
* model: 'nomic-embed-text',
|
|
21
|
+
* threshold: 0.60,
|
|
22
|
+
* });
|
|
23
|
+
*
|
|
24
|
+
* // Injectable embed fn (for tests / custom providers):
|
|
25
|
+
* const detector = createVectorSimilarityDetector({
|
|
26
|
+
* embed: async (texts) => texts.map(() => [0.1, 0.9, 0.0]),
|
|
27
|
+
* threshold: 0.60,
|
|
28
|
+
* });
|
|
29
|
+
*
|
|
30
|
+
* // Pass to synthesize:
|
|
31
|
+
* await runner.synthesize(conceptId, {
|
|
32
|
+
* proposedBody: '...',
|
|
33
|
+
* rationale: '...',
|
|
34
|
+
* similarityDetector: detector,
|
|
35
|
+
* });
|
|
36
|
+
*
|
|
37
|
+
* Zero npm dependencies — uses Node.js built-in fetch (Node >= 18).
|
|
38
|
+
*
|
|
39
|
+
* Fail-closed policy:
|
|
40
|
+
* If the embedding call fails (network error, non-200, malformed response),
|
|
41
|
+
* the detector throws an Error with code="EMBED_FAILURE". This is intentional:
|
|
42
|
+
* silently returning [] would look identical to "no similar records found" and
|
|
43
|
+
* mask infrastructure failures as legitimate empty clusters, blocking synthesis
|
|
44
|
+
* with a misleading MISSING_EVIDENCE rather than a clear infrastructure error.
|
|
45
|
+
*
|
|
46
|
+
* @module adapters/similarity-vector
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Pure cosine similarity (exported for tests)
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
/**
 * Cosine of the angle between two numeric vectors of the same dimension.
 *
 * The result is dot(a, b) / (|a| * |b|): 1 for vectors pointing the same way,
 * 0 for orthogonal vectors, -1 for vectors pointing in opposite directions.
 *
 * Inputs that cannot be compared yield 0 ("no similarity signal"):
 *   - either argument is not an array,
 *   - the arrays are empty or differ in length,
 *   - either vector has zero magnitude.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
export function cosineSimilarity(a, b) {
  const comparable =
    Array.isArray(a) &&
    Array.isArray(b) &&
    a.length === b.length &&
    a.length !== 0;
  if (!comparable) {
    return 0;
  }

  // Accumulate the dot product and both squared norms in a single pass.
  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (const [idx, x] of a.entries()) {
    const y = b[idx];
    dotProduct += x * y;
    squaredNormA += x * x;
    squaredNormB += y * y;
  }

  const normProduct = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
  // A zero-magnitude vector has no direction, so report "no similarity".
  return normProduct === 0 ? 0 : dotProduct / normProduct;
}
|
|
87
|
+
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// Ollama embed call
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
/**
 * Call ollama's /api/embed endpoint and return one embedding per input text.
 *
 * Sends `{ model, input: texts }` as a JSON POST to `${host}/api/embed` and
 * expects a JSON body of the form `{ embeddings: number[][] }`.
 *
 * Every failure path throws an Error with code="EMBED_FAILURE":
 *   - the fetch call itself rejects (the original error is attached as `cause`),
 *   - the response status is not ok,
 *   - the response body is not valid JSON (original error attached as `cause`),
 *   - the parsed body is null/non-object or lacks an `embeddings` array,
 *   - the number of embeddings differs from the number of input texts.
 *
 * @param {string} host   Ollama base URL; trailing slashes are ignored.
 * @param {string} model  Embedding model name.
 * @param {string[]} texts
 * @returns {Promise<number[][]>}
 */
async function ollamaEmbed(host, model, texts) {
  // Strip trailing slashes so "http://host:11434/" does not yield "//api/embed".
  const url = `${String(host).replace(/\/+$/, "")}/api/embed`;

  // Build a tagged EMBED_FAILURE error; `cause` is attached only when supplied.
  const embedFailure = (detail, cause) => {
    const err = new Error(`EMBED_FAILURE: ${detail}`);
    const extras =
      cause === undefined
        ? { code: "EMBED_FAILURE" }
        : { code: "EMBED_FAILURE", cause };
    return Object.assign(err, extras);
  };

  // Describe a thrown value even when it is not an Error instance.
  const reasonOf = (thrown) => thrown?.message ?? String(thrown);

  let response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model, input: texts }),
    });
  } catch (cause) {
    throw embedFailure(
      `embedding call to ${url} failed — ${reasonOf(cause)}`,
      cause
    );
  }

  if (!response.ok) {
    // Reading the error body is best-effort; never let it mask the HTTP error.
    const body = await response.text().catch(() => "(unreadable)");
    throw embedFailure(
      `embedding call to ${url} returned HTTP ${response.status}: ${body}`
    );
  }

  let data;
  try {
    data = await response.json();
  } catch (cause) {
    throw embedFailure(
      `embedding response from ${url} was not valid JSON — ${reasonOf(cause)}`,
      cause
    );
  }

  // ollama /api/embed returns { embeddings: number[][] }.
  // Guard `data` itself first: a JSON body of `null` is valid JSON but must
  // surface as EMBED_FAILURE, not as a TypeError from dereferencing null.
  if (!data || !Array.isArray(data.embeddings)) {
    throw embedFailure(
      `embedding response missing .embeddings array (got: ${JSON.stringify(Object.keys(data || {}))})`
    );
  }

  if (data.embeddings.length !== texts.length) {
    throw embedFailure(
      `expected ${texts.length} embedding(s), got ${data.embeddings.length}`
    );
  }

  return data.embeddings;
}
|
|
160
|
+
|
|
161
|
+
// ---------------------------------------------------------------------------
|
|
162
|
+
// createVectorSimilarityDetector
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
/**
 * Create a SimilarityDetector backed by dense vector embeddings.
 *
 * The returned detector satisfies the SimilarityDetector interface:
 *   async (concept, candidates, store) => string[]
 *
 * @param {object} [options]
 * @param {((texts: string[]) => Promise<number[][]>)} [options.embed]
 *   Injectable embedding function. When provided, `host` and `model` are
 *   ignored. Signature: async (texts: string[]) => number[][]
 *   Must return one vector per input text, all of the same dimension.
 * @param {string} [options.host="http://localhost:11434"]
 *   Ollama server base URL. Used when `embed` is not provided.
 * @param {string} [options.model="nomic-embed-text"]
 *   Embedding model name passed to ollama. Used when `embed` is not provided.
 * @param {number} [options.threshold=0.60]
 *   Minimum cosine similarity score for a candidate to be included.
 *   Range: [-1, 1]. Default 0.60 is calibrated for nomic-embed-text where
 *   semantically related texts from the same domain typically score ≥ 0.70
 *   and unrelated texts score < 0.50.
 * @param {((record: object) => string)} [options.text]
 *   Extractor that converts a store record to the text to embed.
 *   Default: `record.title + "\n" + record.body`.
 * @param {boolean} [options.includeRetired=false]
 *   When true, candidates with status "retired" are kept in the working set
 *   instead of being filtered out (the detector-side equivalent of the
 *   store's `includeRetired` query option).
 * @returns {(concept: object, candidates: object[], store: object) => Promise<string[]>}
 */
export function createVectorSimilarityDetector(options = {}) {
  const {
    embed: injectEmbed = null,
    host = "http://localhost:11434",
    model = "nomic-embed-text",
    threshold = 0.60,
    text: extractText = defaultTextExtractor,
    includeRetired = false,
  } = options;

  // Resolve the actual embed function once (avoid re-resolving on each call)
  const embedFn = injectEmbed
    ? injectEmbed
    : (texts) => ollamaEmbed(host, model, texts);

  // Build an Error tagged with code="EMBED_FAILURE".
  const embedFailure = (detail) =>
    Object.assign(new Error(`EMBED_FAILURE: ${detail}`), {
      code: "EMBED_FAILURE",
    });

  // True when `vec` is a non-empty array of `dimension` finite numbers.
  // Uses an index loop so holes in sparse arrays are inspected as well.
  const isUsableVector = (vec, dimension) => {
    if (!Array.isArray(vec) || vec.length === 0 || vec.length !== dimension) {
      return false;
    }
    for (let k = 0; k < vec.length; k++) {
      if (typeof vec[k] !== "number" || !Number.isFinite(vec[k])) return false;
    }
    return true;
  };

  /**
   * SimilarityDetector: returns candidate IDs whose cosine similarity to the
   * concept embedding meets or exceeds `threshold`.
   *
   * Fail-closed: a wrong vector count or any malformed vector throws
   * EMBED_FAILURE rather than silently dropping candidates. Errors thrown by
   * the embed function itself propagate unchanged.
   *
   * @param {object} concept
   * @param {object[]} candidates
   * @param {object} _store  (not used by vector detector; kept for interface compat)
   * @returns {Promise<string[]>}
   */
  async function vectorSimilarityDetector(concept, candidates, _store) {
    if (!candidates || candidates.length === 0) {
      return [];
    }

    // Exclude retired records from the working set (Addendum B — R3),
    // unless the caller explicitly opted in via `includeRetired`.
    const activeCandidates = includeRetired
      ? candidates
      : candidates.filter((c) => (c.status || "active") !== "retired");

    if (activeCandidates.length === 0) {
      return [];
    }

    const conceptText = extractText(concept);

    // Build the batch: concept first, then all working-set candidates.
    // One round-trip minimises latency and keeps the batch API simple.
    const allTexts = [conceptText, ...activeCandidates.map(extractText)];

    // Embedding call — throws EMBED_FAILURE on any infrastructure error.
    const embeddings = await embedFn(allTexts);

    // Validate count: the embed fn must return one vector per input text.
    // A count mismatch would produce silent wrong results (undefined vectors
    // scoring 0 and being excluded) — throw EMBED_FAILURE instead.
    if (!Array.isArray(embeddings) || embeddings.length !== allTexts.length) {
      throw embedFailure(
        `embed function returned ${Array.isArray(embeddings) ? embeddings.length : typeof embeddings} vector(s) but expected ${allTexts.length} (1 concept + ${activeCandidates.length} active candidates)`
      );
    }

    // Validate shape: cosineSimilarity yields 0 (or NaN) for non-array,
    // mismatched-length or non-finite vectors, which would drop a candidate
    // exactly as if it were dissimilar. Treat that as an embedding failure.
    const conceptVec = embeddings[0];
    const dimension = Array.isArray(conceptVec) ? conceptVec.length : 0;
    for (let idx = 0; idx < embeddings.length; idx++) {
      if (!isUsableVector(embeddings[idx], dimension)) {
        throw embedFailure(
          `embed function returned a malformed vector at index ${idx} (expected a non-empty array of finite numbers matching the concept vector's dimension)`
        );
      }
    }

    const similar = [];

    // embeddings[0] is the concept; candidate i lives at embeddings[i + 1].
    for (let i = 0; i < activeCandidates.length; i++) {
      const candidateVec = embeddings[i + 1];
      const score = cosineSimilarity(conceptVec, candidateVec);
      if (score >= threshold) {
        similar.push(activeCandidates[i].id);
      }
    }

    return similar;
  }

  return vectorSimilarityDetector;
}
|
|
266
|
+
|
|
267
|
+
// ---------------------------------------------------------------------------
|
|
268
|
+
// Helpers
|
|
269
|
+
// ---------------------------------------------------------------------------
|
|
270
|
+
|
|
271
|
+
/**
 * Default record-to-text conversion used for embedding: the record's title,
 * a newline, then its body.
 *
 * A missing record, or a missing/falsy `title` or `body`, contributes an
 * empty string, so the result always contains the separating newline.
 *
 * @param {object} record
 * @returns {string}
 */
function defaultTextExtractor(record) {
  const fields = [record?.title, record?.body];
  return fields.map((value) => value || "").join("\n");
}
|
|
283
|
+
|
|
284
|
+
export default createVectorSimilarityDetector;
|
|
@@ -24,6 +24,15 @@ See [`store-contract.md`](store-contract.md) for the full specification. Quick r
|
|
|
24
24
|
| `raw` | Unprocessed source material — excerpts, transcripts, URLs with notes. |
|
|
25
25
|
| `compiled` | Normalized, editor-reviewed distillations of raw records. |
|
|
26
26
|
| `concept` | Named ideas or principles that other records reference. |
|
|
27
|
+
| `snapshot` | Bounded decision summary for a topic (Addendum A). |
|
|
28
|
+
|
|
29
|
+
**Record status lifecycle**
|
|
30
|
+
|
|
31
|
+
| Status | Meaning | Default |
|
|
32
|
+
|---|---|---|
|
|
33
|
+
| `active` | Live, part of the working set. | Yes (records without status field are treated as active). |
|
|
34
|
+
| `implemented` | Decision was shipped; transitional state before archival. | No |
|
|
35
|
+
| `retired` | Excluded from default working-set queries; history preserved. | No |
|
|
27
36
|
|
|
28
37
|
**Mutation operations**
|
|
29
38
|
|
|
@@ -35,6 +44,8 @@ See [`store-contract.md`](store-contract.md) for the full specification. Quick r
|
|
|
35
44
|
| `propose` | `agent`, `proposal` (non-empty) |
|
|
36
45
|
| `apply` | `agent`, `new_body` (non-empty), `rationale` (non-empty) |
|
|
37
46
|
| `reject` | `agent`, `reason` (non-empty) |
|
|
47
|
+
| `supersede` | `agent`, `rationale` (non-empty), non-empty `supersededIds` array |
|
|
48
|
+
| `retire` | `agent`, `rationale` (non-empty), `implementedByRef` (when `targetStatus="implemented"`) |
|
|
38
49
|
|
|
39
50
|
Every mutation throws with `error.code === "MISSING_EVIDENCE"` when required evidence is absent.
|
|
40
51
|
|
|
@@ -127,9 +138,191 @@ and adapter infrastructure remain the foundation.
|
|
|
127
138
|
|
|
128
139
|
---
|
|
129
140
|
|
|
141
|
+
## Decision Lifecycle — Retiring Records (S7)
|
|
142
|
+
|
|
143
|
+
Implemented or obsolete records can be retired from the working set via the `knowledge.retire`
|
|
144
|
+
flow. Retirement is **non-destructive**: the record body, links, and creation provenance remain
|
|
145
|
+
intact; the record is simply excluded from the default working set.
|
|
146
|
+
|
|
147
|
+
### Status transitions
|
|
148
|
+
|
|
149
|
+
| From | To | Evidence required |
|
|
150
|
+
|---|---|---|
|
|
151
|
+
| `active` | `implemented` | `rationale` (non-empty) + `implementedByRef` (non-empty ref to implementing artifact) |
|
|
152
|
+
| `active` | `retired` | `rationale` (non-empty) |
|
|
153
|
+
| `implemented` | `retired` | `rationale` (non-empty) |
|
|
154
|
+
| `retired` | *(any)* | Invalid — `retired` is terminal |
|
|
155
|
+
|
|
156
|
+
### Working-set exclusion
|
|
157
|
+
|
|
158
|
+
Retired records are excluded from:
|
|
159
|
+
|
|
160
|
+
- `listByType(type)` — default query
|
|
161
|
+
- `listByCategory(category, options)` — default query
|
|
162
|
+
- `defaultSimilarityDetector` — default cluster candidates
|
|
163
|
+
- `createVectorSimilarityDetector` — vector cluster candidates
|
|
164
|
+
|
|
165
|
+
Add `{ includeRetired: true }` to any query to restore retired records.
|
|
166
|
+
|
|
167
|
+
`get(id)` **always** returns the full record regardless of status.
|
|
168
|
+
|
|
169
|
+
### Using the retire flow
|
|
170
|
+
|
|
171
|
+
```js
|
|
172
|
+
import { KnowledgeFlowRunner } from './adapters/flow-runner/index.js';
|
|
173
|
+
|
|
174
|
+
const runner = new KnowledgeFlowRunner({ store, workspace });
|
|
175
|
+
|
|
176
|
+
// Retire a compiled decision record that was implemented
|
|
177
|
+
const result = await runner.retire(compiledId, {
|
|
178
|
+
targetStatus: 'implemented',
|
|
179
|
+
rationale: 'REST API shipped in v1.0 (PR #42).',
|
|
180
|
+
implementedByRef: 'https://github.com/org/repo/pull/42',
|
|
181
|
+
decision: 'apply',
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
// Retire an obsolete concept record
|
|
185
|
+
await runner.retire(conceptId, {
|
|
186
|
+
targetStatus: 'retired',
|
|
187
|
+
rationale: 'Superseded by new architecture decision in ADR-007.',
|
|
188
|
+
decision: 'apply',
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
// Reject a retirement proposal (status unchanged)
|
|
192
|
+
await runner.retire(recordId, {
|
|
193
|
+
targetStatus: 'retired',
|
|
194
|
+
rationale: 'Proposing retirement.',
|
|
195
|
+
decision: 'reject',
|
|
196
|
+
rejectReason: 'Still needed for reference.',
|
|
197
|
+
});
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Accessing retired records with provenance
|
|
201
|
+
|
|
202
|
+
```js
|
|
203
|
+
// Always works — returns full record including retirement evidence
|
|
204
|
+
const record = await store.get(retiredId);
|
|
205
|
+
console.log(record.status); // "retired"
|
|
206
|
+
console.log(record.mutation_log); // includes retire entry with rationale
|
|
207
|
+
|
|
208
|
+
// Query all records of a type, including retired ones
|
|
209
|
+
const allCompiled = await store.listByType('compiled', { includeRetired: true });
|
|
210
|
+
|
|
211
|
+
// Get history from snapshot provenance
|
|
212
|
+
const snapshot = await store.get(snapshotId);
|
|
213
|
+
for (const srcId of snapshot.provenance.source_ids) {
|
|
214
|
+
const src = await store.get(srcId); // works even if src is retired
|
|
215
|
+
console.log(src.id, src.status);
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
130
221
|
## Non-Goals (this iteration)
|
|
131
222
|
|
|
132
223
|
- Vector/semantic retrieval (parked as I10)
|
|
133
224
|
- Multi-user concurrency
|
|
134
225
|
- Store migrations
|
|
135
226
|
- Personal-KB import (parked as I11)
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Similarity Detectors
|
|
231
|
+
|
|
232
|
+
The `synthesize` and `consolidate` flows accept a pluggable `similarityDetector` option. A
|
|
233
|
+
detector has the signature:
|
|
234
|
+
|
|
235
|
+
```js
|
|
236
|
+
async (concept: Record, candidates: Record[], store: KnowledgeStoreAdapter) => string[]
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
It receives the target concept, all compiled candidates, and the store; it returns the IDs of
|
|
240
|
+
candidates that are similar enough to form a cluster. The `KnowledgeFlowRunner` uses the cluster
|
|
241
|
+
as its evidence base — an empty cluster throws `MISSING_EVIDENCE` at the detect-cluster gate.
|
|
242
|
+
|
|
243
|
+
### Choosing a detector
|
|
244
|
+
|
|
245
|
+
| Detector | Best for | Tradeoff |
|
|
246
|
+
|---|---|---|
|
|
247
|
+
| `defaultSimilarityDetector` (built-in) | Fast, zero-config. Works well when records share a structured category taxonomy and inter-record wikilinks. | Relies on category prefixes and link-overlap (Jaccard ≥ 0.10). Misses semantic similarity across category boundaries. |
|
|
248
|
+
| `createVectorSimilarityDetector` | Semantic clustering. Finds similar records regardless of how they were categorised. | Requires an embedding backend (ollama by default). Adds latency proportional to cluster size. |
|
|
249
|
+
|
|
250
|
+
### Vector detector — ollama embedding
|
|
251
|
+
|
|
252
|
+
The vector adapter lives at `adapters/similarity-vector/index.js`. It is zero-dependency and
|
|
253
|
+
calls ollama's `/api/embed` endpoint via the built-in `fetch`.
|
|
254
|
+
|
|
255
|
+
```js
|
|
256
|
+
import { createVectorSimilarityDetector } from './adapters/similarity-vector/index.js';
|
|
257
|
+
|
|
258
|
+
// Default: uses ollama at localhost:11434 with nomic-embed-text
|
|
259
|
+
const detector = createVectorSimilarityDetector();
|
|
260
|
+
|
|
261
|
+
// Or customise host, model, and threshold:
|
|
262
|
+
const detector = createVectorSimilarityDetector({
|
|
263
|
+
host: 'http://localhost:11434',
|
|
264
|
+
model: 'nomic-embed-text',
|
|
265
|
+
threshold: 0.60, // cosine similarity cutoff
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
// Pass to synthesize:
|
|
269
|
+
await runner.synthesize(conceptId, {
|
|
270
|
+
proposedBody: '...',
|
|
271
|
+
rationale: '...',
|
|
272
|
+
similarityDetector: detector,
|
|
273
|
+
});
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Starting ollama:**
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
ollama serve &
|
|
280
|
+
ollama pull nomic-embed-text # 274 MB, one-time pull
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
**Threshold guidance:**
|
|
284
|
+
|
|
285
|
+
The default threshold of `0.60` is validated against `nomic-embed-text` (768-dim). Empirical
|
|
286
|
+
scores observed in the eval suite:
|
|
287
|
+
|
|
288
|
+
| Pair | Score |
|
|
289
|
+
|---|---|
|
|
290
|
+
| Semantically similar API design texts | ~0.77 |
|
|
291
|
+
| Semantically unrelated (API vs. bread baking) | ~0.41 |
|
|
292
|
+
|
|
293
|
+
A threshold of `0.60` cleanly separates these two classes. If your domain records are more
|
|
294
|
+
homogeneous (narrow vocabulary, very similar boilerplate) you may need to raise the threshold
|
|
295
|
+
to `0.70–0.80` to avoid over-clustering.
|
|
296
|
+
|
|
297
|
+
### Fail-closed rationale
|
|
298
|
+
|
|
299
|
+
The vector detector throws an `Error` with `code="EMBED_FAILURE"` rather than returning `[]`
|
|
300
|
+
when the embedding call fails (network error, HTTP error, malformed response, wrong vector
|
|
301
|
+
count). This is intentional.
|
|
302
|
+
|
|
303
|
+
A detector that silently returns `[]` on infrastructure failure is indistinguishable from one
|
|
304
|
+
that found no similar records. The result is a misleading `MISSING_EVIDENCE` at the detect-cluster
|
|
305
|
+
gate, which looks like "this concept has no sources" rather than "the embedding service is down".
|
|
306
|
+
|
|
307
|
+
Failing closed makes the infrastructure problem visible immediately, at the right level, with a
|
|
308
|
+
clear error code. Operators can catch `EMBED_FAILURE` separately from `MISSING_EVIDENCE` and
|
|
309
|
+
route them to different alerting channels.
|
|
310
|
+
|
|
311
|
+
### Injecting a custom embed function
|
|
312
|
+
|
|
313
|
+
For tests or alternative providers (OpenAI, Cohere, etc.), pass `embed` directly:
|
|
314
|
+
|
|
315
|
+
```js
|
|
316
|
+
const detector = createVectorSimilarityDetector({
|
|
317
|
+
embed: async (texts) => {
|
|
318
|
+
// texts: string[] — one per record (title + "\n" + body by default)
|
|
319
|
+
// must return: number[][] — one vector per input text
|
|
320
|
+
const response = await myEmbeddingAPI.embed(texts);
|
|
321
|
+
return response.vectors;
|
|
322
|
+
},
|
|
323
|
+
threshold: 0.70,
|
|
324
|
+
});
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
The `embed` function is called once per `synthesize`/`consolidate` call with all texts in a
|
|
328
|
+
single batch (concept first, then candidates).
|
|
@@ -524,3 +524,127 @@ Superseded snapshots MUST remain queryable:
|
|
|
524
524
|
|
|
525
525
|
There is no `"archived"` or `"deleted"` status field; the supersession chain is expressed entirely
|
|
526
526
|
through links and mutation log entries.
|
|
527
|
+
|
|
528
|
+
---
|
|
529
|
+
|
|
530
|
+
## Addendum B — Record Status Lifecycle (S7)
|
|
531
|
+
|
|
532
|
+
### B.1 Status Field
|
|
533
|
+
|
|
534
|
+
Every record envelope (§1.1) gains an optional `status` field:
|
|
535
|
+
|
|
536
|
+
| Field | Type | Required | Default | Description |
|
|
537
|
+
|---|---|---|---|---|
|
|
538
|
+
| `status` | `"active"` \| `"implemented"` \| `"retired"` | no | `"active"` | Lifecycle status of the record. Records without a status field are treated as `"active"`. |
|
|
539
|
+
|
|
540
|
+
`status` is a mutable field but MUST only change via the `retire` mutation op (§B.4).
|
|
541
|
+
Direct field updates via the `update` op MUST NOT change `status`; `update` MUST ignore `status` if
|
|
542
|
+
supplied in the fields argument.
|
|
543
|
+
|
|
544
|
+
### B.2 Allowed Status Transitions
|
|
545
|
+
|
|
546
|
+
| From | To | Op | Required Evidence |
|
|
547
|
+
|---|---|---|---|
|
|
548
|
+
| `"active"` | `"implemented"` | `retire` | `rationale` (non-empty) + `implementedByRef` (non-empty, references the implementing artifact/commit/PR) |
|
|
549
|
+
| `"active"` | `"retired"` | `retire` | `rationale` (non-empty, explains obsolescence) |
|
|
550
|
+
| `"implemented"` | `"retired"` | `retire` | `rationale` (non-empty) |
|
|
551
|
+
|
|
552
|
+
No other transitions are permitted. Attempting an invalid transition MUST throw with
|
|
553
|
+
`error.code === "MISSING_EVIDENCE"` and a human-readable `message`.
|
|
554
|
+
|
|
555
|
+
Records in `"retired"` status have no further transitions — they are terminal.
|
|
556
|
+
|
|
557
|
+
### B.3 Working-Set Exclusion
|
|
558
|
+
|
|
559
|
+
Records with `status === "retired"` are EXCLUDED from the default working set:
|
|
560
|
+
|
|
561
|
+
- `listByType(type)` returns only non-retired records by default.
|
|
562
|
+
- `listByCategory(category, options?)` returns only non-retired records by default.
|
|
563
|
+
- `defaultSimilarityDetector` considers only non-retired compiled records as candidates.
|
|
564
|
+
- The vector similarity detector (`createVectorSimilarityDetector`) considers only non-retired
|
|
565
|
+
compiled records as candidates.
|
|
566
|
+
|
|
567
|
+
All four filtering surfaces accept an `includeRetired: true` option (or equivalent flag on the
|
|
568
|
+
similarity detector) to restore retired records to the result set.
|
|
569
|
+
|
|
570
|
+
Retired records remain **fully queryable with provenance**:
|
|
571
|
+
- `get(id)` always returns the full record regardless of status.
|
|
572
|
+
- `listByType(type, { includeRetired: true })` returns all records of that type.
|
|
573
|
+
- `listByCategory(category, { includeRetired: true })` returns all matching records.
|
|
574
|
+
- The record's `mutation_log` carries the full retirement evidence.
|
|
575
|
+
|
|
576
|
+
### B.4 `retire` Mutation Operation
|
|
577
|
+
|
|
578
|
+
The `retire` op transitions a record from `"active"` or `"implemented"` to the target status.
|
|
579
|
+
It NEVER deletes the record. The record body, links, and provenance remain intact.
|
|
580
|
+
|
|
581
|
+
**Required fields:**
|
|
582
|
+
|
|
583
|
+
| Field | Location | Description |
|
|
584
|
+
|---|---|---|
|
|
585
|
+
| `id` | argument | ID of the record to retire. |
|
|
586
|
+
| `targetStatus` | argument | Target status: `"implemented"` or `"retired"`. |
|
|
587
|
+
| `agent` | evidence | Agent performing the retirement. |
|
|
588
|
+
| `rationale` | evidence | Non-empty string explaining why the record is being retired. Required for all target statuses. |
|
|
589
|
+
|
|
590
|
+
**Conditional evidence fields:**
|
|
591
|
+
|
|
592
|
+
| Field | Location | Condition | Description |
|
|
593
|
+
|---|---|---|---|
|
|
594
|
+
| `implementedByRef` | evidence | `targetStatus === "implemented"` | Non-empty reference to the implementing artifact (commit SHA, PR URL, issue number, etc.). |
|
|
595
|
+
| `supersededByRef` | evidence | optional for `targetStatus === "retired"` | Reference to a superseding record or artifact. |
|
|
596
|
+
|
|
597
|
+
**Rejection conditions:**
|
|
598
|
+
- Record with `id` does not exist.
|
|
599
|
+
- `targetStatus` is not `"implemented"` or `"retired"`.
|
|
600
|
+
- Current status transition is invalid (see §B.2).
|
|
601
|
+
- `rationale` is missing or empty.
|
|
602
|
+
- `targetStatus === "implemented"` and `implementedByRef` is missing or empty.
|
|
603
|
+
- Missing `agent` in evidence.
|
|
604
|
+
|
|
605
|
+
**Post-conditions:**
|
|
606
|
+
- Record `status` is updated to `targetStatus`.
|
|
607
|
+
- Record `updated_at` is refreshed.
|
|
608
|
+
- A mutation log entry (op=`"retire"`) is appended, carrying `targetStatus`, `rationale`,
|
|
609
|
+
and any supplied `implementedByRef` / `supersededByRef`.
|
|
610
|
+
- The record body, `links`, and creation `provenance` are NOT changed.
|
|
611
|
+
- `get(id)` returns the full record with the updated status.
|
|
612
|
+
- `listByType(type)` (without `includeRetired`) no longer returns this record if
|
|
613
|
+
`targetStatus === "retired"`.
|
|
614
|
+
|
|
615
|
+
### B.5 Adapter Contract Extension
|
|
616
|
+
|
|
617
|
+
The adapter interface (§8) is extended:
|
|
618
|
+
|
|
619
|
+
```ts
|
|
620
|
+
interface KnowledgeStoreAdapter {
|
|
621
|
+
// ... existing methods ...
|
|
622
|
+
retire(id: string, targetStatus: "implemented" | "retired", evidence: RetireEvidence): Promise<void>;
|
|
623
|
+
listByType(type: RecordType, options?: { includeRetired?: boolean }): Promise<Record[]>;
|
|
624
|
+
listByCategory(category: string, options?: { prefix?: boolean; includeRetired?: boolean }): Promise<Record[]>;
|
|
625
|
+
}
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
`RetireEvidence`:
|
|
629
|
+
```ts
|
|
630
|
+
interface RetireEvidence {
|
|
631
|
+
agent: string;
|
|
632
|
+
rationale: string;
|
|
633
|
+
implementedByRef?: string; // required when targetStatus === "implemented"
|
|
634
|
+
supersededByRef?: string; // optional
|
|
635
|
+
note?: string;
|
|
636
|
+
}
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
### B.6 Provenance and History Guarantee
|
|
640
|
+
|
|
641
|
+
Retired records MUST remain reachable from:
|
|
642
|
+
- `get(id)` — always returns the full record.
|
|
643
|
+
- `listByType(type, { includeRetired: true })` and `listByCategory(category, { includeRetired: true })`.
|
|
644
|
+
- Snapshot `provenance.source_ids` — any snapshot that included the record in its cluster before
|
|
645
|
+
retirement retains the reference intact. The retired record can be retrieved via `get(sourceId)`.
|
|
646
|
+
- The retirement `mutation_log` entry carries the full evidence of why and when the record was
|
|
647
|
+
retired and by whom.
|
|
648
|
+
|
|
649
|
+
There is no deletion of records. Physical purge (if ever needed) is a separate, future policy
|
|
650
|
+
hook not defined in this version.
|
|
@@ -411,12 +411,17 @@ describe("Knowledge Kit Store Contract Suite", () => {
|
|
|
411
411
|
);
|
|
412
412
|
});
|
|
413
413
|
|
|
414
|
-
test("
|
|
415
|
-
|
|
414
|
+
test("propose accepts any record type as target (Addendum B: retire flow needs non-concept targets)", async () => {
|
|
415
|
+
// Addendum B (S7) extends propose to accept any record type as the target,
|
|
416
|
+
// enabling the retire flow to attach proposals to compiled/raw/snapshot records.
|
|
417
|
+
const rawTarget = await store.create({ type: "raw", title: "NC", body: "nc", category: "test", provenance: { agent: "tester" } });
|
|
416
418
|
const pid = await store.create({ type: "raw", title: "P3", body: "p", category: "test", provenance: { agent: "tester" } });
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
419
|
+
// Should NOT throw — all record types are valid proposal targets
|
|
420
|
+
await store.propose(rawTarget, pid, { agent: "tester", proposal: "retirement proposal" });
|
|
421
|
+
const { forward } = await store.getLinks(pid);
|
|
422
|
+
assert.ok(
|
|
423
|
+
forward.some((l) => l.target_id === rawTarget && l.kind === "proposes"),
|
|
424
|
+
"propose on raw record creates proposes link (Addendum B extension)"
|
|
420
425
|
);
|
|
421
426
|
});
|
|
422
427
|
|