@agfpd/iapeer-memory-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Embedding provider adapters (ADR-013) on top of the shared HTTP client.
3
+ *
4
+ * Providers (`EmbeddingProvider`):
5
+ * - `openai` — OpenAI-compatible `/v1/embeddings`:
6
+ * request { input: [...texts], model }
7
+ * response { data: [{ embedding: [number...] }, ...] }
8
+ * This is the DEFAULT and the behaviour inherited verbatim from the
9
+ * reference client (which spoke this wire format against TEI's
10
+ * OpenAI-compatible endpoint). It covers OpenAI, NVIDIA NIM
11
+ * (openai-compat), and the key local case — Ollama / LM Studio /
12
+ * llama.cpp server on localhost.
13
+ * - `tei` — native TEI `/embed`:
14
+ * request { inputs: [...texts] }
15
+ * response [ [number...], ... ]
16
+ *
17
+ * Graceful degradation: `{vectors: null, status}` when the endpoint is
18
+ * unavailable or returns malformed data — search falls back to BM25-only.
19
+ * Timeout & circuit breaker live in the shared client (`http-client.ts`);
20
+ * this module keeps its OWN breaker instance so embedding failures never
21
+ * block the reranker and vice versa.
22
+ */
23
+
24
+ import { makeCircuitBreaker, postJson } from "./http-client.js";
25
+ import type { ProviderCallStatus } from "./http-client.js";
26
+
/** Wire formats the embedding client can speak. */
export type EmbeddingProvider = "tei" | "openai";

/** Every supported provider id. */
export const EMBEDDING_PROVIDERS: readonly EmbeddingProvider[] = ["tei", "openai"];

/**
 * Type guard: true when `value` is one of the ids listed in
 * `EMBEDDING_PROVIDERS` (exact, case-sensitive match).
 */
export function isEmbeddingProvider(value: string): value is EmbeddingProvider {
  return EMBEDDING_PROVIDERS.some((known) => known === value);
}
34
+
/** Connection and wire-format settings for one embedding endpoint. */
export type EmbeddingConfig = {
  /** URL the request body is POSTed to. */
  endpoint: string;
  /** Model id; placed in the request body for the `openai` wire format. */
  model: string;
  /** Vector size — not read by this module; presumably consumed by callers. */
  dimensions: number;
  /** Number of texts sent per request. */
  batchSize: number;
  /**
   * Bearer token when the endpoint sits behind auth (cloud providers,
   * reverse-proxied local servers). Null for direct local access.
   */
  apiKey: string | null;
  /** Wire format (ADR-013). Default "openai" — the inherited behaviour. */
  provider?: EmbeddingProvider;
};
46
+
/**
 * Outcome of one embedding call; it is surfaced in the vault_search
 * response as `pipeline.embedding`.
 *
 * - `ok`           — valid response;
 * - `timeout`      — fetch aborted by the default timeout;
 * - `error`        — non-2xx, network failure or malformed payload;
 * - `circuit-open` — skipped without a network call (breaker in cooldown).
 */
export type EmbeddingStatus = ProviderCallStatus;

/** Result of a batch embedding call; `vectors` is null on any non-ok status. */
export type EmbedBatchResult = {
  vectors: Float32Array[] | null;
  status: EmbeddingStatus;
};

/** Result of a single-text embedding call; `vector` is null on any non-ok status. */
export type EmbedQueryResult = {
  vector: Float32Array | null;
  status: EmbeddingStatus;
};
67
+
// One breaker per process, owned by this module. A process restart creates
// a new one, which is intended: each new session gives the endpoint a
// fresh chance.
const breaker = makeCircuitBreaker();

/**
 * Test-only hook: resets the module's circuit breaker so state from one
 * test case does not carry over into the next.
 */
export function _resetEmbeddingCircuitForTests(): void {
  breaker._resetForTests();
}
78
+
/**
 * Build the JSON request body for one batch in the configured wire format
 * (`openai` when `config.provider` is unset).
 *
 * - `openai` → `{ input, model }`
 * - `tei`    → `{ inputs }`
 */
function buildRequestBody(batch: string[], config: EmbeddingConfig): unknown {
  const wireFormat = config.provider ?? "openai";
  if (wireFormat === "openai") {
    return { input: batch, model: config.model };
  }
  if (wireFormat === "tei") {
    return { inputs: batch };
  }
  // Not reachable for a well-typed config; a provider value outside the
  // union yields undefined.
  return undefined;
}
88
+
/**
 * Parse one provider response into vectors, or return null when the shape
 * is wrong.
 *
 * Expected shapes:
 * - `openai`: `{ data: [{ embedding: number[] }, ...] }`
 * - anything else (i.e. `tei`): `[ number[], ... ]`
 *
 * Every embedding must be an array of finite numbers. This is checked
 * explicitly because `new Float32Array(x)` accepts almost anything: a
 * number `n` yields `n` zeros and `undefined` yields an empty vector, so an
 * unchecked malformed payload would otherwise come back as "valid" vectors.
 *
 * @param json   Decoded response body.
 * @param config Embedding config; only `provider` is read.
 * @returns One `Float32Array` per row, or null on any shape violation.
 */
function parseResponse(json: unknown, config: EmbeddingConfig): Float32Array[] | null {
  const provider = config.provider ?? "openai";

  // Step 1: locate the raw embedding rows for the wire format.
  let rows: unknown;
  if (provider === "openai") {
    if (typeof json !== "object" || json === null) return null;
    const data: unknown = (json as { data?: unknown }).data;
    if (!Array.isArray(data)) return null;
    rows = data.map((item: unknown) =>
      typeof item === "object" && item !== null
        ? (item as { embedding?: unknown }).embedding
        : undefined,
    );
  } else {
    // tei: a bare array of float arrays
    rows = json;
  }
  if (!Array.isArray(rows)) return null;

  // Step 2: validate each row before handing it to the typed-array constructor.
  const vectors: Float32Array[] = [];
  for (const row of rows as unknown[]) {
    if (!Array.isArray(row)) return null;
    const allFinite = row.every(
      (component: unknown) => typeof component === "number" && Number.isFinite(component),
    );
    if (!allFinite) return null;
    vectors.push(new Float32Array(row as number[]));
  }
  return vectors;
}
105
+
/**
 * Embed a list of texts, sending them in batches of `config.batchSize`.
 *
 * Returns `{vectors, status}` where `vectors === null` on any non-ok
 * status; the first failing batch aborts the whole call. Callers check
 * `status` when they need to tell a timeout from an error.
 *
 * @param texts  Texts to embed; an empty list short-circuits to
 *               `{vectors: [], status: "ok"}` without a network call.
 * @param config Endpoint, wire format, auth and batch size.
 * @param signal Optional abort signal forwarded to every HTTP request.
 * @returns On success, one vector per input text, in input order.
 */
export async function embedTexts(
  texts: string[],
  config: EmbeddingConfig,
  signal?: AbortSignal,
): Promise<EmbedBatchResult> {
  if (!texts.length) return { vectors: [], status: "ok" };

  // Guard the loop step: a batch size of 0 would never advance (infinite
  // requests with an empty batch), and negative / NaN / fractional values
  // mis-slice. Anything unusable degrades to "one batch with everything".
  const batchSize =
    Number.isFinite(config.batchSize) && config.batchSize >= 1
      ? Math.floor(config.batchSize)
      : texts.length;

  const allVectors: Float32Array[] = [];

  for (let start = 0; start < texts.length; start += batchSize) {
    const batch = texts.slice(start, start + batchSize);

    const result = await postJson({
      endpoint: config.endpoint,
      body: buildRequestBody(batch, config),
      apiKey: config.apiKey,
      signal,
      breaker,
    });
    if (result.status !== "ok") {
      return { vectors: null, status: result.status };
    }

    const vectors = parseResponse(result.json, config);
    // A well-formed response with the wrong number of rows would silently
    // shift every later vector against its text, so it counts as malformed.
    if (vectors === null || vectors.length !== batch.length) {
      breaker.recordFailure();
      return { vectors: null, status: "error" };
    }
    for (const vector of vectors) allVectors.push(vector);
  }

  return { vectors: allVectors, status: "ok" };
}
144
+
/**
 * Embed a single text by delegating to `embedTexts` with a one-element
 * list. Returns `{vector, status}` where `vector === null` on any non-ok
 * status.
 *
 * @param text   Text to embed.
 * @param config Endpoint, wire format, auth and batch size.
 * @param signal Optional abort signal, forwarded to `embedTexts` so a
 *               single-query call can be cancelled like a batch call.
 *               Omitting it keeps the previous behaviour.
 */
export async function embedQuery(
  text: string,
  config: EmbeddingConfig,
  signal?: AbortSignal,
): Promise<EmbedQueryResult> {
  const { vectors, status } = await embedTexts([text], config, signal);
  return {
    vector: vectors?.[0] ?? null,
    status,
  };
}
159
+
/**
 * Cosine similarity of two vectors.
 *
 * Returns 0 when either vector has zero norm, and also when the two
 * vectors have different lengths: vectors of different dimensionality are
 * not comparable, and reading past the end of the shorter one would yield
 * NaN (or silently ignore the tail of the longer one).
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns Similarity in [-1, 1] up to rounding, or 0 for the degenerate
 *          cases above.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  if (a.length !== b.length) return 0;

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
@@ -0,0 +1,352 @@
1
+ /**
2
+ * fm-update — YAML-aware structural frontmatter edits for batch paths.
3
+ *
4
+ * TS port of the reference `scripts/mergemind-fm-update.py` (behavioural
5
+ * parity against `tests/python/test_fm_update.py`, 38 fixtures).
6
+ *
7
+ * Structural tool for safe frontmatter changes: `set` scalar, `unset` key,
8
+ * `list-add` / `list-remove` item. The round-trip parser preserves key
9
+ * order and value form (scalar vs block-list). By construction an orphan
10
+ * list item is impossible: unsetting a key removes the key AND its items
11
+ * atomically; scalar→list promotion normalises the value.
12
+ *
13
+ * Why: batch edits of multi-line YAML with sed/regex broke structure — sed
14
+ * edited one line without knowing ` - item` lines hang under it. The
15
+ * 2026-05-20 incident destroyed frontmatter on ~180 notes. `sed` over
16
+ * frontmatter is BANNED; every bash-path edit goes through this module.
17
+ *
18
+ * After structural changes the same `frontmatter-fill` logic the post-write
19
+ * hook uses stamps attribution (`last_edited_by` / `updated` /
20
+ * `needs_review` via curator-set, ADR-006) — the bash path stamps
21
+ * equivalently to the harness edit path BY CONSTRUCTION.
22
+ *
23
+ * CLI contract (the package facade binary wires argv to `fmUpdate`):
24
+ *
25
+ * iapeer-memory fm-update [--agent NAME] [--vault PATH] [--no-stamp]
26
+ * [--set KEY VALUE | --unset KEY | --list-add KEY VALUE
27
+ * | --list-remove KEY VALUE]...
28
+ * FILE [FILE ...]
29
+ *
30
+ * - `--agent` defaults to `resolveAgentName()` (PEER_PERSONALITY first,
31
+ * never cwd guessing — nuance 10);
32
+ * - with no operations it is a pure attribution stamp (the reference
33
+ * `stamp.sh` equivalent);
34
+ * - operations apply to every file, in category order set → unset →
35
+ * list-add → list-remove (predictable regardless of flag order);
36
+ * - non-.md / missing files are skipped silently.
37
+ */
38
+
39
+ import fs from "node:fs";
40
+ import type { TaxonomyPreset } from "./taxonomy.js";
41
+ import {
42
+ assemble,
43
+ atomicWrite,
44
+ processFile,
45
+ splitFrontmatter,
46
+ stripBrokenDelims,
47
+ yamlDoubleQuote,
48
+ yamlNeedsQuoting,
49
+ } from "./frontmatter-fill.js";
50
+
// `key: value` starting at column 0. Group 1 is the key, group 2 the
// (possibly empty) value with surrounding whitespace trimmed.
const KEY_RE = /^([A-Za-z_][\w-]*)\s*:\s*(.*?)\s*$/;
// Indented `- item` line. Group 1 is the item text, trimmed.
const ITEM_RE = /^\s+-\s+(.*?)\s*$/;

/** Frontmatter entry holding a single string value. */
export class Scalar {
  public value: string;

  constructor(value: string) {
    this.value = value;
  }
}

/** Frontmatter entry holding an ordered list of string items. */
export class FmList {
  public items: string[];

  constructor(items: string[] = []) {
    this.items = items;
  }
}

/** Any value a frontmatter key can map to. */
export type Entry = Scalar | FmList;
63
+
/**
 * Remove one pair of matching outer quotes (`"…"` or `'…'`) from `v`.
 * Strings shorter than two characters, or whose first and last characters
 * are not the same quote character, are returned unchanged.
 */
function stripPairedQuotes(v: string): string {
  if (v.length < 2) return v;
  const opener = v.charAt(0);
  const opensWithQuote = opener === '"' || opener === "'";
  return opensWithQuote && v.endsWith(opener) ? v.slice(1, -1) : v;
}
74
+
/**
 * Render a scalar as valid YAML using the shared quoting rules from
 * frontmatter-fill, so there is a single source of truth for quoting.
 *
 * The round-trip parser strips quotes on read; this is where they are put
 * back. When `yamlNeedsQuoting` says the value is unsafe as a plain
 * scalar, it is cleaned with `stripBrokenDelims` and double-quoted;
 * otherwise it is returned untouched.
 */
export function yamlSafeScalar(value: string): string {
  return yamlNeedsQuoting(value)
    ? yamlDoubleQuote(stripBrokenDelims(value))
    : value;
}
88
+
/**
 * Round-trip AST for the frontmatter YAML subset: top-level scalars,
 * block-style lists (`key:` followed by indented `- item` lines), inline
 * lists (`key: [a, b]`) and inline null (`key: null` / `key: ~`). Anything
 * else is dropped — that IS the structural sanitisation of incident states
 * (a lone `- value` without an open key above is silently discarded, never
 * re-attached).
 *
 * Invariant: list items live INSIDE FmList — an orphan is impossible.
 * Empty scalars and empty lists are dropped after parsing.
 *
 * Key order is tracked separately in `order`; `entries` and `order` always
 * hold the same set of keys.
 */
export class Frontmatter {
  private entries = new Map<string, Entry>();
  private order: string[] = [];

  /**
   * Parse the text between the frontmatter delimiters into an AST.
   * Unrecognised lines are dropped and close any open list.
   */
  static fromText(text: string): Frontmatter {
    const fm = new Frontmatter();
    // Key that may still receive `- item` lines; null when no list is open.
    let currentListKey: string | null = null;
    for (const rawLine of text.split("\n")) {
      const line = rawLine.replace(/\r$/, "");
      if (!line.trim()) {
        // A blank line closes the open list.
        currentListKey = null;
        continue;
      }

      const mKey = KEY_RE.exec(line);
      if (mKey) {
        const key = mKey[1];
        let value = mKey[2].trim();
        if (value.startsWith("[") && value.endsWith("]")) {
          // Inline list: split on commas. Items keep whatever quoting
          // they were written with.
          const inner = value.slice(1, -1).trim();
          const items = inner
            ? inner.split(",").map((s) => s.trim()).filter(Boolean)
            : [];
          fm.setEntry(key, new FmList(items));
          currentListKey = null;
        } else if (value === "" || value === "null" || value === "~") {
          // Empty placeholder. Only a truly empty value (`key:`) opens a
          // block list; explicit nulls do not.
          fm.setEntry(key, new Scalar(""));
          currentListKey = value === "" ? key : null;
        } else {
          value = stripPairedQuotes(value);
          fm.setEntry(key, new Scalar(value));
          currentListKey = null;
        }
        continue;
      }

      const mItem = ITEM_RE.exec(line);
      if (mItem && currentListKey !== null) {
        const item = stripPairedQuotes(mItem[1].trim());
        const existing = fm.entries.get(currentListKey);
        if (existing instanceof Scalar) {
          // First item under `key:` — promote the empty placeholder to a list.
          fm.entries.set(currentListKey, new FmList([item]));
        } else if (existing instanceof FmList) {
          existing.items.push(item);
        }
        continue;
      }

      // Orphan line — drop (root protection against broken YAML).
      currentListKey = null;
    }
    fm.dropEmpties();
    return fm;
  }

  /** Remove every key whose value is an empty scalar or an empty list. */
  private dropEmpties(): void {
    // Iterate over a copy: `remove` mutates `order`.
    for (const key of [...this.order]) {
      const entry = this.entries.get(key)!;
      if (entry instanceof Scalar && entry.value === "") this.remove(key);
      else if (entry instanceof FmList && entry.items.length === 0) this.remove(key);
    }
  }

  /** Store `entry` under `key`; a new key is appended to the key order. */
  private setEntry(key: string, entry: Entry): void {
    if (!this.entries.has(key)) this.order.push(key);
    this.entries.set(key, entry);
  }

  /**
   * Serialise one list item. Block-list items had their quotes stripped on
   * read, so they go through the same `yamlSafeScalar` normaliser as
   * scalars — otherwise `- "Foo: Bar"` would be written back as
   * `- Foo: Bar`, which YAML reads as a mapping. Items that still carry a
   * matching pair of outer quotes (inline-list items keep theirs) are
   * emitted unchanged so they are not wrapped twice.
   */
  private static itemToYaml(item: string): string {
    if (stripPairedQuotes(item) !== item) return item;
    return yamlSafeScalar(item);
  }

  /** True when `key` is present. */
  has(key: string): boolean {
    return this.entries.has(key);
  }

  /** The entry stored under `key`, or undefined. */
  get(key: string): Entry | undefined {
    return this.entries.get(key);
  }

  /** Set `key` to a scalar, replacing any previous scalar or list. */
  setScalar(key: string, value: string): void {
    this.setEntry(key, new Scalar(value));
  }

  /**
   * Delete `key` (for a list: the key together with all its items).
   * @returns true when the key existed.
   */
  remove(key: string): boolean {
    if (!this.entries.has(key)) return false;
    this.entries.delete(key);
    this.order.splice(this.order.indexOf(key), 1);
    return true;
  }

  /**
   * Add `value` to the list under `key` unless it is already present.
   * A missing key becomes a one-item list; an existing scalar is promoted
   * to a list that keeps its old value as the first item.
   */
  listAppend(key: string, value: string): void {
    const existing = this.entries.get(key);
    if (existing === undefined) {
      this.setEntry(key, new FmList([value]));
      return;
    }
    if (existing instanceof Scalar) {
      const promoted = new FmList(existing.value ? [existing.value] : []);
      if (!promoted.items.includes(value)) promoted.items.push(value);
      this.entries.set(key, promoted);
      return;
    }
    if (!existing.items.includes(value)) existing.items.push(value);
  }

  /**
   * Remove the first occurrence of `value` from the list under `key`.
   * A scalar equal to `value` removes the key; a list that becomes empty
   * removes the key as well. A missing key is a no-op.
   */
  listRemove(key: string, value: string): void {
    const existing = this.entries.get(key);
    if (existing === undefined) return;
    if (existing instanceof Scalar) {
      if (existing.value === value) this.remove(key);
      return;
    }
    const i = existing.items.indexOf(value);
    if (i !== -1) existing.items.splice(i, 1);
    if (existing.items.length === 0) this.remove(key);
  }

  /**
   * Serialise the AST in key order: scalars as `key: value`, lists in
   * block style. Empty scalars and empty lists are skipped.
   */
  toText(): string {
    const out: string[] = [];
    for (const key of this.order) {
      const entry = this.entries.get(key)!;
      if (entry instanceof Scalar) {
        if (entry.value === "") continue;
        out.push(`${key}: ${yamlSafeScalar(entry.value)}\n`);
      } else {
        if (!entry.items.length) continue;
        out.push(`${key}:\n`);
        for (const item of entry.items) out.push(` - ${Frontmatter.itemToYaml(item)}\n`);
      }
    }
    return out.join("");
  }
}
230
+
/** The structural operations fm-update supports. */
export type OpKind = "set" | "unset" | "list-add" | "list-remove";

/**
 * One structural operation. `value` is required for every kind except
 * `unset`.
 */
export type Op = {
  kind: OpKind;
  key: string;
  value?: string;
};

/**
 * Return the operation's value, or throw when it is missing. Without this
 * check a value-less `set` / `list-add` would store `undefined`, which is
 * then serialised into the note as the literal text "undefined".
 */
function requireOpValue(op: Op): string {
  if (typeof op.value !== "string") {
    throw new Error(`Operation ${op.kind} on key "${op.key}" requires a value`);
  }
  return op.value;
}

/**
 * Apply `ops` to `fm` in the given order, mutating `fm` in place.
 *
 * @throws Error when an operation has an unknown kind, or when a `set` /
 *         `list-add` / `list-remove` operation carries no value.
 *         Operations before the failing one have already been applied.
 */
export function applyOps(fm: Frontmatter, ops: Op[]): void {
  for (const op of ops) {
    switch (op.kind) {
      case "set":
        fm.setScalar(op.key, requireOpValue(op));
        break;
      case "unset":
        fm.remove(op.key);
        break;
      case "list-add":
        fm.listAppend(op.key, requireOpValue(op));
        break;
      case "list-remove":
        fm.listRemove(op.key, requireOpValue(op));
        break;
      default:
        throw new Error(`Unknown operation: ${String(op.kind)}`);
    }
  }
}
259
+
/**
 * Run structural operations against the frontmatter of one file.
 *
 * A frontmatter block is created when the file has none and the
 * operations produced content; if the file has none and nothing was
 * produced (e.g. unset-only), the file is left alone.
 *
 * @returns true when the file was rewritten; false when the path cannot
 *          be stat'ed, is not a regular file, or the result is identical
 *          to the current content.
 */
export function updateFile(filePath: string, ops: Op[]): boolean {
  let isRegularFile: boolean;
  try {
    isRegularFile = fs.statSync(filePath).isFile();
  } catch {
    return false;
  }
  if (!isRegularFile) return false;

  const original = fs.readFileSync(filePath, "utf-8");
  const [fmBlock, body] = splitFrontmatter(original);

  const parsed = Frontmatter.fromText(fmBlock);
  applyOps(parsed, ops);
  const rendered = parsed.toText();

  // No block before and nothing to write now: do not create an empty one.
  if (!fmBlock && !rendered) return false;

  const updated = assemble(rendered, body);
  if (updated === original) return false;

  atomicWrite(filePath, updated);
  return true;
}
285
+
/**
 * Flatten per-category arguments into a single operation list, always in
 * the category order set → unset → list-add → list-remove. Within one
 * category the argument order is kept. Fixing the category order makes the
 * result independent of the order the flags were given in.
 */
export function collectOps(opts: {
  set?: Array<[string, string]>;
  unset?: string[];
  listAdd?: Array<[string, string]>;
  listRemove?: Array<[string, string]>;
}): Op[] {
  const sets = (opts.set ?? []).map(([key, value]): Op => ({ kind: "set", key, value }));
  const unsets = (opts.unset ?? []).map((key): Op => ({ kind: "unset", key }));
  const adds = (opts.listAdd ?? []).map(([key, value]): Op => ({ kind: "list-add", key, value }));
  const removes = (opts.listRemove ?? []).map(
    ([key, value]): Op => ({ kind: "list-remove", key, value }),
  );
  return [...sets, ...unsets, ...adds, ...removes];
}
304
+
/** Arguments of `fmUpdate`. */
export type FmUpdateOptions = {
  /** Paths to process; non-`.md` and non-file paths are skipped. */
  files: string[];
  /** Structural operations applied to every file; none = pure stamp. */
  ops?: Op[];
  /** Writer identity; resolve via resolveAgentName at the CLI level. */
  agent?: string | null;
  vault?: string;
  taxonomy: TaxonomyPreset;
  curatorSet?: readonly string[];
  /** false = structural ops only, no attribution stamp. */
  stamp?: boolean;
  /** Injectable for tests. */
  now?: Date;
};

/**
 * Entry point of fm-update. For every existing `.md` file it first applies
 * the structural operations (when any were given) and then stamps
 * attribution via `processFile` with `zone: "auto"` — the same fill logic
 * the post-write hook uses.
 *
 * The stamp is skipped when `stamp` is false or when the agent name is
 * empty after trimming. With no operations the call is a pure stamp.
 */
export function fmUpdate(opts: FmUpdateOptions): void {
  const ops = opts.ops ?? [];
  const agent = (opts.agent ?? "").trim();
  const vault = (opts.vault ?? "").trim();
  // Stamping needs both the switch on and a non-empty writer identity.
  const shouldStamp = (opts.stamp ?? true) && agent !== "";

  for (const filePath of opts.files) {
    if (!filePath.endsWith(".md")) continue;

    let exists: boolean;
    try {
      exists = fs.statSync(filePath).isFile();
    } catch {
      exists = false;
    }
    if (!exists) continue;

    if (ops.length > 0) updateFile(filePath, ops);

    if (shouldStamp) {
      processFile(filePath, {
        zone: "auto",
        agent,
        vault,
        taxonomy: opts.taxonomy,
        curatorSet: opts.curatorSet,
        now: opts.now,
      });
    }
  }
}