mark-improving-agent 2.3.2 → 2.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.3.2
1
+ 2.3.4
@@ -0,0 +1,405 @@
1
+ /**
2
+ * Binary Vector Memory Compression
3
+ *
4
+ * High-performance binary vector quantization for memory embeddings.
5
+ * Based on QuIVer: Rethinking ANN Graph Topology via Training-Free Binary Quantization (Xiao et al., 2026)
6
+ *
7
+ * Achieves 32x compression: 1536-dim float32 (6144 bytes) → 192 bytes binary
8
+ * Uses Hamming distance (XOR + POPCOUNT) for fast similarity search.
9
+ *
10
+ * @module core/memory
11
+ * @fileoverview Binary vector quantization for memory compression
12
+ */
13
+ import { createLogger } from '../../utils/logger.js';
14
+ const logger = createLogger('[BinaryVector]');
15
+ /**
16
+ * Default configuration for 1536-dim embeddings (OpenAI text-embedding-3-small)
 *
 * Fields, as consumed by the functions in this module:
 * - dimension: number of components per vector; each vector is packed into
 *   Math.ceil(dimension / 8) bytes by the index functions.
 * - normalized: forwarded to quantizeToBinary() as its `normalized` argument.
 * - trackStats: when truthy, searchBinaryIndex() emits a debug log line with
 *   the average Hamming distance of the search.
17
+ */
18
+ export const DEFAULT_BINARY_CONFIG = {
19
+ dimension: 1536,
20
+ normalized: true,
21
+ trackStats: true,
22
+ };
23
/**
 * Build an empty binary vector index.
 *
 * @param config - Index configuration; `config.dimension` is logged and the
 *   object itself is stored on the index unchanged
 * @returns A new index with no packed vectors, no indices and no metadata
 */
export function createBinaryIndex(config) {
  const { dimension } = config;
  logger.info('Creating binary vector index', { dimension });

  const emptyIndex = {
    vectors: new Uint8Array(0),
    indices: [],
    metadata: [],
    config,
  };
  return emptyIndex;
}
38
/**
 * Quantize a float vector to packed bits using sign-bit quantization.
 *
 * Component `i` maps to bit `7 - (i % 8)` of byte `Math.floor(i / 8)`, i.e. bits
 * are packed most-significant-bit first. A component that is `>= 0` (including
 * zero) sets its bit; negative components and NaN leave it clear. Unused
 * trailing bits of the last byte stay 0.
 *
 * @param vector - Float32 array (dimension length)
 * @param normalized - Accepted for API compatibility; this function does not
 *   currently consult it (sign quantization ignores magnitude)
 * @returns Uint8Array of `Math.ceil(vector.length / 8)` packed bytes
 *
 * @example
 * ```typescript
 * const vector = new Float32Array([0.5, -0.3, 0.1, -0.8]);
 * const binary = quantizeToBinary(vector);
 * // Signs are + - + -  →  bits 1 0 1 0, padded with zeros:
 * // Result: Uint8Array [0b10100000] (4 dimensions = 4 bits = 1 byte)
 * ```
 */
export function quantizeToBinary(vector, normalized = true) {
  const dim = vector.length;
  const packed = new Uint8Array(Math.ceil(dim / 8));
  for (let i = 0; i < dim; i++) {
    if (vector[i] >= 0) {
      // i >> 3 selects the byte, 0x80 >> (i & 7) selects the MSB-first bit.
      packed[i >> 3] |= 0x80 >> (i & 7);
    }
  }
  return packed;
}
66
/**
 * Expand a packed sign-bit vector back into an approximate float vector.
 *
 * This is lossy: only the sign of each component survives quantization, so
 * every component is reconstructed as `+m` (bit set) or `-m` (bit clear).
 * `m` is 0.5 when `normalized` is true and 1 otherwise. Previously the
 * non-normalized branch produced 0 for both bit values, discarding the sign.
 *
 * @param binary - Packed bits, MSB-first within each byte
 * @param dimension - Original dimension (number of components to produce)
 * @param normalized - true → ±0.5 components, false → ±1 components
 * @returns Approximate float32 vector of length `dimension`
 * @throws {RangeError} If `binary` holds fewer than `Math.ceil(dimension / 8)` bytes
 */
export function dequantizeFromBinary(binary, dimension, normalized = true) {
  const requiredBytes = Math.ceil(dimension / 8);
  if (binary.length < requiredBytes) {
    // Reading past the end would yield `undefined`, which bit-shifts as 0 and
    // silently fabricates negative components.
    throw new RangeError(
      `Binary vector has ${binary.length} bytes but dimension ${dimension} requires ${requiredBytes}`,
    );
  }

  const magnitude = normalized ? 0.5 : 1;
  const result = new Float32Array(dimension);
  for (let i = 0; i < dimension; i++) {
    const bit = (binary[i >> 3] >> (7 - (i & 7))) & 1;
    result[i] = bit ? magnitude : -magnitude;
  }
  return result;
}
88
/**
 * Count the bits that differ between two equally long packed bit vectors.
 *
 * Each byte pair is XOR-ed and the set bits of the result are counted with a
 * branch-free bit-parallel population count (no built-in popcount is used).
 *
 * @param a - First binary vector
 * @param b - Second binary vector
 * @returns Hamming distance (count of differing bits)
 * @throws {Error} If the two vectors differ in length
 *
 * @example
 * ```typescript
 * const dist = hammingDistance(binaryA, binaryB);
 * // dist = 0 means identical, dist = dimension means opposite
 * ```
 */
export function hammingDistance(a, b) {
  const byteCount = a.length;
  if (byteCount !== b.length) {
    throw new Error(`Binary vectors must have same length: ${a.length} vs ${b.length}`);
  }

  let differing = 0;
  for (let pos = 0; pos < byteCount; pos++) {
    // Only the low 8 bits take part in the count.
    let x = (a[pos] ^ b[pos]) & 0xFF;
    x -= (x >> 1) & 0x55; // 2-bit partial sums
    x = (x & 0x33) + ((x >> 2) & 0x33); // 4-bit partial sums
    differing += (x + (x >> 4)) & 0x0F; // fold both nibbles
  }
  return differing;
}
118
/**
 * Convert a Hamming distance into a similarity score.
 *
 * @param hammingDist - Number of differing bits
 * @param dimension - Total number of bits compared (vector dimension)
 * @returns `1 - hammingDist / dimension`: 1 for identical vectors, 0 when every bit differs
 */
export function hammingToSimilarity(hammingDist, dimension) {
  const mismatchFraction = hammingDist / dimension;
  return 1 - mismatchFraction;
}
128
/**
 * Append vectors to a binary index, returning a new index object.
 *
 * The input index is not mutated: a new packed byte array is allocated, the
 * existing bytes are copied over and each new vector is quantized and written
 * after them. All vectors are validated before anything is allocated.
 *
 * Metadata is kept aligned with the vectors: entry `i` is `metadata[i]` when
 * supplied, otherwise a default record that carries the caller's `ids[i]`
 * (previously defaults used an empty id, so the id was lost).
 *
 * @param index - Binary index to add to
 * @param vectors - Float32Array vectors (each `index.config.dimension` long)
 * @param ids - Unique identifiers for each vector
 * @param metadata - Optional per-vector metadata (id, importance, timestamp)
 * @returns Updated index (new object; `config` is shared with the input)
 * @throws {Error} If `vectors` and `ids` differ in length, or a vector has the wrong dimension
 */
export function addToBinaryIndex(index, vectors, ids, metadata) {
  const dim = index.config.dimension;
  if (vectors.length !== ids.length) {
    throw new Error(`Vectors count (${vectors.length}) must match IDs count (${ids.length})`);
  }
  for (const vector of vectors) {
    if (vector.length !== dim) {
      throw new Error(`Vector dimension ${vector.length} doesn't match index dimension ${dim}`);
    }
  }

  const bytesPerVector = Math.ceil(dim / 8);
  const existingBytes = index.vectors.length;
  const newVectors = new Uint8Array(existingBytes + vectors.length * bytesPerVector);
  newVectors.set(index.vectors);
  vectors.forEach((vector, i) => {
    const binary = quantizeToBinary(vector, index.config.normalized);
    newVectors.set(binary, existingBytes + i * bytesPerVector);
  });

  const now = Date.now();
  const firstPosition = index.indices.length;
  // One metadata record per id, so metadata[i] always describes vector i.
  const addedMetadata = ids.map(
    (id, i) => metadata?.[i] ?? { id, importance: 0.5, timestamp: now },
  );

  return {
    vectors: newVectors,
    indices: [...index.indices, ...ids.map((_, i) => firstPosition + i)],
    metadata: [...index.metadata, ...addedMetadata],
    config: index.config,
  };
}
167
/**
 * Brute-force nearest-neighbour search over a binary index by Hamming distance.
 *
 * The query is quantized the same way as stored vectors, compared against
 * every stored vector, and the results are ordered by similarity (highest
 * first).
 *
 * @param index - Binary index to search
 * @param queryVector - Query vector (`index.config.dimension` long)
 * @param k - Number of results to return; `undefined` returns all results,
 *   negative values are treated as 0
 * @returns Top-k results, each with `index`, `id`, `hammingDistance`,
 *   `similarity` and `metadata`
 * @throws {Error} If the query dimension does not match the index dimension
 *
 * @example
 * ```typescript
 * const results = searchBinaryIndex(index, queryEmbedding, 10);
 * // Returns top 10 most similar memories
 * ```
 */
export function searchBinaryIndex(index, queryVector, k) {
  const dim = index.config.dimension;
  const bytesPerVector = Math.ceil(dim / 8);
  if (queryVector.length !== dim) {
    throw new Error(`Query dimension ${queryVector.length} doesn't match index dimension ${dim}`);
  }

  const queryBinary = quantizeToBinary(queryVector, index.config.normalized);
  const vectorCount = index.indices.length;
  const results = [];
  let totalHamming = 0;

  for (let i = 0; i < vectorCount; i++) {
    const offset = i * bytesPerVector;
    const vectorBinary = index.vectors.subarray(offset, offset + bytesPerVector);
    const hDist = hammingDistance(queryBinary, vectorBinary);
    totalHamming += hDist;
    const meta = index.metadata[i];
    results.push({
      index: i,
      // Fall back to the positional index when no (non-empty) id was recorded.
      id: meta?.id || String(index.indices[i]),
      hammingDistance: hDist,
      similarity: hammingToSimilarity(hDist, dim),
      metadata: meta || { importance: 0.5, timestamp: Date.now() },
    });
  }

  results.sort((a, b) => b.similarity - a.similarity);

  // A negative k would make slice() count from the end and drop tail results.
  const limit = k === undefined ? undefined : Math.max(0, k);
  const topK = results.slice(0, limit);

  if (index.config.trackStats) {
    // Avoid 0 / 0 = NaN in the log line when the index is empty.
    const avgHamming = vectorCount > 0 ? totalHamming / vectorCount : 0;
    logger.debug(`Search completed: ${vectorCount} vectors, avg Hamming: ${avgHamming.toFixed(2)}`);
  }
  return topK;
}
214
/**
 * Report storage statistics for a binary index.
 *
 * Sizes are derived from the configured dimension and the number of stored
 * vectors: 4 bytes per component uncompressed versus `Math.ceil(dimension / 8)`
 * bytes packed. The ratio is computed per vector, so it is well defined for an
 * empty index too (previously 0 / 0 produced NaN).
 *
 * @param index - Binary index
 * @returns `{ originalBytes, compressedBytes, compressionRatio, vectorsStored,
 *   dimension, avgHammingDistance }`; `avgHammingDistance` is always 0 here
 */
export function getCompressionStats(index) {
  const dim = index.config.dimension;
  const float32Bytes = dim * 4; // 4 bytes per float32 component
  const binaryBytes = Math.ceil(dim / 8); // 1 bit per component
  const vectorsCount = index.indices.length;

  return {
    originalBytes: vectorsCount * float32Bytes,
    compressedBytes: vectorsCount * binaryBytes,
    // Per-vector ratio; 0 when the dimension itself is 0.
    compressionRatio: binaryBytes > 0 ? float32Bytes / binaryBytes : 0,
    vectorsStored: vectorsCount,
    dimension: dim,
    avgHammingDistance: 0, // Calculated on demand
  };
}
236
/**
 * Serialize a binary index into a single ArrayBuffer.
 *
 * Layout (all integers are unsigned 32-bit, big-endian — the DataView default):
 *   [configLen][config JSON, UTF-8]
 *   [metadataLen][metadata JSON, UTF-8]
 *   [indexCount][indexCount × uint32 index values]
 *   [packed vector bytes]
 *   [metadata entry count]
 *
 * Section lengths are measured on the encoded UTF-8 bytes, not on
 * `String.length`, so non-ASCII ids/metadata round-trip correctly. The buffer
 * is sized exactly, so a reader that treats "everything before the trailing
 * count" as vector data gets precisely the vector bytes.
 *
 * @param index - Binary index to save
 * @returns ArrayBuffer containing the serialized index
 */
export function serializeBinaryIndex(index) {
  const encoder = new TextEncoder();
  const configBytes = encoder.encode(JSON.stringify(index.config));
  const metadataBytes = encoder.encode(JSON.stringify(index.metadata));
  const indexCount = index.indices.length;

  const totalBytes =
    4 + configBytes.length +
    4 + metadataBytes.length +
    4 + indexCount * 4 +
    index.vectors.length +
    4;
  const buffer = new ArrayBuffer(totalBytes);
  const view = new DataView(buffer);
  const uint8 = new Uint8Array(buffer);
  let offset = 0;

  // Writes a length-prefixed byte section and advances the cursor.
  const writeSection = (bytes) => {
    view.setUint32(offset, bytes.length);
    offset += 4;
    uint8.set(bytes, offset);
    offset += bytes.length;
  };

  writeSection(configBytes);
  writeSection(metadataBytes);

  // Indices
  view.setUint32(offset, indexCount);
  offset += 4;
  for (const idx of index.indices) {
    view.setUint32(offset, idx);
    offset += 4;
  }

  // Vectors
  uint8.set(index.vectors, offset);
  offset += index.vectors.length;

  // Stored IDs count for compatibility
  view.setUint32(offset, index.metadata.length);
  return buffer;
}
277
/**
 * Rebuild a binary index from a buffer produced by serializeBinaryIndex().
 *
 * Expected layout (uint32 values are big-endian):
 *   [configLen][config JSON][metadataLen][metadata JSON]
 *   [indexCount][indices…][packed vector bytes][trailing count]
 *
 * The vector section length is derived from the data itself
 * (`indexCount × Math.ceil(config.dimension / 8)`) instead of "whatever is left
 * in the buffer", so buffers that carry padding after the vectors do not leak
 * that padding into `vectors`. If the config has no usable dimension, the
 * remaining bytes minus the 4-byte trailer are used.
 *
 * The returned `vectors` array is a copy and does not alias the input buffer.
 *
 * @param buffer - Serialized index: an ArrayBuffer, or any typed-array/Buffer view
 * @returns Reconstructed BinaryIndex `{ vectors, indices, metadata, config }`
 * @throws {RangeError} If the buffer is too short for the data it declares
 */
export function deserializeBinaryIndex(buffer) {
  // Accept views (Uint8Array, Node Buffer, …) as well as a bare ArrayBuffer.
  const uint8 = ArrayBuffer.isView(buffer)
    ? new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength)
    : new Uint8Array(buffer);
  const view = new DataView(uint8.buffer, uint8.byteOffset, uint8.byteLength);
  const decoder = new TextDecoder();
  let offset = 0;

  // Reads one length-prefixed UTF-8 JSON section and advances the cursor.
  const readJsonSection = () => {
    const length = view.getUint32(offset);
    offset += 4;
    const text = decoder.decode(uint8.subarray(offset, offset + length));
    offset += length;
    return JSON.parse(text);
  };

  const config = readJsonSection();
  const metadata = readJsonSection();

  // Indices
  const indicesLen = view.getUint32(offset);
  offset += 4;
  const indices = [];
  for (let i = 0; i < indicesLen; i++) {
    indices.push(view.getUint32(offset));
    offset += 4;
  }

  // Vectors
  const trailerBytes = 4; // trailing metadata count
  const available = uint8.byteLength - offset - trailerBytes;
  const expected = indicesLen * Math.ceil(config?.dimension / 8);
  let vectorsLen;
  if (Number.isFinite(expected)) {
    if (expected > available) {
      throw new RangeError(
        `Serialized index is truncated: expected ${expected} vector bytes, found ${Math.max(0, available)}`,
      );
    }
    vectorsLen = expected;
  } else {
    vectorsLen = Math.max(0, available);
  }
  const vectors = uint8.slice(offset, offset + vectorsLen);

  return {
    vectors,
    indices,
    metadata,
    config,
  };
}
317
/**
 * Create a stateful binary vector compressor for memory embeddings.
 *
 * The compressor owns one binary index (held in a closure) and exposes methods
 * that replace it as embeddings are stored, cleared or imported.
 *
 * @param config - Partial configuration; merged over DEFAULT_BINARY_CONFIG
 * @returns Memory compression interface: `store`, `storeBatch`, `search`,
 *   `getStats`, `clear`, `export`, `import`
 */
export function createBinaryVectorCompressor(config = {}) {
  const fullConfig = { ...DEFAULT_BINARY_CONFIG, ...config };
  let index = createBinaryIndex(fullConfig);
  logger.info('Binary vector compressor initialized', { ...fullConfig });

  /** Compress and store a single embedding. */
  const store = (id, embedding, importance = 0.5) => {
    const entry = { id, importance, timestamp: Date.now() };
    index = addToBinaryIndex(index, [embedding], [id], [entry]);
    logger.debug(`Stored embedding ${id}, total: ${index.indices.length}`);
  };

  /** Compress and store several embeddings; missing importances default to 0.5. */
  const storeBatch = (ids, embeddings, importances) => {
    const metadata = [];
    for (let i = 0; i < ids.length; i++) {
      metadata.push({
        id: ids[i],
        importance: importances?.[i] ?? 0.5,
        timestamp: Date.now(),
      });
    }
    index = addToBinaryIndex(index, embeddings, ids, metadata);
    logger.debug(`Stored ${ids.length} embeddings, total: ${index.indices.length}`);
  };

  /** Search the current index for the k most similar embeddings. */
  const search = (queryEmbedding, k = 10) => searchBinaryIndex(index, queryEmbedding, k);

  /** Compression statistics plus the stored count and a formatted ratio. */
  const getStats = () => {
    const stats = getCompressionStats(index);
    const compressionPercent = `${stats.compressionRatio.toFixed(1)}x`;
    return { ...stats, stored: index.indices.length, compressionPercent };
  };

  /** Drop all stored vectors by starting a fresh index with the merged config. */
  const clear = () => {
    index = createBinaryIndex(fullConfig);
    logger.info('Binary vector index cleared');
  };

  /** Serialize the current index. */
  const exportIndex = () => serializeBinaryIndex(index);

  /** Replace the current index with a deserialized one. */
  const importIndex = (buffer) => {
    index = deserializeBinaryIndex(buffer);
    logger.info(`Imported binary index with ${index.indices.length} vectors`);
  };

  return {
    store,
    storeBatch,
    search,
    getStats,
    clear,
    export: exportIndex,
    import: importIndex,
  };
}
387
+ /**
388
+ * Common embedding dimensions for major embedding models
 *
 * A plain lookup table of vector sizes. Several keys intentionally share the
 * same value (e.g. OPENAI_1536, OPENAI_1536_ADA and DEFAULT are all 1536).
389
+ */
390
+ export const EMBEDDING_DIMENSIONS = {
391
+ /** OpenAI text-embedding-3-small (the 3072-dim text-embedding-3-large has its own entry, OPENAI_3072) */
392
+ OPENAI_1536: 1536,
393
+ /** OpenAI text-embedding-ada-002 */
394
+ OPENAI_1536_ADA: 1536,
395
+ /** OpenAI text-embedding-3-large (3072 dim) */
396
+ OPENAI_3072: 3072,
397
+ /** Cohere embed-english-v3.0 */
398
+ COHERE_1024: 1024,
399
+ /** Cohere embed-multilingual-v3.0 */
400
+ COHERE_MULTILINGUAL: 1024,
401
+ /** Vertex AI textembedding-gecko */
402
+ VERTEX_GECKO: 768,
403
+ /** Default dimension (same value as DEFAULT_BINARY_CONFIG.dimension) */
404
+ DEFAULT: 1536,
405
+ };
@@ -10,3 +10,5 @@ export * from './adaptive-rag.js';
10
10
  export { createContextFragmentationEngine } from './context-fragmentation.js';
11
11
  export { createHybridSearchEngine, createBM25Index, bm25Score, normalizeBM25Scores, DEFAULT_HYBRID_CONFIG } from './hybrid-search.js';
12
12
  export { createPatternRecognizer } from './pattern-recognizer.js';
13
+ export { createMemoryObserver } from './observer.js';
14
+ export { createBinaryVectorCompressor, quantizeToBinary, hammingDistance, hammingToSimilarity, createBinaryIndex, addToBinaryIndex, searchBinaryIndex, getCompressionStats, serializeBinaryIndex, deserializeBinaryIndex, DEFAULT_BINARY_CONFIG, EMBEDDING_DIMENSIONS } from './binary-vector.js';
@@ -0,0 +1,350 @@
1
+ /**
2
+ * Memory Observer - Silent Background Memory Writer
3
+ *
4
+ * Implements the Mnemostroma-inspired Observer pattern: the AI agent NEVER writes
5
+ * memory directly. Instead, this Observer sidecar silently watches all I/O,
6
+ * extracts entities, embeds, scores, and indexes content automatically.
7
+ *
8
+ * Key principles:
9
+ * - AI never writes memory — Observer does it silently
10
+ * - Dual async pipeline: Observer (write) + Content Branch (versioned artifacts)
11
+ * - Memory Hulling: separates conversational noise from extractable kernels
12
+ * - 20ms hot buffer retrieval, ~50ms full extraction latency
13
+ *
14
+ * Based on: GG-QandV/mnemostroma (https://github.com/GG-QandV/mnemostroma)
15
+ *
16
+ * @module core/memory
17
+ * @fileoverview Observer pattern for silent memory writing
18
+ */
19
+ import { randomUUID } from 'crypto';
20
+ import { createLogger } from '../../utils/logger.js';
21
+ import { createEmbedder, cosineSimilarity } from './embedder.js';
22
+ const logger = createLogger('[MemoryObserver]');
23
/**
 * Defaults that buildMemoryObserver() merges under caller-supplied options.
 *
 * - importanceThreshold: sentences scoring below this are treated as shell by hull().
 * - hotBufferSize: maximum number of kernels kept in the hot buffer before the
 *   least important ones are evicted.
 * - embedder: used by the semantic search; note it is constructed eagerly, when
 *   this module is evaluated, and shared by every observer that does not
 *   supply its own.
 * - autoExtract, debounceMs, maxShellAge: not read by any code visible in this
 *   module — NOTE(review): presumably reserved for callers or future use; confirm.
 */
+ const DEFAULT_OBSERVER_CONFIG = {
24
+ importanceThreshold: 0.3,
25
+ hotBufferSize: 50,
26
+ autoExtract: true,
27
+ debounceMs: 100,
28
+ maxShellAge: 5 * 60 * 1000, // 5 minutes
29
+ embedder: createEmbedder({ dimensions: 128 }),
30
+ };
31
+ /**
32
+ * Noise patterns that indicate conversational shell (not worth storing)
 *
 * isShell() tests each pattern against the trimmed sentence and reports shell
 * on the first match. Every pattern is anchored at the start of the text, so
 * only the opening words decide; the rest of the sentence is not inspected.
 * The punctuation-only pattern matches text made up solely of whitespace and
 * punctuation. None of the patterns use the `g` flag, so `test()` is stateless.
 *
 * NOTE(review): because only the opening words are checked, a sentence such as
 * "Please remember the deadline is Friday" is classified as shell — confirm
 * this is intended.
33
+ */
34
+ const SHELL_PATTERNS = [
35
+ /^(hi|hello|hey|what's up|howdy)\b/i,
36
+ /^(thanks?|thank you|thx|ty|much appreciated)\b/i,
37
+ /^(okay|ok|okk|kk|sure|yes|no|nah|yeah|yep|nope)\b/i,
38
+ /^(lol|lmao|rofl|haha|heh)\b/i,
39
+ /^(bye|goodbye|see you|cya|good night|night)\b/i,
40
+ /^(sorry|apologies|my bad|whoops)\b/i,
41
+ /^(please|pls|plz|could you|would you)\b/i,
42
+ /^(interesting|cool|nice|awesome|great|good)\b/i,
43
+ /^(i see|i understand|i get it|got it|understood)\b/i,
44
+ /^[\s.,!?;:]*$/,
45
+ /^(oh|ah|uh|um|hmm|well)\b/i,
46
+ /^(let me know|feel free|take care|talk later)\b/i,
47
+ ];
48
+ /**
49
+ * Anchor patterns that indicate important kernels (decisions, deadlines, facts)
 *
 * extractAnchors() runs every pattern against a sentence and collects the
 * first matched text of each pattern that hits, so a sentence yields at most
 * one anchor per pattern. All patterns are case-insensitive and use word
 * boundaries, so they match whole words/phrases anywhere in the sentence.
 * The last pattern requires a subject word (user, client, ...) followed later
 * in the sentence by a preference verb (want, need, ...); its anchor text is
 * the whole span between them.
50
+ */
51
+ const ANCHOR_PATTERNS = [
52
+ /\b(decided|decision|agreed|chosen|selected|picked|settled)\b/i,
53
+ /\b(must|have to|need to|required|mandatory|essential)\b/i,
54
+ /\b(deadline|due|by|before|after|until|timing)\b/i,
55
+ /\b(never|always|don't|do not|must not|forbidden)\b/i,
56
+ /\b(important|critical|key|vital|priority)\b/i,
57
+ /\b(remember|forget|keep in mind|note that)\b/i,
58
+ /\b(because|since|reason|why|therefore|thus)\b/i,
59
+ /\b(fact|true|real|actually|indeed|in fact)\b/i,
60
+ /\b(but|however|although|except|except for)\b/i,
61
+ /\b(name|date|location|price|cost|amount)\b/i,
62
+ /\b(user|client|customer|they|their)\b.*\b(want|need|prefer|like)\b/i,
63
+ ];
64
/**
 * Classify a kernel sentence into a coarse type.
 *
 * Rules are checked in priority order and the first hit wins:
 * constraint → decision → rule → fact → context → entity, falling back to
 * 'context'. Only the 'fact' rule also looks at the surrounding context lines;
 * every other rule inspects the sentence alone.
 *
 * Keywords are matched as whole words (word boundaries), consistent with
 * ANCHOR_PATTERNS. Plain substring matching misfired on unrelated words, e.g.
 * "residue" (due), "mustard" (must) or "factory" (fact).
 *
 * @param content - The sentence to classify
 * @param context - Optional surrounding lines; null/undefined is treated as empty
 * @returns One of 'constraint', 'decision', 'rule', 'fact', 'context', 'entity'
 */
function classifyKernel(content, context = []) {
  const contextLines = context ?? [];
  const fullText = [content, ...contextLines].join(' ');

  if (/\b(?:deadlines?|due|by|before|after|timing)\b/i.test(content)) {
    return 'constraint';
  }
  if (/\b(?:decided|agreed|chosen|picked|selected)\b/i.test(content)) {
    return 'decision';
  }
  if (/\b(?:must|have to|need to|required|mandatory)\b/i.test(content)) {
    return 'rule';
  }
  if (/\b(?:facts?|true|actual(?:ly)?|indeed|in fact|real)\b/i.test(fullText)) {
    return 'fact';
  }
  if (/\b(?:remember|forget|notes?|keep in mind)\b/i.test(content)) {
    return 'context';
  }
  if (/\b(?:names?|dates?|locations?|prices?|amounts?)\b/i.test(content)) {
    return 'entity';
  }
  return 'context';
}
84
/**
 * Heuristically score how important a sentence is, on a 0..1 scale.
 *
 * Starts from 0.5 and applies, in order: +0.2 for anchors; +0.15 for 5–50
 * whitespace-separated words or −0.1 for more than 100; +0.1 if it contains a
 * digit; +0.1 if it contains two consecutive capitalised words; +0.05 if it
 * ends with '?'; +0.05 if it ends with '!'. The result is clamped to [0, 1].
 *
 * @param content - Sentence text
 * @param isAnchor - Whether at least one anchor pattern matched the sentence
 * @returns Importance score between 0 and 1
 */
function scoreImportance(content, isAnchor) {
  let total = 0.5;
  if (isAnchor) {
    total += 0.2;
  }

  const wordCount = content.split(/\s+/).length;
  if (wordCount > 100) {
    total -= 0.1;
  } else if (wordCount >= 5 && wordCount <= 50) {
    total += 0.15;
  }

  // Content-feature bonuses, applied in a fixed order.
  const bonuses = [
    [/\d+/, 0.1],
    [/[A-Z][a-z]+\s+[A-Z][a-z]+/, 0.1],
    [/\?$/, 0.05],
    [/!$/, 0.05],
  ];
  for (const [feature, bonus] of bonuses) {
    if (feature.test(content)) {
      total += bonus;
    }
  }

  return Math.min(1, Math.max(0, total));
}
106
/**
 * Decide whether a piece of text is conversational shell (noise).
 *
 * Text is shell when, after trimming, it is shorter than 3 characters or
 * matches any entry of SHELL_PATTERNS.
 *
 * @param content - Text to check
 * @returns true if the text should be treated as shell
 */
function isShell(content) {
  const text = content.trim();
  return text.length < 3 || SHELL_PATTERNS.some((pattern) => pattern.test(text));
}
119
/**
 * Collect the anchor phrases found in a sentence.
 *
 * Each entry of ANCHOR_PATTERNS contributes at most one anchor: the text of
 * its first match. Results follow the order of ANCHOR_PATTERNS.
 *
 * @param content - Sentence to scan
 * @returns Matched anchor texts (empty when nothing matches)
 */
function extractAnchors(content) {
  return ANCHOR_PATTERNS
    .map((pattern) => content.match(pattern))
    .filter((found) => found !== null)
    .map((found) => found[0]);
}
132
/**
 * Build a memory observer: a closure-backed object that splits observed text
 * into "kernels" (worth keeping) and "shell" (noise), keeps the kernels in a
 * bounded hot buffer, and indexes them by anchor phrase and by tag.
 *
 * State held in the closure: the hot buffer, an anchor → latest-sentence map,
 * a tag → kernels map, extraction counters, and the current/previous session ids.
 *
 * Changes relative to the previous implementation:
 * - a `null` context passed to observe() is treated as empty instead of being
 *   forwarded to the classifier;
 * - embeddings are awaited, so both synchronous and Promise-returning
 *   embedders work in the semantic search;
 * - entries returned by flush() and ctx_search() get their own copy of the
 *   kernel's tags array, so callers cannot mutate observer state through them.
 *
 * NOTE(review): flush() does not empty the hot buffer, so consecutive flushes
 * return the same kernels again; and kernels evicted from the hot buffer stay
 * in the tag index. Both are preserved here — confirm whether intended.
 *
 * @param config - Partial observer configuration, merged over DEFAULT_OBSERVER_CONFIG
 * @returns Observer with `observe`, `flush`, `ctx_semantic`, `ctx_anchors`,
 *   `ctx_search`, `ctx_bridge` and `getStats`
 */
function buildMemoryObserver(config = {}) {
  const cfg = { ...DEFAULT_OBSERVER_CONFIG, ...config };
  const hotBuffer = [];
  const anchorIndex = new Map();
  const tagIndex = new Map();
  let kernelsExtracted = 0;
  let shellDiscarded = 0;
  let lastFlush = null;
  let sessionId = randomUUID();
  let previousSessionId = null;

  /**
   * Hull content — separate kernels from shell, updating the anchor/tag
   * indexes and the counters as a side effect.
   */
  function hull(content, context = []) {
    const contextLines = context ?? [];
    const kernels = [];
    const shell = [];
    // Sentence terminators are consumed by the split; fragments of 2 chars or fewer are dropped.
    const sentences = content.split(/[.!?]+/).map((s) => s.trim()).filter((s) => s.length > 2);

    for (const sentence of sentences) {
      if (isShell(sentence)) {
        shell.push(sentence);
        shellDiscarded++;
        continue;
      }

      const anchors = extractAnchors(sentence);
      const isAnchor = anchors.length > 0;
      const importance = scoreImportance(sentence, isAnchor);
      if (importance < cfg.importanceThreshold) {
        shell.push(sentence);
        shellDiscarded++;
        continue;
      }

      const type = classifyKernel(sentence, contextLines);
      const kernel = {
        type,
        content: sentence,
        importance,
        tags: isAnchor ? [type, 'anchor'] : [type],
        anchors,
        sourceExcerpt: sentence.slice(0, 200),
      };
      kernels.push(kernel);
      kernelsExtracted++;

      // Later sentences overwrite earlier ones that share the same anchor text.
      for (const anchor of anchors) {
        anchorIndex.set(anchor, {
          content: sentence,
          type,
          timestamp: Date.now(),
        });
      }
      for (const tag of kernel.tags) {
        if (!tagIndex.has(tag)) {
          tagIndex.set(tag, []);
        }
        tagIndex.get(tag).push(kernel);
      }
    }
    return { kernels, shell };
  }

  /** Evict the least important kernels until the buffer fits cfg.hotBufferSize. */
  function maintainHotBuffer() {
    while (hotBuffer.length > cfg.hotBufferSize) {
      let minIdx = 0;
      let minImportance = Infinity;
      for (let i = 0; i < hotBuffer.length; i++) {
        if (hotBuffer[i].importance < minImportance) {
          minImportance = hotBuffer[i].importance;
          minIdx = i;
        }
      }
      hotBuffer.splice(minIdx, 1);
    }
  }

  /**
   * Rank hot-buffer kernels by cosine similarity to the query (keeping those
   * above 0.3). If embedding fails, fall back to a case-insensitive substring
   * match scored by kernel importance.
   */
  async function semanticSearch(query, limit = 5) {
    try {
      const queryEmbedding = await cfg.embedder.embed(query);
      const results = [];
      // Iterate a snapshot: the buffer may change while embeddings are awaited.
      for (const kernel of [...hotBuffer]) {
        const kernelEmbedding = await cfg.embedder.embed(kernel.content);
        const similarity = cosineSimilarity(queryEmbedding, kernelEmbedding);
        if (similarity > 0.3) {
          results.push({ content: kernel.content, score: similarity });
        }
      }
      results.sort((a, b) => b.score - a.score);
      return results.slice(0, limit);
    } catch (err) {
      logger.warn('Semantic search failed, falling back to keyword', { error: err });
      const queryLower = query.toLowerCase();
      return hotBuffer
        .filter((k) => k.content.toLowerCase().includes(queryLower))
        .slice(0, limit)
        .map((k) => ({ content: k.content, score: k.importance }));
    }
  }

  /** Sentences recorded in the anchor index whose type satisfies `accept`. */
  function anchoredContent(accept) {
    return Array.from(anchorIndex.values())
      .filter((data) => accept(data.type))
      .map((data) => data.content);
  }

  return {
    /** Hull the content, add its kernels to the hot buffer and report what was found. */
    observe(content, stream = 'semantic', context) {
      const start = Date.now();
      const { kernels, shell } = hull(content, context ?? []);
      hotBuffer.push(...kernels);
      maintainHotBuffer();
      const processingMs = Date.now() - start;
      logger.debug(`Observed [${stream}]: ${kernels.length} kernels extracted, ${shell.length} shell discarded (${processingMs}ms)`);
      return {
        id: randomUUID(),
        stream,
        kernels,
        shell,
        timestamp: Date.now(),
        processingMs,
      };
    },

    /** Convert every hot-buffer kernel into a memory entry for the given tier. */
    async flush(tier = 'learned') {
      if (hotBuffer.length === 0) {
        return [];
      }
      const entries = hotBuffer.map((kernel) => ({
        id: randomUUID(),
        tier,
        content: kernel.content,
        importance: kernel.importance,
        tags: [...kernel.tags],
        timestamp: Date.now(),
        accessCount: 0,
        lastAccessed: Date.now(),
        source: 'self',
      }));
      lastFlush = Date.now();
      logger.info(`Flushed ${entries.length} kernels to ${tier} tier`);
      return entries;
    },

    /** Semantic lookup over the hot buffer. */
    async ctx_semantic(query, limit = 5) {
      return semanticSearch(query, limit);
    },

    /** Most recently recorded anchor sentences, newest first. */
    ctx_anchors(limit = 20) {
      return Array.from(anchorIndex.values())
        .map(({ content, type, timestamp }) => ({ content, type, timestamp }))
        .sort((a, b) => b.timestamp - a.timestamp)
        .slice(0, limit);
    },

    /** Kernels carrying any of the given tags, de-duplicated by content. */
    async ctx_search(tags, limit = 10) {
      const seen = new Set();
      const unique = [];
      for (const tag of tags) {
        for (const kernel of tagIndex.get(tag) || []) {
          if (!seen.has(kernel.content)) {
            seen.add(kernel.content);
            unique.push(kernel);
          }
        }
      }
      return unique.slice(0, limit).map((k) => ({
        id: randomUUID(),
        tier: 'learned',
        content: k.content,
        importance: k.importance,
        tags: [...k.tags],
        timestamp: Date.now(),
        accessCount: 0,
        lastAccessed: Date.now(),
      }));
    },

    /**
     * Build a hand-over packet for the next session and rotate the session id:
     * the current id becomes the previous one and a fresh id is generated.
     */
    ctx_bridge(prevSessionId) {
      const decisions = anchoredContent((type) => type === 'decision');
      const constraints = anchoredContent((type) => type === 'constraint' || type === 'rule');
      const importantFacts = anchoredContent((type) => type === 'fact')
        .map((fact) => ({ fact, certainty: 0.8 }));
      // Summary is the first five kernels currently in the hot buffer.
      const contextSummary = hotBuffer.length > 0
        ? hotBuffer.slice(0, 5).map((k) => k.content).join(' ')
        : '';
      const packet = {
        sessionId,
        previousSessionId: prevSessionId || previousSessionId,
        context: contextSummary,
        decisions,
        constraints,
        pendingTasks: [],
        importantFacts,
        timestamp: Date.now(),
        agentVersion: '2.3.3',
      };
      previousSessionId = sessionId;
      sessionId = randomUUID();
      return packet;
    },

    /** Current buffer size, extraction counters and last flush time. */
    getStats() {
      return {
        hotBufferSize: hotBuffer.length,
        kernelsExtracted,
        shellDiscarded,
        lastFlush,
      };
    },
  };
}
347
/**
 * Public factory for a memory observer.
 *
 * Logs the creation and delegates to buildMemoryObserver() with the given
 * configuration unchanged.
 *
 * @param config - Optional observer configuration
 * @returns The observer built by buildMemoryObserver()
 */
export function createMemoryObserver(config) {
  logger.info('Creating Memory Observer (Mnemostroma-inspired)');
  const observer = buildMemoryObserver(config);
  return observer;
}
@@ -0,0 +1,436 @@
1
+ /**
2
+ * Execution Governor - Cross-Context Tool Call Governance
3
+ *
4
+ * Inspired by chitin (Execution kernel for AI coding agents)
5
+ * Provides declarative policy enforcement for tool calls across different execution contexts.
6
+ *
7
+ * Features:
8
+ * - Gating tool calls against typed policies
9
+ * - Severity ladder with lockdown counter
10
+ * - Tamper-evident audit chain
11
+ * - Bounds enforcement on push-shaped actions
12
+ * - Heuristic signals for blast-radius, floundering, drift
13
+ *
14
+ * @module core/security
15
+ * @fileoverview Execution governance for HeartFlow
16
+ */
17
+ import { createLogger } from '../../utils/logger.js';
18
+ import * as crypto from 'crypto';
19
+ const logger = createLogger('[ExecutionGovernor]');
20
+ // ============================================================
21
+ // Default Configuration
22
+ // ============================================================
23
/**
 * Baseline governance configuration used when the caller supplies no overrides.
 *
 * - `severityThresholds`: per-severity counts of non-allowed decisions at which
 *   a session is considered to have breached that severity level.
 * - `maxLockdownCount`: number of threshold-breaching gate calls after which
 *   the session is locked.
 * - `policies`: one policy per canonical action; the first policy whose
 *   `action` matches is used.
 * - `defaultPolicy`: applied when no policy matches the action.
 */
const DEFAULT_CONFIG = {
    enableAuditChain: true,
    enableSeverityLockdown: true,
    maxLockdownCount: 5,
    severityThresholds: { low: 3, medium: 5, high: 10, critical: 20 },
    policies: [
        // Reads are always allowed.
        { id: 'default-read', action: 'read', allow: true, severity: 'info' },
        // Writes are allowed but bounded and need operator confirmation.
        {
            id: 'default-write',
            action: 'write',
            allow: true,
            conditions: {
                bounds: { maxLinesChanged: 500, maxFilesAffected: 10 },
                requireConfirmation: true,
            },
            severity: 'medium',
        },
        // Deletes are limited to a single file and need operator confirmation.
        {
            id: 'default-delete',
            action: 'delete',
            allow: true,
            conditions: {
                bounds: { maxFilesAffected: 1 },
                requireConfirmation: true,
            },
            severity: 'high',
        },
        // Arbitrary execution is denied outright.
        { id: 'default-execute', action: 'execute', allow: false, severity: 'critical' },
        // Network calls are allowed with an hourly cap declared on the policy.
        {
            id: 'default-network',
            action: 'network',
            allow: true,
            conditions: { maxPerHour: 50 },
            severity: 'medium',
        },
    ],
    defaultPolicy: { id: 'default-unknown', action: 'unknown', allow: false, severity: 'high' },
};
88
+ // ============================================================
89
+ // Helper Functions
90
+ // ============================================================
91
/**
 * Ordered keyword table used to map a tool name onto a canonical action.
 * Rules are tried top to bottom and the first rule with a matching keyword
 * wins, so a keyword that also appears in an earlier rule (e.g. 'fetch',
 * 'post', 'spawn') can never select the later rule.
 */
const ACTION_KEYWORD_RULES = [
    ['read', ['read', 'get', 'fetch', 'search', 'query', 'select']],
    ['write', ['write', 'create', 'add', 'insert', 'post', 'put']],
    ['delete', ['delete', 'remove', 'drop', 'unlink', 'rm']],
    ['execute', ['exec', 'run', 'execute', 'spawn', 'fork', 'shell']],
    ['network', ['http', 'fetch', 'request', 'send', 'post', 'url']],
    ['filesystem', ['file', 'dir', 'path', 'mkdir', 'stat', 'ls']],
    ['process', ['process', 'pid', 'kill', 'signal']],
    ['memory', ['memory', 'store', 'recall', 'embed']],
    ['agent', ['agent', 'spawn', 'delegate', 'subagent']],
];
/**
 * Classify a tool call into a canonical action.
 *
 * Matching is a case-insensitive substring test on the tool name, so short
 * keywords also match inside longer words (e.g. 'rm' inside 'format').
 *
 * @param toolName - Name of the tool being invoked
 * @param args - Tool arguments (currently not consulted)
 * @returns The canonical action, or 'unknown' when no keyword matches
 */
function classifyAction(toolName, args) {
    const needle = toolName.toLowerCase();
    for (const [action, keywords] of ACTION_KEYWORD_RULES) {
        if (keywords.some((keyword) => needle.includes(keyword))) {
            return action;
        }
    }
    return 'unknown';
}
143
/**
 * Derive heuristic signals for a tool call from its envelope.
 *
 * Context-supplied values are used when truthy; otherwise the fallbacks
 * 'low' / false / 0 apply. Stakes are high (0.9) for delete and execute
 * actions and low (0.3) for everything else.
 *
 * @param envelope - Tool call envelope carrying `context` and `action`
 * @returns Signal object with blastRadius, floundering, drift,
 *          repetitionCount, urgency and stakes
 */
function calculateHeuristics(envelope) {
    const { context, action } = envelope;
    const isHighStakes = action === 'delete' || action === 'execute';
    const signals = {
        blastRadius: context.blastRadius || 'low',
        floundering: context.floundering || false,
        drift: context.drift || 0,
        // Repetition is tracked in agent state, not derived here.
        repetitionCount: 0,
        // Fixed default urgency.
        urgency: 0.5,
        stakes: isHighStakes ? 0.9 : 0.3,
    };
    return signals;
}
157
/**
 * Hash data for the audit chain.
 *
 * The digest is a pure function of `data` and `prevHash`, so it can be
 * recomputed later to verify an entry. Callers that want a timestamp bound
 * into the hash include it in `data` (both the audit events and the decision
 * records built below do so).
 *
 * @param data - JSON-serializable record to hash
 * @param prevHash - Hash of the preceding chain entry, or 'genesis'
 * @returns Hex-encoded SHA-256 digest
 */
function hashData(data, prevHash) {
    const payload = JSON.stringify({ data, prevHash });
    return crypto.createHash('sha256').update(payload).digest('hex');
}
// ============================================================
// Execution Governor Class
// ============================================================
// Upper bound on remembered decisions, so a long-running governor does not
// grow its history without limit (the audit chain is bounded similarly).
const MAX_DECISION_HISTORY = 10000;
/**
 * Gates tool calls against declarative policies, tracks per-session severity
 * counts with an optional lockdown, and records every decision in a
 * hash-linked audit chain.
 */
export class ExecutionGovernor {
    config;
    agentStates = new Map();
    auditChain = [];
    decisionHistory = new Map();
    /**
     * @param config - Partial configuration; missing keys fall back to
     *   DEFAULT_CONFIG. `severityThresholds` is merged key by key so that
     *   overriding one threshold keeps the defaults for the others.
     */
    constructor(config = {}) {
        this.config = {
            ...DEFAULT_CONFIG,
            ...config,
            severityThresholds: {
                ...DEFAULT_CONFIG.severityThresholds,
                ...config?.severityThresholds,
            },
        };
        logger.info('ExecutionGovernor initialized', {
            policiesCount: this.config.policies.length,
            auditChainEnabled: this.config.enableAuditChain,
        });
    }
    /**
     * Process a tool call and return the governance decision.
     *
     * Note: when the envelope has no action (or 'unknown'), the classified
     * action is written back onto the envelope.
     *
     * @param envelope - Tool call envelope ({ id, action, toolName, args, context })
     * @returns Promise resolving to the decision record ('allow' | 'deny' | 'escalate')
     */
    async gate(envelope) {
        // Auto-classify when the caller left the action unset or marked it unknown.
        if (envelope.action == null || envelope.action === 'unknown') {
            envelope.action = classifyAction(envelope.toolName, envelope.args);
        }
        const { sessionId } = envelope.context;
        const lockdownEnabled = this.config.enableSeverityLockdown;
        // Get or create agent state
        let state = this.agentStates.get(sessionId);
        if (!state) {
            state = this.createAgentState(sessionId);
            this.agentStates.set(sessionId, state);
        }
        // A locked session is denied without evaluating any policy.
        if (state.locked && lockdownEnabled) {
            const lockedDecision = this.createDecision(envelope, 'deny', 'Agent is in lockdown due to severity threshold breach');
            // Remember it so getDecision() can return lockdown denials too.
            this.rememberDecision(envelope.id, lockedDecision);
            this.appendAudit('decision', lockedDecision);
            return lockedDecision;
        }
        // Each call made while a severity threshold is breached moves the
        // session one step closer to lockdown.
        if (lockdownEnabled && this.checkSeverityThreshold(state)) {
            state.lockdownCount++;
            if (state.lockdownCount >= this.config.maxLockdownCount) {
                state.locked = true;
                this.appendAudit('lockdown', { sessionId, count: state.lockdownCount });
                logger.warn('Agent locked due to severity threshold', { sessionId });
            }
        }
        // Find matching policy
        const policy = this.findMatchingPolicy(envelope.action);
        let reason = 'Policy evaluation';
        let decision = 'allow';
        if (policy) {
            const conditions = policy.conditions;
            if (!policy.allow) {
                decision = 'deny';
                reason = `Policy ${policy.id} explicitly denies ${envelope.action}`;
            }
            else {
                // Bounds are a hard limit, so they are checked before asking
                // for confirmation; a policy with both must enforce both.
                const boundCheck = conditions?.bounds
                    ? this.checkBounds(envelope, conditions.bounds)
                    : { allowed: true };
                if (!boundCheck.allowed) {
                    decision = 'deny';
                    reason = boundCheck.reason || 'Bounds check failed';
                }
                else if (conditions?.requireConfirmation) {
                    decision = 'escalate';
                    reason = `${envelope.action} requires operator confirmation`;
                }
            }
            // Every non-allowed outcome counts toward the policy's severity.
            if (lockdownEnabled && decision !== 'allow') {
                state.severityCounts[policy.severity] = (state.severityCounts[policy.severity] || 0) + 1;
            }
        }
        else {
            const allowByDefault = this.config.defaultPolicy.allow;
            decision = allowByDefault ? 'allow' : 'deny';
            reason = `No matching policy, using default: ${allowByDefault ? 'allow' : 'deny'}`;
        }
        // Update state
        state.totalToolCalls++;
        state.lastActivity = Date.now();
        // Create decision
        const governanceDecision = this.createDecision(envelope, decision, reason, policy ?? undefined);
        this.rememberDecision(envelope.id, governanceDecision);
        this.appendAudit('decision', governanceDecision);
        if (decision === 'deny') {
            logger.info('Tool call denied', { envelopeId: envelope.id, action: envelope.action, reason });
        }
        else if (decision === 'escalate') {
            logger.info('Tool call escalated', { envelopeId: envelope.id, action: envelope.action, reason });
        }
        return governanceDecision;
    }
    /**
     * Create a new tool call envelope with a fresh id and a classified action.
     */
    createEnvelope(toolName, args, context) {
        return {
            id: crypto.randomUUID(),
            action: classifyAction(toolName, args),
            toolName,
            args,
            context,
        };
    }
    /**
     * Unlock a locked agent and clear its severity counters.
     *
     * @returns false when the session is unknown, true otherwise
     */
    unlock(sessionId) {
        const state = this.agentStates.get(sessionId);
        if (!state)
            return false;
        state.locked = false;
        state.lockdownCount = 0;
        state.severityCounts = {};
        this.appendAudit('unlock', { sessionId });
        logger.info('Agent unlocked', { sessionId });
        return true;
    }
    /**
     * Replace a session's state with a fresh one.
     *
     * @returns false when the session is unknown, true otherwise
     */
    resetState(sessionId) {
        if (!this.agentStates.has(sessionId))
            return false;
        this.agentStates.set(sessionId, this.createAgentState(sessionId));
        this.appendAudit('state_reset', { sessionId });
        logger.info('Agent state reset', { sessionId });
        return true;
    }
    /**
     * Get agent state, or null when the session is unknown.
     */
    getAgentState(sessionId) {
        return this.agentStates.get(sessionId) || null;
    }
    /**
     * Get a shallow copy of the audit chain.
     */
    getAuditChain() {
        return [...this.auditChain];
    }
    /**
     * Get a decision from history, or null when it is not (or no longer) stored.
     */
    getDecision(envelopeId) {
        return this.decisionHistory.get(envelopeId) || null;
    }
    /**
     * Verify audit chain integrity.
     *
     * For every event the stored hash is recomputed from its type, payload,
     * timestamp and prevHash, and (except for the first retained event, whose
     * predecessor may have been trimmed) prevHash must equal the previous
     * event's hash.
     *
     * @returns { valid: true } or { valid: false, brokenAt: index }
     */
    verifyAuditChain() {
        for (let i = 0; i < this.auditChain.length; i++) {
            const event = this.auditChain[i];
            const linked = i === 0 || event.prevHash === this.auditChain[i - 1].hash;
            const expectedHash = hashData({ type: event.type, payload: event.payload, timestamp: event.timestamp }, event.prevHash);
            if (!linked || event.hash !== expectedHash) {
                return { valid: false, brokenAt: i };
            }
        }
        return { valid: true };
    }
    // Private helper methods
    createAgentState(sessionId) {
        return {
            sessionId,
            severityCounts: {},
            lockdownCount: 0,
            totalToolCalls: 0,
            lastActivity: Date.now(),
            locked: false,
        };
    }
    /** Return the highest severity level whose threshold is met, or null. */
    checkSeverityThreshold(state) {
        const counts = state.severityCounts;
        const thresholds = this.config.severityThresholds;
        for (const level of ['critical', 'high', 'medium', 'low']) {
            if ((counts[level] || 0) >= thresholds[level])
                return level;
        }
        return null;
    }
    findMatchingPolicy(action) {
        return this.config.policies.find(p => p.action === action) || null;
    }
    /**
     * Check an envelope against a policy's bounds.
     *
     * Only allowedPaths, deniedPaths and maxDataSize are evaluated here.
     * NOTE(review): other bound keys that appear in DEFAULT_CONFIG
     * (maxLinesChanged, maxFilesAffected) are not enforced by this method.
     * NOTE(review): path checks are plain substring matches on the raw
     * argument; paths are not normalized, so confirm callers pass resolved
     * paths if traversal sequences are a concern.
     */
    checkBounds(envelope, bounds) {
        if (!bounds)
            return { allowed: true };
        // Check filesystem bounds
        if (bounds.allowedPaths || bounds.deniedPaths) {
            const path = this.extractPath(envelope.args);
            if (path) {
                if (bounds.deniedPaths?.some(p => path.includes(p))) {
                    return { allowed: false, reason: `Path ${path} is in denied list` };
                }
                if (bounds.allowedPaths && !bounds.allowedPaths.some(p => path.includes(p))) {
                    return { allowed: false, reason: `Path ${path} is not in allowed list` };
                }
            }
        }
        // Check data size
        if (bounds.maxDataSize) {
            const size = this.estimateDataSize(envelope.args);
            if (size > bounds.maxDataSize) {
                return { allowed: false, reason: `Data size ${size} exceeds limit ${bounds.maxDataSize}` };
            }
        }
        return { allowed: true };
    }
    /** Return the first non-empty string found under a well-known path key, or null. */
    extractPath(args) {
        for (const key of ['path', 'file', 'filePath', 'target', 'destination']) {
            // Optional chaining: a tool call without args has no path.
            const candidate = args?.[key];
            if (candidate && typeof candidate === 'string') {
                return candidate;
            }
        }
        return null;
    }
    /** Approximate the argument size as the length of its JSON text (0 when not serializable). */
    estimateDataSize(args) {
        try {
            return JSON.stringify(args).length;
        }
        catch {
            return 0;
        }
    }
    createDecision(envelope, decision, reason, policy) {
        const decisionData = {
            envelopeId: envelope.id,
            action: envelope.action,
            decision,
            reason,
            timestamp: Date.now(),
        };
        const lastEvent = this.auditChain[this.auditChain.length - 1];
        return {
            ...decisionData,
            matchedPolicy: policy,
            hash: hashData(decisionData, lastEvent ? lastEvent.hash : 'genesis'),
        };
    }
    /** Store a decision, evicting the oldest entry once the history is full. */
    rememberDecision(envelopeId, decision) {
        this.decisionHistory.set(envelopeId, decision);
        if (this.decisionHistory.size > MAX_DECISION_HISTORY) {
            // Maps iterate in insertion order, so the first key is the oldest.
            const oldestKey = this.decisionHistory.keys().next().value;
            this.decisionHistory.delete(oldestKey);
        }
    }
    appendAudit(type, payload) {
        if (!this.config.enableAuditChain)
            return;
        const lastEvent = this.auditChain[this.auditChain.length - 1];
        const prevHash = lastEvent ? lastEvent.hash : 'genesis';
        const timestamp = Date.now();
        // Store a JSON snapshot rather than the live object: the hash is taken
        // over the JSON form, and the caller keeps (and may later mutate) the
        // original, which must not invalidate the recorded event.
        const snapshot = payload === undefined ? undefined : JSON.parse(JSON.stringify(payload));
        const event = {
            type,
            payload: snapshot,
            timestamp,
            hash: hashData({ type, payload: snapshot, timestamp }, prevHash),
            prevHash,
        };
        this.auditChain.push(event);
        // Keep chain bounded to last 10000 events
        if (this.auditChain.length > 10000) {
            this.auditChain = this.auditChain.slice(-5000);
        }
    }
}
424
+ // ============================================================
425
+ // Factory Function
426
+ // ============================================================
427
/**
 * Factory for ExecutionGovernor.
 *
 * @param config - Optional configuration forwarded to the ExecutionGovernor
 *   constructor; omit it to use the constructor's defaults
 * @returns A new ExecutionGovernor instance
 */
export function createExecutionGovernor(config) {
    const governor = new ExecutionGovernor(config);
    return governor;
}
433
+ // ============================================================
434
+ // Export default config for customization
435
+ // ============================================================
436
+ export { DEFAULT_CONFIG };
@@ -1,2 +1,3 @@
1
1
  export * from './privacy.js';
2
2
  export * from './agent-shield.js';
3
+ export { ExecutionGovernor, createExecutionGovernor, DEFAULT_CONFIG } from './execution-governor.js';
package/dist/version.js CHANGED
@@ -1 +1 @@
1
- export const VERSION = '2.3.2';
1
+ export const VERSION = '2.3.4';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mark-improving-agent",
3
- "version": "2.3.2",
3
+ "version": "2.3.4",
4
4
  "description": "Self-evolving AI agent with permanent memory, identity continuity, and self-evolution — for AI agents that need to remember, learn, and evolve across sessions",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",