@yamo/memory-mesh 2.3.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/README.md +8 -2
  2. package/bin/memory_mesh.js +1 -1
  3. package/lib/llm/client.d.ts +86 -0
  4. package/lib/llm/client.js +300 -357
  5. package/lib/llm/client.ts +334 -0
  6. package/lib/llm/index.d.ts +17 -0
  7. package/lib/llm/index.js +16 -8
  8. package/lib/llm/index.ts +18 -0
  9. package/lib/memory/adapters/client.d.ts +120 -0
  10. package/lib/memory/adapters/client.js +519 -0
  11. package/lib/memory/adapters/client.ts +519 -0
  12. package/lib/memory/adapters/config.d.ts +130 -0
  13. package/lib/memory/adapters/config.js +190 -0
  14. package/lib/memory/adapters/config.ts +190 -0
  15. package/lib/memory/adapters/errors.d.ts +84 -0
  16. package/lib/memory/adapters/errors.js +129 -0
  17. package/lib/memory/adapters/errors.ts +129 -0
  18. package/lib/memory/context-manager.d.ts +41 -0
  19. package/lib/memory/context-manager.js +345 -0
  20. package/lib/memory/context-manager.ts +345 -0
  21. package/lib/memory/embeddings/factory.d.ts +57 -0
  22. package/lib/memory/embeddings/factory.js +149 -0
  23. package/lib/memory/embeddings/factory.ts +149 -0
  24. package/lib/memory/embeddings/index.d.ts +2 -0
  25. package/lib/memory/embeddings/index.js +3 -0
  26. package/lib/memory/embeddings/index.ts +3 -0
  27. package/lib/memory/embeddings/service.d.ts +134 -0
  28. package/lib/memory/embeddings/service.js +516 -0
  29. package/lib/memory/embeddings/service.ts +516 -0
  30. package/lib/memory/index.d.ts +9 -0
  31. package/lib/memory/index.js +10 -1
  32. package/lib/memory/index.ts +10 -0
  33. package/lib/memory/memory-mesh.d.ts +332 -0
  34. package/lib/memory/memory-mesh.js +1470 -678
  35. package/lib/memory/memory-mesh.ts +1517 -0
  36. package/lib/memory/memory-translator.d.ts +14 -0
  37. package/lib/memory/memory-translator.js +126 -0
  38. package/lib/memory/memory-translator.ts +126 -0
  39. package/lib/memory/schema.d.ts +130 -0
  40. package/lib/memory/schema.js +184 -0
  41. package/lib/memory/schema.ts +184 -0
  42. package/lib/memory/scorer.d.ts +25 -0
  43. package/lib/memory/scorer.js +78 -0
  44. package/lib/memory/scorer.ts +78 -0
  45. package/lib/memory/search/index.d.ts +1 -0
  46. package/lib/memory/search/index.js +2 -0
  47. package/lib/memory/search/index.ts +2 -0
  48. package/lib/memory/search/keyword-search.d.ts +46 -0
  49. package/lib/memory/search/keyword-search.js +136 -0
  50. package/lib/memory/search/keyword-search.ts +136 -0
  51. package/lib/scrubber/config/defaults.d.ts +46 -0
  52. package/lib/scrubber/config/defaults.js +50 -57
  53. package/lib/scrubber/config/defaults.ts +55 -0
  54. package/lib/scrubber/errors/scrubber-error.d.ts +22 -0
  55. package/lib/scrubber/errors/scrubber-error.js +28 -32
  56. package/lib/scrubber/errors/scrubber-error.ts +44 -0
  57. package/lib/scrubber/index.d.ts +5 -0
  58. package/lib/scrubber/index.js +4 -23
  59. package/lib/scrubber/index.ts +6 -0
  60. package/lib/scrubber/scrubber.d.ts +44 -0
  61. package/lib/scrubber/scrubber.js +100 -121
  62. package/lib/scrubber/scrubber.ts +109 -0
  63. package/lib/scrubber/stages/chunker.d.ts +25 -0
  64. package/lib/scrubber/stages/chunker.js +74 -91
  65. package/lib/scrubber/stages/chunker.ts +104 -0
  66. package/lib/scrubber/stages/metadata-annotator.d.ts +17 -0
  67. package/lib/scrubber/stages/metadata-annotator.js +55 -65
  68. package/lib/scrubber/stages/metadata-annotator.ts +75 -0
  69. package/lib/scrubber/stages/normalizer.d.ts +16 -0
  70. package/lib/scrubber/stages/normalizer.js +42 -50
  71. package/lib/scrubber/stages/normalizer.ts +60 -0
  72. package/lib/scrubber/stages/semantic-filter.d.ts +16 -0
  73. package/lib/scrubber/stages/semantic-filter.js +42 -52
  74. package/lib/scrubber/stages/semantic-filter.ts +62 -0
  75. package/lib/scrubber/stages/structural-cleaner.d.ts +18 -0
  76. package/lib/scrubber/stages/structural-cleaner.js +66 -75
  77. package/lib/scrubber/stages/structural-cleaner.ts +83 -0
  78. package/lib/scrubber/stages/validator.d.ts +17 -0
  79. package/lib/scrubber/stages/validator.js +46 -56
  80. package/lib/scrubber/stages/validator.ts +67 -0
  81. package/lib/scrubber/telemetry.d.ts +29 -0
  82. package/lib/scrubber/telemetry.js +54 -58
  83. package/lib/scrubber/telemetry.ts +62 -0
  84. package/lib/scrubber/utils/hash.d.ts +14 -0
  85. package/lib/scrubber/utils/hash.js +30 -32
  86. package/lib/scrubber/utils/hash.ts +40 -0
  87. package/lib/scrubber/utils/html-parser.d.ts +14 -0
  88. package/lib/scrubber/utils/html-parser.js +32 -39
  89. package/lib/scrubber/utils/html-parser.ts +46 -0
  90. package/lib/scrubber/utils/pattern-matcher.d.ts +12 -0
  91. package/lib/scrubber/utils/pattern-matcher.js +48 -57
  92. package/lib/scrubber/utils/pattern-matcher.ts +64 -0
  93. package/lib/scrubber/utils/token-counter.d.ts +18 -0
  94. package/lib/scrubber/utils/token-counter.js +24 -25
  95. package/lib/scrubber/utils/token-counter.ts +32 -0
  96. package/lib/utils/logger.d.ts +19 -0
  97. package/lib/utils/logger.js +65 -0
  98. package/lib/utils/logger.ts +65 -0
  99. package/lib/utils/skill-metadata.d.ts +24 -0
  100. package/lib/utils/skill-metadata.js +133 -0
  101. package/lib/utils/skill-metadata.ts +133 -0
  102. package/lib/yamo/emitter.d.ts +46 -0
  103. package/lib/yamo/emitter.js +79 -143
  104. package/lib/yamo/emitter.ts +171 -0
  105. package/lib/yamo/index.d.ts +14 -0
  106. package/lib/yamo/index.js +6 -7
  107. package/lib/yamo/index.ts +16 -0
  108. package/lib/yamo/schema.d.ts +56 -0
  109. package/lib/yamo/schema.js +82 -108
  110. package/lib/yamo/schema.ts +133 -0
  111. package/package.json +13 -8
  112. package/index.d.ts +0 -111
  113. package/lib/embeddings/factory.js +0 -151
  114. package/lib/embeddings/index.js +0 -2
  115. package/lib/embeddings/service.js +0 -586
  116. package/lib/index.js +0 -6
  117. package/lib/lancedb/client.js +0 -633
  118. package/lib/lancedb/config.js +0 -215
  119. package/lib/lancedb/errors.js +0 -144
  120. package/lib/lancedb/index.js +0 -4
  121. package/lib/lancedb/schema.js +0 -217
  122. package/lib/search/index.js +0 -1
  123. package/lib/search/keyword-search.js +0 -144
  124. package/lib/utils/index.js +0 -1
@@ -0,0 +1,1517 @@
1
+ // @ts-nocheck
2
+ /**
3
+ * Memory Mesh - Vector Memory Storage with LanceDB
4
+ * Provides persistent semantic memory for YAMO OS using LanceDB backend
5
+ *
6
+ * CLI Interface:
7
+ * node tools/memory_mesh.js ingest '{"content": "...", "metadata": {...}}'
8
+ * node tools/memory_mesh.js search '{"query": "...", "limit": 10}'
9
+ * node tools/memory_mesh.js get '{"id": "..."}'
10
+ * node tools/memory_mesh.js delete '{"id": "..."}'
11
+ * node tools/memory_mesh.js stats '{}'
12
+ *
13
+ * Also supports STDIN input for YAMO skill compatibility:
14
+ * echo '{"action": "ingest", "content": "..."}' | node tools/memory_mesh.js
15
+ */
16
+ import { fileURLToPath } from "url";
17
+ import fs from "fs";
18
+ import path from "path";
19
+ import crypto from "crypto";
20
+ import { LanceDBClient } from "./adapters/client.js";
21
+ import { getConfig } from "./adapters/config.js";
22
+ import { getEmbeddingDimension, createSynthesizedSkillSchema, } from "./schema.js";
23
+ import { handleError } from "./adapters/errors.js";
24
+ import EmbeddingFactory from "./embeddings/factory.js";
25
+ import { Scrubber } from "../scrubber/scrubber.js";
26
+ import { extractSkillIdentity, extractSkillTags, } from "../utils/skill-metadata.js";
27
+ import { KeywordSearch } from "./search/keyword-search.js";
28
+ import { YamoEmitter } from "../yamo/emitter.js";
29
+ import { LLMClient } from "../llm/client.js";
30
+ import * as lancedb from "@lancedb/lancedb";
31
+ import { createLogger } from "../utils/logger.js";
32
+ const logger = createLogger("brain");
33
+ /**
34
+ * MemoryMesh class for managing vector memory storage
35
+ */
36
+ export class MemoryMesh {
37
+ client;
38
+ config;
39
+ embeddingFactory;
40
+ keywordSearch;
41
+ isInitialized;
42
+ vectorDimension;
43
+ enableYamo;
44
+ enableLLM;
45
+ enableMemory;
46
+ agentId;
47
+ yamoTable;
48
+ skillTable;
49
+ llmClient;
50
+ scrubber;
51
+ queryCache;
52
+ cacheConfig;
53
+ skillDirectories; // Store skill directories for synthesis
54
+ dbDir; // Store custom dbDir for in-memory databases
55
+ /**
56
+ * Create a new MemoryMesh instance
57
+ * @param {Object} [options={}]
58
+ */
59
+ constructor(options = {}) {
60
+ this.client = null;
61
+ this.config = null;
62
+ this.embeddingFactory = new EmbeddingFactory();
63
+ this.keywordSearch = new KeywordSearch();
64
+ this.isInitialized = false;
65
+ this.vectorDimension = 384; // Will be set during init()
66
+ // YAMO and LLM support
67
+ this.enableYamo = options.enableYamo !== false;
68
+ this.enableLLM = options.enableLLM !== false;
69
+ this.enableMemory = options.enableMemory !== false;
70
+ this.agentId = options.agentId || "YAMO_AGENT";
71
+ this.yamoTable = null;
72
+ this.skillTable = null;
73
+ this.llmClient = this.enableLLM ? new LLMClient() : null;
74
+ // Store skill directories for synthesis
75
+ if (Array.isArray(options.skill_directories)) {
76
+ this.skillDirectories = options.skill_directories;
77
+ }
78
+ else if (options.skill_directories) {
79
+ this.skillDirectories = [options.skill_directories];
80
+ }
81
+ else {
82
+ this.skillDirectories = ["skills"];
83
+ }
84
+ // Initialize LLM client if enabled
85
+ if (this.enableLLM) {
86
+ this.llmClient = new LLMClient({
87
+ provider: options.llmProvider,
88
+ apiKey: options.llmApiKey,
89
+ model: options.llmModel,
90
+ maxTokens: options.llmMaxTokens,
91
+ });
92
+ }
93
+ // Scrubber for Layer 0 sanitization
94
+ this.scrubber = new Scrubber({
95
+ enabled: true,
96
+ chunking: {
97
+ minTokens: 1, // Allow short memories
98
+ }, // Type cast for partial config
99
+ validation: {
100
+ enforceMinLength: false, // Disable strict length validation
101
+ },
102
+ });
103
+ // Simple LRU cache for search queries (5 minute TTL)
104
+ this.queryCache = new Map();
105
+ this.cacheConfig = {
106
+ maxSize: 500,
107
+ ttlMs: 5 * 60 * 1000, // 5 minutes
108
+ };
109
+ // Store custom dbDir for test isolation
110
+ this.dbDir = options.dbDir;
111
+ }
112
+ /**
113
+ * Generate a cache key from query and options
114
+ * @private
115
+ */
116
+ _generateCacheKey(query, options = {}) {
117
+ const normalizedOptions = {
118
+ limit: options.limit || 10,
119
+ filter: options.filter || null,
120
+ // Normalize options that affect results
121
+ };
122
+ return `search:${query}:${JSON.stringify(normalizedOptions)}`;
123
+ }
124
+ /**
125
+ * Get cached result if valid
126
+ * @private
127
+ *
128
+ * Race condition fix: The delete-then-set pattern for LRU tracking creates a window
129
+ * where another operation could observe the key as missing. We use a try-finally
130
+ * pattern to ensure atomicity at the application level.
131
+ */
132
+ _getCachedResult(key) {
133
+ const entry = this.queryCache.get(key);
134
+ if (!entry) {
135
+ return null;
136
+ }
137
+ // Check TTL - must be done before any mutation
138
+ const now = Date.now();
139
+ if (now - entry.timestamp > this.cacheConfig.ttlMs) {
140
+ this.queryCache.delete(key);
141
+ return null;
142
+ }
143
+ // Move to end (most recently used) - delete and re-add with updated timestamp
144
+ // While not truly atomic, the key remains accessible during the operation
145
+ // since we already have the entry reference
146
+ this.queryCache.delete(key);
147
+ this.queryCache.set(key, {
148
+ ...entry,
149
+ timestamp: now, // Update timestamp for LRU tracking
150
+ });
151
+ return entry.result;
152
+ }
153
+ /**
154
+ * Cache a search result
155
+ * @private
156
+ */
157
+ _cacheResult(key, result) {
158
+ // Evict oldest if at max size
159
+ if (this.queryCache.size >= this.cacheConfig.maxSize) {
160
+ const firstKey = this.queryCache.keys().next().value;
161
+ if (firstKey !== undefined) {
162
+ this.queryCache.delete(firstKey);
163
+ }
164
+ }
165
+ this.queryCache.set(key, {
166
+ result,
167
+ timestamp: Date.now(),
168
+ });
169
+ }
170
+ /**
171
+ * Clear all cached results
172
+ */
173
+ clearCache() {
174
+ this.queryCache.clear();
175
+ }
176
+ /**
177
+ * Get cache statistics
178
+ */
179
+ getCacheStats() {
180
+ return {
181
+ size: this.queryCache.size,
182
+ maxSize: this.cacheConfig.maxSize,
183
+ ttlMs: this.cacheConfig.ttlMs,
184
+ };
185
+ }
186
+ /**
187
+ * Validate and sanitize metadata to prevent prototype pollution
188
+ * @private
189
+ */
190
+ _validateMetadata(metadata) {
191
+ if (typeof metadata !== "object" || metadata === null) {
192
+ throw new Error("Metadata must be a non-null object");
193
+ }
194
+ // Sanitize keys to prevent prototype pollution
195
+ const sanitized = {};
196
+ for (const [key, value] of Object.entries(metadata)) {
197
+ // Skip dangerous keys that could pollute prototype
198
+ if (key === "__proto__" || key === "constructor" || key === "prototype") {
199
+ continue;
200
+ }
201
+ // Skip inherited properties
202
+ if (!Object.prototype.hasOwnProperty.call(metadata, key)) {
203
+ continue;
204
+ }
205
+ sanitized[key] = value;
206
+ }
207
+ return sanitized;
208
+ }
209
+ /**
210
+ * Sanitize and validate content before storage
211
+ * @private
212
+ */
213
+ _sanitizeContent(content) {
214
+ if (typeof content !== "string") {
215
+ throw new Error("Content must be a string");
216
+ }
217
+ // Limit content length
218
+ const MAX_CONTENT_LENGTH = 100000; // 100KB limit
219
+ if (content.length > MAX_CONTENT_LENGTH) {
220
+ throw new Error(`Content exceeds maximum length of ${MAX_CONTENT_LENGTH} characters`);
221
+ }
222
+ return content.trim();
223
+ }
224
+ /**
225
+ * Initialize the LanceDB client
226
+ */
227
+ async init() {
228
+ if (this.isInitialized) {
229
+ return;
230
+ }
231
+ if (!this.enableMemory) {
232
+ this.isInitialized = true;
233
+ if (process.env.YAMO_DEBUG === "true") {
234
+ logger.debug("MemoryMesh initialization skipped (enableMemory=false)");
235
+ }
236
+ return;
237
+ }
238
+ try {
239
+ // Load configuration
240
+ this.config = getConfig();
241
+ // Detect vector dimension from embedding model configuration
242
+ const modelName = process.env.EMBEDDING_MODEL_NAME || "Xenova/all-MiniLM-L6-v2";
243
+ const envDimension = parseInt(process.env.EMBEDDING_DIMENSION || "0") || null;
244
+ this.vectorDimension = envDimension || getEmbeddingDimension(modelName);
245
+ // Only log in debug mode to avoid corrupting spinner/REPL display
246
+ if (process.env.YAMO_DEBUG === "true") {
247
+ logger.debug({ dimension: this.vectorDimension, model: modelName }, "Using vector dimension");
248
+ }
249
+ // Use custom dbDir if provided (for test isolation), otherwise use config
250
+ const dbUri = this.dbDir || this.config.LANCEDB_URI;
251
+ // Create LanceDBClient with detected dimension
252
+ this.client = new LanceDBClient({
253
+ uri: dbUri,
254
+ tableName: this.config.LANCEDB_MEMORY_TABLE,
255
+ vectorDimension: this.vectorDimension,
256
+ maxRetries: 3,
257
+ retryDelay: 1000,
258
+ });
259
+ // Connect to database
260
+ await this.client.connect();
261
+ // Configure embedding factory from environment
262
+ const embeddingConfigs = this._parseEmbeddingConfig();
263
+ this.embeddingFactory.configure(embeddingConfigs);
264
+ await this.embeddingFactory.init();
265
+ // Hydrate Keyword Search (In-Memory)
266
+ if (this.client) {
267
+ try {
268
+ const allRecords = await this.client.getAll({ limit: 10000 });
269
+ this.keywordSearch.load(allRecords);
270
+ }
271
+ catch (_e) {
272
+ // Ignore if table doesn't exist yet
273
+ }
274
+ }
275
+ // Initialize extension tables if enabled
276
+ if (this.enableYamo && this.client && this.client.db) {
277
+ try {
278
+ const { createYamoTable } = await import("../yamo/schema.js");
279
+ this.yamoTable = await createYamoTable(this.client.db, "yamo_blocks");
280
+ // Initialize synthesized skills table (Recursive Skill Synthesis)
281
+ // const { createSynthesizedSkillSchema } = await import('./schema'); // Imported statically now
282
+ const existingTables = await this.client.db.tableNames();
283
+ if (existingTables.includes("synthesized_skills")) {
284
+ this.skillTable =
285
+ await this.client.db.openTable("synthesized_skills");
286
+ }
287
+ else {
288
+ const skillSchema = createSynthesizedSkillSchema(this.vectorDimension);
289
+ this.skillTable = await this.client.db.createTable("synthesized_skills", [], {
290
+ schema: skillSchema,
291
+ });
292
+ }
293
+ if (process.env.YAMO_DEBUG === "true") {
294
+ logger.debug("YAMO blocks and synthesized skills tables initialized");
295
+ }
296
+ }
297
+ catch (e) {
298
+ logger.warn({ err: e }, "Failed to initialize extension tables");
299
+ }
300
+ }
301
+ this.isInitialized = true;
302
+ }
303
+ catch (error) {
304
+ const e = error instanceof Error ? error : new Error(String(error));
305
+ throw e;
306
+ }
307
+ }
308
+ /**
309
+ * Add content to memory with auto-generated embedding and scrubbing.
310
+ *
311
+ * This is the primary method for storing information in the memory mesh.
312
+ * The content goes through several processing steps:
313
+ *
314
+ * 1. **Scrubbing**: PII and sensitive data are sanitized (if enabled)
315
+ * 2. **Validation**: Content length and metadata are validated
316
+ * 3. **Embedding**: Content is converted to a vector representation
317
+ * 4. **Storage**: Record is stored in LanceDB with metadata
318
+ * 5. **Emission**: Optional YAMO block emitted for provenance tracking
319
+ *
320
+ * @param content - The text content to store in memory
321
+ * @param metadata - Optional metadata (type, source, tags, etc.)
322
+ * @returns Promise with memory record containing id, content, metadata, created_at
323
+ *
324
+ * @example
325
+ * ```typescript
326
+ * const memory = await mesh.add("User likes TypeScript", {
327
+ * type: "preference",
328
+ * source: "chat",
329
+ * tags: ["programming", "languages"]
330
+ * });
331
+ * ```
332
+ *
333
+ * @throws {Error} If content exceeds max length (100KB)
334
+ * @throws {Error} If embedding generation fails
335
+ * @throws {Error} If database client is not initialized
336
+ */
337
+ async add(content, metadata = {}) {
338
+ await this.init();
339
+ const type = metadata.type || "event";
340
+ const enrichedMetadata = { ...metadata, type };
341
+ try {
342
+ let processedContent = content;
343
+ let scrubbedMetadata = {};
344
+ try {
345
+ const scrubbedResult = await this.scrubber.process({
346
+ content: content,
347
+ source: "memory-api",
348
+ type: "txt",
349
+ });
350
+ if (scrubbedResult.success && scrubbedResult.chunks.length > 0) {
351
+ processedContent = scrubbedResult.chunks
352
+ .map((c) => c.text)
353
+ .join("\n\n");
354
+ if (scrubbedResult.metadata) {
355
+ scrubbedMetadata = {
356
+ ...scrubbedResult.metadata,
357
+ scrubber_telemetry: JSON.stringify(scrubbedResult.telemetry),
358
+ };
359
+ }
360
+ }
361
+ }
362
+ catch (scrubError) {
363
+ if (process.env.YAMO_DEBUG === "true") {
364
+ logger.error({ err: scrubError }, "Scrubber failed");
365
+ }
366
+ }
367
+ const sanitizedContent = this._sanitizeContent(processedContent);
368
+ const sanitizedMetadata = this._validateMetadata({
369
+ ...scrubbedMetadata,
370
+ ...enrichedMetadata,
371
+ });
372
+ if (process.env.YAMO_DEBUG === "true") {
373
+ console.error("[DEBUG] brain.add() scrubbedMetadata.type:", scrubbedMetadata.type);
374
+ console.error("[DEBUG] brain.add() enrichedMetadata.type:", enrichedMetadata.type);
375
+ console.error("[DEBUG] brain.add() sanitizedMetadata.type:", sanitizedMetadata.type);
376
+ }
377
+ const vector = await this.embeddingFactory.embed(sanitizedContent);
378
+ // Dedup: search by the already-computed vector before inserting.
379
+ // Catches exact duplicates regardless of which write path is used,
380
+ // protecting callers that bypass captureInteraction()'s dedup guard.
381
+ if (this.client) {
382
+ const nearest = await this.client.search(vector, { limit: 1 });
383
+ if (nearest.length > 0 && nearest[0].content === sanitizedContent) {
384
+ return {
385
+ id: nearest[0].id,
386
+ content: sanitizedContent,
387
+ metadata: sanitizedMetadata,
388
+ created_at: new Date().toISOString(),
389
+ };
390
+ }
391
+ }
392
+ const id = `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
393
+ const record = {
394
+ id,
395
+ vector,
396
+ content: sanitizedContent,
397
+ metadata: JSON.stringify(sanitizedMetadata),
398
+ };
399
+ if (process.env.YAMO_DEBUG === "true") {
400
+ console.error("[DEBUG] record.metadata.type:", JSON.parse(record.metadata).type);
401
+ }
402
+ if (!this.client) {
403
+ throw new Error("Database client not initialized");
404
+ }
405
+ const result = await this.client.add(record);
406
+ if (process.env.YAMO_DEBUG === "true") {
407
+ try {
408
+ console.error("[DEBUG] result.metadata.type:", JSON.parse(result.metadata).type);
409
+ }
410
+ catch {
411
+ console.error("[DEBUG] result.metadata:", result.metadata);
412
+ }
413
+ }
414
+ this.keywordSearch.add(record.id, record.content, sanitizedMetadata);
415
+ if (this.enableYamo) {
416
+ this._emitYamoBlock("retain", result.id, YamoEmitter.buildRetainBlock({
417
+ content: sanitizedContent,
418
+ metadata: sanitizedMetadata,
419
+ id: result.id,
420
+ agentId: this.agentId,
421
+ memoryType: sanitizedMetadata.type || "event",
422
+ })).catch((error) => {
423
+ // Log emission failures in debug mode but don't throw
424
+ if (process.env.YAMO_DEBUG === "true") {
425
+ logger.warn({ err: error }, "Failed to emit YAMO block (retain)");
426
+ }
427
+ });
428
+ }
429
+ return {
430
+ id: result.id,
431
+ content: sanitizedContent,
432
+ metadata: sanitizedMetadata,
433
+ created_at: new Date().toISOString(),
434
+ };
435
+ }
436
+ catch (error) {
437
+ throw error instanceof Error ? error : new Error(String(error));
438
+ }
439
+ }
440
+ /**
441
+ * Reflect on recent memories
442
+ */
443
+ async reflect(options = {}) {
444
+ await this.init();
445
+ const lookback = options.lookback || 10;
446
+ const topic = options.topic;
447
+ const generate = options.generate !== false;
448
+ let memories = [];
449
+ if (topic) {
450
+ memories = await this.search(topic, { limit: lookback });
451
+ }
452
+ else {
453
+ const all = await this.getAll();
454
+ memories = all
455
+ .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
456
+ .slice(0, lookback);
457
+ }
458
+ const prompt = `Review these memories. Synthesize a high-level "belief" or "observation".`;
459
+ if (!generate || !this.enableLLM || !this.llmClient) {
460
+ return {
461
+ topic,
462
+ count: memories.length,
463
+ context: memories.map((m) => ({
464
+ content: m.content,
465
+ type: m.metadata?.type || "event",
466
+ id: m.id,
467
+ })),
468
+ prompt,
469
+ };
470
+ }
471
+ let reflection = "";
472
+ let confidence = 0;
473
+ try {
474
+ const result = await this.llmClient.reflect(prompt, memories);
475
+ reflection = result.reflection;
476
+ confidence = result.confidence;
477
+ }
478
+ catch (_error) {
479
+ reflection = `Aggregated from ${memories.length} memories on topic: ${topic || "general"}`;
480
+ confidence = 0.5;
481
+ }
482
+ const reflectionId = `reflect_${Date.now()}_${crypto.randomBytes(4).toString("hex")}`;
483
+ await this.add(reflection, {
484
+ type: "reflection",
485
+ topic: topic || "general",
486
+ source_memory_count: memories.length,
487
+ confidence,
488
+ generated_at: new Date().toISOString(),
489
+ });
490
+ let yamoBlock = null;
491
+ if (this.enableYamo) {
492
+ yamoBlock = YamoEmitter.buildReflectBlock({
493
+ topic: topic || "general",
494
+ memoryCount: memories.length,
495
+ agentId: this.agentId,
496
+ reflection,
497
+ confidence,
498
+ });
499
+ await this._emitYamoBlock("reflect", reflectionId, yamoBlock);
500
+ }
501
+ return {
502
+ id: reflectionId,
503
+ topic: topic || "general",
504
+ reflection,
505
+ confidence,
506
+ sourceMemoryCount: memories.length,
507
+ yamoBlock,
508
+ createdAt: new Date().toISOString(),
509
+ };
510
+ }
511
+ /**
512
+ * Ingest synthesized skill
513
+ * @param sourceFilePath - If provided, skip file write (file already exists)
514
+ */
515
+ async ingestSkill(yamoText, metadata = {}, sourceFilePath) {
516
+ await this.init();
517
+ if (!this.skillTable) {
518
+ throw new Error("Skill table not initialized");
519
+ }
520
+ // DEBUG: Trace sourceFilePath parameter
521
+ if (process.env.YAMO_DEBUG_PATHS === "true") {
522
+ console.error(`[BRAIN.ingestSkill] sourceFilePath parameter: ${sourceFilePath || "undefined"}`);
523
+ }
524
+ try {
525
+ const identity = extractSkillIdentity(yamoText);
526
+ const name = metadata.name || identity.name;
527
+ const intent = identity.intent;
528
+ const description = identity.description;
529
+ // RECURSION DETECTION: Check for recursive naming patterns
530
+ // Patterns like "SkillSkill", "SkillSkillSkill" indicate filename-derived names
531
+ const recursivePattern = /^(Skill|skill){2,}/;
532
+ if (recursivePattern.test(name)) {
533
+ logger.warn({ originalName: name }, "Detected recursive naming pattern, rejecting ingestion to prevent loop");
534
+ throw new Error(`Recursive naming pattern detected: ${name}. Skills must have proper name: field.`);
535
+ }
536
+ // Extract tags for tag-aware embeddings (improves semantic search)
537
+ const tags = extractSkillTags(yamoText);
538
+ const tagText = tags.length > 0 ? `\nTags: ${tags.join(", ")}` : "";
539
+ const embeddingText = `Skill: ${name}\nIntent: ${intent}${tagText}\nDescription: ${description}`;
540
+ const vector = await this.embeddingFactory.embed(embeddingText);
541
+ const id = `skill_${Date.now()}_${crypto.randomBytes(2).toString("hex")}`;
542
+ const skillMetadata = {
543
+ reliability: 0.5,
544
+ use_count: 0,
545
+ source: "manual",
546
+ ...metadata,
547
+ // Store source file path for policy loading and parent discovery
548
+ ...(sourceFilePath && { source_file: sourceFilePath }),
549
+ };
550
+ const record = {
551
+ id,
552
+ name,
553
+ intent,
554
+ yamo_text: yamoText,
555
+ vector,
556
+ metadata: JSON.stringify(skillMetadata),
557
+ created_at: new Date(),
558
+ };
559
+ await this.skillTable.add([record]);
560
+ // NEW: Persist to filesystem for longevity and visibility
561
+ // Skip if sourceFilePath provided (file already exists from SkillCreator)
562
+ // Skip if using in-memory database (:memory:)
563
+ if (!sourceFilePath && this.dbDir !== ":memory:") {
564
+ try {
565
+ const skillsDir = path.resolve(process.cwd(), this.skillDirectories[0] || "skills");
566
+ if (!fs.existsSync(skillsDir)) {
567
+ fs.mkdirSync(skillsDir, { recursive: true });
568
+ }
569
+ // Robust filename with length limit to prevent ENAMETOOLONG
570
+ const safeName = name
571
+ .toLowerCase()
572
+ .replace(/[^a-z0-9]/g, "-")
573
+ .replace(/-+/g, "-")
574
+ .substring(0, 50);
575
+ const fileName = `skill-${safeName}.yamo`;
576
+ const filePath = path.join(skillsDir, fileName);
577
+ // Only write if file doesn't already exist to prevent duplicates
578
+ if (!fs.existsSync(filePath)) {
579
+ fs.writeFileSync(filePath, yamoText, "utf8");
580
+ if (process.env.YAMO_DEBUG === "true") {
581
+ logger.debug({ filePath }, "Skill persisted to file");
582
+ }
583
+ }
584
+ }
585
+ catch (fileError) {
586
+ logger.warn({ err: fileError }, "Failed to persist skill to file");
587
+ }
588
+ }
589
+ return { id, name, intent };
590
+ }
591
+ catch (error) {
592
+ throw new Error(`Skill ingestion failed: ${error.message}`);
593
+ }
594
+ }
595
+ /**
596
+ * Recursive Skill Synthesis
597
+ */
598
+ async synthesize(options = {}) {
599
+ await this.init();
600
+ const topic = options.topic || "general_improvement";
601
+ const enrichedPrompt = options.enrichedPrompt || topic; // PHASE 4: Use enriched prompt
602
+ // const lookback = options.lookback || 20;
603
+ logger.info({ topic, enrichedPrompt }, "Synthesizing logic");
604
+ // OPTIMIZATION: If we have an execution engine (kernel), use SkillCreator!
605
+ if (this._kernel_execute) {
606
+ logger.info("Dispatching to SkillCreator agent...");
607
+ try {
608
+ // Use stored skill directories
609
+ const skillDirs = this.skillDirectories;
610
+ // Track existing .yamo files before SkillCreator runs
611
+ const filesBefore = new Set();
612
+ for (const dir of skillDirs) {
613
+ if (fs.existsSync(dir)) {
614
+ const walk = (currentDir) => {
615
+ try {
616
+ const entries = fs.readdirSync(currentDir, {
617
+ withFileTypes: true,
618
+ });
619
+ for (const entry of entries) {
620
+ const fullPath = path.join(currentDir, entry.name);
621
+ if (entry.isDirectory()) {
622
+ walk(fullPath);
623
+ }
624
+ else if (entry.isFile() && entry.name.endsWith(".yamo")) {
625
+ filesBefore.add(fullPath);
626
+ }
627
+ }
628
+ }
629
+ catch (e) {
630
+ // Skip directories we can't read
631
+ logger.debug({ dir, error: e }, "Could not read directory");
632
+ }
633
+ };
634
+ walk(dir);
635
+ }
636
+ }
637
+ // PHASE 4: Use enriched prompt for SkillCreator
638
+ await this._kernel_execute(`SkillCreator: design a new skill to handle ${enrichedPrompt}`, {
639
+ v1_1_enabled: true,
640
+ });
641
+ // Find newly created .yamo file
642
+ let newSkillFile;
643
+ for (const dir of skillDirs) {
644
+ if (fs.existsSync(dir)) {
645
+ const walk = (currentDir) => {
646
+ try {
647
+ const entries = fs.readdirSync(currentDir, {
648
+ withFileTypes: true,
649
+ });
650
+ for (const entry of entries) {
651
+ const fullPath = path.join(currentDir, entry.name);
652
+ if (entry.isDirectory()) {
653
+ walk(fullPath);
654
+ }
655
+ else if (entry.isFile() && entry.name.endsWith(".yamo")) {
656
+ if (!filesBefore.has(fullPath)) {
657
+ newSkillFile = fullPath;
658
+ }
659
+ }
660
+ }
661
+ }
662
+ catch (e) {
663
+ logger.debug({ dir, error: e }, "Could not read directory");
664
+ }
665
+ };
666
+ walk(dir);
667
+ }
668
+ }
669
+ // Ingest the newly created skill file
670
+ if (newSkillFile) {
671
+ logger.info({ skillFile: newSkillFile }, "Ingesting newly synthesized skill");
672
+ let skillContent = fs.readFileSync(newSkillFile, "utf8");
673
+ // PHASE 4: Expand compressed → canonical for disk storage
674
+ // Skills created by evolution are typically compressed; expand to canonical for readability
675
+ // Skip expansion in test environment or when disabled
676
+ const expansionEnabled = process.env.YAMO_EXPANSION_ENABLED !== "false";
677
+ const isCompressed = !skillContent.includes("---") ||
678
+ (skillContent.includes("---") &&
679
+ skillContent.split("---").length <= 1);
680
+ if (expansionEnabled && isCompressed) {
681
+ logger.info({ skillFile: newSkillFile }, "Expanding compressed skill to canonical format");
682
+ try {
683
+ const expanded = await this._kernel_execute("skill-expansion-system-prompt.yamo", {
684
+ input_yamo: skillContent,
685
+ });
686
+ if (expanded && expanded.canonical_yamo) {
687
+ skillContent = expanded.canonical_yamo;
688
+ // Write expanded canonical format back to disk
689
+ fs.writeFileSync(newSkillFile, skillContent, "utf8");
690
+ logger.info({ skillFile: newSkillFile }, "Skill expanded to canonical format on disk");
691
+ }
692
+ }
693
+ catch (e) {
694
+ logger.warn({ err: e }, "Failed to expand skill to canonical, using compressed format");
695
+ }
696
+ }
697
+ // ENSURE: Synthesized skills always have proper metadata with meaningful name
698
+ // This prevents duplicate skill-agent-{timestamp}.yamo files
699
+ const synIdentity = extractSkillIdentity(skillContent);
700
+ const hasName = !synIdentity.name.startsWith("Unnamed_");
701
+ if (!skillContent.includes("---") || !hasName) {
702
+ logger.info({ skillFile: newSkillFile }, "Adding metadata block to synthesized skill");
703
+ const intent = synIdentity.intent !== "general_procedure"
704
+ ? synIdentity.intent.replace(/[^a-zA-Z0-9]/g, "")
705
+ : "Synthesized";
706
+ const PascalCase = intent.charAt(0).toUpperCase() + intent.slice(1);
707
+ const skillName = `${PascalCase}_${Date.now().toString(36)}`;
708
+ const metadata = `---
709
+ name: ${skillName}
710
+ version: 1.0.0
711
+ author: YAMO Evolution
712
+ license: MIT
713
+ tags: synthesized, evolution, auto-generated
714
+ description: Auto-generated skill to handle: ${enrichedPrompt || topic}
715
+ ---
716
+ `;
717
+ // Prepend metadata if skill doesn't have it
718
+ if (!skillContent.startsWith("---")) {
719
+ skillContent = metadata + skillContent;
720
+ // Write back to disk with proper metadata
721
+ fs.writeFileSync(newSkillFile, skillContent, "utf8");
722
+ logger.info({ skillFile: newSkillFile, skillName }, "Added metadata block to synthesized skill");
723
+ }
724
+ }
725
+ const skill = await this.ingestSkill(skillContent, {
726
+ source: "synthesized",
727
+ trigger_topic: topic,
728
+ }, newSkillFile);
729
+ return {
730
+ status: "success",
731
+ analysis: "SkillCreator orchestrated evolution",
732
+ skill_id: skill.id,
733
+ skill_name: skill.name,
734
+ yamo_text: skillContent,
735
+ };
736
+ }
737
+ // Fallback if no new file found
738
+ return {
739
+ status: "success",
740
+ analysis: "SkillCreator orchestrated evolution (no file detected)",
741
+ skill_name: topic.split(" ")[0],
742
+ };
743
+ }
744
+ catch (e) {
745
+ logger.error({ err: e }, "SkillCreator agent failed");
746
+ return {
747
+ status: "error",
748
+ error: e.message,
749
+ analysis: "SkillCreator agent failed",
750
+ };
751
+ }
752
+ }
753
+ // SkillCreator is required for synthesis
754
+ if (!this._kernel_execute) {
755
+ throw new Error("Kernel execution (_kernel_execute) is required for synthesis. Use YamoKernel instead of MemoryMesh directly.");
756
+ }
757
+ // Should never reach here
758
+ return {
759
+ status: "error",
760
+ analysis: "Unexpected state in synthesis",
761
+ };
762
+ }
763
+ /**
764
+ * Update reliability
765
+ */
766
+ async updateSkillReliability(id, success) {
767
+ await this.init();
768
+ if (!this.skillTable) {
769
+ throw new Error("Skill table not initialized");
770
+ }
771
+ try {
772
+ const results = await this.skillTable
773
+ .query()
774
+ .filter(`id == '${id}'`)
775
+ .toArray();
776
+ if (results.length === 0) {
777
+ throw new Error(`Skill ${id} not found`);
778
+ }
779
+ const record = results[0];
780
+ const metadata = JSON.parse(record.metadata);
781
+ const adjustment = success ? 0.1 : -0.2;
782
+ metadata.reliability = Math.max(0, Math.min(1.0, (metadata.reliability || 0.5) + adjustment));
783
+ metadata.use_count = (metadata.use_count || 0) + 1;
784
+ metadata.last_used = new Date().toISOString();
785
+ await this.skillTable.update({
786
+ where: `id == '${id}'`,
787
+ values: { metadata: JSON.stringify(metadata) },
788
+ });
789
+ return {
790
+ id,
791
+ reliability: metadata.reliability,
792
+ use_count: metadata.use_count,
793
+ };
794
+ }
795
+ catch (error) {
796
+ throw new Error(`Failed to update skill reliability: ${error.message}`);
797
+ }
798
+ }
799
+ /**
800
+ * Prune skills
801
+ */
802
+ async pruneSkills(threshold = 0.3) {
803
+ await this.init();
804
+ if (!this.skillTable) {
805
+ throw new Error("Skill table not initialized");
806
+ }
807
+ try {
808
+ const allSkills = await this.skillTable.query().toArray();
809
+ let prunedCount = 0;
810
+ for (const skill of allSkills) {
811
+ const metadata = JSON.parse(skill.metadata);
812
+ if (metadata.reliability < threshold) {
813
+ await this.skillTable.delete(`id == '${skill.id}'`);
814
+ prunedCount++;
815
+ }
816
+ }
817
+ return {
818
+ pruned_count: prunedCount,
819
+ total_remaining: allSkills.length - prunedCount,
820
+ };
821
+ }
822
+ catch (error) {
823
+ throw new Error(`Pruning failed: ${error.message}`);
824
+ }
825
+ }
826
+ /**
827
+ * List all synthesized skills
828
+ * @param {Object} [options={}] - Search options
829
+ * @returns {Promise<Array>} Normalized skill results
830
+ */
831
+ async listSkills(options = {}) {
832
+ await this.init();
833
+ if (!this.skillTable) {
834
+ return [];
835
+ }
836
+ try {
837
+ const limit = options.limit || 10;
838
+ const results = await this.skillTable.query().limit(limit).toArray();
839
+ return results.map((r) => ({
840
+ ...r,
841
+ score: 1.0, // Full score for direct listing
842
+ // Parse metadata JSON string to object
843
+ metadata: typeof r.metadata === "string" ? JSON.parse(r.metadata) : r.metadata,
844
+ }));
845
+ }
846
+ catch (error) {
847
+ if (process.env.YAMO_DEBUG === "true") {
848
+ logger.error({ err: error }, "Skill list failed");
849
+ }
850
+ return [];
851
+ }
852
+ }
853
+ /**
854
+ * Search for synthesized skills by semantic intent
855
+ * @param {string} query - Search query (intent description)
856
+ * @param {Object} [options={}] - Search options
857
+ * @returns {Promise<Array>} Normalized skill results
858
+ */
859
+ async searchSkills(query, options = {}) {
860
+ await this.init();
861
+ if (!this.skillTable) {
862
+ return [];
863
+ }
864
+ try {
865
+ // 1. Check for explicit skill targeting (e.g., "Architect: ...")
866
+ const explicitMatch = query.match(/^([a-zA-Z0-9_-]+):/);
867
+ if (explicitMatch) {
868
+ const targetName = explicitMatch[1];
869
+ const directResults = await this.skillTable
870
+ .query()
871
+ .where(`name == '${targetName}'`)
872
+ .limit(1)
873
+ .toArray();
874
+ if (directResults.length > 0) {
875
+ return directResults.map((r) => ({
876
+ ...r,
877
+ score: 1.0, // Maximum score for explicit target
878
+ }));
879
+ }
880
+ }
881
+ // 2. Hybrid search: vector + keyword matching
882
+ const limit = options.limit || 5;
883
+ // 2a. Vector search (get more candidates for fusion)
884
+ const vector = await this.embeddingFactory.embed(query);
885
+ const vectorResults = await this.skillTable
886
+ .search(vector)
887
+ .limit(limit * 3)
888
+ .toArray();
889
+ // 2b. Keyword matching against skill fields (including tags)
890
+ const queryTokens = this._tokenizeQuery(query);
891
+ const keywordScores = new Map();
892
+ let maxKeywordScore = 0;
893
+ for (const result of vectorResults) {
894
+ let score = 0;
895
+ const nameTokens = this._tokenizeQuery(result.name);
896
+ const intentTokens = this._tokenizeQuery(result.intent || "");
897
+ const tags = extractSkillTags(result.yamo_text);
898
+ const tagTokens = tags.flatMap((t) => this._tokenizeQuery(t));
899
+ const descTokens = this._tokenizeQuery(result.yamo_text.substring(0, 500)); // First 500 chars
900
+ // Token matching with field-based weights
901
+ // Support both exact and partial matches (for compound words)
902
+ for (const qToken of queryTokens) {
903
+ // Exact or partial match in name
904
+ if (nameTokens.some((nt) => nt === qToken || qToken.includes(nt) || nt.includes(qToken))) {
905
+ score += 10.0; // Highest: name match
906
+ }
907
+ // Exact or partial match in tags
908
+ if (tagTokens.some((tt) => tt === qToken || qToken.includes(tt) || tt.includes(qToken))) {
909
+ score += 7.0; // High: tag match
910
+ }
911
+ // Exact match in intent
912
+ if (intentTokens.some((it) => it === qToken)) {
913
+ score += 5.0; // Medium: intent match
914
+ }
915
+ // Exact match in description
916
+ if (descTokens.some((dt) => dt === qToken)) {
917
+ score += 1.0; // Low: description match
918
+ }
919
+ }
920
+ if (score > 0) {
921
+ keywordScores.set(result.id, score);
922
+ maxKeywordScore = Math.max(maxKeywordScore, score);
923
+ }
924
+ }
925
+ // 2c. Combine scores using weighted fusion
926
+ const fusedResults = vectorResults.map((r) => {
927
+ // Normalize vector distance to [0, 1] similarity score
928
+ // LanceDB cosine distance ranges from 0 (identical) to 2 (opposite)
929
+ const rawDistance = r._distance !== undefined ? r._distance : 1.0;
930
+ const vectorScore = Math.max(0, Math.min(1.0, 1 - rawDistance / 2));
931
+ const keywordScore = keywordScores.get(r.id) || 0;
932
+ // Normalize keyword score by max observed (or use fixed max to avoid division by zero)
933
+ const normalizedKeyword = maxKeywordScore > 0 ? keywordScore / maxKeywordScore : 0;
934
+ // Weighted combination: 70% keyword, 30% vector
935
+ // Keywords get higher weight to prioritize exact matches
936
+ const combinedScore = 0.7 * normalizedKeyword + 0.3 * vectorScore;
937
+ return {
938
+ ...r,
939
+ score: combinedScore,
940
+ _vectorScore: vectorScore,
941
+ _keywordScore: keywordScore,
942
+ };
943
+ });
944
+ // Sort by combined score and return top results
945
+ // Don't normalize - we already calculated hybrid scores
946
+ return fusedResults
947
+ .sort((a, b) => b.score - a.score)
948
+ .slice(0, limit)
949
+ .map((r) => ({
950
+ ...r,
951
+ // Parse metadata JSON string to object for policy loading
952
+ metadata: typeof r.metadata === "string"
953
+ ? JSON.parse(r.metadata)
954
+ : r.metadata,
955
+ }))
956
+ .map((r) => ({
957
+ ...r,
958
+ score: parseFloat(r.score.toFixed(2)), // Round for consistency
959
+ }));
960
+ }
961
+ catch (error) {
962
+ if (process.env.YAMO_DEBUG === "true") {
963
+ logger.error({ err: error }, "Skill search failed");
964
+ }
965
+ return [];
966
+ }
967
+ }
968
+ /**
969
+ * Get recent YAMO logs for the heartbeat
970
+ * @param {Object} options
971
+ */
972
+ async getYamoLog(options = {}) {
973
+ if (!this.yamoTable) {
974
+ return [];
975
+ }
976
+ const limit = options.limit || 10;
977
+ const maxRetries = 5;
978
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
979
+ try {
980
+ // orderBy might not be in LanceDB types but is supported in runtime
981
+ const query = this.yamoTable.query();
982
+ let results;
983
+ try {
984
+ results = await query
985
+ .orderBy("timestamp", "desc")
986
+ .limit(limit)
987
+ .toArray();
988
+ }
989
+ catch (_e) {
990
+ // Fallback if orderBy not supported
991
+ results = await query.limit(1000).toArray(); // Get more and sort manually
992
+ }
993
+ // Sort newest first in memory
994
+ return results
995
+ .sort((a, b) => {
996
+ const tA = a.timestamp instanceof Date
997
+ ? a.timestamp.getTime()
998
+ : Number(a.timestamp);
999
+ const tB = b.timestamp instanceof Date
1000
+ ? b.timestamp.getTime()
1001
+ : Number(b.timestamp);
1002
+ return tB - tA;
1003
+ })
1004
+ .slice(0, limit)
1005
+ .map((r) => ({
1006
+ id: r.id,
1007
+ yamoText: r.yamo_text,
1008
+ timestamp: r.timestamp,
1009
+ }));
1010
+ }
1011
+ catch (error) {
1012
+ const msg = error.message || "";
1013
+ const isRetryable = msg.includes("LanceError(IO)") ||
1014
+ msg.includes("next batch") ||
1015
+ msg.includes("No such file") ||
1016
+ msg.includes("busy");
1017
+ if (isRetryable && attempt < maxRetries) {
1018
+ // If we suspect stale table handle, try to refresh it
1019
+ try {
1020
+ // Re-open table to get fresh file handles
1021
+ const { createYamoTable } = await import("../yamo/schema.js");
1022
+ if (this.dbDir) {
1023
+ const db = await lancedb.connect(this.dbDir);
1024
+ this.yamoTable = await createYamoTable(db, "yamo_blocks");
1025
+ if (process.env.YAMO_DEBUG === "true") {
1026
+ logger.debug({ attempt, msg: msg.substring(0, 100) }, "Refreshed yamoTable handle during retry");
1027
+ }
1028
+ }
1029
+ }
1030
+ catch (e) {
1031
+ logger.warn({ err: e }, "Failed to refresh table handle during retry");
1032
+ }
1033
+ const delay = 500 * Math.pow(2, attempt - 1); // 500ms, 1000ms, 2000ms, 4000ms
1034
+ await new Promise((resolve) => setTimeout(resolve, delay));
1035
+ continue;
1036
+ }
1037
+ // Only log warning on final failure
1038
+ if (attempt === maxRetries) {
1039
+ logger.warn({ err: error }, "Failed to get log after retries");
1040
+ }
1041
+ else if (!isRetryable) {
1042
+ // Non-retryable error
1043
+ logger.warn({ err: error }, "Failed to get log (non-retryable)");
1044
+ break;
1045
+ }
1046
+ }
1047
+ }
1048
+ return [];
1049
+ }
1050
+ /**
1051
+ * Emit a YAMO block to the YAMO blocks table
1052
+ * @private
1053
+ *
1054
+ * Note: YAMO emission is non-critical - failures are logged but don't throw
1055
+ * to prevent disrupting the main operation.
1056
+ */
1057
+ async _emitYamoBlock(operationType, memoryId, yamoText) {
1058
+ if (!this.yamoTable) {
1059
+ return;
1060
+ }
1061
+ const yamoId = `yamo_${operationType}_${Date.now()}_${crypto.randomBytes(4).toString("hex")}`;
1062
+ try {
1063
+ await this.yamoTable.add([
1064
+ {
1065
+ id: yamoId,
1066
+ agent_id: this.agentId,
1067
+ operation_type: operationType,
1068
+ yamo_text: yamoText,
1069
+ timestamp: new Date(),
1070
+ block_hash: null,
1071
+ prev_hash: null,
1072
+ metadata: JSON.stringify({
1073
+ memory_id: memoryId || null,
1074
+ timestamp: new Date().toISOString(),
1075
+ }),
1076
+ },
1077
+ ]);
1078
+ }
1079
+ catch (error) {
1080
+ // Log emission failures in debug mode
1081
+ // Emission is non-critical, so we don't throw
1082
+ if (process.env.YAMO_DEBUG === "true") {
1083
+ logger.warn({ err: error, operationType }, "YAMO emission failed");
1084
+ }
1085
+ }
1086
+ }
1087
+ /**
1088
+ * Search memory using hybrid vector + keyword search with Reciprocal Rank Fusion (RRF).
1089
+ *
1090
+ * This method performs semantic search by combining:
1091
+ * 1. **Vector Search**: Uses embeddings to find semantically similar content
1092
+ * 2. **Keyword Search**: Uses BM25-style keyword matching
1093
+ * 3. **RRF Fusion**: Combines both result sets using Reciprocal Rank Fusion
1094
+ *
1095
+ * The RRF algorithm scores each document as: `sum(1 / (k + rank))` where k=60.
1096
+ * This gives higher scores to documents that rank well in BOTH searches.
1097
+ *
1098
+ * **Performance**: Uses adaptive sorting strategy
1099
+ * - Small datasets (≤ 2× limit): Full sort O(n log n)
1100
+ * - Large datasets: Partial selection sort O(n×k) where k=limit
1101
+ *
1102
+ * **Caching**: Results are cached for 5 minutes by default (configurable via options)
1103
+ *
1104
+ * @param query - The search query text
1105
+ * @param options - Search options
1106
+ * @param options.limit - Maximum results to return (default: 10)
1107
+ * @param options.filter - LanceDB filter expression (e.g., "type == 'preference'")
1108
+ * @param options.useCache - Enable/disable result caching (default: true)
1109
+ * @returns Promise with array of search results, sorted by relevance score
1110
+ *
1111
+ * @example
1112
+ * ```typescript
1113
+ * // Simple search
1114
+ * const results = await mesh.search("TypeScript preferences");
1115
+ *
1116
+ * // Search with filter
1117
+ * const code = await mesh.search("bug fix", { filter: "type == 'error'" });
1118
+ *
1119
+ * // Search with limit
1120
+ * const top3 = await mesh.search("security issues", { limit: 3 });
1121
+ * ```
1122
+ *
1123
+ * @throws {Error} If embedding generation fails
1124
+ * @throws {Error} If database client is not initialized
1125
+ */
1126
+ async search(query, options = {}) {
1127
+ await this.init();
1128
+ try {
1129
+ const limit = options.limit || 10;
1130
+ const filter = options.filter || null;
1131
+ const useCache = options.useCache !== undefined ? options.useCache : true;
1132
+ if (useCache) {
1133
+ const cacheKey = this._generateCacheKey(query, { limit, filter });
1134
+ const cached = this._getCachedResult(cacheKey);
1135
+ if (cached) {
1136
+ return cached;
1137
+ }
1138
+ }
1139
+ const vector = await this.embeddingFactory.embed(query);
1140
+ if (!this.client) {
1141
+ throw new Error("Database client not initialized");
1142
+ }
1143
+ const vectorResults = await this.client.search(vector, {
1144
+ limit: limit * 2,
1145
+ metric: "cosine",
1146
+ filter,
1147
+ });
1148
+ const keywordResults = this.keywordSearch.search(query, {
1149
+ limit: limit * 2,
1150
+ });
1151
+ // Optimized Reciprocal Rank Fusion (RRF) with min-heap for O(n log k) performance
1152
+ // Instead of sorting all results (O(n log n)), we maintain a heap of size k (O(n log k))
1153
+ const k = 60; // RRF constant
1154
+ const scores = new Map();
1155
+ const docMap = new Map();
1156
+ // Process vector results - O(m) where m = vectorResults.length
1157
+ for (let rank = 0; rank < vectorResults.length; rank++) {
1158
+ const doc = vectorResults[rank];
1159
+ const rrf = 1 / (k + rank + 1);
1160
+ scores.set(doc.id, (scores.get(doc.id) || 0) + rrf);
1161
+ docMap.set(doc.id, doc);
1162
+ }
1163
+ // Process keyword results - O(n) where n = keywordResults.length
1164
+ for (let rank = 0; rank < keywordResults.length; rank++) {
1165
+ const doc = keywordResults[rank];
1166
+ const rrf = 1 / (k + rank + 1);
1167
+ scores.set(doc.id, (scores.get(doc.id) || 0) + rrf);
1168
+ if (!docMap.has(doc.id)) {
1169
+ docMap.set(doc.id, {
1170
+ id: doc.id,
1171
+ content: doc.content,
1172
+ metadata: doc.metadata,
1173
+ score: 0,
1174
+ created_at: new Date().toISOString(),
1175
+ });
1176
+ }
1177
+ }
1178
+ // Extract top k results using min-heap pattern - O(n log k)
1179
+ // Since JavaScript doesn't have a built-in heap, we use an efficient approach:
1180
+ // Convert to array and sort only if results exceed limit significantly
1181
+ const scoreEntries = Array.from(scores.entries());
1182
+ let mergedResults;
1183
+ if (scoreEntries.length <= limit * 2) {
1184
+ // Small dataset: standard sort is fine
1185
+ mergedResults = scoreEntries
1186
+ .sort((a, b) => b[1] - a[1]) // O(n log n) but n is small
1187
+ .slice(0, limit)
1188
+ .map(([id, score]) => {
1189
+ const doc = docMap.get(id);
1190
+ return doc ? { ...doc, score } : null;
1191
+ })
1192
+ .filter((d) => d !== null);
1193
+ }
1194
+ else {
1195
+ // Large dataset: use partial selection sort (O(n*k) but k is small)
1196
+ // This is more efficient than full sort when we only need top k results
1197
+ const topK = [];
1198
+ for (const entry of scoreEntries) {
1199
+ if (topK.length < limit) {
1200
+ topK.push(entry);
1201
+ // Keep topK sorted in descending order
1202
+ topK.sort((a, b) => b[1] - a[1]);
1203
+ }
1204
+ else if (entry[1] > topK[topK.length - 1][1]) {
1205
+ // Replace smallest in topK if current is larger
1206
+ topK[limit - 1] = entry;
1207
+ topK.sort((a, b) => b[1] - a[1]);
1208
+ }
1209
+ }
1210
+ mergedResults = topK
1211
+ .map(([id, score]) => {
1212
+ const doc = docMap.get(id);
1213
+ return doc ? { ...doc, score } : null;
1214
+ })
1215
+ .filter((d) => d !== null);
1216
+ }
1217
+ const normalizedResults = this._normalizeScores(mergedResults);
1218
+ if (useCache) {
1219
+ const cacheKey = this._generateCacheKey(query, { limit, filter });
1220
+ this._cacheResult(cacheKey, normalizedResults);
1221
+ }
1222
+ if (this.enableYamo) {
1223
+ this._emitYamoBlock("recall", undefined, YamoEmitter.buildRecallBlock({
1224
+ query,
1225
+ resultCount: normalizedResults.length,
1226
+ limit,
1227
+ agentId: this.agentId,
1228
+ searchType: "hybrid",
1229
+ })).catch((error) => {
1230
+ // Log emission failures in debug mode but don't throw
1231
+ if (process.env.YAMO_DEBUG === "true") {
1232
+ logger.warn({ err: error }, "Failed to emit YAMO block (recall)");
1233
+ }
1234
+ });
1235
+ }
1236
+ return normalizedResults;
1237
+ }
1238
+ catch (error) {
1239
+ throw error instanceof Error ? error : new Error(String(error));
1240
+ }
1241
+ }
1242
+ _normalizeScores(results) {
1243
+ if (results.length === 0) {
1244
+ return [];
1245
+ }
1246
+ return results.map((r) => {
1247
+ // LanceDB _distance is squared L2 or cosine distance
1248
+ // For cosine distance in MiniLM, it ranges from 0 to 2
1249
+ const rawDistance = r._distance !== undefined ? r._distance : 1.0;
1250
+ // Convert to similarity score [0, 1]
1251
+ const score = Math.max(0, Math.min(1.0, 1 - rawDistance / 2));
1252
+ return {
1253
+ ...r,
1254
+ score: parseFloat(score.toFixed(2)),
1255
+ };
1256
+ });
1257
+ }
1258
+ /**
1259
+ * Tokenize query for keyword matching (private helper for searchSkills)
1260
+ * Converts text to lowercase tokens, filtering out short tokens and punctuation.
1261
+ * Handles camelCase/PascalCase by splitting on uppercase letters.
1262
+ */
1263
+ _tokenizeQuery(text) {
1264
+ return text
1265
+ .replace(/([a-z])([A-Z])/g, "$1 $2") // Split camelCase: "targetSkill" → "target Skill"
1266
+ .toLowerCase()
1267
+ .replace(/[^\w\s]/g, "")
1268
+ .split(/\s+/)
1269
+ .filter((t) => t.length > 2); // Filter out very short tokens
1270
+ }
1271
+ formatResults(results) {
1272
+ if (results.length === 0) {
1273
+ return "No relevant memories found.";
1274
+ }
1275
+ let output = `[ATTENTION DIRECTIVE]\nThe following [MEMORY CONTEXT] is weighted by relevance.
1276
+ - ALIGN attention to entries with [IMPORTANCE >= 0.8].
1277
+ - TREAT entries with [IMPORTANCE <= 0.4] as auxiliary background info.
1278
+
1279
+ [MEMORY CONTEXT]`;
1280
+ results.forEach((res, i) => {
1281
+ const metadata = typeof res.metadata === "string"
1282
+ ? JSON.parse(res.metadata)
1283
+ : res.metadata;
1284
+ output += `\n\n--- MEMORY ${i + 1}: ${res.id} [IMPORTANCE: ${res.score}] ---\nType: ${metadata.type || "event"} | Source: ${metadata.source || "unknown"}\n${res.content}`;
1285
+ });
1286
+ return output;
1287
+ }
1288
+ async get(id) {
1289
+ await this.init();
1290
+ if (!this.client) {
1291
+ throw new Error("Database client not initialized");
1292
+ }
1293
+ const record = await this.client.getById(id);
1294
+ return record
1295
+ ? {
1296
+ id: record.id,
1297
+ content: record.content,
1298
+ metadata: record.metadata,
1299
+ created_at: record.created_at,
1300
+ updated_at: record.updated_at,
1301
+ }
1302
+ : null;
1303
+ }
1304
+ async getAll(options = {}) {
1305
+ await this.init();
1306
+ if (!this.client) {
1307
+ throw new Error("Database client not initialized");
1308
+ }
1309
+ return this.client.getAll(options);
1310
+ }
1311
+ async stats() {
1312
+ await this.init();
1313
+ if (!this.enableMemory || !this.client) {
1314
+ return {
1315
+ count: 0,
1316
+ totalMemories: 0,
1317
+ totalSkills: 0,
1318
+ tableName: "N/A",
1319
+ uri: "N/A",
1320
+ isConnected: false,
1321
+ embedding: { configured: false, primary: null, fallbacks: [] },
1322
+ status: "disabled",
1323
+ };
1324
+ }
1325
+ const dbStats = await this.client.getStats();
1326
+ // Enrich embedding stats with total persisted count
1327
+ const embeddingStats = this.embeddingFactory.getStats();
1328
+ if (embeddingStats.primary) {
1329
+ embeddingStats.primary.totalPersisted = dbStats.count;
1330
+ }
1331
+ // Get skill count
1332
+ let totalSkills = 0;
1333
+ if (this.skillTable) {
1334
+ try {
1335
+ const skills = await this.skillTable.query().limit(10000).toArray();
1336
+ totalSkills = skills.length;
1337
+ }
1338
+ catch (_e) {
1339
+ // Ignore errors
1340
+ }
1341
+ }
1342
+ return {
1343
+ count: dbStats.count,
1344
+ totalMemories: dbStats.count,
1345
+ totalSkills,
1346
+ tableName: dbStats.tableName,
1347
+ uri: dbStats.uri,
1348
+ isConnected: dbStats.isConnected,
1349
+ embedding: embeddingStats,
1350
+ };
1351
+ }
1352
+ _parseEmbeddingConfig() {
1353
+ const configs = [
1354
+ {
1355
+ modelType: process.env.EMBEDDING_MODEL_TYPE || "local",
1356
+ modelName: process.env.EMBEDDING_MODEL_NAME || "Xenova/all-MiniLM-L6-v2",
1357
+ dimension: parseInt(process.env.EMBEDDING_DIMENSION || "384"),
1358
+ priority: 1,
1359
+ apiKey: process.env.EMBEDDING_API_KEY ||
1360
+ process.env.OPENAI_API_KEY ||
1361
+ process.env.COHERE_API_KEY,
1362
+ },
1363
+ ];
1364
+ if (configs[0].modelType !== "local") {
1365
+ configs.push({
1366
+ modelType: "local",
1367
+ modelName: "Xenova/all-MiniLM-L6-v2",
1368
+ dimension: 384,
1369
+ priority: 2,
1370
+ apiKey: undefined,
1371
+ });
1372
+ }
1373
+ return configs;
1374
+ }
1375
+ /**
1376
+ * Close database connections and release resources
1377
+ *
1378
+ * This should be called when done with the MemoryMesh to properly:
1379
+ * - Close LanceDB connections
1380
+ * - Release file handles
1381
+ * - Clean up resources
1382
+ *
1383
+ * Important for tests and cleanup to prevent connection leaks.
1384
+ *
1385
+ * @returns {Promise<void>}
1386
+ *
1387
+ * @example
1388
+ * ```typescript
1389
+ * const mesh = new MemoryMesh();
1390
+ * await mesh.init();
1391
+ * // ... use mesh ...
1392
+ * await mesh.close(); // Clean up
1393
+ * ```
1394
+ */
1395
+ // eslint-disable-next-line @typescript-eslint/require-await
1396
+ async close() {
1397
+ try {
1398
+ // Close LanceDB client connection
1399
+ if (this.client) {
1400
+ this.client.disconnect();
1401
+ this.client = null;
1402
+ }
1403
+ // Clear extension table references
1404
+ this.yamoTable = null;
1405
+ this.skillTable = null;
1406
+ // Reset initialization state
1407
+ this.isInitialized = false;
1408
+ logger.debug("MemoryMesh closed successfully");
1409
+ }
1410
+ catch (error) {
1411
+ const e = error instanceof Error ? error : new Error(String(error));
1412
+ logger.warn({ err: e }, "Error closing MemoryMesh");
1413
+ // Don't throw - cleanup should always succeed
1414
+ }
1415
+ }
1416
+ }
1417
/**
 * Main CLI handler.
 *
 * Input is taken either from the command line (`<action> <json>`) or, when
 * fewer arguments are given, from a JSON document on stdin whose `action`
 * field selects the operation. Supported actions: `ingest` / `store`,
 * `search`, `synthesize`, `ingest-skill`, `search-skills`, `skill-feedback`,
 * `skill-prune` and `stats`. Results are written to stdout.
 *
 * The process exits with code 1 on invalid input, an unknown action or a
 * failed operation. The mesh is always closed before the function returns or
 * exits, so database handles are not leaked.
 *
 * @returns {Promise<void>}
 */
export async function run() {
    let action;
    let input;
    if (process.argv.length > 3) {
        action = process.argv[2];
        try {
            input = JSON.parse(process.argv[3]);
        }
        catch (e) {
            logger.error({ err: e }, "Invalid JSON argument");
            process.exit(1);
        }
    }
    else {
        try {
            const rawInput = fs.readFileSync(0, "utf8");
            input = JSON.parse(rawInput);
            action = input.action || action;
        }
        catch (_e) {
            logger.error("No input provided");
            process.exit(1);
        }
    }
    // Valid JSON such as `null` or `5` would otherwise crash on the first
    // property access, including inside the error handler below.
    if (input === null || typeof input !== "object") {
        logger.error("Input must be a JSON object");
        process.exit(1);
    }
    const mesh = new MemoryMesh({
        llmProvider: process.env.LLM_PROVIDER ||
            (process.env.OPENAI_API_KEY ? "openai" : "ollama"),
        llmApiKey: process.env.LLM_API_KEY || process.env.OPENAI_API_KEY,
        llmModel: process.env.LLM_MODEL,
    });
    // Exiting is deferred until after cleanup; process.exit() inside the
    // try/catch would skip the finally block.
    let failed = false;
    try {
        switch (action) {
            case "ingest":
            case "store": {
                const record = await mesh.add(input.content, input.metadata || {});
                process.stdout.write(`[MemoryMesh] Ingested record ${record.id}\n${JSON.stringify({ status: "ok", record })}\n`);
                break;
            }
            case "search": {
                const results = await mesh.search(input.query, {
                    limit: input.limit || 10,
                    filter: input.filter || null,
                });
                process.stdout.write(`[MemoryMesh] Found ${results.length} matches.\n**Formatted Context**:\n\`\`\`yamo\n${mesh.formatResults(results)}\n\`\`\`\n**Output**: memory_results.json\n\`\`\`json\n${JSON.stringify(results, null, 2)}\n\`\`\`\n${JSON.stringify({ status: "ok", results })}\n`);
                break;
            }
            case "synthesize": {
                const result = await mesh.synthesize({
                    topic: input.topic,
                    lookback: input.limit || 20,
                });
                process.stdout.write(`[MemoryMesh] Synthesis Outcome: ${result.status}\n${JSON.stringify(result, null, 2)}\n`);
                break;
            }
            case "ingest-skill": {
                const record = await mesh.ingestSkill(input.yamo_text, input.metadata || {});
                process.stdout.write(`[MemoryMesh] Ingested skill ${record.name} (${record.id})\n${JSON.stringify({ status: "ok", record })}\n`);
                break;
            }
            case "search-skills": {
                await mesh.init();
                const vector = await mesh.embeddingFactory.embed(input.query);
                if (mesh.skillTable) {
                    const results = await mesh.skillTable
                        .search(vector)
                        .limit(input.limit || 5)
                        .toArray();
                    process.stdout.write(`[MemoryMesh] Found ${results.length} synthesized skills.\n${JSON.stringify({ status: "ok", results }, null, 2)}\n`);
                }
                else {
                    process.stdout.write(`[MemoryMesh] Skill table not initialized.\n`);
                }
                break;
            }
            case "skill-feedback": {
                // Anything other than an explicit `false` counts as success
                const result = await mesh.updateSkillReliability(input.id, input.success !== false);
                process.stdout.write(`[MemoryMesh] Feedback recorded for ${input.id}: Reliability now ${result.reliability}\n${JSON.stringify({ status: "ok", ...result })}\n`);
                break;
            }
            case "skill-prune": {
                const result = await mesh.pruneSkills(input.threshold || 0.3);
                process.stdout.write(`[MemoryMesh] Pruning complete. Removed ${result.pruned_count} unreliable skills.\n${JSON.stringify({ status: "ok", ...result })}\n`);
                break;
            }
            case "stats": {
                process.stdout.write(`[MemoryMesh] Database Statistics:\n${JSON.stringify({ status: "ok", stats: await mesh.stats() }, null, 2)}\n`);
                break;
            }
            default: {
                logger.error({ action }, "Unknown action");
                failed = true;
            }
        }
    }
    catch (error) {
        const errorResponse = handleError(error, {
            action,
            // Never log the stored content itself
            input: { ...input, content: input.content ? "[REDACTED]" : undefined },
        });
        logger.error({ err: error, errorResponse }, "Fatal Error");
        failed = true;
    }
    finally {
        await mesh.close();
    }
    if (failed) {
        process.exit(1);
    }
}
1511
export default MemoryMesh;
// Run the CLI only when this file is the script Node was started with,
// not when it is imported as a module.
const isMainModule = process.argv[1] === fileURLToPath(import.meta.url);
if (isMainModule) {
    run().catch((err) => {
        logger.error({ err }, "Fatal Error");
        process.exit(1);
    });
}