@yamo/memory-mesh 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +80 -0
- package/bin/memory_mesh.js +69 -0
- package/bin/scrubber.js +81 -0
- package/index.d.ts +111 -0
- package/lib/adapters/index.js +3 -0
- package/lib/embeddings/factory.js +150 -0
- package/lib/embeddings/index.js +2 -0
- package/lib/embeddings/service.js +586 -0
- package/lib/index.js +18 -0
- package/lib/lancedb/client.js +631 -0
- package/lib/lancedb/config.js +215 -0
- package/lib/lancedb/errors.js +144 -0
- package/lib/lancedb/index.js +4 -0
- package/lib/lancedb/schema.js +197 -0
- package/lib/memory/index.js +3 -0
- package/lib/memory/memory-context-manager.js +388 -0
- package/lib/memory/memory-mesh.js +910 -0
- package/lib/memory/memory-translator.js +130 -0
- package/lib/memory/migrate-memory.js +227 -0
- package/lib/memory/migrate-to-v2.js +120 -0
- package/lib/memory/scorer.js +85 -0
- package/lib/memory/vector-memory.js +364 -0
- package/lib/privacy/audit-logger.js +176 -0
- package/lib/privacy/dlp-redactor.js +72 -0
- package/lib/privacy/index.js +10 -0
- package/lib/reporting/skill-report-generator.js +283 -0
- package/lib/scrubber/.gitkeep +1 -0
- package/lib/scrubber/config/defaults.js +62 -0
- package/lib/scrubber/errors/scrubber-error.js +43 -0
- package/lib/scrubber/index.js +25 -0
- package/lib/scrubber/scrubber.js +130 -0
- package/lib/scrubber/stages/chunker.js +103 -0
- package/lib/scrubber/stages/metadata-annotator.js +74 -0
- package/lib/scrubber/stages/normalizer.js +59 -0
- package/lib/scrubber/stages/semantic-filter.js +61 -0
- package/lib/scrubber/stages/structural-cleaner.js +82 -0
- package/lib/scrubber/stages/validator.js +66 -0
- package/lib/scrubber/telemetry.js +66 -0
- package/lib/scrubber/utils/hash.js +39 -0
- package/lib/scrubber/utils/html-parser.js +45 -0
- package/lib/scrubber/utils/pattern-matcher.js +63 -0
- package/lib/scrubber/utils/token-counter.js +31 -0
- package/lib/search/filter.js +275 -0
- package/lib/search/hybrid.js +137 -0
- package/lib/search/index.js +3 -0
- package/lib/search/pattern-miner.js +160 -0
- package/lib/utils/error-sanitizer.js +84 -0
- package/lib/utils/handoff-validator.js +85 -0
- package/lib/utils/index.js +4 -0
- package/lib/utils/spinner.js +190 -0
- package/lib/utils/streaming-client.js +128 -0
- package/package.json +39 -0
- package/skills/SKILL.md +462 -0
- package/skills/skill-scrubber.yamo +41 -0
|
@@ -0,0 +1,910 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Mesh - Vector Memory Storage with LanceDB
|
|
3
|
+
* Provides persistent semantic memory for YAMO OS using LanceDB backend
|
|
4
|
+
*
|
|
5
|
+
* CLI Interface:
|
|
6
|
+
* node tools/memory_mesh.js ingest '{"content": "...", "metadata": {...}}'
|
|
7
|
+
* node tools/memory_mesh.js search '{"query": "...", "limit": 10}'
|
|
8
|
+
* node tools/memory_mesh.js get '{"id": "..."}'
|
|
9
|
+
* node tools/memory_mesh.js delete '{"id": "..."}'
|
|
10
|
+
* node tools/memory_mesh.js stats '{}'
|
|
11
|
+
*
|
|
12
|
+
* Also supports STDIN input for YAMO skill compatibility:
|
|
13
|
+
* echo '{"action": "ingest", "content": "..."}' | node tools/memory_mesh.js
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { fileURLToPath } from 'url';
|
|
17
|
+
import fs from "fs";
|
|
18
|
+
import { LanceDBClient } from "../lancedb/client.js";
|
|
19
|
+
import { getConfig } from "../lancedb/config.js";
|
|
20
|
+
import { getEmbeddingDimension } from "../lancedb/schema.js";
|
|
21
|
+
import { handleError, StorageError, QueryError } from "../lancedb/errors.js";
|
|
22
|
+
import EmbeddingFactory from "../embeddings/factory.js";
|
|
23
|
+
import { Scrubber } from "../scrubber/scrubber.js";
|
|
24
|
+
|
|
25
|
+
/**
 * MemoryMesh - persistent semantic memory on top of a LanceDB table.
 *
 * Content is scrubbed, validated, embedded via the EmbeddingFactory and then
 * stored through the LanceDBClient. Search results are kept in a small
 * in-memory LRU cache that is invalidated whenever the stored data changes.
 */
class MemoryMesh {
  /**
   * Create a new MemoryMesh instance.
   * No I/O happens here; the database connection is opened lazily by init().
   */
  constructor() {
    this.client = null;
    this.config = null;
    this.embeddingFactory = new EmbeddingFactory();
    this.isInitialized = false;
    this.vectorDimension = 384; // Default; replaced by the detected value in init()

    // In-flight initialization promise, shared by concurrent init() callers.
    this._initPromise = null;

    // Scrubber for Layer 0 sanitization
    this.scrubber = new Scrubber({
      enabled: true,
      chunking: {
        minTokens: 1 // Allow short memories
      },
      validation: {
        enforceMinLength: false // Disable strict length validation
      }
    });

    // Simple LRU cache for search queries (5 minute TTL).
    // A Map iterates in insertion order, so the first key is the oldest entry.
    this.queryCache = new Map();
    this.cacheConfig = {
      maxSize: 500,
      ttlMs: 5 * 60 * 1000 // 5 minutes
    };
  }

  /**
   * Generate a cache key from query and options.
   * @private
   * @param {string} query - Search query
   * @param {Object} options - Search options (only limit and filter affect the key)
   * @returns {string} Cache key
   */
  _generateCacheKey(query, options = {}) {
    const normalizedOptions = {
      limit: options.limit || 10,
      filter: options.filter || null
    };
    return `search:${query}:${JSON.stringify(normalizedOptions)}`;
  }

  /**
   * Get a cached result if it exists and has not expired.
   * A hit is re-inserted so it becomes the most recently used entry.
   * @private
   * @param {string} key - Cache key
   * @returns {Object|null} Cached result or null if expired/missing
   */
  _getCachedResult(key) {
    const entry = this.queryCache.get(key);
    if (!entry) return null;

    // Expired entries are dropped eagerly
    if (Date.now() - entry.timestamp > this.cacheConfig.ttlMs) {
      this.queryCache.delete(key);
      return null;
    }

    // Move to end (most recently used)
    this.queryCache.delete(key);
    this.queryCache.set(key, entry);

    return entry.result;
  }

  /**
   * Cache a search result, evicting the least recently used entry if full.
   * @private
   * @param {string} key - Cache key
   * @param {Object} result - Search result to cache
   */
  _cacheResult(key, result) {
    // Remove any existing entry first: Map.set() on an existing key keeps its
    // old position, and replacing a key must not evict an unrelated entry.
    this.queryCache.delete(key);

    // Evict oldest if at max size
    if (this.queryCache.size >= this.cacheConfig.maxSize) {
      const oldestKey = this.queryCache.keys().next().value;
      this.queryCache.delete(oldestKey);
    }

    this.queryCache.set(key, {
      result,
      timestamp: Date.now()
    });
  }

  /**
   * Clear all cached results.
   */
  clearCache() {
    this.queryCache.clear();
  }

  /**
   * Get cache statistics.
   * @returns {Object} Cache stats: current size, maxSize and ttlMs
   */
  getCacheStats() {
    return {
      size: this.queryCache.size,
      maxSize: this.cacheConfig.maxSize,
      ttlMs: this.cacheConfig.ttlMs
    };
  }

  /**
   * Validate and sanitize metadata to prevent prototype pollution.
   * @param {Object} metadata - Metadata to validate
   * @returns {Object} Shallow copy of metadata without dangerous keys
   * @throws {Error} If metadata is not a non-null object
   * @private
   */
  _validateMetadata(metadata) {
    if (typeof metadata !== 'object' || metadata === null) {
      throw new Error('Metadata must be a non-null object');
    }

    const sanitized = {};
    // Object.entries() yields own enumerable properties only, so inherited
    // keys never appear here. An own "__proto__" key (e.g. from JSON.parse)
    // does appear and must be skipped explicitly, because assigning it on a
    // plain object would replace that object's prototype.
    for (const [key, value] of Object.entries(metadata)) {
      if (key === '__proto__' || key === 'constructor' || key === 'prototype') {
        continue;
      }
      sanitized[key] = value;
    }
    return sanitized;
  }

  /**
   * Sanitize and validate content before storage.
   * @param {string} content - Content to sanitize
   * @returns {string} Trimmed content
   * @throws {Error} If content is not a string or is too long
   * @private
   */
  _sanitizeContent(content) {
    if (typeof content !== 'string') {
      throw new Error('Content must be a string');
    }

    // The limit is measured in UTF-16 code units (String.length), not bytes
    const MAX_CONTENT_LENGTH = 100000;
    if (content.length > MAX_CONTENT_LENGTH) {
      throw new Error(`Content exceeds maximum length of ${MAX_CONTENT_LENGTH} characters`);
    }

    return content.trim();
  }

  /**
   * Run content through the scrubber (Layer 0 sanitization).
   * Best effort: if the scrubber throws or produces no chunks, the raw
   * content is returned unchanged with empty scrubber metadata.
   * @private
   * @param {string} content - Raw content
   * @param {string} source - Source label passed to the scrubber
   * @returns {Promise<{processedContent: string, scrubbedMetadata: Object}>}
   */
  async _scrubContent(content, source) {
    let processedContent = content;
    let scrubbedMetadata = {};

    try {
      const scrubbedResult = await this.scrubber.process({
        content,
        source,
        type: 'txt' // Default to text
      });

      if (scrubbedResult.success && scrubbedResult.chunks.length > 0) {
        // Reconstruct cleaned content
        processedContent = scrubbedResult.chunks.map((chunk) => chunk.text).join('\n\n');

        // Merge scrubber telemetry/metadata if present
        if (scrubbedResult.metadata) {
          scrubbedMetadata = {
            ...scrubbedResult.metadata,
            scrubber_telemetry: JSON.stringify(scrubbedResult.telemetry)
          };
        }
      }
    } catch (scrubError) {
      // Fall back to raw content; only log in debug mode to avoid
      // corrupting spinner/REPL display
      if (process.env.YAMO_DEBUG === 'true') {
        const message = scrubError instanceof Error ? scrubError.message : String(scrubError);
        console.error(`[MemoryMesh] Scrubber failed: ${message}`);
      }
    }

    return { processedContent, scrubbedMetadata };
  }

  /**
   * Initialize the LanceDB client and the embedding factory.
   * Safe to call repeatedly and concurrently: callers that arrive while an
   * initialization is in flight share the same promise. After a failure the
   * next call retries from scratch.
   * @returns {Promise<void>}
   */
  async init() {
    if (this.isInitialized) {
      return;
    }

    if (!this._initPromise) {
      this._initPromise = this._initialize().finally(() => {
        this._initPromise = null;
      });
    }

    return this._initPromise;
  }

  /**
   * Perform the actual initialization work for init().
   * @private
   * @returns {Promise<void>}
   */
  async _initialize() {
    try {
      // Load configuration
      this.config = getConfig();

      // Detect vector dimension: explicit env override wins, otherwise use
      // the dimension reported for the configured model
      const modelName = process.env.EMBEDDING_MODEL_NAME || 'Xenova/all-MiniLM-L6-v2';
      const envDimension = Number.parseInt(process.env.EMBEDDING_DIMENSION || '0', 10) || null;
      this.vectorDimension = envDimension || getEmbeddingDimension(modelName);

      // Only log in debug mode to avoid corrupting spinner/REPL display
      if (process.env.YAMO_DEBUG === 'true') {
        console.error(`[MemoryMesh] Using vector dimension: ${this.vectorDimension} (model: ${modelName})`);
      }

      // Create LanceDBClient with detected dimension
      this.client = new LanceDBClient({
        uri: this.config.LANCEDB_URI,
        tableName: this.config.LANCEDB_MEMORY_TABLE,
        vectorDimension: this.vectorDimension,
        maxRetries: 3,
        retryDelay: 1000
      });

      // Connect to database
      await this.client.connect();

      // Configure embedding factory from environment
      const embeddingConfigs = this._parseEmbeddingConfig();
      this.embeddingFactory.configure(embeddingConfigs);
      await this.embeddingFactory.init();

      this.isInitialized = true;
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.init' });
    }
  }

  /**
   * Add content to memory with auto-generated embedding.
   * @param {string} content - Text content to store
   * @param {Object} metadata - Optional metadata tags
   * @returns {Promise<Object>} Created record with ID
   */
  async add(content, metadata = {}) {
    await this.init();

    try {
      // Layer 0: Scrubber sanitization
      const { processedContent, scrubbedMetadata } = await this._scrubContent(content, 'memory-api');

      // Validate and sanitize inputs (legacy check)
      const sanitizedContent = this._sanitizeContent(processedContent);
      const sanitizedMetadata = this._validateMetadata({ ...metadata, ...scrubbedMetadata });

      // Generate ID
      const id = `mem_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;

      // Generate embedding using EmbeddingFactory
      const vector = await this.embeddingFactory.embed(sanitizedContent);

      // Metadata is stored as a JSON string
      const record = {
        id,
        vector,
        content: sanitizedContent,
        metadata: JSON.stringify(sanitizedMetadata)
      };

      // Add to LanceDB
      if (!this.client) throw new Error('Database client not initialized');
      const result = await this.client.add(record);

      // Stored data changed: cached search results may now be stale
      this.clearCache();

      return {
        id: result.id,
        content: sanitizedContent,
        metadata: sanitizedMetadata,
        created_at: new Date().toISOString()
      };
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.add' });
    }
  }

  /**
   * Add multiple memory entries in batch for efficiency.
   * @param {Array<{content: string, metadata?: Object}>} entries - Array of entries to add
   * @returns {Promise<Object>} Result with count, success flag and IDs
   * @throws {Error} If entries is not a non-empty array
   */
  async addBatch(entries) {
    if (!Array.isArray(entries) || entries.length === 0) {
      throw new Error('Entries must be a non-empty array');
    }

    await this.init();

    try {
      const now = Date.now();

      // Scrub, validate and embed all entries in parallel
      const embeddingPromises = entries.map(async (entry, index) => {
        // Layer 0: Scrubber sanitization
        const { processedContent, scrubbedMetadata } = await this._scrubContent(entry.content, 'memory-batch');

        const sanitizedContent = this._sanitizeContent(processedContent);
        const sanitizedMetadata = this._validateMetadata({ ...(entry.metadata || {}), ...scrubbedMetadata });

        // The index suffix keeps IDs unique within one batch
        const id = `mem_${now}_${Math.random().toString(36).slice(2, 11)}_${index}`;
        const vector = await this.embeddingFactory.embed(sanitizedContent);

        return {
          id,
          vector,
          content: sanitizedContent,
          metadata: JSON.stringify(sanitizedMetadata)
        };
      });

      const recordsWithEmbeddings = await Promise.all(embeddingPromises);

      // Add all records to database
      if (!this.client) throw new Error('Database client not initialized');
      const result = await this.client.addBatch(recordsWithEmbeddings);

      // Stored data changed: cached search results may now be stale
      this.clearCache();

      return {
        count: result.count,
        success: result.success,
        ids: recordsWithEmbeddings.map((record) => record.id)
      };
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.addBatch', count: entries.length });
    }
  }

  /**
   * Search memory by semantic similarity.
   * @param {string} query - Search query text
   * @param {Object} options - Search options
   * @param {number} [options.limit=10] - Maximum number of results
   * @param {string} [options.filter] - Optional filter expression
   * @param {boolean} [options.useCache=true] - Whether to use query cache
   * @returns {Promise<Array>} Search results with scores
   */
  async search(query, options = {}) {
    await this.init();

    try {
      const limit = options.limit || 10;
      const filter = options.filter || null;
      // @ts-ignore
      const useCache = options.useCache !== undefined ? options.useCache : true;
      const cacheKey = useCache ? this._generateCacheKey(query, { limit, filter }) : null;

      // Check cache first (unless disabled)
      if (useCache) {
        const cached = this._getCachedResult(cacheKey);
        if (cached) {
          return cached;
        }
      }

      // Generate embedding using EmbeddingFactory
      const vector = await this.embeddingFactory.embed(query);

      // Perform semantic search
      if (!this.client) throw new Error('Database client not initialized');
      const results = await this.client.search(vector, {
        limit,
        metric: 'cosine',
        filter
      });

      // Format results
      const formattedResults = results.map((result) => ({
        id: result.id,
        content: result.content,
        metadata: result.metadata,
        score: result.score,
        created_at: result.created_at
      }));

      // Cache the result (unless disabled)
      if (useCache) {
        this._cacheResult(cacheKey, formattedResults);
      }

      return formattedResults;
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.search', query });
    }
  }

  /**
   * Get a record by ID.
   * @param {string} id - Record ID
   * @returns {Promise<Object|null>} Record object or null if not found
   */
  async get(id) {
    await this.init();

    try {
      if (!this.client) throw new Error('Database client not initialized');
      const record = await this.client.getById(id);

      if (!record) {
        return null;
      }

      return {
        id: record.id,
        content: record.content,
        metadata: record.metadata,
        created_at: record.created_at,
        updated_at: record.updated_at
      };
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.get', id });
    }
  }

  /**
   * Get all memory records.
   * @param {Object} options - Options, passed through to the client
   * @param {number} [options.limit] - Limit results
   * @returns {Promise<Array>} Array of records
   */
  async getAll(options = {}) {
    await this.init();
    try {
      if (!this.client) throw new Error('Database client not initialized');
      return await this.client.getAll(options);
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.getAll' });
    }
  }

  /**
   * Update a memory record: content is re-scrubbed and re-embedded.
   * @param {string} id - Record ID
   * @param {string} content - New content
   * @param {Object} metadata - New metadata
   * @returns {Promise<Object>} Result with id, stored content and success flag
   */
  async update(id, content, metadata = {}) {
    await this.init();

    try {
      // Layer 0: Scrubber sanitization
      const { processedContent, scrubbedMetadata } = await this._scrubContent(content, 'memory-update');

      const sanitizedContent = this._sanitizeContent(processedContent);
      const sanitizedMetadata = this._validateMetadata({ ...metadata, ...scrubbedMetadata });

      // Re-generate embedding
      const vector = await this.embeddingFactory.embed(sanitizedContent);

      const updateData = {
        vector,
        content: sanitizedContent,
        metadata: JSON.stringify(sanitizedMetadata)
      };

      if (!this.client) throw new Error('Database client not initialized');
      const result = await this.client.update(id, updateData);

      // Stored data changed: cached search results may now be stale
      this.clearCache();

      return {
        id: result.id,
        content: sanitizedContent,
        success: result.success
      };
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.update', id });
    }
  }

  /**
   * Delete a record by ID.
   * @param {string} id - Record ID to delete
   * @returns {Promise<Object>} Result with deleted ID and success status
   */
  async delete(id) {
    await this.init();

    try {
      if (!this.client) throw new Error('Database client not initialized');
      const result = await this.client.delete(id);

      // Stored data changed: cached search results may now be stale
      this.clearCache();

      return {
        deleted: result.id,
        success: result.success
      };
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.delete', id });
    }
  }

  /**
   * Get database statistics.
   * @returns {Promise<Object>} Count, table name, URI, connection state and embedding stats
   */
  async stats() {
    await this.init();

    try {
      if (!this.client) throw new Error('Database client not initialized');
      const dbStats = await this.client.getStats();
      const embeddingStats = this.embeddingFactory.getStats();

      return {
        count: dbStats.count,
        tableName: dbStats.tableName,
        uri: dbStats.uri,
        isConnected: dbStats.isConnected,
        embedding: embeddingStats
      };
    } catch (error) {
      const e = error instanceof Error ? error : new Error(String(error));
      throw handleError(e, { context: 'MemoryMesh.stats' });
    }
  }

  /**
   * Health check for MemoryMesh.
   * Never throws: each failing component is reported inside the result.
   * Database or embedding failures set the overall status to 'degraded';
   * a stats failure is only reported as a warning.
   * @returns {Promise<Object>} Health status with checks for all components
   */
  async healthCheck() {
    const health = {
      status: 'healthy',
      timestamp: new Date().toISOString(),
      checks: {}
    };

    // Check 1: Database connectivity
    try {
      const startDb = Date.now();
      await this.init();
      const dbLatency = Date.now() - startDb;

      // @ts-ignore
      health.checks.database = {
        status: 'up',
        latency: dbLatency,
        isConnected: this.client?.isConnected || false,
        tableName: this.client?.tableName || 'unknown'
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // @ts-ignore
      health.checks.database = {
        status: 'error',
        error: message
      };
      health.status = 'degraded';
    }

    // Check 2: Embedding service
    try {
      const startEmbedding = Date.now();
      const testEmbedding = await this.embeddingFactory.embed('health check');
      const embeddingLatency = Date.now() - startEmbedding;

      // @ts-ignore
      health.checks.embedding = {
        status: 'up',
        latency: embeddingLatency,
        dimension: testEmbedding.length,
        configured: true
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // @ts-ignore
      health.checks.embedding = {
        status: 'error',
        error: message
      };
      health.status = 'degraded';
    }

    // Check 3: Get stats (verifies read operations work)
    try {
      const stats = await this.stats();
      // @ts-ignore
      health.checks.stats = {
        status: 'up',
        recordCount: stats.count || 0
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // @ts-ignore
      health.checks.stats = {
        status: 'warning',
        error: message
      };
      // Don't degrade status for stats failure - it's not critical
    }

    // Check 4: Cache status (if caching enabled)
    if (this.queryCache) {
      // @ts-ignore
      health.checks.cache = {
        status: 'up',
        size: this.queryCache.size || 0,
        max: this.cacheConfig?.maxSize || 'unknown'
      };
    }

    return health;
  }

  /**
   * Parse embedding configuration from environment.
   * Returns the primary config first, followed by fallbacks in priority order.
   * @private
   * @returns {Array<Object>} Embedding configurations
   */
  _parseEmbeddingConfig() {
    const configs = [];

    // Use the same dimension init() settled on. A missing, zero or
    // non-numeric EMBEDDING_DIMENSION falls back to the detected dimension
    // instead of producing NaN or a hard-coded 384 that may not match the model.
    const envDimension = Number.parseInt(process.env.EMBEDDING_DIMENSION || '', 10);
    const primaryDimension = Number.isInteger(envDimension) && envDimension > 0
      ? envDimension
      : (this.vectorDimension || 384);

    // Primary: from EMBEDDING_MODEL_TYPE
    configs.push({
      modelType: process.env.EMBEDDING_MODEL_TYPE || 'local',
      modelName: process.env.EMBEDDING_MODEL_NAME || 'Xenova/all-MiniLM-L6-v2',
      dimension: primaryDimension,
      priority: 1,
      apiKey: process.env.EMBEDDING_API_KEY || process.env.OPENAI_API_KEY || process.env.COHERE_API_KEY
    });

    // Fallback 1: local model (if primary is API)
    if (configs[0].modelType !== 'local') {
      configs.push({
        modelType: 'local',
        modelName: 'Xenova/all-MiniLM-L6-v2',
        dimension: 384,
        priority: 2
      });
    }

    // Fallback 2: OpenAI (if key available)
    if (process.env.OPENAI_API_KEY && configs[0].modelType !== 'openai') {
      configs.push({
        modelType: 'openai',
        modelName: 'text-embedding-3-small',
        dimension: 1536,
        priority: 3,
        apiKey: process.env.OPENAI_API_KEY
      });
    }

    return configs;
  }

  /**
   * Build a LanceDB filter expression from an object.
   * Supports equality on top-level schema fields with string, number or
   * boolean values; entries with any other value type are ignored.
   * Complex filtering on the JSON metadata field is not supported.
   * @param {Object} filter - Filter object
   * @returns {string|null} LanceDB filter expression, or null if no conditions
   * @throws {Error} If a field name is not a plain (optionally dotted) identifier
   * @private
   */
  _buildFilter(filter) {
    if (!filter || typeof filter !== 'object') {
      return null;
    }

    // Field names are interpolated into the expression, so only plain
    // identifiers (optionally dotted) are accepted.
    const fieldNamePattern = /^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$/;
    const conditions = [];

    for (const [key, value] of Object.entries(filter)) {
      const valueType = typeof value;
      if (valueType !== 'string' && valueType !== 'number' && valueType !== 'boolean') {
        continue;
      }

      if (!fieldNamePattern.test(key)) {
        throw new Error(`Invalid filter field name: ${key}`);
      }

      if (valueType === 'string') {
        // Double embedded single quotes so the value cannot terminate the
        // string literal and inject extra conditions
        const escaped = value.replace(/'/g, "''");
        conditions.push(`${key} == '${escaped}'`);
      } else {
        conditions.push(`${key} == ${value}`);
      }
    }

    // @ts-ignore
    return conditions.length > 0 ? conditions.join(' AND ') : null;
  }
}
|
|
764
|
+
|
|
765
|
+
/**
 * Main CLI handler.
 *
 * Input is taken from one of two places:
 *  - the command line, when at least two arguments follow the script path:
 *    `argv[2]` is the action and `argv[3]` is a JSON payload;
 *  - otherwise STDIN, which must contain a JSON document whose `action`
 *    property selects the action and which itself serves as the payload.
 *
 * Supported actions: ingest, search, get, delete, stats. Results are written
 * to stdout; validation failures and runtime errors are written to stderr and
 * terminate the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function run() {
  let action;
  let input;

  // Check if arguments are provided via CLI
  if (process.argv.length > 3) {
    action = process.argv[2];
    try {
      input = JSON.parse(process.argv[3]);
    } catch (e) {
      const error = e instanceof Error ? e : new Error(String(e));
      const errorResponse = handleError(error, { context: 'CLI argument parsing' });
      console.error(`❌ Error: Invalid JSON argument: ${error.message}`);
      console.error(`Received: ${process.argv[3]}`);
      console.error(JSON.stringify(errorResponse, null, 2));
      process.exit(1);
    }
  } else {
    // Fallback to STDIN for System Skill compatibility
    try {
      // File descriptor 0 is STDIN; the whole stream is read synchronously.
      const rawInput = fs.readFileSync(0, 'utf8');
      const data = JSON.parse(rawInput);
      action = data.action || action;
      input = data;
    } catch (e) {
      const error = e instanceof Error ? e : new Error(String(e));
      const errorResponse = handleError(error, { context: 'STDIN parsing' });
      console.error("❌ Error: No input provided via CLI or STDIN.");
      console.error(`Details: ${error.message}`);
      console.error(JSON.stringify(errorResponse, null, 2));
      process.exit(1);
    }
  }

  // JSON.parse may legally produce null or a primitive (e.g. the argument
  // `null`). Reading a property of null would throw a TypeError in the
  // dispatch below and then a second time inside the error handler, hiding
  // the real problem. Fall back to an empty payload so the per-action
  // required-field checks report a clear message instead.
  if (input === null || typeof input !== 'object') {
    input = {};
  }

  // Create MemoryMesh instance
  const mesh = new MemoryMesh();

  try {
    // Route to appropriate action
    if (action === 'ingest') {
      // Validate required fields
      if (!input.content) {
        console.error('❌ Error: "content" field is required for ingest action');
        process.exit(1);
      }

      const record = await mesh.add(input.content, input.metadata || {});
      console.log(`[MemoryMesh] Ingested record ${record.id}`);
      console.log(JSON.stringify({ status: "ok", record }));

    } else if (action === 'search') {
      // Validate required fields
      if (!input.query) {
        console.error('❌ Error: "query" field is required for search action');
        process.exit(1);
      }

      const options = {
        limit: input.limit || 10,
        filter: input.filter || null
      };

      const results = await mesh.search(input.query, options);
      console.log(`[MemoryMesh] Found ${results.length} matches.`);

      const jsonResult = JSON.stringify(results, null, 2);
      // YAMO Skill compatibility: Output as a marked block for auto-saving.
      // The template continuation lines stay at column 0 so the emitted
      // block carries no extra leading whitespace.
      console.log(`\n**Output**: memory_results.json
\`\`\`json
${jsonResult}
\`\`\`
`);
      // Also output raw JSON for STDIN callers
      console.log(JSON.stringify({ status: "ok", results }));

    } else if (action === 'get') {
      // Validate required fields
      if (!input.id) {
        console.error('❌ Error: "id" field is required for get action');
        process.exit(1);
      }

      const record = await mesh.get(input.id);

      // A missing record is reported as a successful lookup with a null record.
      if (!record) {
        console.log(JSON.stringify({ status: "ok", record: null }));
      } else {
        console.log(JSON.stringify({ status: "ok", record }));
      }

    } else if (action === 'delete') {
      // Validate required fields
      if (!input.id) {
        console.error('❌ Error: "id" field is required for delete action');
        process.exit(1);
      }

      const result = await mesh.delete(input.id);
      console.log(`[MemoryMesh] Deleted record ${result.deleted}`);
      console.log(JSON.stringify({ status: "ok", ...result }));

    } else if (action === 'stats') {
      const stats = await mesh.stats();
      console.log('[MemoryMesh] Database Statistics:');
      console.log(JSON.stringify({ status: "ok", stats }, null, 2));

    } else {
      console.error(`❌ Error: Unknown action "${action}". Valid actions: ingest, search, get, delete, stats`);
      process.exit(1);
    }

  } catch (error) {
    // Handle errors using the error handler
    const e = error instanceof Error ? error : new Error(String(error));
    // The payload's content is replaced with a placeholder so it is not
    // echoed back in the error context.
    const errorResponse = handleError(e, { action, input: { ...input, content: input.content ? '[REDACTED]' : undefined } });

    if (errorResponse.success === false) {
      console.error(`❌ Fatal Error: ${errorResponse.error.message}`);
      if (process.env.NODE_ENV === 'development' && errorResponse.error.details) {
        console.error(`Details:`, errorResponse.error.details);
      }
      console.error(JSON.stringify(errorResponse, null, 2));
    } else {
      console.error(`❌ Fatal Error: ${e.message}`);
      console.error(e.stack);
    }

    process.exit(1);
  }
}
|
|
898
|
+
|
|
899
|
+
// Export for testing
|
|
900
|
+
export { MemoryMesh };
|
|
901
|
+
export default MemoryMesh;
|
|
902
|
+
|
|
903
|
+
// Start the CLI only when this module is the script Node was launched with
// (its own file path equals argv[1]); importing it elsewhere does nothing here.
if (fileURLToPath(import.meta.url) === process.argv[1]) {
  run().catch((failure) => {
    // Last-resort handler for anything that escapes run() itself.
    console.error(`❌ Fatal Error: ${failure.message}`);
    console.error(failure.stack);
    process.exit(1);
  });
}
|