@yamo/memory-mesh 2.1.1 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,69 +1,14 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  /**
4
- * MemoryMesh CLI Adapter
5
- * Provides a portable interface for skills to interact with the MemoryMesh system.
6
- *
7
- * Usage:
8
- * node tools/memory_mesh.js store <content_string_or_json> [metadata_json]
9
- * node tools/memory_mesh.js search <query_string> [limit]
4
+ * MemoryMesh CLI Entry Point
5
+ * Delegates to the core CLI handler in lib/memory/memory-mesh.js
10
6
  */
11
7
 
12
- import { MemoryMesh } from '../lib/memory/memory-mesh.js';
13
- import path from 'path';
8
+ import { run } from '../lib/memory/memory-mesh.js';
14
9
 
15
- // Parse arguments
16
- const args = process.argv.slice(2);
17
- const command = args[0];
18
-
19
- async function main() {
20
- try {
21
- const mesh = new MemoryMesh();
22
- // Wait for initialization if necessary (MemoryMesh constructor usually doesn't await,
23
- // but operations might need internal init. We assume standard usage.)
24
-
25
- if (command === 'store') {
26
- const content = args[1];
27
- let metadata = {};
28
- if (args[2]) {
29
- try {
30
- metadata = JSON.parse(args[2]);
31
- } catch (e) {
32
- console.error(JSON.stringify({ error: "Invalid metadata JSON" }));
33
- process.exit(1);
34
- }
35
- }
36
-
37
- if (!content) {
38
- console.error(JSON.stringify({ error: "Content required for store command" }));
39
- process.exit(1);
40
- }
41
-
42
- const result = await mesh.add(content, metadata);
43
- console.log(JSON.stringify({ success: true, id: result.id, message: "Memory stored successfully" }));
44
-
45
- } else if (command === 'search') {
46
- const query = args[1];
47
- const limit = parseInt(args[2]) || 5;
48
-
49
- if (!query) {
50
- console.error(JSON.stringify({ error: "Query required for search command" }));
51
- process.exit(1);
52
- }
53
-
54
- const results = await mesh.search(query, { limit });
55
- console.log(JSON.stringify({ success: true, results: results }));
56
-
57
- } else {
58
- console.error(JSON.stringify({ error: `Unknown command: ${command}` }));
59
- console.error("Usage: node tools/memory_mesh.js [store|search] ...");
60
- process.exit(1);
61
- }
62
-
63
- } catch (error) {
64
- console.error(JSON.stringify({ error: error.message }));
65
- process.exit(1);
66
- }
67
- }
68
-
69
- main();
10
+ // Execute the main CLI handler
11
+ run().catch(err => {
12
+ console.error(`❌ Fatal Error: ${err.message}`);
13
+ process.exit(1);
14
+ });
package/bin/setup.js CHANGED
@@ -65,21 +65,18 @@ async function copyWithPrompt(src, dest, label) {
65
65
  async function installSkills() {
66
66
  log('\n📦 Installing YAMO Skills...', 'blue');
67
67
 
68
- const claudeSkillsDir = join(homedir(), '.claude', 'skills', 'memory-mesh');
69
-
70
- // Check if Claude Code is installed
71
- const claudeDir = join(homedir(), '.claude');
72
- if (!existsSync(claudeDir)) {
73
- log('⚠ Claude Code not detected (~/.claude not found)', 'yellow');
74
- log(' Skills will be skipped. Install Claude Code first.', 'yellow');
75
- return { installed: 0, skipped: 0 };
76
- }
77
-
78
- // Create skills directory
79
- if (!existsSync(claudeSkillsDir)) {
80
- mkdirSync(claudeSkillsDir, { recursive: true });
81
- log(` ✓ Created ${claudeSkillsDir}`, 'green');
82
- }
68
+ const targetDirs = [
69
+ {
70
+ name: 'Claude Code',
71
+ base: join(homedir(), '.claude'),
72
+ skills: join(homedir(), '.claude', 'skills', 'yamo-super')
73
+ },
74
+ {
75
+ name: 'Gemini CLI',
76
+ base: join(homedir(), '.gemini'),
77
+ skills: join(homedir(), '.gemini', 'skills', 'yamo-super')
78
+ }
79
+ ];
83
80
 
84
81
  const skillsSourceDir = join(packageRoot, 'skills');
85
82
  if (!existsSync(skillsSourceDir)) {
@@ -87,23 +84,41 @@ async function installSkills() {
87
84
  return { installed: 0, skipped: 0 };
88
85
  }
89
86
 
90
- // Copy all skill files
91
- const skillFiles = readdirSync(skillsSourceDir).filter(f =>
92
- f.endsWith('.md') || f.endsWith('.yamo')
93
- );
87
+ const skillFiles = readdirSync(skillsSourceDir);
88
+ let totalInstalled = 0;
89
+ let totalSkipped = 0;
90
+ let detectedCount = 0;
94
91
 
95
- let installed = 0;
96
- let skipped = 0;
92
+ for (const target of targetDirs) {
93
+ // Check if the CLI environment is detected
94
+ if (!existsSync(target.base)) {
95
+ continue;
96
+ }
97
97
 
98
- for (const file of skillFiles) {
99
- const src = join(skillsSourceDir, file);
100
- const dest = join(claudeSkillsDir, file);
101
- const success = await copyWithPrompt(src, dest, file);
102
- if (success) installed++;
103
- else skipped++;
98
+ detectedCount++;
99
+ log(` Installing to ${target.name}...`, 'blue');
100
+
101
+ // Create skills directory
102
+ if (!existsSync(target.skills)) {
103
+ mkdirSync(target.skills, { recursive: true });
104
+ log(` ✓ Created ${target.skills}`, 'green');
105
+ }
106
+
107
+ for (const file of skillFiles) {
108
+ const src = join(skillsSourceDir, file);
109
+ const dest = join(target.skills, file);
110
+ const success = await copyWithPrompt(src, dest, `${target.name}: ${file}`);
111
+ if (success) totalInstalled++;
112
+ else totalSkipped++;
113
+ }
104
114
  }
105
115
 
106
- return { installed, skipped };
116
+ if (detectedCount === 0) {
117
+ log('⚠ No supported AI environment detected (~/.claude or ~/.gemini not found)', 'yellow');
118
+ log(' Skills will be skipped.', 'yellow');
119
+ }
120
+
121
+ return { installed: totalInstalled, skipped: totalSkipped };
107
122
  }
108
123
 
109
124
  async function installTools() {
@@ -147,11 +162,13 @@ function showUsage() {
147
162
  const pkg = JSON.parse(readFileSync(join(packageRoot, 'package.json'), 'utf-8'));
148
163
 
149
164
  log('\n✨ Setup Complete!', 'bright');
150
- log('\nYAMO Skills installed to: ~/.claude/skills/memory-mesh/', 'blue');
165
+ log('\nYAMO Skills installed to AI CLI environments:', 'blue');
166
+ log(' • ~/.claude/skills/yamo-super/', 'blue');
167
+ log(' • ~/.gemini/skills/yamo-super/', 'blue');
151
168
  log('Tools installed to: ./tools/', 'blue');
152
169
 
153
170
  log('\n📚 Usage:', 'bright');
154
- log(' • Use /yamo-super in Claude Code for workflow automation');
171
+ log(' • Use /yamo-super in Claude or Gemini for workflow automation');
155
172
  log(' • Use /scrubber skill for content sanitization');
156
173
  log(' • Call tools/memory_mesh.js for memory operations');
157
174
 
@@ -167,7 +184,7 @@ async function main() {
167
184
  log('ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•', 'bright');
168
185
 
169
186
  try {
170
- // Install skills to ~/.claude/skills/memory-mesh/
187
+ // Install skills to ~/.claude/skills/yamo-super/
171
188
  const skillResults = await installSkills();
172
189
 
173
190
  // Install tools to ./tools/
@@ -7,10 +7,11 @@ import EmbeddingService from "./service.js";
7
7
  import { ConfigurationError, EmbeddingError } from "../lancedb/errors.js";
8
8
 
9
9
  class EmbeddingFactory {
10
- constructor() {
10
+ constructor(ServiceClass = EmbeddingService) {
11
11
  this.primaryService = null;
12
12
  this.fallbackServices = [];
13
13
  this.configured = false;
14
+ this.ServiceClass = ServiceClass;
14
15
  }
15
16
 
16
17
  /**
@@ -22,10 +23,10 @@ class EmbeddingFactory {
22
23
  // Sort by priority (lower = higher priority)
23
24
  configs.sort((a, b) => a.priority - b.priority);
24
25
 
25
- this.primaryService = new EmbeddingService(configs[0]);
26
+ this.primaryService = new this.ServiceClass(configs[0]);
26
27
 
27
28
  if (configs.length > 1) {
28
- this.fallbackServices = configs.slice(1).map(c => new EmbeddingService(c));
29
+ this.fallbackServices = configs.slice(1).map(c => new this.ServiceClass(c));
29
30
  }
30
31
 
31
32
  this.configured = true;
@@ -28,6 +28,7 @@ class LanceDBClient {
28
28
  * @param {number} [config.maxRetries] - Maximum connection retries (default: 3)
29
29
  * @param {number} [config.retryDelay] - Delay between retries in ms (default: 1000)
30
30
  * @param {number} [config.vectorDimension] - Vector dimension for embeddings (default: 384)
31
+ * @param {Object} [config.driver] - LanceDB driver instance (for testing)
31
32
  */
32
33
  constructor(config = {}) {
33
34
  this.uri = (config && config.uri) || process.env.LANCEDB_URI || './data/lancedb';
@@ -35,6 +36,7 @@ class LanceDBClient {
35
36
  this.maxRetries = (config && config.maxRetries) || 3;
36
37
  this.retryDelay = (config && config.retryDelay) || 1000;
37
38
  this.vectorDimension = (config && config.vectorDimension) || DEFAULT_VECTOR_DIMENSION;
39
+ this.driver = (config && config.driver) || lancedb;
38
40
 
39
41
  // Connection state
40
42
  this.db = null;
@@ -66,7 +68,7 @@ class LanceDBClient {
66
68
  }
67
69
 
68
70
  // Connect to database
69
- this.db = await lancedb.connect(this.uri);
71
+ this.db = await this.driver.connect(this.uri);
70
72
 
71
73
  // Initialize table with dynamic dimension (creates if doesn't exist, opens if it does)
72
74
  this.table = await createMemoryTableWithDimension(this.db, this.tableName, this.vectorDimension);
@@ -21,6 +21,7 @@ import { getEmbeddingDimension } from "../lancedb/schema.js";
21
21
  import { handleError, StorageError, QueryError } from "../lancedb/errors.js";
22
22
  import EmbeddingFactory from "../embeddings/factory.js";
23
23
  import { Scrubber } from "../scrubber/scrubber.js";
24
+ import { KeywordSearch } from "../search/keyword-search.js";
24
25
 
25
26
  /**
26
27
  * MemoryMesh class for managing vector memory storage
@@ -33,6 +34,7 @@ class MemoryMesh {
33
34
  this.client = null;
34
35
  this.config = null;
35
36
  this.embeddingFactory = new EmbeddingFactory();
37
+ this.keywordSearch = new KeywordSearch();
36
38
  this.isInitialized = false;
37
39
  this.vectorDimension = 384; // Will be set during init()
38
40
 
@@ -221,11 +223,23 @@ class MemoryMesh {
221
223
  this.embeddingFactory.configure(embeddingConfigs);
222
224
  await this.embeddingFactory.init();
223
225
 
226
+ // Hydrate Keyword Search (In-Memory)
227
+ // Note: This is efficient for small datasets (< 10k).
228
+ // For larger, we should persist the inverted index or use LanceDB FTS.
229
+ if (this.client) {
230
+ try {
231
+ const allRecords = await this.client.getAll({ limit: 10000 });
232
+ this.keywordSearch.load(allRecords);
233
+ } catch (e) {
234
+ // Ignore if table doesn't exist yet
235
+ }
236
+ }
237
+
224
238
  this.isInitialized = true;
225
239
 
226
240
  } catch (error) {
227
241
  const e = error instanceof Error ? error : new Error(String(error));
228
- throw handleError(e, { context: 'MemoryMesh.init' });
242
+ throw e;
229
243
  }
230
244
  }
231
245
 
@@ -238,6 +252,10 @@ class MemoryMesh {
238
252
  async add(content, metadata = {}) {
239
253
  await this.init();
240
254
 
255
+ // Default to 'event' if no type provided
256
+ const type = metadata.type || 'event';
257
+ const enrichedMetadata = { ...metadata, type };
258
+
241
259
  try {
242
260
  // Layer 0: Scrubber Sanitization
243
261
  let processedContent = content;
@@ -272,7 +290,7 @@ class MemoryMesh {
272
290
 
273
291
  // Validate and sanitize inputs (legacy check)
274
292
  const sanitizedContent = this._sanitizeContent(processedContent);
275
- const sanitizedMetadata = this._validateMetadata({ ...metadata, ...scrubbedMetadata });
293
+ const sanitizedMetadata = this._validateMetadata({ ...enrichedMetadata, ...scrubbedMetadata });
276
294
 
277
295
  // Generate ID
278
296
  const id = `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
@@ -293,6 +311,9 @@ class MemoryMesh {
293
311
  if (!this.client) throw new Error('Database client not initialized');
294
312
  const result = await this.client.add(record);
295
313
 
314
+ // Add to Keyword Search
315
+ this.keywordSearch.add(record.id, record.content, sanitizedMetadata);
316
+
296
317
  return {
297
318
  id: result.id,
298
319
  content: sanitizedContent,
@@ -303,10 +324,42 @@ class MemoryMesh {
303
324
 
304
325
  } catch (error) {
305
326
  const e = error instanceof Error ? error : new Error(String(error));
306
- throw handleError(e, { context: 'MemoryMesh.add' });
327
+ throw e;
307
328
  }
308
329
  }
309
330
 
331
+ /**
332
+ * Reflect on recent memories to generate insights
333
+ * @param {Object} options
334
+ * @returns {Promise<Object>} Reflection prompt and context
335
+ */
336
+ async reflect(options = {}) {
337
+ await this.init();
338
+ const lookback = options.lookback || 10;
339
+ const topic = options.topic;
340
+
341
+ let memories = [];
342
+ if (topic) {
343
+ memories = await this.search(topic, { limit: lookback });
344
+ } else {
345
+ const all = await this.getAll();
346
+ memories = all
347
+ .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
348
+ .slice(0, lookback);
349
+ }
350
+
351
+ return {
352
+ topic,
353
+ count: memories.length,
354
+ context: memories.map(m => ({
355
+ content: m.content,
356
+ type: m.metadata?.type || 'event',
357
+ id: m.id
358
+ })),
359
+ prompt: `Review these memories. Synthesize a high-level "belief" or "observation".`
360
+ };
361
+ }
362
+
310
363
  /**
311
364
  * Add multiple memory entries in batch for efficiency
312
365
  * @param {Array<{content: string, metadata?: Object}>} entries - Array of entries to add
@@ -377,7 +430,7 @@ class MemoryMesh {
377
430
 
378
431
  } catch (error) {
379
432
  const e = error instanceof Error ? error : new Error(String(error));
380
- throw handleError(e, { context: 'MemoryMesh.addBatch', count: entries.length });
433
+ throw e;
381
434
  }
382
435
  }
383
436
 
@@ -411,34 +464,68 @@ class MemoryMesh {
411
464
  // Generate embedding using EmbeddingFactory
412
465
  const vector = await this.embeddingFactory.embed(query);
413
466
 
414
- // Perform semantic search
467
+ // 1. Vector Search
415
468
  if (!this.client) throw new Error('Database client not initialized');
416
- const results = await this.client.search(vector, {
417
- limit,
469
+ const vectorResults = await this.client.search(vector, {
470
+ limit: limit * 2, // Fetch more for re-ranking
418
471
  metric: 'cosine',
419
472
  filter
420
473
  });
421
474
 
422
- // Format results
423
- const formattedResults = results.map(result => ({
424
- id: result.id,
425
- content: result.content,
426
- metadata: result.metadata,
427
- score: result.score,
428
- created_at: result.created_at
429
- }));
475
+ // 2. Keyword Search
476
+ const keywordResults = this.keywordSearch.search(query, { limit: limit * 2 });
477
+
478
+ // 3. Reciprocal Rank Fusion (RRF)
479
+ const k = 60; // RRF constant
480
+ const scores = new Map(); // id -> score
481
+ const docMap = new Map(); // id -> doc
482
+
483
+ // Process Vector Results
484
+ vectorResults.forEach((doc, rank) => {
485
+ const rrf = 1 / (k + rank + 1);
486
+ scores.set(doc.id, (scores.get(doc.id) || 0) + rrf);
487
+ docMap.set(doc.id, doc);
488
+ });
489
+
490
+ // Process Keyword Results
491
+ keywordResults.forEach((doc, rank) => {
492
+ const rrf = 1 / (k + rank + 1);
493
+ scores.set(doc.id, (scores.get(doc.id) || 0) + rrf);
494
+
495
+ if (!docMap.has(doc.id)) {
496
+ // Add keyword-only match
497
+ docMap.set(doc.id, {
498
+ id: doc.id,
499
+ content: doc.content,
500
+ metadata: doc.metadata,
501
+ score: 0, // Base score, will be overwritten
502
+ created_at: new Date().toISOString() // Approximate or missing
503
+ });
504
+ }
505
+ });
506
+
507
+ // Sort by RRF score
508
+ const mergedResults = Array.from(scores.entries())
509
+ .sort((a, b) => b[1] - a[1])
510
+ .slice(0, limit)
511
+ .map(([id, score]) => {
512
+ const doc = docMap.get(id);
513
+ if (doc) return { ...doc, score };
514
+ return null;
515
+ })
516
+ .filter(d => d !== null);
430
517
 
431
518
  // Cache the result (unless disabled)
432
519
  if (useCache) {
433
520
  const cacheKey = this._generateCacheKey(query, { limit, filter });
434
- this._cacheResult(cacheKey, formattedResults);
521
+ this._cacheResult(cacheKey, mergedResults);
435
522
  }
436
523
 
437
- return formattedResults;
524
+ return mergedResults;
438
525
 
439
526
  } catch (error) {
440
527
  const e = error instanceof Error ? error : new Error(String(error));
441
- throw handleError(e, { context: 'MemoryMesh.search', query });
528
+ throw e;
442
529
  }
443
530
  }
444
531
 
@@ -469,7 +556,7 @@ class MemoryMesh {
469
556
 
470
557
  } catch (error) {
471
558
  const e = error instanceof Error ? error : new Error(String(error));
472
- throw handleError(e, { context: 'MemoryMesh.get', id });
559
+ throw e;
473
560
  }
474
561
  }
475
562
 
@@ -486,7 +573,7 @@ class MemoryMesh {
486
573
  return await this.client.getAll(options);
487
574
  } catch (error) {
488
575
  const e = error instanceof Error ? error : new Error(String(error));
489
- throw handleError(e, { context: 'MemoryMesh.getAll' });
576
+ throw e;
490
577
  }
491
578
  }
492
579
 
@@ -548,7 +635,7 @@ class MemoryMesh {
548
635
 
549
636
  } catch (error) {
550
637
  const e = error instanceof Error ? error : new Error(String(error));
551
- throw handleError(e, { context: 'MemoryMesh.update', id });
638
+ throw e;
552
639
  }
553
640
  }
554
641
 
@@ -564,6 +651,9 @@ class MemoryMesh {
564
651
  if (!this.client) throw new Error('Database client not initialized');
565
652
  const result = await this.client.delete(id);
566
653
 
654
+ // Remove from Keyword Search
655
+ this.keywordSearch.remove(id);
656
+
567
657
  return {
568
658
  deleted: result.id,
569
659
  success: result.success
@@ -572,7 +662,7 @@ class MemoryMesh {
572
662
 
573
663
  } catch (error) {
574
664
  const e = error instanceof Error ? error : new Error(String(error));
575
- throw handleError(e, { context: 'MemoryMesh.delete', id });
665
+ throw e;
576
666
  }
577
667
  }
578
668
 
@@ -599,7 +689,7 @@ class MemoryMesh {
599
689
 
600
690
  } catch (error) {
601
691
  const e = error instanceof Error ? error : new Error(String(error));
602
- throw handleError(e, { context: 'MemoryMesh.stats' });
692
+ throw e;
603
693
  }
604
694
  }
605
695
 
@@ -803,7 +893,7 @@ async function run() {
803
893
 
804
894
  try {
805
895
  // Route to appropriate action
806
- if (action === 'ingest') {
896
+ if (action === 'ingest' || action === 'store') {
807
897
  // Validate required fields
808
898
  if (!input.content) {
809
899
  console.error('❌ Error: "content" field is required for ingest action');
@@ -866,6 +956,14 @@ ${jsonResult}
866
956
  console.log(`[MemoryMesh] Deleted record ${result.deleted}`);
867
957
  console.log(JSON.stringify({ status: "ok", ...result }));
868
958
 
959
+ } else if (action === 'export') {
960
+ const records = await mesh.getAll({ limit: input.limit || 10000 });
961
+ console.log(JSON.stringify({ status: "ok", count: records.length, records }));
962
+
963
+ } else if (action === 'reflect') {
964
+ const result = await mesh.reflect({ topic: input.topic, lookback: input.limit });
965
+ console.log(JSON.stringify({ status: "ok", ...result }));
966
+
869
967
  } else if (action === 'stats') {
870
968
  const stats = await mesh.stats();
871
969
  console.log('[MemoryMesh] Database Statistics:');
@@ -896,8 +994,8 @@ ${jsonResult}
896
994
  }
897
995
  }
898
996
 
899
- // Export for testing
900
- export { MemoryMesh };
997
+ // Export for testing and CLI usage
998
+ export { MemoryMesh, run };
901
999
  export default MemoryMesh;
902
1000
 
903
1001
  // Run CLI if called directly
@@ -0,0 +1,144 @@
/**
 * Simple Keyword Search Engine (In-Memory)
 *
 * Provides basic TF-IDF style retrieval to complement vector search.
 * All state lives in memory; nothing is persisted by this class.
 */
export class KeywordSearch {
  constructor() {
    this.index = new Map(); // token -> Map<docId, term frequency>
    this.docLengths = new Map(); // docId -> number of tokens in the document
    this.idf = new Map(); // token -> idf value (rebuilt lazily by _computeStats)
    this.docs = new Map(); // docId -> { content, metadata }
    this.isDirty = false; // true when idf is stale relative to index
  }

  /**
   * Tokenize text into normalized terms.
   *
   * Lower-cases the text, strips punctuation, splits on whitespace, drops
   * tokens of two characters or fewer and truncates the rest to 20 characters.
   * Letters and digits of any script are kept (the previous ASCII-only `\w`
   * class silently deleted accented and non-Latin characters).
   *
   * @param {string} text
   * @returns {string[]} tokens; empty for empty or non-string input
   */
  tokenize(text) {
    // Non-string input (numbers, objects, null) has no `toLowerCase`; treat it
    // as "nothing to index" instead of throwing.
    if (typeof text !== 'string' || text.length === 0) return [];

    return text
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '') // Remove punctuation, keep Unicode word chars
      .split(/\s+/)
      .filter((t) => t.length > 2) // Filter stopwords/short
      .map((t) => t.substring(0, 20)); // Truncate
  }

  /**
   * Add a document to the index. Re-adding an existing id replaces the
   * previous version of that document.
   *
   * @param {string} id
   * @param {string} content
   * @param {Object} [metadata]
   */
  add(id, content, metadata = {}) {
    // Drop the old postings first; otherwise tokens that only appeared in the
    // previous content would keep matching this id.
    if (this.docs.has(id)) {
      this.remove(id);
    }

    const tokens = this.tokenize(content);
    const termFreqs = new Map();
    for (const token of tokens) {
      termFreqs.set(token, (termFreqs.get(token) || 0) + 1);
    }

    this.docLengths.set(id, tokens.length);
    this.docs.set(id, { content, metadata });

    // Update index
    for (const [token, freq] of termFreqs) {
      let postings = this.index.get(token);
      if (postings === undefined) {
        postings = new Map();
        this.index.set(token, postings);
      }
      postings.set(id, freq);
    }

    this.isDirty = true;
  }

  /**
   * Remove a document. Unknown ids are ignored.
   *
   * @param {string} id
   */
  remove(id) {
    const existing = this.docs.get(id);
    if (existing === undefined) return;

    // Only the document's own tokens can reference it, so re-tokenizing the
    // stored content avoids scanning the whole vocabulary.
    for (const token of new Set(this.tokenize(existing.content))) {
      const postings = this.index.get(token);
      if (postings === undefined) continue;
      postings.delete(id);
      // Prune empty posting lists so the vocabulary does not grow forever.
      if (postings.size === 0) {
        this.index.delete(token);
      }
    }

    this.docLengths.delete(id);
    this.docs.delete(id);
    this.isDirty = true;
  }

  /**
   * Recalculate IDF scores. No-op unless the index changed since the last call.
   */
  _computeStats() {
    if (!this.isDirty) return;

    const N = this.docLengths.size;
    this.idf.clear();

    for (const [token, postings] of this.index) {
      const df = postings.size;
      // Smoothed IDF: log(N / (df + 1)) + 1
      this.idf.set(token, Math.log(N / (df + 1)) + 1);
    }

    this.isDirty = false;
  }

  /**
   * Search for query terms.
   *
   * A document's score is the sum of `tf * idf` over the query tokens it
   * contains (no length normalization). Results are sorted by descending score.
   *
   * @param {string} query
   * @param {Object} [options]
   * @param {number} [options.limit] - Maximum number of results (default: 10)
   * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
   */
  search(query, options = {}) {
    this._computeStats();

    const tokens = this.tokenize(query);
    const scores = new Map(); // docId -> score
    const matches = new Map(); // docId -> matched tokens
    const limit = options.limit || 10;

    for (const token of tokens) {
      const postings = this.index.get(token);
      if (postings === undefined) continue;

      const idf = this.idf.get(token) || 0;

      for (const [docId, tf] of postings) {
        scores.set(docId, (scores.get(docId) || 0) + tf * idf);

        if (!matches.has(docId)) matches.set(docId, []);
        matches.get(docId).push(token);
      }
    }

    // Convert to array and sort
    return Array.from(scores, ([id, score]) => ({
      id,
      score,
      matches: matches.get(id) || [],
      ...this.docs.get(id),
    }))
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }

  /**
   * Bulk load records. Each record is indexed via `add(id, content, metadata)`.
   * A non-array argument is ignored.
   *
   * @param {Array<{id: string, content: string, metadata?: Object}>} records
   */
  load(records) {
    if (!Array.isArray(records)) return;
    for (const record of records) {
      this.add(record.id, record.content, record.metadata);
    }
  }
}