@yamo/memory-mesh 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,12 +15,16 @@
15
15
 
16
16
  import { fileURLToPath } from 'url';
17
17
  import fs from "fs";
18
+ import crypto from "crypto";
18
19
  import { LanceDBClient } from "../lancedb/client.js";
19
20
  import { getConfig } from "../lancedb/config.js";
20
21
  import { getEmbeddingDimension } from "../lancedb/schema.js";
21
22
  import { handleError, StorageError, QueryError } from "../lancedb/errors.js";
22
23
  import EmbeddingFactory from "../embeddings/factory.js";
23
24
  import { Scrubber } from "../scrubber/scrubber.js";
25
+ import { KeywordSearch } from "../search/keyword-search.js";
26
+ import { YamoEmitter } from "../yamo/emitter.js";
27
+ import { LLMClient } from "../llm/client.js";
24
28
 
25
29
  /**
26
30
  * MemoryMesh class for managing vector memory storage
@@ -28,16 +32,40 @@ import { Scrubber } from "../scrubber/scrubber.js";
28
32
  class MemoryMesh {
29
33
  /**
30
34
  * Create a new MemoryMesh instance
35
+ * @param {Object} [options={}] - Configuration options
36
+ * @param {boolean} [options.enableYamo=true] - Enable YAMO block emission
37
+ * @param {boolean} [options.enableLLM=true] - Enable LLM for reflections
38
+ * @param {string} [options.agentId='default'] - Agent identifier for YAMO blocks
39
+ * @param {string} [options.llmProvider] - LLM provider (openai, anthropic, ollama)
40
+ * @param {string} [options.llmApiKey] - LLM API key
41
+ * @param {string} [options.llmModel] - LLM model name
31
42
  */
32
- constructor() {
43
+ constructor(options = {}) {
33
44
  this.client = null;
34
45
  this.config = null;
35
46
  this.embeddingFactory = new EmbeddingFactory();
47
+ this.keywordSearch = new KeywordSearch();
36
48
  this.isInitialized = false;
37
49
  this.vectorDimension = 384; // Will be set during init()
38
50
 
51
+ // YAMO and LLM support
52
+ this.enableYamo = options.enableYamo !== false; // Default: true
53
+ this.enableLLM = options.enableLLM !== false; // Default: true
54
+ this.agentId = options.agentId || 'default';
55
+ this.yamoTable = null; // Will be initialized in init()
56
+ this.llmClient = null;
57
+
58
+ // Initialize LLM client if enabled
59
+ if (this.enableLLM) {
60
+ this.llmClient = new LLMClient({
61
+ provider: options.llmProvider,
62
+ apiKey: options.llmApiKey,
63
+ model: options.llmModel
64
+ });
65
+ }
66
+
39
67
  // Scrubber for Layer 0 sanitization
40
- this.scrubber = new Scrubber({
68
+ this.scrubber = new Scrubber({
41
69
  enabled: true,
42
70
  chunking: {
43
71
  minTokens: 1 // Allow short memories
@@ -221,11 +249,37 @@ class MemoryMesh {
221
249
  this.embeddingFactory.configure(embeddingConfigs);
222
250
  await this.embeddingFactory.init();
223
251
 
252
+ // Hydrate Keyword Search (In-Memory)
253
+ // Note: This is efficient for small datasets (< 10k).
254
+ // For larger, we should persist the inverted index or use LanceDB FTS.
255
+ if (this.client) {
256
+ try {
257
+ const allRecords = await this.client.getAll({ limit: 10000 });
258
+ this.keywordSearch.load(allRecords);
259
+ } catch (e) {
260
+ // Ignore if table doesn't exist yet
261
+ }
262
+ }
263
+
264
+ // Initialize YAMO blocks table if enabled
265
+ if (this.enableYamo && this.client && this.client.db) {
266
+ try {
267
+ const { createYamoTable } = await import('../yamo/schema.js');
268
+ this.yamoTable = await createYamoTable(this.client.db, 'yamo_blocks');
269
+ if (process.env.YAMO_DEBUG === 'true') {
270
+ console.error('[MemoryMesh] YAMO blocks table initialized');
271
+ }
272
+ } catch (e) {
273
+ // Log warning but don't fail initialization
274
+ console.warn('[MemoryMesh] Failed to initialize YAMO table:', e instanceof Error ? e.message : String(e));
275
+ }
276
+ }
277
+
224
278
  this.isInitialized = true;
225
279
 
226
280
  } catch (error) {
227
281
  const e = error instanceof Error ? error : new Error(String(error));
228
- throw handleError(e, { context: 'MemoryMesh.init' });
282
+ throw e;
229
283
  }
230
284
  }
231
285
 
@@ -238,6 +292,10 @@ class MemoryMesh {
238
292
  async add(content, metadata = {}) {
239
293
  await this.init();
240
294
 
295
+ // Default to 'event' if no type provided
296
+ const type = metadata.type || 'event';
297
+ const enrichedMetadata = { ...metadata, type };
298
+
241
299
  try {
242
300
  // Layer 0: Scrubber Sanitization
243
301
  let processedContent = content;
@@ -272,7 +330,7 @@ class MemoryMesh {
272
330
 
273
331
  // Validate and sanitize inputs (legacy check)
274
332
  const sanitizedContent = this._sanitizeContent(processedContent);
275
- const sanitizedMetadata = this._validateMetadata({ ...metadata, ...scrubbedMetadata });
333
+ const sanitizedMetadata = this._validateMetadata({ ...enrichedMetadata, ...scrubbedMetadata });
276
334
 
277
335
  // Generate ID
278
336
  const id = `mem_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
@@ -293,6 +351,25 @@ class MemoryMesh {
293
351
  if (!this.client) throw new Error('Database client not initialized');
294
352
  const result = await this.client.add(record);
295
353
 
354
+ // Add to Keyword Search
355
+ this.keywordSearch.add(record.id, record.content, sanitizedMetadata);
356
+
357
+ // Emit YAMO block for retain operation (async, non-blocking)
358
+ if (this.enableYamo) {
359
+ // Fire and forget - don't await
360
+ this._emitYamoBlock('retain', result.id, YamoEmitter.buildRetainBlock({
361
+ content: sanitizedContent,
362
+ metadata: sanitizedMetadata,
363
+ id: result.id,
364
+ agentId: this.agentId,
365
+ memoryType: sanitizedMetadata.type || 'event'
366
+ })).catch(err => {
367
+ if (process.env.YAMO_DEBUG === 'true') {
368
+ console.error('[MemoryMesh] YAMO emission failed in add():', err);
369
+ }
370
+ });
371
+ }
372
+
296
373
  return {
297
374
  id: result.id,
298
375
  content: sanitizedContent,
@@ -303,7 +380,142 @@ class MemoryMesh {
303
380
 
304
381
  } catch (error) {
305
382
  const e = error instanceof Error ? error : new Error(String(error));
306
- throw handleError(e, { context: 'MemoryMesh.add' });
383
+ throw e;
384
+ }
385
+ }
386
+
387
+ /**
388
+ * Reflect on recent memories to generate insights (enhanced with LLM + YAMO)
389
+ * @param {Object} options
390
+ * @param {string} [options.topic] - Topic to search for
391
+ * @param {number} [options.lookback=10] - Number of memories to consider
392
+ * @param {boolean} [options.generate=true] - Whether to generate reflection via LLM
393
+ * @returns {Promise<Object>} Reflection result with YAMO block
394
+ */
395
+ async reflect(options = {}) {
396
+ await this.init();
397
+
398
+ const lookback = options.lookback || 10;
399
+ const topic = options.topic;
400
+ const generate = options.generate !== false;
401
+
402
+ // Gather memories
403
+ let memories = [];
404
+ if (topic) {
405
+ memories = await this.search(topic, { limit: lookback });
406
+ } else {
407
+ const all = await this.getAll();
408
+ memories = all
409
+ .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime())
410
+ .slice(0, lookback);
411
+ }
412
+
413
+ const prompt = `Review these memories. Synthesize a high-level "belief" or "observation".`;
414
+
415
+ // Check if LLM generation is requested and available
416
+ if (!generate || !this.enableLLM || !this.llmClient) {
417
+ // Return prompt-only mode (backward compatible)
418
+ return {
419
+ topic,
420
+ count: memories.length,
421
+ context: memories.map(m => ({
422
+ content: m.content,
423
+ type: m.metadata?.type || 'event',
424
+ id: m.id
425
+ })),
426
+ prompt
427
+ };
428
+ }
429
+
430
+ // Generate reflection via LLM
431
+ let reflection = null;
432
+ let confidence = 0;
433
+
434
+ try {
435
+ const result = await this.llmClient.reflect(prompt, memories);
436
+ reflection = result.reflection;
437
+ confidence = result.confidence;
438
+ } catch (error) {
439
+ const errorMessage = error instanceof Error ? error.message : String(error);
440
+ console.warn(`[MemoryMesh] LLM reflection failed: ${errorMessage}`);
441
+ // Fall back to simple aggregation
442
+ reflection = `Aggregated from ${memories.length} memories on topic: ${topic || 'general'}`;
443
+ confidence = 0.5;
444
+ }
445
+
446
+ // Store reflection to memory
447
+ const reflectionId = `reflect_${Date.now()}_${crypto.randomBytes(4).toString('hex')}`;
448
+ await this.add(reflection, {
449
+ type: 'reflection',
450
+ topic: topic || 'general',
451
+ source_memory_count: memories.length,
452
+ confidence,
453
+ generated_at: new Date().toISOString()
454
+ });
455
+
456
+ // Emit YAMO block if enabled
457
+ let yamoBlock = null;
458
+ if (this.enableYamo) {
459
+ yamoBlock = YamoEmitter.buildReflectBlock({
460
+ topic: topic || 'general',
461
+ memoryCount: memories.length,
462
+ agentId: this.agentId,
463
+ reflection,
464
+ confidence
465
+ });
466
+
467
+ await this._emitYamoBlock('reflect', reflectionId, yamoBlock);
468
+ }
469
+
470
+ return {
471
+ id: reflectionId,
472
+ topic: topic || 'general',
473
+ reflection,
474
+ confidence,
475
+ sourceMemoryCount: memories.length,
476
+ yamoBlock,
477
+ createdAt: new Date().toISOString()
478
+ };
479
+ }
480
+
481
+ /**
482
+ * Emit a YAMO block to the YAMO blocks table
483
+ * @private
484
+ * @param {string} operationType - 'retain', 'recall', 'reflect'
485
+ * @param {string|undefined} memoryId - Associated memory ID (undefined for recall)
486
+ * @param {string} yamoText - The YAMO block text
487
+ */
488
+ async _emitYamoBlock(operationType, memoryId, yamoText) {
489
+ if (!this.yamoTable) {
490
+ if (process.env.YAMO_DEBUG === 'true') {
491
+ console.warn('[MemoryMesh] YAMO table not initialized, skipping emission');
492
+ }
493
+ return;
494
+ }
495
+
496
+ const yamoId = `yamo_${operationType}_${Date.now()}_${crypto.randomBytes(4).toString('hex')}`;
497
+
498
+ try {
499
+ await this.yamoTable.add([{
500
+ id: yamoId,
501
+ agent_id: this.agentId,
502
+ operation_type: operationType,
503
+ yamo_text: yamoText,
504
+ timestamp: new Date(),
505
+ block_hash: null, // Future: blockchain anchoring
506
+ prev_hash: null,
507
+ metadata: JSON.stringify({
508
+ memory_id: memoryId || null,
509
+ timestamp: new Date().toISOString()
510
+ })
511
+ }]);
512
+
513
+ if (process.env.YAMO_DEBUG === 'true') {
514
+ console.log(`[MemoryMesh] YAMO block emitted: ${yamoId}`);
515
+ }
516
+ } catch (error) {
517
+ const errorMessage = error instanceof Error ? error.message : String(error);
518
+ console.error(`[MemoryMesh] Failed to emit YAMO block: ${errorMessage}`);
307
519
  }
308
520
  }
309
521
 
@@ -377,7 +589,7 @@ class MemoryMesh {
377
589
 
378
590
  } catch (error) {
379
591
  const e = error instanceof Error ? error : new Error(String(error));
380
- throw handleError(e, { context: 'MemoryMesh.addBatch', count: entries.length });
592
+ throw e;
381
593
  }
382
594
  }
383
595
 
@@ -411,34 +623,83 @@ class MemoryMesh {
411
623
  // Generate embedding using EmbeddingFactory
412
624
  const vector = await this.embeddingFactory.embed(query);
413
625
 
414
- // Perform semantic search
626
+ // 1. Vector Search
415
627
  if (!this.client) throw new Error('Database client not initialized');
416
- const results = await this.client.search(vector, {
417
- limit,
628
+ const vectorResults = await this.client.search(vector, {
629
+ limit: limit * 2, // Fetch more for re-ranking
418
630
  metric: 'cosine',
419
631
  filter
420
632
  });
421
633
 
422
- // Format results
423
- const formattedResults = results.map(result => ({
424
- id: result.id,
425
- content: result.content,
426
- metadata: result.metadata,
427
- score: result.score,
428
- created_at: result.created_at
429
- }));
634
+ // 2. Keyword Search
635
+ const keywordResults = this.keywordSearch.search(query, { limit: limit * 2 });
636
+
637
+ // 3. Reciprocal Rank Fusion (RRF)
638
+ const k = 60; // RRF constant
639
+ const scores = new Map(); // id -> score
640
+ const docMap = new Map(); // id -> doc
641
+
642
+ // Process Vector Results
643
+ vectorResults.forEach((doc, rank) => {
644
+ const rrf = 1 / (k + rank + 1);
645
+ scores.set(doc.id, (scores.get(doc.id) || 0) + rrf);
646
+ docMap.set(doc.id, doc);
647
+ });
648
+
649
+ // Process Keyword Results
650
+ keywordResults.forEach((doc, rank) => {
651
+ const rrf = 1 / (k + rank + 1);
652
+ scores.set(doc.id, (scores.get(doc.id) || 0) + rrf);
653
+
654
+ if (!docMap.has(doc.id)) {
655
+ // Add keyword-only match
656
+ docMap.set(doc.id, {
657
+ id: doc.id,
658
+ content: doc.content,
659
+ metadata: doc.metadata,
660
+ score: 0, // Base score, will be overwritten
661
+ created_at: new Date().toISOString() // Approximate or missing
662
+ });
663
+ }
664
+ });
665
+
666
+ // Sort by RRF score
667
+ const mergedResults = Array.from(scores.entries())
668
+ .sort((a, b) => b[1] - a[1])
669
+ .slice(0, limit)
670
+ .map(([id, score]) => {
671
+ const doc = docMap.get(id);
672
+ if (doc) return { ...doc, score };
673
+ return null;
674
+ })
675
+ .filter(d => d !== null);
430
676
 
431
677
  // Cache the result (unless disabled)
432
678
  if (useCache) {
433
679
  const cacheKey = this._generateCacheKey(query, { limit, filter });
434
- this._cacheResult(cacheKey, formattedResults);
680
+ this._cacheResult(cacheKey, mergedResults);
681
+ }
682
+
683
+ // Emit YAMO block for recall operation (async, non-blocking)
684
+ if (this.enableYamo) {
685
+ this._emitYamoBlock('recall', undefined, YamoEmitter.buildRecallBlock({
686
+ query,
687
+ resultCount: mergedResults.length,
688
+ limit,
689
+ agentId: this.agentId,
690
+ searchType: 'hybrid'
691
+ })).catch(err => {
692
+ if (process.env.YAMO_DEBUG === 'true') {
693
+ console.error('[MemoryMesh] YAMO emission failed in search():', err);
694
+ }
695
+ });
435
696
  }
436
697
 
437
- return formattedResults;
698
+ return mergedResults;
438
699
 
439
700
  } catch (error) {
440
701
  const e = error instanceof Error ? error : new Error(String(error));
441
- throw handleError(e, { context: 'MemoryMesh.search', query });
702
+ throw e;
442
703
  }
443
704
  }
444
705
 
@@ -469,7 +730,7 @@ class MemoryMesh {
469
730
 
470
731
  } catch (error) {
471
732
  const e = error instanceof Error ? error : new Error(String(error));
472
- throw handleError(e, { context: 'MemoryMesh.get', id });
733
+ throw e;
473
734
  }
474
735
  }
475
736
 
@@ -486,7 +747,63 @@ class MemoryMesh {
486
747
  return await this.client.getAll(options);
487
748
  } catch (error) {
488
749
  const e = error instanceof Error ? error : new Error(String(error));
489
- throw handleError(e, { context: 'MemoryMesh.getAll' });
750
+ throw e;
751
+ }
752
+ }
753
+
754
+ /**
755
+ * Get YAMO blocks for this agent (audit trail)
756
+ * @param {Object} options - Query options
757
+ * @param {string} [options.operationType] - Filter by operation type ('retain', 'recall', 'reflect')
758
+ * @param {number} [options.limit=10] - Max results to return
759
+ * @returns {Promise<Array>} List of YAMO blocks
760
+ */
761
+ async getYamoLog(options = {}) {
762
+ if (!this.yamoTable) {
763
+ return [];
764
+ }
765
+
766
+ const limit = options.limit || 10;
767
+ const operationType = options.operationType;
768
+
769
+ try {
770
+ // Use search with empty vector to get all records, then filter
771
+ // This avoids using the protected execute() method
772
+ const allResults = [];
773
+
774
+ // Build query manually using the LanceDB table
775
+ // @ts-ignore - LanceDB types may not match exactly
776
+ const table = this.yamoTable;
777
+
778
+ // Get all records and filter
779
+ // @ts-ignore
780
+ const records = await table.query().limit(limit * 2).toArrow();
781
+
782
+ // Process Arrow table
783
+ for (const row of records) {
784
+ const opType = row.operationType;
785
+ if (!operationType || opType === operationType) {
786
+ allResults.push({
787
+ id: row.id,
788
+ agentId: row.agentId,
789
+ operationType: row.operationType,
790
+ yamoText: row.yamoText,
791
+ timestamp: row.timestamp,
792
+ blockHash: row.blockHash,
793
+ metadata: row.metadata ? JSON.parse(row.metadata) : null
794
+ });
795
+
796
+ if (allResults.length >= limit) {
797
+ break;
798
+ }
799
+ }
800
+ }
801
+
802
+ return allResults;
803
+ } catch (error) {
804
+ const errorMessage = error instanceof Error ? error.message : String(error);
805
+ console.error('[MemoryMesh] Failed to get YAMO log:', errorMessage);
806
+ return [];
490
807
  }
491
808
  }
492
809
 
@@ -548,7 +865,7 @@ class MemoryMesh {
548
865
 
549
866
  } catch (error) {
550
867
  const e = error instanceof Error ? error : new Error(String(error));
551
- throw handleError(e, { context: 'MemoryMesh.update', id });
868
+ throw e;
552
869
  }
553
870
  }
554
871
 
@@ -564,6 +881,9 @@ class MemoryMesh {
564
881
  if (!this.client) throw new Error('Database client not initialized');
565
882
  const result = await this.client.delete(id);
566
883
 
884
+ // Remove from Keyword Search
885
+ this.keywordSearch.remove(id);
886
+
567
887
  return {
568
888
  deleted: result.id,
569
889
  success: result.success
@@ -572,7 +892,7 @@ class MemoryMesh {
572
892
 
573
893
  } catch (error) {
574
894
  const e = error instanceof Error ? error : new Error(String(error));
575
- throw handleError(e, { context: 'MemoryMesh.delete', id });
895
+ throw e;
576
896
  }
577
897
  }
578
898
 
@@ -599,7 +919,7 @@ class MemoryMesh {
599
919
 
600
920
  } catch (error) {
601
921
  const e = error instanceof Error ? error : new Error(String(error));
602
- throw handleError(e, { context: 'MemoryMesh.stats' });
922
+ throw e;
603
923
  }
604
924
  }
605
925
 
@@ -803,7 +1123,7 @@ async function run() {
803
1123
 
804
1124
  try {
805
1125
  // Route to appropriate action
806
- if (action === 'ingest') {
1126
+ if (action === 'ingest' || action === 'store') {
807
1127
  // Validate required fields
808
1128
  if (!input.content) {
809
1129
  console.error('❌ Error: "content" field is required for ingest action');
@@ -866,6 +1186,36 @@ ${jsonResult}
866
1186
  console.log(`[MemoryMesh] Deleted record ${result.deleted}`);
867
1187
  console.log(JSON.stringify({ status: "ok", ...result }));
868
1188
 
1189
+ } else if (action === 'export') {
1190
+ const records = await mesh.getAll({ limit: input.limit || 10000 });
1191
+ console.log(JSON.stringify({ status: "ok", count: records.length, records }));
1192
+
1193
+ } else if (action === 'reflect') {
1194
+ // Enhanced reflect with LLM support
1195
+ const enableLLM = input.llm !== false; // Default true
1196
+ const result = await mesh.reflect({
1197
+ topic: input.topic,
1198
+ lookback: input.limit || 10,
1199
+ generate: enableLLM
1200
+ });
1201
+
1202
+ if (result.reflection) {
1203
+ // New format with LLM-generated reflection
1204
+ console.log(JSON.stringify({
1205
+ status: "ok",
1206
+ reflection: result.reflection,
1207
+ confidence: result.confidence,
1208
+ id: result.id,
1209
+ topic: result.topic,
1210
+ sourceMemoryCount: result.sourceMemoryCount,
1211
+ yamoBlock: result.yamoBlock,
1212
+ createdAt: result.createdAt
1213
+ }));
1214
+ } else {
1215
+ // Old format for backward compatibility (prompt-only mode)
1216
+ console.log(JSON.stringify({ status: "ok", ...result }));
1217
+ }
1218
+
869
1219
  } else if (action === 'stats') {
870
1220
  const stats = await mesh.stats();
871
1221
  console.log('[MemoryMesh] Database Statistics:');
@@ -896,8 +1246,8 @@ ${jsonResult}
896
1246
  }
897
1247
  }
898
1248
 
899
- // Export for testing
900
- export { MemoryMesh };
1249
+ // Export for testing and CLI usage
1250
+ export { MemoryMesh, run };
901
1251
  export default MemoryMesh;
902
1252
 
903
1253
  // Run CLI if called directly
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Simple Keyword Search Engine (In-Memory)
3
+ * Provides basic TF-IDF style retrieval to complement vector search
4
+ */
5
+
6
/**
 * In-memory inverted index with TF-IDF style scoring.
 *
 * Documents are identified by a caller-supplied id. Adding an id that is
 * already indexed replaces the previous document.
 */
export class KeywordSearch {
  constructor() {
    this.index = new Map(); // token -> Map<docId, term frequency>
    this.docLengths = new Map(); // docId -> number of tokens
    this.idf = new Map(); // token -> idf value (rebuilt lazily)
    this.docs = new Map(); // docId -> { content, metadata }
    this.isDirty = false; // true when idf must be recomputed before searching
  }

  /**
   * Tokenize text into normalized terms: lower-cased, punctuation removed,
   * split on whitespace, tokens of 1-2 characters dropped, and each token
   * truncated to 20 characters.
   *
   * Letters, combining marks and digits of any script are kept (not just
   * ASCII), so accented and non-Latin words stay searchable. Non-string
   * input is converted with String().
   *
   * @param {string} text
   * @returns {string[]} tokens
   */
  tokenize(text) {
    if (!text) return [];
    return String(text)
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '') // Remove punctuation, keep Unicode word characters
      .split(/\s+/)
      .filter(token => token.length > 2) // Drop very short tokens
      .map(token => token.substring(0, 20)); // Truncate
  }

  /**
   * Add a document to the index, replacing any document stored under the same id.
   * @param {string} id
   * @param {string} content
   * @param {Object} [metadata]
   */
  add(id, content, metadata = {}) {
    // Drop the previous version first; otherwise tokens that only occurred in
    // the old content would keep matching this id.
    if (this.docs.has(id)) {
      this.remove(id);
    }

    const tokens = this.tokenize(content);
    const termFreqs = new Map();
    for (const token of tokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }

    this.docLengths.set(id, tokens.length);
    this.docs.set(id, { content, metadata });

    for (const [token, freq] of termFreqs) {
      let postings = this.index.get(token);
      if (!postings) {
        postings = new Map();
        this.index.set(token, postings);
      }
      postings.set(id, freq);
    }

    this.isDirty = true;
  }

  /**
   * Remove a document. Unknown ids are ignored.
   * @param {string} id
   */
  remove(id) {
    this.docLengths.delete(id);
    this.docs.delete(id);

    // Walks the whole vocabulary: fine for small indexes.
    for (const [token, postings] of this.index) {
      // Prune tokens that no longer occur in any document so the vocabulary
      // does not grow without bound (deleting during Map iteration is safe).
      if (postings.delete(id) && postings.size === 0) {
        this.index.delete(token);
      }
    }

    this.isDirty = true;
  }

  /**
   * Recalculate IDF scores if the index changed since the last computation.
   */
  _computeStats() {
    if (!this.isDirty) return;

    const docCount = this.docLengths.size;
    this.idf.clear();

    for (const [token, postings] of this.index) {
      // Smoothed IDF: log(N / (df + 1)) + 1
      this.idf.set(token, Math.log(docCount / (postings.size + 1)) + 1);
    }

    this.isDirty = false;
  }

  /**
   * Search for query terms. A document's score is the sum of tf * idf over
   * the query tokens it contains.
   *
   * @param {string} query
   * @param {Object} [options]
   * @param {number} [options.limit=10] - Maximum number of results
   * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
   *   Results sorted by descending score.
   */
  search(query, options = {}) {
    this._computeStats();

    const limit = options.limit || 10;
    const scores = new Map(); // docId -> score
    const matches = new Map(); // docId -> matched tokens (each listed once)

    for (const token of this.tokenize(query)) {
      const postings = this.index.get(token);
      if (!postings) continue;

      const idf = this.idf.get(token) || 0;

      for (const [docId, tf] of postings) {
        scores.set(docId, (scores.get(docId) || 0) + tf * idf);

        let matched = matches.get(docId);
        if (!matched) {
          matched = [];
          matches.set(docId, matched);
        }
        // A token repeated in the query still adds to the score, but is
        // reported only once in `matches`.
        if (!matched.includes(token)) {
          matched.push(token);
        }
      }
    }

    return Array.from(scores, ([id, score]) => ({
      id,
      score,
      matches: matches.get(id) || [],
      ...this.docs.get(id)
    }))
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }

  /**
   * Bulk load records. Each record is added via add() using its `id`,
   * `content` and `metadata` fields. A null/undefined list, and null entries
   * within the list, are skipped.
   * @param {Array} records
   */
  load(records) {
    if (records === null || records === undefined) return;
    records.forEach(record => {
      if (record === null || record === undefined) return;
      this.add(record.id, record.content, record.metadata);
    });
  }
}