@yamo/memory-mesh 2.3.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/bin/memory_mesh.js +1 -1
  2. package/lib/llm/client.d.ts +111 -0
  3. package/lib/llm/client.js +299 -357
  4. package/lib/llm/client.ts +413 -0
  5. package/lib/llm/index.d.ts +17 -0
  6. package/lib/llm/index.js +15 -8
  7. package/lib/llm/index.ts +19 -0
  8. package/lib/memory/adapters/client.d.ts +183 -0
  9. package/lib/memory/adapters/client.js +518 -0
  10. package/lib/memory/adapters/client.ts +678 -0
  11. package/lib/memory/adapters/config.d.ts +137 -0
  12. package/lib/memory/adapters/config.js +189 -0
  13. package/lib/memory/adapters/config.ts +259 -0
  14. package/lib/memory/adapters/errors.d.ts +76 -0
  15. package/lib/memory/adapters/errors.js +128 -0
  16. package/lib/memory/adapters/errors.ts +166 -0
  17. package/lib/memory/context-manager.d.ts +44 -0
  18. package/lib/memory/context-manager.js +344 -0
  19. package/lib/memory/context-manager.ts +432 -0
  20. package/lib/memory/embeddings/factory.d.ts +59 -0
  21. package/lib/memory/embeddings/factory.js +148 -0
  22. package/lib/{embeddings/factory.js → memory/embeddings/factory.ts} +69 -28
  23. package/lib/memory/embeddings/index.d.ts +2 -0
  24. package/lib/memory/embeddings/index.js +2 -0
  25. package/lib/memory/embeddings/index.ts +2 -0
  26. package/lib/memory/embeddings/service.d.ts +164 -0
  27. package/lib/memory/embeddings/service.js +515 -0
  28. package/lib/{embeddings/service.js → memory/embeddings/service.ts} +223 -156
  29. package/lib/memory/index.d.ts +9 -0
  30. package/lib/memory/index.js +9 -1
  31. package/lib/memory/index.ts +20 -0
  32. package/lib/memory/memory-mesh.d.ts +274 -0
  33. package/lib/memory/memory-mesh.js +1469 -678
  34. package/lib/memory/memory-mesh.ts +1803 -0
  35. package/lib/memory/memory-translator.d.ts +19 -0
  36. package/lib/memory/memory-translator.js +125 -0
  37. package/lib/memory/memory-translator.ts +158 -0
  38. package/lib/memory/schema.d.ts +111 -0
  39. package/lib/memory/schema.js +183 -0
  40. package/lib/memory/schema.ts +267 -0
  41. package/lib/memory/scorer.d.ts +26 -0
  42. package/lib/memory/scorer.js +77 -0
  43. package/lib/memory/scorer.ts +95 -0
  44. package/lib/memory/search/index.d.ts +1 -0
  45. package/lib/memory/search/index.js +1 -0
  46. package/lib/memory/search/index.ts +1 -0
  47. package/lib/memory/search/keyword-search.d.ts +62 -0
  48. package/lib/memory/search/keyword-search.js +135 -0
  49. package/lib/{search/keyword-search.js → memory/search/keyword-search.ts} +66 -36
  50. package/lib/scrubber/config/defaults.d.ts +53 -0
  51. package/lib/scrubber/config/defaults.js +49 -57
  52. package/lib/scrubber/config/defaults.ts +117 -0
  53. package/lib/scrubber/index.d.ts +6 -0
  54. package/lib/scrubber/index.js +3 -23
  55. package/lib/scrubber/index.ts +7 -0
  56. package/lib/scrubber/scrubber.d.ts +61 -0
  57. package/lib/scrubber/scrubber.js +99 -121
  58. package/lib/scrubber/scrubber.ts +168 -0
  59. package/lib/scrubber/stages/chunker.d.ts +13 -0
  60. package/lib/scrubber/stages/metadata-annotator.d.ts +18 -0
  61. package/lib/scrubber/stages/normalizer.d.ts +13 -0
  62. package/lib/scrubber/stages/semantic-filter.d.ts +13 -0
  63. package/lib/scrubber/stages/structural-cleaner.d.ts +13 -0
  64. package/lib/scrubber/stages/validator.d.ts +18 -0
  65. package/lib/scrubber/telemetry.d.ts +36 -0
  66. package/lib/scrubber/telemetry.js +53 -58
  67. package/lib/scrubber/telemetry.ts +99 -0
  68. package/lib/utils/logger.d.ts +29 -0
  69. package/lib/utils/logger.js +64 -0
  70. package/lib/utils/logger.ts +85 -0
  71. package/lib/utils/skill-metadata.d.ts +32 -0
  72. package/lib/utils/skill-metadata.js +132 -0
  73. package/lib/utils/skill-metadata.ts +147 -0
  74. package/lib/yamo/emitter.d.ts +73 -0
  75. package/lib/yamo/emitter.js +78 -143
  76. package/lib/yamo/emitter.ts +249 -0
  77. package/lib/yamo/schema.d.ts +58 -0
  78. package/lib/yamo/schema.js +81 -108
  79. package/lib/yamo/schema.ts +165 -0
  80. package/package.json +11 -8
  81. package/index.d.ts +0 -111
  82. package/lib/embeddings/index.js +0 -2
  83. package/lib/index.js +0 -6
  84. package/lib/lancedb/client.js +0 -633
  85. package/lib/lancedb/config.js +0 -215
  86. package/lib/lancedb/errors.js +0 -144
  87. package/lib/lancedb/index.js +0 -4
  88. package/lib/lancedb/schema.js +0 -217
  89. package/lib/scrubber/errors/scrubber-error.js +0 -43
  90. package/lib/scrubber/stages/chunker.js +0 -103
  91. package/lib/scrubber/stages/metadata-annotator.js +0 -74
  92. package/lib/scrubber/stages/normalizer.js +0 -59
  93. package/lib/scrubber/stages/semantic-filter.js +0 -61
  94. package/lib/scrubber/stages/structural-cleaner.js +0 -82
  95. package/lib/scrubber/stages/validator.js +0 -66
  96. package/lib/scrubber/utils/hash.js +0 -39
  97. package/lib/scrubber/utils/html-parser.js +0 -45
  98. package/lib/scrubber/utils/pattern-matcher.js +0 -63
  99. package/lib/scrubber/utils/token-counter.js +0 -31
  100. package/lib/search/index.js +0 -1
  101. package/lib/utils/index.js +0 -1
  102. package/lib/yamo/index.js +0 -15
@@ -0,0 +1,267 @@
1
+ /**
2
+ * LanceDB Schema Definitions for MemoryManager
3
+ * Uses Apache Arrow Schema format for LanceDB JavaScript SDK
4
+ *
5
+ * Supports dynamic vector dimensions for different embedding models:
6
+ * - all-MiniLM-L6-v2: 384 dimensions
7
+ * - all-mpnet-base-v2: 768 dimensions
8
+ * - text-embedding-3-small: 1536 dimensions
9
+ */
10
+
11
+ import * as arrow from "apache-arrow";
12
+ import * as lancedb from "@lancedb/lancedb";
13
+
14
/**
 * Default vector dimension (all-MiniLM-L6-v2)
 */
export const DEFAULT_VECTOR_DIMENSION = 384;

/**
 * Common embedding model dimensions.
 * Keyed by "<provider>/<model>" identifier; consumed by
 * getEmbeddingDimension() for exact and partial model-name matching.
 */
export const EMBEDDING_DIMENSIONS: Record<string, number> = {
  // transformers.js (Xenova) models
  "Xenova/all-MiniLM-L6-v2": 384,
  "Xenova/all-mpnet-base-v2": 768,
  "Xenova/distiluse-base-multilingual-cased-v1": 512,
  // sentence-transformers models
  "sentence-transformers/all-MiniLM-L6-v2": 384,
  "sentence-transformers/all-mpnet-base-v2": 768,
  // OpenAI models
  "openai/text-embedding-3-small": 1536,
  "openai/text-embedding-3-large": 3072,
  // Cohere models
  "cohere/embed-english-light-v3.0": 1024,
  "cohere/embed-english-v3.0": 1024,
};
33
+
34
+ /**
35
+ * Get dimension for a given embedding model
36
+ * @param {string} modelName - Embedding model name or path
37
+ * @returns {number} Vector dimension
38
+ */
39
+ export function getEmbeddingDimension(modelName?: string): number {
40
+ if (!modelName) {
41
+ return DEFAULT_VECTOR_DIMENSION;
42
+ }
43
+
44
+ // Check exact match
45
+ if (EMBEDDING_DIMENSIONS[modelName]) {
46
+ return EMBEDDING_DIMENSIONS[modelName];
47
+ }
48
+
49
+ // Check for partial matches
50
+ for (const [key, dimension] of Object.entries(EMBEDDING_DIMENSIONS)) {
51
+ if (modelName.toLowerCase().includes(key.toLowerCase())) {
52
+ return dimension;
53
+ }
54
+ }
55
+
56
+ // Fallback to default
57
+ return DEFAULT_VECTOR_DIMENSION;
58
+ }
59
+
60
+ /**
61
+ * Create a memory schema with a specific vector dimension
62
+ * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
63
+ * @returns {arrow.Schema} Arrow schema with specified dimension
64
+ */
65
+ export function createMemorySchema(
66
+ vectorDim: number = DEFAULT_VECTOR_DIMENSION,
67
+ ): arrow.Schema {
68
+ return new arrow.Schema([
69
+ new arrow.Field("id", new arrow.Utf8(), false),
70
+ new arrow.Field(
71
+ "vector",
72
+ new arrow.FixedSizeList(
73
+ vectorDim,
74
+ new arrow.Field("item", new arrow.Float32(), true),
75
+ ),
76
+ false,
77
+ ),
78
+ new arrow.Field("content", new arrow.Utf8(), false),
79
+ new arrow.Field("metadata", new arrow.Utf8(), true), // Stored as JSON string
80
+ new arrow.Field(
81
+ "created_at",
82
+ new arrow.Timestamp(arrow.TimeUnit.MILLISECOND),
83
+ false,
84
+ ),
85
+ new arrow.Field(
86
+ "updated_at",
87
+ new arrow.Timestamp(arrow.TimeUnit.MILLISECOND),
88
+ true,
89
+ ),
90
+ ]);
91
+ }
92
+
93
+ /**
94
+ * Create V2 memory schema with automatic recall fields
95
+ * All new fields are nullable for backward compatibility
96
+ * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
97
+ * @returns {arrow.Schema} Arrow schema with V2 fields
98
+ */
99
+ export function createMemorySchemaV2(
100
+ vectorDim: number = DEFAULT_VECTOR_DIMENSION,
101
+ ): arrow.Schema {
102
+ return new arrow.Schema([
103
+ // ========== V1 Fields (Backward Compatible) ==========
104
+ new arrow.Field("id", new arrow.Utf8(), false),
105
+ new arrow.Field(
106
+ "vector",
107
+ new arrow.FixedSizeList(
108
+ vectorDim,
109
+ new arrow.Field("item", new arrow.Float32(), true),
110
+ ),
111
+ false,
112
+ ),
113
+ new arrow.Field("content", new arrow.Utf8(), false),
114
+ new arrow.Field("metadata", new arrow.Utf8(), true),
115
+ new arrow.Field(
116
+ "created_at",
117
+ new arrow.Timestamp(arrow.TimeUnit.MILLISECOND),
118
+ false,
119
+ ),
120
+ new arrow.Field(
121
+ "updated_at",
122
+ new arrow.Timestamp(arrow.TimeUnit.MILLISECOND),
123
+ true,
124
+ ),
125
+
126
+ // ========== V2 Fields (All Nullable) ==========
127
+ new arrow.Field("session_id", new arrow.Utf8(), true), // Session association
128
+ new arrow.Field("agent_id", new arrow.Utf8(), true), // Agent/skill that created memory
129
+ new arrow.Field("memory_type", new arrow.Utf8(), true), // 'global', 'session', 'agent'
130
+ new arrow.Field("importance_score", new arrow.Float32(), true), // 0.0-1.0 importance
131
+ new arrow.Field("access_count", new arrow.Int32(), true), // Popularity tracking
132
+ new arrow.Field(
133
+ "last_accessed",
134
+ new arrow.Timestamp(arrow.TimeUnit.MILLISECOND),
135
+ true,
136
+ ),
137
+ ]);
138
+ }
139
+
140
+ /**
141
+ * Create schema for synthesized skills (Recursive Skill Synthesis)
142
+ * @param {number} vectorDim - Vector dimension for intent embedding
143
+ * @returns {arrow.Schema} Arrow schema
144
+ */
145
+ export function createSynthesizedSkillSchema(
146
+ vectorDim: number = DEFAULT_VECTOR_DIMENSION,
147
+ ): arrow.Schema {
148
+ return new arrow.Schema([
149
+ new arrow.Field("id", new arrow.Utf8(), false),
150
+ new arrow.Field("name", new arrow.Utf8(), false),
151
+ new arrow.Field("intent", new arrow.Utf8(), false),
152
+ new arrow.Field("yamo_text", new arrow.Utf8(), false),
153
+ new arrow.Field(
154
+ "vector",
155
+ new arrow.FixedSizeList(
156
+ vectorDim,
157
+ new arrow.Field("item", new arrow.Float32(), true),
158
+ ),
159
+ false,
160
+ ),
161
+ new arrow.Field("metadata", new arrow.Utf8(), true), // Stored as JSON: {reliability, use_count, created_at}
162
+ new arrow.Field(
163
+ "created_at",
164
+ new arrow.Timestamp(arrow.TimeUnit.MILLISECOND),
165
+ false,
166
+ ),
167
+ ]);
168
+ }
169
+
170
+ /**
171
+ * Check if a table is using V2 schema
172
+ * @param {arrow.Schema} schema - Table schema to check
173
+ * @returns {boolean} True if V2 schema detected
174
+ */
175
+ export function isSchemaV2(schema: arrow.Schema): boolean {
176
+ return schema.fields.some((f) => f.name === "session_id");
177
+ }
178
+
179
/**
 * Memory table schema using Apache Arrow format (default 384 dimensions)
 * @deprecated Use createMemorySchema(vectorDim) for dynamic dimensions
 */
export const MEMORY_SCHEMA = createMemorySchema(DEFAULT_VECTOR_DIMENSION);

/**
 * Index configuration for memory table.
 * Indices should be created after data is inserted.
 * NOTE(review): option names look like LanceDB index-builder parameters —
 * confirm against the consumer that actually creates the indices.
 */
export const INDEX_CONFIG = {
  vector: {
    index_type: "ivf_pq", // IVF + product quantization ANN index
    metric: "cosine", // similarity metric for vector search
    num_partitions: 256,
    num_sub_vectors: 8,
  },
  full_text: {
    fields: ["content"], // full-text index over the raw content column
  },
};
200
+
201
+ /**
202
+ * Creates a memory table in LanceDB with the predefined schema (384 dimensions)
203
+ * @param {lancedb.Connection} db - LanceDB connection
204
+ * @param {string} tableName - Name of the table to create (default: 'memory_entries')
205
+ * @returns {Promise<lancedb.Table>} The created or opened table
206
+ * @throws {Error} If table creation fails
207
+ * @deprecated Use createMemoryTableWithDimension() for dynamic dimensions
208
+ */
209
+ export async function createMemoryTable(
210
+ db: lancedb.Connection,
211
+ tableName: string = "memory_entries",
212
+ ): Promise<lancedb.Table> {
213
+ return createMemoryTableWithDimension(
214
+ db,
215
+ tableName,
216
+ DEFAULT_VECTOR_DIMENSION,
217
+ );
218
+ }
219
+
220
+ /**
221
+ * Creates a memory table in LanceDB with a specific vector dimension
222
+ * @param {lancedb.Connection} db - LanceDB connection
223
+ * @param {string} tableName - Name of the table to create
224
+ * @param {number} vectorDim - Vector dimension (384, 768, 1536, etc.)
225
+ * @returns {Promise<lancedb.Table>} The created or opened table
226
+ * @throws {Error} If table creation fails
227
+ */
228
+ export async function createMemoryTableWithDimension(
229
+ db: lancedb.Connection,
230
+ tableName: string,
231
+ vectorDim: number,
232
+ ): Promise<lancedb.Table> {
233
+ try {
234
+ // Check if table already exists
235
+ const existingTables = await db.tableNames();
236
+
237
+ if (existingTables.includes(tableName)) {
238
+ return await db.openTable(tableName);
239
+ }
240
+
241
+ // Create schema with specified dimension
242
+ const schema = createMemorySchema(vectorDim);
243
+
244
+ // Create table with schema
245
+ // LanceDB v0.23.0+ accepts empty array as initial data with schema option
246
+ const table = await db.createTable(tableName, [], { schema } as any); // Cast to any because lancedb types might be strict about options
247
+ return table;
248
+ } catch (error) {
249
+ const message = error instanceof Error ? error.message : String(error);
250
+ throw new Error(
251
+ `Failed to create memory table with dimension ${vectorDim}: ${message}`,
252
+ );
253
+ }
254
+ }
255
+
256
// Aggregate default export mirroring the named exports above, for consumers
// that prefer a single namespace object.
export default {
  MEMORY_SCHEMA,
  INDEX_CONFIG,
  createMemoryTable,
  createMemoryTableWithDimension,
  createMemorySchema,
  createMemorySchemaV2,
  isSchemaV2,
  getEmbeddingDimension,
  DEFAULT_VECTOR_DIMENSION,
  EMBEDDING_DIMENSIONS,
};
@@ -0,0 +1,26 @@
1
/**
 * MemoryScorer - Calculate memory importance and detect duplicates
 */
import { MemoryMesh } from "./memory-mesh.js";
export declare class MemoryScorer {
    #private;
    /**
     * @param {MemoryMesh} mesh - MemoryMesh instance for duplicate checking
     */
    constructor(mesh: MemoryMesh);
    /**
     * Calculate importance score for content.
     * Synchronous heuristic (no mesh access).
     * @param {string} content - Content to score
     * @param {Object} metadata - Associated metadata
     * @returns {number} Importance score (0-1)
     */
    calculateImportance(content: string, metadata?: any): number;
    /**
     * Check if content is duplicate of existing memory
     * @param {string} content - Content to check
     * @param {number} threshold - Similarity threshold (default 0.9)
     * @returns {Promise<boolean>} True if duplicate exists
     */
    isDuplicate(content: string, threshold?: number): Promise<boolean>;
}
export default MemoryScorer;
@@ -0,0 +1,77 @@
1
/**
 * MemoryScorer - Calculate memory importance and detect duplicates
 */
export class MemoryScorer {
  #mesh;

  /**
   * @param {MemoryMesh} mesh - MemoryMesh instance for duplicate checking
   */
  constructor(mesh) {
    this.#mesh = mesh;
  }

  /**
   * Calculate importance score for content.
   * Sums heuristic bonuses — length (≤0.2), structured data (+0.1),
   * interaction type (+0.1/0.15), tools/files (≤0.15 each),
   * importance keywords (≤0.15) — and clamps the total to [0, 1].
   * @param {string} content - Content to score
   * @param {Object} metadata - Associated metadata
   * @returns {number} Importance score (0-1)
   */
  calculateImportance(content, metadata = {}) {
    let total = 0;

    // Longer content earns up to 0.2 (caps at 1000 chars).
    total += Math.min(content.length / 1000, 0.2);

    // Code fences or JSON-ish braces hint at structured data.
    if (content.includes("```") || content.includes("{")) {
      total += 0.1;
    }

    // Interaction-type bonuses.
    switch (metadata.interaction_type) {
      case "tool_execution":
        total += 0.15;
        break;
      case "file_operation":
        total += 0.1;
        break;
      default:
        break;
    }

    // Tool usage and file involvement each add up to 0.15.
    const toolCount = metadata.tools_used?.length ?? 0;
    if (toolCount > 0) {
      total += Math.min(toolCount * 0.05, 0.15);
    }
    const fileCount = metadata.files_involved?.length ?? 0;
    if (fileCount > 0) {
      total += Math.min(fileCount * 0.05, 0.15);
    }

    // Importance keywords add 0.05 each, up to 0.15.
    const importantKeywords = [
      "error",
      "bug",
      "fix",
      "important",
      "critical",
      "note",
      "remember",
    ];
    const haystack = content.toLowerCase();
    let hits = 0;
    for (const word of importantKeywords) {
      if (haystack.includes(word)) {
        hits += 1;
      }
    }
    total += Math.min(hits * 0.05, 0.15);

    return Math.min(total, 1.0);
  }

  /**
   * Check if content is duplicate of existing memory.
   * @param {string} content - Content to check
   * @param {number} threshold - Similarity threshold (default 0.9)
   * @returns {Promise<boolean>} True if duplicate exists
   */
  async isDuplicate(content, threshold = 0.9) {
    try {
      const [best] = await this.#mesh.search(content, {
        limit: 1,
        useCache: false,
      });
      return best !== undefined && best.score >= threshold;
    } catch (_error) {
      // Fail open: on search errors, allow the memory to be stored.
      return false;
    }
  }
}
export default MemoryScorer;
@@ -0,0 +1,95 @@
1
+ /**
2
+ * MemoryScorer - Calculate memory importance and detect duplicates
3
+ */
4
+
5
+ import { MemoryMesh } from "./memory-mesh.js";
6
+
7
+ export class MemoryScorer {
8
+ #mesh: MemoryMesh;
9
+
10
+ /**
11
+ * @param {MemoryMesh} mesh - MemoryMesh instance for duplicate checking
12
+ */
13
+ constructor(mesh: MemoryMesh) {
14
+ this.#mesh = mesh;
15
+ }
16
+
17
+ /**
18
+ * Calculate importance score for content
19
+ * @param {string} content - Content to score
20
+ * @param {Object} metadata - Associated metadata
21
+ * @returns {Promise<number>} Importance score (0-1)
22
+ */
23
+ calculateImportance(content: string, metadata: any = {}): number {
24
+ let score = 0;
25
+
26
+ // Content length (longer = more important, up to a point)
27
+ const length = content.length;
28
+ score += Math.min(length / 1000, 0.2);
29
+
30
+ // Has structured data (JSON, code blocks)
31
+ if (content.includes("```") || content.includes("{")) {
32
+ score += 0.1;
33
+ }
34
+
35
+ // Interaction type bonuses
36
+ if (metadata.interaction_type === "tool_execution") {
37
+ score += 0.15;
38
+ }
39
+ if (metadata.interaction_type === "file_operation") {
40
+ score += 0.1;
41
+ }
42
+
43
+ // Tool usage indicates importance
44
+ if (metadata.tools_used?.length > 0) {
45
+ score += Math.min(metadata.tools_used.length * 0.05, 0.15);
46
+ }
47
+
48
+ // File involvement
49
+ if (metadata.files_involved?.length > 0) {
50
+ score += Math.min(metadata.files_involved.length * 0.05, 0.15);
51
+ }
52
+
53
+ // Keywords that indicate importance
54
+ const importantKeywords = [
55
+ "error",
56
+ "bug",
57
+ "fix",
58
+ "important",
59
+ "critical",
60
+ "note",
61
+ "remember",
62
+ ];
63
+ const lowerContent = content.toLowerCase();
64
+ const keywordMatches = importantKeywords.filter((k) =>
65
+ lowerContent.includes(k),
66
+ ).length;
67
+ score += Math.min(keywordMatches * 0.05, 0.15);
68
+
69
+ return Math.min(score, 1.0);
70
+ }
71
+
72
+ /**
73
+ * Check if content is duplicate of existing memory
74
+ * @param {string} content - Content to check
75
+ * @param {number} threshold - Similarity threshold (default 0.9)
76
+ * @returns {Promise<boolean>} True if duplicate exists
77
+ */
78
+ async isDuplicate(
79
+ content: string,
80
+ threshold: number = 0.9,
81
+ ): Promise<boolean> {
82
+ try {
83
+ const results = await this.#mesh.search(content, {
84
+ limit: 1,
85
+ useCache: false,
86
+ });
87
+ return results.length > 0 && results[0].score >= threshold;
88
+ } catch (_error) {
89
+ // On error, assume not duplicate to allow storage
90
+ return false;
91
+ }
92
+ }
93
+ }
94
+
95
+ export default MemoryScorer;
@@ -0,0 +1 @@
1
+ export { KeywordSearch } from "./keyword-search.js";
@@ -0,0 +1 @@
1
+ export { KeywordSearch } from "./keyword-search.js";
@@ -0,0 +1 @@
1
+ export { KeywordSearch } from "./keyword-search.js";
@@ -0,0 +1,62 @@
1
/**
 * Simple Keyword Search Engine (In-Memory)
 * Provides basic TF-IDF style retrieval to complement vector search
 */
export interface KeywordDoc {
    content: string;
    metadata?: any;
}
export interface KeywordSearchResult extends KeywordDoc {
    id: string;
    score: number;
    matches: string[];
}
export interface SearchOptions {
    limit?: number;
}
export declare class KeywordSearch {
    // Inverted index: token -> (docId -> term frequency)
    index: Map<string, Map<string, number>>;
    // docId -> token count of the indexed content
    docLengths: Map<string, number>;
    // token -> inverse document frequency (recomputed lazily)
    idf: Map<string, number>;
    // docId -> original document, returned in search results
    docs: Map<string, KeywordDoc>;
    // True when idf must be recomputed before the next search
    isDirty: boolean;
    constructor();
    /**
     * Tokenize text into normalized terms
     * (lowercased, punctuation stripped, short tokens dropped)
     * @param {string} text
     * @returns {string[]} tokens
     */
    tokenize(text: string): string[];
    /**
     * Add a document to the index
     * @param {string} id
     * @param {string} content
     * @param {Object} [metadata]
     */
    add(id: string, content: string, metadata?: any): void;
    /**
     * Remove a document
     * @param {string} id
     */
    remove(id: string): void;
    /**
     * Recalculate IDF scores (no-op when the index is unchanged)
     */
    _computeStats(): void;
    /**
     * Search for query terms, ranked by accumulated tf*idf
     * @param {string} query
     * @param {Object} options
     * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
     */
    search(query: string, options?: SearchOptions): KeywordSearchResult[];
    /**
     * Bulk load records
     * @param {Array} records
     */
    load(records: {
        id: string;
        content: string;
        metadata?: any;
    }[]): void;
}
@@ -0,0 +1,135 @@
1
/**
 * Simple Keyword Search Engine (In-Memory)
 * Provides basic TF-IDF style retrieval to complement vector search
 */
export class KeywordSearch {
  index; // token -> Map<docId, tf>
  docLengths; // docId -> token count
  idf; // token -> idf value
  docs; // docId -> { content, metadata } (returned in search results)
  isDirty; // true when idf must be recomputed before the next search
  constructor() {
    this.index = new Map();
    this.docLengths = new Map();
    this.idf = new Map();
    this.docs = new Map();
    this.isDirty = false;
  }
  /**
   * Tokenize text into normalized terms:
   * lowercased, punctuation stripped, tokens of <= 2 chars dropped,
   * remaining tokens truncated to 20 chars.
   * @param {string} text
   * @returns {string[]} tokens
   */
  tokenize(text) {
    if (!text) {
      return [];
    }
    return text
      .toLowerCase()
      .replace(/[^\w\s]/g, "") // Remove punctuation
      .split(/\s+/)
      .filter((t) => t.length > 2) // Drop very short tokens
      .map((t) => t.substring(0, 20)); // Truncate
  }
  /**
   * Add (or replace) a document in the index.
   * @param {string} id
   * @param {string} content
   * @param {Object} [metadata]
   */
  add(id, content, metadata = {}) {
    // Fix: re-adding an existing id must first drop its stale postings,
    // otherwise tokens from the previous content keep matching this doc.
    if (this.docs.has(id)) {
      this.remove(id);
    }
    const tokens = this.tokenize(content);
    const termFreqs = new Map();
    tokens.forEach((t) => {
      termFreqs.set(t, (termFreqs.get(t) || 0) + 1);
    });
    this.docLengths.set(id, tokens.length);
    this.docs.set(id, { content, metadata });
    // Update inverted index
    for (const [token, freq] of termFreqs.entries()) {
      if (!this.index.has(token)) {
        this.index.set(token, new Map());
      }
      this.index.get(token).set(id, freq);
    }
    this.isDirty = true;
  }
  /**
   * Remove a document.
   * @param {string} id
   */
  remove(id) {
    this.docLengths.delete(id);
    this.docs.delete(id);
    // This is expensive O(Vocab), but okay for small scale
    for (const docMap of this.index.values()) {
      docMap.delete(id);
    }
    this.isDirty = true;
  }
  /**
   * Recalculate IDF scores (lazy: no-op unless the index changed).
   */
  _computeStats() {
    if (!this.isDirty) {
      return;
    }
    const N = this.docLengths.size;
    this.idf.clear();
    for (const [token, docMap] of this.index.entries()) {
      const df = docMap.size;
      // Smoothed IDF: log(N / (df + 1)) + 1
      this.idf.set(token, Math.log(N / (df + 1)) + 1);
    }
    this.isDirty = false;
  }
  /**
   * Search for query terms, ranking documents by accumulated tf*idf.
   * @param {string} query
   * @param {Object} options
   * @param {number} [options.limit=10] - Max results; an explicit 0 is honored
   * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
   */
  search(query, options = {}) {
    this._computeStats();
    const tokens = this.tokenize(query);
    const scores = new Map(); // docId -> score
    const matches = new Map(); // docId -> matched tokens
    // Fix: use ?? rather than || so a caller-supplied limit of 0 is respected.
    const limit = options.limit ?? 10;
    for (const token of tokens) {
      const docMap = this.index.get(token);
      if (!docMap) {
        continue;
      }
      const idf = this.idf.get(token) || 0;
      for (const [docId, tf] of docMap.entries()) {
        // Plain tf * idf accumulation (no length normalization).
        scores.set(docId, (scores.get(docId) || 0) + tf * idf);
        if (!matches.has(docId)) {
          matches.set(docId, []);
        }
        matches.get(docId).push(token);
      }
    }
    // Rank by descending score and cap at `limit`.
    return Array.from(scores.entries())
      .map(([id, score]) => ({
        id,
        score,
        matches: matches.get(id) || [],
        ...this.docs.get(id),
      }))
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }
  /**
   * Bulk load records.
   * @param {Array<{id: string, content: string, metadata?: Object}>} records
   */
  load(records) {
    records.forEach((r) => this.add(r.id, r.content, r.metadata));
  }
}