@yamo/memory-mesh 2.3.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/README.md +8 -2
  2. package/bin/memory_mesh.js +1 -1
  3. package/lib/llm/client.d.ts +86 -0
  4. package/lib/llm/client.js +300 -357
  5. package/lib/llm/client.ts +334 -0
  6. package/lib/llm/index.d.ts +17 -0
  7. package/lib/llm/index.js +16 -8
  8. package/lib/llm/index.ts +18 -0
  9. package/lib/memory/adapters/client.d.ts +120 -0
  10. package/lib/memory/adapters/client.js +519 -0
  11. package/lib/memory/adapters/client.ts +519 -0
  12. package/lib/memory/adapters/config.d.ts +130 -0
  13. package/lib/memory/adapters/config.js +190 -0
  14. package/lib/memory/adapters/config.ts +190 -0
  15. package/lib/memory/adapters/errors.d.ts +84 -0
  16. package/lib/memory/adapters/errors.js +129 -0
  17. package/lib/memory/adapters/errors.ts +129 -0
  18. package/lib/memory/context-manager.d.ts +41 -0
  19. package/lib/memory/context-manager.js +345 -0
  20. package/lib/memory/context-manager.ts +345 -0
  21. package/lib/memory/embeddings/factory.d.ts +57 -0
  22. package/lib/memory/embeddings/factory.js +149 -0
  23. package/lib/memory/embeddings/factory.ts +149 -0
  24. package/lib/memory/embeddings/index.d.ts +2 -0
  25. package/lib/memory/embeddings/index.js +3 -0
  26. package/lib/memory/embeddings/index.ts +3 -0
  27. package/lib/memory/embeddings/service.d.ts +134 -0
  28. package/lib/memory/embeddings/service.js +516 -0
  29. package/lib/memory/embeddings/service.ts +516 -0
  30. package/lib/memory/index.d.ts +9 -0
  31. package/lib/memory/index.js +10 -1
  32. package/lib/memory/index.ts +10 -0
  33. package/lib/memory/memory-mesh.d.ts +332 -0
  34. package/lib/memory/memory-mesh.js +1470 -678
  35. package/lib/memory/memory-mesh.ts +1517 -0
  36. package/lib/memory/memory-translator.d.ts +14 -0
  37. package/lib/memory/memory-translator.js +126 -0
  38. package/lib/memory/memory-translator.ts +126 -0
  39. package/lib/memory/schema.d.ts +130 -0
  40. package/lib/memory/schema.js +184 -0
  41. package/lib/memory/schema.ts +184 -0
  42. package/lib/memory/scorer.d.ts +25 -0
  43. package/lib/memory/scorer.js +78 -0
  44. package/lib/memory/scorer.ts +78 -0
  45. package/lib/memory/search/index.d.ts +1 -0
  46. package/lib/memory/search/index.js +2 -0
  47. package/lib/memory/search/index.ts +2 -0
  48. package/lib/memory/search/keyword-search.d.ts +46 -0
  49. package/lib/memory/search/keyword-search.js +136 -0
  50. package/lib/memory/search/keyword-search.ts +136 -0
  51. package/lib/scrubber/config/defaults.d.ts +46 -0
  52. package/lib/scrubber/config/defaults.js +50 -57
  53. package/lib/scrubber/config/defaults.ts +55 -0
  54. package/lib/scrubber/errors/scrubber-error.d.ts +22 -0
  55. package/lib/scrubber/errors/scrubber-error.js +28 -32
  56. package/lib/scrubber/errors/scrubber-error.ts +44 -0
  57. package/lib/scrubber/index.d.ts +5 -0
  58. package/lib/scrubber/index.js +4 -23
  59. package/lib/scrubber/index.ts +6 -0
  60. package/lib/scrubber/scrubber.d.ts +44 -0
  61. package/lib/scrubber/scrubber.js +100 -121
  62. package/lib/scrubber/scrubber.ts +109 -0
  63. package/lib/scrubber/stages/chunker.d.ts +25 -0
  64. package/lib/scrubber/stages/chunker.js +74 -91
  65. package/lib/scrubber/stages/chunker.ts +104 -0
  66. package/lib/scrubber/stages/metadata-annotator.d.ts +17 -0
  67. package/lib/scrubber/stages/metadata-annotator.js +55 -65
  68. package/lib/scrubber/stages/metadata-annotator.ts +75 -0
  69. package/lib/scrubber/stages/normalizer.d.ts +16 -0
  70. package/lib/scrubber/stages/normalizer.js +42 -50
  71. package/lib/scrubber/stages/normalizer.ts +60 -0
  72. package/lib/scrubber/stages/semantic-filter.d.ts +16 -0
  73. package/lib/scrubber/stages/semantic-filter.js +42 -52
  74. package/lib/scrubber/stages/semantic-filter.ts +62 -0
  75. package/lib/scrubber/stages/structural-cleaner.d.ts +18 -0
  76. package/lib/scrubber/stages/structural-cleaner.js +66 -75
  77. package/lib/scrubber/stages/structural-cleaner.ts +83 -0
  78. package/lib/scrubber/stages/validator.d.ts +17 -0
  79. package/lib/scrubber/stages/validator.js +46 -56
  80. package/lib/scrubber/stages/validator.ts +67 -0
  81. package/lib/scrubber/telemetry.d.ts +29 -0
  82. package/lib/scrubber/telemetry.js +54 -58
  83. package/lib/scrubber/telemetry.ts +62 -0
  84. package/lib/scrubber/utils/hash.d.ts +14 -0
  85. package/lib/scrubber/utils/hash.js +30 -32
  86. package/lib/scrubber/utils/hash.ts +40 -0
  87. package/lib/scrubber/utils/html-parser.d.ts +14 -0
  88. package/lib/scrubber/utils/html-parser.js +32 -39
  89. package/lib/scrubber/utils/html-parser.ts +46 -0
  90. package/lib/scrubber/utils/pattern-matcher.d.ts +12 -0
  91. package/lib/scrubber/utils/pattern-matcher.js +48 -57
  92. package/lib/scrubber/utils/pattern-matcher.ts +64 -0
  93. package/lib/scrubber/utils/token-counter.d.ts +18 -0
  94. package/lib/scrubber/utils/token-counter.js +24 -25
  95. package/lib/scrubber/utils/token-counter.ts +32 -0
  96. package/lib/utils/logger.d.ts +19 -0
  97. package/lib/utils/logger.js +65 -0
  98. package/lib/utils/logger.ts +65 -0
  99. package/lib/utils/skill-metadata.d.ts +24 -0
  100. package/lib/utils/skill-metadata.js +133 -0
  101. package/lib/utils/skill-metadata.ts +133 -0
  102. package/lib/yamo/emitter.d.ts +46 -0
  103. package/lib/yamo/emitter.js +79 -143
  104. package/lib/yamo/emitter.ts +171 -0
  105. package/lib/yamo/index.d.ts +14 -0
  106. package/lib/yamo/index.js +6 -7
  107. package/lib/yamo/index.ts +16 -0
  108. package/lib/yamo/schema.d.ts +56 -0
  109. package/lib/yamo/schema.js +82 -108
  110. package/lib/yamo/schema.ts +133 -0
  111. package/package.json +13 -8
  112. package/index.d.ts +0 -111
  113. package/lib/embeddings/factory.js +0 -151
  114. package/lib/embeddings/index.js +0 -2
  115. package/lib/embeddings/service.js +0 -586
  116. package/lib/index.js +0 -6
  117. package/lib/lancedb/client.js +0 -633
  118. package/lib/lancedb/config.js +0 -215
  119. package/lib/lancedb/errors.js +0 -144
  120. package/lib/lancedb/index.js +0 -4
  121. package/lib/lancedb/schema.js +0 -217
  122. package/lib/search/index.js +0 -1
  123. package/lib/search/keyword-search.js +0 -144
  124. package/lib/utils/index.js +0 -1
@@ -0,0 +1,184 @@
1
+ // @ts-nocheck
2
+ /**
3
+ * LanceDB Schema Definitions for MemoryManager
4
+ * Uses Apache Arrow Schema format for LanceDB JavaScript SDK
5
+ *
6
+ * Supports dynamic vector dimensions for different embedding models:
7
+ * - all-MiniLM-L6-v2: 384 dimensions
8
+ * - all-mpnet-base-v2: 768 dimensions
9
+ * - text-embedding-3-small: 1536 dimensions
10
+ */
11
+ import * as arrow from "apache-arrow";
12
/**
 * Default vector dimension, matching the all-MiniLM-L6-v2 model (384 dims).
 * Used as the fallback whenever a model's dimension is unknown.
 */
export const DEFAULT_VECTOR_DIMENSION = 384;
/**
 * Known embedding model dimensions, keyed by model name/path.
 * Consulted by getEmbeddingDimension() for exact and substring matches.
 */
export const EMBEDDING_DIMENSIONS = {
    "Xenova/all-MiniLM-L6-v2": 384,
    "Xenova/all-mpnet-base-v2": 768,
    "Xenova/distiluse-base-multilingual-cased-v1": 512,
    "sentence-transformers/all-MiniLM-L6-v2": 384,
    "sentence-transformers/all-mpnet-base-v2": 768,
    "openai/text-embedding-3-small": 1536,
    "openai/text-embedding-3-large": 3072,
    "cohere/embed-english-light-v3.0": 1024,
    "cohere/embed-english-v3.0": 1024,
};
/**
 * Get the vector dimension for a given embedding model.
 * Tries an exact key match first, then a case-insensitive substring match
 * (so custom paths like "models/Xenova/all-MiniLM-L6-v2" still resolve),
 * and finally falls back to DEFAULT_VECTOR_DIMENSION.
 * @param {string} modelName - Embedding model name or path
 * @returns {number} Vector dimension
 */
export function getEmbeddingDimension(modelName) {
    if (!modelName) {
        return DEFAULT_VECTOR_DIMENSION;
    }
    // Check exact match. Object.hasOwn guards against inherited
    // Object.prototype keys (e.g. "toString") being treated as entries,
    // which would return a function instead of a number.
    if (Object.hasOwn(EMBEDDING_DIMENSIONS, modelName)) {
        return EMBEDDING_DIMENSIONS[modelName];
    }
    // Check for partial matches (lowercasing hoisted out of the loop).
    const lowerName = modelName.toLowerCase();
    for (const [key, dimension] of Object.entries(EMBEDDING_DIMENSIONS)) {
        if (lowerName.includes(key.toLowerCase())) {
            return dimension;
        }
    }
    // Unknown model: fall back to the default dimension.
    return DEFAULT_VECTOR_DIMENSION;
}
52
/**
 * Create a memory table schema with a specific vector dimension.
 * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536);
 *   must match the embedding model used to populate the table
 * @returns {arrow.Schema} Arrow schema with the specified dimension
 */
export function createMemorySchema(vectorDim = DEFAULT_VECTOR_DIMENSION) {
    return new arrow.Schema([
        // Primary identifier for the memory entry (non-nullable).
        new arrow.Field("id", new arrow.Utf8(), false),
        // Embedding vector; FixedSizeList length fixes the dimension at schema time.
        new arrow.Field("vector", new arrow.FixedSizeList(vectorDim, new arrow.Field("item", new arrow.Float32(), true)), false),
        // Raw memory content (non-nullable).
        new arrow.Field("content", new arrow.Utf8(), false),
        new arrow.Field("metadata", new arrow.Utf8(), true), // Stored as JSON string
        new arrow.Field("created_at", new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), false),
        new arrow.Field("updated_at", new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), true),
    ]);
}
67
/**
 * Create V2 memory schema with automatic recall fields.
 * All V2 fields are nullable so tables written with the V1 schema remain
 * readable (backward compatible).
 * @param {number} vectorDim - Vector dimension (e.g., 384, 768, 1536)
 * @returns {arrow.Schema} Arrow schema with V2 fields
 */
export function createMemorySchemaV2(vectorDim = DEFAULT_VECTOR_DIMENSION) {
    return new arrow.Schema([
        // ========== V1 Fields (Backward Compatible) ==========
        new arrow.Field("id", new arrow.Utf8(), false),
        new arrow.Field("vector", new arrow.FixedSizeList(vectorDim, new arrow.Field("item", new arrow.Float32(), true)), false),
        new arrow.Field("content", new arrow.Utf8(), false),
        new arrow.Field("metadata", new arrow.Utf8(), true), // Stored as JSON string
        new arrow.Field("created_at", new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), false),
        new arrow.Field("updated_at", new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), true),
        // ========== V2 Fields (All Nullable) ==========
        new arrow.Field("session_id", new arrow.Utf8(), true), // Session association; its presence is the V2 marker
        new arrow.Field("agent_id", new arrow.Utf8(), true), // Agent/skill that created memory
        new arrow.Field("memory_type", new arrow.Utf8(), true), // 'global', 'session', 'agent'
        new arrow.Field("importance_score", new arrow.Float32(), true), // 0.0-1.0 importance
        new arrow.Field("access_count", new arrow.Int32(), true), // Popularity tracking
        new arrow.Field("last_accessed", new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), true),
    ]);
}
91
/**
 * Create schema for synthesized skills (Recursive Skill Synthesis).
 * The vector column stores an embedding of the skill's intent so skills
 * can be retrieved by semantic similarity.
 * @param {number} vectorDim - Vector dimension for intent embedding
 * @returns {arrow.Schema} Arrow schema
 */
export function createSynthesizedSkillSchema(vectorDim = DEFAULT_VECTOR_DIMENSION) {
    return new arrow.Schema([
        new arrow.Field("id", new arrow.Utf8(), false),
        // Human-readable skill name.
        new arrow.Field("name", new arrow.Utf8(), false),
        // Natural-language description of what the skill does.
        new arrow.Field("intent", new arrow.Utf8(), false),
        // Serialized YAMO definition of the skill.
        new arrow.Field("yamo_text", new arrow.Utf8(), false),
        new arrow.Field("vector", new arrow.FixedSizeList(vectorDim, new arrow.Field("item", new arrow.Float32(), true)), false),
        new arrow.Field("metadata", new arrow.Utf8(), true), // Stored as JSON: {reliability, use_count, created_at}
        new arrow.Field("created_at", new arrow.Timestamp(arrow.TimeUnit.MILLISECOND), false),
    ]);
}
107
/**
 * Determine whether a table schema uses the V2 layout.
 * Detection keys off the presence of the "session_id" column, which is
 * only emitted by createMemorySchemaV2().
 * @param {arrow.Schema} schema - Table schema to check
 * @returns {boolean} True if V2 schema detected
 */
export function isSchemaV2(schema) {
    for (const field of schema.fields) {
        if (field.name === "session_id") {
            return true;
        }
    }
    return false;
}
115
/**
 * Memory table schema using Apache Arrow format (default 384 dimensions).
 * Evaluated once at module load via createMemorySchema().
 * @deprecated Use createMemorySchema(vectorDim) for dynamic dimensions
 */
export const MEMORY_SCHEMA = createMemorySchema(DEFAULT_VECTOR_DIMENSION);
/**
 * Index configuration for the memory table.
 * Indices should be created after data is inserted.
 */
export const INDEX_CONFIG = {
    // Approximate-nearest-neighbor index over the embedding column.
    vector: {
        index_type: "ivf_pq",
        metric: "cosine",
        num_partitions: 256,
        num_sub_vectors: 8,
    },
    // Full-text index over the raw content column.
    full_text: {
        fields: ["content"],
    },
};
135
/**
 * Creates a memory table in LanceDB with the predefined schema (384 dimensions).
 * Thin wrapper over createMemoryTableWithDimension() using the default dimension.
 * @param {lancedb.Connection} db - LanceDB connection
 * @param {string} tableName - Name of the table to create (default: 'memory_entries')
 * @returns {Promise<lancedb.Table>} The created or opened table
 * @throws {Error} If table creation fails
 * @deprecated Use createMemoryTableWithDimension() for dynamic dimensions
 */
export async function createMemoryTable(db, tableName = "memory_entries") {
    return createMemoryTableWithDimension(db, tableName, DEFAULT_VECTOR_DIMENSION);
}
146
/**
 * Creates a memory table in LanceDB with a specific vector dimension.
 * Idempotent: if the table already exists it is opened instead of recreated.
 * @param {lancedb.Connection} db - LanceDB connection
 * @param {string} tableName - Name of the table to create
 * @param {number} vectorDim - Vector dimension (384, 768, 1536, etc.)
 * @returns {Promise<lancedb.Table>} The created or opened table
 * @throws {Error} If table creation fails; the original error is preserved as `cause`
 */
export async function createMemoryTableWithDimension(db, tableName, vectorDim) {
    try {
        // Reuse an existing table rather than failing on re-creation.
        const existingTables = await db.tableNames();
        if (existingTables.includes(tableName)) {
            return await db.openTable(tableName);
        }
        // Create schema with the specified dimension.
        const schema = createMemorySchema(vectorDim);
        // LanceDB v0.23.0+ accepts an empty array as initial data with a schema option.
        return await db.createTable(tableName, [], { schema });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        // Wrap with context but keep the original error as `cause` so its
        // stack trace and diagnostics are not lost.
        throw new Error(`Failed to create memory table with dimension ${vectorDim}: ${message}`, { cause: error });
    }
}
173
// Default export bundles the public schema API for consumers that prefer
// `import schema from ...` over named imports. Keep in sync with the named
// exports above.
export default {
    MEMORY_SCHEMA,
    INDEX_CONFIG,
    createMemoryTable,
    createMemoryTableWithDimension,
    createMemorySchema,
    createMemorySchemaV2,
    isSchemaV2,
    getEmbeddingDimension,
    DEFAULT_VECTOR_DIMENSION,
    EMBEDDING_DIMENSIONS,
};
@@ -0,0 +1,25 @@
1
/**
 * MemoryScorer - Calculate memory importance and detect duplicates
 */
export declare class MemoryScorer {
    #private;
    /**
     * @param {MemoryMesh} mesh - MemoryMesh instance for duplicate checking
     */
    constructor(mesh: any);
    /**
     * Calculate importance score for content using local heuristics.
     * Synchronous — returns a plain number, not a Promise (the declared
     * return type below is authoritative).
     * @param {string} content - Content to score
     * @param {Object} metadata - Associated metadata
     * @returns {number} Importance score (0-1)
     */
    calculateImportance(content: any, metadata?: {}): number;
    /**
     * Check if content is duplicate of existing memory via similarity search.
     * @param {string} content - Content to check
     * @param {number} threshold - Similarity threshold (default 0.9)
     * @returns {Promise<boolean>} True if duplicate exists
     */
    isDuplicate(content: any, threshold?: number): Promise<boolean>;
}
export default MemoryScorer;
@@ -0,0 +1,78 @@
1
+ // @ts-nocheck
2
/**
 * MemoryScorer - Calculate memory importance and detect duplicates
 */
export class MemoryScorer {
    #mesh;
    /**
     * @param {MemoryMesh} mesh - MemoryMesh instance for duplicate checking
     */
    constructor(mesh) {
        this.#mesh = mesh;
    }
    /**
     * Calculate an importance score for content using cheap local heuristics:
     * length, structure markers, interaction type, tool/file involvement,
     * and importance keywords. Synchronous — performs no I/O.
     * @param {string} content - Content to score
     * @param {Object} metadata - Associated metadata
     * @returns {number} Importance score clamped to [0, 1]
     */
    calculateImportance(content, metadata = {}) {
        // Empty or missing content carries no importance (and guarding here
        // avoids a TypeError on null/undefined input).
        if (!content) {
            return 0;
        }
        let score = 0;
        // Content length (longer = more important, capped at +0.2 at 1000 chars)
        score += Math.min(content.length / 1000, 0.2);
        // Has structured data (JSON, code blocks)
        if (content.includes("```") || content.includes("{")) {
            score += 0.1;
        }
        // Interaction type bonuses
        if (metadata.interaction_type === "tool_execution") {
            score += 0.15;
        }
        if (metadata.interaction_type === "file_operation") {
            score += 0.1;
        }
        // Tool usage indicates importance (+0.05 per tool, capped at +0.15)
        if (metadata.tools_used?.length > 0) {
            score += Math.min(metadata.tools_used.length * 0.05, 0.15);
        }
        // File involvement (+0.05 per file, capped at +0.15)
        if (metadata.files_involved?.length > 0) {
            score += Math.min(metadata.files_involved.length * 0.05, 0.15);
        }
        // Keywords that indicate importance (+0.05 each, capped at +0.15)
        const importantKeywords = [
            "error",
            "bug",
            "fix",
            "important",
            "critical",
            "note",
            "remember",
        ];
        const lowerContent = content.toLowerCase();
        const keywordMatches = importantKeywords.filter((k) => lowerContent.includes(k)).length;
        score += Math.min(keywordMatches * 0.05, 0.15);
        return Math.min(score, 1.0);
    }
    /**
     * Check if content is duplicate of existing memory via similarity search.
     * @param {string} content - Content to check
     * @param {number} threshold - Similarity threshold (default 0.9)
     * @returns {Promise<boolean>} True if a memory scores at or above threshold
     */
    async isDuplicate(content, threshold = 0.9) {
        try {
            const results = await this.#mesh.search(content, {
                limit: 1,
                useCache: false,
            });
            return results.length > 0 && results[0].score >= threshold;
        }
        catch (_error) {
            // Best-effort: on search failure, assume not duplicate to allow storage
            return false;
        }
    }
}
export default MemoryScorer;
@@ -0,0 +1,78 @@
1
+ // @ts-nocheck
2
/**
 * MemoryScorer - Calculate memory importance and detect duplicates
 */
export class MemoryScorer {
    #mesh;
    /**
     * @param {MemoryMesh} mesh - MemoryMesh instance for duplicate checking
     */
    constructor(mesh) {
        this.#mesh = mesh;
    }
    /**
     * Calculate an importance score for content using cheap local heuristics:
     * length, structure markers, interaction type, tool/file involvement,
     * and importance keywords. Synchronous — performs no I/O.
     * @param {string} content - Content to score
     * @param {Object} metadata - Associated metadata
     * @returns {number} Importance score clamped to [0, 1]
     */
    calculateImportance(content, metadata = {}) {
        // Empty or missing content carries no importance (and guarding here
        // avoids a TypeError on null/undefined input).
        if (!content) {
            return 0;
        }
        let score = 0;
        // Content length (longer = more important, capped at +0.2 at 1000 chars)
        score += Math.min(content.length / 1000, 0.2);
        // Has structured data (JSON, code blocks)
        if (content.includes("```") || content.includes("{")) {
            score += 0.1;
        }
        // Interaction type bonuses
        if (metadata.interaction_type === "tool_execution") {
            score += 0.15;
        }
        if (metadata.interaction_type === "file_operation") {
            score += 0.1;
        }
        // Tool usage indicates importance (+0.05 per tool, capped at +0.15)
        if (metadata.tools_used?.length > 0) {
            score += Math.min(metadata.tools_used.length * 0.05, 0.15);
        }
        // File involvement (+0.05 per file, capped at +0.15)
        if (metadata.files_involved?.length > 0) {
            score += Math.min(metadata.files_involved.length * 0.05, 0.15);
        }
        // Keywords that indicate importance (+0.05 each, capped at +0.15)
        const importantKeywords = [
            "error",
            "bug",
            "fix",
            "important",
            "critical",
            "note",
            "remember",
        ];
        const lowerContent = content.toLowerCase();
        const keywordMatches = importantKeywords.filter((k) => lowerContent.includes(k)).length;
        score += Math.min(keywordMatches * 0.05, 0.15);
        return Math.min(score, 1.0);
    }
    /**
     * Check if content is duplicate of existing memory via similarity search.
     * @param {string} content - Content to check
     * @param {number} threshold - Similarity threshold (default 0.9)
     * @returns {Promise<boolean>} True if a memory scores at or above threshold
     */
    async isDuplicate(content, threshold = 0.9) {
        try {
            const results = await this.#mesh.search(content, {
                limit: 1,
                useCache: false,
            });
            return results.length > 0 && results[0].score >= threshold;
        }
        catch (_error) {
            // Best-effort: on search failure, assume not duplicate to allow storage
            return false;
        }
    }
}
export default MemoryScorer;
@@ -0,0 +1 @@
1
+ export { KeywordSearch } from "./keyword-search.js";
@@ -0,0 +1,2 @@
1
+ // @ts-nocheck
2
+ export { KeywordSearch } from "./keyword-search.js";
@@ -0,0 +1,2 @@
1
+ // @ts-nocheck
2
+ export { KeywordSearch } from "./keyword-search.js";
@@ -0,0 +1,46 @@
1
/**
 * Simple Keyword Search Engine (In-Memory)
 * Provides basic TF-IDF style retrieval to complement vector search
 */
export declare class KeywordSearch {
    index: any; // token -> Map<docId, term frequency>
    docLengths: any; // docId -> token count
    idf: any; // token -> inverse document frequency
    docs: any; // docId -> { content, metadata }, kept for result payloads
    isDirty: any; // true when IDF stats need recomputation before search
    constructor();
    /**
     * Tokenize text into normalized terms: lowercased, punctuation stripped,
     * tokens of length <= 2 dropped, each token truncated to 20 chars.
     * @param {string} text
     * @returns {string[]} tokens
     */
    tokenize(text: any): any;
    /**
     * Add a document to the inverted index.
     * @param {string} id
     * @param {string} content
     * @param {Object} [metadata]
     */
    add(id: any, content: any, metadata?: {}): void;
    /**
     * Remove a document from the index and all postings lists.
     * @param {string} id
     */
    remove(id: any): void;
    /**
     * Recalculate IDF scores; no-op unless the index changed since last call.
     */
    _computeStats(): void;
    /**
     * Search for query terms, scoring documents by summed TF-IDF.
     * @param {string} query
     * @param {Object} options
     * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
     */
    search(query: any, options?: {}): any[];
    /**
     * Bulk load records of shape {id, content, metadata} via add().
     * @param {Array} records
     */
    load(records: any): void;
}
@@ -0,0 +1,136 @@
1
+ // @ts-nocheck
2
/**
 * Simple Keyword Search Engine (In-Memory)
 * Provides basic TF-IDF style retrieval to complement vector search
 */
export class KeywordSearch {
    index; // token -> Map<docId, tf>
    docLengths; // docId -> length
    idf; // token -> idf value
    docs; // docId -> { content, metadata } (kept for result payloads)
    isDirty; // true when IDF stats need recomputation before search
    constructor() {
        this.index = new Map();
        this.docLengths = new Map();
        this.idf = new Map();
        this.docs = new Map();
        this.isDirty = false;
    }
    /**
     * Tokenize text into normalized terms.
     * @param {string} text
     * @returns {string[]} tokens
     */
    tokenize(text) {
        if (!text) {
            return [];
        }
        return text
            .toLowerCase()
            .replace(/[^\w\s]/g, "") // Remove punctuation
            .split(/\s+/)
            .filter((t) => t.length > 2) // Filter stopwords/short
            .map((t) => t.substring(0, 20)); // Truncate
    }
    /**
     * Add a document to the index. Re-adding an existing id replaces the
     * previous entry (its stale postings are purged first).
     * @param {string} id
     * @param {string} content
     * @param {Object} [metadata]
     */
    add(id, content, metadata = {}) {
        // Purge stale postings so an updated document no longer matches
        // tokens that only appeared in its previous content.
        if (this.docs.has(id)) {
            this.remove(id);
        }
        const tokens = this.tokenize(content);
        const termFreqs = new Map();
        tokens.forEach((t) => {
            termFreqs.set(t, (termFreqs.get(t) || 0) + 1);
        });
        this.docLengths.set(id, tokens.length);
        this.docs.set(id, { content, metadata });
        // Update inverted index with this document's term frequencies
        for (const [token, freq] of termFreqs.entries()) {
            if (!this.index.has(token)) {
                this.index.set(token, new Map());
            }
            this.index.get(token).set(id, freq);
        }
        this.isDirty = true;
    }
    /**
     * Remove a document from the index and all postings lists.
     * @param {string} id
     */
    remove(id) {
        this.docLengths.delete(id);
        this.docs.delete(id);
        // This is expensive O(Vocab), but okay for small scale
        for (const docMap of this.index.values()) {
            docMap.delete(id);
        }
        this.isDirty = true;
    }
    /**
     * Recalculate IDF scores. No-op unless documents changed since last call.
     */
    _computeStats() {
        if (!this.isDirty) {
            return;
        }
        const N = this.docLengths.size;
        this.idf.clear();
        for (const [token, docMap] of this.index.entries()) {
            const df = docMap.size;
            // Standard smoothed IDF: log(N / (df + 1)) + 1
            const idf = Math.log(N / (df + 1)) + 1;
            this.idf.set(token, idf);
        }
        this.isDirty = false;
    }
    /**
     * Search for query terms, scoring documents by summed TF-IDF.
     * @param {string} query
     * @param {Object} options - { limit?: number } (default 10; 0 is honored)
     * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
     */
    search(query, options = {}) {
        this._computeStats();
        const tokens = this.tokenize(query);
        const scores = new Map(); // docId -> accumulated score
        const matches = new Map(); // docId -> matched tokens
        // ?? rather than || so an explicit limit of 0 is not coerced to 10
        const limit = options.limit ?? 10;
        for (const token of tokens) {
            const docMap = this.index.get(token);
            if (!docMap) {
                continue;
            }
            const idf = this.idf.get(token) || 0;
            for (const [docId, tf] of docMap.entries()) {
                // Simple TF-IDF: tf * idf, summed over query tokens
                const score = tf * idf;
                scores.set(docId, (scores.get(docId) || 0) + score);
                if (!matches.has(docId)) {
                    matches.set(docId, []);
                }
                matches.get(docId).push(token);
            }
        }
        // Convert to array, sort by descending score, and truncate
        return Array.from(scores.entries())
            .map(([id, score]) => ({
                id,
                score,
                matches: matches.get(id) || [],
                ...this.docs.get(id),
            }))
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }
    /**
     * Bulk load records of shape {id, content, metadata}.
     * @param {Array} records
     */
    load(records) {
        records.forEach((r) => this.add(r.id, r.content, r.metadata));
    }
}
@@ -0,0 +1,136 @@
1
+ // @ts-nocheck
2
/**
 * Simple Keyword Search Engine (In-Memory)
 * Provides basic TF-IDF style retrieval to complement vector search
 */
export class KeywordSearch {
    index; // token -> Map<docId, tf>
    docLengths; // docId -> length
    idf; // token -> idf value
    docs; // docId -> { content, metadata } (kept for result payloads)
    isDirty; // true when IDF stats need recomputation before search
    constructor() {
        this.index = new Map();
        this.docLengths = new Map();
        this.idf = new Map();
        this.docs = new Map();
        this.isDirty = false;
    }
    /**
     * Tokenize text into normalized terms.
     * @param {string} text
     * @returns {string[]} tokens
     */
    tokenize(text) {
        if (!text) {
            return [];
        }
        return text
            .toLowerCase()
            .replace(/[^\w\s]/g, "") // Remove punctuation
            .split(/\s+/)
            .filter((t) => t.length > 2) // Filter stopwords/short
            .map((t) => t.substring(0, 20)); // Truncate
    }
    /**
     * Add a document to the index. Re-adding an existing id replaces the
     * previous entry (its stale postings are purged first).
     * @param {string} id
     * @param {string} content
     * @param {Object} [metadata]
     */
    add(id, content, metadata = {}) {
        // Purge stale postings so an updated document no longer matches
        // tokens that only appeared in its previous content.
        if (this.docs.has(id)) {
            this.remove(id);
        }
        const tokens = this.tokenize(content);
        const termFreqs = new Map();
        tokens.forEach((t) => {
            termFreqs.set(t, (termFreqs.get(t) || 0) + 1);
        });
        this.docLengths.set(id, tokens.length);
        this.docs.set(id, { content, metadata });
        // Update inverted index with this document's term frequencies
        for (const [token, freq] of termFreqs.entries()) {
            if (!this.index.has(token)) {
                this.index.set(token, new Map());
            }
            this.index.get(token).set(id, freq);
        }
        this.isDirty = true;
    }
    /**
     * Remove a document from the index and all postings lists.
     * @param {string} id
     */
    remove(id) {
        this.docLengths.delete(id);
        this.docs.delete(id);
        // This is expensive O(Vocab), but okay for small scale
        for (const docMap of this.index.values()) {
            docMap.delete(id);
        }
        this.isDirty = true;
    }
    /**
     * Recalculate IDF scores. No-op unless documents changed since last call.
     */
    _computeStats() {
        if (!this.isDirty) {
            return;
        }
        const N = this.docLengths.size;
        this.idf.clear();
        for (const [token, docMap] of this.index.entries()) {
            const df = docMap.size;
            // Standard smoothed IDF: log(N / (df + 1)) + 1
            const idf = Math.log(N / (df + 1)) + 1;
            this.idf.set(token, idf);
        }
        this.isDirty = false;
    }
    /**
     * Search for query terms, scoring documents by summed TF-IDF.
     * @param {string} query
     * @param {Object} options - { limit?: number } (default 10; 0 is honored)
     * @returns {Array<{id: string, score: number, matches: string[], content: string, metadata: Object}>}
     */
    search(query, options = {}) {
        this._computeStats();
        const tokens = this.tokenize(query);
        const scores = new Map(); // docId -> accumulated score
        const matches = new Map(); // docId -> matched tokens
        // ?? rather than || so an explicit limit of 0 is not coerced to 10
        const limit = options.limit ?? 10;
        for (const token of tokens) {
            const docMap = this.index.get(token);
            if (!docMap) {
                continue;
            }
            const idf = this.idf.get(token) || 0;
            for (const [docId, tf] of docMap.entries()) {
                // Simple TF-IDF: tf * idf, summed over query tokens
                const score = tf * idf;
                scores.set(docId, (scores.get(docId) || 0) + score);
                if (!matches.has(docId)) {
                    matches.set(docId, []);
                }
                matches.get(docId).push(token);
            }
        }
        // Convert to array, sort by descending score, and truncate
        return Array.from(scores.entries())
            .map(([id, score]) => ({
                id,
                score,
                matches: matches.get(id) || [],
                ...this.docs.get(id),
            }))
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }
    /**
     * Bulk load records of shape {id, content, metadata}.
     * @param {Array} records
     */
    load(records) {
        records.forEach((r) => this.add(r.id, r.content, r.metadata));
    }
}