@danielsimonjr/memory-mcp 11.0.1 → 11.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +22 -22
  2. package/dist/core/EntityManager.d.ts +10 -15
  3. package/dist/core/EntityManager.d.ts.map +1 -1
  4. package/dist/core/EntityManager.js +21 -54
  5. package/dist/core/GraphStorage.d.ts +0 -51
  6. package/dist/core/GraphStorage.d.ts.map +1 -1
  7. package/dist/core/GraphStorage.js +2 -79
  8. package/dist/core/GraphTraversal.d.ts +2 -7
  9. package/dist/core/GraphTraversal.d.ts.map +1 -1
  10. package/dist/core/GraphTraversal.js +2 -19
  11. package/dist/core/ManagerContext.d.ts +0 -4
  12. package/dist/core/ManagerContext.d.ts.map +1 -1
  13. package/dist/core/ManagerContext.js +2 -12
  14. package/dist/core/RelationManager.d.ts.map +1 -1
  15. package/dist/core/RelationManager.js +4 -5
  16. package/dist/core/SQLiteStorage.d.ts.map +1 -1
  17. package/dist/core/SQLiteStorage.js +2 -3
  18. package/dist/core/TransactionManager.d.ts +2 -207
  19. package/dist/core/TransactionManager.d.ts.map +1 -1
  20. package/dist/core/TransactionManager.js +6 -482
  21. package/dist/core/index.d.ts +1 -2
  22. package/dist/core/index.d.ts.map +1 -1
  23. package/dist/core/index.js +1 -3
  24. package/dist/features/ArchiveManager.d.ts +2 -14
  25. package/dist/features/ArchiveManager.d.ts.map +1 -1
  26. package/dist/features/ArchiveManager.js +3 -44
  27. package/dist/features/CompressionManager.d.ts +4 -14
  28. package/dist/features/CompressionManager.d.ts.map +1 -1
  29. package/dist/features/CompressionManager.js +9 -74
  30. package/dist/features/IOManager.d.ts +2 -6
  31. package/dist/features/IOManager.d.ts.map +1 -1
  32. package/dist/features/IOManager.js +10 -105
  33. package/dist/features/StreamingExporter.d.ts +4 -27
  34. package/dist/features/StreamingExporter.d.ts.map +1 -1
  35. package/dist/features/StreamingExporter.js +4 -65
  36. package/dist/features/index.d.ts +0 -2
  37. package/dist/features/index.d.ts.map +1 -1
  38. package/dist/features/index.js +0 -3
  39. package/dist/search/EmbeddingService.d.ts +9 -108
  40. package/dist/search/EmbeddingService.d.ts.map +1 -1
  41. package/dist/search/EmbeddingService.js +15 -187
  42. package/dist/search/FuzzySearch.js +1 -1
  43. package/dist/search/SavedSearchManager.d.ts.map +1 -1
  44. package/dist/search/SavedSearchManager.js +2 -3
  45. package/dist/search/SearchManager.d.ts +1 -42
  46. package/dist/search/SearchManager.d.ts.map +1 -1
  47. package/dist/search/SearchManager.js +0 -115
  48. package/dist/search/SemanticSearch.d.ts +1 -4
  49. package/dist/search/SemanticSearch.d.ts.map +1 -1
  50. package/dist/search/SemanticSearch.js +2 -12
  51. package/dist/search/TFIDFIndexManager.d.ts +0 -88
  52. package/dist/search/TFIDFIndexManager.d.ts.map +1 -1
  53. package/dist/search/TFIDFIndexManager.js +0 -217
  54. package/dist/search/index.d.ts +1 -18
  55. package/dist/search/index.d.ts.map +1 -1
  56. package/dist/search/index.js +1 -32
  57. package/dist/server/MCPServer.d.ts.map +1 -1
  58. package/dist/server/MCPServer.js +4 -1
  59. package/dist/server/responseCompressor.js +5 -5
  60. package/dist/server/toolDefinitions.d.ts.map +1 -1
  61. package/dist/server/toolDefinitions.js +5 -1
  62. package/dist/server/toolHandlers.d.ts +9 -5
  63. package/dist/server/toolHandlers.d.ts.map +1 -1
  64. package/dist/server/toolHandlers.js +23 -8
  65. package/dist/types/index.d.ts +1 -1
  66. package/dist/types/index.d.ts.map +1 -1
  67. package/dist/types/types.d.ts +2 -579
  68. package/dist/types/types.d.ts.map +1 -1
  69. package/dist/utils/compressedCache.d.ts +0 -29
  70. package/dist/utils/compressedCache.d.ts.map +1 -1
  71. package/dist/utils/compressedCache.js +0 -39
  72. package/dist/utils/entityUtils.d.ts +1 -59
  73. package/dist/utils/entityUtils.d.ts.map +1 -1
  74. package/dist/utils/entityUtils.js +3 -113
  75. package/dist/utils/errors.d.ts +0 -18
  76. package/dist/utils/errors.d.ts.map +1 -1
  77. package/dist/utils/errors.js +0 -24
  78. package/dist/utils/index.d.ts +2 -6
  79. package/dist/utils/index.d.ts.map +1 -1
  80. package/dist/utils/index.js +2 -14
  81. package/dist/utils/logger.d.ts +0 -7
  82. package/dist/utils/logger.d.ts.map +1 -1
  83. package/dist/utils/logger.js +2 -9
  84. package/dist/utils/parallelUtils.d.ts +1 -5
  85. package/dist/utils/parallelUtils.d.ts.map +1 -1
  86. package/dist/utils/parallelUtils.js +1 -23
  87. package/dist/utils/schemas.d.ts +16 -16
  88. package/dist/utils/schemas.d.ts.map +1 -1
  89. package/dist/utils/schemas.js +12 -12
  90. package/dist/utils/taskScheduler.d.ts +0 -4
  91. package/dist/utils/taskScheduler.d.ts.map +1 -1
  92. package/dist/utils/taskScheduler.js +1 -21
  93. package/dist/workers/WorkerPool.d.ts +81 -0
  94. package/dist/workers/WorkerPool.d.ts.map +1 -0
  95. package/dist/workers/WorkerPool.js +121 -0
  96. package/dist/workers/index.d.ts +1 -1
  97. package/dist/workers/index.d.ts.map +1 -1
  98. package/dist/workers/levenshteinWorker.js +1 -1
  99. package/package.json +1 -4
  100. package/dist/__tests__/file-path.test.js +0 -119
  101. package/dist/__tests__/knowledge-graph.test.js +0 -318
  102. package/dist/core/GraphEventEmitter.d.ts +0 -202
  103. package/dist/core/GraphEventEmitter.d.ts.map +0 -1
  104. package/dist/core/GraphEventEmitter.js +0 -346
  105. package/dist/features/KeywordExtractor.d.ts +0 -61
  106. package/dist/features/KeywordExtractor.d.ts.map +0 -1
  107. package/dist/features/KeywordExtractor.js +0 -126
  108. package/dist/features/ObservationNormalizer.d.ts +0 -90
  109. package/dist/features/ObservationNormalizer.d.ts.map +0 -1
  110. package/dist/features/ObservationNormalizer.js +0 -193
  111. package/dist/memory.jsonl +0 -1
  112. package/dist/search/BM25Search.d.ts +0 -148
  113. package/dist/search/BM25Search.d.ts.map +0 -1
  114. package/dist/search/BM25Search.js +0 -339
  115. package/dist/search/EarlyTerminationManager.d.ts +0 -140
  116. package/dist/search/EarlyTerminationManager.d.ts.map +0 -1
  117. package/dist/search/EarlyTerminationManager.js +0 -279
  118. package/dist/search/EmbeddingCache.d.ts +0 -175
  119. package/dist/search/EmbeddingCache.d.ts.map +0 -1
  120. package/dist/search/EmbeddingCache.js +0 -246
  121. package/dist/search/HybridScorer.d.ts +0 -181
  122. package/dist/search/HybridScorer.d.ts.map +0 -1
  123. package/dist/search/HybridScorer.js +0 -257
  124. package/dist/search/HybridSearchManager.d.ts +0 -80
  125. package/dist/search/HybridSearchManager.d.ts.map +0 -1
  126. package/dist/search/HybridSearchManager.js +0 -187
  127. package/dist/search/IncrementalIndexer.d.ts +0 -201
  128. package/dist/search/IncrementalIndexer.d.ts.map +0 -1
  129. package/dist/search/IncrementalIndexer.js +0 -342
  130. package/dist/search/OptimizedInvertedIndex.d.ts +0 -163
  131. package/dist/search/OptimizedInvertedIndex.d.ts.map +0 -1
  132. package/dist/search/OptimizedInvertedIndex.js +0 -358
  133. package/dist/search/ParallelSearchExecutor.d.ts +0 -172
  134. package/dist/search/ParallelSearchExecutor.d.ts.map +0 -1
  135. package/dist/search/ParallelSearchExecutor.js +0 -309
  136. package/dist/search/QuantizedVectorStore.d.ts +0 -171
  137. package/dist/search/QuantizedVectorStore.d.ts.map +0 -1
  138. package/dist/search/QuantizedVectorStore.js +0 -307
  139. package/dist/search/QueryAnalyzer.d.ts +0 -76
  140. package/dist/search/QueryAnalyzer.d.ts.map +0 -1
  141. package/dist/search/QueryAnalyzer.js +0 -227
  142. package/dist/search/QueryCostEstimator.d.ts +0 -244
  143. package/dist/search/QueryCostEstimator.d.ts.map +0 -1
  144. package/dist/search/QueryCostEstimator.js +0 -652
  145. package/dist/search/QueryPlanCache.d.ts +0 -220
  146. package/dist/search/QueryPlanCache.d.ts.map +0 -1
  147. package/dist/search/QueryPlanCache.js +0 -379
  148. package/dist/search/QueryPlanner.d.ts +0 -58
  149. package/dist/search/QueryPlanner.d.ts.map +0 -1
  150. package/dist/search/QueryPlanner.js +0 -137
  151. package/dist/search/ReflectionManager.d.ts +0 -120
  152. package/dist/search/ReflectionManager.d.ts.map +0 -1
  153. package/dist/search/ReflectionManager.js +0 -231
  154. package/dist/search/SymbolicSearch.d.ts +0 -61
  155. package/dist/search/SymbolicSearch.d.ts.map +0 -1
  156. package/dist/search/SymbolicSearch.js +0 -163
  157. package/dist/search/TFIDFEventSync.d.ts +0 -85
  158. package/dist/search/TFIDFEventSync.d.ts.map +0 -1
  159. package/dist/search/TFIDFEventSync.js +0 -133
  160. package/dist/utils/BatchProcessor.d.ts +0 -271
  161. package/dist/utils/BatchProcessor.d.ts.map +0 -1
  162. package/dist/utils/BatchProcessor.js +0 -376
  163. package/dist/utils/MemoryMonitor.d.ts +0 -176
  164. package/dist/utils/MemoryMonitor.d.ts.map +0 -1
  165. package/dist/utils/MemoryMonitor.js +0 -305
  166. package/dist/utils/WorkerPoolManager.d.ts +0 -233
  167. package/dist/utils/WorkerPoolManager.d.ts.map +0 -1
  168. package/dist/utils/WorkerPoolManager.js +0 -420
  169. package/dist/utils/operationUtils.d.ts +0 -124
  170. package/dist/utils/operationUtils.d.ts.map +0 -1
  171. package/dist/utils/operationUtils.js +0 -175
  172. package/dist/vitest.config.js +0 -13
@@ -1,193 +0,0 @@
1
- /**
2
- * Observation Normalizer
3
- *
4
- * Phase 11: Transforms observations to be self-contained facts
5
- * through coreference resolution and temporal anchoring.
6
- *
7
- * @module features/ObservationNormalizer
8
- */
9
/**
 * Words ignored by ObservationNormalizer.extractKeywords.
 * Built once at module load instead of on every call.
 */
const KEYWORD_STOPWORDS = new Set([
    'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been',
    'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
    'would', 'could', 'should', 'may', 'might', 'must', 'shall',
    'can', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
    'from', 'as', 'into', 'through', 'during', 'before', 'after',
    'above', 'below', 'between', 'under', 'again', 'further',
    'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
    'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
    'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
    'very', 's', 't', 'just', 'don', 'now', 'and', 'but', 'or',
]);
/**
 * Pronouns that are always possessive and therefore resolve to "<name>'s".
 * 'her' is deliberately absent: it is ambiguous between object ("saw her")
 * and possessive ("her car"), so it keeps resolving to the bare name.
 */
const POSSESSIVE_PRONOUNS = new Set(['his', 'hers', 'their', 'theirs']);
/**
 * Replace every pronoun matched by `pattern` with the entity name.
 *
 * A replacer function is used instead of a replacement string so that
 * names containing `$` sequences (`$$`, `$&`, `$1`, ...) are inserted
 * literally rather than interpreted as replacement patterns.
 */
function substitutePronouns(text, pattern, name) {
    return text.replace(pattern, (pronoun) => (POSSESSIVE_PRONOUNS.has(pronoun.toLowerCase()) ? `${name}'s` : name));
}
/**
 * Return a pattern equivalent to `pattern` that matches all occurrences.
 */
function toGlobalPattern(pattern) {
    return pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
}
/**
 * Observation Normalizer transforms observations to self-contained facts.
 *
 * Applies transformations:
 * 1. Coreference resolution: 'He works' -> 'Alice works'
 * 2. Temporal anchoring: 'yesterday' -> '2026-01-07'
 * 3. Keyword extraction: Identifies important terms
 *
 * All calendar arithmetic and formatting is done in UTC so that day, month
 * and year anchors derived from the same reference date agree with each other.
 *
 * @example
 * ```typescript
 * const normalizer = new ObservationNormalizer();
 * const result = normalizer.normalize(
 *   'He started the project yesterday',
 *   { name: 'Bob', entityType: 'person', observations: [] },
 *   { referenceDate: new Date('2026-01-08T12:00:00Z') }
 * );
 * // result.normalized = 'Bob started the project 2026-01-07'
 * ```
 */
export class ObservationNormalizer {
    /** Pronoun groups that may be resolved to the entity name. */
    pronounPatterns = {
        masculine: /\b(he|him|his)\b/gi,
        feminine: /\b(she|her|hers)\b/gi,
        neutral: /\b(they|them|their|theirs)\b/gi,
    };
    /**
     * Pairs of [phrase pattern, resolver]. The resolver receives the reference
     * date and returns the absolute text that replaces the phrase.
     */
    relativeTimePatterns = [
        [/\byesterday\b/i, (ref) => this.formatDate(this.addDays(ref, -1))],
        [/\btoday\b/i, (ref) => this.formatDate(ref)],
        [/\btomorrow\b/i, (ref) => this.formatDate(this.addDays(ref, 1))],
        [/\blast week\b/i, (ref) => `week of ${this.formatDate(this.addDays(ref, -7))}`],
        [/\blast month\b/i, (ref) => this.formatMonth(this.addMonths(ref, -1))],
        [/\blast year\b/i, (ref) => `${ref.getUTCFullYear() - 1}`],
        [/\bthis week\b/i, (ref) => `week of ${this.formatDate(ref)}`],
        [/\bthis month\b/i, (ref) => this.formatMonth(ref)],
        [/\bthis year\b/i, (ref) => `${ref.getUTCFullYear()}`],
    ];
    /**
     * Normalize an observation for an entity.
     *
     * @param observation - Raw observation text
     * @param entity - Entity the observation belongs to (reads `name` and `entityType`)
     * @param options - `resolveCoreferences` (default true), `anchorTimestamps`
     *   (default true), `extractKeywords` (default false), `referenceDate`
     *   (default: now)
     * @returns `{ original, normalized, changes, keywords }`; `keywords` is
     *   undefined unless `extractKeywords` is enabled
     */
    normalize(observation, entity, options = {}) {
        const { resolveCoreferences = true, anchorTimestamps = true, extractKeywords = false, referenceDate = new Date(), } = options;
        let normalized = observation;
        const changes = [];
        if (resolveCoreferences) {
            const corefResult = this.resolveCoreferences(normalized, entity);
            if (corefResult.changed) {
                normalized = corefResult.text;
                changes.push(`Resolved pronouns to '${entity.name}'`);
            }
        }
        if (anchorTimestamps) {
            const timeResult = this.anchorTimestamps(normalized, referenceDate);
            if (timeResult.changed) {
                normalized = timeResult.text;
                changes.push(...timeResult.replacements);
            }
        }
        const keywords = extractKeywords
            ? this.extractKeywords(normalized)
            : undefined;
        return {
            original: observation,
            normalized,
            changes,
            keywords,
        };
    }
    /**
     * Resolve pronouns to entity name.
     *
     * Gendered pronouns are resolved only when the name-based gender guess
     * matches; neutral pronouns are resolved only for non-person entities.
     * Unambiguous possessives ('his', 'hers', 'their', 'theirs') become
     * "<name>'s"; every other pronoun becomes the bare name.
     *
     * @returns `{ text, changed }`
     */
    resolveCoreferences(text, entity) {
        let result = text;
        // Determine gender hint from name patterns
        if (this.guessMasculine(entity)) {
            result = substitutePronouns(result, this.pronounPatterns.masculine, entity.name);
        }
        else if (this.guessFeminine(entity)) {
            result = substitutePronouns(result, this.pronounPatterns.feminine, entity.name);
        }
        // Always try neutral pronouns for non-person entities
        if (entity.entityType.toLowerCase() !== 'person') {
            result = substitutePronouns(result, this.pronounPatterns.neutral, entity.name);
        }
        return { text: result, changed: result !== text };
    }
    /** Heuristic: true when the entity name contains a known masculine first name. */
    guessMasculine(entity) {
        const masculineNames = ['john', 'james', 'bob', 'mike', 'david', 'alex'];
        const lowered = entity.name.toLowerCase();
        return masculineNames.some(n => lowered.includes(n));
    }
    /** Heuristic: true when the entity name contains a known feminine first name. */
    guessFeminine(entity) {
        const feminineNames = ['alice', 'jane', 'sarah', 'mary', 'emma', 'lisa'];
        const lowered = entity.name.toLowerCase();
        return feminineNames.some(n => lowered.includes(n));
    }
    /**
     * Convert relative timestamps to absolute dates.
     *
     * Every occurrence of a recognised phrase is replaced; one entry per
     * phrase is recorded in `replacements`.
     *
     * @returns `{ text, changed, replacements }`
     */
    anchorTimestamps(text, referenceDate) {
        let result = text;
        const replacements = [];
        for (const [pattern, resolver] of this.relativeTimePatterns) {
            const match = result.match(pattern);
            if (!match) {
                continue;
            }
            const replacement = resolver(referenceDate);
            // Function replacer keeps the replacement literal and, with the
            // global pattern, rewrites repeated phrases consistently.
            result = result.replace(toGlobalPattern(pattern), () => replacement);
            replacements.push(`'${match[0]}' -> '${replacement}'`);
        }
        return {
            text: result,
            changed: replacements.length > 0,
            replacements,
        };
    }
    /** Return a copy of `date` shifted by `days` UTC calendar days. */
    addDays(date, days) {
        const result = new Date(date);
        result.setUTCDate(result.getUTCDate() + days);
        return result;
    }
    /**
     * Return a copy of `date` shifted by `months` UTC calendar months.
     * The day of month is clamped to the target month's length, so
     * Mar 31 minus one month is Feb 28/29 rather than rolling over into March.
     */
    addMonths(date, months) {
        const result = new Date(date);
        const day = result.getUTCDate();
        // Move to the 1st before changing the month so the change cannot overflow.
        result.setUTCDate(1);
        result.setUTCMonth(result.getUTCMonth() + months);
        const lastDay = new Date(Date.UTC(result.getUTCFullYear(), result.getUTCMonth() + 1, 0)).getUTCDate();
        result.setUTCDate(Math.min(day, lastDay));
        return result;
    }
    /** Format as YYYY-MM-DD (UTC). */
    formatDate(date) {
        return date.toISOString().split('T')[0];
    }
    /** Format as YYYY-MM (UTC). */
    formatMonth(date) {
        return `${date.getUTCFullYear()}-${String(date.getUTCMonth() + 1).padStart(2, '0')}`;
    }
    /**
     * Extract important keywords from text.
     *
     * Lowercases the text, strips every character other than a-z, 0-9 and
     * whitespace, then keeps unique words longer than two characters that
     * are not stopwords, in order of first appearance.
     */
    extractKeywords(text) {
        const words = text
            .toLowerCase()
            .replace(/[^a-z0-9\s]/g, '')
            .split(/\s+/)
            .filter(w => w.length > 2 && !KEYWORD_STOPWORDS.has(w));
        // Return unique keywords
        return [...new Set(words)];
    }
    /**
     * Normalize all observations for an entity.
     *
     * @returns `{ entity, results }` where `entity` is a shallow copy with
     *   normalized observations and `results` holds the per-observation details
     */
    normalizeEntity(entity, options = {}) {
        const results = entity.observations.map(obs => this.normalize(obs, entity, options));
        return {
            entity: {
                ...entity,
                observations: results.map(r => r.normalized),
            },
            results,
        };
    }
}
package/dist/memory.jsonl DELETED
@@ -1 +0,0 @@
1
- {"type":"entity","name":"Test Project","entityType":"project","observations":["Phase 1-4 testing"],"createdAt":"2025-11-11T01:02:31.337Z","lastModified":"2025-11-11T01:02:33.329Z","tags":["testing","phase3"],"importance":8}
@@ -1,148 +0,0 @@
1
- /**
2
- * BM25 Search
3
- *
4
- * BM25 (Best Matching 25) relevance scoring algorithm for lexical search.
5
- * Provides improved ranking over TF-IDF by incorporating document length normalization.
6
- *
7
- * Phase 12 Sprint 3: Search Algorithm Optimization
8
- *
9
- * @module search/BM25Search
10
- */
11
- import type { SearchResult } from '../types/index.js';
12
- import type { GraphStorage } from '../core/GraphStorage.js';
13
- /**
14
- * Common English stopwords to filter from queries and documents.
15
- * These words are too common to provide meaningful ranking signal.
16
- */
17
- export declare const STOPWORDS: Set<string>;
18
- /**
19
- * BM25 index entry for a single document (one entity's searchable text).
20
- */
21
- export interface BM25DocumentEntry {
22
- /** Name of the entity this entry was built from; also its key in the index's document map */
23
- entityName: string;
24
- /** Number of occurrences of each token in this document, keyed by token */
25
- termFreqs: Map<string, number>;
26
- /** Total number of tokens in the document after tokenization */
27
- docLength: number;
28
- }
29
- /**
30
- * BM25 index structure: per-document entries plus the corpus-wide statistics used for scoring.
31
- */
32
- export interface BM25Index {
33
- /** Document entries keyed by entity name */
34
- documents: Map<string, BM25DocumentEntry>;
35
- /** Document frequency for each term (number of docs containing term) */
36
- documentFrequency: Map<string, number>;
37
- /** Average document length in tokens (0 when the index holds no documents) */
38
- avgDocLength: number;
39
- /** Total number of documents in the index */
40
- totalDocs: number;
41
- }
42
- /**
43
- * BM25 configuration parameters (the free parameters k1 and b of the scoring formula).
44
- */
45
- export interface BM25Config {
46
- /** Term frequency saturation parameter (default: 1.2) */
47
- k1: number;
48
- /** Length normalization parameter (default: 0.75); 0 removes the document-length term from the formula */
49
- b: number;
50
- }
51
- /**
52
- * Default BM25 parameters based on research recommendations.
53
- */
54
- export declare const DEFAULT_BM25_CONFIG: BM25Config;
55
- /**
56
- * BM25 Search implementation.
57
- *
58
- * BM25 improves over TF-IDF by:
59
- * 1. Saturating term frequency - repeated occurrences of a term add progressively less score
60
- * 2. Document length normalization - accounts for varying document sizes so long documents do not dominate
61
- *
62
- * Formula:
63
- * score(D,Q) = sum_i( IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl)) )
64
- *
65
- * Where:
66
- * - f(qi,D) is the term frequency of qi in document D
67
- * - |D| is the length of document D
68
- * - avgdl is the average document length
69
- * - k1 and b are free parameters
70
- *
71
- * @example
72
- * ```typescript
73
- * const bm25 = new BM25Search(storage);
74
- * await bm25.buildIndex();
75
- * const results = await bm25.search('machine learning');
76
- * ```
77
- */
78
- export declare class BM25Search {
79
- private storage;
80
- private index;
81
- private config;
82
- constructor(storage: GraphStorage, config?: Partial<BM25Config>);
83
- /**
84
- * Get a copy of the current configuration.
85
- */
86
- getConfig(): BM25Config;
87
- /**
88
- * Update configuration parameters by merging the given values into the current configuration.
89
- *
90
- * @param config - New configuration values
91
- */
92
- setConfig(config: Partial<BM25Config>): void;
93
- /**
94
- * Tokenize text into lowercase terms with stopword filtering.
95
- *
96
- * @param text - Text to tokenize
97
- * @param filterStopwords - Whether to filter stopwords (default: true)
98
- * @returns Array of lowercase tokens
99
- */
100
- tokenize(text: string, filterStopwords?: boolean): string[];
101
- /**
102
- * Build the BM25 index from the current graph.
103
- *
104
- * Should be called after significant graph changes.
105
- */
106
- buildIndex(): Promise<void>;
107
- /**
108
- * Search using the BM25 algorithm. Builds the index first if it has not been built yet.
109
- *
110
- * @param query - Search query
111
- * @param limit - Maximum results to return (defaults to SEARCH_LIMITS.DEFAULT, capped at SEARCH_LIMITS.MAX)
112
- * @returns Array of search results sorted by BM25 score, highest first
113
- */
114
- search(query: string, limit?: number): Promise<SearchResult[]>;
115
- /**
116
- * Update the index for changed entities. Entities no longer present in the graph are dropped; builds the full index if none exists.
117
- *
118
- * @param changedEntityNames - Names of entities that changed
119
- */
120
- update(changedEntityNames: Set<string>): Promise<void>;
121
- /**
122
- * Remove an entity from the index. Returns false when no index is built or the entity is not indexed.
123
- *
124
- * @param entityName - Name of entity to remove
125
- */
126
- remove(entityName: string): boolean;
127
- /**
128
- * Clear the index.
129
- */
130
- clearIndex(): void;
131
- /**
132
- * Check if the index is built.
133
- */
134
- isIndexed(): boolean;
135
- /**
136
- * Get index statistics, or null if the index has not been built.
137
- */
138
- getIndexStats(): {
139
- documents: number;
140
- terms: number;
141
- avgDocLength: number;
142
- } | null;
143
- /**
144
- * Convert an entity to searchable text (name, entity type and observations joined by spaces).
145
- */
146
- private entityToText;
147
- }
148
- //# sourceMappingURL=BM25Search.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"BM25Search.d.ts","sourceRoot":"","sources":["../../src/search/BM25Search.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAU,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAG5D;;;GAGG;AACH,eAAO,MAAM,SAAS,aAWpB,CAAC;AAEH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,kBAAkB;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,4CAA4C;IAC5C,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC1C,wEAAwE;IACxE,iBAAiB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,EAAE,EAAE,MAAM,CAAC;IACX,qDAAqD;IACrD,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;GAEG;AACH,eAAO,MAAM,mBAAmB,EAAE,UAGjC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,UAAU;IAKnB,OAAO,CAAC,OAAO;IAJjB,OAAO,CAAC,KAAK,CAA0B;IACvC,OAAO,CAAC,MAAM,CAAa;gBAGjB,OAAO,EAAE,YAAY,EAC7B,MAAM,GAAE,OAAO,CAAC,UAAU,CAAM;IAKlC;;OAEG;IACH,SAAS,IAAI,UAAU;IAIvB;;;;OAIG;IACH,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI;IAI5C;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,eAAe,GAAE,OAAc,GAAG,MAAM,EAAE;IAajE;;;;OAIG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgDjC;;;;;;OAMG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,GAAE,MAA8B,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IA8E3F;;;;OAIG;IACG,MAAM,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAkE5D;;;;OAIG;IACH,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO;IAmCnC;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;OAEG;IACH,aAAa,IAAI;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAWlF;;OAEG;IACH,OAAO,CAAC,YAAY;CAGrB"}
@@ -1,339 +0,0 @@
1
- /**
2
- * BM25 Search
3
- *
4
- * BM25 (Best Matching 25) relevance scoring algorithm for lexical search.
5
- * Provides improved ranking over TF-IDF by incorporating document length normalization.
6
- *
7
- * Phase 12 Sprint 3: Search Algorithm Optimization
8
- *
9
- * @module search/BM25Search
10
- */
11
- import { SEARCH_LIMITS } from '../utils/constants.js';
12
/**
 * English stopwords excluded from both queries and indexed documents.
 * They occur in almost every text, so they carry no useful ranking signal.
 * Each row is a space-separated group; rows are split into individual words.
 */
export const STOPWORDS = new Set([
    'a an and are as at be by for from',
    'has he in is it its of on or that',
    'the to was were will with you your',
    'this but they have had what when where',
    'who which why how all each every both',
    'few more most other some such no not',
    'only own same so than too very can',
    'just should now also being been would',
    'could into over after before between under',
    'again then once here there any about',
].flatMap((row) => row.split(' ')));
28
- /**
29
- * Default BM25 parameters based on research recommendations: k1 (term frequency saturation) and b (length normalization). The BM25Search constructor uses them as the base that caller-supplied values override.
30
- */
31
- export const DEFAULT_BM25_CONFIG = {
32
- k1: 1.2,
33
- b: 0.75,
34
- };
35
/**
 * Count token occurrences and wrap them as an index entry for one document.
 */
function createDocumentEntry(entityName, tokens) {
    const termFreqs = new Map();
    for (const token of tokens) {
        termFreqs.set(token, (termFreqs.get(token) || 0) + 1);
    }
    return { entityName, termFreqs, docLength: tokens.length };
}
/**
 * Remove a document from the index and decrement the document frequency of
 * each of its terms. Returns false when the document was not indexed.
 */
function discardDocument(index, entityName) {
    const entry = index.documents.get(entityName);
    if (!entry) {
        return false;
    }
    for (const term of entry.termFreqs.keys()) {
        const df = index.documentFrequency.get(term) || 0;
        if (df <= 1) {
            index.documentFrequency.delete(term);
        }
        else {
            index.documentFrequency.set(term, df - 1);
        }
    }
    index.documents.delete(entityName);
    return true;
}
/**
 * Add a document to the index, replacing any entry with the same name so
 * document frequencies are never counted twice for one entity.
 */
function insertDocument(index, entry) {
    discardDocument(index, entry.entityName);
    for (const term of entry.termFreqs.keys()) {
        index.documentFrequency.set(term, (index.documentFrequency.get(term) || 0) + 1);
    }
    index.documents.set(entry.entityName, entry);
}
/**
 * Recompute totalDocs and avgDocLength from the documents currently indexed.
 */
function refreshLengthStats(index) {
    let totalLength = 0;
    for (const doc of index.documents.values()) {
        totalLength += doc.docLength;
    }
    index.totalDocs = index.documents.size;
    index.avgDocLength = index.totalDocs > 0 ? totalLength / index.totalDocs : 0;
}
/**
 * BM25 Search implementation.
 *
 * BM25 improves over TF-IDF by:
 * 1. Saturating term frequency - repeated occurrences of a term add
 *    progressively less score
 * 2. Document length normalization - long documents do not dominate
 *
 * Formula:
 * score(D,Q) = sum_i( IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl)) )
 *
 * Where:
 * - f(qi,D) is the term frequency of qi in document D
 * - |D| is the length of document D in tokens
 * - avgdl is the average document length
 * - k1 and b are free parameters
 * - IDF(qi) = ln((N - df + 0.5) / (df + 0.5) + 1) for N documents, df of
 *   which contain qi
 *
 * @example
 * ```typescript
 * const bm25 = new BM25Search(storage);
 * await bm25.buildIndex();
 * const results = await bm25.search('machine learning');
 * ```
 */
export class BM25Search {
    storage;
    index = null;
    config;
    /**
     * @param storage - Graph storage; only `loadGraph()` is used
     * @param config - Overrides merged on top of DEFAULT_BM25_CONFIG
     */
    constructor(storage, config = {}) {
        this.storage = storage;
        this.config = { ...DEFAULT_BM25_CONFIG, ...config };
    }
    /**
     * Get a copy of the current configuration.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Merge new values into the current configuration.
     *
     * @param config - New configuration values
     */
    setConfig(config) {
        this.config = { ...this.config, ...config };
    }
    /**
     * Tokenize text into lowercase terms with stopword filtering.
     *
     * @param text - Text to tokenize
     * @param filterStopwords - Whether to filter stopwords (default: true)
     * @returns Array of lowercase tokens
     */
    tokenize(text, filterStopwords = true) {
        const tokens = text
            .toLowerCase()
            .replace(/[^\w\s]/g, ' ')
            .split(/\s+/)
            .filter(token => token.length > 0);
        return filterStopwords
            ? tokens.filter(token => !STOPWORDS.has(token))
            : tokens;
    }
    /**
     * Build the BM25 index from the current graph.
     *
     * Should be called after significant graph changes. The new index
     * replaces the old one only once it is complete.
     */
    async buildIndex() {
        const graph = await this.storage.loadGraph();
        const index = {
            documents: new Map(),
            documentFrequency: new Map(),
            avgDocLength: 0,
            totalDocs: 0,
        };
        for (const entity of graph.entities) {
            const tokens = this.tokenize(this.entityToText(entity));
            insertDocument(index, createDocumentEntry(entity.name, tokens));
        }
        refreshLengthStats(index);
        this.index = index;
    }
    /**
     * Search using the BM25 algorithm. Builds the index first when needed.
     *
     * @param query - Search query
     * @param limit - Maximum results to return (capped at SEARCH_LIMITS.MAX;
     *   values below zero are treated as zero)
     * @returns Array of search results sorted by BM25 score, highest first
     */
    async search(query, limit = SEARCH_LIMITS.DEFAULT) {
        // Clamp at zero: a negative end index would make slice() drop results
        // from the tail instead of limiting them.
        const effectiveLimit = Math.max(0, Math.min(limit, SEARCH_LIMITS.MAX));
        // Ensure index is built
        if (!this.index) {
            await this.buildIndex();
        }
        if (!this.index || this.index.documents.size === 0) {
            return [];
        }
        // Tokenize query before touching storage; nothing to score without terms
        const queryTerms = this.tokenize(query);
        if (queryTerms.length === 0) {
            return [];
        }
        const graph = await this.storage.loadGraph();
        const entityMap = new Map(graph.entities.map(e => [e.name, e]));
        const { k1, b } = this.config;
        const { documents, documentFrequency, avgDocLength, totalDocs } = this.index;
        const results = [];
        // Calculate BM25 score for each document
        for (const [entityName, docEntry] of documents) {
            const entity = entityMap.get(entityName);
            if (!entity) {
                continue;
            }
            let score = 0;
            const matchedFields = {};
            // Indices of observations matched by ANY query term, so matches
            // from earlier terms are not lost when later terms also match.
            const matchedObservationIndexes = new Set();
            let loweredObservations = null;
            for (const term of queryTerms) {
                const tf = docEntry.termFreqs.get(term) || 0;
                if (tf === 0) {
                    continue;
                }
                // Calculate IDF
                const df = documentFrequency.get(term) || 0;
                const idf = df > 0 ? Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1) : 0;
                // Calculate BM25 score component
                const numerator = tf * (k1 + 1);
                const denominator = tf + k1 * (1 - b + b * (docEntry.docLength / avgDocLength));
                score += idf * (numerator / denominator);
                // Track which fields matched
                if (entity.name.toLowerCase().includes(term)) {
                    matchedFields.name = true;
                }
                if (entity.entityType.toLowerCase().includes(term)) {
                    matchedFields.entityType = true;
                }
                if (loweredObservations === null) {
                    loweredObservations = entity.observations.map(o => o.toLowerCase());
                }
                loweredObservations.forEach((observation, i) => {
                    if (observation.includes(term)) {
                        matchedObservationIndexes.add(i);
                    }
                });
            }
            if (matchedObservationIndexes.size > 0) {
                matchedFields.observations = entity.observations.filter((_, i) => matchedObservationIndexes.has(i));
            }
            if (score > 0) {
                results.push({
                    entity,
                    score,
                    matchedFields,
                });
            }
        }
        // Sort by score descending and limit
        return results
            .sort((left, right) => right.score - left.score)
            .slice(0, effectiveLimit);
    }
    /**
     * Update the index for changed entities.
     *
     * Entities that no longer exist in the graph are dropped from the index.
     * When no index exists yet, a full build is performed instead.
     *
     * @param changedEntityNames - Names of entities that changed
     */
    async update(changedEntityNames) {
        if (!this.index) {
            await this.buildIndex();
            return;
        }
        const graph = await this.storage.loadGraph();
        const entityMap = new Map(graph.entities.map(e => [e.name, e]));
        // Process each changed entity
        for (const entityName of changedEntityNames) {
            const entity = entityMap.get(entityName);
            if (entity) {
                const tokens = this.tokenize(this.entityToText(entity));
                insertDocument(this.index, createDocumentEntry(entityName, tokens));
            }
            else {
                discardDocument(this.index, entityName);
            }
        }
        // Recalculate average document length
        refreshLengthStats(this.index);
    }
    /**
     * Remove an entity from the index.
     *
     * @param entityName - Name of entity to remove
     * @returns true if the entity was indexed and has been removed; false
     *   when no index is built or the entity is not in it
     */
    remove(entityName) {
        if (!this.index || !discardDocument(this.index, entityName)) {
            return false;
        }
        refreshLengthStats(this.index);
        return true;
    }
    /**
     * Clear the index.
     */
    clearIndex() {
        this.index = null;
    }
    /**
     * Check if the index is built.
     */
    isIndexed() {
        return this.index !== null;
    }
    /**
     * Get index statistics, or null when the index has not been built.
     */
    getIndexStats() {
        if (!this.index) {
            return null;
        }
        return {
            documents: this.index.documents.size,
            terms: this.index.documentFrequency.size,
            avgDocLength: this.index.avgDocLength,
        };
    }
    /**
     * Convert an entity to searchable text: name, entity type and all
     * observations joined by spaces.
     */
    entityToText(entity) {
        return [entity.name, entity.entityType, ...entity.observations].join(' ');
    }
}