@danielsimonjr/memory-mcp 0.47.1 → 9.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +2000 -194
  3. package/dist/__tests__/file-path.test.js +5 -5
  4. package/dist/__tests__/knowledge-graph.test.js +3 -8
  5. package/dist/core/EntityManager.d.ts +266 -0
  6. package/dist/core/EntityManager.d.ts.map +1 -0
  7. package/dist/core/EntityManager.js +85 -133
  8. package/dist/core/GraphEventEmitter.d.ts +202 -0
  9. package/dist/core/GraphEventEmitter.d.ts.map +1 -0
  10. package/dist/core/GraphEventEmitter.js +346 -0
  11. package/dist/core/GraphStorage.d.ts +395 -0
  12. package/dist/core/GraphStorage.d.ts.map +1 -0
  13. package/dist/core/GraphStorage.js +643 -31
  14. package/dist/core/GraphTraversal.d.ts +141 -0
  15. package/dist/core/GraphTraversal.d.ts.map +1 -0
  16. package/dist/core/GraphTraversal.js +573 -0
  17. package/dist/core/HierarchyManager.d.ts +111 -0
  18. package/dist/core/HierarchyManager.d.ts.map +1 -0
  19. package/dist/{features → core}/HierarchyManager.js +14 -9
  20. package/dist/core/ManagerContext.d.ts +72 -0
  21. package/dist/core/ManagerContext.d.ts.map +1 -0
  22. package/dist/core/ManagerContext.js +118 -0
  23. package/dist/core/ObservationManager.d.ts +85 -0
  24. package/dist/core/ObservationManager.d.ts.map +1 -0
  25. package/dist/core/ObservationManager.js +51 -57
  26. package/dist/core/RelationManager.d.ts +131 -0
  27. package/dist/core/RelationManager.d.ts.map +1 -0
  28. package/dist/core/RelationManager.js +31 -7
  29. package/dist/core/SQLiteStorage.d.ts +354 -0
  30. package/dist/core/SQLiteStorage.d.ts.map +1 -0
  31. package/dist/core/SQLiteStorage.js +917 -0
  32. package/dist/core/StorageFactory.d.ts +45 -0
  33. package/dist/core/StorageFactory.d.ts.map +1 -0
  34. package/dist/core/StorageFactory.js +64 -0
  35. package/dist/core/TransactionManager.d.ts +464 -0
  36. package/dist/core/TransactionManager.d.ts.map +1 -0
  37. package/dist/core/TransactionManager.js +490 -13
  38. package/dist/core/index.d.ts +17 -0
  39. package/dist/core/index.d.ts.map +1 -0
  40. package/dist/core/index.js +12 -2
  41. package/dist/features/AnalyticsManager.d.ts +44 -0
  42. package/dist/features/AnalyticsManager.d.ts.map +1 -0
  43. package/dist/features/AnalyticsManager.js +14 -13
  44. package/dist/features/ArchiveManager.d.ts +133 -0
  45. package/dist/features/ArchiveManager.d.ts.map +1 -0
  46. package/dist/features/ArchiveManager.js +221 -14
  47. package/dist/features/CompressionManager.d.ts +117 -0
  48. package/dist/features/CompressionManager.d.ts.map +1 -0
  49. package/dist/features/CompressionManager.js +189 -20
  50. package/dist/features/IOManager.d.ts +225 -0
  51. package/dist/features/IOManager.d.ts.map +1 -0
  52. package/dist/features/IOManager.js +1041 -0
  53. package/dist/features/StreamingExporter.d.ts +123 -0
  54. package/dist/features/StreamingExporter.d.ts.map +1 -0
  55. package/dist/features/StreamingExporter.js +203 -0
  56. package/dist/features/TagManager.d.ts +147 -0
  57. package/dist/features/TagManager.d.ts.map +1 -0
  58. package/dist/features/index.d.ts +12 -0
  59. package/dist/features/index.d.ts.map +1 -0
  60. package/dist/features/index.js +5 -6
  61. package/dist/index.d.ts +9 -0
  62. package/dist/index.d.ts.map +1 -0
  63. package/dist/index.js +12 -45
  64. package/dist/memory.jsonl +1 -18
  65. package/dist/search/BasicSearch.d.ts +51 -0
  66. package/dist/search/BasicSearch.d.ts.map +1 -0
  67. package/dist/search/BasicSearch.js +9 -3
  68. package/dist/search/BooleanSearch.d.ts +98 -0
  69. package/dist/search/BooleanSearch.d.ts.map +1 -0
  70. package/dist/search/BooleanSearch.js +156 -9
  71. package/dist/search/EmbeddingService.d.ts +178 -0
  72. package/dist/search/EmbeddingService.d.ts.map +1 -0
  73. package/dist/search/EmbeddingService.js +358 -0
  74. package/dist/search/FuzzySearch.d.ts +118 -0
  75. package/dist/search/FuzzySearch.d.ts.map +1 -0
  76. package/dist/search/FuzzySearch.js +241 -25
  77. package/dist/search/QueryCostEstimator.d.ts +111 -0
  78. package/dist/search/QueryCostEstimator.d.ts.map +1 -0
  79. package/dist/search/QueryCostEstimator.js +355 -0
  80. package/dist/search/RankedSearch.d.ts +71 -0
  81. package/dist/search/RankedSearch.d.ts.map +1 -0
  82. package/dist/search/RankedSearch.js +54 -6
  83. package/dist/search/SavedSearchManager.d.ts +79 -0
  84. package/dist/search/SavedSearchManager.d.ts.map +1 -0
  85. package/dist/search/SearchFilterChain.d.ts +120 -0
  86. package/dist/search/SearchFilterChain.d.ts.map +1 -0
  87. package/dist/search/SearchFilterChain.js +2 -4
  88. package/dist/search/SearchManager.d.ts +326 -0
  89. package/dist/search/SearchManager.d.ts.map +1 -0
  90. package/dist/search/SearchManager.js +148 -0
  91. package/dist/search/SearchSuggestions.d.ts +27 -0
  92. package/dist/search/SearchSuggestions.d.ts.map +1 -0
  93. package/dist/search/SearchSuggestions.js +1 -1
  94. package/dist/search/SemanticSearch.d.ts +149 -0
  95. package/dist/search/SemanticSearch.d.ts.map +1 -0
  96. package/dist/search/SemanticSearch.js +323 -0
  97. package/dist/search/TFIDFEventSync.d.ts +85 -0
  98. package/dist/search/TFIDFEventSync.d.ts.map +1 -0
  99. package/dist/search/TFIDFEventSync.js +133 -0
  100. package/dist/search/TFIDFIndexManager.d.ts +151 -0
  101. package/dist/search/TFIDFIndexManager.d.ts.map +1 -0
  102. package/dist/search/TFIDFIndexManager.js +232 -17
  103. package/dist/search/VectorStore.d.ts +235 -0
  104. package/dist/search/VectorStore.d.ts.map +1 -0
  105. package/dist/search/VectorStore.js +311 -0
  106. package/dist/search/index.d.ts +21 -0
  107. package/dist/search/index.d.ts.map +1 -0
  108. package/dist/search/index.js +12 -0
  109. package/dist/server/MCPServer.d.ts +21 -0
  110. package/dist/server/MCPServer.d.ts.map +1 -0
  111. package/dist/server/MCPServer.js +4 -4
  112. package/dist/server/responseCompressor.d.ts +94 -0
  113. package/dist/server/responseCompressor.d.ts.map +1 -0
  114. package/dist/server/responseCompressor.js +127 -0
  115. package/dist/server/toolDefinitions.d.ts +27 -0
  116. package/dist/server/toolDefinitions.d.ts.map +1 -0
  117. package/dist/server/toolDefinitions.js +189 -18
  118. package/dist/server/toolHandlers.d.ts +41 -0
  119. package/dist/server/toolHandlers.d.ts.map +1 -0
  120. package/dist/server/toolHandlers.js +467 -75
  121. package/dist/types/index.d.ts +13 -0
  122. package/dist/types/index.d.ts.map +1 -0
  123. package/dist/types/index.js +1 -1
  124. package/dist/types/types.d.ts +1654 -0
  125. package/dist/types/types.d.ts.map +1 -0
  126. package/dist/types/types.js +9 -0
  127. package/dist/utils/compressedCache.d.ts +192 -0
  128. package/dist/utils/compressedCache.d.ts.map +1 -0
  129. package/dist/utils/compressedCache.js +309 -0
  130. package/dist/utils/compressionUtil.d.ts +214 -0
  131. package/dist/utils/compressionUtil.d.ts.map +1 -0
  132. package/dist/utils/compressionUtil.js +247 -0
  133. package/dist/utils/constants.d.ts +245 -0
  134. package/dist/utils/constants.d.ts.map +1 -0
  135. package/dist/utils/constants.js +124 -0
  136. package/dist/utils/entityUtils.d.ts +321 -0
  137. package/dist/utils/entityUtils.d.ts.map +1 -0
  138. package/dist/utils/entityUtils.js +434 -4
  139. package/dist/utils/errors.d.ts +95 -0
  140. package/dist/utils/errors.d.ts.map +1 -0
  141. package/dist/utils/errors.js +24 -0
  142. package/dist/utils/formatters.d.ts +145 -0
  143. package/dist/utils/formatters.d.ts.map +1 -0
  144. package/dist/utils/{paginationUtils.js → formatters.js} +54 -3
  145. package/dist/utils/index.d.ts +23 -0
  146. package/dist/utils/index.d.ts.map +1 -0
  147. package/dist/utils/index.js +69 -31
  148. package/dist/utils/indexes.d.ts +270 -0
  149. package/dist/utils/indexes.d.ts.map +1 -0
  150. package/dist/utils/indexes.js +526 -0
  151. package/dist/utils/logger.d.ts +24 -0
  152. package/dist/utils/logger.d.ts.map +1 -0
  153. package/dist/utils/operationUtils.d.ts +124 -0
  154. package/dist/utils/operationUtils.d.ts.map +1 -0
  155. package/dist/utils/operationUtils.js +175 -0
  156. package/dist/utils/parallelUtils.d.ts +72 -0
  157. package/dist/utils/parallelUtils.d.ts.map +1 -0
  158. package/dist/utils/parallelUtils.js +169 -0
  159. package/dist/utils/schemas.d.ts +374 -0
  160. package/dist/utils/schemas.d.ts.map +1 -0
  161. package/dist/utils/schemas.js +302 -2
  162. package/dist/utils/searchAlgorithms.d.ts +99 -0
  163. package/dist/utils/searchAlgorithms.d.ts.map +1 -0
  164. package/dist/utils/searchAlgorithms.js +167 -0
  165. package/dist/utils/searchCache.d.ts +108 -0
  166. package/dist/utils/searchCache.d.ts.map +1 -0
  167. package/dist/utils/taskScheduler.d.ts +290 -0
  168. package/dist/utils/taskScheduler.d.ts.map +1 -0
  169. package/dist/utils/taskScheduler.js +466 -0
  170. package/dist/workers/index.d.ts +12 -0
  171. package/dist/workers/index.d.ts.map +1 -0
  172. package/dist/workers/index.js +9 -0
  173. package/dist/workers/levenshteinWorker.d.ts +60 -0
  174. package/dist/workers/levenshteinWorker.d.ts.map +1 -0
  175. package/dist/workers/levenshteinWorker.js +98 -0
  176. package/package.json +17 -4
  177. package/dist/__tests__/edge-cases/edge-cases.test.js +0 -406
  178. package/dist/__tests__/integration/workflows.test.js +0 -449
  179. package/dist/__tests__/performance/benchmarks.test.js +0 -413
  180. package/dist/__tests__/unit/core/EntityManager.test.js +0 -334
  181. package/dist/__tests__/unit/core/GraphStorage.test.js +0 -205
  182. package/dist/__tests__/unit/core/RelationManager.test.js +0 -274
  183. package/dist/__tests__/unit/features/CompressionManager.test.js +0 -350
  184. package/dist/__tests__/unit/search/BasicSearch.test.js +0 -311
  185. package/dist/__tests__/unit/search/BooleanSearch.test.js +0 -432
  186. package/dist/__tests__/unit/search/FuzzySearch.test.js +0 -448
  187. package/dist/__tests__/unit/search/RankedSearch.test.js +0 -379
  188. package/dist/__tests__/unit/utils/levenshtein.test.js +0 -77
  189. package/dist/core/KnowledgeGraphManager.js +0 -423
  190. package/dist/features/BackupManager.js +0 -311
  191. package/dist/features/ExportManager.js +0 -305
  192. package/dist/features/ImportExportManager.js +0 -50
  193. package/dist/features/ImportManager.js +0 -328
  194. package/dist/types/analytics.types.js +0 -6
  195. package/dist/types/entity.types.js +0 -7
  196. package/dist/types/import-export.types.js +0 -7
  197. package/dist/types/search.types.js +0 -7
  198. package/dist/types/tag.types.js +0 -6
  199. package/dist/utils/dateUtils.js +0 -89
  200. package/dist/utils/filterUtils.js +0 -155
  201. package/dist/utils/levenshtein.js +0 -62
  202. package/dist/utils/pathUtils.js +0 -115
  203. package/dist/utils/responseFormatter.js +0 -55
  204. package/dist/utils/tagUtils.js +0 -107
  205. package/dist/utils/tfidf.js +0 -90
  206. package/dist/utils/validationHelper.js +0 -99
  207. package/dist/utils/validationUtils.js +0 -109
@@ -0,0 +1,133 @@
1
+ /**
2
+ * TF-IDF Event Sync
3
+ *
4
+ * Phase 10 Sprint 3: Hooks TFIDFIndexManager to graph events for automatic
5
+ * incremental index updates when entities change.
6
+ *
7
+ * @module search/TFIDFEventSync
8
+ */
9
+ /**
10
+ * Phase 10 Sprint 3: Synchronizes TF-IDF index with graph changes via events.
11
+ *
12
+ * Listens to graph events and triggers incremental index updates automatically.
13
+ * More efficient than rebuilding the entire index on every change.
14
+ *
15
+ * @example
16
+ * ```typescript
17
+ * const storage = new GraphStorage('/data/memory.jsonl');
18
+ * const indexManager = new TFIDFIndexManager('/data');
19
+ *
20
+ * // Load or build index
21
+ * await indexManager.loadIndex();
22
+ *
23
+ * // Enable automatic sync
24
+ * const sync = new TFIDFEventSync(indexManager, storage.events, storage);
25
+ * sync.enable();
26
+ *
27
+ * // Now entities added to storage will automatically update the index
28
+ * await storage.appendEntity({ name: 'New', entityType: 'test', observations: [] });
29
+ *
30
+ * // Disable when done
31
+ * sync.disable();
32
+ * ```
33
+ */
34
export class TFIDFEventSync {
    indexManager;
    eventEmitter;
    storage;
    unsubscribers = [];
    enabled = false;
    /**
     * Create a new TFIDFEventSync instance.
     *
     * @param indexManager - TFIDFIndexManager to sync
     * @param eventEmitter - GraphEventEmitter to listen to
     * @param storage - Storage to fetch entity data from (for updates)
     */
    constructor(indexManager, eventEmitter, storage) {
        this.indexManager = indexManager;
        this.eventEmitter = eventEmitter;
        this.storage = storage;
    }
    /**
     * Enable automatic index synchronization.
     *
     * Subscribes to entity:created, entity:updated, and entity:deleted events
     * and keeps the unsubscribe callbacks returned by the emitter so that
     * disable() can undo the subscriptions. Calling it while already enabled
     * is a no-op.
     */
    enable() {
        if (this.enabled) {
            return;
        }
        // Subscribe to entity events
        this.unsubscribers.push(this.eventEmitter.on('entity:created', this.handleEntityCreated.bind(this)));
        this.unsubscribers.push(this.eventEmitter.on('entity:updated', this.handleEntityUpdated.bind(this)));
        this.unsubscribers.push(this.eventEmitter.on('entity:deleted', this.handleEntityDeleted.bind(this)));
        this.enabled = true;
    }
    /**
     * Disable automatic index synchronization.
     *
     * Unsubscribes from all events. Calling it while already disabled is a
     * no-op. An update whose graph load is still in flight is discarded once
     * it completes (see handleEntityUpdated).
     */
    disable() {
        if (!this.enabled) {
            return;
        }
        for (const unsubscribe of this.unsubscribers) {
            unsubscribe();
        }
        this.unsubscribers = [];
        this.enabled = false;
    }
    /**
     * Check if synchronization is enabled.
     *
     * @returns True if enabled
     */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Handle entity:created event.
     *
     * Forwards the entity carried by the event to the index manager. Ignored
     * while the index manager reports that no index is initialized.
     * @private
     */
    handleEntityCreated(event) {
        if (!this.indexManager.isInitialized()) {
            return;
        }
        const { name, entityType, observations } = event.entity;
        this.indexManager.addDocument({ name, entityType, observations });
    }
    /**
     * Handle entity:updated event.
     *
     * The event only carries the entity name, so the current entity state is
     * loaded from storage before the index is updated. If the entity is no
     * longer present in the loaded graph, nothing is indexed.
     *
     * A rejection from storage.loadGraph() propagates to the caller through
     * the returned promise.
     * NOTE(review): confirm the emitter handles rejected promises returned by
     * async listeners; otherwise a failed load surfaces as an unhandled rejection.
     *
     * @returns Promise that settles once the index has been updated or the update was skipped
     * @private
     */
    async handleEntityUpdated(event) {
        if (!this.indexManager.isInitialized()) {
            return;
        }
        // Remember whether this run was triggered through an active subscription,
        // so a direct call on a never-enabled instance keeps working.
        const wasEnabled = this.enabled;
        // Fetch the current entity state
        const graph = await this.storage.loadGraph();
        // disable() may have run while the graph was loading; a disabled sync
        // must not keep writing to the index.
        if (wasEnabled && !this.enabled) {
            return;
        }
        const entity = graph.entities.find((candidate) => candidate.name === event.entityName);
        if (!entity) {
            return;
        }
        this.indexManager.updateDocument({
            name: entity.name,
            entityType: entity.entityType,
            observations: entity.observations,
        });
    }
    /**
     * Handle entity:deleted event.
     *
     * Removes the named entity from the index. Ignored while the index manager
     * reports that no index is initialized.
     * @private
     */
    handleEntityDeleted(event) {
        if (!this.indexManager.isInitialized()) {
            return;
        }
        this.indexManager.removeDocument(event.entityName);
    }
}
@@ -0,0 +1,151 @@
1
+ /**
2
+ * TF-IDF Index Manager
3
+ *
4
+ * Manages pre-calculated TF-IDF indexes for fast ranked search.
5
+ * Handles index building, incremental updates, and persistence.
6
+ *
7
+ * @module search/TFIDFIndexManager
8
+ */
9
+ import type { TFIDFIndex, KnowledgeGraph, ReadonlyKnowledgeGraph } from '../types/index.js';
10
+ /**
11
+ * Manages TF-IDF index lifecycle: building, updating, and persistence.
12
+ */
13
+ export declare class TFIDFIndexManager {
14
+ private indexPath;
15
+ private index;
16
+ constructor(storageDir: string);
17
+ /**
18
+ * Build a complete TF-IDF index from a knowledge graph.
19
+ *
20
+ * @param graph - Knowledge graph to index
21
+ * @returns Newly built TF-IDF index
22
+ */
23
+ buildIndex(graph: ReadonlyKnowledgeGraph): Promise<TFIDFIndex>;
24
+ /**
25
+ * Update the index incrementally when entities change.
26
+ *
27
+ * More efficient than rebuilding the entire index.
28
+ *
29
+ * @param graph - Updated knowledge graph
30
+ * @param changedEntityNames - Names of entities that changed
31
+ */
32
+ updateIndex(graph: ReadonlyKnowledgeGraph, changedEntityNames: Set<string>): Promise<TFIDFIndex>;
33
+ /**
34
+ * Load index from disk.
35
+ *
36
+ * @returns Loaded index or null if not found
37
+ */
38
+ loadIndex(): Promise<TFIDFIndex | null>;
39
+ /**
40
+ * Save index to disk.
41
+ *
42
+ * @param index - Index to save (uses cached index if not provided)
43
+ */
44
+ saveIndex(index?: TFIDFIndex): Promise<void>;
45
+ /**
46
+ * Get the current cached index.
47
+ *
48
+ * @returns Cached index or null if not loaded
49
+ */
50
+ getIndex(): TFIDFIndex | null;
51
+ /**
52
+ * Clear the cached index and delete from disk.
53
+ */
54
+ clearIndex(): Promise<void>;
55
+ /**
56
+ * Check if the index needs rebuilding based on graph state.
57
+ *
58
+ * @param graph - Current knowledge graph
59
+ * @returns True if index should be rebuilt
60
+ */
61
+ needsRebuild(graph: KnowledgeGraph): boolean;
62
+ /**
63
+ * Phase 10 Sprint 3: Add a single document to the index incrementally.
64
+ *
65
+ * More efficient than rebuilding the entire index for single entity additions.
66
+ * Stores TF for the new document and recalculates IDF for every term, because the document count changed; does nothing when no index is loaded.
67
+ *
68
+ * @param entity - The entity to add
69
+ *
70
+ * @example
71
+ * ```typescript
72
+ * const indexManager = new TFIDFIndexManager('/data');
73
+ * await indexManager.loadIndex();
74
+ *
75
+ * // Add new entity
76
+ * indexManager.addDocument({
77
+ * name: 'NewEntity',
78
+ * entityType: 'person',
79
+ * observations: ['Software engineer']
80
+ * });
81
+ * ```
82
+ */
83
+ addDocument(entity: {
84
+ name: string;
85
+ entityType: string;
86
+ observations: string[];
87
+ }): void;
88
+ /**
89
+ * Phase 10 Sprint 3: Remove a single document from the index incrementally.
90
+ *
91
+ * More efficient than rebuilding the entire index for single entity deletions.
92
+ * Recalculates IDF for every term, because the document count changed; does nothing when the entity is not indexed.
93
+ *
94
+ * @param entityName - Name of the entity to remove
95
+ *
96
+ * @example
97
+ * ```typescript
98
+ * indexManager.removeDocument('DeletedEntity');
99
+ * ```
100
+ */
101
+ removeDocument(entityName: string): void;
102
+ /**
103
+ * Phase 10 Sprint 3: Update a single document in the index incrementally.
104
+ *
105
+ * More efficient than rebuilding the entire index for single entity updates.
106
+ * Replaces the stored term frequencies and recalculates IDF for terms added to or removed from the document.
107
+ *
108
+ * @param entity - The updated entity
109
+ *
110
+ * @example
111
+ * ```typescript
112
+ * indexManager.updateDocument({
113
+ * name: 'ExistingEntity',
114
+ * entityType: 'person',
115
+ * observations: ['Updated observations']
116
+ * });
117
+ * ```
118
+ */
119
+ updateDocument(entity: {
120
+ name: string;
121
+ entityType: string;
122
+ observations: string[];
123
+ }): void;
124
+ /**
125
+ * Phase 10 Sprint 3: Recalculate IDF scores for a set of terms.
126
+ *
127
+ * @param terms - Set of terms to recalculate IDF for
128
+ * @private
129
+ */
130
+ private recalculateIDFForTerms;
131
+ /**
132
+ * Phase 10 Sprint 3: Recalculate IDF scores for ALL terms in the index.
133
+ *
134
+ * Called when the total document count changes (add/remove document).
135
+ * @private
136
+ */
137
+ private recalculateAllIDF;
138
+ /**
139
+ * Phase 10 Sprint 3: Check if the index is loaded/initialized.
140
+ *
141
+ * @returns True if index is available
142
+ */
143
+ isInitialized(): boolean;
144
+ /**
145
+ * Phase 10 Sprint 3: Get the number of documents in the index.
146
+ *
147
+ * @returns Document count or 0 if not initialized
148
+ */
149
+ getDocumentCount(): number;
150
+ }
151
+ //# sourceMappingURL=TFIDFIndexManager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TFIDFIndexManager.d.ts","sourceRoot":"","sources":["../../src/search/TFIDFIndexManager.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAE,UAAU,EAAkB,cAAc,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAgB5G;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,KAAK,CAA2B;gBAE5B,UAAU,EAAE,MAAM;IAI9B;;;;;OAKG;IACG,UAAU,CAAC,KAAK,EAAE,sBAAsB,GAAG,OAAO,CAAC,UAAU,CAAC;IAgDpE;;;;;;;OAOG;IACG,WAAW,CAAC,KAAK,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;IAgEtG;;;;OAIG;IACG,SAAS,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAmB7C;;;;OAIG;IACG,SAAS,CAAC,KAAK,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBlD;;;;OAIG;IACH,QAAQ,IAAI,UAAU,GAAG,IAAI;IAI7B;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IASjC;;;;;OAKG;IACH,YAAY,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO;IAsB5C;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,WAAW,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,EAAE,CAAA;KAAE,GAAG,IAAI;IA+BvF;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAqBxC;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,EAAE,CAAA;KAAE,GAAG,IAAI;IAgD1F;;;;;OAKG;IACH,OAAO,CAAC,sBAAsB;IAkC9B;;;;;OAKG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;;;OAIG;IACH,aAAa,IAAI,OAAO;IAIxB;;;;OAIG;IACH,gBAAgB,IAAI,MAAM;CAG3B"}
@@ -8,7 +8,7 @@
8
8
  */
9
9
  import * as fs from 'fs/promises';
10
10
  import * as path from 'path';
11
- import { calculateIDF, tokenize } from '../utils/tfidf.js';
11
+ import { calculateIDFFromTokenSets, tokenize } from '../utils/index.js';
12
12
  const INDEX_VERSION = '1.0';
13
13
  const INDEX_FILENAME = 'tfidf-index.json';
14
14
  /**
@@ -28,18 +28,17 @@ export class TFIDFIndexManager {
28
28
  */
29
29
  async buildIndex(graph) {
30
30
  const documents = new Map();
31
- const allDocumentTexts = [];
32
- const allTokens = [];
33
- // Build document vectors
31
+ const allTokenSets = [];
32
+ // Build document vectors - tokenize once per document
34
33
  for (const entity of graph.entities) {
35
34
  const documentText = [
36
35
  entity.name,
37
36
  entity.entityType,
38
37
  ...entity.observations,
39
38
  ].join(' ');
40
- allDocumentTexts.push(documentText);
41
39
  const tokens = tokenize(documentText);
42
- allTokens.push(tokens);
40
+ const tokenSet = new Set(tokens);
41
+ allTokenSets.push(tokenSet);
43
42
  // Calculate term frequencies
44
43
  const termFreq = {};
45
44
  for (const term of tokens) {
@@ -51,11 +50,11 @@ export class TFIDFIndexManager {
51
50
  documentText,
52
51
  });
53
52
  }
54
- // Calculate IDF for all terms
53
+ // Calculate IDF for all terms using pre-tokenized sets (O(1) lookup per document)
55
54
  const idf = new Map();
56
- const allTerms = new Set(allTokens.flat());
55
+ const allTerms = new Set(allTokenSets.flatMap(s => Array.from(s)));
57
56
  for (const term of allTerms) {
58
- const idfScore = calculateIDF(term, allDocumentTexts);
57
+ const idfScore = calculateIDFFromTokenSets(term, allTokenSets);
59
58
  idf.set(term, idfScore);
60
59
  }
61
60
  this.index = {
@@ -80,8 +79,7 @@ export class TFIDFIndexManager {
80
79
  return this.buildIndex(graph);
81
80
  }
82
81
  // Rebuild document vectors for changed entities
83
- const allDocumentTexts = [];
84
- const allTokens = [];
82
+ const allTokenSets = [];
85
83
  const updatedDocuments = new Map(this.index.documents);
86
84
  // Remove deleted entities
87
85
  for (const entityName of changedEntityNames) {
@@ -90,16 +88,16 @@ export class TFIDFIndexManager {
90
88
  updatedDocuments.delete(entityName);
91
89
  }
92
90
  }
93
- // Update/add changed entities
91
+ // Update/add changed entities - tokenize once per document
94
92
  for (const entity of graph.entities) {
95
93
  const documentText = [
96
94
  entity.name,
97
95
  entity.entityType,
98
96
  ...entity.observations,
99
97
  ].join(' ');
100
- allDocumentTexts.push(documentText);
101
98
  const tokens = tokenize(documentText);
102
- allTokens.push(tokens);
99
+ const tokenSet = new Set(tokens);
100
+ allTokenSets.push(tokenSet);
103
101
  if (changedEntityNames.has(entity.name)) {
104
102
  // Calculate term frequencies for changed entity
105
103
  const termFreq = {};
@@ -113,11 +111,11 @@ export class TFIDFIndexManager {
113
111
  });
114
112
  }
115
113
  }
116
- // Recalculate IDF (need all documents for accurate IDF)
114
+ // Recalculate IDF using pre-tokenized sets (O(1) lookup per document)
117
115
  const idf = new Map();
118
- const allTerms = new Set(allTokens.flat());
116
+ const allTerms = new Set(allTokenSets.flatMap(s => Array.from(s)));
119
117
  for (const term of allTerms) {
120
- const idfScore = calculateIDF(term, allDocumentTexts);
118
+ const idfScore = calculateIDFFromTokenSets(term, allTokenSets);
121
119
  idf.set(term, idfScore);
122
120
  }
123
121
  this.index = {
@@ -214,4 +212,221 @@ export class TFIDFIndexManager {
214
212
  }
215
213
  return false;
216
214
  }
215
+ // ==================== Phase 10 Sprint 3: Incremental Index Updates ====================
216
+ /**
217
+ * Phase 10 Sprint 3: Add a single document to the index incrementally.
218
+ *
219
+ * More efficient than rebuilding the entire index for single entity additions.
220
+ * Updates TF for the new document and recalculates IDF for affected terms.
221
+ *
222
+ * @param entity - The entity to add
223
+ *
224
+ * @example
225
+ * ```typescript
226
+ * const indexManager = new TFIDFIndexManager('/data');
227
+ * await indexManager.loadIndex();
228
+ *
229
+ * // Add new entity
230
+ * indexManager.addDocument({
231
+ * name: 'NewEntity',
232
+ * entityType: 'person',
233
+ * observations: ['Software engineer']
234
+ * });
235
+ * ```
236
+ */
237
+ addDocument(entity) {
238
+ if (!this.index) {
239
+ // Can't add to non-existent index
240
+ return;
241
+ }
242
+ // Build document text and tokens
243
+ const documentText = [entity.name, entity.entityType, ...entity.observations].join(' ');
244
+ const tokens = tokenize(documentText);
245
+ // Calculate term frequencies
246
+ const termFreq = {};
247
+ for (const term of tokens) {
248
+ termFreq[term] = (termFreq[term] || 0) + 1;
249
+ }
250
+ // Add to documents map
251
+ this.index.documents.set(entity.name, {
252
+ entityName: entity.name,
253
+ terms: termFreq,
254
+ documentText,
255
+ });
256
+ // Update IDF for ALL terms because N changed (total document count)
257
+ // IDF = log(N/df), and N has increased
258
+ this.recalculateAllIDF();
259
+ // Update timestamp
260
+ this.index.lastUpdated = new Date().toISOString();
261
+ }
262
+ /**
263
+ * Phase 10 Sprint 3: Remove a single document from the index incrementally.
264
+ *
265
+ * More efficient than rebuilding the entire index for single entity deletions.
266
+ * Recalculates IDF for terms that were in the removed document.
267
+ *
268
+ * @param entityName - Name of the entity to remove
269
+ *
270
+ * @example
271
+ * ```typescript
272
+ * indexManager.removeDocument('DeletedEntity');
273
+ * ```
274
+ */
275
+ removeDocument(entityName) {
276
+ if (!this.index) {
277
+ return;
278
+ }
279
+ const document = this.index.documents.get(entityName);
280
+ if (!document) {
281
+ return;
282
+ }
283
+ // Remove from documents map
284
+ this.index.documents.delete(entityName);
285
+ // Update IDF for ALL terms because N changed (total document count)
286
+ // IDF = log(N/df), and N has decreased
287
+ this.recalculateAllIDF();
288
+ // Update timestamp
289
+ this.index.lastUpdated = new Date().toISOString();
290
+ }
291
+ /**
292
+ * Phase 10 Sprint 3: Update a single document in the index incrementally.
293
+ *
294
+ * More efficient than rebuilding the entire index for single entity updates.
295
+ * Handles both term changes and observation updates.
296
+ *
297
+ * @param entity - The updated entity
298
+ *
299
+ * @example
300
+ * ```typescript
301
+ * indexManager.updateDocument({
302
+ * name: 'ExistingEntity',
303
+ * entityType: 'person',
304
+ * observations: ['Updated observations']
305
+ * });
306
+ * ```
307
+ */
308
+ updateDocument(entity) {
309
+ if (!this.index) {
310
+ return;
311
+ }
312
+ const oldDocument = this.index.documents.get(entity.name);
313
+ const oldTerms = oldDocument ? new Set(Object.keys(oldDocument.terms)) : new Set();
314
+ // Build new document
315
+ const documentText = [entity.name, entity.entityType, ...entity.observations].join(' ');
316
+ const tokens = tokenize(documentText);
317
+ const newTerms = new Set(tokens);
318
+ // Calculate term frequencies
319
+ const termFreq = {};
320
+ for (const term of tokens) {
321
+ termFreq[term] = (termFreq[term] || 0) + 1;
322
+ }
323
+ // Update documents map
324
+ this.index.documents.set(entity.name, {
325
+ entityName: entity.name,
326
+ terms: termFreq,
327
+ documentText,
328
+ });
329
+ // Find terms that changed (added or removed)
330
+ const changedTerms = new Set();
331
+ for (const term of oldTerms) {
332
+ if (!newTerms.has(term)) {
333
+ changedTerms.add(term);
334
+ }
335
+ }
336
+ for (const term of newTerms) {
337
+ if (!oldTerms.has(term)) {
338
+ changedTerms.add(term);
339
+ }
340
+ }
341
+ // Recalculate IDF for changed terms
342
+ if (changedTerms.size > 0) {
343
+ this.recalculateIDFForTerms(changedTerms);
344
+ }
345
+ // Update timestamp
346
+ this.index.lastUpdated = new Date().toISOString();
347
+ }
348
+ /**
349
+ * Phase 10 Sprint 3: Recalculate IDF scores for a set of terms.
350
+ *
351
+ * @param terms - Set of terms to recalculate IDF for
352
+ * @private
353
+ */
354
+ recalculateIDFForTerms(terms) {
355
+ if (!this.index) {
356
+ return;
357
+ }
358
+ const totalDocs = this.index.documents.size;
359
+ if (totalDocs === 0) {
360
+ // No documents, clear all IDF for these terms
361
+ for (const term of terms) {
362
+ this.index.idf.delete(term);
363
+ }
364
+ return;
365
+ }
366
+ // Count documents containing each term
367
+ for (const term of terms) {
368
+ let docCount = 0;
369
+ for (const doc of this.index.documents.values()) {
370
+ if (term in doc.terms) {
371
+ docCount++;
372
+ }
373
+ }
374
+ if (docCount > 0) {
375
+ // IDF = log(N / df) where N = total docs, df = doc frequency
376
+ const idfScore = Math.log(totalDocs / docCount);
377
+ this.index.idf.set(term, idfScore);
378
+ }
379
+ else {
380
+ // Term no longer exists in any document
381
+ this.index.idf.delete(term);
382
+ }
383
+ }
384
+ }
385
+ /**
386
+ * Phase 10 Sprint 3: Recalculate IDF scores for ALL terms in the index.
387
+ *
388
+ * Called when the total document count changes (add/remove document).
389
+ * @private
390
+ */
391
+ recalculateAllIDF() {
392
+ if (!this.index) {
393
+ return;
394
+ }
395
+ const totalDocs = this.index.documents.size;
396
+ if (totalDocs === 0) {
397
+ // No documents, clear all IDF
398
+ this.index.idf.clear();
399
+ return;
400
+ }
401
+ // Build term -> document count map
402
+ const termDocCounts = new Map();
403
+ for (const doc of this.index.documents.values()) {
404
+ for (const term of Object.keys(doc.terms)) {
405
+ termDocCounts.set(term, (termDocCounts.get(term) ?? 0) + 1);
406
+ }
407
+ }
408
+ // Clear old IDF and recalculate
409
+ this.index.idf.clear();
410
+ for (const [term, docCount] of termDocCounts) {
411
+ // IDF = log(N / df) where N = total docs, df = doc frequency
412
+ const idfScore = Math.log(totalDocs / docCount);
413
+ this.index.idf.set(term, idfScore);
414
+ }
415
+ }
416
+ /**
417
+ * Phase 10 Sprint 3: Check if the index is loaded/initialized.
418
+ *
419
+ * @returns True if index is available
420
+ */
421
+ isInitialized() {
422
+ return this.index !== null;
423
+ }
424
+ /**
425
+ * Phase 10 Sprint 3: Get the number of documents in the index.
426
+ *
427
+ * @returns Document count or 0 if not initialized
428
+ */
429
+ getDocumentCount() {
430
+ return this.index?.documents.size ?? 0;
431
+ }
217
432
  }