@danielsimonjr/memory-mcp 0.47.1 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +2000 -194
- package/dist/__tests__/file-path.test.js +5 -5
- package/dist/__tests__/knowledge-graph.test.js +3 -8
- package/dist/core/EntityManager.d.ts +266 -0
- package/dist/core/EntityManager.d.ts.map +1 -0
- package/dist/core/EntityManager.js +85 -133
- package/dist/core/GraphEventEmitter.d.ts +202 -0
- package/dist/core/GraphEventEmitter.d.ts.map +1 -0
- package/dist/core/GraphEventEmitter.js +346 -0
- package/dist/core/GraphStorage.d.ts +395 -0
- package/dist/core/GraphStorage.d.ts.map +1 -0
- package/dist/core/GraphStorage.js +643 -31
- package/dist/core/GraphTraversal.d.ts +141 -0
- package/dist/core/GraphTraversal.d.ts.map +1 -0
- package/dist/core/GraphTraversal.js +573 -0
- package/dist/core/HierarchyManager.d.ts +111 -0
- package/dist/core/HierarchyManager.d.ts.map +1 -0
- package/dist/{features → core}/HierarchyManager.js +14 -9
- package/dist/core/ManagerContext.d.ts +72 -0
- package/dist/core/ManagerContext.d.ts.map +1 -0
- package/dist/core/ManagerContext.js +118 -0
- package/dist/core/ObservationManager.d.ts +85 -0
- package/dist/core/ObservationManager.d.ts.map +1 -0
- package/dist/core/ObservationManager.js +51 -57
- package/dist/core/RelationManager.d.ts +131 -0
- package/dist/core/RelationManager.d.ts.map +1 -0
- package/dist/core/RelationManager.js +31 -7
- package/dist/core/SQLiteStorage.d.ts +354 -0
- package/dist/core/SQLiteStorage.d.ts.map +1 -0
- package/dist/core/SQLiteStorage.js +917 -0
- package/dist/core/StorageFactory.d.ts +45 -0
- package/dist/core/StorageFactory.d.ts.map +1 -0
- package/dist/core/StorageFactory.js +64 -0
- package/dist/core/TransactionManager.d.ts +464 -0
- package/dist/core/TransactionManager.d.ts.map +1 -0
- package/dist/core/TransactionManager.js +490 -13
- package/dist/core/index.d.ts +17 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +12 -2
- package/dist/features/AnalyticsManager.d.ts +44 -0
- package/dist/features/AnalyticsManager.d.ts.map +1 -0
- package/dist/features/AnalyticsManager.js +14 -13
- package/dist/features/ArchiveManager.d.ts +133 -0
- package/dist/features/ArchiveManager.d.ts.map +1 -0
- package/dist/features/ArchiveManager.js +221 -14
- package/dist/features/CompressionManager.d.ts +117 -0
- package/dist/features/CompressionManager.d.ts.map +1 -0
- package/dist/features/CompressionManager.js +189 -20
- package/dist/features/IOManager.d.ts +225 -0
- package/dist/features/IOManager.d.ts.map +1 -0
- package/dist/features/IOManager.js +1041 -0
- package/dist/features/StreamingExporter.d.ts +123 -0
- package/dist/features/StreamingExporter.d.ts.map +1 -0
- package/dist/features/StreamingExporter.js +203 -0
- package/dist/features/TagManager.d.ts +147 -0
- package/dist/features/TagManager.d.ts.map +1 -0
- package/dist/features/index.d.ts +12 -0
- package/dist/features/index.d.ts.map +1 -0
- package/dist/features/index.js +5 -6
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -45
- package/dist/memory.jsonl +1 -18
- package/dist/search/BasicSearch.d.ts +51 -0
- package/dist/search/BasicSearch.d.ts.map +1 -0
- package/dist/search/BasicSearch.js +9 -3
- package/dist/search/BooleanSearch.d.ts +98 -0
- package/dist/search/BooleanSearch.d.ts.map +1 -0
- package/dist/search/BooleanSearch.js +156 -9
- package/dist/search/EmbeddingService.d.ts +178 -0
- package/dist/search/EmbeddingService.d.ts.map +1 -0
- package/dist/search/EmbeddingService.js +358 -0
- package/dist/search/FuzzySearch.d.ts +118 -0
- package/dist/search/FuzzySearch.d.ts.map +1 -0
- package/dist/search/FuzzySearch.js +241 -25
- package/dist/search/QueryCostEstimator.d.ts +111 -0
- package/dist/search/QueryCostEstimator.d.ts.map +1 -0
- package/dist/search/QueryCostEstimator.js +355 -0
- package/dist/search/RankedSearch.d.ts +71 -0
- package/dist/search/RankedSearch.d.ts.map +1 -0
- package/dist/search/RankedSearch.js +54 -6
- package/dist/search/SavedSearchManager.d.ts +79 -0
- package/dist/search/SavedSearchManager.d.ts.map +1 -0
- package/dist/search/SearchFilterChain.d.ts +120 -0
- package/dist/search/SearchFilterChain.d.ts.map +1 -0
- package/dist/search/SearchFilterChain.js +2 -4
- package/dist/search/SearchManager.d.ts +326 -0
- package/dist/search/SearchManager.d.ts.map +1 -0
- package/dist/search/SearchManager.js +148 -0
- package/dist/search/SearchSuggestions.d.ts +27 -0
- package/dist/search/SearchSuggestions.d.ts.map +1 -0
- package/dist/search/SearchSuggestions.js +1 -1
- package/dist/search/SemanticSearch.d.ts +149 -0
- package/dist/search/SemanticSearch.d.ts.map +1 -0
- package/dist/search/SemanticSearch.js +323 -0
- package/dist/search/TFIDFEventSync.d.ts +85 -0
- package/dist/search/TFIDFEventSync.d.ts.map +1 -0
- package/dist/search/TFIDFEventSync.js +133 -0
- package/dist/search/TFIDFIndexManager.d.ts +151 -0
- package/dist/search/TFIDFIndexManager.d.ts.map +1 -0
- package/dist/search/TFIDFIndexManager.js +232 -17
- package/dist/search/VectorStore.d.ts +235 -0
- package/dist/search/VectorStore.d.ts.map +1 -0
- package/dist/search/VectorStore.js +311 -0
- package/dist/search/index.d.ts +21 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +12 -0
- package/dist/server/MCPServer.d.ts +21 -0
- package/dist/server/MCPServer.d.ts.map +1 -0
- package/dist/server/MCPServer.js +4 -4
- package/dist/server/responseCompressor.d.ts +94 -0
- package/dist/server/responseCompressor.d.ts.map +1 -0
- package/dist/server/responseCompressor.js +127 -0
- package/dist/server/toolDefinitions.d.ts +27 -0
- package/dist/server/toolDefinitions.d.ts.map +1 -0
- package/dist/server/toolDefinitions.js +189 -18
- package/dist/server/toolHandlers.d.ts +41 -0
- package/dist/server/toolHandlers.d.ts.map +1 -0
- package/dist/server/toolHandlers.js +467 -75
- package/dist/types/index.d.ts +13 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +1 -1
- package/dist/types/types.d.ts +1654 -0
- package/dist/types/types.d.ts.map +1 -0
- package/dist/types/types.js +9 -0
- package/dist/utils/compressedCache.d.ts +192 -0
- package/dist/utils/compressedCache.d.ts.map +1 -0
- package/dist/utils/compressedCache.js +309 -0
- package/dist/utils/compressionUtil.d.ts +214 -0
- package/dist/utils/compressionUtil.d.ts.map +1 -0
- package/dist/utils/compressionUtil.js +247 -0
- package/dist/utils/constants.d.ts +245 -0
- package/dist/utils/constants.d.ts.map +1 -0
- package/dist/utils/constants.js +124 -0
- package/dist/utils/entityUtils.d.ts +321 -0
- package/dist/utils/entityUtils.d.ts.map +1 -0
- package/dist/utils/entityUtils.js +434 -4
- package/dist/utils/errors.d.ts +95 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +24 -0
- package/dist/utils/formatters.d.ts +145 -0
- package/dist/utils/formatters.d.ts.map +1 -0
- package/dist/utils/{paginationUtils.js → formatters.js} +54 -3
- package/dist/utils/index.d.ts +23 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +69 -31
- package/dist/utils/indexes.d.ts +270 -0
- package/dist/utils/indexes.d.ts.map +1 -0
- package/dist/utils/indexes.js +526 -0
- package/dist/utils/logger.d.ts +24 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/operationUtils.d.ts +124 -0
- package/dist/utils/operationUtils.d.ts.map +1 -0
- package/dist/utils/operationUtils.js +175 -0
- package/dist/utils/parallelUtils.d.ts +72 -0
- package/dist/utils/parallelUtils.d.ts.map +1 -0
- package/dist/utils/parallelUtils.js +169 -0
- package/dist/utils/schemas.d.ts +374 -0
- package/dist/utils/schemas.d.ts.map +1 -0
- package/dist/utils/schemas.js +302 -2
- package/dist/utils/searchAlgorithms.d.ts +99 -0
- package/dist/utils/searchAlgorithms.d.ts.map +1 -0
- package/dist/utils/searchAlgorithms.js +167 -0
- package/dist/utils/searchCache.d.ts +108 -0
- package/dist/utils/searchCache.d.ts.map +1 -0
- package/dist/utils/taskScheduler.d.ts +290 -0
- package/dist/utils/taskScheduler.d.ts.map +1 -0
- package/dist/utils/taskScheduler.js +466 -0
- package/dist/workers/index.d.ts +12 -0
- package/dist/workers/index.d.ts.map +1 -0
- package/dist/workers/index.js +9 -0
- package/dist/workers/levenshteinWorker.d.ts +60 -0
- package/dist/workers/levenshteinWorker.d.ts.map +1 -0
- package/dist/workers/levenshteinWorker.js +98 -0
- package/package.json +17 -4
- package/dist/__tests__/edge-cases/edge-cases.test.js +0 -406
- package/dist/__tests__/integration/workflows.test.js +0 -449
- package/dist/__tests__/performance/benchmarks.test.js +0 -413
- package/dist/__tests__/unit/core/EntityManager.test.js +0 -334
- package/dist/__tests__/unit/core/GraphStorage.test.js +0 -205
- package/dist/__tests__/unit/core/RelationManager.test.js +0 -274
- package/dist/__tests__/unit/features/CompressionManager.test.js +0 -350
- package/dist/__tests__/unit/search/BasicSearch.test.js +0 -311
- package/dist/__tests__/unit/search/BooleanSearch.test.js +0 -432
- package/dist/__tests__/unit/search/FuzzySearch.test.js +0 -448
- package/dist/__tests__/unit/search/RankedSearch.test.js +0 -379
- package/dist/__tests__/unit/utils/levenshtein.test.js +0 -77
- package/dist/core/KnowledgeGraphManager.js +0 -423
- package/dist/features/BackupManager.js +0 -311
- package/dist/features/ExportManager.js +0 -305
- package/dist/features/ImportExportManager.js +0 -50
- package/dist/features/ImportManager.js +0 -328
- package/dist/types/analytics.types.js +0 -6
- package/dist/types/entity.types.js +0 -7
- package/dist/types/import-export.types.js +0 -7
- package/dist/types/search.types.js +0 -7
- package/dist/types/tag.types.js +0 -6
- package/dist/utils/dateUtils.js +0 -89
- package/dist/utils/filterUtils.js +0 -155
- package/dist/utils/levenshtein.js +0 -62
- package/dist/utils/pathUtils.js +0 -115
- package/dist/utils/responseFormatter.js +0 -55
- package/dist/utils/tagUtils.js +0 -107
- package/dist/utils/tfidf.js +0 -90
- package/dist/utils/validationHelper.js +0 -99
- package/dist/utils/validationUtils.js +0 -109
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TF-IDF Event Sync
|
|
3
|
+
*
|
|
4
|
+
* Phase 10 Sprint 3: Hooks TFIDFIndexManager to graph events for automatic
|
|
5
|
+
* incremental index updates when entities change.
|
|
6
|
+
*
|
|
7
|
+
* @module search/TFIDFEventSync
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Phase 10 Sprint 3: Keeps a TF-IDF index in step with graph changes via events.
 *
 * While enabled, the instance listens for `entity:created`, `entity:updated`
 * and `entity:deleted` events and forwards each one to the matching
 * incremental operation on the index manager, which avoids rebuilding the
 * whole index on every change.
 *
 * @example
 * ```typescript
 * const storage = new GraphStorage('/data/memory.jsonl');
 * const indexManager = new TFIDFIndexManager('/data');
 *
 * // Load or build index
 * await indexManager.loadIndex();
 *
 * // Enable automatic sync
 * const sync = new TFIDFEventSync(indexManager, storage.events, storage);
 * sync.enable();
 *
 * // Now entities added to storage will automatically update the index
 * await storage.appendEntity({ name: 'New', entityType: 'test', observations: [] });
 *
 * // Disable when done
 * sync.disable();
 * ```
 */
export class TFIDFEventSync {
    indexManager;
    eventEmitter;
    storage;
    // Functions returned by `eventEmitter.on(...)`; calling one removes that subscription.
    unsubscribers = [];
    enabled = false;
    /**
     * Create a new TFIDFEventSync instance.
     *
     * @param indexManager - TFIDFIndexManager to sync
     * @param eventEmitter - GraphEventEmitter to listen to
     * @param storage - Storage to fetch entity data from (for updates)
     */
    constructor(indexManager, eventEmitter, storage) {
        this.indexManager = indexManager;
        this.eventEmitter = eventEmitter;
        this.storage = storage;
    }
    /**
     * Enable automatic index synchronization.
     *
     * Subscribes to entity:created, entity:updated, and entity:deleted events.
     * Calling it while already enabled is a no-op. If any subscription throws,
     * the subscriptions registered so far are removed again and the error is
     * rethrown, so a later retry cannot register duplicate listeners.
     */
    enable() {
        if (this.enabled) {
            return;
        }
        const subscriptions = [
            ['entity:created', (event) => this.handleEntityCreated(event)],
            // The update handler is async. A rejection is caught and logged here
            // because nothing in this class awaits the listener's result.
            // NOTE(review): assumes the emitter does not await or catch listener
            // promises itself - confirm against GraphEventEmitter.
            ['entity:updated', (event) => this.handleEntityUpdated(event).catch((error) => {
                console.error(`TFIDFEventSync: failed to update index for entity "${event?.entityName}"`, error);
            })],
            ['entity:deleted', (event) => this.handleEntityDeleted(event)],
        ];
        try {
            for (const [eventType, listener] of subscriptions) {
                this.unsubscribers.push(this.eventEmitter.on(eventType, listener));
            }
        }
        catch (error) {
            // Roll back the partial registration before reporting the failure.
            for (const unsubscribe of this.unsubscribers) {
                unsubscribe();
            }
            this.unsubscribers = [];
            throw error;
        }
        this.enabled = true;
    }
    /**
     * Disable automatic index synchronization.
     *
     * Unsubscribes from all events. Calling it while disabled is a no-op.
     */
    disable() {
        if (!this.enabled) {
            return;
        }
        for (const unsubscribe of this.unsubscribers) {
            unsubscribe();
        }
        this.unsubscribers = [];
        this.enabled = false;
    }
    /**
     * Check if synchronization is enabled.
     *
     * @returns True if enabled
     */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Handle entity:created event by adding the new entity to the index.
     * Does nothing while the index manager reports it is not initialized.
     *
     * @param event - Event carrying the created entity in `event.entity`
     * @private
     */
    handleEntityCreated(event) {
        if (!this.indexManager.isInitialized()) {
            return;
        }
        // Forward only the fields the index is built from.
        const { name, entityType, observations } = event.entity;
        this.indexManager.addDocument({ name, entityType, observations });
    }
    /**
     * Handle entity:updated event by re-indexing the entity's current state.
     *
     * The event only carries `entityName`, so the entity is looked up in a
     * freshly loaded graph. If it is no longer present, nothing is updated.
     *
     * @param event - Event carrying the updated entity's name in `event.entityName`
     * @returns Promise that settles once the index has been updated; it rejects
     *          if loading the graph fails
     * @private
     */
    async handleEntityUpdated(event) {
        if (!this.indexManager.isInitialized()) {
            return;
        }
        // Fetch the current entity state
        const graph = await this.storage.loadGraph();
        const entity = graph.entities.find((candidate) => candidate.name === event.entityName);
        if (!entity) {
            return;
        }
        const { name, entityType, observations } = entity;
        this.indexManager.updateDocument({ name, entityType, observations });
    }
    /**
     * Handle entity:deleted event by removing the entity from the index.
     * Does nothing while the index manager reports it is not initialized.
     *
     * @param event - Event carrying the deleted entity's name in `event.entityName`
     * @private
     */
    handleEntityDeleted(event) {
        if (!this.indexManager.isInitialized()) {
            return;
        }
        this.indexManager.removeDocument(event.entityName);
    }
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TF-IDF Index Manager
|
|
3
|
+
*
|
|
4
|
+
* Manages pre-calculated TF-IDF indexes for fast ranked search.
|
|
5
|
+
* Handles index building, incremental updates, and persistence.
|
|
6
|
+
*
|
|
7
|
+
* @module search/TFIDFIndexManager
|
|
8
|
+
*/
|
|
9
|
+
import type { TFIDFIndex, KnowledgeGraph, ReadonlyKnowledgeGraph } from '../types/index.js';
|
|
10
|
+
/**
|
|
11
|
+
* Manages TF-IDF index lifecycle: building, updating, and persistence.
|
|
12
|
+
*/
|
|
13
|
+
export declare class TFIDFIndexManager {
    private indexPath;
    private index;
    constructor(storageDir: string);
    /**
     * Build a complete TF-IDF index from a knowledge graph.
     *
     * @param graph - Knowledge graph to index
     * @returns Newly built TF-IDF index
     */
    buildIndex(graph: ReadonlyKnowledgeGraph): Promise<TFIDFIndex>;
    /**
     * Update the index for a set of changed entities instead of rebuilding
     * every document vector.
     *
     * @param graph - Updated knowledge graph
     * @param changedEntityNames - Names of entities that changed
     * @returns The updated TF-IDF index
     */
    updateIndex(graph: ReadonlyKnowledgeGraph, changedEntityNames: Set<string>): Promise<TFIDFIndex>;
    /**
     * Load index from disk.
     *
     * @returns Loaded index or null if not found
     */
    loadIndex(): Promise<TFIDFIndex | null>;
    /**
     * Save index to disk.
     *
     * @param index - Index to save (uses cached index if not provided)
     */
    saveIndex(index?: TFIDFIndex): Promise<void>;
    /**
     * Get the current cached index.
     *
     * @returns Cached index or null if not loaded
     */
    getIndex(): TFIDFIndex | null;
    /**
     * Clear the cached index and delete from disk.
     */
    clearIndex(): Promise<void>;
    /**
     * Check if the index needs rebuilding based on graph state.
     *
     * @param graph - Current knowledge graph
     * @returns True if index should be rebuilt
     */
    needsRebuild(graph: KnowledgeGraph): boolean;
    /**
     * Phase 10 Sprint 3: Add a single document to the index incrementally.
     *
     * Computes term frequencies for the new document only, then recalculates
     * IDF for every term because the total document count has changed.
     * Does nothing when no index is loaded.
     *
     * @param entity - The entity to add
     *
     * @example
     * ```typescript
     * const indexManager = new TFIDFIndexManager('/data');
     * await indexManager.loadIndex();
     *
     * // Add new entity
     * indexManager.addDocument({
     *   name: 'NewEntity',
     *   entityType: 'person',
     *   observations: ['Software engineer']
     * });
     * ```
     */
    addDocument(entity: {
        name: string;
        entityType: string;
        observations: string[];
    }): void;
    /**
     * Phase 10 Sprint 3: Remove a single document from the index incrementally.
     *
     * Drops the document and recalculates IDF for every term because the
     * total document count has changed. Does nothing when no index is loaded
     * or the entity is not indexed.
     *
     * @param entityName - Name of the entity to remove
     *
     * @example
     * ```typescript
     * indexManager.removeDocument('DeletedEntity');
     * ```
     */
    removeDocument(entityName: string): void;
    /**
     * Phase 10 Sprint 3: Update a single document in the index incrementally.
     *
     * Replaces the document's term frequencies and recalculates IDF for the
     * terms that were added to or removed from it. Does nothing when no index
     * is loaded.
     *
     * @param entity - The updated entity
     *
     * @example
     * ```typescript
     * indexManager.updateDocument({
     *   name: 'ExistingEntity',
     *   entityType: 'person',
     *   observations: ['Updated observations']
     * });
     * ```
     */
    updateDocument(entity: {
        name: string;
        entityType: string;
        observations: string[];
    }): void;
    /**
     * Phase 10 Sprint 3: Recalculate IDF scores for a set of terms.
     *
     * @param terms - Set of terms to recalculate IDF for
     * @private
     */
    private recalculateIDFForTerms;
    /**
     * Phase 10 Sprint 3: Recalculate IDF scores for ALL terms in the index.
     *
     * Called when the total document count changes (add/remove document).
     * @private
     */
    private recalculateAllIDF;
    /**
     * Phase 10 Sprint 3: Check if the index is loaded/initialized.
     *
     * @returns True if index is available
     */
    isInitialized(): boolean;
    /**
     * Phase 10 Sprint 3: Get the number of documents in the index.
     *
     * @returns Document count or 0 if not initialized
     */
    getDocumentCount(): number;
}
|
|
151
|
+
//# sourceMappingURL=TFIDFIndexManager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TFIDFIndexManager.d.ts","sourceRoot":"","sources":["../../src/search/TFIDFIndexManager.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAE,UAAU,EAAkB,cAAc,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAgB5G;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,KAAK,CAA2B;gBAE5B,UAAU,EAAE,MAAM;IAI9B;;;;;OAKG;IACG,UAAU,CAAC,KAAK,EAAE,sBAAsB,GAAG,OAAO,CAAC,UAAU,CAAC;IAgDpE;;;;;;;OAOG;IACG,WAAW,CAAC,KAAK,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC;IAgEtG;;;;OAIG;IACG,SAAS,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAmB7C;;;;OAIG;IACG,SAAS,CAAC,KAAK,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBlD;;;;OAIG;IACH,QAAQ,IAAI,UAAU,GAAG,IAAI;IAI7B;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IASjC;;;;;OAKG;IACH,YAAY,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO;IAsB5C;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,WAAW,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,EAAE,CAAA;KAAE,GAAG,IAAI;IA+BvF;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAqBxC;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,EAAE,CAAA;KAAE,GAAG,IAAI;IAgD1F;;;;;OAKG;IACH,OAAO,CAAC,sBAAsB;IAkC9B;;;;;OAKG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;;;OAIG;IACH,aAAa,IAAI,OAAO;IAIxB;;;;OAIG;IACH,gBAAgB,IAAI,MAAM;CAG3B"}
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import * as fs from 'fs/promises';
|
|
10
10
|
import * as path from 'path';
|
|
11
|
-
import {
|
|
11
|
+
import { calculateIDFFromTokenSets, tokenize } from '../utils/index.js';
|
|
12
12
|
const INDEX_VERSION = '1.0';
|
|
13
13
|
const INDEX_FILENAME = 'tfidf-index.json';
|
|
14
14
|
/**
|
|
@@ -28,18 +28,17 @@ export class TFIDFIndexManager {
|
|
|
28
28
|
*/
|
|
29
29
|
async buildIndex(graph) {
|
|
30
30
|
const documents = new Map();
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
// Build document vectors
|
|
31
|
+
const allTokenSets = [];
|
|
32
|
+
// Build document vectors - tokenize once per document
|
|
34
33
|
for (const entity of graph.entities) {
|
|
35
34
|
const documentText = [
|
|
36
35
|
entity.name,
|
|
37
36
|
entity.entityType,
|
|
38
37
|
...entity.observations,
|
|
39
38
|
].join(' ');
|
|
40
|
-
allDocumentTexts.push(documentText);
|
|
41
39
|
const tokens = tokenize(documentText);
|
|
42
|
-
|
|
40
|
+
const tokenSet = new Set(tokens);
|
|
41
|
+
allTokenSets.push(tokenSet);
|
|
43
42
|
// Calculate term frequencies
|
|
44
43
|
const termFreq = {};
|
|
45
44
|
for (const term of tokens) {
|
|
@@ -51,11 +50,11 @@ export class TFIDFIndexManager {
|
|
|
51
50
|
documentText,
|
|
52
51
|
});
|
|
53
52
|
}
|
|
54
|
-
// Calculate IDF for all terms
|
|
53
|
+
// Calculate IDF for all terms using pre-tokenized sets (O(1) lookup per document)
|
|
55
54
|
const idf = new Map();
|
|
56
|
-
const allTerms = new Set(
|
|
55
|
+
const allTerms = new Set(allTokenSets.flatMap(s => Array.from(s)));
|
|
57
56
|
for (const term of allTerms) {
|
|
58
|
-
const idfScore =
|
|
57
|
+
const idfScore = calculateIDFFromTokenSets(term, allTokenSets);
|
|
59
58
|
idf.set(term, idfScore);
|
|
60
59
|
}
|
|
61
60
|
this.index = {
|
|
@@ -80,8 +79,7 @@ export class TFIDFIndexManager {
|
|
|
80
79
|
return this.buildIndex(graph);
|
|
81
80
|
}
|
|
82
81
|
// Rebuild document vectors for changed entities
|
|
83
|
-
const
|
|
84
|
-
const allTokens = [];
|
|
82
|
+
const allTokenSets = [];
|
|
85
83
|
const updatedDocuments = new Map(this.index.documents);
|
|
86
84
|
// Remove deleted entities
|
|
87
85
|
for (const entityName of changedEntityNames) {
|
|
@@ -90,16 +88,16 @@ export class TFIDFIndexManager {
|
|
|
90
88
|
updatedDocuments.delete(entityName);
|
|
91
89
|
}
|
|
92
90
|
}
|
|
93
|
-
// Update/add changed entities
|
|
91
|
+
// Update/add changed entities - tokenize once per document
|
|
94
92
|
for (const entity of graph.entities) {
|
|
95
93
|
const documentText = [
|
|
96
94
|
entity.name,
|
|
97
95
|
entity.entityType,
|
|
98
96
|
...entity.observations,
|
|
99
97
|
].join(' ');
|
|
100
|
-
allDocumentTexts.push(documentText);
|
|
101
98
|
const tokens = tokenize(documentText);
|
|
102
|
-
|
|
99
|
+
const tokenSet = new Set(tokens);
|
|
100
|
+
allTokenSets.push(tokenSet);
|
|
103
101
|
if (changedEntityNames.has(entity.name)) {
|
|
104
102
|
// Calculate term frequencies for changed entity
|
|
105
103
|
const termFreq = {};
|
|
@@ -113,11 +111,11 @@ export class TFIDFIndexManager {
|
|
|
113
111
|
});
|
|
114
112
|
}
|
|
115
113
|
}
|
|
116
|
-
// Recalculate IDF
|
|
114
|
+
// Recalculate IDF using pre-tokenized sets (O(1) lookup per document)
|
|
117
115
|
const idf = new Map();
|
|
118
|
-
const allTerms = new Set(
|
|
116
|
+
const allTerms = new Set(allTokenSets.flatMap(s => Array.from(s)));
|
|
119
117
|
for (const term of allTerms) {
|
|
120
|
-
const idfScore =
|
|
118
|
+
const idfScore = calculateIDFFromTokenSets(term, allTokenSets);
|
|
121
119
|
idf.set(term, idfScore);
|
|
122
120
|
}
|
|
123
121
|
this.index = {
|
|
@@ -214,4 +212,221 @@ export class TFIDFIndexManager {
|
|
|
214
212
|
}
|
|
215
213
|
return false;
|
|
216
214
|
}
|
|
215
|
+
// ==================== Phase 10 Sprint 3: Incremental Index Updates ====================
|
|
216
|
+
/**
|
|
217
|
+
* Phase 10 Sprint 3: Add a single document to the index incrementally.
|
|
218
|
+
*
|
|
219
|
+
* More efficient than rebuilding the entire index for single entity additions.
|
|
220
|
+
* Updates TF for the new document and recalculates IDF for affected terms.
|
|
221
|
+
*
|
|
222
|
+
* @param entity - The entity to add
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* ```typescript
|
|
226
|
+
* const indexManager = new TFIDFIndexManager('/data');
|
|
227
|
+
* await indexManager.loadIndex();
|
|
228
|
+
*
|
|
229
|
+
* // Add new entity
|
|
230
|
+
* indexManager.addDocument({
|
|
231
|
+
* name: 'NewEntity',
|
|
232
|
+
* entityType: 'person',
|
|
233
|
+
* observations: ['Software engineer']
|
|
234
|
+
* });
|
|
235
|
+
* ```
|
|
236
|
+
*/
|
|
237
|
+
addDocument(entity) {
|
|
238
|
+
if (!this.index) {
|
|
239
|
+
// Can't add to non-existent index
|
|
240
|
+
return;
|
|
241
|
+
}
|
|
242
|
+
// Build document text and tokens
|
|
243
|
+
const documentText = [entity.name, entity.entityType, ...entity.observations].join(' ');
|
|
244
|
+
const tokens = tokenize(documentText);
|
|
245
|
+
// Calculate term frequencies
|
|
246
|
+
const termFreq = {};
|
|
247
|
+
for (const term of tokens) {
|
|
248
|
+
termFreq[term] = (termFreq[term] || 0) + 1;
|
|
249
|
+
}
|
|
250
|
+
// Add to documents map
|
|
251
|
+
this.index.documents.set(entity.name, {
|
|
252
|
+
entityName: entity.name,
|
|
253
|
+
terms: termFreq,
|
|
254
|
+
documentText,
|
|
255
|
+
});
|
|
256
|
+
// Update IDF for ALL terms because N changed (total document count)
|
|
257
|
+
// IDF = log(N/df), and N has increased
|
|
258
|
+
this.recalculateAllIDF();
|
|
259
|
+
// Update timestamp
|
|
260
|
+
this.index.lastUpdated = new Date().toISOString();
|
|
261
|
+
}
|
|
262
|
+
/**
|
|
263
|
+
* Phase 10 Sprint 3: Remove a single document from the index incrementally.
|
|
264
|
+
*
|
|
265
|
+
* More efficient than rebuilding the entire index for single entity deletions.
|
|
266
|
+
* Recalculates IDF for terms that were in the removed document.
|
|
267
|
+
*
|
|
268
|
+
* @param entityName - Name of the entity to remove
|
|
269
|
+
*
|
|
270
|
+
* @example
|
|
271
|
+
* ```typescript
|
|
272
|
+
* indexManager.removeDocument('DeletedEntity');
|
|
273
|
+
* ```
|
|
274
|
+
*/
|
|
275
|
+
removeDocument(entityName) {
|
|
276
|
+
if (!this.index) {
|
|
277
|
+
return;
|
|
278
|
+
}
|
|
279
|
+
const document = this.index.documents.get(entityName);
|
|
280
|
+
if (!document) {
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
283
|
+
// Remove from documents map
|
|
284
|
+
this.index.documents.delete(entityName);
|
|
285
|
+
// Update IDF for ALL terms because N changed (total document count)
|
|
286
|
+
// IDF = log(N/df), and N has decreased
|
|
287
|
+
this.recalculateAllIDF();
|
|
288
|
+
// Update timestamp
|
|
289
|
+
this.index.lastUpdated = new Date().toISOString();
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Phase 10 Sprint 3: Update a single document in the index incrementally.
|
|
293
|
+
*
|
|
294
|
+
* More efficient than rebuilding the entire index for single entity updates.
|
|
295
|
+
* Handles both term changes and observation updates.
|
|
296
|
+
*
|
|
297
|
+
* @param entity - The updated entity
|
|
298
|
+
*
|
|
299
|
+
* @example
|
|
300
|
+
* ```typescript
|
|
301
|
+
* indexManager.updateDocument({
|
|
302
|
+
* name: 'ExistingEntity',
|
|
303
|
+
* entityType: 'person',
|
|
304
|
+
* observations: ['Updated observations']
|
|
305
|
+
* });
|
|
306
|
+
* ```
|
|
307
|
+
*/
|
|
308
|
+
updateDocument(entity) {
|
|
309
|
+
if (!this.index) {
|
|
310
|
+
return;
|
|
311
|
+
}
|
|
312
|
+
const oldDocument = this.index.documents.get(entity.name);
|
|
313
|
+
const oldTerms = oldDocument ? new Set(Object.keys(oldDocument.terms)) : new Set();
|
|
314
|
+
// Build new document
|
|
315
|
+
const documentText = [entity.name, entity.entityType, ...entity.observations].join(' ');
|
|
316
|
+
const tokens = tokenize(documentText);
|
|
317
|
+
const newTerms = new Set(tokens);
|
|
318
|
+
// Calculate term frequencies
|
|
319
|
+
const termFreq = {};
|
|
320
|
+
for (const term of tokens) {
|
|
321
|
+
termFreq[term] = (termFreq[term] || 0) + 1;
|
|
322
|
+
}
|
|
323
|
+
// Update documents map
|
|
324
|
+
this.index.documents.set(entity.name, {
|
|
325
|
+
entityName: entity.name,
|
|
326
|
+
terms: termFreq,
|
|
327
|
+
documentText,
|
|
328
|
+
});
|
|
329
|
+
// Find terms that changed (added or removed)
|
|
330
|
+
const changedTerms = new Set();
|
|
331
|
+
for (const term of oldTerms) {
|
|
332
|
+
if (!newTerms.has(term)) {
|
|
333
|
+
changedTerms.add(term);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
for (const term of newTerms) {
|
|
337
|
+
if (!oldTerms.has(term)) {
|
|
338
|
+
changedTerms.add(term);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
// Recalculate IDF for changed terms
|
|
342
|
+
if (changedTerms.size > 0) {
|
|
343
|
+
this.recalculateIDFForTerms(changedTerms);
|
|
344
|
+
}
|
|
345
|
+
// Update timestamp
|
|
346
|
+
this.index.lastUpdated = new Date().toISOString();
|
|
347
|
+
}
|
|
348
|
+
/**
|
|
349
|
+
* Phase 10 Sprint 3: Recalculate IDF scores for a set of terms.
|
|
350
|
+
*
|
|
351
|
+
* @param terms - Set of terms to recalculate IDF for
|
|
352
|
+
* @private
|
|
353
|
+
*/
|
|
354
|
+
recalculateIDFForTerms(terms) {
|
|
355
|
+
if (!this.index) {
|
|
356
|
+
return;
|
|
357
|
+
}
|
|
358
|
+
const totalDocs = this.index.documents.size;
|
|
359
|
+
if (totalDocs === 0) {
|
|
360
|
+
// No documents, clear all IDF for these terms
|
|
361
|
+
for (const term of terms) {
|
|
362
|
+
this.index.idf.delete(term);
|
|
363
|
+
}
|
|
364
|
+
return;
|
|
365
|
+
}
|
|
366
|
+
// Count documents containing each term
|
|
367
|
+
for (const term of terms) {
|
|
368
|
+
let docCount = 0;
|
|
369
|
+
for (const doc of this.index.documents.values()) {
|
|
370
|
+
if (term in doc.terms) {
|
|
371
|
+
docCount++;
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
if (docCount > 0) {
|
|
375
|
+
// IDF = log(N / df) where N = total docs, df = doc frequency
|
|
376
|
+
const idfScore = Math.log(totalDocs / docCount);
|
|
377
|
+
this.index.idf.set(term, idfScore);
|
|
378
|
+
}
|
|
379
|
+
else {
|
|
380
|
+
// Term no longer exists in any document
|
|
381
|
+
this.index.idf.delete(term);
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
/**
|
|
386
|
+
* Phase 10 Sprint 3: Recalculate IDF scores for ALL terms in the index.
|
|
387
|
+
*
|
|
388
|
+
* Called when the total document count changes (add/remove document).
|
|
389
|
+
* @private
|
|
390
|
+
*/
|
|
391
|
+
recalculateAllIDF() {
|
|
392
|
+
if (!this.index) {
|
|
393
|
+
return;
|
|
394
|
+
}
|
|
395
|
+
const totalDocs = this.index.documents.size;
|
|
396
|
+
if (totalDocs === 0) {
|
|
397
|
+
// No documents, clear all IDF
|
|
398
|
+
this.index.idf.clear();
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
// Build term -> document count map
|
|
402
|
+
const termDocCounts = new Map();
|
|
403
|
+
for (const doc of this.index.documents.values()) {
|
|
404
|
+
for (const term of Object.keys(doc.terms)) {
|
|
405
|
+
termDocCounts.set(term, (termDocCounts.get(term) ?? 0) + 1);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
// Clear old IDF and recalculate
|
|
409
|
+
this.index.idf.clear();
|
|
410
|
+
for (const [term, docCount] of termDocCounts) {
|
|
411
|
+
// IDF = log(N / df) where N = total docs, df = doc frequency
|
|
412
|
+
const idfScore = Math.log(totalDocs / docCount);
|
|
413
|
+
this.index.idf.set(term, idfScore);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
/**
|
|
417
|
+
* Phase 10 Sprint 3: Check if the index is loaded/initialized.
|
|
418
|
+
*
|
|
419
|
+
* @returns True if index is available
|
|
420
|
+
*/
|
|
421
|
+
isInitialized() {
|
|
422
|
+
return this.index !== null;
|
|
423
|
+
}
|
|
424
|
+
/**
|
|
425
|
+
* Phase 10 Sprint 3: Get the number of documents in the index.
|
|
426
|
+
*
|
|
427
|
+
* @returns Document count or 0 if not initialized
|
|
428
|
+
*/
|
|
429
|
+
getDocumentCount() {
|
|
430
|
+
return this.index?.documents.size ?? 0;
|
|
431
|
+
}
|
|
217
432
|
}
|