@danielsimonjr/memory-mcp 9.9.0 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/EntityManager.d.ts +2 -0
- package/dist/core/EntityManager.d.ts.map +1 -1
- package/dist/core/EntityManager.js +14 -9
- package/dist/core/RelationManager.d.ts.map +1 -1
- package/dist/core/RelationManager.js +5 -4
- package/dist/features/CompressionManager.d.ts +3 -1
- package/dist/features/CompressionManager.d.ts.map +1 -1
- package/dist/features/CompressionManager.js +14 -5
- package/dist/search/BM25Search.d.ts +148 -0
- package/dist/search/BM25Search.d.ts.map +1 -0
- package/dist/search/BM25Search.js +339 -0
- package/dist/search/EarlyTerminationManager.d.ts +140 -0
- package/dist/search/EarlyTerminationManager.d.ts.map +1 -0
- package/dist/search/EarlyTerminationManager.js +279 -0
- package/dist/search/EmbeddingCache.d.ts +175 -0
- package/dist/search/EmbeddingCache.d.ts.map +1 -0
- package/dist/search/EmbeddingCache.js +246 -0
- package/dist/search/EmbeddingService.d.ts +108 -9
- package/dist/search/EmbeddingService.d.ts.map +1 -1
- package/dist/search/EmbeddingService.js +187 -15
- package/dist/search/HybridScorer.d.ts +181 -0
- package/dist/search/HybridScorer.d.ts.map +1 -0
- package/dist/search/HybridScorer.js +257 -0
- package/dist/search/IncrementalIndexer.d.ts +201 -0
- package/dist/search/IncrementalIndexer.d.ts.map +1 -0
- package/dist/search/IncrementalIndexer.js +342 -0
- package/dist/search/OptimizedInvertedIndex.d.ts +163 -0
- package/dist/search/OptimizedInvertedIndex.d.ts.map +1 -0
- package/dist/search/OptimizedInvertedIndex.js +358 -0
- package/dist/search/ParallelSearchExecutor.d.ts +172 -0
- package/dist/search/ParallelSearchExecutor.d.ts.map +1 -0
- package/dist/search/ParallelSearchExecutor.js +309 -0
- package/dist/search/QuantizedVectorStore.d.ts +171 -0
- package/dist/search/QuantizedVectorStore.d.ts.map +1 -0
- package/dist/search/QuantizedVectorStore.js +307 -0
- package/dist/search/QueryCostEstimator.d.ts +135 -2
- package/dist/search/QueryCostEstimator.d.ts.map +1 -1
- package/dist/search/QueryCostEstimator.js +298 -1
- package/dist/search/QueryPlanCache.d.ts +220 -0
- package/dist/search/QueryPlanCache.d.ts.map +1 -0
- package/dist/search/QueryPlanCache.js +379 -0
- package/dist/search/ReflectionManager.d.ts +49 -0
- package/dist/search/ReflectionManager.d.ts.map +1 -1
- package/dist/search/ReflectionManager.js +113 -6
- package/dist/search/index.d.ts +12 -3
- package/dist/search/index.d.ts.map +1 -1
- package/dist/search/index.js +20 -2
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/types.d.ts +41 -2
- package/dist/types/types.d.ts.map +1 -1
- package/dist/utils/BatchProcessor.d.ts +271 -0
- package/dist/utils/BatchProcessor.d.ts.map +1 -0
- package/dist/utils/BatchProcessor.js +376 -0
- package/dist/utils/MemoryMonitor.d.ts +176 -0
- package/dist/utils/MemoryMonitor.d.ts.map +1 -0
- package/dist/utils/MemoryMonitor.js +305 -0
- package/dist/utils/WorkerPoolManager.d.ts +233 -0
- package/dist/utils/WorkerPoolManager.d.ts.map +1 -0
- package/dist/utils/WorkerPoolManager.js +420 -0
- package/dist/utils/compressedCache.d.ts +29 -0
- package/dist/utils/compressedCache.d.ts.map +1 -1
- package/dist/utils/compressedCache.js +39 -0
- package/dist/utils/entityUtils.d.ts +25 -0
- package/dist/utils/entityUtils.d.ts.map +1 -1
- package/dist/utils/entityUtils.js +33 -0
- package/dist/utils/index.d.ts +4 -1
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +8 -0
- package/package.json +1 -1
|
@@ -228,6 +228,8 @@ export declare class EntityManager {
|
|
|
228
228
|
/**
|
|
229
229
|
* Add tags to multiple entities in a single operation.
|
|
230
230
|
*
|
|
231
|
+
* OPTIMIZED: Uses Map for O(1) entity lookups instead of O(n) find() per entity.
|
|
232
|
+
*
|
|
231
233
|
* @param entityNames - Names of entities to tag
|
|
232
234
|
* @param tags - Tags to add to each entity
|
|
233
235
|
* @returns Array of results showing which tags were added to each entity
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"EntityManager.d.ts","sourceRoot":"","sources":["../../src/core/EntityManager.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,2BAA2B,EAAE,MAAM,mBAAmB,CAAC;AAC7E,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAyBtD;;GAEG;AACH,qBAAa,aAAa;IACZ,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,YAAY;IAEzC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2CG;IACG,cAAc,CAClB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,CAAC,EAAE,2BAA2B,GACpC,OAAO,CAAC,MAAM,EAAE,CAAC;IA+EpB;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACG,cAAc,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAoB1D;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACG,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAKrD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAoCG;IACG,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAuB3E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAgCG;IACG,WAAW,CACf,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;KAAE,CAAC,GACzD,OAAO,CAAC,MAAM,EAAE,CAAC;IAuCpB;;;;;;;;;OASG;IACG,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAsBvG;;;;;;;OAOG;IACG,UAAU,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"EntityManager.d.ts","sourceRoot":"","sources":["../../src/core/EntityManager.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,2BAA2B,EAAE,MAAM,mBAAmB,CAAC;AAC7E,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAyBtD;;GAEG;AACH,qBAAa,aAAa;IACZ,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,YAAY;IAEzC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2CG;IACG,cAAc,CAClB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,CAAC,EAAE,2BAA2B,GACpC,OAAO,CAAC,MAAM,EAAE,CAAC;IA+EpB;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACG,cAAc,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAoB1D;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACG,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAKrD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAoCG;IACG,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAuB3E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAgCG;IACG,WAAW,CACf,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;KAAE,CAAC,GACzD,OAAO,CAAC,MAAM,EAAE,CAAC;IAuCpB;;;;;;;;;OASG;IACG,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAsBvG;;;;;;;OAOG;IACG,UAAU,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAgC5G;;;;;;;;OAQG;IACG,aAAa,CAAC,UAAU,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAkBhH;;;;;;;;OAQG;IACG,yBAAyB,CAAC,WAAW,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,EAAE,CAAA;KAAE,EAAE,CAAC;IAuC9H;;;;;;OAMG;IACG,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAuBxG;;;;;;;;;;OAUG;IACG,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;CAmCvH"}
|
|
@@ -357,9 +357,8 @@ export class EntityManager {
|
|
|
357
357
|
* @throws {EntityNotFoundError} If entity is not found
|
|
358
358
|
*/
|
|
359
359
|
async removeTags(entityName, tags) {
|
|
360
|
-
|
|
361
|
-
const
|
|
362
|
-
const entity = graph.entities.find(e => e.name === entityName);
|
|
360
|
+
// OPTIMIZED: Use O(1) NameIndex lookup instead of loadGraph() + O(n) find()
|
|
361
|
+
const entity = this.storage.getEntityByName(entityName);
|
|
363
362
|
if (!entity) {
|
|
364
363
|
throw new EntityNotFoundError(entityName);
|
|
365
364
|
}
|
|
@@ -372,14 +371,13 @@ export class EntityManager {
|
|
|
372
371
|
// Capture existing tags (lowercase) BEFORE filtering to accurately track removals
|
|
373
372
|
const existingTagsLower = entity.tags.map(t => t.toLowerCase());
|
|
374
373
|
// Filter out the tags to remove
|
|
375
|
-
|
|
374
|
+
const newTags = entity.tags.filter(tag => !normalizedTags.includes(tag.toLowerCase()));
|
|
376
375
|
// A tag was removed if it existed in the original tags
|
|
377
376
|
const removedTags = normalizedTags.filter(tag => existingTagsLower.includes(tag));
|
|
378
|
-
// Update
|
|
379
|
-
if (
|
|
380
|
-
|
|
377
|
+
// Update entity via storage if tags were removed
|
|
378
|
+
if (newTags.length < originalLength) {
|
|
379
|
+
await this.storage.updateEntity(entityName, { tags: newTags });
|
|
381
380
|
}
|
|
382
|
-
await this.storage.saveGraph(graph);
|
|
383
381
|
return { entityName, removedTags };
|
|
384
382
|
}
|
|
385
383
|
/**
|
|
@@ -408,6 +406,8 @@ export class EntityManager {
|
|
|
408
406
|
/**
|
|
409
407
|
* Add tags to multiple entities in a single operation.
|
|
410
408
|
*
|
|
409
|
+
* OPTIMIZED: Uses Map for O(1) entity lookups instead of O(n) find() per entity.
|
|
410
|
+
*
|
|
411
411
|
* @param entityNames - Names of entities to tag
|
|
412
412
|
* @param tags - Tags to add to each entity
|
|
413
413
|
* @returns Array of results showing which tags were added to each entity
|
|
@@ -417,8 +417,13 @@ export class EntityManager {
|
|
|
417
417
|
const timestamp = new Date().toISOString();
|
|
418
418
|
const normalizedTags = tags.map(tag => tag.toLowerCase());
|
|
419
419
|
const results = [];
|
|
420
|
+
// OPTIMIZED: Build Map for O(1) lookups instead of O(n) find() per entity
|
|
421
|
+
const entityMap = new Map();
|
|
422
|
+
for (const e of graph.entities) {
|
|
423
|
+
entityMap.set(e.name, e);
|
|
424
|
+
}
|
|
420
425
|
for (const entityName of entityNames) {
|
|
421
|
-
const entity =
|
|
426
|
+
const entity = entityMap.get(entityName);
|
|
422
427
|
if (!entity) {
|
|
423
428
|
continue; // Skip non-existent entities
|
|
424
429
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"RelationManager.d.ts","sourceRoot":"","sources":["../../src/core/RelationManager.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAKtD;;GAEG;AACH,qBAAa,eAAe;IACd,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,YAAY;IAEzC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAwCG;IACG,eAAe,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAiEjE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAqCG;IACG,eAAe,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"RelationManager.d.ts","sourceRoot":"","sources":["../../src/core/RelationManager.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAKtD;;GAEG;AACH,qBAAa,eAAe;IACd,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,YAAY;IAEzC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAwCG;IACG,eAAe,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAiEjE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAqCG;IACG,eAAe,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAuC3D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA+BG;IACG,YAAY,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;CAK5D"}
|
|
@@ -158,10 +158,11 @@ export class RelationManager {
|
|
|
158
158
|
affectedEntityNames.add(rel.from);
|
|
159
159
|
affectedEntityNames.add(rel.to);
|
|
160
160
|
});
|
|
161
|
-
//
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
161
|
+
// OPTIMIZED: Use Set<string> for O(1) lookup instead of O(n) array.some()
|
|
162
|
+
// Create composite keys for relations to delete
|
|
163
|
+
const relationsToDeleteSet = new Set(relations.map(r => `${r.from}|${r.to}|${r.relationType}`));
|
|
164
|
+
// Remove relations with O(1) Set lookup per relation instead of O(m) array scan
|
|
165
|
+
graph.relations = graph.relations.filter(r => !relationsToDeleteSet.has(`${r.from}|${r.to}|${r.relationType}`));
|
|
165
166
|
// Update lastModified for affected entities
|
|
166
167
|
graph.entities.forEach(entity => {
|
|
167
168
|
if (affectedEntityNames.has(entity.name)) {
|
|
@@ -18,8 +18,10 @@ export declare class CompressionManager {
|
|
|
18
18
|
* Prepare an entity for efficient similarity comparisons.
|
|
19
19
|
* Pre-computes all normalized data to avoid repeated computation.
|
|
20
20
|
*
|
|
21
|
+
* Phase 12 Sprint 1: Added nameHash for fast bucketing.
|
|
22
|
+
*
|
|
21
23
|
* @param entity - The entity to prepare
|
|
22
|
-
* @returns PreparedEntity with pre-computed data
|
|
24
|
+
* @returns PreparedEntity with pre-computed data including hash
|
|
23
25
|
*/
|
|
24
26
|
private prepareEntity;
|
|
25
27
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CompressionManager.d.ts","sourceRoot":"","sources":["../../src/features/CompressionManager.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAY,iBAAiB,EAAE,cAAc,EAAE,2BAA2B,
|
|
1
|
+
{"version":3,"file":"CompressionManager.d.ts","sourceRoot":"","sources":["../../src/features/CompressionManager.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAY,iBAAiB,EAAE,cAAc,EAAE,2BAA2B,EAAkB,MAAM,mBAAmB,CAAC;AAC1I,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAW5D;;GAEG;AACH,qBAAa,kBAAkB;IACjB,OAAO,CAAC,OAAO;gBAAP,OAAO,EAAE,YAAY;IAEzC;;;;;;;;OAQG;IACH,OAAO,CAAC,aAAa;IAYrB;;;;;;OAMG;IACH,OAAO,CAAC,eAAe;IAQvB;;;;;;;;;;;OAWG;IACH,yBAAyB,CAAC,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM;IAwCzD;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAU3B;;;;;;;OAOG;IACH,OAAO,CAAC,2BAA2B;IAoCnC;;;;;;;;;;;;;;;;OAgBG;IACG,cAAc,CAClB,SAAS,GAAE,MAAoC,EAC/C,OAAO,CAAC,EAAE,2BAA2B,GACpC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IA0GtB;;;;;;;;;;;;;;;;;;;;OAoBG;IACG,aAAa,CACjB,WAAW,EAAE,MAAM,EAAE,EACrB,UAAU,CAAC,EAAE,MAAM,EACnB,OAAO,GAAE;QACP,KAAK,CAAC,EAAE,cAAc,CAAC;QACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;KACf,GACL,OAAO,CAAC,MAAM,CAAC;IA8FlB;;;;;;;;;;;OAWG;IACG,aAAa,CACjB,SAAS,GAAE,MAAoC,EAC/C,MAAM,GAAE,OAAe,EACvB,OAAO,CAAC,EAAE,2BAA2B,GACpC,OAAO,CAAC,iBAAiB,CAAC;CAgH9B"}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* @module features/CompressionManager
|
|
8
8
|
*/
|
|
9
|
-
import { levenshteinDistance, checkCancellation, createProgressReporter, createProgress, } from '../utils/index.js';
|
|
9
|
+
import { levenshteinDistance, checkCancellation, createProgressReporter, createProgress, fnv1aHash, } from '../utils/index.js';
|
|
10
10
|
import { EntityNotFoundError, InsufficientEntitiesError } from '../utils/errors.js';
|
|
11
11
|
import { SIMILARITY_WEIGHTS, DEFAULT_DUPLICATE_THRESHOLD } from '../utils/constants.js';
|
|
12
12
|
/**
|
|
@@ -21,16 +21,20 @@ export class CompressionManager {
|
|
|
21
21
|
* Prepare an entity for efficient similarity comparisons.
|
|
22
22
|
* Pre-computes all normalized data to avoid repeated computation.
|
|
23
23
|
*
|
|
24
|
+
* Phase 12 Sprint 1: Added nameHash for fast bucketing.
|
|
25
|
+
*
|
|
24
26
|
* @param entity - The entity to prepare
|
|
25
|
-
* @returns PreparedEntity with pre-computed data
|
|
27
|
+
* @returns PreparedEntity with pre-computed data including hash
|
|
26
28
|
*/
|
|
27
29
|
prepareEntity(entity) {
|
|
30
|
+
const nameLower = entity.name.toLowerCase();
|
|
28
31
|
return {
|
|
29
32
|
entity,
|
|
30
|
-
nameLower
|
|
33
|
+
nameLower,
|
|
31
34
|
typeLower: entity.entityType.toLowerCase(),
|
|
32
35
|
observationSet: new Set(entity.observations.map(o => o.toLowerCase())),
|
|
33
36
|
tagSet: new Set((entity.tags ?? []).map(t => t.toLowerCase())),
|
|
37
|
+
nameHash: fnv1aHash(nameLower),
|
|
34
38
|
};
|
|
35
39
|
}
|
|
36
40
|
/**
|
|
@@ -411,15 +415,20 @@ export class CompressionManager {
|
|
|
411
415
|
// Phase 2: Merge duplicates (50-100% progress)
|
|
412
416
|
const totalGroups = duplicateGroups.length;
|
|
413
417
|
let mergedGroups = 0;
|
|
418
|
+
// OPTIMIZATION: Build entity lookup map for O(1) access during merges
|
|
419
|
+
const entityMap = new Map();
|
|
420
|
+
for (const entity of graph.entities) {
|
|
421
|
+
entityMap.set(entity.name, entity);
|
|
422
|
+
}
|
|
414
423
|
// Merge all duplicates using the same graph instance
|
|
415
424
|
for (const group of duplicateGroups) {
|
|
416
425
|
// Check for cancellation between merges
|
|
417
426
|
checkCancellation(options?.signal, 'compressGraph');
|
|
418
427
|
try {
|
|
419
|
-
// Count observations before merge using
|
|
428
|
+
// Count observations before merge using O(1) lookup
|
|
420
429
|
let totalObservationsBefore = 0;
|
|
421
430
|
for (const name of group) {
|
|
422
|
-
const entity =
|
|
431
|
+
const entity = entityMap.get(name);
|
|
423
432
|
if (entity) {
|
|
424
433
|
totalObservationsBefore += entity.observations.length;
|
|
425
434
|
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Search
|
|
3
|
+
*
|
|
4
|
+
* BM25 (Best Matching 25) relevance scoring algorithm for lexical search.
|
|
5
|
+
* Provides improved ranking over TF-IDF by incorporating document length normalization.
|
|
6
|
+
*
|
|
7
|
+
* Phase 12 Sprint 3: Search Algorithm Optimization
|
|
8
|
+
*
|
|
9
|
+
* @module search/BM25Search
|
|
10
|
+
*/
|
|
11
|
+
import type { SearchResult } from '../types/index.js';
|
|
12
|
+
import type { GraphStorage } from '../core/GraphStorage.js';
|
|
13
|
+
/**
|
|
14
|
+
* Common English stopwords to filter from queries and documents.
|
|
15
|
+
* These words are too common to provide meaningful ranking signal.
|
|
16
|
+
*/
|
|
17
|
+
export declare const STOPWORDS: Set<string>;
|
|
18
|
+
/**
|
|
19
|
+
* BM25 index entry for a single document.
|
|
20
|
+
*/
|
|
21
|
+
export interface BM25DocumentEntry {
|
|
22
|
+
/** Entity name */
|
|
23
|
+
entityName: string;
|
|
24
|
+
/** Term frequencies in this document */
|
|
25
|
+
termFreqs: Map<string, number>;
|
|
26
|
+
/** Total number of tokens in document */
|
|
27
|
+
docLength: number;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* BM25 index structure.
|
|
31
|
+
*/
|
|
32
|
+
export interface BM25Index {
|
|
33
|
+
/** Document entries keyed by entity name */
|
|
34
|
+
documents: Map<string, BM25DocumentEntry>;
|
|
35
|
+
/** Document frequency for each term (number of docs containing term) */
|
|
36
|
+
documentFrequency: Map<string, number>;
|
|
37
|
+
/** Average document length */
|
|
38
|
+
avgDocLength: number;
|
|
39
|
+
/** Total number of documents */
|
|
40
|
+
totalDocs: number;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* BM25 configuration parameters.
|
|
44
|
+
*/
|
|
45
|
+
export interface BM25Config {
|
|
46
|
+
/** Term frequency saturation parameter (default: 1.2) */
|
|
47
|
+
k1: number;
|
|
48
|
+
/** Length normalization parameter (default: 0.75) */
|
|
49
|
+
b: number;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Default BM25 parameters based on research recommendations.
|
|
53
|
+
*/
|
|
54
|
+
export declare const DEFAULT_BM25_CONFIG: BM25Config;
|
|
55
|
+
/**
|
|
56
|
+
* BM25 Search implementation.
|
|
57
|
+
*
|
|
58
|
+
* BM25 improves over TF-IDF by:
|
|
59
|
+
* 1. Saturating term frequency - prevents long documents from dominating
|
|
60
|
+
* 2. Document length normalization - accounts for varying document sizes
|
|
61
|
+
*
|
|
62
|
+
* Formula:
|
|
63
|
+
* score(D,Q) = sum_i( IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl)) )
|
|
64
|
+
*
|
|
65
|
+
* Where:
|
|
66
|
+
* - f(qi,D) is the term frequency of qi in document D
|
|
67
|
+
* - |D| is the length of document D
|
|
68
|
+
* - avgdl is the average document length
|
|
69
|
+
* - k1 and b are free parameters
|
|
70
|
+
*
|
|
71
|
+
* @example
|
|
72
|
+
* ```typescript
|
|
73
|
+
* const bm25 = new BM25Search(storage);
|
|
74
|
+
* await bm25.buildIndex();
|
|
75
|
+
* const results = await bm25.search('machine learning');
|
|
76
|
+
* ```
|
|
77
|
+
*/
|
|
78
|
+
export declare class BM25Search {
|
|
79
|
+
private storage;
|
|
80
|
+
private index;
|
|
81
|
+
private config;
|
|
82
|
+
constructor(storage: GraphStorage, config?: Partial<BM25Config>);
|
|
83
|
+
/**
|
|
84
|
+
* Get the current configuration.
|
|
85
|
+
*/
|
|
86
|
+
getConfig(): BM25Config;
|
|
87
|
+
/**
|
|
88
|
+
* Update configuration parameters.
|
|
89
|
+
*
|
|
90
|
+
* @param config - New configuration values
|
|
91
|
+
*/
|
|
92
|
+
setConfig(config: Partial<BM25Config>): void;
|
|
93
|
+
/**
|
|
94
|
+
* Tokenize text into lowercase terms with stopword filtering.
|
|
95
|
+
*
|
|
96
|
+
* @param text - Text to tokenize
|
|
97
|
+
* @param filterStopwords - Whether to filter stopwords (default: true)
|
|
98
|
+
* @returns Array of lowercase tokens
|
|
99
|
+
*/
|
|
100
|
+
tokenize(text: string, filterStopwords?: boolean): string[];
|
|
101
|
+
/**
|
|
102
|
+
* Build the BM25 index from the current graph.
|
|
103
|
+
*
|
|
104
|
+
* Should be called after significant graph changes.
|
|
105
|
+
*/
|
|
106
|
+
buildIndex(): Promise<void>;
|
|
107
|
+
/**
|
|
108
|
+
* Search using the BM25 algorithm.
|
|
109
|
+
*
|
|
110
|
+
* @param query - Search query
|
|
111
|
+
* @param limit - Maximum results to return
|
|
112
|
+
* @returns Array of search results sorted by BM25 score
|
|
113
|
+
*/
|
|
114
|
+
search(query: string, limit?: number): Promise<SearchResult[]>;
|
|
115
|
+
/**
|
|
116
|
+
* Update the index for changed entities.
|
|
117
|
+
*
|
|
118
|
+
* @param changedEntityNames - Names of entities that changed
|
|
119
|
+
*/
|
|
120
|
+
update(changedEntityNames: Set<string>): Promise<void>;
|
|
121
|
+
/**
|
|
122
|
+
* Remove an entity from the index.
|
|
123
|
+
*
|
|
124
|
+
* @param entityName - Name of entity to remove
|
|
125
|
+
*/
|
|
126
|
+
remove(entityName: string): boolean;
|
|
127
|
+
/**
|
|
128
|
+
* Clear the index.
|
|
129
|
+
*/
|
|
130
|
+
clearIndex(): void;
|
|
131
|
+
/**
|
|
132
|
+
* Check if the index is built.
|
|
133
|
+
*/
|
|
134
|
+
isIndexed(): boolean;
|
|
135
|
+
/**
|
|
136
|
+
* Get index statistics.
|
|
137
|
+
*/
|
|
138
|
+
getIndexStats(): {
|
|
139
|
+
documents: number;
|
|
140
|
+
terms: number;
|
|
141
|
+
avgDocLength: number;
|
|
142
|
+
} | null;
|
|
143
|
+
/**
|
|
144
|
+
* Convert an entity to searchable text.
|
|
145
|
+
*/
|
|
146
|
+
private entityToText;
|
|
147
|
+
}
|
|
148
|
+
//# sourceMappingURL=BM25Search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BM25Search.d.ts","sourceRoot":"","sources":["../../src/search/BM25Search.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAU,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAG5D;;;GAGG;AACH,eAAO,MAAM,SAAS,aAWpB,CAAC;AAEH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,kBAAkB;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,4CAA4C;IAC5C,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC1C,wEAAwE;IACxE,iBAAiB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,EAAE,EAAE,MAAM,CAAC;IACX,qDAAqD;IACrD,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;GAEG;AACH,eAAO,MAAM,mBAAmB,EAAE,UAGjC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,UAAU;IAKnB,OAAO,CAAC,OAAO;IAJjB,OAAO,CAAC,KAAK,CAA0B;IACvC,OAAO,CAAC,MAAM,CAAa;gBAGjB,OAAO,EAAE,YAAY,EAC7B,MAAM,GAAE,OAAO,CAAC,UAAU,CAAM;IAKlC;;OAEG;IACH,SAAS,IAAI,UAAU;IAIvB;;;;OAIG;IACH,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI;IAI5C;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,eAAe,GAAE,OAAc,GAAG,MAAM,EAAE;IAajE;;;;OAIG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgDjC;;;;;;OAMG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,GAAE,MAA8B,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IA8E3F;;;;OAIG;IACG,MAAM,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAkE5D;;;;OAIG;IACH,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO;IAmCnC;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;OAEG;IACH,aAAa,IAAI;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAWlF;;OAEG;IACH,OAAO,CAAC,YAAY;CAGrB"}
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Search
|
|
3
|
+
*
|
|
4
|
+
* BM25 (Best Matching 25) relevance scoring algorithm for lexical search.
|
|
5
|
+
* Provides improved ranking over TF-IDF by incorporating document length normalization.
|
|
6
|
+
*
|
|
7
|
+
* Phase 12 Sprint 3: Search Algorithm Optimization
|
|
8
|
+
*
|
|
9
|
+
* @module search/BM25Search
|
|
10
|
+
*/
|
|
11
|
+
import { SEARCH_LIMITS } from '../utils/constants.js';
|
|
12
|
+
/**
|
|
13
|
+
* Common English stopwords to filter from queries and documents.
|
|
14
|
+
* These words are too common to provide meaningful ranking signal.
|
|
15
|
+
*/
|
|
16
|
+
export const STOPWORDS = new Set([
|
|
17
|
+
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
|
|
18
|
+
'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that',
|
|
19
|
+
'the', 'to', 'was', 'were', 'will', 'with', 'you', 'your',
|
|
20
|
+
'this', 'but', 'they', 'have', 'had', 'what', 'when', 'where',
|
|
21
|
+
'who', 'which', 'why', 'how', 'all', 'each', 'every', 'both',
|
|
22
|
+
'few', 'more', 'most', 'other', 'some', 'such', 'no', 'not',
|
|
23
|
+
'only', 'own', 'same', 'so', 'than', 'too', 'very', 'can',
|
|
24
|
+
'just', 'should', 'now', 'also', 'being', 'been', 'would',
|
|
25
|
+
'could', 'into', 'over', 'after', 'before', 'between', 'under',
|
|
26
|
+
'again', 'then', 'once', 'here', 'there', 'any', 'about',
|
|
27
|
+
]);
|
|
28
|
+
/**
|
|
29
|
+
* Default BM25 parameters based on research recommendations.
|
|
30
|
+
*/
|
|
31
|
+
export const DEFAULT_BM25_CONFIG = {
|
|
32
|
+
k1: 1.2,
|
|
33
|
+
b: 0.75,
|
|
34
|
+
};
|
|
35
|
+
/**
|
|
36
|
+
* BM25 Search implementation.
|
|
37
|
+
*
|
|
38
|
+
* BM25 improves over TF-IDF by:
|
|
39
|
+
* 1. Saturating term frequency - prevents long documents from dominating
|
|
40
|
+
* 2. Document length normalization - accounts for varying document sizes
|
|
41
|
+
*
|
|
42
|
+
* Formula:
|
|
43
|
+
* score(D,Q) = sum_i( IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl)) )
|
|
44
|
+
*
|
|
45
|
+
* Where:
|
|
46
|
+
* - f(qi,D) is the term frequency of qi in document D
|
|
47
|
+
* - |D| is the length of document D
|
|
48
|
+
* - avgdl is the average document length
|
|
49
|
+
* - k1 and b are free parameters
|
|
50
|
+
*
|
|
51
|
+
* @example
|
|
52
|
+
* ```typescript
|
|
53
|
+
* const bm25 = new BM25Search(storage);
|
|
54
|
+
* await bm25.buildIndex();
|
|
55
|
+
* const results = await bm25.search('machine learning');
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
export class BM25Search {
|
|
59
|
+
storage;
|
|
60
|
+
index = null;
|
|
61
|
+
config;
|
|
62
|
+
constructor(storage, config = {}) {
|
|
63
|
+
this.storage = storage;
|
|
64
|
+
this.config = { ...DEFAULT_BM25_CONFIG, ...config };
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Get the current configuration.
|
|
68
|
+
*/
|
|
69
|
+
getConfig() {
|
|
70
|
+
return { ...this.config };
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Update configuration parameters.
|
|
74
|
+
*
|
|
75
|
+
* @param config - New configuration values
|
|
76
|
+
*/
|
|
77
|
+
setConfig(config) {
|
|
78
|
+
this.config = { ...this.config, ...config };
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Tokenize text into lowercase terms with stopword filtering.
|
|
82
|
+
*
|
|
83
|
+
* @param text - Text to tokenize
|
|
84
|
+
* @param filterStopwords - Whether to filter stopwords (default: true)
|
|
85
|
+
* @returns Array of lowercase tokens
|
|
86
|
+
*/
|
|
87
|
+
tokenize(text, filterStopwords = true) {
|
|
88
|
+
const tokens = text
|
|
89
|
+
.toLowerCase()
|
|
90
|
+
.replace(/[^\w\s]/g, ' ')
|
|
91
|
+
.split(/\s+/)
|
|
92
|
+
.filter(token => token.length > 0);
|
|
93
|
+
if (filterStopwords) {
|
|
94
|
+
return tokens.filter(token => !STOPWORDS.has(token));
|
|
95
|
+
}
|
|
96
|
+
return tokens;
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Build the BM25 index from the current graph.
|
|
100
|
+
*
|
|
101
|
+
* Should be called after significant graph changes.
|
|
102
|
+
*/
|
|
103
|
+
async buildIndex() {
|
|
104
|
+
const graph = await this.storage.loadGraph();
|
|
105
|
+
const documents = new Map();
|
|
106
|
+
const documentFrequency = new Map();
|
|
107
|
+
const termsSeen = new Set();
|
|
108
|
+
let totalDocLength = 0;
|
|
109
|
+
// First pass: tokenize all documents and count term frequencies
|
|
110
|
+
for (const entity of graph.entities) {
|
|
111
|
+
const text = this.entityToText(entity);
|
|
112
|
+
const tokens = this.tokenize(text);
|
|
113
|
+
const termFreqs = new Map();
|
|
114
|
+
// Count term frequencies for this document
|
|
115
|
+
for (const token of tokens) {
|
|
116
|
+
termFreqs.set(token, (termFreqs.get(token) || 0) + 1);
|
|
117
|
+
}
|
|
118
|
+
// Track which terms appear in this document (for IDF calculation)
|
|
119
|
+
termsSeen.clear();
|
|
120
|
+
for (const token of tokens) {
|
|
121
|
+
if (!termsSeen.has(token)) {
|
|
122
|
+
termsSeen.add(token);
|
|
123
|
+
documentFrequency.set(token, (documentFrequency.get(token) || 0) + 1);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
const entry = {
|
|
127
|
+
entityName: entity.name,
|
|
128
|
+
termFreqs,
|
|
129
|
+
docLength: tokens.length,
|
|
130
|
+
};
|
|
131
|
+
documents.set(entity.name, entry);
|
|
132
|
+
totalDocLength += tokens.length;
|
|
133
|
+
}
|
|
134
|
+
const totalDocs = documents.size;
|
|
135
|
+
const avgDocLength = totalDocs > 0 ? totalDocLength / totalDocs : 0;
|
|
136
|
+
this.index = {
|
|
137
|
+
documents,
|
|
138
|
+
documentFrequency,
|
|
139
|
+
avgDocLength,
|
|
140
|
+
totalDocs,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Search using the BM25 algorithm.
|
|
145
|
+
*
|
|
146
|
+
* @param query - Search query
|
|
147
|
+
* @param limit - Maximum results to return
|
|
148
|
+
* @returns Array of search results sorted by BM25 score
|
|
149
|
+
*/
|
|
150
|
+
async search(query, limit = SEARCH_LIMITS.DEFAULT) {
|
|
151
|
+
const effectiveLimit = Math.min(limit, SEARCH_LIMITS.MAX);
|
|
152
|
+
// Ensure index is built
|
|
153
|
+
if (!this.index) {
|
|
154
|
+
await this.buildIndex();
|
|
155
|
+
}
|
|
156
|
+
if (!this.index || this.index.documents.size === 0) {
|
|
157
|
+
return [];
|
|
158
|
+
}
|
|
159
|
+
const graph = await this.storage.loadGraph();
|
|
160
|
+
const entityMap = new Map(graph.entities.map(e => [e.name, e]));
|
|
161
|
+
// Tokenize query
|
|
162
|
+
const queryTerms = this.tokenize(query);
|
|
163
|
+
if (queryTerms.length === 0) {
|
|
164
|
+
return [];
|
|
165
|
+
}
|
|
166
|
+
const { k1, b } = this.config;
|
|
167
|
+
const { documents, documentFrequency, avgDocLength, totalDocs } = this.index;
|
|
168
|
+
const results = [];
|
|
169
|
+
// Calculate BM25 score for each document
|
|
170
|
+
for (const [entityName, docEntry] of documents) {
|
|
171
|
+
const entity = entityMap.get(entityName);
|
|
172
|
+
if (!entity)
|
|
173
|
+
continue;
|
|
174
|
+
let score = 0;
|
|
175
|
+
const matchedFields = {};
|
|
176
|
+
for (const term of queryTerms) {
|
|
177
|
+
const tf = docEntry.termFreqs.get(term) || 0;
|
|
178
|
+
if (tf === 0)
|
|
179
|
+
continue;
|
|
180
|
+
// Calculate IDF
|
|
181
|
+
const df = documentFrequency.get(term) || 0;
|
|
182
|
+
const idf = df > 0 ? Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1) : 0;
|
|
183
|
+
// Calculate BM25 score component
|
|
184
|
+
const numerator = tf * (k1 + 1);
|
|
185
|
+
const denominator = tf + k1 * (1 - b + b * (docEntry.docLength / avgDocLength));
|
|
186
|
+
const termScore = idf * (numerator / denominator);
|
|
187
|
+
score += termScore;
|
|
188
|
+
// Track which fields matched
|
|
189
|
+
if (entity.name.toLowerCase().includes(term)) {
|
|
190
|
+
matchedFields.name = true;
|
|
191
|
+
}
|
|
192
|
+
if (entity.entityType.toLowerCase().includes(term)) {
|
|
193
|
+
matchedFields.entityType = true;
|
|
194
|
+
}
|
|
195
|
+
const matchedObs = entity.observations.filter(o => o.toLowerCase().includes(term));
|
|
196
|
+
if (matchedObs.length > 0) {
|
|
197
|
+
matchedFields.observations = matchedObs;
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
if (score > 0) {
|
|
201
|
+
results.push({
|
|
202
|
+
entity,
|
|
203
|
+
score,
|
|
204
|
+
matchedFields,
|
|
205
|
+
});
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
// Sort by score descending and limit
|
|
209
|
+
return results
|
|
210
|
+
.sort((a, b) => b.score - a.score)
|
|
211
|
+
.slice(0, effectiveLimit);
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Update the index for changed entities.
|
|
215
|
+
*
|
|
216
|
+
* @param changedEntityNames - Names of entities that changed
|
|
217
|
+
*/
|
|
218
|
+
async update(changedEntityNames) {
|
|
219
|
+
if (!this.index) {
|
|
220
|
+
await this.buildIndex();
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
const graph = await this.storage.loadGraph();
|
|
224
|
+
const entityMap = new Map(graph.entities.map(e => [e.name, e]));
|
|
225
|
+
// Process each changed entity
|
|
226
|
+
for (const entityName of changedEntityNames) {
|
|
227
|
+
const entity = entityMap.get(entityName);
|
|
228
|
+
const existingEntry = this.index.documents.get(entityName);
|
|
229
|
+
if (existingEntry) {
|
|
230
|
+
// Remove old term frequencies from document frequency counts
|
|
231
|
+
for (const [term] of existingEntry.termFreqs) {
|
|
232
|
+
const df = this.index.documentFrequency.get(term) || 0;
|
|
233
|
+
if (df <= 1) {
|
|
234
|
+
this.index.documentFrequency.delete(term);
|
|
235
|
+
}
|
|
236
|
+
else {
|
|
237
|
+
this.index.documentFrequency.set(term, df - 1);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
this.index.documents.delete(entityName);
|
|
241
|
+
}
|
|
242
|
+
if (entity) {
|
|
243
|
+
// Add new entry
|
|
244
|
+
const text = this.entityToText(entity);
|
|
245
|
+
const tokens = this.tokenize(text);
|
|
246
|
+
const termFreqs = new Map();
|
|
247
|
+
const termsSeen = new Set();
|
|
248
|
+
for (const token of tokens) {
|
|
249
|
+
termFreqs.set(token, (termFreqs.get(token) || 0) + 1);
|
|
250
|
+
if (!termsSeen.has(token)) {
|
|
251
|
+
termsSeen.add(token);
|
|
252
|
+
this.index.documentFrequency.set(token, (this.index.documentFrequency.get(token) || 0) + 1);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
const entry = {
|
|
256
|
+
entityName: entity.name,
|
|
257
|
+
termFreqs,
|
|
258
|
+
docLength: tokens.length,
|
|
259
|
+
};
|
|
260
|
+
this.index.documents.set(entityName, entry);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
// Recalculate average document length
|
|
264
|
+
this.index.totalDocs = this.index.documents.size;
|
|
265
|
+
let totalLength = 0;
|
|
266
|
+
for (const doc of this.index.documents.values()) {
|
|
267
|
+
totalLength += doc.docLength;
|
|
268
|
+
}
|
|
269
|
+
this.index.avgDocLength = this.index.totalDocs > 0
|
|
270
|
+
? totalLength / this.index.totalDocs
|
|
271
|
+
: 0;
|
|
272
|
+
}
|
|
273
|
+
/**
|
|
274
|
+
* Remove an entity from the index.
|
|
275
|
+
*
|
|
276
|
+
* @param entityName - Name of entity to remove
|
|
277
|
+
*/
|
|
278
|
+
remove(entityName) {
|
|
279
|
+
if (!this.index) {
|
|
280
|
+
return false;
|
|
281
|
+
}
|
|
282
|
+
const entry = this.index.documents.get(entityName);
|
|
283
|
+
if (!entry) {
|
|
284
|
+
return false;
|
|
285
|
+
}
|
|
286
|
+
// Update document frequency counts
|
|
287
|
+
for (const [term] of entry.termFreqs) {
|
|
288
|
+
const df = this.index.documentFrequency.get(term) || 0;
|
|
289
|
+
if (df <= 1) {
|
|
290
|
+
this.index.documentFrequency.delete(term);
|
|
291
|
+
}
|
|
292
|
+
else {
|
|
293
|
+
this.index.documentFrequency.set(term, df - 1);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
this.index.documents.delete(entityName);
|
|
297
|
+
// Update totals
|
|
298
|
+
this.index.totalDocs = this.index.documents.size;
|
|
299
|
+
let totalLength = 0;
|
|
300
|
+
for (const doc of this.index.documents.values()) {
|
|
301
|
+
totalLength += doc.docLength;
|
|
302
|
+
}
|
|
303
|
+
this.index.avgDocLength = this.index.totalDocs > 0
|
|
304
|
+
? totalLength / this.index.totalDocs
|
|
305
|
+
: 0;
|
|
306
|
+
return true;
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Clear the index.
|
|
310
|
+
*/
|
|
311
|
+
clearIndex() {
|
|
312
|
+
this.index = null;
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Check if the index is built.
|
|
316
|
+
*/
|
|
317
|
+
isIndexed() {
|
|
318
|
+
return this.index !== null;
|
|
319
|
+
}
|
|
320
|
+
/**
|
|
321
|
+
* Get index statistics.
|
|
322
|
+
*/
|
|
323
|
+
getIndexStats() {
|
|
324
|
+
if (!this.index) {
|
|
325
|
+
return null;
|
|
326
|
+
}
|
|
327
|
+
return {
|
|
328
|
+
documents: this.index.documents.size,
|
|
329
|
+
terms: this.index.documentFrequency.size,
|
|
330
|
+
avgDocLength: this.index.avgDocLength,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
/**
|
|
334
|
+
* Convert an entity to searchable text.
|
|
335
|
+
*/
|
|
336
|
+
entityToText(entity) {
|
|
337
|
+
return [entity.name, entity.entityType, ...entity.observations].join(' ');
|
|
338
|
+
}
|
|
339
|
+
}
|