@danielsimonjr/memory-mcp 11.0.1 → 11.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -22
- package/dist/core/EntityManager.d.ts +10 -15
- package/dist/core/EntityManager.d.ts.map +1 -1
- package/dist/core/EntityManager.js +21 -54
- package/dist/core/GraphStorage.d.ts +0 -51
- package/dist/core/GraphStorage.d.ts.map +1 -1
- package/dist/core/GraphStorage.js +2 -79
- package/dist/core/GraphTraversal.d.ts +2 -7
- package/dist/core/GraphTraversal.d.ts.map +1 -1
- package/dist/core/GraphTraversal.js +2 -19
- package/dist/core/ManagerContext.d.ts +0 -4
- package/dist/core/ManagerContext.d.ts.map +1 -1
- package/dist/core/ManagerContext.js +2 -12
- package/dist/core/RelationManager.d.ts.map +1 -1
- package/dist/core/RelationManager.js +4 -5
- package/dist/core/SQLiteStorage.d.ts.map +1 -1
- package/dist/core/SQLiteStorage.js +2 -3
- package/dist/core/TransactionManager.d.ts +2 -207
- package/dist/core/TransactionManager.d.ts.map +1 -1
- package/dist/core/TransactionManager.js +6 -482
- package/dist/core/index.d.ts +1 -2
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -3
- package/dist/features/ArchiveManager.d.ts +2 -14
- package/dist/features/ArchiveManager.d.ts.map +1 -1
- package/dist/features/ArchiveManager.js +3 -44
- package/dist/features/CompressionManager.d.ts +4 -14
- package/dist/features/CompressionManager.d.ts.map +1 -1
- package/dist/features/CompressionManager.js +9 -74
- package/dist/features/IOManager.d.ts +2 -6
- package/dist/features/IOManager.d.ts.map +1 -1
- package/dist/features/IOManager.js +10 -105
- package/dist/features/StreamingExporter.d.ts +4 -27
- package/dist/features/StreamingExporter.d.ts.map +1 -1
- package/dist/features/StreamingExporter.js +4 -65
- package/dist/features/index.d.ts +0 -2
- package/dist/features/index.d.ts.map +1 -1
- package/dist/features/index.js +0 -3
- package/dist/search/EmbeddingService.d.ts +9 -108
- package/dist/search/EmbeddingService.d.ts.map +1 -1
- package/dist/search/EmbeddingService.js +15 -187
- package/dist/search/FuzzySearch.js +1 -1
- package/dist/search/SavedSearchManager.d.ts.map +1 -1
- package/dist/search/SavedSearchManager.js +2 -3
- package/dist/search/SearchManager.d.ts +1 -42
- package/dist/search/SearchManager.d.ts.map +1 -1
- package/dist/search/SearchManager.js +0 -115
- package/dist/search/SemanticSearch.d.ts +1 -4
- package/dist/search/SemanticSearch.d.ts.map +1 -1
- package/dist/search/SemanticSearch.js +2 -12
- package/dist/search/TFIDFIndexManager.d.ts +0 -88
- package/dist/search/TFIDFIndexManager.d.ts.map +1 -1
- package/dist/search/TFIDFIndexManager.js +0 -217
- package/dist/search/index.d.ts +1 -18
- package/dist/search/index.d.ts.map +1 -1
- package/dist/search/index.js +1 -32
- package/dist/server/MCPServer.d.ts.map +1 -1
- package/dist/server/MCPServer.js +4 -1
- package/dist/server/responseCompressor.js +5 -5
- package/dist/server/toolDefinitions.d.ts.map +1 -1
- package/dist/server/toolDefinitions.js +5 -1
- package/dist/server/toolHandlers.d.ts +9 -5
- package/dist/server/toolHandlers.d.ts.map +1 -1
- package/dist/server/toolHandlers.js +23 -8
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/types.d.ts +2 -579
- package/dist/types/types.d.ts.map +1 -1
- package/dist/utils/compressedCache.d.ts +0 -29
- package/dist/utils/compressedCache.d.ts.map +1 -1
- package/dist/utils/compressedCache.js +0 -39
- package/dist/utils/entityUtils.d.ts +1 -59
- package/dist/utils/entityUtils.d.ts.map +1 -1
- package/dist/utils/entityUtils.js +3 -113
- package/dist/utils/errors.d.ts +0 -18
- package/dist/utils/errors.d.ts.map +1 -1
- package/dist/utils/errors.js +0 -24
- package/dist/utils/index.d.ts +2 -6
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +2 -14
- package/dist/utils/logger.d.ts +0 -7
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/logger.js +2 -9
- package/dist/utils/parallelUtils.d.ts +1 -5
- package/dist/utils/parallelUtils.d.ts.map +1 -1
- package/dist/utils/parallelUtils.js +1 -23
- package/dist/utils/schemas.d.ts +16 -16
- package/dist/utils/schemas.d.ts.map +1 -1
- package/dist/utils/schemas.js +12 -12
- package/dist/utils/taskScheduler.d.ts +0 -4
- package/dist/utils/taskScheduler.d.ts.map +1 -1
- package/dist/utils/taskScheduler.js +1 -21
- package/dist/workers/WorkerPool.d.ts +81 -0
- package/dist/workers/WorkerPool.d.ts.map +1 -0
- package/dist/workers/WorkerPool.js +121 -0
- package/dist/workers/index.d.ts +1 -1
- package/dist/workers/index.d.ts.map +1 -1
- package/dist/workers/levenshteinWorker.js +1 -1
- package/package.json +1 -4
- package/dist/__tests__/file-path.test.js +0 -119
- package/dist/__tests__/knowledge-graph.test.js +0 -318
- package/dist/core/GraphEventEmitter.d.ts +0 -202
- package/dist/core/GraphEventEmitter.d.ts.map +0 -1
- package/dist/core/GraphEventEmitter.js +0 -346
- package/dist/features/KeywordExtractor.d.ts +0 -61
- package/dist/features/KeywordExtractor.d.ts.map +0 -1
- package/dist/features/KeywordExtractor.js +0 -126
- package/dist/features/ObservationNormalizer.d.ts +0 -90
- package/dist/features/ObservationNormalizer.d.ts.map +0 -1
- package/dist/features/ObservationNormalizer.js +0 -193
- package/dist/memory.jsonl +0 -1
- package/dist/search/BM25Search.d.ts +0 -148
- package/dist/search/BM25Search.d.ts.map +0 -1
- package/dist/search/BM25Search.js +0 -339
- package/dist/search/EarlyTerminationManager.d.ts +0 -140
- package/dist/search/EarlyTerminationManager.d.ts.map +0 -1
- package/dist/search/EarlyTerminationManager.js +0 -279
- package/dist/search/EmbeddingCache.d.ts +0 -175
- package/dist/search/EmbeddingCache.d.ts.map +0 -1
- package/dist/search/EmbeddingCache.js +0 -246
- package/dist/search/HybridScorer.d.ts +0 -181
- package/dist/search/HybridScorer.d.ts.map +0 -1
- package/dist/search/HybridScorer.js +0 -257
- package/dist/search/HybridSearchManager.d.ts +0 -80
- package/dist/search/HybridSearchManager.d.ts.map +0 -1
- package/dist/search/HybridSearchManager.js +0 -187
- package/dist/search/IncrementalIndexer.d.ts +0 -201
- package/dist/search/IncrementalIndexer.d.ts.map +0 -1
- package/dist/search/IncrementalIndexer.js +0 -342
- package/dist/search/OptimizedInvertedIndex.d.ts +0 -163
- package/dist/search/OptimizedInvertedIndex.d.ts.map +0 -1
- package/dist/search/OptimizedInvertedIndex.js +0 -358
- package/dist/search/ParallelSearchExecutor.d.ts +0 -172
- package/dist/search/ParallelSearchExecutor.d.ts.map +0 -1
- package/dist/search/ParallelSearchExecutor.js +0 -309
- package/dist/search/QuantizedVectorStore.d.ts +0 -171
- package/dist/search/QuantizedVectorStore.d.ts.map +0 -1
- package/dist/search/QuantizedVectorStore.js +0 -307
- package/dist/search/QueryAnalyzer.d.ts +0 -76
- package/dist/search/QueryAnalyzer.d.ts.map +0 -1
- package/dist/search/QueryAnalyzer.js +0 -227
- package/dist/search/QueryCostEstimator.d.ts +0 -244
- package/dist/search/QueryCostEstimator.d.ts.map +0 -1
- package/dist/search/QueryCostEstimator.js +0 -652
- package/dist/search/QueryPlanCache.d.ts +0 -220
- package/dist/search/QueryPlanCache.d.ts.map +0 -1
- package/dist/search/QueryPlanCache.js +0 -379
- package/dist/search/QueryPlanner.d.ts +0 -58
- package/dist/search/QueryPlanner.d.ts.map +0 -1
- package/dist/search/QueryPlanner.js +0 -137
- package/dist/search/ReflectionManager.d.ts +0 -120
- package/dist/search/ReflectionManager.d.ts.map +0 -1
- package/dist/search/ReflectionManager.js +0 -231
- package/dist/search/SymbolicSearch.d.ts +0 -61
- package/dist/search/SymbolicSearch.d.ts.map +0 -1
- package/dist/search/SymbolicSearch.js +0 -163
- package/dist/search/TFIDFEventSync.d.ts +0 -85
- package/dist/search/TFIDFEventSync.d.ts.map +0 -1
- package/dist/search/TFIDFEventSync.js +0 -133
- package/dist/utils/BatchProcessor.d.ts +0 -271
- package/dist/utils/BatchProcessor.d.ts.map +0 -1
- package/dist/utils/BatchProcessor.js +0 -376
- package/dist/utils/MemoryMonitor.d.ts +0 -176
- package/dist/utils/MemoryMonitor.d.ts.map +0 -1
- package/dist/utils/MemoryMonitor.js +0 -305
- package/dist/utils/WorkerPoolManager.d.ts +0 -233
- package/dist/utils/WorkerPoolManager.d.ts.map +0 -1
- package/dist/utils/WorkerPoolManager.js +0 -420
- package/dist/utils/operationUtils.d.ts +0 -124
- package/dist/utils/operationUtils.d.ts.map +0 -1
- package/dist/utils/operationUtils.js +0 -175
- package/dist/vitest.config.js +0 -13
package/dist/features/ObservationNormalizer.js
DELETED
@@ -1,193 +0,0 @@
-/**
- * Observation Normalizer
- *
- * Phase 11: Transforms observations to be self-contained facts
- * through coreference resolution and temporal anchoring.
- *
- * @module features/ObservationNormalizer
- */
-/**
- * Observation Normalizer transforms observations to self-contained facts.
- *
- * Applies transformations:
- * 1. Coreference resolution: 'He works' -> 'Alice works'
- * 2. Temporal anchoring: 'yesterday' -> '2026-01-07'
- * 3. Keyword extraction: Identifies important terms
- *
- * @example
- * ```typescript
- * const normalizer = new ObservationNormalizer();
- * const result = normalizer.normalize(
- *   'He started the project yesterday',
- *   { name: 'Bob', entityType: 'person', observations: [] }
- * );
- * // result.normalized = 'Bob started the project on 2026-01-07'
- * ```
- */
-export class ObservationNormalizer {
-    pronounPatterns = {
-        masculine: /\b(he|him|his)\b/gi,
-        feminine: /\b(she|her|hers)\b/gi,
-        neutral: /\b(they|them|their|theirs)\b/gi,
-    };
-    relativeTimePatterns = [
-        [/\byesterday\b/i, (ref) => this.formatDate(this.addDays(ref, -1))],
-        [/\btoday\b/i, (ref) => this.formatDate(ref)],
-        [/\btomorrow\b/i, (ref) => this.formatDate(this.addDays(ref, 1))],
-        [/\blast week\b/i, (ref) => `week of ${this.formatDate(this.addDays(ref, -7))}`],
-        [/\blast month\b/i, (ref) => this.formatMonth(this.addMonths(ref, -1))],
-        [/\blast year\b/i, (ref) => `${ref.getFullYear() - 1}`],
-        [/\bthis week\b/i, (ref) => `week of ${this.formatDate(ref)}`],
-        [/\bthis month\b/i, (ref) => this.formatMonth(ref)],
-        [/\bthis year\b/i, (ref) => `${ref.getFullYear()}`],
-    ];
-    /**
-     * Normalize an observation for an entity.
-     */
-    normalize(observation, entity, options = {}) {
-        const { resolveCoreferences = true, anchorTimestamps = true, extractKeywords = false, referenceDate = new Date(), } = options;
-        let normalized = observation;
-        const changes = [];
-        if (resolveCoreferences) {
-            const corefResult = this.resolveCoreferences(normalized, entity);
-            if (corefResult.changed) {
-                normalized = corefResult.text;
-                changes.push(`Resolved pronouns to '${entity.name}'`);
-            }
-        }
-        if (anchorTimestamps) {
-            const timeResult = this.anchorTimestamps(normalized, referenceDate);
-            if (timeResult.changed) {
-                normalized = timeResult.text;
-                changes.push(...timeResult.replacements);
-            }
-        }
-        const keywords = extractKeywords
-            ? this.extractKeywords(normalized)
-            : undefined;
-        return {
-            original: observation,
-            normalized,
-            changes,
-            keywords,
-        };
-    }
-    /**
-     * Resolve pronouns to entity name.
-     */
-    resolveCoreferences(text, entity) {
-        let result = text;
-        let changed = false;
-        // Determine gender hint from entity type or name patterns
-        const isMasculine = this.guessMasculine(entity);
-        const isFeminine = this.guessFeminine(entity);
-        // Replace pronouns based on detected gender
-        if (isMasculine) {
-            const newText = result
-                .replace(this.pronounPatterns.masculine, entity.name);
-            if (newText !== result) {
-                result = newText;
-                changed = true;
-            }
-        }
-        else if (isFeminine) {
-            const newText = result
-                .replace(this.pronounPatterns.feminine, entity.name);
-            if (newText !== result) {
-                result = newText;
-                changed = true;
-            }
-        }
-        // Always try neutral pronouns for non-person entities
-        if (entity.entityType.toLowerCase() !== 'person') {
-            const newText = result
-                .replace(this.pronounPatterns.neutral, entity.name);
-            if (newText !== result) {
-                result = newText;
-                changed = true;
-            }
-        }
-        return { text: result, changed };
-    }
-    guessMasculine(entity) {
-        const masculineNames = ['john', 'james', 'bob', 'mike', 'david', 'alex'];
-        return masculineNames.some(n => entity.name.toLowerCase().includes(n));
-    }
-    guessFeminine(entity) {
-        const feminineNames = ['alice', 'jane', 'sarah', 'mary', 'emma', 'lisa'];
-        return feminineNames.some(n => entity.name.toLowerCase().includes(n));
-    }
-    /**
-     * Convert relative timestamps to absolute dates.
-     */
-    anchorTimestamps(text, referenceDate) {
-        let result = text;
-        const replacements = [];
-        for (const [pattern, resolver] of this.relativeTimePatterns) {
-            const match = result.match(pattern);
-            if (match) {
-                const replacement = resolver(referenceDate);
-                result = result.replace(pattern, replacement);
-                replacements.push(`'${match[0]}' -> '${replacement}'`);
-            }
-        }
-        return {
-            text: result,
-            changed: replacements.length > 0,
-            replacements,
-        };
-    }
-    addDays(date, days) {
-        const result = new Date(date);
-        result.setDate(result.getDate() + days);
-        return result;
-    }
-    addMonths(date, months) {
-        const result = new Date(date);
-        result.setMonth(result.getMonth() + months);
-        return result;
-    }
-    formatDate(date) {
-        return date.toISOString().split('T')[0];
-    }
-    formatMonth(date) {
-        return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
-    }
-    /**
-     * Extract important keywords from text.
-     */
-    extractKeywords(text) {
-        const stopwords = new Set([
-            'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been',
-            'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
-            'would', 'could', 'should', 'may', 'might', 'must', 'shall',
-            'can', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
-            'from', 'as', 'into', 'through', 'during', 'before', 'after',
-            'above', 'below', 'between', 'under', 'again', 'further',
-            'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
-            'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
-            'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
-            'very', 's', 't', 'just', 'don', 'now', 'and', 'but', 'or',
-        ]);
-        const words = text
-            .toLowerCase()
-            .replace(/[^a-z0-9\s]/g, '')
-            .split(/\s+/)
-            .filter(w => w.length > 2 && !stopwords.has(w));
-        // Return unique keywords
-        return [...new Set(words)];
-    }
-    /**
-     * Normalize all observations for an entity.
-     */
-    normalizeEntity(entity, options = {}) {
-        const results = entity.observations.map(obs => this.normalize(obs, entity, options));
-        return {
-            entity: {
-                ...entity,
-                observations: results.map(r => r.normalized),
-            },
-            results,
-        };
-    }
-}
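For context, the following is a minimal standalone sketch (not part of the package) of the relative-date anchoring and pronoun replacement that the removed ObservationNormalizer performed; the reference date, entity name, and sample sentence are made up for illustration.

// Illustrative only: resolves 'yesterday' against a caller-supplied reference
// date and swaps masculine pronouns for the entity name, mirroring the
// approach of the deleted class above.
const referenceDate = new Date('2026-01-08T12:00:00Z');

function addDays(date: Date, days: number): Date {
    const result = new Date(date);
    result.setDate(result.getDate() + days);
    return result;
}

function formatDate(date: Date): string {
    return date.toISOString().split('T')[0];
}

const anchored = 'He started the project yesterday'
    .replace(/\byesterday\b/i, formatDate(addDays(referenceDate, -1)))
    .replace(/\b(he|him|his)\b/gi, 'Bob');

console.log(anchored); // "Bob started the project 2026-01-07" (in most timezones)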
package/dist/memory.jsonl
DELETED
@@ -1 +0,0 @@
-{"type":"entity","name":"Test Project","entityType":"project","observations":["Phase 1-4 testing"],"createdAt":"2025-11-11T01:02:31.337Z","lastModified":"2025-11-11T01:02:33.329Z","tags":["testing","phase3"],"importance":8}
package/dist/search/BM25Search.d.ts
DELETED
@@ -1,148 +0,0 @@
-/**
- * BM25 Search
- *
- * BM25 (Best Matching 25) relevance scoring algorithm for lexical search.
- * Provides improved ranking over TF-IDF by incorporating document length normalization.
- *
- * Phase 12 Sprint 3: Search Algorithm Optimization
- *
- * @module search/BM25Search
- */
-import type { SearchResult } from '../types/index.js';
-import type { GraphStorage } from '../core/GraphStorage.js';
-/**
- * Common English stopwords to filter from queries and documents.
- * These words are too common to provide meaningful ranking signal.
- */
-export declare const STOPWORDS: Set<string>;
-/**
- * BM25 index entry for a single document.
- */
-export interface BM25DocumentEntry {
-    /** Entity name */
-    entityName: string;
-    /** Term frequencies in this document */
-    termFreqs: Map<string, number>;
-    /** Total number of tokens in document */
-    docLength: number;
-}
-/**
- * BM25 index structure.
- */
-export interface BM25Index {
-    /** Document entries keyed by entity name */
-    documents: Map<string, BM25DocumentEntry>;
-    /** Document frequency for each term (number of docs containing term) */
-    documentFrequency: Map<string, number>;
-    /** Average document length */
-    avgDocLength: number;
-    /** Total number of documents */
-    totalDocs: number;
-}
-/**
- * BM25 configuration parameters.
- */
-export interface BM25Config {
-    /** Term frequency saturation parameter (default: 1.2) */
-    k1: number;
-    /** Length normalization parameter (default: 0.75) */
-    b: number;
-}
-/**
- * Default BM25 parameters based on research recommendations.
- */
-export declare const DEFAULT_BM25_CONFIG: BM25Config;
-/**
- * BM25 Search implementation.
- *
- * BM25 improves over TF-IDF by:
- * 1. Saturating term frequency - prevents long documents from dominating
- * 2. Document length normalization - accounts for varying document sizes
- *
- * Formula:
- * score(D,Q) = sum_i( IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl)) )
- *
- * Where:
- * - f(qi,D) is the term frequency of qi in document D
- * - |D| is the length of document D
- * - avgdl is the average document length
- * - k1 and b are free parameters
- *
- * @example
- * ```typescript
- * const bm25 = new BM25Search(storage);
- * await bm25.buildIndex();
- * const results = await bm25.search('machine learning');
- * ```
- */
-export declare class BM25Search {
-    private storage;
-    private index;
-    private config;
-    constructor(storage: GraphStorage, config?: Partial<BM25Config>);
-    /**
-     * Get the current configuration.
-     */
-    getConfig(): BM25Config;
-    /**
-     * Update configuration parameters.
-     *
-     * @param config - New configuration values
-     */
-    setConfig(config: Partial<BM25Config>): void;
-    /**
-     * Tokenize text into lowercase terms with stopword filtering.
-     *
-     * @param text - Text to tokenize
-     * @param filterStopwords - Whether to filter stopwords (default: true)
-     * @returns Array of lowercase tokens
-     */
-    tokenize(text: string, filterStopwords?: boolean): string[];
-    /**
-     * Build the BM25 index from the current graph.
-     *
-     * Should be called after significant graph changes.
-     */
-    buildIndex(): Promise<void>;
-    /**
-     * Search using the BM25 algorithm.
-     *
-     * @param query - Search query
-     * @param limit - Maximum results to return
-     * @returns Array of search results sorted by BM25 score
-     */
-    search(query: string, limit?: number): Promise<SearchResult[]>;
-    /**
-     * Update the index for changed entities.
-     *
-     * @param changedEntityNames - Names of entities that changed
-     */
-    update(changedEntityNames: Set<string>): Promise<void>;
-    /**
-     * Remove an entity from the index.
-     *
-     * @param entityName - Name of entity to remove
-     */
-    remove(entityName: string): boolean;
-    /**
-     * Clear the index.
-     */
-    clearIndex(): void;
-    /**
-     * Check if the index is built.
-     */
-    isIndexed(): boolean;
-    /**
-     * Get index statistics.
-     */
-    getIndexStats(): {
-        documents: number;
-        terms: number;
-        avgDocLength: number;
-    } | null;
-    /**
-     * Convert an entity to searchable text.
-     */
-    private entityToText;
-}
-//# sourceMappingURL=BM25Search.d.ts.map
package/dist/search/BM25Search.d.ts.map
DELETED
@@ -1 +0,0 @@
-{"version":3,"file":"BM25Search.d.ts","sourceRoot":"","sources":["../../src/search/BM25Search.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAU,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAG5D;;;GAGG;AACH,eAAO,MAAM,SAAS,aAWpB,CAAC;AAEH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,kBAAkB;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,wCAAwC;IACxC,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,4CAA4C;IAC5C,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC1C,wEAAwE;IACxE,iBAAiB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gCAAgC;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,EAAE,EAAE,MAAM,CAAC;IACX,qDAAqD;IACrD,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;GAEG;AACH,eAAO,MAAM,mBAAmB,EAAE,UAGjC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,UAAU;IAKnB,OAAO,CAAC,OAAO;IAJjB,OAAO,CAAC,KAAK,CAA0B;IACvC,OAAO,CAAC,MAAM,CAAa;gBAGjB,OAAO,EAAE,YAAY,EAC7B,MAAM,GAAE,OAAO,CAAC,UAAU,CAAM;IAKlC;;OAEG;IACH,SAAS,IAAI,UAAU;IAIvB;;;;OAIG;IACH,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI;IAI5C;;;;;;OAMG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,eAAe,GAAE,OAAc,GAAG,MAAM,EAAE;IAajE;;;;OAIG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgDjC;;;;;;OAMG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,GAAE,MAA8B,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IA8E3F;;;;OAIG;IACG,MAAM,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAkE5D;;;;OAIG;IACH,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO;IAmCnC;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;OAEG;IACH,aAAa,IAAI;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAWlF;;OAEG;IACH,OAAO,CAAC,YAAY;CAGrB"}
package/dist/search/BM25Search.js
DELETED
@@ -1,339 +0,0 @@
-/**
- * BM25 Search
- *
- * BM25 (Best Matching 25) relevance scoring algorithm for lexical search.
- * Provides improved ranking over TF-IDF by incorporating document length normalization.
- *
- * Phase 12 Sprint 3: Search Algorithm Optimization
- *
- * @module search/BM25Search
- */
-import { SEARCH_LIMITS } from '../utils/constants.js';
-/**
- * Common English stopwords to filter from queries and documents.
- * These words are too common to provide meaningful ranking signal.
- */
-export const STOPWORDS = new Set([
-    'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
-    'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that',
-    'the', 'to', 'was', 'were', 'will', 'with', 'you', 'your',
-    'this', 'but', 'they', 'have', 'had', 'what', 'when', 'where',
-    'who', 'which', 'why', 'how', 'all', 'each', 'every', 'both',
-    'few', 'more', 'most', 'other', 'some', 'such', 'no', 'not',
-    'only', 'own', 'same', 'so', 'than', 'too', 'very', 'can',
-    'just', 'should', 'now', 'also', 'being', 'been', 'would',
-    'could', 'into', 'over', 'after', 'before', 'between', 'under',
-    'again', 'then', 'once', 'here', 'there', 'any', 'about',
-]);
-/**
- * Default BM25 parameters based on research recommendations.
- */
-export const DEFAULT_BM25_CONFIG = {
-    k1: 1.2,
-    b: 0.75,
-};
-/**
- * BM25 Search implementation.
- *
- * BM25 improves over TF-IDF by:
- * 1. Saturating term frequency - prevents long documents from dominating
- * 2. Document length normalization - accounts for varying document sizes
- *
- * Formula:
- * score(D,Q) = sum_i( IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D|/avgdl)) )
- *
- * Where:
- * - f(qi,D) is the term frequency of qi in document D
- * - |D| is the length of document D
- * - avgdl is the average document length
- * - k1 and b are free parameters
- *
- * @example
- * ```typescript
- * const bm25 = new BM25Search(storage);
- * await bm25.buildIndex();
- * const results = await bm25.search('machine learning');
- * ```
- */
-export class BM25Search {
-    storage;
-    index = null;
-    config;
-    constructor(storage, config = {}) {
-        this.storage = storage;
-        this.config = { ...DEFAULT_BM25_CONFIG, ...config };
-    }
-    /**
-     * Get the current configuration.
-     */
-    getConfig() {
-        return { ...this.config };
-    }
-    /**
-     * Update configuration parameters.
-     *
-     * @param config - New configuration values
-     */
-    setConfig(config) {
-        this.config = { ...this.config, ...config };
-    }
-    /**
-     * Tokenize text into lowercase terms with stopword filtering.
-     *
-     * @param text - Text to tokenize
-     * @param filterStopwords - Whether to filter stopwords (default: true)
-     * @returns Array of lowercase tokens
-     */
-    tokenize(text, filterStopwords = true) {
-        const tokens = text
-            .toLowerCase()
-            .replace(/[^\w\s]/g, ' ')
-            .split(/\s+/)
-            .filter(token => token.length > 0);
-        if (filterStopwords) {
-            return tokens.filter(token => !STOPWORDS.has(token));
-        }
-        return tokens;
-    }
-    /**
-     * Build the BM25 index from the current graph.
-     *
-     * Should be called after significant graph changes.
-     */
-    async buildIndex() {
-        const graph = await this.storage.loadGraph();
-        const documents = new Map();
-        const documentFrequency = new Map();
-        const termsSeen = new Set();
-        let totalDocLength = 0;
-        // First pass: tokenize all documents and count term frequencies
-        for (const entity of graph.entities) {
-            const text = this.entityToText(entity);
-            const tokens = this.tokenize(text);
-            const termFreqs = new Map();
-            // Count term frequencies for this document
-            for (const token of tokens) {
-                termFreqs.set(token, (termFreqs.get(token) || 0) + 1);
-            }
-            // Track which terms appear in this document (for IDF calculation)
-            termsSeen.clear();
-            for (const token of tokens) {
-                if (!termsSeen.has(token)) {
-                    termsSeen.add(token);
-                    documentFrequency.set(token, (documentFrequency.get(token) || 0) + 1);
-                }
-            }
-            const entry = {
-                entityName: entity.name,
-                termFreqs,
-                docLength: tokens.length,
-            };
-            documents.set(entity.name, entry);
-            totalDocLength += tokens.length;
-        }
-        const totalDocs = documents.size;
-        const avgDocLength = totalDocs > 0 ? totalDocLength / totalDocs : 0;
-        this.index = {
-            documents,
-            documentFrequency,
-            avgDocLength,
-            totalDocs,
-        };
-    }
-    /**
-     * Search using the BM25 algorithm.
-     *
-     * @param query - Search query
-     * @param limit - Maximum results to return
-     * @returns Array of search results sorted by BM25 score
-     */
-    async search(query, limit = SEARCH_LIMITS.DEFAULT) {
-        const effectiveLimit = Math.min(limit, SEARCH_LIMITS.MAX);
-        // Ensure index is built
-        if (!this.index) {
-            await this.buildIndex();
-        }
-        if (!this.index || this.index.documents.size === 0) {
-            return [];
-        }
-        const graph = await this.storage.loadGraph();
-        const entityMap = new Map(graph.entities.map(e => [e.name, e]));
-        // Tokenize query
-        const queryTerms = this.tokenize(query);
-        if (queryTerms.length === 0) {
-            return [];
-        }
-        const { k1, b } = this.config;
-        const { documents, documentFrequency, avgDocLength, totalDocs } = this.index;
-        const results = [];
-        // Calculate BM25 score for each document
-        for (const [entityName, docEntry] of documents) {
-            const entity = entityMap.get(entityName);
-            if (!entity)
-                continue;
-            let score = 0;
-            const matchedFields = {};
-            for (const term of queryTerms) {
-                const tf = docEntry.termFreqs.get(term) || 0;
-                if (tf === 0)
-                    continue;
-                // Calculate IDF
-                const df = documentFrequency.get(term) || 0;
-                const idf = df > 0 ? Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1) : 0;
-                // Calculate BM25 score component
-                const numerator = tf * (k1 + 1);
-                const denominator = tf + k1 * (1 - b + b * (docEntry.docLength / avgDocLength));
-                const termScore = idf * (numerator / denominator);
-                score += termScore;
-                // Track which fields matched
-                if (entity.name.toLowerCase().includes(term)) {
-                    matchedFields.name = true;
-                }
-                if (entity.entityType.toLowerCase().includes(term)) {
-                    matchedFields.entityType = true;
-                }
-                const matchedObs = entity.observations.filter(o => o.toLowerCase().includes(term));
-                if (matchedObs.length > 0) {
-                    matchedFields.observations = matchedObs;
-                }
-            }
-            if (score > 0) {
-                results.push({
-                    entity,
-                    score,
-                    matchedFields,
-                });
-            }
-        }
-        // Sort by score descending and limit
-        return results
-            .sort((a, b) => b.score - a.score)
-            .slice(0, effectiveLimit);
-    }
-    /**
-     * Update the index for changed entities.
-     *
-     * @param changedEntityNames - Names of entities that changed
-     */
-    async update(changedEntityNames) {
-        if (!this.index) {
-            await this.buildIndex();
-            return;
-        }
-        const graph = await this.storage.loadGraph();
-        const entityMap = new Map(graph.entities.map(e => [e.name, e]));
-        // Process each changed entity
-        for (const entityName of changedEntityNames) {
-            const entity = entityMap.get(entityName);
-            const existingEntry = this.index.documents.get(entityName);
-            if (existingEntry) {
-                // Remove old term frequencies from document frequency counts
-                for (const [term] of existingEntry.termFreqs) {
-                    const df = this.index.documentFrequency.get(term) || 0;
-                    if (df <= 1) {
-                        this.index.documentFrequency.delete(term);
-                    }
-                    else {
-                        this.index.documentFrequency.set(term, df - 1);
-                    }
-                }
-                this.index.documents.delete(entityName);
-            }
-            if (entity) {
-                // Add new entry
-                const text = this.entityToText(entity);
-                const tokens = this.tokenize(text);
-                const termFreqs = new Map();
-                const termsSeen = new Set();
-                for (const token of tokens) {
-                    termFreqs.set(token, (termFreqs.get(token) || 0) + 1);
-                    if (!termsSeen.has(token)) {
-                        termsSeen.add(token);
-                        this.index.documentFrequency.set(token, (this.index.documentFrequency.get(token) || 0) + 1);
-                    }
-                }
-                const entry = {
-                    entityName: entity.name,
-                    termFreqs,
-                    docLength: tokens.length,
-                };
-                this.index.documents.set(entityName, entry);
-            }
-        }
-        // Recalculate average document length
-        this.index.totalDocs = this.index.documents.size;
-        let totalLength = 0;
-        for (const doc of this.index.documents.values()) {
-            totalLength += doc.docLength;
-        }
-        this.index.avgDocLength = this.index.totalDocs > 0
-            ? totalLength / this.index.totalDocs
-            : 0;
-    }
-    /**
-     * Remove an entity from the index.
-     *
-     * @param entityName - Name of entity to remove
-     */
-    remove(entityName) {
-        if (!this.index) {
-            return false;
-        }
-        const entry = this.index.documents.get(entityName);
-        if (!entry) {
-            return false;
-        }
-        // Update document frequency counts
-        for (const [term] of entry.termFreqs) {
-            const df = this.index.documentFrequency.get(term) || 0;
-            if (df <= 1) {
-                this.index.documentFrequency.delete(term);
-            }
-            else {
-                this.index.documentFrequency.set(term, df - 1);
-            }
-        }
-        this.index.documents.delete(entityName);
-        // Update totals
-        this.index.totalDocs = this.index.documents.size;
-        let totalLength = 0;
-        for (const doc of this.index.documents.values()) {
-            totalLength += doc.docLength;
-        }
-        this.index.avgDocLength = this.index.totalDocs > 0
-            ? totalLength / this.index.totalDocs
-            : 0;
-        return true;
-    }
-    /**
-     * Clear the index.
-     */
-    clearIndex() {
-        this.index = null;
-    }
-    /**
-     * Check if the index is built.
-     */
-    isIndexed() {
-        return this.index !== null;
-    }
-    /**
-     * Get index statistics.
-     */
-    getIndexStats() {
-        if (!this.index) {
-            return null;
-        }
-        return {
-            documents: this.index.documents.size,
-            terms: this.index.documentFrequency.size,
-            avgDocLength: this.index.avgDocLength,
-        };
-    }
-    /**
-     * Convert an entity to searchable text.
-     */
-    entityToText(entity) {
-        return [entity.name, entity.entityType, ...entity.observations].join(' ');
-    }
-}
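For reference, a worked example (not part of the package) of the term score documented in the removed BM25Search, using its defaults k1 = 1.2 and b = 0.75 and made-up counts: a term appearing twice in a document of average length, present in 1 of 10 indexed documents.

// Toy numbers only; the formula matches the removed implementation:
//   idf = log((N - df + 0.5) / (df + 0.5) + 1)
//   term score = idf * (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * |D| / avgdl))
const k1 = 1.2;
const b = 0.75;
const tf = 2;          // term frequency in the document
const df = 1;          // number of documents containing the term
const totalDocs = 10;  // total indexed documents
const docLength = 40;
const avgDocLength = 40;

const idf = Math.log((totalDocs - df + 0.5) / (df + 0.5) + 1);        // ~1.99
const denom = tf + k1 * (1 - b + b * (docLength / avgDocLength));     // 3.2
const termScore = idf * (tf * (k1 + 1)) / denom;                      // ~2.74

console.log(termScore.toFixed(2)); // "2.74"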