@danielsimonjr/memory-mcp 0.47.1 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +2000 -194
- package/dist/__tests__/file-path.test.js +5 -5
- package/dist/__tests__/knowledge-graph.test.js +3 -8
- package/dist/core/EntityManager.d.ts +266 -0
- package/dist/core/EntityManager.d.ts.map +1 -0
- package/dist/core/EntityManager.js +85 -133
- package/dist/core/GraphEventEmitter.d.ts +202 -0
- package/dist/core/GraphEventEmitter.d.ts.map +1 -0
- package/dist/core/GraphEventEmitter.js +346 -0
- package/dist/core/GraphStorage.d.ts +395 -0
- package/dist/core/GraphStorage.d.ts.map +1 -0
- package/dist/core/GraphStorage.js +643 -31
- package/dist/core/GraphTraversal.d.ts +141 -0
- package/dist/core/GraphTraversal.d.ts.map +1 -0
- package/dist/core/GraphTraversal.js +573 -0
- package/dist/core/HierarchyManager.d.ts +111 -0
- package/dist/core/HierarchyManager.d.ts.map +1 -0
- package/dist/{features → core}/HierarchyManager.js +14 -9
- package/dist/core/ManagerContext.d.ts +72 -0
- package/dist/core/ManagerContext.d.ts.map +1 -0
- package/dist/core/ManagerContext.js +118 -0
- package/dist/core/ObservationManager.d.ts +85 -0
- package/dist/core/ObservationManager.d.ts.map +1 -0
- package/dist/core/ObservationManager.js +51 -57
- package/dist/core/RelationManager.d.ts +131 -0
- package/dist/core/RelationManager.d.ts.map +1 -0
- package/dist/core/RelationManager.js +31 -7
- package/dist/core/SQLiteStorage.d.ts +354 -0
- package/dist/core/SQLiteStorage.d.ts.map +1 -0
- package/dist/core/SQLiteStorage.js +917 -0
- package/dist/core/StorageFactory.d.ts +45 -0
- package/dist/core/StorageFactory.d.ts.map +1 -0
- package/dist/core/StorageFactory.js +64 -0
- package/dist/core/TransactionManager.d.ts +464 -0
- package/dist/core/TransactionManager.d.ts.map +1 -0
- package/dist/core/TransactionManager.js +490 -13
- package/dist/core/index.d.ts +17 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +12 -2
- package/dist/features/AnalyticsManager.d.ts +44 -0
- package/dist/features/AnalyticsManager.d.ts.map +1 -0
- package/dist/features/AnalyticsManager.js +14 -13
- package/dist/features/ArchiveManager.d.ts +133 -0
- package/dist/features/ArchiveManager.d.ts.map +1 -0
- package/dist/features/ArchiveManager.js +221 -14
- package/dist/features/CompressionManager.d.ts +117 -0
- package/dist/features/CompressionManager.d.ts.map +1 -0
- package/dist/features/CompressionManager.js +189 -20
- package/dist/features/IOManager.d.ts +225 -0
- package/dist/features/IOManager.d.ts.map +1 -0
- package/dist/features/IOManager.js +1041 -0
- package/dist/features/StreamingExporter.d.ts +123 -0
- package/dist/features/StreamingExporter.d.ts.map +1 -0
- package/dist/features/StreamingExporter.js +203 -0
- package/dist/features/TagManager.d.ts +147 -0
- package/dist/features/TagManager.d.ts.map +1 -0
- package/dist/features/index.d.ts +12 -0
- package/dist/features/index.d.ts.map +1 -0
- package/dist/features/index.js +5 -6
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -45
- package/dist/memory.jsonl +1 -18
- package/dist/search/BasicSearch.d.ts +51 -0
- package/dist/search/BasicSearch.d.ts.map +1 -0
- package/dist/search/BasicSearch.js +9 -3
- package/dist/search/BooleanSearch.d.ts +98 -0
- package/dist/search/BooleanSearch.d.ts.map +1 -0
- package/dist/search/BooleanSearch.js +156 -9
- package/dist/search/EmbeddingService.d.ts +178 -0
- package/dist/search/EmbeddingService.d.ts.map +1 -0
- package/dist/search/EmbeddingService.js +358 -0
- package/dist/search/FuzzySearch.d.ts +118 -0
- package/dist/search/FuzzySearch.d.ts.map +1 -0
- package/dist/search/FuzzySearch.js +241 -25
- package/dist/search/QueryCostEstimator.d.ts +111 -0
- package/dist/search/QueryCostEstimator.d.ts.map +1 -0
- package/dist/search/QueryCostEstimator.js +355 -0
- package/dist/search/RankedSearch.d.ts +71 -0
- package/dist/search/RankedSearch.d.ts.map +1 -0
- package/dist/search/RankedSearch.js +54 -6
- package/dist/search/SavedSearchManager.d.ts +79 -0
- package/dist/search/SavedSearchManager.d.ts.map +1 -0
- package/dist/search/SearchFilterChain.d.ts +120 -0
- package/dist/search/SearchFilterChain.d.ts.map +1 -0
- package/dist/search/SearchFilterChain.js +2 -4
- package/dist/search/SearchManager.d.ts +326 -0
- package/dist/search/SearchManager.d.ts.map +1 -0
- package/dist/search/SearchManager.js +148 -0
- package/dist/search/SearchSuggestions.d.ts +27 -0
- package/dist/search/SearchSuggestions.d.ts.map +1 -0
- package/dist/search/SearchSuggestions.js +1 -1
- package/dist/search/SemanticSearch.d.ts +149 -0
- package/dist/search/SemanticSearch.d.ts.map +1 -0
- package/dist/search/SemanticSearch.js +323 -0
- package/dist/search/TFIDFEventSync.d.ts +85 -0
- package/dist/search/TFIDFEventSync.d.ts.map +1 -0
- package/dist/search/TFIDFEventSync.js +133 -0
- package/dist/search/TFIDFIndexManager.d.ts +151 -0
- package/dist/search/TFIDFIndexManager.d.ts.map +1 -0
- package/dist/search/TFIDFIndexManager.js +232 -17
- package/dist/search/VectorStore.d.ts +235 -0
- package/dist/search/VectorStore.d.ts.map +1 -0
- package/dist/search/VectorStore.js +311 -0
- package/dist/search/index.d.ts +21 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +12 -0
- package/dist/server/MCPServer.d.ts +21 -0
- package/dist/server/MCPServer.d.ts.map +1 -0
- package/dist/server/MCPServer.js +4 -4
- package/dist/server/responseCompressor.d.ts +94 -0
- package/dist/server/responseCompressor.d.ts.map +1 -0
- package/dist/server/responseCompressor.js +127 -0
- package/dist/server/toolDefinitions.d.ts +27 -0
- package/dist/server/toolDefinitions.d.ts.map +1 -0
- package/dist/server/toolDefinitions.js +189 -18
- package/dist/server/toolHandlers.d.ts +41 -0
- package/dist/server/toolHandlers.d.ts.map +1 -0
- package/dist/server/toolHandlers.js +467 -75
- package/dist/types/index.d.ts +13 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +1 -1
- package/dist/types/types.d.ts +1654 -0
- package/dist/types/types.d.ts.map +1 -0
- package/dist/types/types.js +9 -0
- package/dist/utils/compressedCache.d.ts +192 -0
- package/dist/utils/compressedCache.d.ts.map +1 -0
- package/dist/utils/compressedCache.js +309 -0
- package/dist/utils/compressionUtil.d.ts +214 -0
- package/dist/utils/compressionUtil.d.ts.map +1 -0
- package/dist/utils/compressionUtil.js +247 -0
- package/dist/utils/constants.d.ts +245 -0
- package/dist/utils/constants.d.ts.map +1 -0
- package/dist/utils/constants.js +124 -0
- package/dist/utils/entityUtils.d.ts +321 -0
- package/dist/utils/entityUtils.d.ts.map +1 -0
- package/dist/utils/entityUtils.js +434 -4
- package/dist/utils/errors.d.ts +95 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +24 -0
- package/dist/utils/formatters.d.ts +145 -0
- package/dist/utils/formatters.d.ts.map +1 -0
- package/dist/utils/{paginationUtils.js → formatters.js} +54 -3
- package/dist/utils/index.d.ts +23 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +69 -31
- package/dist/utils/indexes.d.ts +270 -0
- package/dist/utils/indexes.d.ts.map +1 -0
- package/dist/utils/indexes.js +526 -0
- package/dist/utils/logger.d.ts +24 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/operationUtils.d.ts +124 -0
- package/dist/utils/operationUtils.d.ts.map +1 -0
- package/dist/utils/operationUtils.js +175 -0
- package/dist/utils/parallelUtils.d.ts +72 -0
- package/dist/utils/parallelUtils.d.ts.map +1 -0
- package/dist/utils/parallelUtils.js +169 -0
- package/dist/utils/schemas.d.ts +374 -0
- package/dist/utils/schemas.d.ts.map +1 -0
- package/dist/utils/schemas.js +302 -2
- package/dist/utils/searchAlgorithms.d.ts +99 -0
- package/dist/utils/searchAlgorithms.d.ts.map +1 -0
- package/dist/utils/searchAlgorithms.js +167 -0
- package/dist/utils/searchCache.d.ts +108 -0
- package/dist/utils/searchCache.d.ts.map +1 -0
- package/dist/utils/taskScheduler.d.ts +290 -0
- package/dist/utils/taskScheduler.d.ts.map +1 -0
- package/dist/utils/taskScheduler.js +466 -0
- package/dist/workers/index.d.ts +12 -0
- package/dist/workers/index.d.ts.map +1 -0
- package/dist/workers/index.js +9 -0
- package/dist/workers/levenshteinWorker.d.ts +60 -0
- package/dist/workers/levenshteinWorker.d.ts.map +1 -0
- package/dist/workers/levenshteinWorker.js +98 -0
- package/package.json +17 -4
- package/dist/__tests__/edge-cases/edge-cases.test.js +0 -406
- package/dist/__tests__/integration/workflows.test.js +0 -449
- package/dist/__tests__/performance/benchmarks.test.js +0 -413
- package/dist/__tests__/unit/core/EntityManager.test.js +0 -334
- package/dist/__tests__/unit/core/GraphStorage.test.js +0 -205
- package/dist/__tests__/unit/core/RelationManager.test.js +0 -274
- package/dist/__tests__/unit/features/CompressionManager.test.js +0 -350
- package/dist/__tests__/unit/search/BasicSearch.test.js +0 -311
- package/dist/__tests__/unit/search/BooleanSearch.test.js +0 -432
- package/dist/__tests__/unit/search/FuzzySearch.test.js +0 -448
- package/dist/__tests__/unit/search/RankedSearch.test.js +0 -379
- package/dist/__tests__/unit/utils/levenshtein.test.js +0 -77
- package/dist/core/KnowledgeGraphManager.js +0 -423
- package/dist/features/BackupManager.js +0 -311
- package/dist/features/ExportManager.js +0 -305
- package/dist/features/ImportExportManager.js +0 -50
- package/dist/features/ImportManager.js +0 -328
- package/dist/types/analytics.types.js +0 -6
- package/dist/types/entity.types.js +0 -7
- package/dist/types/import-export.types.js +0 -7
- package/dist/types/search.types.js +0 -7
- package/dist/types/tag.types.js +0 -6
- package/dist/utils/dateUtils.js +0 -89
- package/dist/utils/filterUtils.js +0 -155
- package/dist/utils/levenshtein.js +0 -62
- package/dist/utils/pathUtils.js +0 -115
- package/dist/utils/responseFormatter.js +0 -55
- package/dist/utils/tagUtils.js +0 -107
- package/dist/utils/tfidf.js +0 -90
- package/dist/utils/validationHelper.js +0 -99
- package/dist/utils/validationUtils.js +0 -109
|
@@ -2,25 +2,123 @@
|
|
|
2
2
|
* Fuzzy Search
|
|
3
3
|
*
|
|
4
4
|
* Search with typo tolerance using Levenshtein distance similarity.
|
|
5
|
+
* Uses workerpool for parallel processing on large datasets.
|
|
5
6
|
*
|
|
6
7
|
* @module search/FuzzySearch
|
|
7
8
|
*/
|
|
8
|
-
import { levenshteinDistance } from '../utils/
|
|
9
|
+
import { levenshteinDistance } from '../utils/index.js';
|
|
9
10
|
import { SEARCH_LIMITS } from '../utils/constants.js';
|
|
10
11
|
import { SearchFilterChain } from './SearchFilterChain.js';
|
|
12
|
+
import workerpool from '@danielsimonjr/workerpool';
|
|
13
|
+
import { fileURLToPath } from 'url';
|
|
14
|
+
import { dirname, join, sep } from 'path';
|
|
11
15
|
/**
|
|
12
16
|
* Default fuzzy search similarity threshold (70% match required).
|
|
13
17
|
* Lower values are more permissive (more typos tolerated).
|
|
14
18
|
* Higher values are stricter (fewer typos tolerated).
|
|
15
19
|
*/
|
|
16
20
|
export const DEFAULT_FUZZY_THRESHOLD = 0.7;
|
|
21
|
+
/**
|
|
22
|
+
* Phase 4 Sprint 3: Maximum cache size to prevent memory bloat.
|
|
23
|
+
*/
|
|
24
|
+
const FUZZY_CACHE_MAX_SIZE = 100;
|
|
25
|
+
/**
|
|
26
|
+
* Phase 4 Sprint 3: Cache TTL in milliseconds (5 minutes).
|
|
27
|
+
*/
|
|
28
|
+
const FUZZY_CACHE_TTL_MS = 5 * 60 * 1000;
|
|
29
|
+
/**
|
|
30
|
+
* Phase 7 Sprint 3: Minimum number of entities to activate worker pool.
|
|
31
|
+
*/
|
|
32
|
+
const WORKER_MIN_ENTITIES = 500;
|
|
33
|
+
/**
|
|
34
|
+
* Phase 7 Sprint 3: Maximum threshold for worker pool activation.
|
|
35
|
+
* Higher thresholds have fewer matches, so single-threaded is faster.
|
|
36
|
+
*/
|
|
37
|
+
const WORKER_MAX_THRESHOLD = 0.8;
|
|
17
38
|
/**
|
|
18
39
|
* Performs fuzzy search with configurable similarity threshold.
|
|
19
40
|
*/
|
|
20
41
|
export class FuzzySearch {
|
|
21
42
|
storage;
|
|
22
|
-
|
|
43
|
+
/**
|
|
44
|
+
* Phase 4 Sprint 3: Result cache for fuzzy search.
|
|
45
|
+
* Maps cache key -> cached entity names.
|
|
46
|
+
*/
|
|
47
|
+
fuzzyResultCache = new Map();
|
|
48
|
+
/**
|
|
49
|
+
* Phase 8: Worker pool using workerpool library.
|
|
50
|
+
* Initialized lazily when needed.
|
|
51
|
+
*/
|
|
52
|
+
workerPool = null;
|
|
53
|
+
/**
|
|
54
|
+
* Phase 7 Sprint 3: Path to the worker script.
|
|
55
|
+
*/
|
|
56
|
+
workerPath;
|
|
57
|
+
/**
|
|
58
|
+
* Phase 8: Whether to use worker pool for parallel processing.
|
|
59
|
+
* Can be disabled for testing or when workers are not available.
|
|
60
|
+
*/
|
|
61
|
+
useWorkerPool;
|
|
62
|
+
constructor(storage, options = {}) {
|
|
23
63
|
this.storage = storage;
|
|
64
|
+
this.useWorkerPool = options.useWorkerPool ?? true;
|
|
65
|
+
// Calculate worker path using ESM module resolution
|
|
66
|
+
const currentFileUrl = import.meta.url;
|
|
67
|
+
const currentDir = dirname(fileURLToPath(currentFileUrl));
|
|
68
|
+
// Check if we're running from src/ (during tests) or dist/ (production)
|
|
69
|
+
const isRunningFromSrc = currentDir.includes(`${sep}src${sep}`);
|
|
70
|
+
if (isRunningFromSrc) {
|
|
71
|
+
// During tests, worker is in dist/workers/ relative to project root
|
|
72
|
+
const projectRoot = join(currentDir, '..', '..');
|
|
73
|
+
this.workerPath = join(projectRoot, 'dist', 'workers', 'levenshteinWorker.js');
|
|
74
|
+
}
|
|
75
|
+
else {
|
|
76
|
+
// In production, worker is in dist/workers/ relative to current dist/search/
|
|
77
|
+
this.workerPath = join(currentDir, '..', 'workers', 'levenshteinWorker.js');
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Phase 4 Sprint 3: Generate cache key for fuzzy search parameters.
|
|
82
|
+
*/
|
|
83
|
+
generateCacheKey(query, threshold, tags, minImportance, maxImportance, offset, limit) {
|
|
84
|
+
return JSON.stringify({
|
|
85
|
+
q: query.toLowerCase(),
|
|
86
|
+
t: threshold,
|
|
87
|
+
tags: tags?.sort().join(',') ?? '',
|
|
88
|
+
min: minImportance,
|
|
89
|
+
max: maxImportance,
|
|
90
|
+
off: offset,
|
|
91
|
+
lim: limit,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Phase 4 Sprint 3: Clear the fuzzy search cache.
|
|
96
|
+
*/
|
|
97
|
+
clearCache() {
|
|
98
|
+
this.fuzzyResultCache.clear();
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Phase 4 Sprint 3: Invalidate stale cache entries.
|
|
102
|
+
*/
|
|
103
|
+
cleanupCache() {
|
|
104
|
+
const now = Date.now();
|
|
105
|
+
const entries = Array.from(this.fuzzyResultCache.entries());
|
|
106
|
+
// Remove expired entries
|
|
107
|
+
for (const [key, entry] of entries) {
|
|
108
|
+
if (now - entry.timestamp > FUZZY_CACHE_TTL_MS) {
|
|
109
|
+
this.fuzzyResultCache.delete(key);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
// If still over limit, remove oldest entries
|
|
113
|
+
if (this.fuzzyResultCache.size > FUZZY_CACHE_MAX_SIZE) {
|
|
114
|
+
const sortedEntries = entries
|
|
115
|
+
.filter(([k]) => this.fuzzyResultCache.has(k))
|
|
116
|
+
.sort((a, b) => a[1].timestamp - b[1].timestamp);
|
|
117
|
+
const toRemove = sortedEntries.slice(0, this.fuzzyResultCache.size - FUZZY_CACHE_MAX_SIZE);
|
|
118
|
+
for (const [key] of toRemove) {
|
|
119
|
+
this.fuzzyResultCache.delete(key);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
24
122
|
}
|
|
25
123
|
/**
|
|
26
124
|
* Fuzzy search for entities with typo tolerance and pagination.
|
|
@@ -28,6 +126,8 @@ export class FuzzySearch {
|
|
|
28
126
|
* Uses Levenshtein distance to calculate similarity between strings.
|
|
29
127
|
* Matches if similarity >= threshold (0.0 to 1.0).
|
|
30
128
|
*
|
|
129
|
+
* Phase 4 Sprint 3: Implements result caching for repeated queries.
|
|
130
|
+
*
|
|
31
131
|
* @param query - Search query
|
|
32
132
|
* @param threshold - Similarity threshold (0.0 to 1.0), default DEFAULT_FUZZY_THRESHOLD
|
|
33
133
|
* @param tags - Optional tags filter
|
|
@@ -39,25 +139,51 @@ export class FuzzySearch {
|
|
|
39
139
|
*/
|
|
40
140
|
async fuzzySearch(query, threshold = DEFAULT_FUZZY_THRESHOLD, tags, minImportance, maxImportance, offset = 0, limit = SEARCH_LIMITS.DEFAULT) {
|
|
41
141
|
const graph = await this.storage.loadGraph();
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
142
|
+
const queryLower = query.toLowerCase();
|
|
143
|
+
// Phase 4 Sprint 3: Generate cache key and check cache
|
|
144
|
+
const cacheKey = this.generateCacheKey(query, threshold, tags, minImportance, maxImportance, offset, limit);
|
|
145
|
+
const cached = this.fuzzyResultCache.get(cacheKey);
|
|
146
|
+
// Check if cache is valid (entity count hasn't changed)
|
|
147
|
+
if (cached && cached.entityCount === graph.entities.length) {
|
|
148
|
+
const now = Date.now();
|
|
149
|
+
if (now - cached.timestamp < FUZZY_CACHE_TTL_MS) {
|
|
150
|
+
// Return cached results
|
|
151
|
+
const cachedNameSet = new Set(cached.entityNames);
|
|
152
|
+
const cachedEntities = graph.entities.filter(e => cachedNameSet.has(e.name));
|
|
153
|
+
const cachedEntityNames = new Set(cached.entityNames);
|
|
154
|
+
const cachedRelations = graph.relations.filter(r => cachedEntityNames.has(r.from) && cachedEntityNames.has(r.to));
|
|
155
|
+
return { entities: cachedEntities, relations: cachedRelations };
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
// Phase 7 Sprint 3: Use worker pool for large graphs with low thresholds
|
|
159
|
+
// Phase 8: Respect useWorkerPool flag for testing
|
|
160
|
+
const shouldUseWorkers = this.useWorkerPool &&
|
|
161
|
+
graph.entities.length >= WORKER_MIN_ENTITIES &&
|
|
162
|
+
threshold < WORKER_MAX_THRESHOLD;
|
|
163
|
+
let fuzzyMatched;
|
|
164
|
+
if (shouldUseWorkers) {
|
|
165
|
+
fuzzyMatched = await this.searchWithWorkers(query, threshold, graph.entities);
|
|
166
|
+
}
|
|
167
|
+
else {
|
|
168
|
+
// Perform single-threaded fuzzy search
|
|
169
|
+
fuzzyMatched = this.performFuzzyMatch(graph.entities, queryLower, threshold);
|
|
170
|
+
}
|
|
55
171
|
// Apply tag and importance filters using SearchFilterChain
|
|
56
172
|
const filters = { tags, minImportance, maxImportance };
|
|
57
173
|
const filteredEntities = SearchFilterChain.applyFilters(fuzzyMatched, filters);
|
|
58
174
|
// Apply pagination using SearchFilterChain
|
|
59
175
|
const pagination = SearchFilterChain.validatePagination(offset, limit);
|
|
60
176
|
const paginatedEntities = SearchFilterChain.paginate(filteredEntities, pagination);
|
|
177
|
+
// Phase 4 Sprint 3: Cache the results
|
|
178
|
+
this.fuzzyResultCache.set(cacheKey, {
|
|
179
|
+
entityNames: paginatedEntities.map(e => e.name),
|
|
180
|
+
entityCount: graph.entities.length,
|
|
181
|
+
timestamp: Date.now(),
|
|
182
|
+
});
|
|
183
|
+
// Cleanup old cache entries periodically
|
|
184
|
+
if (this.fuzzyResultCache.size > FUZZY_CACHE_MAX_SIZE / 2) {
|
|
185
|
+
this.cleanupCache();
|
|
186
|
+
}
|
|
61
187
|
const filteredEntityNames = new Set(paginatedEntities.map(e => e.name));
|
|
62
188
|
const filteredRelations = graph.relations.filter(r => filteredEntityNames.has(r.from) && filteredEntityNames.has(r.to));
|
|
63
189
|
return {
|
|
@@ -66,21 +192,42 @@ export class FuzzySearch {
|
|
|
66
192
|
};
|
|
67
193
|
}
|
|
68
194
|
/**
|
|
69
|
-
*
|
|
195
|
+
* Phase 4 Sprint 3: Perform the actual fuzzy matching logic.
|
|
196
|
+
* Extracted from fuzzySearch for cleaner code structure.
|
|
197
|
+
*/
|
|
198
|
+
performFuzzyMatch(entities, queryLower, threshold) {
|
|
199
|
+
return entities.filter(e => {
|
|
200
|
+
const lowercased = this.storage.getLowercased(e.name);
|
|
201
|
+
// Check name match (use pre-computed lowercase)
|
|
202
|
+
const nameLower = lowercased?.name ?? e.name.toLowerCase();
|
|
203
|
+
if (this.isFuzzyMatchLower(nameLower, queryLower, threshold))
|
|
204
|
+
return true;
|
|
205
|
+
// Check type match (use pre-computed lowercase)
|
|
206
|
+
const typeLower = lowercased?.entityType ?? e.entityType.toLowerCase();
|
|
207
|
+
if (this.isFuzzyMatchLower(typeLower, queryLower, threshold))
|
|
208
|
+
return true;
|
|
209
|
+
// Check observations (use pre-computed lowercase array)
|
|
210
|
+
const obsLower = lowercased?.observations ?? e.observations.map(o => o.toLowerCase());
|
|
211
|
+
return obsLower.some(o =>
|
|
212
|
+
// For observations, split into words and check each word
|
|
213
|
+
o
|
|
214
|
+
.split(/\s+/)
|
|
215
|
+
.some(word => this.isFuzzyMatchLower(word, queryLower, threshold)) ||
|
|
216
|
+
// Also check if the observation contains the query
|
|
217
|
+
this.isFuzzyMatchLower(o, queryLower, threshold));
|
|
218
|
+
});
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Check if two already-lowercase strings match with fuzzy logic.
|
|
70
222
|
*
|
|
71
|
-
*
|
|
72
|
-
* - Strings are identical
|
|
73
|
-
* - One contains the other
|
|
74
|
-
* - Levenshtein similarity >= threshold
|
|
223
|
+
* OPTIMIZED: Skips toLowerCase() calls when strings are already lowercase.
|
|
75
224
|
*
|
|
76
|
-
* @param
|
|
77
|
-
* @param
|
|
225
|
+
* @param s1 - First string (already lowercase)
|
|
226
|
+
* @param s2 - Second string (already lowercase)
|
|
78
227
|
* @param threshold - Similarity threshold (0.0 to 1.0)
|
|
79
228
|
* @returns True if strings match fuzzily
|
|
80
229
|
*/
|
|
81
|
-
|
|
82
|
-
const s1 = str1.toLowerCase();
|
|
83
|
-
const s2 = str2.toLowerCase();
|
|
230
|
+
isFuzzyMatchLower(s1, s2, threshold = 0.7) {
|
|
84
231
|
// Exact match
|
|
85
232
|
if (s1 === s2)
|
|
86
233
|
return true;
|
|
@@ -93,4 +240,73 @@ export class FuzzySearch {
|
|
|
93
240
|
const similarity = 1 - distance / maxLength;
|
|
94
241
|
return similarity >= threshold;
|
|
95
242
|
}
|
|
243
|
+
/**
|
|
244
|
+
* Phase 8: Perform fuzzy search using workerpool for parallel processing.
|
|
245
|
+
*
|
|
246
|
+
* Splits entities into chunks and processes them in parallel using worker threads.
|
|
247
|
+
* Falls back to single-threaded search if worker execution fails.
|
|
248
|
+
*
|
|
249
|
+
* @param query - Search query
|
|
250
|
+
* @param threshold - Similarity threshold
|
|
251
|
+
* @param entities - Entities to search
|
|
252
|
+
* @returns Array of matched entities
|
|
253
|
+
*/
|
|
254
|
+
async searchWithWorkers(query, threshold, entities) {
|
|
255
|
+
try {
|
|
256
|
+
// Initialize worker pool lazily using workerpool
|
|
257
|
+
if (!this.workerPool) {
|
|
258
|
+
// Enable ESM module support for Node.js 20+
|
|
259
|
+
// The 'type: module' option is needed for ESM workers but may not be in @types/node
|
|
260
|
+
const workerThreadOpts = { type: 'module' };
|
|
261
|
+
this.workerPool = workerpool.pool(this.workerPath, {
|
|
262
|
+
maxWorkers: Math.max(1, workerpool.cpus - 1),
|
|
263
|
+
workerType: 'thread',
|
|
264
|
+
workerThreadOpts,
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
// Split entities into chunks based on CPU count
|
|
268
|
+
const numWorkers = Math.max(1, workerpool.cpus - 1);
|
|
269
|
+
const chunkSize = Math.ceil(entities.length / numWorkers);
|
|
270
|
+
const chunks = [];
|
|
271
|
+
for (let i = 0; i < entities.length; i += chunkSize) {
|
|
272
|
+
chunks.push(entities.slice(i, i + chunkSize));
|
|
273
|
+
}
|
|
274
|
+
// Prepare worker inputs with lowercased data
|
|
275
|
+
const workerInputs = chunks.map(chunk => ({
|
|
276
|
+
query,
|
|
277
|
+
threshold,
|
|
278
|
+
entities: chunk.map(e => ({
|
|
279
|
+
name: e.name,
|
|
280
|
+
nameLower: e.name.toLowerCase(),
|
|
281
|
+
observations: e.observations.map(o => o.toLowerCase()),
|
|
282
|
+
})),
|
|
283
|
+
}));
|
|
284
|
+
// Execute all chunks in parallel using workerpool with timeout
|
|
285
|
+
const WORKER_TIMEOUT_MS = 30000; // 30 seconds
|
|
286
|
+
const results = await Promise.all(workerInputs.map(input => this.workerPool.exec('searchEntities', [input])
|
|
287
|
+
.timeout(WORKER_TIMEOUT_MS)));
|
|
288
|
+
// Flatten results and extract matched entity names
|
|
289
|
+
const matchedNames = new Set(results.flat().map(r => r.name));
|
|
290
|
+
// Return entities that matched
|
|
291
|
+
return entities.filter(e => matchedNames.has(e.name));
|
|
292
|
+
}
|
|
293
|
+
catch (error) {
|
|
294
|
+
// Worker execution failed - fall back to single-threaded mode
|
|
295
|
+
console.warn(`Worker pool execution failed, falling back to single-threaded fuzzy search: ${error instanceof Error ? error.message : String(error)}`);
|
|
296
|
+
// Use the existing single-threaded implementation
|
|
297
|
+
const queryLower = query.toLowerCase();
|
|
298
|
+
return this.performFuzzyMatch(entities, queryLower, threshold);
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
/**
|
|
302
|
+
* Phase 8: Shutdown the worker pool and clean up resources.
|
|
303
|
+
*
|
|
304
|
+
* Should be called when FuzzySearch is no longer needed.
|
|
305
|
+
*/
|
|
306
|
+
async shutdown() {
|
|
307
|
+
if (this.workerPool) {
|
|
308
|
+
await this.workerPool.terminate();
|
|
309
|
+
this.workerPool = null;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
96
312
|
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query Cost Estimator
|
|
3
|
+
*
|
|
4
|
+
* Phase 10 Sprint 4: Estimates the cost of different search methods
|
|
5
|
+
* and recommends the optimal method based on query characteristics
|
|
6
|
+
* and graph size.
|
|
7
|
+
*
|
|
8
|
+
* @module search/QueryCostEstimator
|
|
9
|
+
*/
|
|
10
|
+
import type { SearchMethod, QueryCostEstimate, QueryCostEstimatorOptions } from '../types/index.js';
|
|
11
|
+
/**
|
|
12
|
+
* Phase 10 Sprint 4: Estimates search query costs and recommends optimal methods.
|
|
13
|
+
*
|
|
14
|
+
* Analyzes query characteristics and graph size to estimate execution time
|
|
15
|
+
* and recommend the most appropriate search method.
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* const estimator = new QueryCostEstimator();
|
|
20
|
+
*
|
|
21
|
+
* // Get estimate for a specific method
|
|
22
|
+
* const estimate = estimator.estimateMethod('ranked', 'test query', 1000);
|
|
23
|
+
*
|
|
24
|
+
* // Get the recommended method for a query
|
|
25
|
+
* const recommendation = estimator.recommendMethod('test query', 1000);
|
|
26
|
+
*
|
|
27
|
+
* // Get estimates for all methods
|
|
28
|
+
* const allEstimates = estimator.estimateAllMethods('test query', 1000);
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
export declare class QueryCostEstimator {
|
|
32
|
+
private options;
|
|
33
|
+
/**
|
|
34
|
+
* Create a new QueryCostEstimator instance.
|
|
35
|
+
*
|
|
36
|
+
* @param options - Optional configuration overrides
|
|
37
|
+
*/
|
|
38
|
+
constructor(options?: QueryCostEstimatorOptions);
|
|
39
|
+
/**
|
|
40
|
+
* Estimate the cost of a specific search method.
|
|
41
|
+
*
|
|
42
|
+
* @param method - The search method to estimate
|
|
43
|
+
* @param query - The search query
|
|
44
|
+
* @param entityCount - Number of entities in the graph
|
|
45
|
+
* @returns Cost estimate for the method
|
|
46
|
+
*/
|
|
47
|
+
estimateMethod(method: SearchMethod, query: string, entityCount: number): QueryCostEstimate;
|
|
48
|
+
/**
|
|
49
|
+
* Internal method to estimate without triggering recursion.
|
|
50
|
+
* @private
|
|
51
|
+
*/
|
|
52
|
+
private estimateMethodInternal;
|
|
53
|
+
/**
|
|
54
|
+
* Get just the recommended method without full estimate (avoids recursion).
|
|
55
|
+
* @private
|
|
56
|
+
*/
|
|
57
|
+
private getRecommendedMethodOnly;
|
|
58
|
+
/**
|
|
59
|
+
* Get estimates for all available search methods.
|
|
60
|
+
*
|
|
61
|
+
* @param query - The search query
|
|
62
|
+
* @param entityCount - Number of entities in the graph
|
|
63
|
+
* @returns Array of estimates for all methods
|
|
64
|
+
*/
|
|
65
|
+
estimateAllMethods(query: string, entityCount: number): QueryCostEstimate[];
|
|
66
|
+
/**
|
|
67
|
+
* Recommend the best search method for a query.
|
|
68
|
+
*
|
|
69
|
+
* @param query - The search query
|
|
70
|
+
* @param entityCount - Number of entities in the graph
|
|
71
|
+
* @param preferredMethods - Optional array of methods to consider (default: all)
|
|
72
|
+
* @returns The recommended method and reason
|
|
73
|
+
*/
|
|
74
|
+
recommendMethod(query: string, entityCount: number, preferredMethods?: SearchMethod[]): {
|
|
75
|
+
method: SearchMethod;
|
|
76
|
+
reason: string;
|
|
77
|
+
estimate: QueryCostEstimate;
|
|
78
|
+
};
|
|
79
|
+
/**
|
|
80
|
+
* Get the base time per entity for a search method.
|
|
81
|
+
* @private
|
|
82
|
+
*/
|
|
83
|
+
private getBaseTimeForMethod;
|
|
84
|
+
/**
|
|
85
|
+
* Calculate a complexity factor based on query characteristics.
|
|
86
|
+
* @private
|
|
87
|
+
*/
|
|
88
|
+
private getQueryComplexityFactor;
|
|
89
|
+
/**
|
|
90
|
+
* Get the complexity level based on entity count.
|
|
91
|
+
* @private
|
|
92
|
+
*/
|
|
93
|
+
private getComplexity;
|
|
94
|
+
/**
|
|
95
|
+
* Generate a human-readable recommendation.
|
|
96
|
+
* @private
|
|
97
|
+
*/
|
|
98
|
+
private getRecommendation;
|
|
99
|
+
/**
|
|
100
|
+
* Score a method based on query characteristics and graph size.
|
|
101
|
+
* Higher score = better fit.
|
|
102
|
+
* @private
|
|
103
|
+
*/
|
|
104
|
+
private scoreMethod;
|
|
105
|
+
/**
|
|
106
|
+
* Get a human-readable reason for why a method was selected.
|
|
107
|
+
* @private
|
|
108
|
+
*/
|
|
109
|
+
private getSelectionReason;
|
|
110
|
+
}
|
|
111
|
+
//# sourceMappingURL=QueryCostEstimator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"QueryCostEstimator.d.ts","sourceRoot":"","sources":["../../src/search/QueryCostEstimator.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EACV,YAAY,EACZ,iBAAiB,EACjB,yBAAyB,EAC1B,MAAM,mBAAmB,CAAC;AAe3B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,OAAO,CAAsC;IAErD;;;;OAIG;gBACS,OAAO,CAAC,EAAE,yBAAyB;IAI/C;;;;;;;OAOG;IACH,cAAc,CACZ,MAAM,EAAE,YAAY,EACpB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,GAClB,iBAAiB;IAMpB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAsBhC;;;;;;OAMG;IACH,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAQ3E;;;;;;;OAOG;IACH,eAAe,CACb,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,gBAAgB,CAAC,EAAE,YAAY,EAAE,GAChC;QAAE,MAAM,EAAE,YAAY,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,iBAAiB,CAAA;KAAE;IAuBxE;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAe5B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAyChC;;;OAGG;IACH,OAAO,CAAC,aAAa;IAUrB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA2BzB;;;;OAIG;IACH,OAAO,CAAC,WAAW;IAwFnB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;CAsC3B"}
|