@sylphx/flow 1.1.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/package.json +1 -1
- package/src/commands/flow-command.ts +28 -0
- package/src/commands/hook-command.ts +10 -230
- package/src/composables/index.ts +0 -1
- package/src/config/servers.ts +35 -78
- package/src/core/interfaces.ts +0 -33
- package/src/domains/index.ts +0 -2
- package/src/index.ts +0 -4
- package/src/services/mcp-service.ts +0 -16
- package/src/targets/claude-code.ts +3 -9
- package/src/targets/functional/claude-code-logic.ts +4 -22
- package/src/targets/opencode.ts +0 -6
- package/src/types/mcp.types.ts +29 -38
- package/src/types/target.types.ts +0 -2
- package/src/types.ts +0 -1
- package/src/utils/sync-utils.ts +106 -0
- package/src/commands/codebase-command.ts +0 -168
- package/src/commands/knowledge-command.ts +0 -161
- package/src/composables/useTargetConfig.ts +0 -45
- package/src/core/formatting/bytes.test.ts +0 -115
- package/src/core/validation/limit.test.ts +0 -155
- package/src/core/validation/query.test.ts +0 -44
- package/src/domains/codebase/index.ts +0 -5
- package/src/domains/codebase/tools.ts +0 -139
- package/src/domains/knowledge/index.ts +0 -10
- package/src/domains/knowledge/resources.ts +0 -537
- package/src/domains/knowledge/tools.ts +0 -174
- package/src/services/search/base-indexer.ts +0 -156
- package/src/services/search/codebase-indexer-types.ts +0 -38
- package/src/services/search/codebase-indexer.ts +0 -647
- package/src/services/search/embeddings-provider.ts +0 -455
- package/src/services/search/embeddings.ts +0 -316
- package/src/services/search/functional-indexer.ts +0 -323
- package/src/services/search/index.ts +0 -27
- package/src/services/search/indexer.ts +0 -380
- package/src/services/search/knowledge-indexer.ts +0 -422
- package/src/services/search/semantic-search.ts +0 -244
- package/src/services/search/tfidf.ts +0 -559
- package/src/services/search/unified-search-service.ts +0 -888
- package/src/services/storage/cache-storage.ts +0 -487
- package/src/services/storage/drizzle-storage.ts +0 -581
- package/src/services/storage/index.ts +0 -15
- package/src/services/storage/lancedb-vector-storage.ts +0 -494
- package/src/services/storage/memory-storage.ts +0 -268
- package/src/services/storage/separated-storage.ts +0 -467
- package/src/services/storage/vector-storage.ts +0 -13
|
@@ -1,422 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Knowledge indexer implementation
|
|
3
|
-
* Extends BaseIndexer for knowledge base indexing
|
|
4
|
-
*
|
|
5
|
-
* Features:
|
|
6
|
-
* - Auto-indexing on startup (mandatory)
|
|
7
|
-
* - File watching for automatic re-indexing on changes
|
|
8
|
-
* - Debounced re-indexing (2 seconds after last change)
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import fs from 'node:fs';
|
|
12
|
-
import path from 'node:path';
|
|
13
|
-
import chokidar from 'chokidar';
|
|
14
|
-
import { logger } from '../../utils/logger.js';
|
|
15
|
-
import { getKnowledgeDir } from '../../utils/paths.js';
|
|
16
|
-
import { VectorStorage } from '../storage/lancedb-vector-storage.js';
|
|
17
|
-
import { BaseIndexer } from './base-indexer.js';
|
|
18
|
-
import type { EmbeddingProvider } from './embeddings.js';
|
|
19
|
-
import { getDefaultEmbeddingProvider } from './embeddings.js';
|
|
20
|
-
import { createIndexer, type Indexer } from './functional-indexer.js';
|
|
21
|
-
import { buildSearchIndex, type SearchIndex } from './tfidf.js';
|
|
22
|
-
|
|
23
|
-
/**
 * Indexer for the markdown knowledge base, built on top of BaseIndexer.
 *
 * Every `.md` file below the knowledge directory is fed into a TF-IDF index.
 * When an embedding provider was supplied, a vector index is built as well.
 * The instance can also watch the directory and schedule a re-index 2 seconds
 * after the last reported change.
 */
class KnowledgeIndexer extends BaseIndexer {
  private embeddingProvider?: EmbeddingProvider;
  private vectorStorage?: VectorStorage;
  private watcher?: chokidar.FSWatcher;
  private reindexTimer?: NodeJS.Timeout;

  /**
   * @param embeddingProvider Optional provider; without it only the TF-IDF index is built.
   * @param options `autoWatch` forces file watching on or off. When it is omitted,
   *   watching starts only if the MCP_SERVER_MODE environment variable is 'true',
   *   which keeps watchers from starting in non-server contexts.
   */
  constructor(embeddingProvider?: EmbeddingProvider, options?: { autoWatch?: boolean }) {
    super({ name: 'knowledge' });
    this.embeddingProvider = embeddingProvider;

    const watchRequested = options?.autoWatch ?? process.env.MCP_SERVER_MODE === 'true';
    if (watchRequested) {
      this.startWatching();
    }
  }

  /**
   * Walk `dir` recursively and read every markdown file found.
   *
   * @returns One entry per file: a `knowledge://` URI derived from the path
   *   relative to `dir` (extension removed, forward slashes) and the file text.
   */
  private scanKnowledgeFiles(dir: string): Array<{ uri: string; content: string }> {
    const collected: Array<{ uri: string; content: string }> = [];

    const walk = (folder: string): void => {
      for (const item of fs.readdirSync(folder, { withFileTypes: true })) {
        const itemPath = path.join(folder, item.name);

        if (item.isDirectory()) {
          walk(itemPath);
          continue;
        }
        if (!item.isFile() || !item.name.endsWith('.md')) {
          continue;
        }

        // Normalise Windows separators so URIs look the same on every platform.
        const slug = path.relative(dir, itemPath).replace(/\.md$/, '').replace(/\\/g, '/');
        collected.push({
          uri: `knowledge://${slug}`,
          content: fs.readFileSync(itemPath, 'utf8'),
        });
      }
    };

    walk(dir);
    return collected;
  }

  /**
   * Build the TF-IDF index and, if possible, the vector index.
   *
   * A failure while building the vector index is logged and leaves
   * `vectorStorage` unset; the TF-IDF index is still returned.
   *
   * @throws Error when the knowledge directory does not exist.
   */
  protected async buildIndex(): Promise<SearchIndex> {
    const knowledgeDir = getKnowledgeDir();
    if (!fs.existsSync(knowledgeDir)) {
      throw new Error(`Knowledge directory not found: ${knowledgeDir}`);
    }

    const documents = this.scanKnowledgeFiles(knowledgeDir);
    logger.info('Found knowledge files', { count: documents.length });

    const tfidfIndex = await buildSearchIndex(documents);

    // Embeddings are optional: skip them without a provider or without files.
    if (!this.embeddingProvider || documents.length === 0) {
      return tfidfIndex;
    }

    logger.info('Building vector index for knowledge');
    try {
      const vectorPath = path.join(
        getKnowledgeDir(),
        '..',
        '.sylphx-flow',
        'knowledge-vectors.hnsw'
      );
      this.vectorStorage = new VectorStorage(
        vectorPath,
        this.embeddingProvider.dimensions || 1536
      );
      await this.vectorStorage.initialize();

      // Embed the files ten at a time.
      const batchSize = 10;
      for (let start = 0; start < documents.length; start += batchSize) {
        const chunk = documents.slice(start, start + batchSize);
        const vectors = await this.embeddingProvider.generateEmbeddings(
          chunk.map((doc) => doc.content)
        );

        for (const [offset, doc] of chunk.entries()) {
          await this.vectorStorage.addDocument({
            id: doc.uri,
            embedding: vectors[offset],
            metadata: {
              type: 'knowledge',
              // Only the first 500 characters are stored with the vector.
              content: doc.content.slice(0, 500),
              category: 'knowledge',
              language: 'markdown',
            },
          });
        }

        logger.info('Processed knowledge files', {
          processed: Math.min(start + batchSize, documents.length),
          total: documents.length,
        });
      }

      await this.vectorStorage.save();
      logger.info('Vector index built successfully');
    } catch (error) {
      logger.error(' Failed to build vector index:', error);
      this.vectorStorage = undefined;
    }

    return tfidfIndex;
  }

  /**
   * Begin watching the knowledge directory for markdown changes.
   *
   * Logs a warning and does nothing when the directory is missing. Errors
   * raised while setting up the watcher are logged, not rethrown, so indexing
   * keeps working without watching.
   */
  private startWatching(): void {
    const knowledgeDir = getKnowledgeDir();
    if (!fs.existsSync(knowledgeDir)) {
      logger.warn('Knowledge directory not found', { knowledgeDir });
      return;
    }

    // Debounce: every event restarts a 2 second countdown to the re-index.
    const scheduleReindex = (): void => {
      if (this.reindexTimer) {
        clearTimeout(this.reindexTimer);
      }
      this.reindexTimer = setTimeout(() => {
        logger.info('Re-indexing knowledge base due to file changes');
        this.clearCache();
        this.startBackgroundIndexing();
      }, 2000);
    };

    try {
      const fsWatcher = chokidar.watch(`${knowledgeDir}/**/*.md`, {
        ignored: /(^|[/\\])\../, // Ignore dotfiles
        persistent: true,
        ignoreInitial: true, // No events for files that already exist
        awaitWriteFinish: {
          stabilityThreshold: 300,
          pollInterval: 100,
        },
      });
      this.watcher = fsWatcher;

      fsWatcher.on('all', (event, filePath) => {
        logger.debug('Knowledge file changed', { event, file: path.basename(filePath) });
        scheduleReindex();
      });

      logger.info('Watching knowledge directory for changes', { knowledgeDir });
    } catch (error) {
      logger.error(' Failed to start file watching:', error);
    }
  }

  /**
   * Cancel any pending re-index and close the file watcher (for cleanup).
   */
  stopWatching(): void {
    if (this.reindexTimer) {
      clearTimeout(this.reindexTimer);
      this.reindexTimer = undefined;
    }

    if (!this.watcher) {
      return;
    }
    this.watcher.close();
    this.watcher = undefined;
    logger.info('Stopped watching knowledge directory');
  }

  /**
   * Cancel a scheduled re-index, then delegate to the base class clearCache().
   */
  clearCache(): void {
    if (this.reindexTimer) {
      clearTimeout(this.reindexTimer);
      this.reindexTimer = undefined;
    }
    super.clearCache();
  }

  /**
   * Vector storage for hybrid search.
   *
   * @returns The storage assigned by the latest vector build, or undefined
   *   when there is no embedding provider or that build failed.
   */
  getVectorStorage(): VectorStorage | undefined {
    return this.vectorStorage;
  }

  /**
   * Whether a vector storage is currently set on this indexer.
   */
  hasVectorSearch(): boolean {
    return typeof this.vectorStorage !== 'undefined';
  }
}
|
|
236
|
-
|
|
237
|
-
// Process-wide instance, created lazily by getKnowledgeIndexer().
let knowledgeIndexer: KnowledgeIndexer | null = null;

/**
 * Return the shared KnowledgeIndexer, creating it on the first call.
 *
 * The arguments are only used when the instance is created; once it exists,
 * later calls return it unchanged and ignore whatever they pass.
 *
 * @param embeddingProvider Provider handed to the constructor on first creation.
 * @param options Constructor options (`autoWatch`) used on first creation.
 */
export function getKnowledgeIndexer(
  embeddingProvider?: EmbeddingProvider,
  options?: { autoWatch?: boolean }
): KnowledgeIndexer {
  if (knowledgeIndexer !== null) {
    return knowledgeIndexer;
  }

  knowledgeIndexer = new KnowledgeIndexer(embeddingProvider, options);
  return knowledgeIndexer;
}
|
|
249
|
-
|
|
250
|
-
/**
 * Resolve the default embedding provider and pass it to getKnowledgeIndexer().
 *
 * getKnowledgeIndexer() returns the existing singleton if there is one, so the
 * provider only takes effect when this call is the one that creates it.
 */
export async function getKnowledgeIndexerWithEmbeddings(): Promise<KnowledgeIndexer> {
  return getKnowledgeIndexer(await getDefaultEmbeddingProvider());
}
|
|
254
|
-
|
|
255
|
-
// ============================================================================
|
|
256
|
-
// FUNCTIONAL ALTERNATIVE (Composition over Inheritance)
|
|
257
|
-
// ============================================================================
|
|
258
|
-
|
|
259
|
-
/**
 * Create a knowledge indexer by composition rather than inheritance.
 *
 * The result is whatever `createIndexer` returns plus a `stopWatching()`
 * method. If the knowledge directory exists when this function runs, a file
 * watcher is started immediately; each change restarts a 2 second timer that
 * clears the cache and starts background indexing again.
 *
 * @param embeddingProvider Optional provider. When given and at least one
 *   file is found, a vector index is built and saved in addition to TF-IDF.
 *   Failures of that step are logged and do not fail the build.
 *
 * @example
 * const indexer = createKnowledgeIndexerFunctional();
 * const status = indexer.getStatus();
 * const index = await indexer.loadIndex();
 */
export function createKnowledgeIndexerFunctional(
  embeddingProvider?: EmbeddingProvider
): Indexer & { stopWatching: () => void } {
  const knowledgeDir = getKnowledgeDir();
  let watcher: chokidar.FSWatcher | undefined;
  let reindexTimer: NodeJS.Timeout | undefined;

  // Recursively read every markdown file below `root` into { uri, content }.
  const collectMarkdown = (root: string): Array<{ uri: string; content: string }> => {
    const collected: Array<{ uri: string; content: string }> = [];

    const walk = (folder: string): void => {
      for (const item of fs.readdirSync(folder, { withFileTypes: true })) {
        const itemPath = path.join(folder, item.name);

        if (item.isDirectory()) {
          walk(itemPath);
          continue;
        }
        if (!item.isFile() || !item.name.endsWith('.md')) {
          continue;
        }

        const slug = path.relative(root, itemPath).replace(/\.md$/, '').replace(/\\/g, '/');
        collected.push({
          uri: `knowledge://${slug}`,
          content: fs.readFileSync(itemPath, 'utf8'),
        });
      }
    };

    walk(root);
    return collected;
  };

  // Embed the documents in batches of ten and save the vector storage.
  // Errors are logged and swallowed so the TF-IDF index is still returned.
  const buildVectorIndex = async (
    provider: EmbeddingProvider,
    documents: Array<{ uri: string; content: string }>
  ): Promise<void> => {
    try {
      const vectorPath = path.join(
        knowledgeDir,
        '..',
        '.sylphx-flow',
        'knowledge-vectors.hnsw'
      );
      const vectorStorage = new VectorStorage(vectorPath, provider.dimensions || 1536);
      await vectorStorage.initialize();

      const batchSize = 10;
      for (let start = 0; start < documents.length; start += batchSize) {
        const chunk = documents.slice(start, start + batchSize);
        const vectors = await provider.generateEmbeddings(chunk.map((doc) => doc.content));

        for (const [offset, doc] of chunk.entries()) {
          await vectorStorage.addDocument({
            id: doc.uri,
            embedding: vectors[offset],
            metadata: {
              type: 'knowledge',
              content: doc.content.slice(0, 500),
              category: 'knowledge',
              language: 'markdown',
            },
          });
        }

        logger.info('Processed knowledge files', {
          processed: Math.min(start + batchSize, documents.length),
          total: documents.length,
        });
      }

      await vectorStorage.save();
      logger.info('Vector index built successfully');
    } catch (error) {
      logger.error(' Failed to build vector index:', error);
    }
  };

  // Core indexer: the only customisation is how the index gets built.
  const coreIndexer = createIndexer({
    name: 'knowledge',
    buildIndex: async (): Promise<SearchIndex> => {
      if (!fs.existsSync(knowledgeDir)) {
        throw new Error(`Knowledge directory not found: ${knowledgeDir}`);
      }

      const documents = collectMarkdown(knowledgeDir);
      logger.info('Found knowledge files', { count: documents.length });

      const tfidfIndex = await buildSearchIndex(documents);

      if (embeddingProvider && documents.length > 0) {
        logger.info('Building vector index for knowledge');
        await buildVectorIndex(embeddingProvider, documents);
      }

      return tfidfIndex;
    },
  });

  // Debounce: every watcher event restarts the 2 second countdown.
  const scheduleReindex = (): void => {
    if (reindexTimer) {
      clearTimeout(reindexTimer);
    }
    reindexTimer = setTimeout(() => {
      logger.info('Re-indexing knowledge base due to file changes');
      coreIndexer.clearCache();
      coreIndexer.startBackgroundIndexing();
    }, 2000);
  };

  if (fs.existsSync(knowledgeDir)) {
    try {
      const fsWatcher = chokidar.watch(`${knowledgeDir}/**/*.md`, {
        ignored: /(^|[/\\])\../,
        persistent: true,
        ignoreInitial: true,
        awaitWriteFinish: {
          stabilityThreshold: 300,
          pollInterval: 100,
        },
      });
      watcher = fsWatcher;

      fsWatcher.on('all', (event, filePath) => {
        logger.debug('Knowledge file changed', { event, file: path.basename(filePath) });
        scheduleReindex();
      });

      logger.info('Watching knowledge directory for changes', { knowledgeDir });
    } catch (error) {
      logger.error(' Failed to start file watching:', error);
    }
  }

  // Cancel a pending re-index and close the watcher, if either exists.
  const stopWatching = (): void => {
    if (reindexTimer) {
      clearTimeout(reindexTimer);
      reindexTimer = undefined;
    }
    if (!watcher) {
      return;
    }
    watcher.close();
    watcher = undefined;
    logger.info('Stopped watching knowledge directory');
  };

  return {
    ...coreIndexer,
    stopWatching,
  };
}
|
|
@@ -1,244 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Semantic search implementation using TF-IDF
|
|
3
|
-
* Runtime indexing with intelligent caching
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import fs from 'node:fs';
|
|
7
|
-
import path from 'node:path';
|
|
8
|
-
import { getKnowledgeDir } from './paths.js';
|
|
9
|
-
import { buildSearchIndex, processQuery, type SearchIndex } from './tfidf.js';
|
|
10
|
-
import { createLogger } from '../../utils/debug-logger.js';
|
|
11
|
-
|
|
12
|
-
// Debug logger for this module, created under the 'search:query' name.
const log = createLogger('search:query');

// Index from the last successful build; null until one completes or after the cache is cleared.
let cachedIndex: SearchIndex | null = null;

// Promise of the build started by loadSearchIndex(), reused so concurrent callers share one build.
let indexingPromise: Promise<SearchIndex> | null = null;

// Mutable progress record that getKnowledgeIndexingStatus() reports from.
const indexingStatus: { isIndexing: boolean; progress: number; error: string | undefined } = {
  isIndexing: false,
  progress: 0,
  error: undefined,
};
|
|
21
|
-
|
|
22
|
-
/**
 * Recursively collect every markdown file below `dir`.
 *
 * @param dir Directory to scan; URIs are built relative to it.
 * @returns One `{ uri, content }` entry per `.md` file. The URI is
 *   `knowledge://` followed by the relative path without the extension and
 *   with forward slashes; `content` is the file read as UTF-8.
 */
function scanKnowledgeFiles(dir: string): Array<{ uri: string; content: string }> {
  const found: Array<{ uri: string; content: string }> = [];

  const walk = (folder: string): void => {
    for (const item of fs.readdirSync(folder, { withFileTypes: true })) {
      const itemPath = path.join(folder, item.name);

      if (item.isDirectory()) {
        walk(itemPath);
        continue;
      }
      if (!(item.isFile() && item.name.endsWith('.md'))) {
        continue;
      }

      // Normalise Windows separators so URIs look the same on every platform.
      const slug = path.relative(dir, itemPath).replace(/\.md$/, '').replace(/\\/g, '/');
      found.push({
        uri: `knowledge://${slug}`,
        content: fs.readFileSync(itemPath, 'utf8'),
      });
    }
  };

  walk(dir);
  return found;
}
|
|
52
|
-
|
|
53
|
-
/**
 * Build the knowledge search index at runtime.
 *
 * Scans the knowledge directory for markdown files, builds the TF-IDF index
 * from them and logs the document and term counts.
 *
 * @returns The freshly built search index.
 * @throws Error when the knowledge directory does not exist.
 */
async function buildKnowledgeIndex(): Promise<SearchIndex> {
  const knowledgeDir = getKnowledgeDir();

  if (!fs.existsSync(knowledgeDir)) {
    throw new Error(`Knowledge directory not found: ${knowledgeDir}`);
  }

  log('Building knowledge search index');
  const files = scanKnowledgeFiles(knowledgeDir);
  log(`Found ${files.length} knowledge files`);

  // Await the builder so the statistics below are read from the resolved
  // index even when buildSearchIndex is asynchronous. Awaiting a plain value
  // is a no-op, so this is also safe for a synchronous builder.
  const index = await buildSearchIndex(files);
  log(`Knowledge index built: ${index.totalDocuments} documents, ${index.idf.size} terms`);

  return index;
}
|
|
72
|
-
|
|
73
|
-
/**
 * Load the search index, building it on first use.
 *
 * - Returns the cached index when one exists.
 * - Joins the build already in flight instead of starting a second one.
 * - Otherwise starts a build and records its progress in `indexingStatus`.
 *
 * A failed build is not remembered: the error is stored in
 * `indexingStatus.error` and rethrown to the callers of that build, but the
 * next call starts a fresh build instead of receiving the same rejection.
 *
 * @returns The search index.
 * @throws Whatever the index build throws (for example a missing knowledge directory).
 */
export async function loadSearchIndex(): Promise<SearchIndex | null> {
  if (cachedIndex) {
    return cachedIndex;
  }

  if (indexingPromise) {
    return indexingPromise;
  }

  indexingStatus.isIndexing = true;
  indexingStatus.progress = 0;
  indexingStatus.error = undefined;

  const pending: Promise<SearchIndex> = buildKnowledgeIndex()
    .then((index) => {
      cachedIndex = index;
      indexingStatus.isIndexing = false;
      indexingStatus.progress = 100;
      return index;
    })
    .catch((error: unknown) => {
      const message = error instanceof Error ? error.message : String(error);
      indexingStatus.isIndexing = false;
      indexingStatus.error = message;

      // Drop the rejected promise so a later call can retry. The identity
      // check avoids discarding a newer build started after a cache clear.
      if (indexingPromise === pending) {
        indexingPromise = null;
      }

      log('Failed to build knowledge index:', message);
      throw error;
    });

  indexingPromise = pending;
  return pending;
}
|
|
108
|
-
|
|
109
|
-
/**
 * Start building the knowledge index in the background without blocking.
 *
 * Does nothing while a build is running or once an index is cached. A failure
 * of the background build is logged and not propagated to the caller.
 */
export function startKnowledgeIndexing() {
  const reportFailure = (error: unknown): void => {
    log('Background knowledge indexing failed:', error instanceof Error ? error.message : String(error));
  };

  if (!indexingStatus.isIndexing && !cachedIndex) {
    log('Starting background knowledge indexing');
    loadSearchIndex().catch(reportFailure);
  }
}
|
|
122
|
-
|
|
123
|
-
/**
 * Snapshot of the current indexing state.
 *
 * @returns A new object holding the `isIndexing`, `progress` and `error`
 *   values of the status record, plus `isReady`, which is true once an index
 *   is cached.
 */
export function getKnowledgeIndexingStatus() {
  const { isIndexing, progress, error } = indexingStatus;
  const isReady = cachedIndex !== null;

  return { isIndexing, progress, isReady, error };
}
|
|
134
|
-
|
|
135
|
-
/**
 * Search the knowledge base by TF-IDF cosine similarity.
 *
 * @param query Free-text query; it is converted to a TF-IDF vector with the index's IDF table.
 * @param options
 *   - `limit`: maximum number of results (default 5).
 *   - `minScore`: results scoring below this are dropped (default 0.01).
 *   - `categories`: when non-empty, only documents whose category is listed
 *     are kept. The category is the first path segment after the URI scheme,
 *     e.g. `knowledge://stacks/react-app` → `stacks`.
 * @returns Matches sorted by descending score. `relevance` is the score as a
 *   rounded percentage. The list is empty when no index is available.
 */
export async function semanticSearch(
  query: string,
  options: {
    limit?: number;
    minScore?: number;
    categories?: string[];
  } = {}
): Promise<
  Array<{
    uri: string;
    score: number;
    matchedTerms: string[];
    relevance: number; // 0-100 percentage
  }>
> {
  const { limit = 5, minScore = 0.01, categories } = options;

  const index = await loadSearchIndex();
  if (!index) {
    return [];
  }

  // Process query into TF-IDF vector
  const queryVector = await processQuery(query, index.idf);

  // The query magnitude is identical for every document, so compute it once.
  let squaredSum = 0;
  for (const weight of queryVector.values()) {
    squaredSum += weight * weight;
  }
  const queryMagnitude = Math.sqrt(squaredSum);

  // Cosine similarity of the query against each document.
  const results = index.documents.map((doc) => {
    let dotProduct = 0;
    const matchedTerms: string[] = [];

    for (const [term, queryScore] of queryVector.entries()) {
      const docScore = doc.terms.get(term);
      if (docScore) {
        dotProduct += queryScore * docScore;
        matchedTerms.push(term);
      }
    }

    // Guard against division by zero for empty queries or empty documents.
    const score =
      queryMagnitude === 0 || doc.magnitude === 0
        ? 0
        : dotProduct / (queryMagnitude * doc.magnitude);

    return {
      uri: doc.uri,
      score,
      matchedTerms,
      relevance: Math.round(score * 100),
    };
  });

  // Strip the "scheme://" prefix before splitting. Splitting the raw URI on
  // '/' yields ['knowledge:', '', 'stacks', ...], so index 1 is always ''.
  const categoryOf = (uri: string): string =>
    uri.replace(/^[a-z][a-z0-9+.-]*:\/\//i, '').split('/')[0] ?? '';

  const wanted = categories && categories.length > 0 ? new Set(categories) : null;
  const inScope = wanted ? results.filter((result) => wanted.has(categoryOf(result.uri))) : results;

  // filter() returns a new array, so the in-place sort never touches `results`.
  return inScope
    .filter((result) => result.score >= minScore)
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
|
|
213
|
-
|
|
214
|
-
/**
 * Summarise the search index, loading (and if necessary building) it first.
 *
 * @returns Document count, number of distinct terms and the index metadata
 *   (`generatedAt`, `version`), or null when no index is available.
 */
export async function getSearchIndexStats(): Promise<{
  loaded: boolean;
  totalDocuments: number;
  uniqueTerms: number;
  generatedAt: string;
  version: string;
} | null> {
  const index = await loadSearchIndex();
  if (!index) {
    return null;
  }

  const { totalDocuments, idf, metadata } = index;
  return {
    loaded: true,
    totalDocuments,
    uniqueTerms: idf.size,
    generatedAt: metadata.generatedAt,
    version: metadata.version,
  };
}
|
|
237
|
-
|
|
238
|
-
/**
 * Forget the cached index and any recorded build promise (useful for testing).
 *
 * Only these two variables are reset; the `indexingStatus` record is left as
 * it is. The next loadSearchIndex() call starts a fresh build.
 */
export function clearSearchIndexCache(): void {
  indexingPromise = null;
  cachedIndex = null;
}
|