@sylphx/flow 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. package/CHANGELOG.md +34 -0
  2. package/package.json +1 -1
  3. package/src/commands/flow-command.ts +28 -0
  4. package/src/commands/hook-command.ts +10 -230
  5. package/src/composables/index.ts +0 -1
  6. package/src/config/servers.ts +35 -78
  7. package/src/core/interfaces.ts +0 -33
  8. package/src/domains/index.ts +0 -2
  9. package/src/index.ts +0 -4
  10. package/src/services/mcp-service.ts +0 -16
  11. package/src/targets/claude-code.ts +3 -9
  12. package/src/targets/functional/claude-code-logic.ts +4 -22
  13. package/src/targets/opencode.ts +0 -6
  14. package/src/types/mcp.types.ts +29 -38
  15. package/src/types/target.types.ts +0 -2
  16. package/src/types.ts +0 -1
  17. package/src/utils/sync-utils.ts +106 -0
  18. package/src/commands/codebase-command.ts +0 -168
  19. package/src/commands/knowledge-command.ts +0 -161
  20. package/src/composables/useTargetConfig.ts +0 -45
  21. package/src/core/formatting/bytes.test.ts +0 -115
  22. package/src/core/validation/limit.test.ts +0 -155
  23. package/src/core/validation/query.test.ts +0 -44
  24. package/src/domains/codebase/index.ts +0 -5
  25. package/src/domains/codebase/tools.ts +0 -139
  26. package/src/domains/knowledge/index.ts +0 -10
  27. package/src/domains/knowledge/resources.ts +0 -537
  28. package/src/domains/knowledge/tools.ts +0 -174
  29. package/src/services/search/base-indexer.ts +0 -156
  30. package/src/services/search/codebase-indexer-types.ts +0 -38
  31. package/src/services/search/codebase-indexer.ts +0 -647
  32. package/src/services/search/embeddings-provider.ts +0 -455
  33. package/src/services/search/embeddings.ts +0 -316
  34. package/src/services/search/functional-indexer.ts +0 -323
  35. package/src/services/search/index.ts +0 -27
  36. package/src/services/search/indexer.ts +0 -380
  37. package/src/services/search/knowledge-indexer.ts +0 -422
  38. package/src/services/search/semantic-search.ts +0 -244
  39. package/src/services/search/tfidf.ts +0 -559
  40. package/src/services/search/unified-search-service.ts +0 -888
  41. package/src/services/storage/cache-storage.ts +0 -487
  42. package/src/services/storage/drizzle-storage.ts +0 -581
  43. package/src/services/storage/index.ts +0 -15
  44. package/src/services/storage/lancedb-vector-storage.ts +0 -494
  45. package/src/services/storage/memory-storage.ts +0 -268
  46. package/src/services/storage/separated-storage.ts +0 -467
  47. package/src/services/storage/vector-storage.ts +0 -13
@@ -1,888 +0,0 @@
1
- /**
2
- * Unified Search Service
3
- * Shared search logic for CLI, MCP, and API
4
- */
5
-
6
- import { filter, map, pipe, take } from '../../utils/functional.js';
7
- import { SeparatedMemoryStorage } from '../storage/separated-storage.js';
8
- import { CodebaseIndexer } from './codebase-indexer.js';
9
- import type { EmbeddingProvider } from './embeddings.js';
10
- import { getDefaultEmbeddingProvider } from './embeddings.js';
11
- import { getKnowledgeIndexer } from './knowledge-indexer.js';
12
- import { searchDocuments } from './tfidf.js';
13
-
14
/**
 * A single ranked hit returned by a codebase or knowledge search.
 */
export interface SearchResult {
  /** Identifier of the matched document; the formatters special-case `file://` and `knowledge://` URIs. */
  uri: string;
  /** Relevance score; results are ordered with higher scores first. */
  score: number;
  /** Display name, derived by the searches from the last `/`-separated segment of `uri`. */
  title?: string;
  /** Optional excerpt shown in a fenced block by the formatters. */
  content?: string;
  /** Extra producer-defined data; not populated by the searches in this file. */
  metadata?: any;
}
21
-
22
/**
 * Optional knobs accepted by the search functions.
 * Knowledge search only reads `limit` and `include_content`.
 */
export interface SearchOptions {
  /** Maximum number of results to return (the searches default to 10). */
  limit?: number;
  /** Whether results should carry a `content` field (the searches default to true). */
  include_content?: boolean;
  /** Keep only files whose path ends with one of these suffixes. */
  file_extensions?: string[];
  /** Keep only files whose path contains this substring. */
  path_filter?: string;
  /** Drop files whose path contains any of these substrings. */
  exclude_paths?: string[];
  /** Minimum score a codebase result needs to be returned (codebase search defaults to 0.001). */
  min_score?: number;
}
30
-
31
/**
 * Snapshot of the indexing state of both search backends.
 */
export interface SearchStatus {
  /** State of the codebase (source file) index. */
  codebase: {
    /** True when at least one codebase file is stored. */
    indexed: boolean;
    /** Number of stored codebase files. */
    fileCount: number;
    /** Timestamp reported by the storage's index stats, if any. */
    indexedAt?: string;
    /** Whether the codebase indexer reports an indexing run in progress. */
    isIndexing?: boolean;
    /** Progress value reported by the codebase indexer. */
    progress?: number;
    /** File the codebase indexer reports as currently being processed. */
    currentFile?: string;
  };
  /** State of the knowledge base index. */
  knowledge: {
    /** True when the knowledge index could be loaded. */
    indexed: boolean;
    /** Document count taken from the loaded knowledge index. */
    documentCount: number;
    /** Whether the knowledge indexer reports an indexing run in progress. */
    isIndexing?: boolean;
    /** Progress value reported by the knowledge indexer. */
    progress?: number;
  };
}
47
-
48
/**
 * Collaborators that may be injected into the search service.
 * Every field is optional; the factory supplies a default for the storage and
 * the knowledge indexer when they are omitted.
 */
export interface SearchServiceDependencies {
  /** Storage that holds the indexed codebase files. */
  readonly memoryStorage?: SeparatedMemoryStorage;
  /** Indexer that loads and reports on the knowledge base index. */
  readonly knowledgeIndexer?: ReturnType<typeof getKnowledgeIndexer>;
  /** Indexer used for codebase status and file watching. */
  readonly codebaseIndexer?: CodebaseIndexer;
  /** Provider used to embed queries for vector search. */
  readonly embeddingProvider?: EmbeddingProvider;
}
58
-
59
/**
 * UnifiedSearchService Interface
 * The public surface shared by the CLI, MCP and API entry points.
 */
export interface UnifiedSearchService {
  /** Prepare storage and optional embedding support; call before searching. */
  readonly initialize: () => Promise<void>;

  /** Report the indexing state of the codebase and knowledge backends. */
  readonly getStatus: () => Promise<SearchStatus>;

  /** Rank indexed codebase files against `query`. */
  readonly searchCodebase: (
    query: string,
    options?: SearchOptions
  ) => Promise<{
    results: SearchResult[];
    totalIndexed: number;
    query: string;
  }>;

  /** Rank knowledge base documents against `query`. */
  readonly searchKnowledge: (
    query: string,
    options?: SearchOptions
  ) => Promise<{
    results: SearchResult[];
    totalIndexed: number;
    query: string;
  }>;

  /** Render results as human-readable text for terminal output. */
  readonly formatResultsForCLI: (
    results: SearchResult[],
    query: string,
    totalIndexed: number
  ) => string;

  /** Render results as a single-text-item MCP response payload. */
  readonly formatResultsForMCP: (
    results: SearchResult[],
    query: string,
    totalIndexed: number
  ) => {
    content: Array<{ type: 'text'; text: string }>;
  };

  /** List the URIs of all documents in the knowledge index. */
  readonly getAvailableKnowledgeURIs: () => Promise<string[]>;

  /** Start watching the codebase for changes. */
  readonly startCodebaseWatching: () => void;

  /** Stop watching the codebase. */
  readonly stopCodebaseWatching: () => void;
}
98
-
99
/**
 * Closure-held state of a search service instance.
 * Only the storage is fixed for the lifetime of the service; the other
 * collaborators may be assigned after construction.
 */
interface UnifiedSearchServiceState {
  /** Storage holding the indexed codebase files. */
  readonly memoryStorage: SeparatedMemoryStorage;
  /** Current knowledge indexer. */
  knowledgeIndexer: ReturnType<typeof getKnowledgeIndexer>;
  /** Codebase indexer, created on demand when watching starts. */
  codebaseIndexer?: CodebaseIndexer;
  /** Embedding provider, present only when injected or resolved during initialization. */
  embeddingProvider?: EmbeddingProvider;
}
108
-
109
/**
 * Create Unified Search Service (Factory Function)
 * Shared search logic for CLI, MCP, and API.
 *
 * @param dependencies Optional collaborators. Missing storage / knowledge
 *   indexer are replaced by defaults; an injected knowledge indexer is kept
 *   for the whole lifetime of the service.
 * @returns The service object exposing search, formatting and watching operations.
 */
export const createUnifiedSearchService = (
  dependencies: SearchServiceDependencies = {}
): UnifiedSearchService => {
  /** Boost factors used for every TF-IDF search over the knowledge base. */
  const knowledgeBoostFactors = {
    exactMatch: 1.5,
    phraseMatch: 2.0,
    technicalMatch: 1.8,
    identifierMatch: 1.3,
  };

  /** Snippet limits used when extracting matched lines from a file. */
  const maxSnippetLines = 3;
  const maxSnippetLineLength = 100;

  // Mutable state in closure
  const state: UnifiedSearchServiceState = {
    memoryStorage: dependencies.memoryStorage ?? new SeparatedMemoryStorage(),
    knowledgeIndexer: dependencies.knowledgeIndexer ?? getKnowledgeIndexer(),
    codebaseIndexer: dependencies.codebaseIndexer,
    embeddingProvider: dependencies.embeddingProvider,
  };

  /**
   * Initialize search service: prepares storage and, when an OpenAI API key is
   * configured and no provider was injected, resolves the default embedding provider.
   */
  const initialize = async (): Promise<void> => {
    await state.memoryStorage.initialize();

    // Initialize embedding provider only if API key exists
    if (!state.embeddingProvider && process.env.OPENAI_API_KEY) {
      state.embeddingProvider = await getDefaultEmbeddingProvider();
    }

    // Re-resolve the default knowledge indexer with the embedding provider (or
    // undefined). An injected indexer is left untouched so that callers using
    // dependency injection (e.g. tests with mocks) keep their instance.
    if (!dependencies.knowledgeIndexer) {
      state.knowledgeIndexer = getKnowledgeIndexer(state.embeddingProvider);
    }
  };

  /**
   * Get search status for both the codebase and the knowledge base.
   */
  const getStatus = async (): Promise<SearchStatus> => {
    // Codebase status
    const codebaseFiles = await state.memoryStorage.getAllCodebaseFiles();
    const codebaseStats = await state.memoryStorage.getCodebaseIndexStats();
    const codebaseIndexingStatus = state.codebaseIndexer?.getStatus();

    // Knowledge status
    const knowledgeStatus = state.knowledgeIndexer.getStatus();
    let knowledgeIndexed = false;
    let knowledgeDocCount = 0;

    try {
      const knowledgeIndex = await state.knowledgeIndexer.loadIndex();
      knowledgeIndexed = true;
      knowledgeDocCount = knowledgeIndex.totalDocuments;
    } catch {
      // Deliberate best effort: a load failure is reported as "not indexed yet".
    }

    return {
      codebase: {
        indexed: codebaseFiles.length > 0,
        fileCount: codebaseFiles.length,
        indexedAt: codebaseStats.indexedAt,
        isIndexing: codebaseIndexingStatus?.isIndexing || false,
        progress: codebaseIndexingStatus?.progress || 0,
        currentFile: codebaseIndexingStatus?.currentFile,
      },
      knowledge: {
        indexed: knowledgeIndexed,
        documentCount: knowledgeDocCount,
        isIndexing: knowledgeStatus.isIndexing,
        progress: knowledgeStatus.progress,
      },
    };
  };

  /**
   * Collect up to `maxSnippetLines` lines of `content` that contain any of the
   * matched terms (case-insensitive), each truncated to `maxSnippetLineLength`.
   */
  const extractSnippet = (content: string, matchedTerms: string[]): string => {
    // Lower-case the terms once instead of once per line.
    const needles = matchedTerms.map((term) => term.toLowerCase());
    const snippet: string[] = [];

    for (const line of content.split('\n')) {
      if (snippet.length >= maxSnippetLines) {
        break;
      }
      const lowered = line.toLowerCase();
      if (needles.some((needle) => lowered.includes(needle))) {
        snippet.push(line.substring(0, maxSnippetLineLength));
      }
    }

    return snippet.join('\n');
  };

  /**
   * Search codebase - shared by CLI and MCP.
   *
   * @throws Error when nothing is indexed, or when the filtered index has no searchable content.
   */
  const searchCodebase = async (
    query: string,
    options: SearchOptions = {}
  ): Promise<{
    results: SearchResult[];
    totalIndexed: number;
    query: string;
  }> => {
    const {
      limit = 10,
      include_content = true,
      file_extensions,
      path_filter,
      exclude_paths,
      min_score = 0.001, // Default: filter out zero-score results
    } = options;

    // Check if codebase is indexed
    const allFiles = await state.memoryStorage.getAllCodebaseFiles();
    if (allFiles.length === 0) {
      throw new Error('Codebase not indexed yet. Run "sylphx search reindex" first.');
    }

    // A path passes when it satisfies every filter that was actually supplied.
    const passesFilters = (path: string): boolean =>
      (!file_extensions?.length || file_extensions.some((ext) => path.endsWith(ext))) &&
      (!path_filter || path.includes(path_filter)) &&
      (!exclude_paths?.length || !exclude_paths.some((exclude) => path.includes(exclude)));

    if (!allFiles.some((file) => passesFilters(file.path))) {
      return {
        results: [],
        totalIndexed: allFiles.length,
        query,
      };
    }

    // Use TF-IDF index from database to avoid rebuilding
    const { buildSearchIndexFromDB, processQuery } = await import('./tfidf.js');
    const index = await buildSearchIndexFromDB(state.memoryStorage, {
      file_extensions,
      path_filter,
      exclude_paths,
    });

    if (!index) {
      throw new Error('No searchable content found');
    }

    // Process query TF-IDF vector using database values
    const queryVector = await processQuery(query, index.idf);

    // Calculate query magnitude
    let queryMagnitude = 0;
    for (const value of queryVector.values()) {
      queryMagnitude += value * value;
    }
    queryMagnitude = Math.sqrt(queryMagnitude);

    // Score every document with plain cosine similarity (no extra boosting),
    // then rank and cut BEFORE touching storage, so file contents are only
    // read for the hits that are actually returned.
    const topHits = index.documents
      .map((doc) => {
        let dotProduct = 0;
        const matchedTerms: string[] = [];

        for (const [term, queryScore] of queryVector.entries()) {
          const docScore = doc.terms.get(term) || 0;
          if (docScore > 0) {
            dotProduct += queryScore * docScore;
            matchedTerms.push(term);
          }
        }

        let similarity = 0;
        if (queryMagnitude > 0 && doc.magnitude > 0) {
          similarity = dotProduct / (queryMagnitude * doc.magnitude);
        }

        // `|| 0` normalises a NaN similarity to 0.
        return { uri: doc.uri, score: similarity || 0, matchedTerms };
      })
      .filter((hit) => hit.score >= min_score)
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);

    // Convert result format, attaching a snippet when requested
    const results: SearchResult[] = [];
    for (const hit of topHits) {
      const filename = hit.uri?.replace('file://', '') || 'Unknown';
      let content: string | undefined;

      if (include_content && hit.matchedTerms.length > 0) {
        const file = await state.memoryStorage.getCodebaseFile(filename);
        if (file?.content) {
          // An empty snippet is reported as "no content".
          content = extractSnippet(file.content, hit.matchedTerms) || undefined;
        }
      }

      results.push({
        uri: hit.uri,
        score: hit.score,
        title: filename.split('/').pop() || filename,
        content,
      });
    }

    return {
      results,
      totalIndexed: allFiles.length,
      query,
    };
  };

  /**
   * Load the knowledge index, translating a load failure into the
   * "not indexed yet" error callers expect.
   */
  const loadKnowledgeIndexOrThrow = async () => {
    try {
      return await state.knowledgeIndexer.loadIndex();
    } catch {
      throw new Error('Knowledge base not indexed yet');
    }
  };

  /**
   * Search knowledge base - shared by CLI and MCP.
   * Hybrid mode: uses vector search when both a vector storage and an embedding
   * provider are available, and falls back to TF-IDF otherwise (or when the
   * vector search fails).
   *
   * @throws Error('Knowledge base not indexed yet') when the index cannot be loaded.
   *   Failures after the index has loaded propagate with their own message
   *   instead of being reported as a missing index.
   */
  const searchKnowledge = async (
    query: string,
    options: SearchOptions = {}
  ): Promise<{
    results: SearchResult[];
    totalIndexed: number;
    query: string;
  }> => {
    const { limit = 10, include_content = true } = options;

    const index = await loadKnowledgeIndexOrThrow();

    // Single definition of the TF-IDF search used by both fallback paths.
    const runTfidfSearch = () =>
      searchDocuments(query, index, {
        limit,
        boostFactors: knowledgeBoostFactors,
      });

    // Hybrid mode: Check if vector search is available
    const vectorStorage = state.knowledgeIndexer.getVectorStorage();
    const embeddingProvider = state.embeddingProvider;

    let searchResults: Array<{ uri: string; score: number }>;

    if (vectorStorage && embeddingProvider) {
      try {
        const queryEmbeddings = await embeddingProvider.generateEmbeddings([query]);
        const queryEmbedding = queryEmbeddings[0];

        const vectorResults = await vectorStorage.search(queryEmbedding, {
          k: limit,
        });

        searchResults = vectorResults.map((result) => ({
          uri: result.doc.id,
          score: result.similarity,
        }));
      } catch {
        // Fallback to TF-IDF if vector search fails
        searchResults = await runTfidfSearch();
      }
    } else {
      searchResults = await runTfidfSearch();
    }

    const results: SearchResult[] = searchResults.map((result) => ({
      uri: result.uri,
      score: result.score || 0,
      title: result.uri?.split('/').pop() || 'Unknown',
      // Knowledge hits carry an empty placeholder rather than document text.
      content: include_content ? '' : undefined,
    }));

    return {
      results,
      totalIndexed: index.totalDocuments,
      query,
    };
  };

  /**
   * Render one numbered hit. The two output targets differ only in the label
   * used for `knowledge://` URIs and in the text placed before the code fence.
   */
  const formatHit = (
    result: SearchResult,
    position: number,
    knowledgeLabel: string,
    fencePrefix: string
  ): string => {
    let line = `${position}. **${result.title}** (Score: ${result.score.toFixed(3)})`;

    if (result.uri.startsWith('file://')) {
      const filePath = result.uri.replace('file://', '');
      line += `\n 📁 Path: \`${filePath}\``;
    } else if (result.uri.startsWith('knowledge://')) {
      line += `\n 📚 ${knowledgeLabel}: ${result.uri}`;
    } else {
      line += `\n 🔗 URI: ${result.uri}`;
    }

    if (result.content) {
      line += `\n${fencePrefix}\`\`\`\n${result.content}\n\`\`\``;
    }
    return line;
  };

  /**
   * Format search results for CLI output
   */
  const formatResultsForCLI = (
    results: SearchResult[],
    query: string,
    totalIndexed: number
  ): string => {
    if (results.length === 0) {
      return `📭 No results found for "${query}"\n\n**Total indexed files:** ${totalIndexed}`;
    }

    const summary = `✓ Found ${results.length} result(s) for "${query}":\n\n`;
    const formattedResults = results
      .map((result, i) => formatHit(result, i + 1, 'Source', ' '))
      .join('\n\n');

    return summary + formattedResults;
  };

  /**
   * Format search results for MCP response.
   * `_totalIndexed` is accepted for signature parity with the CLI formatter and is not used.
   */
  const formatResultsForMCP = (
    results: SearchResult[],
    query: string,
    _totalIndexed: number
  ): {
    content: Array<{ type: 'text'; text: string }>;
  } => {
    const summary = `Found ${results.length} result(s) for "${query}":\n\n`;
    const formattedResults = results
      .map((result, i) => formatHit(result, i + 1, 'URI', ''))
      .join('\n\n');

    return {
      content: [
        {
          type: 'text',
          text: summary + formattedResults,
        },
      ],
    };
  };

  /**
   * Get all available knowledge URIs - dynamically generated.
   * Returns an empty list when the knowledge index cannot be loaded.
   */
  const getAvailableKnowledgeURIs = async (): Promise<string[]> => {
    try {
      const index = await state.knowledgeIndexer.loadIndex();
      return index.documents.map((doc) => doc.uri);
    } catch {
      return [];
    }
  };

  /**
   * Start codebase file watching
   * IMPORTANT: Only call when codebase tools are enabled in MCP server
   * Prevents stale codebase data from misleading users
   */
  const startCodebaseWatching = (): void => {
    // Create the indexer lazily so that no watcher exists until one is requested.
    if (!state.codebaseIndexer) {
      state.codebaseIndexer = new CodebaseIndexer();
    }
    state.codebaseIndexer.startWatching();
  };

  /**
   * Stop codebase file watching
   * Called when codebase tools are disabled or MCP server shuts down
   */
  const stopCodebaseWatching = (): void => {
    state.codebaseIndexer?.stopWatching();
  };

  // Return service interface
  return {
    initialize,
    getStatus,
    searchCodebase,
    searchKnowledge,
    formatResultsForCLI,
    formatResultsForMCP,
    getAvailableKnowledgeURIs,
    startCodebaseWatching,
    stopCodebaseWatching,
  };
};
530
-
531
- // ============================================================================
532
- // FUNCTIONAL SEARCH PIPELINES (Pure Functions)
533
- // ============================================================================
534
-
535
/**
 * Build a pipeline stage that keeps files whose path ends with one of
 * `extensions`. When no extensions are supplied (undefined or empty), every
 * file passes.
 */
const filterByExtensions = (extensions?: string[]) =>
  filter((file: any) => {
    if (!extensions?.length) {
      return true;
    }
    return extensions.some((suffix) => file.path.endsWith(suffix));
  });
540
-
541
/**
 * Build a pipeline stage that keeps files whose path contains `pathFilter`.
 * A missing or empty filter lets every file through.
 */
const filterByPath = (pathFilter?: string) =>
  filter((file: any) => {
    if (!pathFilter) {
      return true;
    }
    return file.path.includes(pathFilter);
  });
546
-
547
/**
 * Build a pipeline stage that drops files whose path contains any of
 * `excludePaths`. With no exclusions (undefined or empty) every file is kept.
 */
const filterByExcludePaths = (excludePaths?: string[]) =>
  filter((file: any) => {
    if (!excludePaths?.length) {
      return true;
    }
    const isExcluded = excludePaths.some((fragment) => file.path.includes(fragment));
    return !isExcluded;
  });
555
-
556
/**
 * Build a scorer that computes the cosine similarity between a fixed query
 * vector and a document.
 *
 * The returned function reads `doc.terms` (a term → weight map),
 * `doc.magnitude` and `doc.uri`, and yields the document URI, its score and
 * the query terms that had a positive weight in the document. The score is 0
 * when either magnitude is not positive.
 */
const calculateSimilarity =
  (queryVector: Map<string, number>, queryMagnitude: number) => (doc: any) => {
    const matchedTerms: string[] = [];
    let dotProduct = 0;

    // Accumulate the dot product over query terms present in the document.
    queryVector.forEach((queryWeight, term) => {
      const docWeight = doc.terms.get(term) || 0;
      if (docWeight > 0) {
        dotProduct += queryWeight * docWeight;
        matchedTerms.push(term);
      }
    });

    // Guard against dividing by a zero magnitude.
    const comparable = queryMagnitude > 0 && doc.magnitude > 0;
    const score = comparable ? dotProduct / (queryMagnitude * doc.magnitude) : 0;

    return {
      uri: doc.uri,
      score,
      matchedTerms,
    };
  };
585
-
586
/**
 * Pick the first lines of `content` that mention any matched term.
 *
 * Matching is case-insensitive substring matching. Scanning stops once
 * `maxLines` lines were collected, and each collected line is cut to
 * `maxLineLength` characters.
 *
 * @returns The collected lines joined with newlines ('' when nothing matched).
 */
const extractMatchedLines = (
  content: string,
  matchedTerms: string[],
  maxLines = 3,
  maxLineLength = 100
): string => {
  const needles = matchedTerms.map((term) => term.toLowerCase());
  const picked: string[] = [];

  for (const rawLine of content.split('\n')) {
    if (picked.length >= maxLines) {
      break;
    }
    const haystack = rawLine.toLowerCase();
    if (needles.some((needle) => haystack.includes(needle))) {
      picked.push(rawLine.substring(0, maxLineLength));
    }
  }

  return picked.join('\n');
};
607
-
608
/**
 * Build a mapper from an internal scored hit to the public `SearchResult`.
 *
 * The title is the last `/`-separated segment of the URI with the first
 * `file://` removed; `content` is only kept when `includeContent` is true and
 * the hit has non-empty content.
 */
const toSearchResult =
  (includeContent: boolean) =>
  (result: {
    uri: string;
    score: number;
    matchedTerms: string[];
    content?: string;
  }): SearchResult => {
    const path = result.uri?.replace('file://', '') || 'Unknown';
    const lastSegment = path.split('/').pop();
    const keepContent = includeContent && Boolean(result.content);

    return {
      uri: result.uri,
      score: result.score || 0,
      // Fall back to the whole path when it ends with a slash.
      title: lastSegment || path,
      content: keepContent ? result.content : undefined,
    };
  };
627
-
628
/**
 * Build a pipeline stage that keeps only results scoring at least `minScore`.
 */
const filterByMinScore = (minScore: number) =>
  filter((result: SearchResult) => {
    const { score } = result;
    return score >= minScore;
  });
633
-
634
/**
 * Functional searchCodebase implementation
 * Uses pure functions and pipelines instead of imperative code.
 *
 * Documents are scored, filtered by `min_score`, sorted and limited first;
 * file contents are then read from storage only for the hits that are
 * returned, rather than for every matching document.
 *
 * @param storage Storage holding the indexed codebase files.
 * @param query Free-text query.
 * @param options Limit, content and path filtering options.
 * @returns Ranked results, the total number of indexed files and the query.
 * @throws Error when nothing is indexed, or when the filtered index has no searchable content.
 *
 * @example
 * const results = await searchCodebaseFunctional(storage, 'authentication', { limit: 5 });
 */
export async function searchCodebaseFunctional(
  storage: SeparatedMemoryStorage,
  query: string,
  options: SearchOptions = {}
): Promise<{
  results: SearchResult[];
  totalIndexed: number;
  query: string;
}> {
  const {
    limit = 10,
    include_content = true,
    file_extensions,
    path_filter,
    exclude_paths,
    min_score = 0.001,
  } = options;

  // Get all files
  const allFiles = await storage.getAllCodebaseFiles();
  if (allFiles.length === 0) {
    throw new Error('Codebase not indexed yet. Run "sylphx search reindex" first.');
  }

  // Apply filters using functional pipeline
  const files = pipe(
    allFiles,
    filterByExtensions(file_extensions),
    filterByPath(path_filter),
    filterByExcludePaths(exclude_paths)
  );

  if (files.length === 0) {
    return {
      results: [],
      totalIndexed: allFiles.length,
      query,
    };
  }

  // Build index and process query (single import of the TF-IDF module)
  const { buildSearchIndexFromDB, processQuery } = await import('./tfidf.js');
  const index = await buildSearchIndexFromDB(storage, {
    file_extensions,
    path_filter,
    exclude_paths,
  });

  if (!index) {
    throw new Error('No searchable content found');
  }

  const queryVector = await processQuery(query, index.idf);

  // Calculate query magnitude
  const queryMagnitude = Math.sqrt(
    Array.from(queryVector.values()).reduce((sum, val) => sum + val * val, 0)
  );

  // Calculate similarities using functional pipeline
  const scoredHits = pipe(
    index.documents,
    map(calculateSimilarity(queryVector, queryMagnitude))
  );

  // Rank before loading any content: normalise scores, drop weak hits, sort
  // a copy (never the pipeline input) and keep the best `limit`.
  const topHits = pipe(
    scoredHits
      .map((hit) => ({ ...hit, score: hit.score || 0 }))
      .filter((hit) => hit.score >= min_score),
    (hits) => [...hits].sort((a, b) => b.score - a.score),
    take(limit)
  );

  // Load snippets only for the hits that will be returned
  const results = await Promise.all(
    topHits.map(async (hit) => {
      let content = '';
      if (include_content && hit.matchedTerms.length > 0) {
        const filename = hit.uri?.replace('file://', '') || '';
        const file = await storage.getCodebaseFile(filename);
        if (file?.content) {
          content = extractMatchedLines(file.content, hit.matchedTerms);
        }
      }
      return toSearchResult(include_content)({ ...hit, content });
    })
  );

  return {
    results,
    totalIndexed: allFiles.length,
    query,
  };
}
736
-
737
/**
 * Functional searchKnowledge implementation
 * Hybrid mode: uses vector search when both a vector storage and an embedding
 * provider are available, and falls back to TF-IDF otherwise (or when the
 * vector search fails).
 *
 * @param knowledgeIndexer Indexer providing the knowledge index and optional vector storage.
 * @param embeddingProvider Provider used to embed the query; `undefined` forces TF-IDF.
 * @param query Free-text query.
 * @param options Only `limit` and `include_content` are read.
 * @returns Results in the order produced by the search backend, the total
 *   number of indexed documents and the query.
 * @throws Error('Knowledge base not indexed yet') when the index cannot be loaded.
 *   Failures after the index has loaded propagate with their own message
 *   instead of being reported as a missing index.
 *
 * @example
 * const results = await searchKnowledgeFunctional(indexer, embeddingProvider, 'react hooks', { limit: 10 });
 */
export async function searchKnowledgeFunctional(
  knowledgeIndexer: ReturnType<typeof getKnowledgeIndexer>,
  embeddingProvider: EmbeddingProvider | undefined,
  query: string,
  options: SearchOptions = {}
): Promise<{
  results: SearchResult[];
  totalIndexed: number;
  query: string;
}> {
  const { limit = 10, include_content = true } = options;

  // Only a failure to load the index means "not indexed yet".
  const loadIndexOrThrow = async () => {
    try {
      return await knowledgeIndexer.loadIndex();
    } catch {
      throw new Error('Knowledge base not indexed yet');
    }
  };
  const index = await loadIndexOrThrow();

  // Single definition of the TF-IDF search used by both fallback paths.
  const runTfidfSearch = () =>
    searchDocuments(query, index, {
      limit,
      boostFactors: {
        exactMatch: 1.5,
        phraseMatch: 2.0,
        technicalMatch: 1.8,
        identifierMatch: 1.3,
      },
    });

  // Hybrid mode: Check if vector search is available
  const vectorStorage = knowledgeIndexer.getVectorStorage();

  let searchResults: Array<{ uri: string; score: number }>;

  if (vectorStorage && embeddingProvider) {
    try {
      const queryEmbeddings = await embeddingProvider.generateEmbeddings([query]);
      const queryEmbedding = queryEmbeddings[0];

      const vectorResults = await vectorStorage.search(queryEmbedding, {
        k: limit,
      });

      searchResults = vectorResults.map((result) => ({
        uri: result.doc.id,
        score: result.similarity,
      }));
    } catch {
      // Fallback to TF-IDF if vector search fails
      searchResults = await runTfidfSearch();
    }
  } else {
    searchResults = await runTfidfSearch();
  }

  // Map to SearchResult format
  const results: SearchResult[] = searchResults.map((result) => ({
    uri: result.uri,
    score: result.score || 0,
    title: result.uri?.split('/').pop() || 'Unknown',
    // Knowledge hits carry an empty placeholder rather than document text.
    content: include_content ? '' : undefined,
  }));

  return {
    results,
    totalIndexed: index.totalDocuments,
    query,
  };
}
823
-
824
- // ============================================================================
825
- // FACTORY PATTERN & DEPENDENCY INJECTION
826
- // ============================================================================
827
-
828
/**
 * Build a search service from caller-supplied dependencies.
 * This simply forwards to `createUnifiedSearchService`, so omitted
 * dependencies receive that factory's defaults.
 *
 * @example
 * // Service wired with mocks for a test
 * const testService = createSearchService({
 *   memoryStorage: mockStorage,
 *   knowledgeIndexer: mockKnowledgeIndexer,
 * });
 *
 * // Service using a specific embedding provider
 * const customService = createSearchService({
 *   embeddingProvider: myEmbeddingProvider,
 * });
 */
export const createSearchService = (
  dependencies?: SearchServiceDependencies
): UnifiedSearchService => createUnifiedSearchService(dependencies);
849
-
850
/**
 * Shared search service instance, created on first access.
 *
 * IMPORTANT: This is deliberately NOT created when the module loads, so that
 * no file watchers are started during the init command. Access it through
 * getSearchService().
 */
let _searchServiceInstance: UnifiedSearchService | null = null;

/**
 * Return the shared search service, creating it with default dependencies on
 * the first call. This is the preferred way to obtain the service for standard
 * CLI and MCP operation.
 */
export function getSearchService(): UnifiedSearchService {
  if (_searchServiceInstance) {
    return _searchServiceInstance;
  }
  const created = createUnifiedSearchService();
  _searchServiceInstance = created;
  return created;
}
869
-
870
/**
 * Convenience factory for tests: builds a search service from whichever mock
 * dependencies are supplied. Only the four known dependency fields are
 * forwarded; omitted ones are passed as undefined.
 *
 * @example
 * const testService = createTestSearchService({
 *   memoryStorage: mockStorage,
 * });
 */
export const createTestSearchService = (
  mockDependencies: Partial<SearchServiceDependencies> = {}
): UnifiedSearchService => {
  const { memoryStorage, knowledgeIndexer, codebaseIndexer, embeddingProvider } = mockDependencies;

  return createUnifiedSearchService({
    memoryStorage,
    knowledgeIndexer,
    codebaseIndexer,
    embeddingProvider,
  });
};