@sylphx/flow 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +14 -0
  2. package/package.json +1 -1
  3. package/src/commands/hook-command.ts +10 -230
  4. package/src/composables/index.ts +0 -1
  5. package/src/config/servers.ts +35 -78
  6. package/src/core/interfaces.ts +0 -33
  7. package/src/domains/index.ts +0 -2
  8. package/src/index.ts +0 -4
  9. package/src/services/mcp-service.ts +0 -16
  10. package/src/targets/claude-code.ts +3 -9
  11. package/src/targets/functional/claude-code-logic.ts +4 -22
  12. package/src/targets/opencode.ts +0 -6
  13. package/src/types/mcp.types.ts +29 -38
  14. package/src/types/target.types.ts +0 -2
  15. package/src/types.ts +0 -1
  16. package/src/commands/codebase-command.ts +0 -168
  17. package/src/commands/knowledge-command.ts +0 -161
  18. package/src/composables/useTargetConfig.ts +0 -45
  19. package/src/core/formatting/bytes.test.ts +0 -115
  20. package/src/core/validation/limit.test.ts +0 -155
  21. package/src/core/validation/query.test.ts +0 -44
  22. package/src/domains/codebase/index.ts +0 -5
  23. package/src/domains/codebase/tools.ts +0 -139
  24. package/src/domains/knowledge/index.ts +0 -10
  25. package/src/domains/knowledge/resources.ts +0 -537
  26. package/src/domains/knowledge/tools.ts +0 -174
  27. package/src/services/search/base-indexer.ts +0 -156
  28. package/src/services/search/codebase-indexer-types.ts +0 -38
  29. package/src/services/search/codebase-indexer.ts +0 -647
  30. package/src/services/search/embeddings-provider.ts +0 -455
  31. package/src/services/search/embeddings.ts +0 -316
  32. package/src/services/search/functional-indexer.ts +0 -323
  33. package/src/services/search/index.ts +0 -27
  34. package/src/services/search/indexer.ts +0 -380
  35. package/src/services/search/knowledge-indexer.ts +0 -422
  36. package/src/services/search/semantic-search.ts +0 -244
  37. package/src/services/search/tfidf.ts +0 -559
  38. package/src/services/search/unified-search-service.ts +0 -888
  39. package/src/services/storage/cache-storage.ts +0 -487
  40. package/src/services/storage/drizzle-storage.ts +0 -581
  41. package/src/services/storage/index.ts +0 -15
  42. package/src/services/storage/lancedb-vector-storage.ts +0 -494
  43. package/src/services/storage/memory-storage.ts +0 -268
  44. package/src/services/storage/separated-storage.ts +0 -467
  45. package/src/services/storage/vector-storage.ts +0 -13
@@ -1,559 +0,0 @@
1
- /**
2
- * TF-IDF (Term Frequency-Inverse Document Frequency) implementation
3
- * Used for ranking document relevance in semantic search
4
- */
5
-
6
- import { AdvancedCodeTokenizer } from '../../utils/advanced-tokenizer.js';
7
- import type { SeparatedMemoryStorage } from './separated-storage.js';
8
-
9
- export interface DocumentVector {
10
- uri: string;
11
- terms: Map<string, number>; // term → TF-IDF score
12
- rawTerms: Map<string, number>; // term → raw frequency
13
- magnitude: number; // Vector magnitude for cosine similarity
14
- }
15
-
16
- export interface SearchIndex {
17
- documents: DocumentVector[];
18
- idf: Map<string, number>; // term → IDF score
19
- totalDocuments: number;
20
- metadata: {
21
- generatedAt: string;
22
- version: string;
23
- };
24
- }
25
-
26
- /**
27
- * Build search index from database (shared between CLI and MCP)
28
- */
29
- export async function buildSearchIndexFromDB(
30
- memoryStorage: SeparatedMemoryStorage,
31
- filters?: {
32
- file_extensions?: string[];
33
- path_filter?: string;
34
- exclude_paths?: string[];
35
- }
36
- ): Promise<SearchIndex | null> {
37
- try {
38
- // Get all files from database
39
- let files = await memoryStorage.getAllCodebaseFiles();
40
-
41
- // Apply filters
42
- if (filters) {
43
- if (filters.file_extensions && filters.file_extensions.length > 0) {
44
- files = files.filter((file) =>
45
- filters.file_extensions?.some((ext: string) => file.path.endsWith(ext))
46
- );
47
- }
48
-
49
- if (filters.path_filter) {
50
- files = files.filter((file) => file.path.includes(filters.path_filter!));
51
- }
52
-
53
- if (filters.exclude_paths && filters.exclude_paths.length > 0) {
54
- files = files.filter(
55
- (file) => !filters.exclude_paths?.some((exclude: string) => file.path.includes(exclude))
56
- );
57
- }
58
- }
59
-
60
- if (files.length === 0) {
61
- return null;
62
- }
63
-
64
- // Build search documents - read TF-IDF terms directly from database
65
- const documents = [];
66
- for (const file of files) {
67
- const tfidfDoc = await memoryStorage.getTFIDFDocument(file.path);
68
- if (tfidfDoc) {
69
- // Get TF-IDF terms from database (already calculated)
70
- const tfidfTerms = await memoryStorage.getTFIDFTerms(file.path);
71
- const terms = new Map<string, number>();
72
- const rawTermsMap = new Map<string, number>();
73
-
74
- // Use TF-IDF terms for search scoring
75
- for (const [term, tfidfScore] of Object.entries(tfidfTerms)) {
76
- terms.set(term, tfidfScore as number);
77
- }
78
-
79
- // Use rawTerms for reference
80
- const rawTerms = tfidfDoc.rawTerms || {};
81
- for (const [term, freq] of Object.entries(rawTerms)) {
82
- rawTermsMap.set(term, freq as number);
83
- }
84
-
85
- documents.push({
86
- uri: `file://${file.path}`,
87
- terms,
88
- rawTerms: rawTermsMap,
89
- magnitude: tfidfDoc.magnitude,
90
- });
91
- }
92
- }
93
-
94
- if (documents.length === 0) {
95
- return null;
96
- }
97
-
98
- // Get IDF values from database
99
- const idfRecords = await memoryStorage.getIDFValues();
100
- const idf = new Map<string, number>();
101
- for (const [term, value] of Object.entries(idfRecords)) {
102
- idf.set(term, value as number);
103
- }
104
-
105
- return {
106
- documents,
107
- idf,
108
- totalDocuments: documents.length,
109
- metadata: {
110
- generatedAt: new Date().toISOString(),
111
- version: '1.0.0',
112
- },
113
- };
114
- } catch (error) {
115
- console.error('[ERROR] Failed to build search index from database:', error);
116
- return null;
117
- }
118
- }
119
-
120
- /**
121
- * Calculate Term Frequency (TF)
122
- * TF = (number of times term appears in document) / (total terms in document)
123
- */
124
- function calculateTF(termFrequency: Map<string, number>): Map<string, number> {
125
- const totalTerms = Array.from(termFrequency.values()).reduce((sum, freq) => sum + freq, 0);
126
- const tf = new Map<string, number>();
127
-
128
- for (const [term, freq] of termFrequency.entries()) {
129
- tf.set(term, freq / totalTerms);
130
- }
131
-
132
- return tf;
133
- }
134
-
135
- /**
136
- * Calculate Inverse Document Frequency (IDF)
137
- * IDF = log(total documents / documents containing term)
138
- */
139
- function calculateIDF(
140
- documents: Map<string, number>[],
141
- totalDocuments: number
142
- ): Map<string, number> {
143
- const documentFrequency = new Map<string, number>();
144
-
145
- // Count how many documents contain each term
146
- for (const doc of documents) {
147
- const uniqueTerms = new Set(doc.keys());
148
- for (const term of uniqueTerms) {
149
- documentFrequency.set(term, (documentFrequency.get(term) || 0) + 1);
150
- }
151
- }
152
-
153
- // Calculate IDF for each term
154
- const idf = new Map<string, number>();
155
- for (const [term, docFreq] of documentFrequency.entries()) {
156
- idf.set(term, Math.log(totalDocuments / docFreq));
157
- }
158
-
159
- return idf;
160
- }
161
-
162
- /**
163
- * Calculate TF-IDF scores for a document
164
- */
165
- function calculateTFIDF(tf: Map<string, number>, idf: Map<string, number>): Map<string, number> {
166
- const tfidf = new Map<string, number>();
167
-
168
- for (const [term, tfScore] of tf.entries()) {
169
- const idfScore = idf.get(term) || 0;
170
- tfidf.set(term, tfScore * idfScore);
171
- }
172
-
173
- return tfidf;
174
- }
175
-
176
- /**
177
- * Calculate vector magnitude for cosine similarity
178
- */
179
- function calculateMagnitude(vector: Map<string, number>): number {
180
- let sum = 0;
181
- for (const value of vector.values()) {
182
- sum += value * value;
183
- }
184
- return Math.sqrt(sum);
185
- }
186
-
187
- // Global tokenizer instance for performance
188
- let globalTokenizer: AdvancedCodeTokenizer | null = null;
189
- let tokenizerInitialized = false;
190
-
191
- /**
192
- * Get or create the global tokenizer
193
- */
194
- async function getTokenizer(): Promise<AdvancedCodeTokenizer> {
195
- if (!globalTokenizer) {
196
- globalTokenizer = new AdvancedCodeTokenizer({
197
- modelPath: './models/starcoder2',
198
- });
199
- }
200
-
201
- if (!tokenizerInitialized) {
202
- // Silently initialize - no console output
203
- const originalLog = console.log;
204
- const originalError = console.error;
205
- console.log = () => {}; // Temporarily silence console.log
206
- console.error = () => {}; // Temporarily silence console.error
207
- try {
208
- await globalTokenizer.initialize();
209
- tokenizerInitialized = true;
210
- } finally {
211
- console.log = originalLog; // Restore console.log
212
- console.error = originalError; // Restore console.error
213
- }
214
- }
215
-
216
- return globalTokenizer;
217
- }
218
-
219
- /**
220
- * Extract terms using our advanced tokenizer
221
- */
222
- async function extractTerms(content: string): Promise<Map<string, number>> {
223
- const tokenizer = await getTokenizer();
224
- const result = await tokenizer.tokenize(content);
225
- const terms = new Map<string, number>();
226
-
227
- // Use token scores as TF weights
228
- for (const token of result.tokens) {
229
- const term = token.text.toLowerCase();
230
- const currentScore = terms.get(term) || 0;
231
- terms.set(term, currentScore + token.score);
232
- }
233
-
234
- return terms;
235
- }
236
-
237
- /**
238
- * Extract simple tokens for query processing
239
- */
240
- async function extractQueryTokens(query: string): Promise<string[]> {
241
- const tokenizer = await getTokenizer();
242
- const result = await tokenizer.tokenize(query);
243
-
244
- // Return unique tokens, sorted by score (highest first)
245
- const uniqueTokens = new Map<string, string>();
246
- for (const token of result.tokens) {
247
- const lowerText = token.text.toLowerCase();
248
- if (!uniqueTokens.has(lowerText) || token.score > 0.8) {
249
- uniqueTokens.set(lowerText, token.text);
250
- }
251
- }
252
-
253
- return Array.from(uniqueTokens.values());
254
- }
255
-
256
- export interface BuildIndexProgress {
257
- current: number;
258
- total: number;
259
- fileName: string;
260
- status: 'processing' | 'completed' | 'skipped';
261
- }
262
-
263
- /**
264
- * Build TF-IDF search index from documents using our advanced tokenizer
265
- */
266
- export async function buildSearchIndex(
267
- documents: Array<{ uri: string; content: string }>,
268
- onProgress?: (progress: BuildIndexProgress) => void
269
- ): Promise<SearchIndex> {
270
- // Process documents one by one to avoid hanging
271
- const batchSize = 1; // Process 1 document at a time to avoid hanging
272
- const documentTerms: Array<{ uri: string; terms: Map<string, number> }> = [];
273
-
274
- for (let i = 0; i < documents.length; i += batchSize) {
275
- const batch = documents.slice(i, i + batchSize);
276
-
277
- // Process sequentially to avoid hanging
278
- const batchResults = [];
279
- for (let j = 0; j < batch.length; j++) {
280
- const doc = batch[j];
281
- const fileName = doc.uri.split('/').pop() || doc.uri;
282
-
283
- // Report progress
284
- onProgress?.({
285
- current: i + j + 1,
286
- total: documents.length,
287
- fileName,
288
- status: 'processing',
289
- });
290
-
291
- try {
292
- const result = await extractTerms(doc.content);
293
-
294
- batchResults.push({
295
- uri: doc.uri,
296
- terms: result,
297
- });
298
-
299
- // Report completion
300
- onProgress?.({
301
- current: i + j + 1,
302
- total: documents.length,
303
- fileName,
304
- status: 'completed',
305
- });
306
- } catch (_error) {
307
- batchResults.push({
308
- uri: doc.uri,
309
- terms: new Map<string, number>(),
310
- });
311
-
312
- // Report skip
313
- onProgress?.({
314
- current: i + j + 1,
315
- total: documents.length,
316
- fileName,
317
- status: 'skipped',
318
- });
319
- }
320
- }
321
-
322
- documentTerms.push(...batchResults);
323
- }
324
-
325
- // Calculate IDF scores
326
- const idf = calculateIDF(
327
- documentTerms.map((d) => d.terms),
328
- documents.length
329
- );
330
-
331
- // Calculate TF-IDF for each document
332
- const documentVectors: DocumentVector[] = documentTerms.map((doc) => {
333
- const tf = calculateTF(doc.terms);
334
- const tfidf = calculateTFIDF(tf, idf);
335
- const magnitude = calculateMagnitude(tfidf);
336
-
337
- return {
338
- uri: doc.uri,
339
- terms: tfidf,
340
- rawTerms: doc.terms,
341
- magnitude,
342
- };
343
- });
344
-
345
- return {
346
- documents: documentVectors,
347
- idf,
348
- totalDocuments: documents.length,
349
- metadata: {
350
- generatedAt: new Date().toISOString(),
351
- version: '5.0.0',
352
- tokenizer: 'AdvancedCodeTokenizer',
353
- features: [
354
- 'Industry-leading code understanding',
355
- 'Advanced technical term recognition',
356
- 'Optimized for code search',
357
- 'Simple and effective approach',
358
- 'No unnecessary complexity',
359
- ],
360
- },
361
- };
362
- }
363
-
364
- /**
365
- * Calculate cosine similarity between query and document
366
- */
367
- export function calculateCosineSimilarity(
368
- queryVector: Map<string, number>,
369
- docVector: DocumentVector
370
- ): number {
371
- let dotProduct = 0;
372
-
373
- // Calculate dot product
374
- for (const [term, queryScore] of queryVector.entries()) {
375
- const docScore = docVector.terms.get(term) || 0;
376
- dotProduct += queryScore * docScore;
377
- }
378
-
379
- // Calculate query magnitude
380
- const queryMagnitude = calculateMagnitude(queryVector);
381
-
382
- if (queryMagnitude === 0 || docVector.magnitude === 0) {
383
- return 0;
384
- }
385
-
386
- return dotProduct / (queryMagnitude * docVector.magnitude);
387
- }
388
-
389
- /**
390
- * Process query into TF-IDF vector using database values
391
- */
392
- export async function processQuery(
393
- query: string,
394
- idf: Map<string, number>
395
- ): Promise<Map<string, number>> {
396
- const terms = await extractQueryTokens(query);
397
- const queryVector = new Map<string, number>();
398
-
399
- // 為每個查詢詞使用 IDF 值(查詢本身無 TF-IDF,直接用 IDF)
400
- for (const term of terms) {
401
- const lowerTerm = term.toLowerCase();
402
- const idfValue = idf.get(lowerTerm) || 0;
403
-
404
- // 純粹用 IDF 值,完全信任 StarCoder2 嘅 tokenization
405
- if (idfValue > 0) {
406
- queryVector.set(lowerTerm, idfValue);
407
- }
408
- }
409
-
410
- return queryVector;
411
- }
412
-
413
- /**
414
- * Search documents using TF-IDF and cosine similarity with Advanced Code Tokenizer
415
- */
416
- export async function searchDocuments(
417
- query: string,
418
- index: SearchIndex,
419
- options: {
420
- limit?: number;
421
- minScore?: number;
422
- boostFactors?: {
423
- exactMatch?: number; // Boost for exact term matches
424
- phraseMatch?: number; // Boost for phrase matches
425
- technicalMatch?: number; // Boost for technical term matches
426
- identifierMatch?: number; // Boost for identifier matches
427
- };
428
- } = {}
429
- ): Promise<Array<{ uri: string; score: number; matchedTerms: string[] }>> {
430
- const { limit = 10, minScore = 0, boostFactors = {} } = options;
431
- const {
432
- exactMatch = 1.5,
433
- phraseMatch = 2.0,
434
- technicalMatch = 1.8,
435
- identifierMatch = 1.3,
436
- } = boostFactors;
437
-
438
- // Process query using Advanced Code Tokenizer
439
- const queryVector = await processQuery(query, index.idf);
440
- const queryTokens = (await extractQueryTokens(query)).map((t) => t.toLowerCase());
441
-
442
- // Calculate similarity for each document
443
- const results = index.documents.map((doc) => {
444
- let score = calculateCosineSimilarity(queryVector, doc);
445
-
446
- // Boost for exact term matches with enhanced scoring
447
- const matchedTerms: string[] = [];
448
- for (const token of queryTokens) {
449
- if (doc.rawTerms.has(token)) {
450
- // Apply different boost factors based on term characteristics
451
- let boostFactor = exactMatch;
452
-
453
- // Additional boost for technical terms
454
- if (isTechnicalTerm(token)) {
455
- boostFactor = Math.max(boostFactor, technicalMatch);
456
- }
457
-
458
- // Additional boost for identifiers
459
- if (isIdentifier(token)) {
460
- boostFactor = Math.max(boostFactor, identifierMatch);
461
- }
462
-
463
- score *= boostFactor;
464
- matchedTerms.push(token);
465
- }
466
- }
467
-
468
- // Enhanced phrase match detection (all query terms appear in document)
469
- if (matchedTerms.length === queryTokens.length && queryTokens.length > 1) {
470
- score *= phraseMatch;
471
- }
472
-
473
- // Contextual relevance boost for longer queries
474
- if (queryTokens.length > 3 && matchedTerms.length >= queryTokens.length * 0.7) {
475
- score *= 1.2; // Boost for partial matches on complex queries
476
- }
477
-
478
- return {
479
- uri: doc.uri,
480
- score,
481
- matchedTerms,
482
- };
483
- });
484
-
485
- // Filter and sort
486
- return results
487
- .filter((result) => result.score >= minScore)
488
- .sort((a, b) => b.score - a.score)
489
- .slice(0, limit);
490
- }
491
-
492
- /**
493
- * Check if a term is likely a technical term
494
- */
495
- function isTechnicalTerm(term: string): boolean {
496
- const technicalPatterns = [
497
- /\b[A-Z]{2,}\b/, // Acronyms like HTTP, API, JSON
498
- /\b[A-Z][a-z]+(?:[A-Z][a-z]+)+\b/, // PascalCase like ComponentName
499
- /\b[a-z]+[A-Z][a-z]*\b/, // camelCase like functionName
500
- /\b\w+(?:Dir|Config|File|Path|Data|Service|Manager|Handler)\b/, // Common suffixes
501
- /\b(?:get|set|is|has|can|should|will|do)[A-Z]\w*\b/, // Common prefixes
502
- /\b(?:http|https|json|xml|yaml|sql|api|url|uri)\b/, // Technical keywords
503
- ];
504
-
505
- return technicalPatterns.some((pattern) => pattern.test(term));
506
- }
507
-
508
- /**
509
- * Check if a term is likely an identifier
510
- */
511
- function isIdentifier(term: string): boolean {
512
- // Identifiers typically contain letters and numbers, maybe underscores
513
- return /^[a-zA-Z][a-zA-Z0-9_]*$/.test(term) && term.length > 1;
514
- }
515
-
516
- /**
517
- * Serialize search index to JSON
518
- */
519
- export function serializeIndex(index: SearchIndex): string {
520
- const serializable = {
521
- documents: index.documents.map((doc) => ({
522
- uri: doc.uri,
523
- terms: Array.from(doc.terms.entries()),
524
- rawTerms: Array.from(doc.rawTerms.entries()),
525
- magnitude: doc.magnitude,
526
- })),
527
- idf: Array.from(index.idf.entries()),
528
- totalDocuments: index.totalDocuments,
529
- metadata: index.metadata,
530
- };
531
-
532
- return JSON.stringify(serializable, null, 2);
533
- }
534
-
535
- /**
536
- * Deserialize search index from JSON
537
- */
538
- export function deserializeIndex(json: string): SearchIndex {
539
- const data = JSON.parse(json);
540
-
541
- return {
542
- documents: data.documents.map(
543
- (doc: {
544
- uri: string;
545
- terms: [string, number][];
546
- rawTerms: [string, number][];
547
- magnitude: number;
548
- }) => ({
549
- uri: doc.uri,
550
- terms: new Map(doc.terms),
551
- rawTerms: new Map(doc.rawTerms),
552
- magnitude: doc.magnitude,
553
- })
554
- ),
555
- idf: new Map(data.idf),
556
- totalDocuments: data.totalDocuments,
557
- metadata: data.metadata,
558
- };
559
- }