@sylphx/flow 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/src/commands/hook-command.ts +10 -230
- package/src/composables/index.ts +0 -1
- package/src/config/servers.ts +35 -78
- package/src/core/interfaces.ts +0 -33
- package/src/domains/index.ts +0 -2
- package/src/index.ts +0 -4
- package/src/services/mcp-service.ts +0 -16
- package/src/targets/claude-code.ts +3 -9
- package/src/targets/functional/claude-code-logic.ts +4 -22
- package/src/targets/opencode.ts +0 -6
- package/src/types/mcp.types.ts +29 -38
- package/src/types/target.types.ts +0 -2
- package/src/types.ts +0 -1
- package/src/commands/codebase-command.ts +0 -168
- package/src/commands/knowledge-command.ts +0 -161
- package/src/composables/useTargetConfig.ts +0 -45
- package/src/core/formatting/bytes.test.ts +0 -115
- package/src/core/validation/limit.test.ts +0 -155
- package/src/core/validation/query.test.ts +0 -44
- package/src/domains/codebase/index.ts +0 -5
- package/src/domains/codebase/tools.ts +0 -139
- package/src/domains/knowledge/index.ts +0 -10
- package/src/domains/knowledge/resources.ts +0 -537
- package/src/domains/knowledge/tools.ts +0 -174
- package/src/services/search/base-indexer.ts +0 -156
- package/src/services/search/codebase-indexer-types.ts +0 -38
- package/src/services/search/codebase-indexer.ts +0 -647
- package/src/services/search/embeddings-provider.ts +0 -455
- package/src/services/search/embeddings.ts +0 -316
- package/src/services/search/functional-indexer.ts +0 -323
- package/src/services/search/index.ts +0 -27
- package/src/services/search/indexer.ts +0 -380
- package/src/services/search/knowledge-indexer.ts +0 -422
- package/src/services/search/semantic-search.ts +0 -244
- package/src/services/search/tfidf.ts +0 -559
- package/src/services/search/unified-search-service.ts +0 -888
- package/src/services/storage/cache-storage.ts +0 -487
- package/src/services/storage/drizzle-storage.ts +0 -581
- package/src/services/storage/index.ts +0 -15
- package/src/services/storage/lancedb-vector-storage.ts +0 -494
- package/src/services/storage/memory-storage.ts +0 -268
- package/src/services/storage/separated-storage.ts +0 -467
- package/src/services/storage/vector-storage.ts +0 -13
|
@@ -1,581 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Drizzle ORM-based storage for Sylphx Flow
|
|
3
|
-
* Type-safe database operations replacing raw SQL
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { and, count, desc, eq, like } from 'drizzle-orm';
|
|
7
|
-
import { type Database, DrizzleDatabase } from '../../db/index.js';
|
|
8
|
-
import type {
|
|
9
|
-
CodebaseFile,
|
|
10
|
-
NewMemory,
|
|
11
|
-
NewTfidfIdf,
|
|
12
|
-
NewTfidfTerm,
|
|
13
|
-
TfidfDocument,
|
|
14
|
-
} from '../../db/schema.js';
|
|
15
|
-
import * as schema from '../../db/schema.js';
|
|
16
|
-
import { executeOperation, ValidationError } from '../../utils/database-errors.js';
|
|
17
|
-
|
|
18
|
-
// Memory entry interface (backward compatibility)
|
|
19
|
-
/**
 * Shape of a memory record as returned to callers (kept for backward
 * compatibility with the pre-Drizzle storage API).
 */
export interface MemoryEntry {
  /** Entry key; records are looked up by key together with namespace. */
  key: string;

  /** Namespace the entry belongs to. */
  namespace: string;

  /** Stored payload after JSON deserialization (raw string if it was not valid JSON). */
  value: unknown;

  /** Numeric time of the last write, taken from `Date#getTime()`. */
  timestamp: number;

  /** ISO-8601 string recorded when the entry was first inserted. */
  created_at: string;

  /** ISO-8601 string recorded on the most recent write. */
  updated_at: string;
}
|
|
27
|
-
|
|
28
|
-
// Drizzle-based memory storage
|
|
29
|
-
/**
 * Drizzle ORM-backed storage for Sylphx Flow.
 *
 * Covers two areas:
 *  - namespaced key/value "memory" entries (`set`, `get`, `search`, ...)
 *  - the codebase index cache: file records, TF-IDF documents/terms/IDF
 *    values and free-form index metadata.
 *
 * Values are persisted as JSON strings and parsed again on read.
 */
export class DrizzleMemoryStorage {
  private drizzleDb: DrizzleDatabase;
  private db: Database;

  /**
   * @param options Forwarded unchanged to `DrizzleDatabase`.
   */
  constructor(options?: { useHomeDir?: boolean }) {
    this.drizzleDb = new DrizzleDatabase(options);
    this.db = this.drizzleDb.db;
  }

  /** Initializes the underlying `DrizzleDatabase`. */
  async initialize(): Promise<void> {
    await this.drizzleDb.initialize();
  }

  /**
   * Serializes a value to a JSON string.
   *
   * `JSON.stringify` yields `undefined` (not a string) for `undefined`,
   * functions and symbols; those are stored as the JSON literal `null` so the
   * declared `string` return type always holds.
   *
   * @throws Error when the value cannot be serialized (e.g. circular references).
   */
  private safeSerialize(value: unknown): string {
    let serialized: string | undefined;
    try {
      serialized = JSON.stringify(value);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to serialize value: ${reason}`);
    }
    return serialized ?? 'null';
  }

  /**
   * Parses a stored JSON string. If parsing fails the raw string is returned
   * unchanged, so values written by other means remain readable.
   */
  private safeDeserialize(value: string): any {
    try {
      return JSON.parse(value);
    } catch {
      // Return raw string if JSON parsing fails
      return value;
    }
  }

  /**
   * Creates or updates the entry identified by `key` + `namespace`.
   *
   * The existence check and the following write are separate statements, so
   * this is not atomic with respect to concurrent writers of the same key.
   *
   * @throws ValidationError when `key` or `namespace` is empty or not a string.
   */
  async set(key: string, value: any, namespace = 'default'): Promise<void> {
    // Input validation
    if (!key || typeof key !== 'string') {
      throw new ValidationError('Key must be a non-empty string', 'key', key);
    }
    if (!namespace || typeof namespace !== 'string') {
      throw new ValidationError('Namespace must be a non-empty string', 'namespace', namespace);
    }

    await executeOperation(
      'memory.set',
      async () => {
        const now = new Date();
        const timestamp = now.getTime();
        const created_at = now.toISOString();
        const updated_at = created_at;
        const serializedValue = this.safeSerialize(value);

        const existing = await this.get(key, namespace);

        if (existing) {
          // Update in place; created_at is intentionally left untouched.
          await this.db
            .update(schema.memory)
            .set({
              value: serializedValue,
              timestamp,
              updated_at,
            })
            .where(and(eq(schema.memory.key, key), eq(schema.memory.namespace, namespace)));
        } else {
          const newMemory: NewMemory = {
            key,
            namespace,
            value: serializedValue,
            timestamp,
            created_at,
            updated_at,
          };

          await this.db.insert(schema.memory).values(newMemory);
        }
      },
      { key, namespace }
    );
  }

  /**
   * Fetches a single entry.
   *
   * @returns The entry with its value deserialized, or `null` when no row matches.
   */
  async get(key: string, namespace = 'default'): Promise<MemoryEntry | null> {
    const result = await this.db
      .select()
      .from(schema.memory)
      .where(and(eq(schema.memory.key, key), eq(schema.memory.namespace, namespace)))
      .limit(1);

    if (result.length === 0) {
      return null;
    }

    const row = result[0];
    return {
      key: row.key,
      namespace: row.namespace,
      value: this.safeDeserialize(row.value),
      timestamp: row.timestamp,
      created_at: row.created_at,
      updated_at: row.updated_at,
    };
  }

  /** Returns every entry across all namespaces, newest `timestamp` first. */
  async getAll(): Promise<MemoryEntry[]> {
    const result = await this.db
      .select()
      .from(schema.memory)
      .orderBy(desc(schema.memory.timestamp));

    return result.map((row) => ({
      key: row.key,
      namespace: row.namespace,
      value: this.safeDeserialize(row.value),
      timestamp: row.timestamp,
      created_at: row.created_at,
      updated_at: row.updated_at,
    }));
  }

  /**
   * Finds entries whose key matches `pattern`, newest first, capped at 1000 rows.
   *
   * `*` in the pattern is translated to the SQL `%` wildcard. Only letters,
   * digits, whitespace, `*`, `_` and `-` are accepted.
   * NOTE(review): `_` passes validation and is forwarded to LIKE unescaped,
   * where SQL treats it as a single-character wildcard — confirm this is intended.
   *
   * @param namespace Restricts the search to one namespace; omitted or `'all'`
   *   searches every namespace.
   * @throws Error when the pattern is empty or contains other characters, or
   *   when the namespace contains characters outside `[a-zA-Z0-9_-]`.
   */
  async search(pattern: string, namespace?: string): Promise<MemoryEntry[]> {
    if (!pattern || typeof pattern !== 'string') {
      throw new Error('Search pattern must be a non-empty string');
    }

    // Reject (rather than silently strip) anything outside the whitelist.
    if (/[^a-zA-Z0-9\s*_-]/.test(pattern)) {
      throw new Error('Search pattern contains invalid characters');
    }

    const searchPattern = pattern.replace(/\*/g, '%');
    const conditions = [like(schema.memory.key, searchPattern)];

    if (namespace && namespace !== 'all') {
      if (!/^[a-zA-Z0-9_-]+$/.test(namespace)) {
        throw new Error('Namespace contains invalid characters');
      }
      conditions.push(eq(schema.memory.namespace, namespace));
    }

    const result = await this.db
      .select()
      .from(schema.memory)
      .where(and(...conditions))
      .orderBy(desc(schema.memory.timestamp))
      .limit(1000); // Add reasonable limit to prevent excessive results

    return result.map((row) => ({
      key: row.key,
      namespace: row.namespace,
      value: this.safeDeserialize(row.value),
      timestamp: row.timestamp,
      created_at: row.created_at,
      updated_at: row.updated_at,
    }));
  }

  /**
   * Deletes the entry identified by `key` + `namespace`.
   *
   * @returns `true` when the driver reports at least one affected row.
   */
  async delete(key: string, namespace = 'default'): Promise<boolean> {
    const result = await this.db
      .delete(schema.memory)
      .where(and(eq(schema.memory.key, key), eq(schema.memory.namespace, namespace)));

    return result.rowsAffected > 0;
  }

  /**
   * Removes entries of one namespace, or every entry when `namespace` is
   * omitted or `'all'`.
   */
  async clear(namespace?: string): Promise<void> {
    if (namespace && namespace !== 'all') {
      await this.db.delete(schema.memory).where(eq(schema.memory.namespace, namespace));
    } else {
      await this.db.delete(schema.memory);
    }
  }

  /**
   * Summarizes the memory table: total row count, namespaces (sorted by name)
   * with their row counts, and the smallest/largest `created_at` values
   * (`null` when the table is empty).
   */
  async getStats(): Promise<{
    totalEntries: number;
    namespaces: string[];
    namespaceCounts: Record<string, number>;
    oldestEntry: string | null;
    newestEntry: string | null;
  }> {
    // Four aggregate queries issued together instead of one query per namespace.
    const [totalResult, namespaceResult, oldestResult, newestResult] = await Promise.all([
      this.db.select({ count: count() }).from(schema.memory),

      this.db
        .select({
          namespace: schema.memory.namespace,
          count: count(),
        })
        .from(schema.memory)
        .groupBy(schema.memory.namespace)
        .orderBy(schema.memory.namespace),

      this.db
        .select({ created_at: schema.memory.created_at })
        .from(schema.memory)
        .orderBy(schema.memory.created_at)
        .limit(1),

      this.db
        .select({ created_at: schema.memory.created_at })
        .from(schema.memory)
        .orderBy(desc(schema.memory.created_at))
        .limit(1),
    ]);

    const totalEntries = totalResult[0]?.count ?? 0;
    const namespaces = namespaceResult.map((row) => row.namespace);
    const namespaceCounts = Object.fromEntries(
      namespaceResult.map((row) => [row.namespace, row.count])
    );
    const oldestEntry = oldestResult[0]?.created_at ?? null;
    const newestEntry = newestResult[0]?.created_at ?? null;

    return {
      totalEntries,
      namespaces,
      namespaceCounts,
      oldestEntry,
      newestEntry,
    };
  }

  /**
   * Returns all entries grouped as `namespaces[namespace][key] = value`
   * (kept for compatibility with existing callers).
   *
   * The result is assembled with `Object.fromEntries`, which defines own
   * properties, so a namespace or key named `__proto__` / `constructor`
   * cannot write through to built-in prototypes.
   */
  async load(): Promise<{ namespaces: Record<string, Record<string, any>> }> {
    const entries = await this.getAll();
    const grouped = new Map<string, Array<[string, unknown]>>();

    for (const entry of entries) {
      const pairs = grouped.get(entry.namespace);
      if (pairs) {
        pairs.push([entry.key, entry.value]);
      } else {
        grouped.set(entry.namespace, [[entry.key, entry.value]]);
      }
    }

    const namespaces: Record<string, Record<string, any>> = Object.fromEntries(
      Array.from(
        grouped,
        ([namespace, pairs]): [string, Record<string, any>] => [
          namespace,
          Object.fromEntries(pairs),
        ]
      )
    );

    return { namespaces };
  }

  /** Closes the underlying database connection. */
  async close(): Promise<void> {
    await this.drizzleDb.close();
  }

  /** Returns the database path reported by `DrizzleDatabase` (for debugging). */
  getDatabasePath(): string {
    return this.drizzleDb.getDatabasePath();
  }

  // ===== Codebase Index Caching =====

  /** Stores a metadata value under `key`, overwriting any existing value. */
  async setCodebaseMetadata(key: string, value: any): Promise<void> {
    const serializedValue = this.safeSerialize(value);

    await this.db
      .insert(schema.codebaseMetadata)
      .values({
        key,
        value: serializedValue,
      })
      .onConflictDoUpdate({
        target: schema.codebaseMetadata.key,
        set: { value: serializedValue },
      });
  }

  /**
   * Reads a metadata value.
   *
   * @returns The deserialized value, or `null` when the key is absent.
   */
  async getCodebaseMetadata(key: string): Promise<any> {
    const result = await this.db
      .select()
      .from(schema.codebaseMetadata)
      .where(eq(schema.codebaseMetadata.key, key))
      .limit(1);

    if (result.length === 0) {
      return null;
    }

    return this.safeDeserialize(result[0].value);
  }

  /**
   * Inserts a file record or updates the existing record with the same path.
   * `indexedAt` is set to the current time on every call.
   *
   * Optional fields default to `null` only when they are absent; an empty
   * `content` string and a `size` of 0 are stored as given.
   */
  async upsertCodebaseFile(file: {
    path: string;
    mtime: number;
    hash: string;
    content?: string;
    language?: string;
    size?: number;
  }): Promise<void> {
    const fields = {
      mtime: file.mtime,
      hash: file.hash,
      content: file.content ?? null,
      language: file.language ?? null,
      size: file.size ?? null,
      indexedAt: new Date().toISOString(),
    };

    await this.db
      .insert(schema.codebaseFiles)
      .values({ path: file.path, ...fields })
      .onConflictDoUpdate({
        target: schema.codebaseFiles.path,
        set: fields,
      });
  }

  /** Returns the file record for `path`, or `null` when none exists. */
  async getCodebaseFile(path: string): Promise<CodebaseFile | null> {
    const result = await this.db
      .select()
      .from(schema.codebaseFiles)
      .where(eq(schema.codebaseFiles.path, path))
      .limit(1);

    return result.length > 0 ? result[0] : null;
  }

  /** Returns every file record, ordered by path. */
  async getAllCodebaseFiles(): Promise<CodebaseFile[]> {
    return await this.db
      .select({
        path: schema.codebaseFiles.path,
        mtime: schema.codebaseFiles.mtime,
        hash: schema.codebaseFiles.hash,
        content: schema.codebaseFiles.content,
        language: schema.codebaseFiles.language,
        size: schema.codebaseFiles.size,
        indexedAt: schema.codebaseFiles.indexedAt,
      })
      .from(schema.codebaseFiles)
      .orderBy(schema.codebaseFiles.path);
  }

  /** Deletes the file record for `path`. */
  async deleteCodebaseFile(path: string): Promise<void> {
    await this.db.delete(schema.codebaseFiles).where(eq(schema.codebaseFiles.path, path));
  }

  /**
   * Inserts or updates the TF-IDF document row for `filePath`.
   * `rawTerms` is stored as a JSON string.
   */
  async upsertTFIDFDocument(
    filePath: string,
    document: {
      magnitude: number;
      termCount: number;
      rawTerms: Record<string, number>;
    }
  ): Promise<void> {
    const fields = {
      magnitude: document.magnitude,
      termCount: document.termCount,
      rawTerms: this.safeSerialize(document.rawTerms),
    };

    await this.db
      .insert(schema.tfidfDocuments)
      .values({ filePath, ...fields })
      .onConflictDoUpdate({
        target: schema.tfidfDocuments.filePath,
        set: fields,
      });
  }

  /**
   * Returns the TF-IDF document row for `filePath` with `rawTerms`
   * deserialized, or `null` when none exists.
   */
  async getTFIDFDocument(filePath: string): Promise<TfidfDocument | null> {
    const result = await this.db
      .select()
      .from(schema.tfidfDocuments)
      .where(eq(schema.tfidfDocuments.filePath, filePath))
      .limit(1);

    if (result.length === 0) {
      return null;
    }

    const row = result[0];
    return {
      ...row,
      rawTerms: this.safeDeserialize(row.rawTerms),
    };
  }

  /**
   * Replaces all stored terms of `filePath` with `terms` (term -> frequency).
   *
   * The delete and the batched inserts are separate statements; a failure
   * part-way leaves the file with a partial term set.
   */
  async setTFIDFTerms(filePath: string, terms: Record<string, number>): Promise<void> {
    await this.db.delete(schema.tfidfTerms).where(eq(schema.tfidfTerms.filePath, filePath));

    const newTerms: NewTfidfTerm[] = Object.entries(terms).map(([term, frequency]) => ({
      filePath,
      term,
      frequency,
    }));

    // Insert in batches to avoid parameter limits
    const batchSize = 100;
    for (let i = 0; i < newTerms.length; i += batchSize) {
      await this.db.insert(schema.tfidfTerms).values(newTerms.slice(i, i + batchSize));
    }
  }

  /**
   * Returns the stored terms of `filePath` as a term -> frequency map.
   * Built with `Object.fromEntries` so a term such as `__proto__` is kept as
   * an ordinary own property.
   */
  async getTFIDFTerms(filePath: string): Promise<Record<string, number>> {
    const result = await this.db
      .select()
      .from(schema.tfidfTerms)
      .where(eq(schema.tfidfTerms.filePath, filePath));

    return Object.fromEntries(
      result.map((row): [string, number] => [row.term, row.frequency])
    );
  }

  /**
   * Replaces the whole IDF table with `idfValues` (term -> idf).
   *
   * As with `setTFIDFTerms`, the clear and the batched inserts are not atomic.
   */
  async setIDFValues(idfValues: Record<string, number>): Promise<void> {
    await this.db.delete(schema.tfidfIdf);

    const newIdfValues: NewTfidfIdf[] = Object.entries(idfValues).map(([term, idfValue]) => ({
      term,
      idfValue,
    }));

    // Insert in batches
    const batchSize = 100;
    for (let i = 0; i < newIdfValues.length; i += batchSize) {
      await this.db.insert(schema.tfidfIdf).values(newIdfValues.slice(i, i + batchSize));
    }
  }

  /** Returns every stored IDF value as a term -> idf map. */
  async getIDFValues(): Promise<Record<string, number>> {
    const result = await this.db.select().from(schema.tfidfIdf);

    return Object.fromEntries(
      result.map((row): [string, number] => [row.term, row.idfValue])
    );
  }

  /**
   * Returns index statistics: number of file records, number of distinct
   * terms, and the `indexedAt` / `version` metadata values.
   * NOTE(review): `getCodebaseMetadata` yields `null` for a missing key, so
   * `indexedAt` / `version` may be `null` at runtime despite the declared
   * `string | undefined` — confirm callers tolerate that.
   */
  async getCodebaseIndexStats(): Promise<{
    fileCount: number;
    termCount: number;
    indexedAt?: string;
    version?: string;
  }> {
    // Count in the database instead of fetching every path just to measure the array.
    const fileCountResult = await this.db
      .select({ count: count() })
      .from(schema.codebaseFiles);
    const fileCount = fileCountResult[0]?.count ?? 0;

    const termCountResult = await this.db
      .selectDistinct({ term: schema.tfidfTerms.term })
      .from(schema.tfidfTerms);
    const termCount = termCountResult.length;

    const [indexedAt, version] = await Promise.all([
      this.getCodebaseMetadata('indexedAt'),
      this.getCodebaseMetadata('version'),
    ]);

    return {
      fileCount,
      termCount,
      indexedAt,
      version,
    };
  }

  /** Deletes all rows from the five codebase-index tables. */
  async clearCodebaseIndex(): Promise<void> {
    await Promise.all([
      this.db.delete(schema.codebaseFiles),
      this.db.delete(schema.tfidfTerms),
      this.db.delete(schema.tfidfDocuments),
      this.db.delete(schema.tfidfIdf),
      this.db.delete(schema.codebaseMetadata),
    ]);
  }

  /**
   * Lists up to 100 term rows whose term contains `termPattern`, joined with
   * their file record and ordered by descending frequency (debugging/analysis).
   *
   * @throws Error when the pattern is empty or contains characters other than
   *   letters, digits, whitespace, `_` and `-`.
   */
  async searchTFIDFTerms(termPattern: string): Promise<any[]> {
    if (!termPattern || typeof termPattern !== 'string') {
      throw new Error('Term pattern must be a non-empty string');
    }

    // Reject (rather than silently strip) anything outside the whitelist.
    if (/[^a-zA-Z0-9\s_-]/.test(termPattern)) {
      throw new Error('Term pattern contains invalid characters');
    }

    const searchPattern = `%${termPattern}%`;

    const result = await this.db
      .select({
        path: schema.codebaseFiles.path,
        term: schema.tfidfTerms.term,
        frequency: schema.tfidfTerms.frequency,
        language: schema.codebaseFiles.language,
      })
      .from(schema.tfidfTerms)
      .innerJoin(schema.codebaseFiles, eq(schema.tfidfTerms.filePath, schema.codebaseFiles.path))
      .where(like(schema.tfidfTerms.term, searchPattern))
      .orderBy(desc(schema.tfidfTerms.frequency))
      .limit(100);

    return result;
  }
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
 * Storage service - Data persistence layer
 * Re-exports all storage implementations and interfaces.
 */

export { default as CacheStorage } from './cache-storage.js';
// Database drivers
// drizzle-storage.ts declares no default export; its class is the named export
// `DrizzleMemoryStorage`, re-exported here under the existing public alias.
export { DrizzleMemoryStorage as DrizzleStorage } from './drizzle-storage.js';
// Vector storage implementations
export { default as LanceDBVectorStorage } from './lancedb-vector-storage.js';
// Database storage implementations
export { default as MemoryStorage } from './memory-storage.js';
export { default as SeparatedStorage } from './separated-storage.js';
// Core storage interfaces
export type { VectorDocument, VectorStorage } from './vector-storage.js';
|