codecritique 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -114
- package/package.json +10 -9
- package/src/content-retrieval.test.js +775 -0
- package/src/custom-documents.test.js +440 -0
- package/src/feedback-loader.test.js +529 -0
- package/src/llm.test.js +256 -0
- package/src/project-analyzer.test.js +747 -0
- package/src/rag-analyzer.js +12 -0
- package/src/rag-analyzer.test.js +1109 -0
- package/src/rag-review.test.js +317 -0
- package/src/setupTests.js +131 -0
- package/src/zero-shot-classifier-open.test.js +278 -0
- package/src/embeddings/cache-manager.js +0 -364
- package/src/embeddings/constants.js +0 -40
- package/src/embeddings/database.js +0 -921
- package/src/embeddings/errors.js +0 -208
- package/src/embeddings/factory.js +0 -447
- package/src/embeddings/file-processor.js +0 -851
- package/src/embeddings/model-manager.js +0 -337
- package/src/embeddings/similarity-calculator.js +0 -97
- package/src/embeddings/types.js +0 -113
- package/src/pr-history/analyzer.js +0 -579
- package/src/pr-history/bot-detector.js +0 -123
- package/src/pr-history/cli-utils.js +0 -204
- package/src/pr-history/comment-processor.js +0 -549
- package/src/pr-history/database.js +0 -819
- package/src/pr-history/github-client.js +0 -629
- package/src/technology-keywords.json +0 -753
- package/src/utils/command.js +0 -48
- package/src/utils/constants.js +0 -263
- package/src/utils/context-inference.js +0 -364
- package/src/utils/document-detection.js +0 -105
- package/src/utils/file-validation.js +0 -271
- package/src/utils/git.js +0 -232
- package/src/utils/language-detection.js +0 -170
- package/src/utils/logging.js +0 -24
- package/src/utils/markdown.js +0 -132
- package/src/utils/mobilebert-tokenizer.js +0 -141
- package/src/utils/pr-chunking.js +0 -276
- package/src/utils/string-utils.js +0 -28
|
@@ -1,921 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Database Manager Module
|
|
3
|
-
*
|
|
4
|
-
* This module provides centralized database management for embeddings
|
|
5
|
-
* using LanceDB and Apache Arrow.
|
|
6
|
-
*
|
|
7
|
-
* Features:
|
|
8
|
-
* - Database connection management
|
|
9
|
-
* - Table initialization and schema management
|
|
10
|
-
* - Adaptive vector indexing
|
|
11
|
-
* - Project-specific data cleanup
|
|
12
|
-
* - Database maintenance operations
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* @typedef {import('./types.js').DatabaseSchema} DatabaseSchema
|
|
17
|
-
* @typedef {import('@lancedb/lancedb').Connection} LanceDBConnection
|
|
18
|
-
* @typedef {import('@lancedb/lancedb').Table} LanceDBTable
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
import fs from 'node:fs';
|
|
22
|
-
import path from 'node:path';
|
|
23
|
-
import * as lancedb from '@lancedb/lancedb';
|
|
24
|
-
import { Field, FixedSizeList, Float32, Int32, Schema, Utf8 } from 'apache-arrow';
|
|
25
|
-
import chalk from 'chalk';
|
|
26
|
-
import { debug } from '../utils/logging.js';
|
|
27
|
-
import { EMBEDDING_DIMENSIONS, TABLE_NAMES } from './constants.js';
|
|
28
|
-
import { LANCEDB_PATH } from './constants.js';
|
|
29
|
-
import { createDatabaseError, ERROR_CODES } from './errors.js';
|
|
30
|
-
|
|
31
|
-
// ============================================================================
|
|
32
|
-
// DATABASE CONFIGURATION
|
|
33
|
-
// ============================================================================
|
|
34
|
-
|
|
35
|
-
// Default table names, aliased from the shared TABLE_NAMES map in a single step
const {
  FILE_EMBEDDINGS: FILE_EMBEDDINGS_TABLE,
  DOCUMENT_CHUNK: DOCUMENT_CHUNK_TABLE,
  PR_COMMENTS: PR_COMMENTS_TABLE,
  PROJECT_SUMMARIES: PROJECT_SUMMARIES_TABLE,
} = TABLE_NAMES;
|
|
40
|
-
|
|
41
|
-
// ============================================================================
|
|
42
|
-
// DATABASE MANAGER CLASS
|
|
43
|
-
// ============================================================================
|
|
44
|
-
|
|
45
|
-
export class DatabaseManager {
|
|
46
|
-
constructor(options = {}) {
|
|
47
|
-
this.dbPath = options.dbPath || LANCEDB_PATH;
|
|
48
|
-
this.embeddingDimensions = options.embeddingDimensions || EMBEDDING_DIMENSIONS;
|
|
49
|
-
|
|
50
|
-
// Connection state
|
|
51
|
-
this.dbConnection = null;
|
|
52
|
-
this.tablesInitialized = false;
|
|
53
|
-
this.tableInitializationPromise = null;
|
|
54
|
-
this.cleaningUp = false;
|
|
55
|
-
|
|
56
|
-
// Table names
|
|
57
|
-
this.fileEmbeddingsTable = options.fileEmbeddingsTable || FILE_EMBEDDINGS_TABLE;
|
|
58
|
-
this.documentChunkTable = options.documentChunkTable || DOCUMENT_CHUNK_TABLE;
|
|
59
|
-
this.prCommentsTable = options.prCommentsTable || PR_COMMENTS_TABLE;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
// ============================================================================
|
|
63
|
-
// CONNECTION MANAGEMENT
|
|
64
|
-
// ============================================================================
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* Get database connection, creating it if necessary
|
|
68
|
-
* @returns {Promise<LanceDBConnection>} Database connection
|
|
69
|
-
*/
|
|
70
|
-
async getDBConnection() {
|
|
71
|
-
if (!this.dbConnection) {
|
|
72
|
-
console.log(chalk.blue(`Initializing DB connection. Target Path: ${this.dbPath}`));
|
|
73
|
-
if (!fs.existsSync(this.dbPath)) {
|
|
74
|
-
fs.mkdirSync(this.dbPath, { recursive: true });
|
|
75
|
-
}
|
|
76
|
-
this.dbConnection = await lancedb.connect(this.dbPath);
|
|
77
|
-
console.log(chalk.green('LanceDB connected.'));
|
|
78
|
-
}
|
|
79
|
-
return this.dbConnection;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Get database connection with initialized tables
|
|
84
|
-
* @returns {Promise<LanceDBConnection>} Database connection
|
|
85
|
-
*/
|
|
86
|
-
async getDB() {
|
|
87
|
-
const db = await this.getDBConnection();
|
|
88
|
-
if (!this.tablesInitialized) {
|
|
89
|
-
await this.initializeTables();
|
|
90
|
-
}
|
|
91
|
-
return db;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* Connect to database (compatibility method)
|
|
96
|
-
* @returns {Promise<LanceDBConnection>} Database connection
|
|
97
|
-
*/
|
|
98
|
-
async connect() {
|
|
99
|
-
return this.getDB();
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* Close database connection
|
|
104
|
-
*/
|
|
105
|
-
async closeConnection() {
|
|
106
|
-
if (this.dbConnection) {
|
|
107
|
-
console.log('Closing LanceDB connection...');
|
|
108
|
-
await this.dbConnection.close();
|
|
109
|
-
this.dbConnection = null;
|
|
110
|
-
this.tablesInitialized = false;
|
|
111
|
-
this.tableInitializationPromise = null;
|
|
112
|
-
console.log('LanceDB connection closed.');
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
// ============================================================================
|
|
117
|
-
// TABLE INITIALIZATION
|
|
118
|
-
// ============================================================================
|
|
119
|
-
|
|
120
|
-
/**
|
|
121
|
-
* Initialize database tables
|
|
122
|
-
* @returns {Promise<void>}
|
|
123
|
-
*/
|
|
124
|
-
async initializeTables() {
|
|
125
|
-
if (this.tablesInitialized) {
|
|
126
|
-
return;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
// If initialization is already in progress, wait for it to complete
|
|
130
|
-
if (this.tableInitializationPromise) {
|
|
131
|
-
await this.tableInitializationPromise;
|
|
132
|
-
return;
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
// Start initialization and store the promise
|
|
136
|
-
this.tableInitializationPromise = (async () => {
|
|
137
|
-
try {
|
|
138
|
-
console.log(chalk.blue('Initializing database tables and indices...'));
|
|
139
|
-
const db = await this.getDBConnection();
|
|
140
|
-
await this.ensureTablesExist(db);
|
|
141
|
-
this.tablesInitialized = true;
|
|
142
|
-
console.log(chalk.green('Database tables and indices initialized successfully.'));
|
|
143
|
-
} catch (error) {
|
|
144
|
-
this.tablesInitialized = false;
|
|
145
|
-
console.error(chalk.red('Failed to initialize database tables:'), error);
|
|
146
|
-
throw error; // Re-throw to propagate the error to callers
|
|
147
|
-
} finally {
|
|
148
|
-
// The initialization attempt is over, clear the promise
|
|
149
|
-
this.tableInitializationPromise = null;
|
|
150
|
-
}
|
|
151
|
-
})();
|
|
152
|
-
|
|
153
|
-
await this.tableInitializationPromise;
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
/**
|
|
157
|
-
* Ensure all required tables exist with proper schemas
|
|
158
|
-
* @param {LanceDBConnection} db - Database connection
|
|
159
|
-
* @returns {Promise<void>}
|
|
160
|
-
*/
|
|
161
|
-
async ensureTablesExist(db) {
|
|
162
|
-
try {
|
|
163
|
-
const tableNames = await db.tableNames();
|
|
164
|
-
const vectorType = new FixedSizeList(this.embeddingDimensions, new Field('item', new Float32(), true));
|
|
165
|
-
|
|
166
|
-
// File embeddings table schema
|
|
167
|
-
const fileFields = [
|
|
168
|
-
new Field('id', new Utf8(), false),
|
|
169
|
-
new Field('content', new Utf8(), false),
|
|
170
|
-
new Field('type', new Utf8(), false),
|
|
171
|
-
new Field('name', new Utf8(), false),
|
|
172
|
-
new Field('path', new Utf8(), false),
|
|
173
|
-
new Field('project_path', new Utf8(), false),
|
|
174
|
-
new Field('language', new Utf8(), true),
|
|
175
|
-
new Field('content_hash', new Utf8(), false),
|
|
176
|
-
new Field('last_modified', new Utf8(), false),
|
|
177
|
-
new Field('vector', vectorType, false),
|
|
178
|
-
];
|
|
179
|
-
const fileSchema = new Schema(fileFields);
|
|
180
|
-
|
|
181
|
-
// Document chunk table schema
|
|
182
|
-
const documentChunkFields = [
|
|
183
|
-
new Field('id', new Utf8(), false),
|
|
184
|
-
new Field('content', new Utf8(), false),
|
|
185
|
-
new Field('original_document_path', new Utf8(), false),
|
|
186
|
-
new Field('project_path', new Utf8(), false),
|
|
187
|
-
new Field('heading_text', new Utf8(), true),
|
|
188
|
-
new Field('document_title', new Utf8(), true),
|
|
189
|
-
new Field('language', new Utf8(), true),
|
|
190
|
-
new Field('vector', vectorType, false),
|
|
191
|
-
new Field('content_hash', new Utf8(), false),
|
|
192
|
-
new Field('last_modified', new Utf8(), false),
|
|
193
|
-
];
|
|
194
|
-
const documentChunkSchema = new Schema(documentChunkFields);
|
|
195
|
-
|
|
196
|
-
// PR comments table schema
|
|
197
|
-
const prCommentsSchema = this.createPRCommentsSchema();
|
|
198
|
-
|
|
199
|
-
// Create or open tables
|
|
200
|
-
let fileTable, documentChunkTable, prCommentsTable;
|
|
201
|
-
|
|
202
|
-
if (!tableNames.includes(this.fileEmbeddingsTable)) {
|
|
203
|
-
console.log(chalk.yellow(`Creating ${this.fileEmbeddingsTable} table with optimized schema...`));
|
|
204
|
-
fileTable = await db.createEmptyTable(this.fileEmbeddingsTable, fileSchema, { mode: 'create' });
|
|
205
|
-
console.log(chalk.green(`Created ${this.fileEmbeddingsTable} table.`));
|
|
206
|
-
} else {
|
|
207
|
-
fileTable = await db.openTable(this.fileEmbeddingsTable);
|
|
208
|
-
await this._checkSchemaCompatibility(fileTable, this.fileEmbeddingsTable, 'project_path');
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
if (!tableNames.includes(this.documentChunkTable)) {
|
|
212
|
-
console.log(chalk.yellow(`Creating ${this.documentChunkTable} table with optimized schema...`));
|
|
213
|
-
documentChunkTable = await db.createEmptyTable(this.documentChunkTable, documentChunkSchema, { mode: 'create' });
|
|
214
|
-
console.log(chalk.green(`Created ${this.documentChunkTable} table.`));
|
|
215
|
-
} else {
|
|
216
|
-
documentChunkTable = await db.openTable(this.documentChunkTable);
|
|
217
|
-
await this._checkSchemaCompatibility(documentChunkTable, this.documentChunkTable, 'project_path');
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// Create PR comments table
|
|
221
|
-
if (!tableNames.includes(this.prCommentsTable)) {
|
|
222
|
-
console.log(chalk.yellow(`Creating ${this.prCommentsTable} table with optimized schema...`));
|
|
223
|
-
prCommentsTable = await db.createEmptyTable(this.prCommentsTable, prCommentsSchema, { mode: 'create' });
|
|
224
|
-
console.log(chalk.green(`Created ${this.prCommentsTable} table.`));
|
|
225
|
-
} else {
|
|
226
|
-
prCommentsTable = await db.openTable(this.prCommentsTable);
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
// Create FTS indexes
|
|
230
|
-
await this._createFTSIndexes([
|
|
231
|
-
[fileTable, this.fileEmbeddingsTable, 'content'],
|
|
232
|
-
[documentChunkTable, this.documentChunkTable, 'content'],
|
|
233
|
-
[prCommentsTable, this.prCommentsTable, 'comment_text'],
|
|
234
|
-
]);
|
|
235
|
-
|
|
236
|
-
// Create adaptive vector indexes
|
|
237
|
-
await this._createVectorIndexes([
|
|
238
|
-
[fileTable, this.fileEmbeddingsTable, 'vector'],
|
|
239
|
-
[documentChunkTable, this.documentChunkTable, 'vector'],
|
|
240
|
-
[prCommentsTable, this.prCommentsTable, 'combined_embedding'],
|
|
241
|
-
]);
|
|
242
|
-
} catch (error) {
|
|
243
|
-
console.error(chalk.red(`Error ensuring tables exist: ${error.message}`), error.stack);
|
|
244
|
-
throw error;
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
// ============================================================================
|
|
249
|
-
// TABLE OPERATIONS
|
|
250
|
-
// ============================================================================
|
|
251
|
-
|
|
252
|
-
/**
|
|
253
|
-
* Get table by name
|
|
254
|
-
* @param {string} tableName - Name of the table
|
|
255
|
-
* @returns {Promise<LanceDBTable|null>} Table instance or null if not found
|
|
256
|
-
*/
|
|
257
|
-
async getTable(tableName) {
|
|
258
|
-
try {
|
|
259
|
-
const db = await this.getDBConnection();
|
|
260
|
-
const tableNames = await db.tableNames();
|
|
261
|
-
if (tableNames.includes(tableName)) {
|
|
262
|
-
return await db.openTable(tableName);
|
|
263
|
-
}
|
|
264
|
-
return null;
|
|
265
|
-
} catch (error) {
|
|
266
|
-
console.error(chalk.red(`Error opening table ${tableName}: ${error.message}`), error);
|
|
267
|
-
return null;
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
// ============================================================================
|
|
272
|
-
// SCHEMA MANAGEMENT
|
|
273
|
-
// ============================================================================
|
|
274
|
-
|
|
275
|
-
/**
|
|
276
|
-
* Create PR comments schema
|
|
277
|
-
* @returns {import('apache-arrow').Schema} PR comments schema
|
|
278
|
-
*/
|
|
279
|
-
createPRCommentsSchema() {
|
|
280
|
-
const vectorType = new FixedSizeList(this.embeddingDimensions, new Field('item', new Float32(), true));
|
|
281
|
-
|
|
282
|
-
const fields = [
|
|
283
|
-
new Field('id', new Utf8(), false),
|
|
284
|
-
new Field('pr_number', new Int32(), false),
|
|
285
|
-
new Field('repository', new Utf8(), false),
|
|
286
|
-
new Field('project_path', new Utf8(), false),
|
|
287
|
-
new Field('comment_type', new Utf8(), false),
|
|
288
|
-
new Field('comment_text', new Utf8(), false),
|
|
289
|
-
new Field('comment_embedding', vectorType, false),
|
|
290
|
-
|
|
291
|
-
// Code context fields
|
|
292
|
-
new Field('file_path', new Utf8(), true),
|
|
293
|
-
new Field('line_number', new Int32(), true),
|
|
294
|
-
new Field('line_range_start', new Int32(), true),
|
|
295
|
-
new Field('line_range_end', new Int32(), true),
|
|
296
|
-
new Field('original_code', new Utf8(), true),
|
|
297
|
-
new Field('suggested_code', new Utf8(), true),
|
|
298
|
-
new Field('diff_hunk', new Utf8(), true),
|
|
299
|
-
|
|
300
|
-
// Code embedding
|
|
301
|
-
new Field('code_embedding', vectorType, true),
|
|
302
|
-
new Field('combined_embedding', vectorType, false),
|
|
303
|
-
|
|
304
|
-
// Metadata
|
|
305
|
-
new Field('author', new Utf8(), false),
|
|
306
|
-
new Field('created_at', new Utf8(), false),
|
|
307
|
-
new Field('updated_at', new Utf8(), true),
|
|
308
|
-
new Field('review_id', new Utf8(), true),
|
|
309
|
-
new Field('review_state', new Utf8(), true),
|
|
310
|
-
|
|
311
|
-
// Analysis metadata
|
|
312
|
-
new Field('issue_category', new Utf8(), true),
|
|
313
|
-
new Field('severity', new Utf8(), true),
|
|
314
|
-
new Field('pattern_tags', new Utf8(), true),
|
|
315
|
-
];
|
|
316
|
-
|
|
317
|
-
return new Schema(fields);
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
// ============================================================================
|
|
321
|
-
// INDEXING
|
|
322
|
-
// ============================================================================
|
|
323
|
-
|
|
324
|
-
/**
|
|
325
|
-
* Create adaptive vector indexes based on dataset size
|
|
326
|
-
* @param {LanceDBTable} table - Table instance
|
|
327
|
-
* @param {string} tableName - Table name
|
|
328
|
-
* @param {string} vectorField - Vector field name
|
|
329
|
-
* @returns {Promise<Object>} Index information
|
|
330
|
-
*/
|
|
331
|
-
async createAdaptiveVectorIndexes(table, tableName, vectorField = 'vector') {
|
|
332
|
-
try {
|
|
333
|
-
const rowCount = await table.countRows();
|
|
334
|
-
console.log(chalk.blue(`[${tableName}] Row count: ${rowCount}`));
|
|
335
|
-
|
|
336
|
-
if (rowCount < 100) {
|
|
337
|
-
console.log(chalk.blue(`[${tableName}] Skipping indexing for small dataset (${rowCount} rows). Using exact search.`));
|
|
338
|
-
return { indexType: 'exact', rowCount };
|
|
339
|
-
} else if (rowCount < 1000) {
|
|
340
|
-
console.log(chalk.blue(`[${tableName}] Using exact search for small dataset (${rowCount} rows) - no index needed`));
|
|
341
|
-
return { indexType: 'exact', rowCount };
|
|
342
|
-
} else if (rowCount < 10000) {
|
|
343
|
-
const numPartitions = Math.max(Math.floor(Math.sqrt(rowCount / 50)), 2);
|
|
344
|
-
console.log(
|
|
345
|
-
chalk.blue(`[${tableName}] Creating/updating IVF-Flat index for medium dataset (${rowCount} rows, ${numPartitions} partitions)`)
|
|
346
|
-
);
|
|
347
|
-
await table.createIndex(vectorField, {
|
|
348
|
-
config: lancedb.Index.ivfFlat({ numPartitions }),
|
|
349
|
-
replace: false,
|
|
350
|
-
});
|
|
351
|
-
return { indexType: 'ivf_flat', rowCount, numPartitions };
|
|
352
|
-
} else {
|
|
353
|
-
const numPartitions = Math.max(Math.floor(Math.sqrt(rowCount / 100)), 8);
|
|
354
|
-
const numSubVectors = Math.floor(this.embeddingDimensions / 4);
|
|
355
|
-
console.log(
|
|
356
|
-
chalk.blue(`[${tableName}] Creating/updating IVF-PQ index for large dataset (${rowCount} rows, ${numPartitions} partitions)`)
|
|
357
|
-
);
|
|
358
|
-
await table.createIndex(vectorField, {
|
|
359
|
-
config: lancedb.Index.ivfPq({
|
|
360
|
-
numPartitions,
|
|
361
|
-
numSubVectors,
|
|
362
|
-
numBits: 8,
|
|
363
|
-
}),
|
|
364
|
-
replace: false,
|
|
365
|
-
});
|
|
366
|
-
return { indexType: 'ivf_pq', rowCount, numPartitions, numSubVectors };
|
|
367
|
-
}
|
|
368
|
-
} catch (error) {
|
|
369
|
-
if (error.message.includes('already exists')) {
|
|
370
|
-
console.log(chalk.green(`[${tableName}] Index already up-to-date.`));
|
|
371
|
-
return { indexType: 'existing' };
|
|
372
|
-
}
|
|
373
|
-
console.warn(chalk.yellow(`[${tableName}] Index creation/update failed: ${error.message}. Falling back to exact search.`));
|
|
374
|
-
return { indexType: 'exact_fallback', error: error.message };
|
|
375
|
-
}
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
// ============================================================================
|
|
379
|
-
// CLEANUP OPERATIONS
|
|
380
|
-
// ============================================================================
|
|
381
|
-
|
|
382
|
-
/**
|
|
383
|
-
* Clean up database connection and resources
|
|
384
|
-
*/
|
|
385
|
-
async cleanup() {
|
|
386
|
-
if (this.cleaningUp) {
|
|
387
|
-
return; // Already cleaning up, prevent duplicate calls
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
this.cleaningUp = true;
|
|
391
|
-
|
|
392
|
-
try {
|
|
393
|
-
await this.closeConnection();
|
|
394
|
-
console.log(chalk.green('Database resources cleaned up.'));
|
|
395
|
-
} catch (error) {
|
|
396
|
-
console.error(`Error during database cleanup: ${error.message}`);
|
|
397
|
-
} finally {
|
|
398
|
-
this.cleaningUp = false;
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
/**
|
|
403
|
-
* Clear all embeddings by dropping tables
|
|
404
|
-
* @returns {Promise<boolean>} Success status
|
|
405
|
-
*/
|
|
406
|
-
async clearAllEmbeddings() {
|
|
407
|
-
let db = null;
|
|
408
|
-
try {
|
|
409
|
-
console.log(chalk.cyan('Clearing ALL embeddings by dropping tables...'));
|
|
410
|
-
console.log(chalk.red('WARNING: This will affect all projects on this machine!'));
|
|
411
|
-
|
|
412
|
-
if (!fs.existsSync(this.dbPath)) {
|
|
413
|
-
console.log(chalk.yellow('LanceDB directory does not exist, nothing to clear.'));
|
|
414
|
-
return true;
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
db = await lancedb.connect(this.dbPath);
|
|
418
|
-
const tableNames = await db.tableNames();
|
|
419
|
-
let droppedCount = 0;
|
|
420
|
-
|
|
421
|
-
for (const tableName of [this.fileEmbeddingsTable, this.documentChunkTable, this.prCommentsTable]) {
|
|
422
|
-
if (tableNames.includes(tableName)) {
|
|
423
|
-
console.log(chalk.yellow(`Dropping table ${tableName}...`));
|
|
424
|
-
await db.dropTable(tableName);
|
|
425
|
-
console.log(chalk.green(`Table ${tableName} dropped.`));
|
|
426
|
-
droppedCount++;
|
|
427
|
-
} else {
|
|
428
|
-
console.log(chalk.yellow(`Table ${tableName} does not exist.`));
|
|
429
|
-
}
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
if (droppedCount > 0) {
|
|
433
|
-
console.log(chalk.green('All embedding tables have been dropped.'));
|
|
434
|
-
console.log(chalk.yellow('Run the embedding generation process again to recreate tables.'));
|
|
435
|
-
} else {
|
|
436
|
-
console.log(chalk.green('No embedding tables found to drop.'));
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
// Reset connection state
|
|
440
|
-
this.dbConnection = null;
|
|
441
|
-
this.tablesInitialized = false;
|
|
442
|
-
return true;
|
|
443
|
-
} catch (error) {
|
|
444
|
-
console.error(chalk.red(`Error clearing embeddings: ${error.message}`), error);
|
|
445
|
-
this.dbConnection = null;
|
|
446
|
-
this.tablesInitialized = false;
|
|
447
|
-
throw error;
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
/**
|
|
452
|
-
* Clear embeddings for a specific project
|
|
453
|
-
* @param {string} projectPath - Project path
|
|
454
|
-
* @returns {Promise<boolean>} Success status
|
|
455
|
-
*/
|
|
456
|
-
async clearProjectEmbeddings(projectPath = process.cwd()) {
|
|
457
|
-
let db = null;
|
|
458
|
-
try {
|
|
459
|
-
const resolvedProjectPath = path.resolve(projectPath);
|
|
460
|
-
const projectName = path.basename(resolvedProjectPath);
|
|
461
|
-
|
|
462
|
-
// Safety check: ensure project path is valid and not root
|
|
463
|
-
if (!resolvedProjectPath || resolvedProjectPath === '/' || resolvedProjectPath === path.resolve('/')) {
|
|
464
|
-
throw new Error(`Invalid project path: ${resolvedProjectPath}. Cannot clear embeddings for root directory.`);
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
// Additional safety: ensure project path is not too generic
|
|
468
|
-
const pathParts = resolvedProjectPath.split(path.sep);
|
|
469
|
-
if (pathParts.length <= 2) {
|
|
470
|
-
throw new Error(`Project path too generic: ${resolvedProjectPath}. For safety, project must be at least 3 levels deep.`);
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
console.log(chalk.cyan(`Clearing embeddings for project: ${resolvedProjectPath} (${projectName})`));
|
|
474
|
-
|
|
475
|
-
if (!fs.existsSync(this.dbPath)) {
|
|
476
|
-
console.log(chalk.yellow('LanceDB directory does not exist, nothing to clear.'));
|
|
477
|
-
return true;
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
db = await lancedb.connect(this.dbPath);
|
|
481
|
-
const tableNames = await db.tableNames();
|
|
482
|
-
let deletedCount = 0;
|
|
483
|
-
|
|
484
|
-
// Clear file embeddings for this project
|
|
485
|
-
if (tableNames.includes(this.fileEmbeddingsTable)) {
|
|
486
|
-
const fileTable = await db.openTable(this.fileEmbeddingsTable);
|
|
487
|
-
await this._validateTableHasProjectPath(fileTable, this.fileEmbeddingsTable);
|
|
488
|
-
deletedCount += await this._clearProjectTableRecords(
|
|
489
|
-
db,
|
|
490
|
-
this.fileEmbeddingsTable,
|
|
491
|
-
resolvedProjectPath,
|
|
492
|
-
projectName,
|
|
493
|
-
'project_path'
|
|
494
|
-
);
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
// Clear document chunk embeddings for this project
|
|
498
|
-
if (tableNames.includes(this.documentChunkTable)) {
|
|
499
|
-
const docTable = await db.openTable(this.documentChunkTable);
|
|
500
|
-
await this._validateTableHasProjectPath(docTable, this.documentChunkTable);
|
|
501
|
-
deletedCount += await this._clearProjectTableRecords(db, this.documentChunkTable, resolvedProjectPath, projectName, 'project_path');
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
// Clear project summaries for this project
|
|
505
|
-
if (tableNames.includes(PROJECT_SUMMARIES_TABLE)) {
|
|
506
|
-
const summariesTable = await db.openTable(PROJECT_SUMMARIES_TABLE);
|
|
507
|
-
await this._validateTableHasProjectPath(summariesTable, PROJECT_SUMMARIES_TABLE);
|
|
508
|
-
deletedCount += await this._clearProjectTableRecords(db, PROJECT_SUMMARIES_TABLE, resolvedProjectPath, projectName, 'project_path');
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
// Note: PR comments are cleared via separate pr-history:clear command
|
|
512
|
-
// This embeddings:clear command handles file embeddings, document embeddings, and project summaries
|
|
513
|
-
|
|
514
|
-
if (deletedCount > 0) {
|
|
515
|
-
console.log(chalk.green(`Successfully cleared ${deletedCount} embeddings for project: ${resolvedProjectPath}`));
|
|
516
|
-
|
|
517
|
-
// Optimize tables after cleanup to maintain performance
|
|
518
|
-
await this._optimizeTablesAfterCleanup(db, tableNames);
|
|
519
|
-
} else {
|
|
520
|
-
console.log(chalk.yellow(`No embeddings found for project: ${resolvedProjectPath}`));
|
|
521
|
-
}
|
|
522
|
-
|
|
523
|
-
return true;
|
|
524
|
-
} catch (error) {
|
|
525
|
-
console.error(chalk.red(`Error clearing project embeddings: ${error.message}`), error);
|
|
526
|
-
throw error;
|
|
527
|
-
}
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
// ============================================================================
|
|
531
|
-
// PRIVATE METHODS
|
|
532
|
-
// ============================================================================
|
|
533
|
-
|
|
534
|
-
/**
|
|
535
|
-
* Check schema compatibility for existing tables
|
|
536
|
-
* @param {LanceDBTable} table - Table instance
|
|
537
|
-
* @param {string} tableName - Table name
|
|
538
|
-
* @param {string} requiredField - Required field name
|
|
539
|
-
* @private
|
|
540
|
-
*/
|
|
541
|
-
async _checkSchemaCompatibility(table, tableName, requiredField) {
|
|
542
|
-
try {
|
|
543
|
-
const currentSchema = await table.schema;
|
|
544
|
-
if (currentSchema && currentSchema.fields) {
|
|
545
|
-
const hasRequiredField = currentSchema.fields.some((field) => field.name === requiredField);
|
|
546
|
-
if (!hasRequiredField) {
|
|
547
|
-
console.log(chalk.yellow(`Table ${tableName} has old schema without ${requiredField}. Migration needed.`));
|
|
548
|
-
console.log(chalk.yellow(`Please clear embeddings and regenerate them to use the new schema with project isolation.`));
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
} catch (schemaError) {
|
|
552
|
-
debug(`Could not check schema for ${tableName}: ${schemaError.message}`);
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
|
|
556
|
-
/**
|
|
557
|
-
* Validate that a table has the project_path field for proper project isolation
|
|
558
|
-
* @param {LanceDBTable} table - Table instance
|
|
559
|
-
* @param {string} tableName - Table name
|
|
560
|
-
* @throws {Error} If table doesn't have project_path field
|
|
561
|
-
* @private
|
|
562
|
-
*/
|
|
563
|
-
async _validateTableHasProjectPath(table, tableName) {
|
|
564
|
-
try {
|
|
565
|
-
const currentSchema = await table.schema;
|
|
566
|
-
if (currentSchema && currentSchema.fields) {
|
|
567
|
-
const hasProjectPath = currentSchema.fields.some((field) => field.name === 'project_path');
|
|
568
|
-
if (!hasProjectPath) {
|
|
569
|
-
throw new Error(
|
|
570
|
-
`Table ${tableName} does not have project_path field. Cannot perform project-specific cleanup. Please regenerate embeddings to use the new schema with project isolation.`
|
|
571
|
-
);
|
|
572
|
-
}
|
|
573
|
-
console.log(chalk.green(`✓ Table ${tableName} has project_path field for proper isolation`));
|
|
574
|
-
} else {
|
|
575
|
-
console.log(chalk.yellow(`Table ${tableName} has no readable schema, skipping validation`));
|
|
576
|
-
}
|
|
577
|
-
} catch (schemaError) {
|
|
578
|
-
// If we can't read the schema, it might be because the table is empty or doesn't exist
|
|
579
|
-
// In this case, we should just warn and continue
|
|
580
|
-
console.log(chalk.yellow(`Warning: Could not validate schema for ${tableName}: ${schemaError.message}`));
|
|
581
|
-
}
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
/**
|
|
585
|
-
* Create FTS indexes for tables
|
|
586
|
-
* @param {Array} tableSpecs - Array of [table, tableName, contentField] tuples
|
|
587
|
-
* @private
|
|
588
|
-
*/
|
|
589
|
-
async _createFTSIndexes(tableSpecs) {
|
|
590
|
-
console.log(chalk.blue('Creating native FTS indexes...'));
|
|
591
|
-
|
|
592
|
-
for (const [table, tableName, contentField] of tableSpecs) {
|
|
593
|
-
try {
|
|
594
|
-
await table.createIndex(contentField, { config: lancedb.Index.fts(), replace: false });
|
|
595
|
-
console.log(chalk.green(`FTS index created/updated for ${tableName}`));
|
|
596
|
-
} catch (error) {
|
|
597
|
-
if (error.message.toLowerCase().includes('already exists')) {
|
|
598
|
-
console.log(chalk.green(`FTS index already exists for ${tableName}.`));
|
|
599
|
-
} else {
|
|
600
|
-
console.warn(chalk.yellow(`FTS index warning for ${tableName}: ${error.message}`));
|
|
601
|
-
}
|
|
602
|
-
}
|
|
603
|
-
}
|
|
604
|
-
}
|
|
605
|
-
|
|
606
|
-
/**
|
|
607
|
-
* Create vector indexes for tables
|
|
608
|
-
* @param {Array} tableSpecs - Array of [table, tableName, vectorField] tuples
|
|
609
|
-
* @private
|
|
610
|
-
*/
|
|
611
|
-
async _createVectorIndexes(tableSpecs) {
|
|
612
|
-
console.log(chalk.blue('Creating adaptive vector indexes...'));
|
|
613
|
-
|
|
614
|
-
const indexResults = [];
|
|
615
|
-
for (const [table, tableName, vectorField] of tableSpecs) {
|
|
616
|
-
const indexInfo = await this.createAdaptiveVectorIndexes(table, tableName, vectorField);
|
|
617
|
-
indexResults.push(indexInfo);
|
|
618
|
-
}
|
|
619
|
-
|
|
620
|
-
console.log(chalk.green(`Indexing complete - ${JSON.stringify(indexResults)}`));
|
|
621
|
-
}
|
|
622
|
-
|
|
623
|
-
/**
|
|
624
|
-
* Optimize tables to sync indices with data and prevent TakeExec panics
|
|
625
|
-
* @param {Array} tableSpecs - Array of [table, tableName] tuples
|
|
626
|
-
* @private
|
|
627
|
-
*/
|
|
628
|
-
async _optimizeTables(tableSpecs) {
|
|
629
|
-
console.log(chalk.blue('Optimizing tables to sync indices with data...'));
|
|
630
|
-
|
|
631
|
-
for (const [table, tableName] of tableSpecs) {
|
|
632
|
-
try {
|
|
633
|
-
console.log(chalk.blue(`Optimizing table: ${tableName}`));
|
|
634
|
-
await table.optimize();
|
|
635
|
-
console.log(chalk.green(`✓ Table ${tableName} optimized successfully`));
|
|
636
|
-
} catch (error) {
|
|
637
|
-
// Handle legacy FTS index upgrade issues in v0.22.2
|
|
638
|
-
if (error.message && error.message.includes('legacy format')) {
|
|
639
|
-
console.warn(
|
|
640
|
-
chalk.yellow(
|
|
641
|
-
`Skipping optimization for ${tableName} due to legacy index format - will be auto-upgraded during normal operations`
|
|
642
|
-
)
|
|
643
|
-
);
|
|
644
|
-
} else {
|
|
645
|
-
console.warn(chalk.yellow(`Warning: Failed to optimize table ${tableName}: ${error.message}`));
|
|
646
|
-
}
|
|
647
|
-
}
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
console.log(chalk.green('Table optimization complete'));
|
|
651
|
-
}
|
|
652
|
-
|
|
653
|
-
/**
|
|
654
|
-
* Clear records from a specific table for a project
|
|
655
|
-
* @param {LanceDBConnection} db - Database connection
|
|
656
|
-
* @param {string} tableName - Table name
|
|
657
|
-
* @param {string} resolvedProjectPath - Resolved project path
|
|
658
|
-
* @param {string} projectName - Project name
|
|
659
|
-
* @param {string} pathField - Path field name
|
|
660
|
-
* @returns {Promise<number>} Number of deleted records
|
|
661
|
-
* @private
|
|
662
|
-
*/
|
|
663
|
-
async _clearProjectTableRecords(db, tableName, resolvedProjectPath, projectName, pathField) {
|
|
664
|
-
const table = await db.openTable(tableName);
|
|
665
|
-
const allRecords = await table.query().toArray();
|
|
666
|
-
|
|
667
|
-
const projectRecords = allRecords.filter((record) => {
|
|
668
|
-
if (!record[pathField]) return false;
|
|
669
|
-
|
|
670
|
-
// Check for project-specific structure
|
|
671
|
-
if (record.id === `__project_structure__${projectName}` || record.id === '__project_structure__') {
|
|
672
|
-
return true;
|
|
673
|
-
}
|
|
674
|
-
|
|
675
|
-
// Check if this record belongs to the current project
|
|
676
|
-
try {
|
|
677
|
-
if (pathField === 'project_path') {
|
|
678
|
-
// For project_path field, do direct equality check
|
|
679
|
-
return record[pathField] === resolvedProjectPath;
|
|
680
|
-
} else {
|
|
681
|
-
// For other path fields (like 'path'), resolve relative to project path
|
|
682
|
-
const absolutePath = path.resolve(resolvedProjectPath, record[pathField]);
|
|
683
|
-
return absolutePath.startsWith(resolvedProjectPath);
|
|
684
|
-
}
|
|
685
|
-
} catch {
|
|
686
|
-
return false;
|
|
687
|
-
}
|
|
688
|
-
});
|
|
689
|
-
|
|
690
|
-
if (projectRecords.length > 0) {
|
|
691
|
-
console.log(chalk.blue(`Found ${projectRecords.length} ${tableName} records for this project`));
|
|
692
|
-
|
|
693
|
-
let deletedCount = 0;
|
|
694
|
-
for (const record of projectRecords) {
|
|
695
|
-
try {
|
|
696
|
-
await table.delete(`id = '${record.id.replace(/'/g, "''")}'`);
|
|
697
|
-
deletedCount++;
|
|
698
|
-
} catch (deleteError) {
|
|
699
|
-
console.warn(chalk.yellow(`Warning: Could not delete record ${record.id}: ${deleteError.message}`));
|
|
700
|
-
}
|
|
701
|
-
}
|
|
702
|
-
|
|
703
|
-
console.log(chalk.green(`Deleted ${deletedCount} ${tableName} records for this project`));
|
|
704
|
-
return deletedCount;
|
|
705
|
-
} else {
|
|
706
|
-
console.log(chalk.yellow(`No ${tableName} records found for this project`));
|
|
707
|
-
return 0;
|
|
708
|
-
}
|
|
709
|
-
}
|
|
710
|
-
|
|
711
|
-
/**
|
|
712
|
-
* Update the vector index for the PR comments table
|
|
713
|
-
* @returns {Promise<void>}
|
|
714
|
-
*/
|
|
715
|
-
async updatePRCommentsIndex() {
|
|
716
|
-
try {
|
|
717
|
-
const table = await this.getTable(this.prCommentsTable);
|
|
718
|
-
if (table) {
|
|
719
|
-
console.log(chalk.blue(`Updating vector index for ${this.prCommentsTable}...`));
|
|
720
|
-
await this.createAdaptiveVectorIndexes(table, this.prCommentsTable, 'combined_embedding');
|
|
721
|
-
|
|
722
|
-
// Optimize table to sync indices with data (conditional due to legacy index issues)
|
|
723
|
-
console.log(chalk.blue(`Optimizing ${this.prCommentsTable} table...`));
|
|
724
|
-
try {
|
|
725
|
-
await table.optimize();
|
|
726
|
-
} catch (optimizeError) {
|
|
727
|
-
if (optimizeError.message && optimizeError.message.includes('legacy format')) {
|
|
728
|
-
console.warn(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`));
|
|
729
|
-
} else {
|
|
730
|
-
throw optimizeError; // Re-throw non-legacy errors
|
|
731
|
-
}
|
|
732
|
-
}
|
|
733
|
-
|
|
734
|
-
console.log(chalk.green(`Vector index for ${this.prCommentsTable} updated and optimized.`));
|
|
735
|
-
}
|
|
736
|
-
} catch (error) {
|
|
737
|
-
console.error(chalk.red(`Error updating PR comments index: ${error.message}`));
|
|
738
|
-
throw createDatabaseError(`Failed to update PR comments index: ${error.message}`, ERROR_CODES.INDEX_UPDATE_ERROR, error);
|
|
739
|
-
}
|
|
740
|
-
}
|
|
741
|
-
|
|
742
|
-
/**
|
|
743
|
-
* Store project summary in database
|
|
744
|
-
* @param {string} projectPath - Project path
|
|
745
|
-
* @param {Object} projectSummary - Project analysis summary
|
|
746
|
-
* @returns {Promise<boolean>} Success status
|
|
747
|
-
*/
|
|
748
|
-
async storeProjectSummary(projectPath, projectSummary) {
|
|
749
|
-
try {
|
|
750
|
-
const resolvedProjectPath = path.resolve(projectPath);
|
|
751
|
-
const projectName = path.basename(resolvedProjectPath);
|
|
752
|
-
|
|
753
|
-
// Get database connection
|
|
754
|
-
const db = await this.getDBConnection();
|
|
755
|
-
const tableNames = await db.tableNames();
|
|
756
|
-
|
|
757
|
-
// Create project summaries table if it doesn't exist
|
|
758
|
-
if (!tableNames.includes(PROJECT_SUMMARIES_TABLE)) {
|
|
759
|
-
await this._createProjectSummariesTable(db);
|
|
760
|
-
}
|
|
761
|
-
|
|
762
|
-
const table = await db.openTable(PROJECT_SUMMARIES_TABLE);
|
|
763
|
-
|
|
764
|
-
// Prepare the record
|
|
765
|
-
const record = {
|
|
766
|
-
id: `project_summary_${projectName}_${Date.now()}`,
|
|
767
|
-
project_path: resolvedProjectPath,
|
|
768
|
-
project_name: projectName,
|
|
769
|
-
summary: JSON.stringify(projectSummary),
|
|
770
|
-
created_at: new Date().toISOString(),
|
|
771
|
-
last_updated: new Date().toISOString(),
|
|
772
|
-
};
|
|
773
|
-
|
|
774
|
-
// Remove any existing summary for this project first
|
|
775
|
-
try {
|
|
776
|
-
const existingRecords = await table
|
|
777
|
-
.query()
|
|
778
|
-
.where(`project_path = '${resolvedProjectPath.replace(/'/g, "''")}'`)
|
|
779
|
-
.toArray();
|
|
780
|
-
|
|
781
|
-
for (const existing of existingRecords) {
|
|
782
|
-
await table.delete(`id = '${existing.id.replace(/'/g, "''")}'`);
|
|
783
|
-
}
|
|
784
|
-
} catch {
|
|
785
|
-
// Continue if no existing records found
|
|
786
|
-
}
|
|
787
|
-
|
|
788
|
-
// Add the new record
|
|
789
|
-
await table.add([record]);
|
|
790
|
-
|
|
791
|
-
// Optimize table to sync indices with data (conditional due to legacy index issues)
|
|
792
|
-
try {
|
|
793
|
-
await table.optimize();
|
|
794
|
-
console.log(chalk.blue(`✓ Project summaries table optimized`));
|
|
795
|
-
} catch (optimizeError) {
|
|
796
|
-
if (optimizeError.message && optimizeError.message.includes('legacy format')) {
|
|
797
|
-
console.warn(chalk.yellow(`Skipping optimization due to legacy index format - will be auto-upgraded during normal operations`));
|
|
798
|
-
} else {
|
|
799
|
-
console.warn(chalk.yellow(`Warning: Failed to optimize project summaries table: ${optimizeError.message}`));
|
|
800
|
-
}
|
|
801
|
-
}
|
|
802
|
-
|
|
803
|
-
console.log(chalk.green(`✅ Project summary stored for: ${resolvedProjectPath}`));
|
|
804
|
-
return true;
|
|
805
|
-
} catch (error) {
|
|
806
|
-
console.error(chalk.red(`Error storing project summary: ${error.message}`));
|
|
807
|
-
throw createDatabaseError(`Failed to store project summary: ${error.message}`, ERROR_CODES.STORAGE_ERROR, error);
|
|
808
|
-
}
|
|
809
|
-
}
|
|
810
|
-
|
|
811
|
-
/**
|
|
812
|
-
* Get stored project summary from database
|
|
813
|
-
* @param {string} projectPath - Project path
|
|
814
|
-
* @returns {Promise<Object|null>} Project summary or null if not found
|
|
815
|
-
*/
|
|
816
|
-
async getProjectSummary(projectPath) {
|
|
817
|
-
try {
|
|
818
|
-
const resolvedProjectPath = path.resolve(projectPath);
|
|
819
|
-
|
|
820
|
-
// Get database connection
|
|
821
|
-
const db = await this.getDBConnection();
|
|
822
|
-
const tableNames = await db.tableNames();
|
|
823
|
-
|
|
824
|
-
// Check if table exists
|
|
825
|
-
if (!tableNames.includes(PROJECT_SUMMARIES_TABLE)) {
|
|
826
|
-
return null;
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
const table = await db.openTable(PROJECT_SUMMARIES_TABLE);
|
|
830
|
-
|
|
831
|
-
// Query for the project summary
|
|
832
|
-
const records = await table
|
|
833
|
-
.query()
|
|
834
|
-
.where(`project_path = '${resolvedProjectPath.replace(/'/g, "''")}'`)
|
|
835
|
-
.toArray();
|
|
836
|
-
|
|
837
|
-
if (records.length === 0) {
|
|
838
|
-
return null;
|
|
839
|
-
}
|
|
840
|
-
|
|
841
|
-
// Get the most recent record (in case there are duplicates)
|
|
842
|
-
const latestRecord = records.sort((a, b) => new Date(b.last_updated).getTime() - new Date(a.last_updated).getTime())[0];
|
|
843
|
-
|
|
844
|
-
// Parse and return the summary
|
|
845
|
-
const summary = JSON.parse(latestRecord.summary);
|
|
846
|
-
summary._metadata = {
|
|
847
|
-
created_at: latestRecord.created_at,
|
|
848
|
-
last_updated: latestRecord.last_updated,
|
|
849
|
-
project_name: latestRecord.project_name,
|
|
850
|
-
};
|
|
851
|
-
|
|
852
|
-
return summary;
|
|
853
|
-
} catch (error) {
|
|
854
|
-
console.error(chalk.red(`Error retrieving project summary: ${error.message}`));
|
|
855
|
-
return null; // Return null instead of throwing to allow graceful fallback
|
|
856
|
-
}
|
|
857
|
-
}
|
|
858
|
-
|
|
859
|
-
/**
|
|
860
|
-
* Optimize tables after cleanup operations
|
|
861
|
-
* @param {LanceDBConnection} db - Database connection
|
|
862
|
-
* @param {Array<string>} availableTableNames - Available table names
|
|
863
|
-
* @private
|
|
864
|
-
*/
|
|
865
|
-
async _optimizeTablesAfterCleanup(db, availableTableNames) {
|
|
866
|
-
console.log(chalk.blue('Optimizing tables after cleanup...'));
|
|
867
|
-
|
|
868
|
-
const tablesToOptimize = [
|
|
869
|
-
{ name: this.fileEmbeddingsTable, displayName: 'File embeddings' },
|
|
870
|
-
{ name: this.documentChunkTable, displayName: 'Document chunks' },
|
|
871
|
-
{ name: this.prCommentsTable, displayName: 'PR comments' },
|
|
872
|
-
{ name: PROJECT_SUMMARIES_TABLE, displayName: 'Project summaries' },
|
|
873
|
-
];
|
|
874
|
-
|
|
875
|
-
for (const { name, displayName } of tablesToOptimize) {
|
|
876
|
-
if (availableTableNames.includes(name)) {
|
|
877
|
-
try {
|
|
878
|
-
const table = await db.openTable(name);
|
|
879
|
-
console.log(chalk.blue(`Optimizing ${displayName} table...`));
|
|
880
|
-
await table.optimize();
|
|
881
|
-
console.log(chalk.green(`✓ ${displayName} table optimized`));
|
|
882
|
-
} catch (error) {
|
|
883
|
-
if (error.message && error.message.includes('legacy format')) {
|
|
884
|
-
console.warn(
|
|
885
|
-
chalk.yellow(
|
|
886
|
-
`Skipping optimization for ${displayName} due to legacy index format - will be auto-upgraded during normal operations`
|
|
887
|
-
)
|
|
888
|
-
);
|
|
889
|
-
} else {
|
|
890
|
-
console.warn(chalk.yellow(`Warning: Failed to optimize ${displayName} table: ${error.message}`));
|
|
891
|
-
}
|
|
892
|
-
}
|
|
893
|
-
}
|
|
894
|
-
}
|
|
895
|
-
|
|
896
|
-
console.log(chalk.green('Post-cleanup table optimization complete'));
|
|
897
|
-
}
|
|
898
|
-
|
|
899
|
-
/**
|
|
900
|
-
* Create project summaries table
|
|
901
|
-
* @param {LanceDBConnection} db - Database connection
|
|
902
|
-
* @returns {Promise<void>}
|
|
903
|
-
* @private
|
|
904
|
-
*/
|
|
905
|
-
async _createProjectSummariesTable(db) {
|
|
906
|
-
console.log(chalk.blue('Creating project summaries table...'));
|
|
907
|
-
|
|
908
|
-
const schema = new Schema([
|
|
909
|
-
new Field('id', new Utf8()),
|
|
910
|
-
new Field('project_path', new Utf8()),
|
|
911
|
-
new Field('project_name', new Utf8()),
|
|
912
|
-
new Field('summary', new Utf8()),
|
|
913
|
-
new Field('created_at', new Utf8()),
|
|
914
|
-
new Field('last_updated', new Utf8()),
|
|
915
|
-
]);
|
|
916
|
-
|
|
917
|
-
// Create table with empty initial data
|
|
918
|
-
await db.createEmptyTable(PROJECT_SUMMARIES_TABLE, schema);
|
|
919
|
-
console.log(chalk.green(`✅ Project summaries table created: ${PROJECT_SUMMARIES_TABLE}`));
|
|
920
|
-
}
|
|
921
|
-
}
|