@optiqcode/cli 2.9.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/index.d.ts +2 -0
- package/dist/commands/index.js +136 -48
- package/dist/index.js +234 -101
- package/dist/utils/files.js +19 -1
- package/package.json +1 -1
package/dist/commands/index.d.ts
CHANGED
package/dist/commands/index.js
CHANGED
|
@@ -49,18 +49,29 @@ export async function index(options) {
|
|
|
49
49
|
console.log(chalk.yellow('\n⚠️ Indexing cancelled'));
|
|
50
50
|
return;
|
|
51
51
|
}
|
|
52
|
+
// Debug mode: show ignore patterns
|
|
53
|
+
if (options.debug) {
|
|
54
|
+
const ignorePatterns = await getGitIgnorePatterns(targetPath);
|
|
55
|
+
console.log(chalk.blue('\n📋 Ignore patterns being used:'));
|
|
56
|
+
console.log(chalk.dim(ignorePatterns.slice(0, 30).join('\n')));
|
|
57
|
+
if (ignorePatterns.length > 30) {
|
|
58
|
+
console.log(chalk.dim(`... and ${ignorePatterns.length - 30} more`));
|
|
59
|
+
}
|
|
60
|
+
console.log('');
|
|
61
|
+
}
|
|
52
62
|
const spinner = ora('Collecting files...').start();
|
|
53
63
|
try {
|
|
54
64
|
// Collect all files
|
|
55
|
-
const files = await collectFiles(targetPath);
|
|
65
|
+
const files = await collectFiles(targetPath, options.verbose || options.debug);
|
|
56
66
|
spinner.text = `Reading ${files.length} files...`;
|
|
57
67
|
// Read file contents
|
|
58
68
|
const fileContents = [];
|
|
69
|
+
let skippedFiles = 0;
|
|
59
70
|
for (const file of files) {
|
|
60
71
|
try {
|
|
61
72
|
const content = await fs.readFile(file, 'utf-8');
|
|
62
73
|
const relativePath = path.relative(targetPath, file);
|
|
63
|
-
//
|
|
74
|
+
// Normalize path separators to forward slashes for cross-platform consistency
|
|
64
75
|
const normalizedPath = relativePath.replace(/\\/g, '/');
|
|
65
76
|
fileContents.push({
|
|
66
77
|
path: normalizedPath,
|
|
@@ -68,63 +79,123 @@ export async function index(options) {
|
|
|
68
79
|
});
|
|
69
80
|
}
|
|
70
81
|
catch (error) {
|
|
71
|
-
|
|
82
|
+
skippedFiles++;
|
|
83
|
+
// Skip files that can't be read (binary, permissions, etc.)
|
|
72
84
|
}
|
|
73
85
|
}
|
|
86
|
+
if (skippedFiles > 0) {
|
|
87
|
+
console.log(chalk.dim(`\n Skipped ${skippedFiles} unreadable files`));
|
|
88
|
+
}
|
|
74
89
|
spinner.text = 'Indexing with Optiq Engine...';
|
|
75
90
|
// Generate repository ID from path
|
|
76
91
|
const repoId = generateRepoId(targetPath);
|
|
77
|
-
|
|
78
|
-
|
|
92
|
+
console.log(chalk.blue('\n📊 Repository ID:'), chalk.bold(repoId));
|
|
93
|
+
console.log(chalk.dim(` Indexing ${fileContents.length} files...\n`));
|
|
94
|
+
// PARALLEL STREAMING: Send multiple batches concurrently for maximum speed
|
|
95
|
+
const BATCH_SIZE = 40; // Files per batch
|
|
96
|
+
const CONCURRENT_BATCHES = 4; // Number of batches to send in parallel
|
|
97
|
+
const MAX_RETRIES = 3;
|
|
98
|
+
const RETRY_DELAY_MS = 2000;
|
|
99
|
+
const TIMEOUT_MS = 180000; // 3 minutes timeout
|
|
79
100
|
let uploadedCount = 0;
|
|
80
101
|
let totalChunksCreated = 0;
|
|
102
|
+
let failedBatches = [];
|
|
103
|
+
const totalBatches = Math.ceil(fileContents.length / BATCH_SIZE);
|
|
104
|
+
// Create all batches upfront
|
|
105
|
+
const batches = [];
|
|
81
106
|
for (let i = 0; i < fileContents.length; i += BATCH_SIZE) {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
107
|
+
batches.push({
|
|
108
|
+
batchNum: Math.floor(i / BATCH_SIZE) + 1,
|
|
109
|
+
files: fileContents.slice(i, i + BATCH_SIZE),
|
|
110
|
+
});
|
|
111
|
+
}
|
|
112
|
+
// Process a single batch with retries
|
|
113
|
+
async function processBatch(batch) {
|
|
114
|
+
for (let retry = 0; retry < MAX_RETRIES; retry++) {
|
|
90
115
|
try {
|
|
91
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
116
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, {
|
|
92
117
|
repository_id: repoId,
|
|
93
118
|
branch: 'main',
|
|
94
|
-
files: batch,
|
|
119
|
+
files: batch.files,
|
|
95
120
|
}, {
|
|
96
|
-
headers: {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
121
|
+
headers: { 'Content-Type': 'application/json' },
|
|
122
|
+
timeout: TIMEOUT_MS,
|
|
123
|
+
maxContentLength: Infinity,
|
|
124
|
+
maxBodyLength: Infinity,
|
|
100
125
|
});
|
|
101
|
-
if (
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
126
|
+
if (response.data.success) {
|
|
127
|
+
return {
|
|
128
|
+
success: true,
|
|
129
|
+
batchNum: batch.batchNum,
|
|
130
|
+
chunksCreated: response.data.result?.chunks_created || 0,
|
|
131
|
+
filesCount: batch.files.length,
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
else {
|
|
135
|
+
const errorMsg = response.data.result?.errors?.join(', ') || 'Unknown API error';
|
|
136
|
+
if (retry < MAX_RETRIES - 1) {
|
|
137
|
+
await sleep(RETRY_DELAY_MS * (retry + 1));
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, filesCount: 0, error: errorMsg };
|
|
141
|
+
}
|
|
105
142
|
}
|
|
106
|
-
totalChunksCreated += response.data.result?.chunks_created || 0;
|
|
107
|
-
uploadedCount += batch.length;
|
|
108
|
-
break;
|
|
109
143
|
}
|
|
110
144
|
catch (err) {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
145
|
+
if (retry < MAX_RETRIES - 1) {
|
|
146
|
+
await sleep(RETRY_DELAY_MS * (retry + 1));
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
const errorMsg = err.response?.data?.error || err.message || 'Unknown error';
|
|
150
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, filesCount: 0, error: errorMsg };
|
|
116
151
|
}
|
|
117
152
|
}
|
|
118
153
|
}
|
|
119
|
-
|
|
120
|
-
|
|
154
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, filesCount: 0, error: 'Max retries exceeded' };
|
|
155
|
+
}
|
|
156
|
+
// Process batches in parallel waves
|
|
157
|
+
let completedBatches = 0;
|
|
158
|
+
for (let i = 0; i < batches.length; i += CONCURRENT_BATCHES) {
|
|
159
|
+
const wave = batches.slice(i, i + CONCURRENT_BATCHES);
|
|
160
|
+
spinner.text = `Indexing batches ${i + 1}-${Math.min(i + CONCURRENT_BATCHES, batches.length)}/${totalBatches} (${CONCURRENT_BATCHES} concurrent)...`;
|
|
161
|
+
// Fire all batches in this wave concurrently
|
|
162
|
+
const results = await Promise.all(wave.map(processBatch));
|
|
163
|
+
// Process results
|
|
164
|
+
for (const result of results) {
|
|
165
|
+
completedBatches++;
|
|
166
|
+
if (result.success) {
|
|
167
|
+
uploadedCount += result.filesCount;
|
|
168
|
+
totalChunksCreated += result.chunksCreated;
|
|
169
|
+
}
|
|
170
|
+
else {
|
|
171
|
+
failedBatches.push(result.batchNum);
|
|
172
|
+
console.log(chalk.yellow(`\n⚠️ Batch ${result.batchNum} failed: ${result.error}`));
|
|
173
|
+
}
|
|
121
174
|
}
|
|
175
|
+
spinner.text = `Indexed ${completedBatches}/${totalBatches} batches (${uploadedCount} files, ${totalChunksCreated} chunks)...`;
|
|
176
|
+
}
|
|
177
|
+
// Summary
|
|
178
|
+
spinner.stop();
|
|
179
|
+
if (failedBatches.length === 0) {
|
|
180
|
+
console.log(chalk.green('\n✓ Indexing complete!'));
|
|
181
|
+
}
|
|
182
|
+
else if (failedBatches.length < totalBatches) {
|
|
183
|
+
console.log(chalk.yellow(`\n⚠️ Indexing partially complete (${failedBatches.length}/${totalBatches} batches failed)`));
|
|
184
|
+
}
|
|
185
|
+
else {
|
|
186
|
+
console.log(chalk.red('\n✗ Indexing failed - all batches failed'));
|
|
187
|
+
process.exit(1);
|
|
122
188
|
}
|
|
123
|
-
spinner.succeed(chalk.green('✓ Indexing complete'));
|
|
124
189
|
console.log(chalk.blue('📊 Repository ID:'), chalk.bold(repoId));
|
|
125
|
-
console.log(chalk.blue('📁 Files indexed:'), chalk.bold(uploadedCount));
|
|
190
|
+
console.log(chalk.blue('📁 Files indexed:'), chalk.bold(uploadedCount), chalk.dim(`/ ${fileContents.length}`));
|
|
126
191
|
console.log(chalk.blue('📝 Chunks created:'), chalk.bold(totalChunksCreated));
|
|
192
|
+
if (failedBatches.length > 0) {
|
|
193
|
+
console.log(chalk.yellow('⚠️ Failed batches:'), chalk.bold(failedBatches.join(', ')));
|
|
194
|
+
console.log(chalk.dim('\nTip: Run the index command again to retry failed files.'));
|
|
195
|
+
}
|
|
127
196
|
console.log(chalk.dim('\nUse this repo_id for searches'));
|
|
197
|
+
// Exit successfully - prevents hanging due to open handles from prompts/axios
|
|
198
|
+
process.exit(0);
|
|
128
199
|
}
|
|
129
200
|
catch (error) {
|
|
130
201
|
spinner.fail(chalk.red('✗ Indexing failed'));
|
|
@@ -134,27 +205,44 @@ export async function index(options) {
|
|
|
134
205
|
else {
|
|
135
206
|
console.log(chalk.red(error.message));
|
|
136
207
|
}
|
|
208
|
+
// Exit with error code
|
|
209
|
+
process.exit(1);
|
|
137
210
|
}
|
|
138
211
|
}
|
|
139
|
-
|
|
212
|
+
function sleep(ms) {
|
|
213
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
214
|
+
}
|
|
215
|
+
async function collectFiles(dir, verbose = false) {
|
|
140
216
|
const files = [];
|
|
141
217
|
const ignorePatterns = await getGitIgnorePatterns(dir);
|
|
218
|
+
const ignoredDirs = [];
|
|
142
219
|
async function walk(currentPath) {
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
const
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
220
|
+
try {
|
|
221
|
+
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
222
|
+
for (const entry of entries) {
|
|
223
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
224
|
+
const relativePath = path.relative(dir, fullPath);
|
|
225
|
+
if (shouldIgnoreFile(relativePath, ignorePatterns)) {
|
|
226
|
+
if (verbose && entry.isDirectory()) {
|
|
227
|
+
ignoredDirs.push(relativePath);
|
|
228
|
+
}
|
|
229
|
+
continue;
|
|
230
|
+
}
|
|
231
|
+
if (entry.isDirectory()) {
|
|
232
|
+
await walk(fullPath);
|
|
233
|
+
}
|
|
234
|
+
else if (entry.isFile()) {
|
|
235
|
+
files.push(fullPath);
|
|
236
|
+
}
|
|
155
237
|
}
|
|
156
238
|
}
|
|
239
|
+
catch (error) {
|
|
240
|
+
// Skip directories we can't read (permissions, etc.)
|
|
241
|
+
}
|
|
157
242
|
}
|
|
158
243
|
await walk(dir);
|
|
244
|
+
if (verbose && ignoredDirs.length > 0) {
|
|
245
|
+
console.log(chalk.dim(`\n Ignored directories: ${ignoredDirs.slice(0, 20).join(', ')}${ignoredDirs.length > 20 ? '...' : ''}`));
|
|
246
|
+
}
|
|
159
247
|
return files;
|
|
160
248
|
}
|
package/dist/index.js
CHANGED
|
@@ -10,8 +10,8 @@ import { getConfig, saveConfig } from './utils/config.js';
|
|
|
10
10
|
import { isValidDirectory, getGitIgnorePatterns, shouldIgnoreFile } from './utils/files.js';
|
|
11
11
|
const BACKEND_URL = process.env.OPTIQ_BACKEND_URL || 'https://api.optiqcode.com';
|
|
12
12
|
const ENGINE_URL = process.env.OPTIQ_ENGINE_URL || 'http://25.36.113.3:3002';
|
|
13
|
-
// Debug mode - set OPTIQ_DEBUG=1 to see request details
|
|
14
|
-
const DEBUG = process.env.OPTIQ_DEBUG === '1';
|
|
13
|
+
// Debug mode - set OPTIQ_DEBUG=1 or OPTIQ_DEBUG=true to see request details
|
|
14
|
+
const DEBUG = process.env.OPTIQ_DEBUG === '1' || process.env.OPTIQ_DEBUG === 'true' || process.env.OPTIQ_DEBUG?.toLowerCase() === 'true';
|
|
15
15
|
// Helper to generate repository ID from path
|
|
16
16
|
function generateRepoId(targetPath) {
|
|
17
17
|
// Use path basename + hash of full path for uniqueness
|
|
@@ -267,10 +267,13 @@ async function main() {
|
|
|
267
267
|
}
|
|
268
268
|
}
|
|
269
269
|
async function indexOnce(targetPath, config) {
|
|
270
|
+
console.log(chalk.cyan('\n🚀 Starting index...\n'));
|
|
270
271
|
const spinner = ora({ text: 'Collecting files...', color: 'cyan' }).start();
|
|
271
272
|
try {
|
|
273
|
+
spinner.stop();
|
|
272
274
|
const files = await collectFiles(targetPath);
|
|
273
|
-
|
|
275
|
+
console.log(chalk.cyan(`\n📁 Collected ${files.length} files to index\n`));
|
|
276
|
+
spinner.start(`Reading ${files.length} files...`);
|
|
274
277
|
// Read files in parallel (100 at a time)
|
|
275
278
|
const PARALLEL_READS = 100;
|
|
276
279
|
const MAX_FILE_SIZE = 100_000; // 100KB max per file
|
|
@@ -302,7 +305,7 @@ async function indexOnce(targetPath, config) {
|
|
|
302
305
|
const BATCH_SIZE = 50;
|
|
303
306
|
let totalChunks = 0;
|
|
304
307
|
if (DEBUG) {
|
|
305
|
-
console.log(chalk.gray(`\n[DEBUG] Sending to: ${ENGINE_URL}/api/v1/
|
|
308
|
+
console.log(chalk.gray(`\n[DEBUG] Sending to: ${ENGINE_URL}/api/v1/index_simple`));
|
|
306
309
|
console.log(chalk.gray(`[DEBUG] Repo ID: ${repoId}`));
|
|
307
310
|
console.log(chalk.gray(`[DEBUG] Total files: ${filesArray.length}`));
|
|
308
311
|
}
|
|
@@ -311,75 +314,102 @@ async function indexOnce(targetPath, config) {
|
|
|
311
314
|
if (DEBUG) {
|
|
312
315
|
console.log(chalk.gray(`\n[DEBUG] Total files: ${filesArray.length}, Batch size: ${BATCH_SIZE}, Total batches: ${totalBatches}`));
|
|
313
316
|
}
|
|
317
|
+
// PARALLEL BATCH PROCESSING - send multiple batches concurrently
|
|
318
|
+
const CONCURRENT_BATCHES = 4;
|
|
319
|
+
const MAX_RETRIES = 3;
|
|
320
|
+
const RETRY_DELAY_MS = 2000;
|
|
321
|
+
// Create all batches upfront
|
|
322
|
+
const batches = [];
|
|
314
323
|
for (let i = 0; i < filesArray.length; i += BATCH_SIZE) {
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
let
|
|
326
|
-
while (retries > 0) {
|
|
324
|
+
batches.push({
|
|
325
|
+
batchNum: Math.floor(i / BATCH_SIZE) + 1,
|
|
326
|
+
files: filesArray.slice(i, i + BATCH_SIZE),
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
if (DEBUG) {
|
|
330
|
+
console.log(chalk.gray(`\n[DEBUG] Created ${batches.length} batches, processing ${CONCURRENT_BATCHES} concurrently`));
|
|
331
|
+
}
|
|
332
|
+
// Process a single batch with retries
|
|
333
|
+
async function processBatch(batch) {
|
|
334
|
+
for (let retry = 0; retry < MAX_RETRIES; retry++) {
|
|
327
335
|
try {
|
|
328
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
336
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, {
|
|
329
337
|
repository_id: repoId,
|
|
330
338
|
branch: 'main',
|
|
331
|
-
files: batch,
|
|
339
|
+
files: batch.files,
|
|
332
340
|
}, {
|
|
333
|
-
headers: {
|
|
334
|
-
|
|
335
|
-
},
|
|
336
|
-
timeout: 300000, // 5 minutes per batch
|
|
341
|
+
headers: { 'Content-Type': 'application/json' },
|
|
342
|
+
timeout: 300000,
|
|
337
343
|
});
|
|
338
|
-
if (
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
return;
|
|
344
|
+
if (response.data.success) {
|
|
345
|
+
return {
|
|
346
|
+
success: true,
|
|
347
|
+
batchNum: batch.batchNum,
|
|
348
|
+
chunksCreated: response.data.result?.chunks_created || 0,
|
|
349
|
+
};
|
|
345
350
|
}
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
351
|
+
else {
|
|
352
|
+
const errorMsg = response.data.result?.errors?.join(', ') || 'Unknown API error';
|
|
353
|
+
if (retry < MAX_RETRIES - 1) {
|
|
354
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
355
|
+
}
|
|
356
|
+
else {
|
|
357
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, error: errorMsg };
|
|
358
|
+
}
|
|
349
359
|
}
|
|
350
|
-
break;
|
|
351
360
|
}
|
|
352
361
|
catch (err) {
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
// Stop spinner to show error clearly
|
|
356
|
-
spinner.stop();
|
|
357
|
-
console.log(chalk.red(`\n✗ Batch ${batchNum} failed: ${err.code || err.message}`));
|
|
358
|
-
console.log(chalk.yellow(` Files: ${batch.map(f => f.path).join(', ')}`));
|
|
359
|
-
console.log(chalk.gray(` Sizes: ${batch.map(f => `${f.path}(${Math.round(f.content.length / 1024)}KB)`).join(', ')}`));
|
|
360
|
-
if (DEBUG && err.response) {
|
|
361
|
-
console.log(chalk.gray(`[DEBUG] Response status: ${err.response.status}`));
|
|
362
|
-
console.log(chalk.gray(`[DEBUG] Response data: ${JSON.stringify(err.response.data)}`));
|
|
362
|
+
if (retry < MAX_RETRIES - 1) {
|
|
363
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
363
364
|
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
spinner.start(`Retrying batch ${batchNum}...`);
|
|
365
|
+
else {
|
|
366
|
+
const errorMsg = err.response?.data?.error || err.message || 'Unknown error';
|
|
367
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, error: errorMsg };
|
|
368
368
|
}
|
|
369
369
|
}
|
|
370
370
|
}
|
|
371
|
-
|
|
372
|
-
|
|
371
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, error: 'Max retries exceeded' };
|
|
372
|
+
}
|
|
373
|
+
// Process batches in parallel waves
|
|
374
|
+
let completedBatches = 0;
|
|
375
|
+
let failedBatches = [];
|
|
376
|
+
for (let i = 0; i < batches.length; i += CONCURRENT_BATCHES) {
|
|
377
|
+
const wave = batches.slice(i, i + CONCURRENT_BATCHES);
|
|
378
|
+
const waveEnd = Math.min(i + CONCURRENT_BATCHES, batches.length);
|
|
379
|
+
spinner.text = `Indexing batches ${i + 1}-${waveEnd}/${totalBatches} (${CONCURRENT_BATCHES} concurrent)...`;
|
|
380
|
+
if (DEBUG) {
|
|
381
|
+
console.log(chalk.gray(`\n[DEBUG] Starting wave: batches ${i + 1}-${waveEnd}`));
|
|
382
|
+
}
|
|
383
|
+
// Fire all batches in this wave concurrently
|
|
384
|
+
const results = await Promise.all(wave.map(processBatch));
|
|
385
|
+
// Process results
|
|
386
|
+
spinner.stop();
|
|
387
|
+
for (const result of results) {
|
|
388
|
+
completedBatches++;
|
|
389
|
+
if (result.success) {
|
|
390
|
+
totalChunks += result.chunksCreated;
|
|
391
|
+
console.log(chalk.green(`✓ Batch ${result.batchNum}/${totalBatches}: ${result.chunksCreated} chunks`));
|
|
392
|
+
}
|
|
393
|
+
else {
|
|
394
|
+
failedBatches.push(result.batchNum);
|
|
395
|
+
console.log(chalk.red(`✗ Batch ${result.batchNum}/${totalBatches} failed: ${result.error}`));
|
|
396
|
+
}
|
|
373
397
|
}
|
|
398
|
+
spinner.start(`Indexed ${completedBatches}/${totalBatches} batches (${totalChunks} chunks)...`);
|
|
374
399
|
}
|
|
375
400
|
if (DEBUG) {
|
|
376
401
|
console.log(chalk.gray(`\n[DEBUG] All ${totalBatches} batches complete. Total chunks: ${totalChunks}`));
|
|
377
402
|
}
|
|
403
|
+
if (failedBatches.length > 0) {
|
|
404
|
+
console.log(chalk.yellow(`\n⚠️ ${failedBatches.length} batches failed: ${failedBatches.join(', ')}`));
|
|
405
|
+
}
|
|
378
406
|
spinner.succeed(chalk.cyan('Indexed'));
|
|
379
407
|
console.log(chalk.gray(` ${filesArray.length} files • ${totalChunks} chunks`));
|
|
380
408
|
console.log(chalk.cyan('\n📊 Repository ID:'));
|
|
381
409
|
console.log(chalk.white(` ${repoId}`));
|
|
382
410
|
console.log(chalk.gray('\n Use this ID for searches\n'));
|
|
411
|
+
// Exit successfully - prevents hanging due to open handles from prompts/axios
|
|
412
|
+
process.exit(0);
|
|
383
413
|
}
|
|
384
414
|
catch (error) {
|
|
385
415
|
spinner.fail(chalk.red('Failed'));
|
|
@@ -389,6 +419,8 @@ async function indexOnce(targetPath, config) {
|
|
|
389
419
|
else {
|
|
390
420
|
console.log(chalk.gray(' ' + error.message));
|
|
391
421
|
}
|
|
422
|
+
// Exit with error code
|
|
423
|
+
process.exit(1);
|
|
392
424
|
}
|
|
393
425
|
}
|
|
394
426
|
async function watchDirectory(targetPath, config) {
|
|
@@ -425,52 +457,51 @@ async function watchDirectory(targetPath, config) {
|
|
|
425
457
|
if (DEBUG) {
|
|
426
458
|
console.log(chalk.gray(`\n[DEBUG] Watch initial index: ${filesArray.length} files, ${totalBatches} batches`));
|
|
427
459
|
}
|
|
460
|
+
// PARALLEL BATCH PROCESSING for initial watch index
|
|
461
|
+
const CONCURRENT_BATCHES = 4;
|
|
462
|
+
const MAX_RETRIES = 3;
|
|
463
|
+
const RETRY_DELAY_MS = 2000;
|
|
464
|
+
// Create all batches
|
|
465
|
+
const batches = [];
|
|
428
466
|
for (let i = 0; i < filesArray.length; i += BATCH_SIZE) {
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
467
|
+
batches.push({
|
|
468
|
+
batchNum: Math.floor(i / BATCH_SIZE) + 1,
|
|
469
|
+
files: filesArray.slice(i, i + BATCH_SIZE),
|
|
470
|
+
});
|
|
471
|
+
}
|
|
472
|
+
if (DEBUG) {
|
|
473
|
+
console.log(chalk.gray(`\n[DEBUG] Watch initial: ${batches.length} batches, ${CONCURRENT_BATCHES} concurrent`));
|
|
474
|
+
}
|
|
475
|
+
// Process a single batch with retries
|
|
476
|
+
async function processWatchBatch(batch) {
|
|
477
|
+
for (let retry = 0; retry < MAX_RETRIES; retry++) {
|
|
439
478
|
try {
|
|
440
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
headers: {
|
|
446
|
-
'Content-Type': 'application/json',
|
|
447
|
-
},
|
|
448
|
-
timeout: 300000, // 5 minutes per batch
|
|
449
|
-
});
|
|
450
|
-
if (DEBUG) {
|
|
451
|
-
console.log(chalk.gray(`[DEBUG] Watch batch ${batchNum} response: success=${response.data.success}, chunks=${response.data.result?.chunks_created}`));
|
|
452
|
-
}
|
|
453
|
-
if (!response.data.success) {
|
|
454
|
-
spinner.fail(chalk.red('Failed'));
|
|
455
|
-
console.log(chalk.gray(response.data.result?.errors?.join('\n') || 'Unknown error'));
|
|
456
|
-
return;
|
|
457
|
-
}
|
|
458
|
-
break;
|
|
479
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, { repository_id: repoId, branch: 'main', files: batch.files }, { headers: { 'Content-Type': 'application/json' }, timeout: 300000 });
|
|
480
|
+
if (response.data.success)
|
|
481
|
+
return true;
|
|
482
|
+
if (retry < MAX_RETRIES - 1)
|
|
483
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
459
484
|
}
|
|
460
485
|
catch (err) {
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
if (
|
|
464
|
-
|
|
465
|
-
}
|
|
466
|
-
if (retries > 0) {
|
|
467
|
-
spinner.text = `Retrying batch ${batchNum}... (${retries} left)`;
|
|
468
|
-
await new Promise(r => setTimeout(r, 2000));
|
|
469
|
-
}
|
|
486
|
+
if (DEBUG)
|
|
487
|
+
console.log(chalk.gray(`[DEBUG] Watch batch ${batch.batchNum} error: ${err.message}`));
|
|
488
|
+
if (retry < MAX_RETRIES - 1)
|
|
489
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
470
490
|
}
|
|
471
491
|
}
|
|
472
|
-
|
|
473
|
-
|
|
492
|
+
return false;
|
|
493
|
+
}
|
|
494
|
+
// Process in parallel waves
|
|
495
|
+
let completedBatches = 0;
|
|
496
|
+
for (let i = 0; i < batches.length; i += CONCURRENT_BATCHES) {
|
|
497
|
+
const wave = batches.slice(i, i + CONCURRENT_BATCHES);
|
|
498
|
+
spinner.text = `Indexing... ${Math.min(i + CONCURRENT_BATCHES, batches.length)}/${totalBatches} batches`;
|
|
499
|
+
const results = await Promise.all(wave.map(processWatchBatch));
|
|
500
|
+
completedBatches += results.length;
|
|
501
|
+
const failures = results.filter(r => !r).length;
|
|
502
|
+
if (failures > 0) {
|
|
503
|
+
spinner.fail(chalk.red(`Failed (${failures} batches failed)`));
|
|
504
|
+
return;
|
|
474
505
|
}
|
|
475
506
|
}
|
|
476
507
|
if (DEBUG) {
|
|
@@ -589,10 +620,10 @@ async function watchDirectory(targetPath, config) {
|
|
|
589
620
|
}
|
|
590
621
|
// Filter out deleted files (content: null) - send only files with content
|
|
591
622
|
const filesToIndex = filesArray.filter(f => f.content !== null);
|
|
592
|
-
// For incremental updates, use the
|
|
623
|
+
// For incremental updates, use the simplified index endpoint
|
|
593
624
|
// The Rust engine handles upserts automatically
|
|
594
625
|
if (filesToIndex.length > 0) {
|
|
595
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
626
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, {
|
|
596
627
|
repository_id: repoId,
|
|
597
628
|
branch: 'main',
|
|
598
629
|
files: filesToIndex,
|
|
@@ -689,23 +720,125 @@ async function watchDirectory(targetPath, config) {
|
|
|
689
720
|
async function collectFiles(dir) {
|
|
690
721
|
const files = [];
|
|
691
722
|
const ignorePatterns = await getGitIgnorePatterns(dir);
|
|
723
|
+
const ignoredItems = [];
|
|
724
|
+
// Always log ignore patterns for debugging
|
|
725
|
+
console.log(chalk.gray('\n📋 Checking ignore patterns...'));
|
|
692
726
|
async function walk(currentPath) {
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
const
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
727
|
+
try {
|
|
728
|
+
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
729
|
+
for (const entry of entries) {
|
|
730
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
731
|
+
const relativePath = path.relative(dir, fullPath);
|
|
732
|
+
if (shouldIgnoreFile(relativePath, ignorePatterns)) {
|
|
733
|
+
// Log ignored directories (especially admin!)
|
|
734
|
+
if (entry.isDirectory()) {
|
|
735
|
+
ignoredItems.push(`DIR: ${relativePath}`);
|
|
736
|
+
// Special check for admin
|
|
737
|
+
if (relativePath.includes('admin')) {
|
|
738
|
+
console.log(chalk.yellow(`⚠️ IGNORING admin directory: ${relativePath}`));
|
|
739
|
+
// Find which pattern matched
|
|
740
|
+
for (const pattern of ignorePatterns) {
|
|
741
|
+
if (relativePath.includes(pattern) || pattern.includes('admin')) {
|
|
742
|
+
console.log(chalk.yellow(` Matched pattern: "${pattern}"`));
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
continue;
|
|
748
|
+
}
|
|
749
|
+
if (entry.isDirectory()) {
|
|
750
|
+
await walk(fullPath);
|
|
751
|
+
}
|
|
752
|
+
else if (entry.isFile()) {
|
|
753
|
+
files.push(fullPath);
|
|
754
|
+
// Log admin files being collected
|
|
755
|
+
if (relativePath.includes('admin')) {
|
|
756
|
+
console.log(chalk.green(`✓ Found admin file: ${relativePath}`));
|
|
757
|
+
}
|
|
758
|
+
}
|
|
699
759
|
}
|
|
700
|
-
|
|
701
|
-
|
|
760
|
+
}
|
|
761
|
+
catch (error) {
|
|
762
|
+
console.log(chalk.yellow(`⚠️ Cannot read directory: ${currentPath}`));
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
await walk(dir);
|
|
766
|
+
// Show summary of ignored directories
|
|
767
|
+
if (ignoredItems.length > 0) {
|
|
768
|
+
console.log(chalk.gray(`\n📁 Ignored ${ignoredItems.length} items`));
|
|
769
|
+
// Show first 20
|
|
770
|
+
for (const item of ignoredItems.slice(0, 20)) {
|
|
771
|
+
console.log(chalk.gray(` ${item}`));
|
|
772
|
+
}
|
|
773
|
+
if (ignoredItems.length > 20) {
|
|
774
|
+
console.log(chalk.gray(` ... and ${ignoredItems.length - 20} more`));
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
// Check if any admin files were found
|
|
778
|
+
const adminFiles = files.filter(f => f.includes('admin'));
|
|
779
|
+
if (adminFiles.length > 0) {
|
|
780
|
+
console.log(chalk.green(`\n✓ Found ${adminFiles.length} files in admin directories`));
|
|
781
|
+
}
|
|
782
|
+
else {
|
|
783
|
+
console.log(chalk.yellow(`\n⚠️ No admin files found! Check if admin is being ignored.`));
|
|
784
|
+
}
|
|
785
|
+
console.log('');
|
|
786
|
+
// Log first 10 ignore patterns from gitignore
|
|
787
|
+
const gitignorePatterns = ignorePatterns.slice(180); // Skip default patterns
|
|
788
|
+
if (gitignorePatterns.length > 0) {
|
|
789
|
+
console.log(chalk.gray('📋 .gitignore patterns:'));
|
|
790
|
+
for (const p of gitignorePatterns.slice(0, 15)) {
|
|
791
|
+
console.log(chalk.gray(` ${p}`));
|
|
792
|
+
}
|
|
793
|
+
if (gitignorePatterns.length > 15) {
|
|
794
|
+
console.log(chalk.gray(` ... and ${gitignorePatterns.length - 15} more`));
|
|
795
|
+
}
|
|
796
|
+
console.log('');
|
|
797
|
+
}
|
|
798
|
+
return files;
|
|
799
|
+
}
|
|
800
|
+
// Keep old version for reference - DELETE THIS
|
|
801
|
+
async function collectFilesOld(dir) {
|
|
802
|
+
const files = [];
|
|
803
|
+
const ignorePatterns = await getGitIgnorePatterns(dir);
|
|
804
|
+
const ignoredDirs = [];
|
|
805
|
+
async function walk(currentPath) {
|
|
806
|
+
try {
|
|
807
|
+
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
808
|
+
for (const entry of entries) {
|
|
809
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
810
|
+
const relativePath = path.relative(dir, fullPath);
|
|
811
|
+
if (shouldIgnoreFile(relativePath, ignorePatterns)) {
|
|
812
|
+
if (DEBUG && entry.isDirectory()) {
|
|
813
|
+
ignoredDirs.push(relativePath);
|
|
814
|
+
}
|
|
815
|
+
continue;
|
|
816
|
+
}
|
|
817
|
+
if (entry.isDirectory()) {
|
|
818
|
+
await walk(fullPath);
|
|
819
|
+
}
|
|
820
|
+
else if (entry.isFile()) {
|
|
821
|
+
files.push(fullPath);
|
|
822
|
+
}
|
|
702
823
|
}
|
|
703
|
-
|
|
704
|
-
|
|
824
|
+
}
|
|
825
|
+
catch (error) {
|
|
826
|
+
// Skip directories we can't read
|
|
827
|
+
if (DEBUG) {
|
|
828
|
+
console.log(chalk.yellow(`[DEBUG] Cannot read directory: ${currentPath}`));
|
|
705
829
|
}
|
|
706
830
|
}
|
|
707
831
|
}
|
|
708
832
|
await walk(dir);
|
|
833
|
+
if (DEBUG && ignoredDirs.length > 0) {
|
|
834
|
+
console.log(chalk.gray(`\n[DEBUG] Ignored directories (${ignoredDirs.length}):`));
|
|
835
|
+
for (const d of ignoredDirs.slice(0, 30)) {
|
|
836
|
+
console.log(chalk.gray(` - ${d}`));
|
|
837
|
+
}
|
|
838
|
+
if (ignoredDirs.length > 30) {
|
|
839
|
+
console.log(chalk.gray(` ... and ${ignoredDirs.length - 30} more`));
|
|
840
|
+
}
|
|
841
|
+
}
|
|
709
842
|
return files;
|
|
710
843
|
}
|
|
711
844
|
main().catch((error) => {
|
package/dist/utils/files.js
CHANGED
|
@@ -51,7 +51,8 @@ const DEFAULT_IGNORE_PATTERNS = [
|
|
|
51
51
|
'build',
|
|
52
52
|
'eggs',
|
|
53
53
|
'.eggs',
|
|
54
|
-
'lib'
|
|
54
|
+
// NOTE: 'lib' removed - it's a common source directory in JS/TS projects
|
|
55
|
+
// Python's lib/ is usually inside venv/ which is already ignored
|
|
55
56
|
'lib64',
|
|
56
57
|
'parts',
|
|
57
58
|
'sdist',
|
|
@@ -303,16 +304,33 @@ export async function getGitIgnorePatterns(dir) {
|
|
|
303
304
|
}
|
|
304
305
|
return patterns;
|
|
305
306
|
}
|
|
307
|
+
// Patterns that should only match at root level (not nested directories)
|
|
308
|
+
const ROOT_ONLY_PATTERNS = new Set([
|
|
309
|
+
// NOTE: 'lib' removed - it's a common source directory in JS/TS projects
|
|
310
|
+
'lib64', 'bin', 'out', 'build', 'dist', 'vendor', 'tmp', 'temp',
|
|
311
|
+
'env', 'venv', 'ENV', '.venv', 'eggs', '.eggs', 'parts', 'sdist', 'var', 'wheels',
|
|
312
|
+
'deps', 'packages', 'site', 'coverage', 'htmlcov',
|
|
313
|
+
]);
|
|
306
314
|
export function shouldIgnoreFile(relativePath, patterns) {
|
|
307
315
|
// Normalize path separators to forward slashes for consistent matching
|
|
308
316
|
const normalizedPath = relativePath.replace(/\\/g, '/');
|
|
309
317
|
const parts = normalizedPath.split('/');
|
|
310
318
|
const fileName = parts[parts.length - 1];
|
|
319
|
+
const firstPart = parts[0]; // Root-level directory
|
|
311
320
|
for (const pattern of patterns) {
|
|
312
321
|
// Skip empty patterns
|
|
313
322
|
if (!pattern)
|
|
314
323
|
continue;
|
|
324
|
+
// For root-only patterns, only match if it's the first directory component
|
|
325
|
+
if (ROOT_ONLY_PATTERNS.has(pattern)) {
|
|
326
|
+
if (firstPart === pattern) {
|
|
327
|
+
return true;
|
|
328
|
+
}
|
|
329
|
+
// Don't match nested directories like frontend/src/lib
|
|
330
|
+
continue;
|
|
331
|
+
}
|
|
315
332
|
// Exact directory/file name match (e.g., 'node_modules', '.git')
|
|
333
|
+
// These match anywhere in the path
|
|
316
334
|
if (parts.includes(pattern)) {
|
|
317
335
|
return true;
|
|
318
336
|
}
|