@optiqcode/cli 2.8.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/index.d.ts +2 -0
- package/dist/commands/index.js +132 -48
- package/dist/index.js +247 -86
- package/dist/utils/files.js +16 -0
- package/package.json +1 -1
package/dist/commands/index.d.ts
CHANGED
package/dist/commands/index.js
CHANGED
|
@@ -49,18 +49,29 @@ export async function index(options) {
|
|
|
49
49
|
console.log(chalk.yellow('\n⚠️ Indexing cancelled'));
|
|
50
50
|
return;
|
|
51
51
|
}
|
|
52
|
+
// Debug mode: show ignore patterns
|
|
53
|
+
if (options.debug) {
|
|
54
|
+
const ignorePatterns = await getGitIgnorePatterns(targetPath);
|
|
55
|
+
console.log(chalk.blue('\n📋 Ignore patterns being used:'));
|
|
56
|
+
console.log(chalk.dim(ignorePatterns.slice(0, 30).join('\n')));
|
|
57
|
+
if (ignorePatterns.length > 30) {
|
|
58
|
+
console.log(chalk.dim(`... and ${ignorePatterns.length - 30} more`));
|
|
59
|
+
}
|
|
60
|
+
console.log('');
|
|
61
|
+
}
|
|
52
62
|
const spinner = ora('Collecting files...').start();
|
|
53
63
|
try {
|
|
54
64
|
// Collect all files
|
|
55
|
-
const files = await collectFiles(targetPath);
|
|
65
|
+
const files = await collectFiles(targetPath, options.verbose || options.debug);
|
|
56
66
|
spinner.text = `Reading ${files.length} files...`;
|
|
57
67
|
// Read file contents
|
|
58
68
|
const fileContents = [];
|
|
69
|
+
let skippedFiles = 0;
|
|
59
70
|
for (const file of files) {
|
|
60
71
|
try {
|
|
61
72
|
const content = await fs.readFile(file, 'utf-8');
|
|
62
73
|
const relativePath = path.relative(targetPath, file);
|
|
63
|
-
//
|
|
74
|
+
// Normalize path separators to forward slashes for cross-platform consistency
|
|
64
75
|
const normalizedPath = relativePath.replace(/\\/g, '/');
|
|
65
76
|
fileContents.push({
|
|
66
77
|
path: normalizedPath,
|
|
@@ -68,62 +79,120 @@ export async function index(options) {
|
|
|
68
79
|
});
|
|
69
80
|
}
|
|
70
81
|
catch (error) {
|
|
71
|
-
|
|
82
|
+
skippedFiles++;
|
|
83
|
+
// Skip files that can't be read (binary, permissions, etc.)
|
|
72
84
|
}
|
|
73
85
|
}
|
|
86
|
+
if (skippedFiles > 0) {
|
|
87
|
+
console.log(chalk.dim(`\n Skipped ${skippedFiles} unreadable files`));
|
|
88
|
+
}
|
|
74
89
|
spinner.text = 'Indexing with Optiq Engine...';
|
|
75
90
|
// Generate repository ID from path
|
|
76
91
|
const repoId = generateRepoId(targetPath);
|
|
77
|
-
|
|
78
|
-
|
|
92
|
+
console.log(chalk.blue('\n📊 Repository ID:'), chalk.bold(repoId));
|
|
93
|
+
console.log(chalk.dim(` Indexing ${fileContents.length} files...\n`));
|
|
94
|
+
// PARALLEL STREAMING: Send multiple batches concurrently for maximum speed
|
|
95
|
+
const BATCH_SIZE = 40; // Files per batch
|
|
96
|
+
const CONCURRENT_BATCHES = 4; // Number of batches to send in parallel
|
|
97
|
+
const MAX_RETRIES = 3;
|
|
98
|
+
const RETRY_DELAY_MS = 2000;
|
|
99
|
+
const TIMEOUT_MS = 180000; // 3 minutes timeout
|
|
79
100
|
let uploadedCount = 0;
|
|
80
101
|
let totalChunksCreated = 0;
|
|
102
|
+
let failedBatches = [];
|
|
103
|
+
const totalBatches = Math.ceil(fileContents.length / BATCH_SIZE);
|
|
104
|
+
// Create all batches upfront
|
|
105
|
+
const batches = [];
|
|
81
106
|
for (let i = 0; i < fileContents.length; i += BATCH_SIZE) {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
107
|
+
batches.push({
|
|
108
|
+
batchNum: Math.floor(i / BATCH_SIZE) + 1,
|
|
109
|
+
files: fileContents.slice(i, i + BATCH_SIZE),
|
|
110
|
+
});
|
|
111
|
+
}
|
|
112
|
+
// Process a single batch with retries
|
|
113
|
+
async function processBatch(batch) {
|
|
114
|
+
for (let retry = 0; retry < MAX_RETRIES; retry++) {
|
|
90
115
|
try {
|
|
91
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
116
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, {
|
|
92
117
|
repository_id: repoId,
|
|
93
118
|
branch: 'main',
|
|
94
|
-
files: batch,
|
|
119
|
+
files: batch.files,
|
|
95
120
|
}, {
|
|
96
|
-
headers: {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
121
|
+
headers: { 'Content-Type': 'application/json' },
|
|
122
|
+
timeout: TIMEOUT_MS,
|
|
123
|
+
maxContentLength: Infinity,
|
|
124
|
+
maxBodyLength: Infinity,
|
|
100
125
|
});
|
|
101
|
-
if (
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
126
|
+
if (response.data.success) {
|
|
127
|
+
return {
|
|
128
|
+
success: true,
|
|
129
|
+
batchNum: batch.batchNum,
|
|
130
|
+
chunksCreated: response.data.result?.chunks_created || 0,
|
|
131
|
+
filesCount: batch.files.length,
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
else {
|
|
135
|
+
const errorMsg = response.data.result?.errors?.join(', ') || 'Unknown API error';
|
|
136
|
+
if (retry < MAX_RETRIES - 1) {
|
|
137
|
+
await sleep(RETRY_DELAY_MS * (retry + 1));
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, filesCount: 0, error: errorMsg };
|
|
141
|
+
}
|
|
105
142
|
}
|
|
106
|
-
totalChunksCreated += response.data.result?.chunks_created || 0;
|
|
107
|
-
uploadedCount += batch.length;
|
|
108
|
-
break;
|
|
109
143
|
}
|
|
110
144
|
catch (err) {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
145
|
+
if (retry < MAX_RETRIES - 1) {
|
|
146
|
+
await sleep(RETRY_DELAY_MS * (retry + 1));
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
const errorMsg = err.response?.data?.error || err.message || 'Unknown error';
|
|
150
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, filesCount: 0, error: errorMsg };
|
|
116
151
|
}
|
|
117
152
|
}
|
|
118
153
|
}
|
|
119
|
-
|
|
120
|
-
|
|
154
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, filesCount: 0, error: 'Max retries exceeded' };
|
|
155
|
+
}
|
|
156
|
+
// Process batches in parallel waves
|
|
157
|
+
let completedBatches = 0;
|
|
158
|
+
for (let i = 0; i < batches.length; i += CONCURRENT_BATCHES) {
|
|
159
|
+
const wave = batches.slice(i, i + CONCURRENT_BATCHES);
|
|
160
|
+
spinner.text = `Indexing batches ${i + 1}-${Math.min(i + CONCURRENT_BATCHES, batches.length)}/${totalBatches} (${CONCURRENT_BATCHES} concurrent)...`;
|
|
161
|
+
// Fire all batches in this wave concurrently
|
|
162
|
+
const results = await Promise.all(wave.map(processBatch));
|
|
163
|
+
// Process results
|
|
164
|
+
for (const result of results) {
|
|
165
|
+
completedBatches++;
|
|
166
|
+
if (result.success) {
|
|
167
|
+
uploadedCount += result.filesCount;
|
|
168
|
+
totalChunksCreated += result.chunksCreated;
|
|
169
|
+
}
|
|
170
|
+
else {
|
|
171
|
+
failedBatches.push(result.batchNum);
|
|
172
|
+
console.log(chalk.yellow(`\n⚠️ Batch ${result.batchNum} failed: ${result.error}`));
|
|
173
|
+
}
|
|
121
174
|
}
|
|
175
|
+
spinner.text = `Indexed ${completedBatches}/${totalBatches} batches (${uploadedCount} files, ${totalChunksCreated} chunks)...`;
|
|
176
|
+
}
|
|
177
|
+
// Summary
|
|
178
|
+
spinner.stop();
|
|
179
|
+
if (failedBatches.length === 0) {
|
|
180
|
+
console.log(chalk.green('\n✓ Indexing complete!'));
|
|
181
|
+
}
|
|
182
|
+
else if (failedBatches.length < totalBatches) {
|
|
183
|
+
console.log(chalk.yellow(`\n⚠️ Indexing partially complete (${failedBatches.length}/${totalBatches} batches failed)`));
|
|
184
|
+
}
|
|
185
|
+
else {
|
|
186
|
+
console.log(chalk.red('\n✗ Indexing failed - all batches failed'));
|
|
187
|
+
return;
|
|
122
188
|
}
|
|
123
|
-
spinner.succeed(chalk.green('✓ Indexing complete'));
|
|
124
189
|
console.log(chalk.blue('📊 Repository ID:'), chalk.bold(repoId));
|
|
125
|
-
console.log(chalk.blue('📁 Files indexed:'), chalk.bold(uploadedCount));
|
|
190
|
+
console.log(chalk.blue('📁 Files indexed:'), chalk.bold(uploadedCount), chalk.dim(`/ ${fileContents.length}`));
|
|
126
191
|
console.log(chalk.blue('📝 Chunks created:'), chalk.bold(totalChunksCreated));
|
|
192
|
+
if (failedBatches.length > 0) {
|
|
193
|
+
console.log(chalk.yellow('⚠️ Failed batches:'), chalk.bold(failedBatches.join(', ')));
|
|
194
|
+
console.log(chalk.dim('\nTip: Run the index command again to retry failed files.'));
|
|
195
|
+
}
|
|
127
196
|
console.log(chalk.dim('\nUse this repo_id for searches'));
|
|
128
197
|
}
|
|
129
198
|
catch (error) {
|
|
@@ -136,25 +205,40 @@ export async function index(options) {
|
|
|
136
205
|
}
|
|
137
206
|
}
|
|
138
207
|
}
|
|
139
|
-
|
|
208
|
+
function sleep(ms) {
|
|
209
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
210
|
+
}
|
|
211
|
+
async function collectFiles(dir, verbose = false) {
|
|
140
212
|
const files = [];
|
|
141
213
|
const ignorePatterns = await getGitIgnorePatterns(dir);
|
|
214
|
+
const ignoredDirs = [];
|
|
142
215
|
async function walk(currentPath) {
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
const
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
216
|
+
try {
|
|
217
|
+
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
218
|
+
for (const entry of entries) {
|
|
219
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
220
|
+
const relativePath = path.relative(dir, fullPath);
|
|
221
|
+
if (shouldIgnoreFile(relativePath, ignorePatterns)) {
|
|
222
|
+
if (verbose && entry.isDirectory()) {
|
|
223
|
+
ignoredDirs.push(relativePath);
|
|
224
|
+
}
|
|
225
|
+
continue;
|
|
226
|
+
}
|
|
227
|
+
if (entry.isDirectory()) {
|
|
228
|
+
await walk(fullPath);
|
|
229
|
+
}
|
|
230
|
+
else if (entry.isFile()) {
|
|
231
|
+
files.push(fullPath);
|
|
232
|
+
}
|
|
155
233
|
}
|
|
156
234
|
}
|
|
235
|
+
catch (error) {
|
|
236
|
+
// Skip directories we can't read (permissions, etc.)
|
|
237
|
+
}
|
|
157
238
|
}
|
|
158
239
|
await walk(dir);
|
|
240
|
+
if (verbose && ignoredDirs.length > 0) {
|
|
241
|
+
console.log(chalk.dim(`\n Ignored directories: ${ignoredDirs.slice(0, 20).join(', ')}${ignoredDirs.length > 20 ? '...' : ''}`));
|
|
242
|
+
}
|
|
159
243
|
return files;
|
|
160
244
|
}
|
package/dist/index.js
CHANGED
|
@@ -10,8 +10,8 @@ import { getConfig, saveConfig } from './utils/config.js';
|
|
|
10
10
|
import { isValidDirectory, getGitIgnorePatterns, shouldIgnoreFile } from './utils/files.js';
|
|
11
11
|
const BACKEND_URL = process.env.OPTIQ_BACKEND_URL || 'https://api.optiqcode.com';
|
|
12
12
|
const ENGINE_URL = process.env.OPTIQ_ENGINE_URL || 'http://25.36.113.3:3002';
|
|
13
|
-
// Debug mode - set OPTIQ_DEBUG=1 to see request details
|
|
14
|
-
const DEBUG = process.env.OPTIQ_DEBUG === '1';
|
|
13
|
+
// Debug mode - set OPTIQ_DEBUG=1 or OPTIQ_DEBUG=true to see request details
|
|
14
|
+
const DEBUG = process.env.OPTIQ_DEBUG === '1' || process.env.OPTIQ_DEBUG === 'true' || process.env.OPTIQ_DEBUG?.toLowerCase() === 'true';
|
|
15
15
|
// Helper to generate repository ID from path
|
|
16
16
|
function generateRepoId(targetPath) {
|
|
17
17
|
// Use path basename + hash of full path for uniqueness
|
|
@@ -267,10 +267,13 @@ async function main() {
|
|
|
267
267
|
}
|
|
268
268
|
}
|
|
269
269
|
async function indexOnce(targetPath, config) {
|
|
270
|
+
console.log(chalk.cyan('\n🚀 Starting index...\n'));
|
|
270
271
|
const spinner = ora({ text: 'Collecting files...', color: 'cyan' }).start();
|
|
271
272
|
try {
|
|
273
|
+
spinner.stop();
|
|
272
274
|
const files = await collectFiles(targetPath);
|
|
273
|
-
|
|
275
|
+
console.log(chalk.cyan(`\n📁 Collected ${files.length} files to index\n`));
|
|
276
|
+
spinner.start(`Reading ${files.length} files...`);
|
|
274
277
|
// Read files in parallel (100 at a time)
|
|
275
278
|
const PARALLEL_READS = 100;
|
|
276
279
|
const MAX_FILE_SIZE = 100_000; // 100KB max per file
|
|
@@ -299,64 +302,106 @@ async function indexOnce(targetPath, config) {
|
|
|
299
302
|
// Generate repository ID from path
|
|
300
303
|
const repoId = generateRepoId(targetPath);
|
|
301
304
|
// Upload in batches
|
|
302
|
-
const BATCH_SIZE =
|
|
305
|
+
const BATCH_SIZE = 50;
|
|
303
306
|
let totalChunks = 0;
|
|
304
307
|
if (DEBUG) {
|
|
305
|
-
console.log(chalk.gray(`\n[DEBUG] Sending to: ${ENGINE_URL}/api/v1/
|
|
308
|
+
console.log(chalk.gray(`\n[DEBUG] Sending to: ${ENGINE_URL}/api/v1/index_simple`));
|
|
306
309
|
console.log(chalk.gray(`[DEBUG] Repo ID: ${repoId}`));
|
|
307
310
|
console.log(chalk.gray(`[DEBUG] Total files: ${filesArray.length}`));
|
|
308
311
|
}
|
|
309
312
|
spinner.text = `Indexing ${filesArray.length} files...`;
|
|
310
313
|
const totalBatches = Math.ceil(filesArray.length / BATCH_SIZE);
|
|
314
|
+
if (DEBUG) {
|
|
315
|
+
console.log(chalk.gray(`\n[DEBUG] Total files: ${filesArray.length}, Batch size: ${BATCH_SIZE}, Total batches: ${totalBatches}`));
|
|
316
|
+
}
|
|
317
|
+
// PARALLEL BATCH PROCESSING - send multiple batches concurrently
|
|
318
|
+
const CONCURRENT_BATCHES = 4;
|
|
319
|
+
const MAX_RETRIES = 3;
|
|
320
|
+
const RETRY_DELAY_MS = 2000;
|
|
321
|
+
// Create all batches upfront
|
|
322
|
+
const batches = [];
|
|
311
323
|
for (let i = 0; i < filesArray.length; i += BATCH_SIZE) {
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
324
|
+
batches.push({
|
|
325
|
+
batchNum: Math.floor(i / BATCH_SIZE) + 1,
|
|
326
|
+
files: filesArray.slice(i, i + BATCH_SIZE),
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
if (DEBUG) {
|
|
330
|
+
console.log(chalk.gray(`\n[DEBUG] Created ${batches.length} batches, processing ${CONCURRENT_BATCHES} concurrently`));
|
|
331
|
+
}
|
|
332
|
+
// Process a single batch with retries
|
|
333
|
+
async function processBatch(batch) {
|
|
334
|
+
for (let retry = 0; retry < MAX_RETRIES; retry++) {
|
|
323
335
|
try {
|
|
324
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
336
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, {
|
|
325
337
|
repository_id: repoId,
|
|
326
338
|
branch: 'main',
|
|
327
|
-
files: batch,
|
|
339
|
+
files: batch.files,
|
|
328
340
|
}, {
|
|
329
|
-
headers: {
|
|
330
|
-
|
|
331
|
-
},
|
|
332
|
-
timeout: 300000, // 5 minutes per batch
|
|
341
|
+
headers: { 'Content-Type': 'application/json' },
|
|
342
|
+
timeout: 300000,
|
|
333
343
|
});
|
|
334
|
-
if (
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
344
|
+
if (response.data.success) {
|
|
345
|
+
return {
|
|
346
|
+
success: true,
|
|
347
|
+
batchNum: batch.batchNum,
|
|
348
|
+
chunksCreated: response.data.result?.chunks_created || 0,
|
|
349
|
+
};
|
|
350
|
+
}
|
|
351
|
+
else {
|
|
352
|
+
const errorMsg = response.data.result?.errors?.join(', ') || 'Unknown API error';
|
|
353
|
+
if (retry < MAX_RETRIES - 1) {
|
|
354
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
355
|
+
}
|
|
356
|
+
else {
|
|
357
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, error: errorMsg };
|
|
358
|
+
}
|
|
338
359
|
}
|
|
339
|
-
totalChunks += response.data.result?.chunks_created || 0;
|
|
340
|
-
break;
|
|
341
360
|
}
|
|
342
361
|
catch (err) {
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
console.log(chalk.gray(` Sizes: ${batch.map(f => `${f.path}(${Math.round(f.content.length / 1024)}KB)`).join(', ')}`));
|
|
350
|
-
if (retries > 0) {
|
|
351
|
-
console.log(chalk.gray(` Retrying... (${retries} left)\n`));
|
|
352
|
-
await new Promise(r => setTimeout(r, 2000));
|
|
353
|
-
spinner.start(`Retrying batch ${batchNum}...`);
|
|
362
|
+
if (retry < MAX_RETRIES - 1) {
|
|
363
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
364
|
+
}
|
|
365
|
+
else {
|
|
366
|
+
const errorMsg = err.response?.data?.error || err.message || 'Unknown error';
|
|
367
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, error: errorMsg };
|
|
354
368
|
}
|
|
355
369
|
}
|
|
356
370
|
}
|
|
357
|
-
|
|
358
|
-
|
|
371
|
+
return { success: false, batchNum: batch.batchNum, chunksCreated: 0, error: 'Max retries exceeded' };
|
|
372
|
+
}
|
|
373
|
+
// Process batches in parallel waves
|
|
374
|
+
let completedBatches = 0;
|
|
375
|
+
let failedBatches = [];
|
|
376
|
+
for (let i = 0; i < batches.length; i += CONCURRENT_BATCHES) {
|
|
377
|
+
const wave = batches.slice(i, i + CONCURRENT_BATCHES);
|
|
378
|
+
const waveEnd = Math.min(i + CONCURRENT_BATCHES, batches.length);
|
|
379
|
+
spinner.text = `Indexing batches ${i + 1}-${waveEnd}/${totalBatches} (${CONCURRENT_BATCHES} concurrent)...`;
|
|
380
|
+
if (DEBUG) {
|
|
381
|
+
console.log(chalk.gray(`\n[DEBUG] Starting wave: batches ${i + 1}-${waveEnd}`));
|
|
359
382
|
}
|
|
383
|
+
// Fire all batches in this wave concurrently
|
|
384
|
+
const results = await Promise.all(wave.map(processBatch));
|
|
385
|
+
// Process results
|
|
386
|
+
spinner.stop();
|
|
387
|
+
for (const result of results) {
|
|
388
|
+
completedBatches++;
|
|
389
|
+
if (result.success) {
|
|
390
|
+
totalChunks += result.chunksCreated;
|
|
391
|
+
console.log(chalk.green(`✓ Batch ${result.batchNum}/${totalBatches}: ${result.chunksCreated} chunks`));
|
|
392
|
+
}
|
|
393
|
+
else {
|
|
394
|
+
failedBatches.push(result.batchNum);
|
|
395
|
+
console.log(chalk.red(`✗ Batch ${result.batchNum}/${totalBatches} failed: ${result.error}`));
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
spinner.start(`Indexed ${completedBatches}/${totalBatches} batches (${totalChunks} chunks)...`);
|
|
399
|
+
}
|
|
400
|
+
if (DEBUG) {
|
|
401
|
+
console.log(chalk.gray(`\n[DEBUG] All ${totalBatches} batches complete. Total chunks: ${totalChunks}`));
|
|
402
|
+
}
|
|
403
|
+
if (failedBatches.length > 0) {
|
|
404
|
+
console.log(chalk.yellow(`\n⚠️ ${failedBatches.length} batches failed: ${failedBatches.join(', ')}`));
|
|
360
405
|
}
|
|
361
406
|
spinner.succeed(chalk.cyan('Indexed'));
|
|
362
407
|
console.log(chalk.gray(` ${filesArray.length} files • ${totalChunks} chunks`));
|
|
@@ -403,47 +448,61 @@ async function watchDirectory(targetPath, config) {
|
|
|
403
448
|
spinner.text = `Reading... ${Math.min(i + PARALLEL_READS, files.length)}/${files.length}`;
|
|
404
449
|
}
|
|
405
450
|
// Upload in small batches to avoid gateway timeouts
|
|
406
|
-
const BATCH_SIZE =
|
|
451
|
+
const BATCH_SIZE = 50;
|
|
452
|
+
const totalBatches = Math.ceil(filesArray.length / BATCH_SIZE);
|
|
453
|
+
if (DEBUG) {
|
|
454
|
+
console.log(chalk.gray(`\n[DEBUG] Watch initial index: ${filesArray.length} files, ${totalBatches} batches`));
|
|
455
|
+
}
|
|
456
|
+
// PARALLEL BATCH PROCESSING for initial watch index
|
|
457
|
+
const CONCURRENT_BATCHES = 4;
|
|
458
|
+
const MAX_RETRIES = 3;
|
|
459
|
+
const RETRY_DELAY_MS = 2000;
|
|
460
|
+
// Create all batches
|
|
461
|
+
const batches = [];
|
|
407
462
|
for (let i = 0; i < filesArray.length; i += BATCH_SIZE) {
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
463
|
+
batches.push({
|
|
464
|
+
batchNum: Math.floor(i / BATCH_SIZE) + 1,
|
|
465
|
+
files: filesArray.slice(i, i + BATCH_SIZE),
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
if (DEBUG) {
|
|
469
|
+
console.log(chalk.gray(`\n[DEBUG] Watch initial: ${batches.length} batches, ${CONCURRENT_BATCHES} concurrent`));
|
|
470
|
+
}
|
|
471
|
+
// Process a single batch with retries
|
|
472
|
+
async function processWatchBatch(batch) {
|
|
473
|
+
for (let retry = 0; retry < MAX_RETRIES; retry++) {
|
|
416
474
|
try {
|
|
417
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
headers: {
|
|
423
|
-
'Content-Type': 'application/json',
|
|
424
|
-
},
|
|
425
|
-
timeout: 300000, // 5 minutes per batch
|
|
426
|
-
});
|
|
427
|
-
if (!response.data.success) {
|
|
428
|
-
spinner.fail(chalk.red('Failed'));
|
|
429
|
-
console.log(chalk.gray(response.data.result?.errors?.join('\n') || 'Unknown error'));
|
|
430
|
-
return;
|
|
431
|
-
}
|
|
432
|
-
break;
|
|
475
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, { repository_id: repoId, branch: 'main', files: batch.files }, { headers: { 'Content-Type': 'application/json' }, timeout: 300000 });
|
|
476
|
+
if (response.data.success)
|
|
477
|
+
return true;
|
|
478
|
+
if (retry < MAX_RETRIES - 1)
|
|
479
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
433
480
|
}
|
|
434
481
|
catch (err) {
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
if (
|
|
438
|
-
|
|
439
|
-
await new Promise(r => setTimeout(r, 2000));
|
|
440
|
-
}
|
|
482
|
+
if (DEBUG)
|
|
483
|
+
console.log(chalk.gray(`[DEBUG] Watch batch ${batch.batchNum} error: ${err.message}`));
|
|
484
|
+
if (retry < MAX_RETRIES - 1)
|
|
485
|
+
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (retry + 1)));
|
|
441
486
|
}
|
|
442
487
|
}
|
|
443
|
-
|
|
444
|
-
|
|
488
|
+
return false;
|
|
489
|
+
}
|
|
490
|
+
// Process in parallel waves
|
|
491
|
+
let completedBatches = 0;
|
|
492
|
+
for (let i = 0; i < batches.length; i += CONCURRENT_BATCHES) {
|
|
493
|
+
const wave = batches.slice(i, i + CONCURRENT_BATCHES);
|
|
494
|
+
spinner.text = `Indexing... ${Math.min(i + CONCURRENT_BATCHES, batches.length)}/${totalBatches} batches`;
|
|
495
|
+
const results = await Promise.all(wave.map(processWatchBatch));
|
|
496
|
+
completedBatches += results.length;
|
|
497
|
+
const failures = results.filter(r => !r).length;
|
|
498
|
+
if (failures > 0) {
|
|
499
|
+
spinner.fail(chalk.red(`Failed (${failures} batches failed)`));
|
|
500
|
+
return;
|
|
445
501
|
}
|
|
446
502
|
}
|
|
503
|
+
if (DEBUG) {
|
|
504
|
+
console.log(chalk.gray(`[DEBUG] Watch initial index complete: all ${totalBatches} batches done`));
|
|
505
|
+
}
|
|
447
506
|
spinner.succeed(chalk.cyan(`Indexed ${files.length} files`));
|
|
448
507
|
console.log(chalk.gray(` Repo ID: ${repoId}\n`));
|
|
449
508
|
}
|
|
@@ -557,10 +616,10 @@ async function watchDirectory(targetPath, config) {
|
|
|
557
616
|
}
|
|
558
617
|
// Filter out deleted files (content: null) - send only files with content
|
|
559
618
|
const filesToIndex = filesArray.filter(f => f.content !== null);
|
|
560
|
-
// For incremental updates, use the
|
|
619
|
+
// For incremental updates, use the simplified index endpoint
|
|
561
620
|
// The Rust engine handles upserts automatically
|
|
562
621
|
if (filesToIndex.length > 0) {
|
|
563
|
-
const response = await axios.post(`${ENGINE_URL}/api/v1/
|
|
622
|
+
const response = await axios.post(`${ENGINE_URL}/api/v1/index_simple`, {
|
|
564
623
|
repository_id: repoId,
|
|
565
624
|
branch: 'main',
|
|
566
625
|
files: filesToIndex,
|
|
@@ -657,23 +716,125 @@ async function watchDirectory(targetPath, config) {
|
|
|
657
716
|
async function collectFiles(dir) {
|
|
658
717
|
const files = [];
|
|
659
718
|
const ignorePatterns = await getGitIgnorePatterns(dir);
|
|
719
|
+
const ignoredItems = [];
|
|
720
|
+
// Always log ignore patterns for debugging
|
|
721
|
+
console.log(chalk.gray('\n📋 Checking ignore patterns...'));
|
|
660
722
|
async function walk(currentPath) {
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
const
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
723
|
+
try {
|
|
724
|
+
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
725
|
+
for (const entry of entries) {
|
|
726
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
727
|
+
const relativePath = path.relative(dir, fullPath);
|
|
728
|
+
if (shouldIgnoreFile(relativePath, ignorePatterns)) {
|
|
729
|
+
// Log ignored directories (especially admin!)
|
|
730
|
+
if (entry.isDirectory()) {
|
|
731
|
+
ignoredItems.push(`DIR: ${relativePath}`);
|
|
732
|
+
// Special check for admin
|
|
733
|
+
if (relativePath.includes('admin')) {
|
|
734
|
+
console.log(chalk.yellow(`⚠️ IGNORING admin directory: ${relativePath}`));
|
|
735
|
+
// Find which pattern matched
|
|
736
|
+
for (const pattern of ignorePatterns) {
|
|
737
|
+
if (relativePath.includes(pattern) || pattern.includes('admin')) {
|
|
738
|
+
console.log(chalk.yellow(` Matched pattern: "${pattern}"`));
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
continue;
|
|
744
|
+
}
|
|
745
|
+
if (entry.isDirectory()) {
|
|
746
|
+
await walk(fullPath);
|
|
747
|
+
}
|
|
748
|
+
else if (entry.isFile()) {
|
|
749
|
+
files.push(fullPath);
|
|
750
|
+
// Log admin files being collected
|
|
751
|
+
if (relativePath.includes('admin')) {
|
|
752
|
+
console.log(chalk.green(`✓ Found admin file: ${relativePath}`));
|
|
753
|
+
}
|
|
754
|
+
}
|
|
667
755
|
}
|
|
668
|
-
|
|
669
|
-
|
|
756
|
+
}
|
|
757
|
+
catch (error) {
|
|
758
|
+
console.log(chalk.yellow(`⚠️ Cannot read directory: ${currentPath}`));
|
|
759
|
+
}
|
|
760
|
+
}
|
|
761
|
+
await walk(dir);
|
|
762
|
+
// Show summary of ignored directories
|
|
763
|
+
if (ignoredItems.length > 0) {
|
|
764
|
+
console.log(chalk.gray(`\n📁 Ignored ${ignoredItems.length} items`));
|
|
765
|
+
// Show first 20
|
|
766
|
+
for (const item of ignoredItems.slice(0, 20)) {
|
|
767
|
+
console.log(chalk.gray(` ${item}`));
|
|
768
|
+
}
|
|
769
|
+
if (ignoredItems.length > 20) {
|
|
770
|
+
console.log(chalk.gray(` ... and ${ignoredItems.length - 20} more`));
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
// Check if any admin files were found
|
|
774
|
+
const adminFiles = files.filter(f => f.includes('admin'));
|
|
775
|
+
if (adminFiles.length > 0) {
|
|
776
|
+
console.log(chalk.green(`\n✓ Found ${adminFiles.length} files in admin directories`));
|
|
777
|
+
}
|
|
778
|
+
else {
|
|
779
|
+
console.log(chalk.yellow(`\n⚠️ No admin files found! Check if admin is being ignored.`));
|
|
780
|
+
}
|
|
781
|
+
console.log('');
|
|
782
|
+
// Log first 10 ignore patterns from gitignore
|
|
783
|
+
const gitignorePatterns = ignorePatterns.slice(180); // Skip default patterns
|
|
784
|
+
if (gitignorePatterns.length > 0) {
|
|
785
|
+
console.log(chalk.gray('📋 .gitignore patterns:'));
|
|
786
|
+
for (const p of gitignorePatterns.slice(0, 15)) {
|
|
787
|
+
console.log(chalk.gray(` ${p}`));
|
|
788
|
+
}
|
|
789
|
+
if (gitignorePatterns.length > 15) {
|
|
790
|
+
console.log(chalk.gray(` ... and ${gitignorePatterns.length - 15} more`));
|
|
791
|
+
}
|
|
792
|
+
console.log('');
|
|
793
|
+
}
|
|
794
|
+
return files;
|
|
795
|
+
}
|
|
796
|
+
// Keep old version for reference - DELETE THIS
|
|
797
|
+
async function collectFilesOld(dir) {
|
|
798
|
+
const files = [];
|
|
799
|
+
const ignorePatterns = await getGitIgnorePatterns(dir);
|
|
800
|
+
const ignoredDirs = [];
|
|
801
|
+
async function walk(currentPath) {
|
|
802
|
+
try {
|
|
803
|
+
const entries = await fs.readdir(currentPath, { withFileTypes: true });
|
|
804
|
+
for (const entry of entries) {
|
|
805
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
806
|
+
const relativePath = path.relative(dir, fullPath);
|
|
807
|
+
if (shouldIgnoreFile(relativePath, ignorePatterns)) {
|
|
808
|
+
if (DEBUG && entry.isDirectory()) {
|
|
809
|
+
ignoredDirs.push(relativePath);
|
|
810
|
+
}
|
|
811
|
+
continue;
|
|
812
|
+
}
|
|
813
|
+
if (entry.isDirectory()) {
|
|
814
|
+
await walk(fullPath);
|
|
815
|
+
}
|
|
816
|
+
else if (entry.isFile()) {
|
|
817
|
+
files.push(fullPath);
|
|
818
|
+
}
|
|
670
819
|
}
|
|
671
|
-
|
|
672
|
-
|
|
820
|
+
}
|
|
821
|
+
catch (error) {
|
|
822
|
+
// Skip directories we can't read
|
|
823
|
+
if (DEBUG) {
|
|
824
|
+
console.log(chalk.yellow(`[DEBUG] Cannot read directory: ${currentPath}`));
|
|
673
825
|
}
|
|
674
826
|
}
|
|
675
827
|
}
|
|
676
828
|
await walk(dir);
|
|
829
|
+
if (DEBUG && ignoredDirs.length > 0) {
|
|
830
|
+
console.log(chalk.gray(`\n[DEBUG] Ignored directories (${ignoredDirs.length}):`));
|
|
831
|
+
for (const d of ignoredDirs.slice(0, 30)) {
|
|
832
|
+
console.log(chalk.gray(` - ${d}`));
|
|
833
|
+
}
|
|
834
|
+
if (ignoredDirs.length > 30) {
|
|
835
|
+
console.log(chalk.gray(` ... and ${ignoredDirs.length - 30} more`));
|
|
836
|
+
}
|
|
837
|
+
}
|
|
677
838
|
return files;
|
|
678
839
|
}
|
|
679
840
|
main().catch((error) => {
|
package/dist/utils/files.js
CHANGED
|
@@ -303,16 +303,32 @@ export async function getGitIgnorePatterns(dir) {
|
|
|
303
303
|
}
|
|
304
304
|
return patterns;
|
|
305
305
|
}
|
|
306
|
+
// Patterns that should only match at root level (not nested directories)
|
|
307
|
+
const ROOT_ONLY_PATTERNS = new Set([
|
|
308
|
+
'lib', 'lib64', 'bin', 'out', 'build', 'dist', 'vendor', 'tmp', 'temp',
|
|
309
|
+
'env', 'venv', 'ENV', '.venv', 'eggs', '.eggs', 'parts', 'sdist', 'var', 'wheels',
|
|
310
|
+
'deps', 'packages', 'site', 'coverage', 'htmlcov',
|
|
311
|
+
]);
|
|
306
312
|
export function shouldIgnoreFile(relativePath, patterns) {
|
|
307
313
|
// Normalize path separators to forward slashes for consistent matching
|
|
308
314
|
const normalizedPath = relativePath.replace(/\\/g, '/');
|
|
309
315
|
const parts = normalizedPath.split('/');
|
|
310
316
|
const fileName = parts[parts.length - 1];
|
|
317
|
+
const firstPart = parts[0]; // Root-level directory
|
|
311
318
|
for (const pattern of patterns) {
|
|
312
319
|
// Skip empty patterns
|
|
313
320
|
if (!pattern)
|
|
314
321
|
continue;
|
|
322
|
+
// For root-only patterns, only match if it's the first directory component
|
|
323
|
+
if (ROOT_ONLY_PATTERNS.has(pattern)) {
|
|
324
|
+
if (firstPart === pattern) {
|
|
325
|
+
return true;
|
|
326
|
+
}
|
|
327
|
+
// Don't match nested directories like frontend/src/lib
|
|
328
|
+
continue;
|
|
329
|
+
}
|
|
315
330
|
// Exact directory/file name match (e.g., 'node_modules', '.git')
|
|
331
|
+
// These match anywhere in the path
|
|
316
332
|
if (parts.includes(pattern)) {
|
|
317
333
|
return true;
|
|
318
334
|
}
|