magector 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -3
- package/package.json +5 -5
- package/src/cli.js +68 -14
- package/src/init.js +28 -5
- package/src/mcp-server.js +10 -2
package/README.md
CHANGED
|
@@ -318,11 +318,53 @@ The `describe` command and `magento_describe` MCP tool require an Anthropic API
|
|
|
318
318
|
| `MAGECTOR_DB` | Path to index database | `./.magector/index.db` |
|
|
319
319
|
| `MAGECTOR_BIN` | Path to magector-core binary | Auto-detected |
|
|
320
320
|
| `MAGECTOR_MODELS` | Path to ONNX model directory | `~/.magector/models/` |
|
|
321
|
-
| `MAGECTOR_INDEX_TIMEOUT` | Indexing timeout in milliseconds | `1800000` |
|
|
322
|
-
| `MAGECTOR_THREADS` | Max ONNX threads
|
|
323
|
-
| `
|
|
321
|
+
| `MAGECTOR_INDEX_TIMEOUT` | Indexing wall-clock timeout in milliseconds. Override for very large codebases or CPU-constrained environments. | `14400000` (4 h) |
|
|
322
|
+
| `MAGECTOR_THREADS` | Max ONNX intra-op + rayon parsing threads. Equivalent to the `--threads` CLI flag. | Half of CPU cores |
|
|
323
|
+
| `OMP_NUM_THREADS` | Fallback thread limit if `MAGECTOR_THREADS` is not set (de facto standard for ONNX/OpenMP). | — |
|
|
324
|
+
| `MAGECTOR_BATCH_SIZE` | Embedding batch size (higher = faster, more RAM). Equivalent to `--batch-size`. | `256` |
|
|
324
325
|
| `ANTHROPIC_API_KEY` | API key for description generation (`describe` command) | — |
|
|
325
326
|
|
|
327
|
+
### Constraining CPU usage during indexing
|
|
328
|
+
|
|
329
|
+
Indexing a large enterprise codebase (~80K files) can saturate CPU during PHASE 2 (ONNX embedding generation). To keep a developer machine responsive while indexing, lower the thread count:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
npx magector index --threads 2 # use only 2 cores for both parsing and embedding
|
|
333
|
+
MAGECTOR_THREADS=2 npx magector index # equivalent via env var
|
|
334
|
+
OMP_NUM_THREADS=2 npx magector index # also honored as a fallback
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
The `--threads` flag and `MAGECTOR_THREADS` / `OMP_NUM_THREADS` env vars constrain **both** the rayon thread pool used by PHASE 1 (parallel AST parsing) and the ONNX intra-op thread pool used by PHASE 2 (embedding inference). The active thread source is logged at startup so you can verify it took effect:
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
INFO Rayon global pool: 2 threads (available: 16)
|
|
341
|
+
INFO ONNX intra_threads: 2 (available: 16, source: --threads flag)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
For very large or CPU-constrained runs, you may also need to extend the wall-clock timeout (default 4 hours):
|
|
345
|
+
|
|
346
|
+
```bash
|
|
347
|
+
MAGECTOR_INDEX_TIMEOUT=28800000 npx magector index --threads 2 # 8 h timeout, 2 threads
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
### Resume after timeout or interrupt
|
|
351
|
+
|
|
352
|
+
Indexing writes a crash-safe checkpoint to disk every 50 batches (~12,800 files). If the process is killed or times out mid-run, **just re-run `npx magector index`** — it auto-resumes from the last checkpoint:
|
|
353
|
+
|
|
354
|
+
```bash
|
|
355
|
+
npx magector index
|
|
356
|
+
# ♻️ Resuming from previous run: 38400 vectors across 12200 files already indexed
|
|
357
|
+
# ✓ Found 79771 total files; 12200 already indexed, 67571 remaining to process
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
The indexer collects already-embedded file paths from the existing DB, filters them out of file discovery, preserves the existing HNSW state, and only parses/embeds the files that aren't in the DB yet. Partial resume also picks up new files added to the tree since the previous run.
|
|
361
|
+
|
|
362
|
+
To force a full rebuild (e.g. after a schema change or if you want to discard stale vectors), pass `--force`:
|
|
363
|
+
|
|
364
|
+
```bash
|
|
365
|
+
npx magector index --force
|
|
366
|
+
```
|
|
367
|
+
|
|
326
368
|
---
|
|
327
369
|
|
|
328
370
|
## MCP Server Tools
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "magector",
|
|
3
|
-
"version": "1.7.0",
|
|
3
|
+
"version": "1.7.2",
|
|
4
4
|
"description": "Semantic code search for Magento 2 — index, search, MCP server",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/mcp-server.js",
|
|
@@ -37,10 +37,10 @@
|
|
|
37
37
|
"ruvector": "^0.1.96"
|
|
38
38
|
},
|
|
39
39
|
"optionalDependencies": {
|
|
40
|
-
"@magector/cli-darwin-arm64": "1.7.0",
|
|
41
|
-
"@magector/cli-linux-x64": "1.7.0",
|
|
42
|
-
"@magector/cli-linux-arm64": "1.7.0",
|
|
43
|
-
"@magector/cli-win32-x64": "1.7.0"
|
|
40
|
+
"@magector/cli-darwin-arm64": "1.7.2",
|
|
41
|
+
"@magector/cli-linux-x64": "1.7.2",
|
|
42
|
+
"@magector/cli-linux-arm64": "1.7.2",
|
|
43
|
+
"@magector/cli-win32-x64": "1.7.2"
|
|
44
44
|
},
|
|
45
45
|
"keywords": [
|
|
46
46
|
"magento",
|
package/src/cli.js
CHANGED
|
@@ -30,21 +30,37 @@ Usage:
|
|
|
30
30
|
npx magector setup [path] IDE setup only (no indexing)
|
|
31
31
|
npx magector help Show this help
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
-l, --limit <n>
|
|
35
|
-
-f, --format <fmt>
|
|
33
|
+
Search options:
|
|
34
|
+
-l, --limit <n> Number of search results (default: 10)
|
|
35
|
+
-f, --format <fmt> Output format: text, json (default: text)
|
|
36
|
+
|
|
37
|
+
Index options:
|
|
38
|
+
--threads <n> Max ONNX/rayon threads (default: half of CPU cores).
|
|
39
|
+
Lower this on shared developer machines to keep the
|
|
40
|
+
system responsive during indexing.
|
|
41
|
+
--batch-size <n> Embedding batch size (default: 256). Higher = faster
|
|
42
|
+
but more RAM.
|
|
43
|
+
--force Discard any existing index and rebuild from scratch.
|
|
44
|
+
Without --force, indexing auto-resumes from the last
|
|
45
|
+
incremental save (written every ~50 batches).
|
|
36
46
|
|
|
37
47
|
Environment Variables:
|
|
38
|
-
MAGENTO_ROOT
|
|
39
|
-
MAGECTOR_DB
|
|
40
|
-
MAGECTOR_BIN
|
|
41
|
-
MAGECTOR_MODELS
|
|
48
|
+
MAGENTO_ROOT Path to Magento installation (default: cwd)
|
|
49
|
+
MAGECTOR_DB Path to index database (default: ./.magector/index.db)
|
|
50
|
+
MAGECTOR_BIN Path to magector-core binary
|
|
51
|
+
MAGECTOR_MODELS Path to ONNX model directory
|
|
52
|
+
MAGECTOR_THREADS Max threads (overridden by --threads)
|
|
53
|
+
MAGECTOR_BATCH_SIZE Embedding batch size (overridden by --batch-size)
|
|
54
|
+
MAGECTOR_INDEX_TIMEOUT Indexing wall-clock timeout in ms (default: 14400000 = 4h)
|
|
55
|
+
OMP_NUM_THREADS Fallback thread limit if MAGECTOR_THREADS unset
|
|
42
56
|
|
|
43
57
|
Examples:
|
|
44
58
|
npx magector init /var/www/magento
|
|
45
59
|
npx magector search "product price calculation"
|
|
46
60
|
npx magector search "checkout controller" -l 20
|
|
47
61
|
npx magector index
|
|
62
|
+
npx magector index --threads 4 --batch-size 128
|
|
63
|
+
MAGECTOR_INDEX_TIMEOUT=28800000 npx magector index # 8h timeout
|
|
48
64
|
npx magector mcp
|
|
49
65
|
`);
|
|
50
66
|
}
|
|
@@ -67,12 +83,16 @@ function parseArgs(argv) {
|
|
|
67
83
|
opts.verbose = true;
|
|
68
84
|
} else if (argv[i] === '--force') {
|
|
69
85
|
opts.force = true;
|
|
86
|
+
} else if (argv[i] === '--threads') {
|
|
87
|
+
opts.threads = argv[++i];
|
|
88
|
+
} else if (argv[i] === '--batch-size') {
|
|
89
|
+
opts.batchSize = argv[++i];
|
|
70
90
|
}
|
|
71
91
|
}
|
|
72
92
|
return opts;
|
|
73
93
|
}
|
|
74
94
|
|
|
75
|
-
async function runIndex(targetPath) {
|
|
95
|
+
async function runIndex(targetPath, opts = {}) {
|
|
76
96
|
const config = getConfig();
|
|
77
97
|
const root = targetPath || config.magentoRoot;
|
|
78
98
|
const binary = resolveBinary();
|
|
@@ -85,7 +105,9 @@ async function runIndex(targetPath) {
|
|
|
85
105
|
const magectorDir = path.resolve(root, '.magector');
|
|
86
106
|
mkdirSync(magectorDir, { recursive: true });
|
|
87
107
|
|
|
88
|
-
|
|
108
|
+
// Default 4 hours — generous enough for ~80K-file enterprise codebases under
|
|
109
|
+
// CPU constraint. Override via MAGECTOR_INDEX_TIMEOUT (milliseconds).
|
|
110
|
+
const indexTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 14400000;
|
|
89
111
|
try {
|
|
90
112
|
const indexArgs = [
|
|
91
113
|
'index',
|
|
@@ -93,6 +115,21 @@ async function runIndex(targetPath) {
|
|
|
93
115
|
'-d', path.resolve(config.dbPath),
|
|
94
116
|
'-c', modelPath
|
|
95
117
|
];
|
|
118
|
+
// Forward thread/batch limits to the Rust binary. The Rust side already
|
|
119
|
+
// honors MAGECTOR_THREADS / OMP_NUM_THREADS via env, but explicit flags
|
|
120
|
+
// give the user a CLI-level override and make the limit visible in logs.
|
|
121
|
+
if (opts.threads != null) {
|
|
122
|
+
indexArgs.push('--threads', String(opts.threads));
|
|
123
|
+
}
|
|
124
|
+
if (opts.batchSize != null) {
|
|
125
|
+
indexArgs.push('--batch-size', String(opts.batchSize));
|
|
126
|
+
}
|
|
127
|
+
// --force discards any existing partial index and rebuilds from scratch.
|
|
128
|
+
// Without it, the Rust indexer auto-resumes from the last incremental
|
|
129
|
+
// save on disk and only re-embeds files that aren't in the DB yet.
|
|
130
|
+
if (opts.force) {
|
|
131
|
+
indexArgs.push('--force');
|
|
132
|
+
}
|
|
96
133
|
// Pass descriptions DB if it exists
|
|
97
134
|
const descDbPath = path.resolve(root, '.magector', 'sqlite.db');
|
|
98
135
|
if (existsSync(descDbPath)) {
|
|
@@ -106,7 +143,15 @@ async function runIndex(targetPath) {
|
|
|
106
143
|
process.exit(err.status);
|
|
107
144
|
}
|
|
108
145
|
if (err.message && err.message.includes('ETIMEDOUT')) {
|
|
109
|
-
console.error(
|
|
146
|
+
console.error(
|
|
147
|
+
`Indexing timed out after ${indexTimeout / 1000}s.\n` +
|
|
148
|
+
`Partial progress was saved to disk every ~50 batches — re-run\n` +
|
|
149
|
+
`'npx magector index' to auto-resume from the last checkpoint.\n` +
|
|
150
|
+
`\n` +
|
|
151
|
+
`For large codebases or CPU-constrained environments, also consider:\n` +
|
|
152
|
+
` MAGECTOR_INDEX_TIMEOUT=28800000 npx magector index # 8 hours\n` +
|
|
153
|
+
` npx magector index --threads 2 # lower CPU usage`
|
|
154
|
+
);
|
|
110
155
|
} else {
|
|
111
156
|
console.error(`Indexing error: ${err.message}`);
|
|
112
157
|
}
|
|
@@ -202,13 +247,22 @@ async function main() {
|
|
|
202
247
|
await checkForUpdate(command, args);
|
|
203
248
|
|
|
204
249
|
switch (command) {
|
|
205
|
-
case 'init':
|
|
206
|
-
|
|
250
|
+
case 'init': {
|
|
251
|
+
const initArgv = args.slice(1);
|
|
252
|
+
const initTarget = initArgv.find(a => !a.startsWith('-'));
|
|
253
|
+
const initOpts = parseArgs(initArgv);
|
|
254
|
+
await init(initTarget, initOpts);
|
|
207
255
|
break;
|
|
256
|
+
}
|
|
208
257
|
|
|
209
|
-
case 'index':
|
|
210
|
-
|
|
258
|
+
case 'index': {
|
|
259
|
+
// First non-flag arg after `index` is the path; everything else is options.
|
|
260
|
+
const indexArgv = args.slice(1);
|
|
261
|
+
const targetPath = indexArgv.find(a => !a.startsWith('-'));
|
|
262
|
+
const indexOpts = parseArgs(indexArgv);
|
|
263
|
+
await runIndex(targetPath, indexOpts);
|
|
211
264
|
break;
|
|
265
|
+
}
|
|
212
266
|
|
|
213
267
|
case 'search': {
|
|
214
268
|
const query = args.slice(1).filter(a => !a.startsWith('-')).join(' ');
|
package/src/init.js
CHANGED
|
@@ -198,8 +198,13 @@ function updateGitignore(projectPath) {
|
|
|
198
198
|
|
|
199
199
|
/**
|
|
200
200
|
* Main init function.
|
|
201
|
+
*
|
|
202
|
+
* @param {string} projectPath - Magento root (defaults to cwd).
|
|
203
|
+
* @param {object} [opts]
|
|
204
|
+
* @param {number|string} [opts.threads] - Forwarded as `--threads` to magector-core.
|
|
205
|
+
* @param {number|string} [opts.batchSize] - Forwarded as `--batch-size`.
|
|
201
206
|
*/
|
|
202
|
-
export async function init(projectPath) {
|
|
207
|
+
export async function init(projectPath, opts = {}) {
|
|
203
208
|
projectPath = path.resolve(projectPath || process.cwd());
|
|
204
209
|
mkdirSync(path.join(projectPath, '.magector'), { recursive: true });
|
|
205
210
|
const dbPath = path.join(projectPath, '.magector', 'index.db');
|
|
@@ -244,21 +249,39 @@ export async function init(projectPath) {
|
|
|
244
249
|
// 4. Run indexing
|
|
245
250
|
console.log('\nIndexing codebase...');
|
|
246
251
|
const startTime = Date.now();
|
|
252
|
+
// Default 4 hours — generous enough for ~80K-file enterprise codebases under
|
|
253
|
+
// CPU constraint. Override via MAGECTOR_INDEX_TIMEOUT (milliseconds).
|
|
254
|
+
const initTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 14400000;
|
|
247
255
|
try {
|
|
248
|
-
|
|
256
|
+
const indexArgs = [
|
|
249
257
|
'index',
|
|
250
258
|
'-m', projectPath,
|
|
251
259
|
'-d', dbPath,
|
|
252
260
|
'-c', modelPath
|
|
253
|
-
]
|
|
261
|
+
];
|
|
262
|
+
if (opts.threads != null) {
|
|
263
|
+
indexArgs.push('--threads', String(opts.threads));
|
|
264
|
+
}
|
|
265
|
+
if (opts.batchSize != null) {
|
|
266
|
+
indexArgs.push('--batch-size', String(opts.batchSize));
|
|
267
|
+
}
|
|
268
|
+
if (opts.force) {
|
|
269
|
+
indexArgs.push('--force');
|
|
270
|
+
}
|
|
271
|
+
execFileSync(binary, indexArgs, { timeout: initTimeout, stdio: 'inherit' });
|
|
254
272
|
} catch (err) {
|
|
255
273
|
if (err.status) {
|
|
256
274
|
console.error('Indexing failed.');
|
|
257
275
|
process.exit(err.status);
|
|
258
276
|
}
|
|
259
|
-
const initTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 1800000;
|
|
260
277
|
if (err.message && err.message.includes('ETIMEDOUT')) {
|
|
261
|
-
console.error(
|
|
278
|
+
console.error(
|
|
279
|
+
`Indexing timed out after ${initTimeout / 1000}s.\n` +
|
|
280
|
+
`For large codebases or CPU-constrained environments, increase the timeout:\n` +
|
|
281
|
+
` MAGECTOR_INDEX_TIMEOUT=28800000 npx magector init ${projectPath} # 8 hours\n` +
|
|
282
|
+
`Or reduce CPU usage:\n` +
|
|
283
|
+
` npx magector init ${projectPath} --threads 2`
|
|
284
|
+
);
|
|
262
285
|
} else {
|
|
263
286
|
console.error(`Indexing error: ${err.message}`);
|
|
264
287
|
}
|
package/src/mcp-server.js
CHANGED
|
@@ -552,13 +552,21 @@ function rustIndex(magentoRoot) {
|
|
|
552
552
|
if (existsSync(descDbPath)) {
|
|
553
553
|
indexArgs.push('--descriptions-db', descDbPath);
|
|
554
554
|
}
|
|
555
|
-
|
|
555
|
+
// Default 4 hours, matching cli.js/init.js. Previously 1800000 (30 min),
|
|
556
|
+
// which was too short for ~80K-file enterprise Magento installs under CPU
|
|
557
|
+
// constraint and caused silent re-index loops via the MCP auto-index path.
|
|
558
|
+
const indexTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 14400000;
|
|
556
559
|
try {
|
|
557
560
|
const result = execFileSync(config.rustBinary, indexArgs, { encoding: 'utf-8', timeout: indexTimeout, stdio: ['pipe', 'pipe', 'pipe'], env: rustEnv });
|
|
558
561
|
return result;
|
|
559
562
|
} catch (err) {
|
|
560
563
|
if (err.message && err.message.includes('ETIMEDOUT')) {
|
|
561
|
-
throw new Error(
|
|
564
|
+
throw new Error(
|
|
565
|
+
`Indexing timed out after ${indexTimeout / 1000}s. Partial progress was saved ` +
|
|
566
|
+
`to disk — the next indexing run will auto-resume from the last checkpoint. ` +
|
|
567
|
+
`To raise the timeout further, set MAGECTOR_INDEX_TIMEOUT (milliseconds) in the ` +
|
|
568
|
+
`MCP server env, e.g. MAGECTOR_INDEX_TIMEOUT=28800000 for 8 hours.`
|
|
569
|
+
);
|
|
562
570
|
}
|
|
563
571
|
throw err;
|
|
564
572
|
}
|