magector 1.7.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -318,11 +318,35 @@ The `describe` command and `magento_describe` MCP tool require an Anthropic API
318
318
  | `MAGECTOR_DB` | Path to index database | `./.magector/index.db` |
319
319
  | `MAGECTOR_BIN` | Path to magector-core binary | Auto-detected |
320
320
  | `MAGECTOR_MODELS` | Path to ONNX model directory | `~/.magector/models/` |
321
- | `MAGECTOR_INDEX_TIMEOUT` | Indexing timeout in milliseconds | `1800000` (30 min) |
322
- | `MAGECTOR_THREADS` | Max ONNX threads for embedding generation | Half of CPU cores |
323
- | `MAGECTOR_BATCH_SIZE` | Embedding batch size (higher = faster, more RAM) | `256` |
321
+ | `MAGECTOR_INDEX_TIMEOUT` | Indexing wall-clock timeout in milliseconds. Override for very large codebases or CPU-constrained environments. | `14400000` (4 h) |
322
+ | `MAGECTOR_THREADS` | Max ONNX intra-op + rayon parsing threads. Equivalent to the `--threads` CLI flag. | Half of CPU cores |
323
+ | `OMP_NUM_THREADS` | Fallback thread limit if `MAGECTOR_THREADS` is not set (de facto standard for ONNX/OpenMP). | — |
324
+ | `MAGECTOR_BATCH_SIZE` | Embedding batch size (higher = faster, more RAM). Equivalent to `--batch-size`. | `256` |
324
325
  | `ANTHROPIC_API_KEY` | API key for description generation (`describe` command) | — |
325
326
 
327
+ ### Constraining CPU usage during indexing
328
+
329
+ Indexing a large enterprise codebase (~80K files) can saturate CPU during PHASE 2 (ONNX embedding generation). To keep a developer machine responsive while indexing, lower the thread count:
330
+
331
+ ```bash
332
+ npx magector index --threads 2 # use only 2 cores for both parsing and embedding
333
+ MAGECTOR_THREADS=2 npx magector index # equivalent via env var
334
+ OMP_NUM_THREADS=2 npx magector index # also honored as a fallback
335
+ ```
336
+
337
+ The `--threads` flag and `MAGECTOR_THREADS` / `OMP_NUM_THREADS` env vars constrain **both** the rayon thread pool used by PHASE 1 (parallel AST parsing) and the ONNX intra-op thread pool used by PHASE 2 (embedding inference). The active thread source is logged at startup so you can verify it took effect:
338
+
339
+ ```
340
+ INFO Rayon global pool: 2 threads (available: 16)
341
+ INFO ONNX intra_threads: 2 (available: 16, source: --threads flag)
342
+ ```
343
+
344
+ For very large or CPU-constrained runs, you may also need to extend the wall-clock timeout (default 4 hours):
345
+
346
+ ```bash
347
+ MAGECTOR_INDEX_TIMEOUT=28800000 npx magector index --threads 2 # 8 h timeout, 2 threads
348
+ ```
349
+
326
350
  ---
327
351
 
328
352
  ## MCP Server Tools
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "magector",
3
- "version": "1.7.0",
3
+ "version": "1.7.1",
4
4
  "description": "Semantic code search for Magento 2 — index, search, MCP server",
5
5
  "type": "module",
6
6
  "main": "src/mcp-server.js",
@@ -37,10 +37,10 @@
37
37
  "ruvector": "^0.1.96"
38
38
  },
39
39
  "optionalDependencies": {
40
- "@magector/cli-darwin-arm64": "1.7.0",
41
- "@magector/cli-linux-x64": "1.7.0",
42
- "@magector/cli-linux-arm64": "1.7.0",
43
- "@magector/cli-win32-x64": "1.7.0"
40
+ "@magector/cli-darwin-arm64": "1.7.1",
41
+ "@magector/cli-linux-x64": "1.7.1",
42
+ "@magector/cli-linux-arm64": "1.7.1",
43
+ "@magector/cli-win32-x64": "1.7.1"
44
44
  },
45
45
  "keywords": [
46
46
  "magento",
package/src/cli.js CHANGED
@@ -30,21 +30,35 @@ Usage:
30
30
  npx magector setup [path] IDE setup only (no indexing)
31
31
  npx magector help Show this help
32
32
 
33
- Options:
34
- -l, --limit <n> Number of search results (default: 10)
35
- -f, --format <fmt> Output format: text, json (default: text)
33
+ Search options:
34
+ -l, --limit <n> Number of search results (default: 10)
35
+ -f, --format <fmt> Output format: text, json (default: text)
36
+
37
+ Index options:
38
+ --threads <n> Max ONNX/rayon threads (default: half of CPU cores).
39
+ Lower this on shared developer machines to keep the
40
+ system responsive during indexing.
41
+ --batch-size <n> Embedding batch size (default: 256). Higher = faster
42
+ but more RAM.
43
+ --force Force re-index even if index exists
36
44
 
37
45
  Environment Variables:
38
- MAGENTO_ROOT Path to Magento installation (default: cwd)
39
- MAGECTOR_DB Path to index database (default: ./.magector/index.db)
40
- MAGECTOR_BIN Path to magector-core binary
41
- MAGECTOR_MODELS Path to ONNX model directory
46
+ MAGENTO_ROOT Path to Magento installation (default: cwd)
47
+ MAGECTOR_DB Path to index database (default: ./.magector/index.db)
48
+ MAGECTOR_BIN Path to magector-core binary
49
+ MAGECTOR_MODELS Path to ONNX model directory
50
+ MAGECTOR_THREADS Max threads (overridden by --threads)
51
+ MAGECTOR_BATCH_SIZE Embedding batch size (overridden by --batch-size)
52
+ MAGECTOR_INDEX_TIMEOUT Indexing wall-clock timeout in ms (default: 14400000 = 4h)
53
+ OMP_NUM_THREADS Fallback thread limit if MAGECTOR_THREADS unset
42
54
 
43
55
  Examples:
44
56
  npx magector init /var/www/magento
45
57
  npx magector search "product price calculation"
46
58
  npx magector search "checkout controller" -l 20
47
59
  npx magector index
60
+ npx magector index --threads 4 --batch-size 128
61
+ MAGECTOR_INDEX_TIMEOUT=28800000 npx magector index # 8h timeout
48
62
  npx magector mcp
49
63
  `);
50
64
  }
@@ -67,12 +81,16 @@ function parseArgs(argv) {
67
81
  opts.verbose = true;
68
82
  } else if (argv[i] === '--force') {
69
83
  opts.force = true;
84
+ } else if (argv[i] === '--threads') {
85
+ opts.threads = argv[++i];
86
+ } else if (argv[i] === '--batch-size') {
87
+ opts.batchSize = argv[++i];
70
88
  }
71
89
  }
72
90
  return opts;
73
91
  }
74
92
 
75
- async function runIndex(targetPath) {
93
+ async function runIndex(targetPath, opts = {}) {
76
94
  const config = getConfig();
77
95
  const root = targetPath || config.magentoRoot;
78
96
  const binary = resolveBinary();
@@ -85,7 +103,9 @@ async function runIndex(targetPath) {
85
103
  const magectorDir = path.resolve(root, '.magector');
86
104
  mkdirSync(magectorDir, { recursive: true });
87
105
 
88
- const indexTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 1800000;
106
+ // Default 4 hours — generous enough for ~80K-file enterprise codebases under
107
+ // CPU constraint. Override via MAGECTOR_INDEX_TIMEOUT (milliseconds).
108
+ const indexTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 14400000;
89
109
  try {
90
110
  const indexArgs = [
91
111
  'index',
@@ -93,6 +113,15 @@ async function runIndex(targetPath) {
93
113
  '-d', path.resolve(config.dbPath),
94
114
  '-c', modelPath
95
115
  ];
116
+ // Forward thread/batch limits to the Rust binary. The Rust side already
117
+ // honors MAGECTOR_THREADS / OMP_NUM_THREADS via env, but explicit flags
118
+ // give the user a CLI-level override and make the limit visible in logs.
119
+ if (opts.threads != null) {
120
+ indexArgs.push('--threads', String(opts.threads));
121
+ }
122
+ if (opts.batchSize != null) {
123
+ indexArgs.push('--batch-size', String(opts.batchSize));
124
+ }
96
125
  // Pass descriptions DB if it exists
97
126
  const descDbPath = path.resolve(root, '.magector', 'sqlite.db');
98
127
  if (existsSync(descDbPath)) {
@@ -106,7 +135,13 @@ async function runIndex(targetPath) {
106
135
  process.exit(err.status);
107
136
  }
108
137
  if (err.message && err.message.includes('ETIMEDOUT')) {
109
- console.error(`Indexing timed out after ${indexTimeout / 1000}s. For large codebases, increase the timeout:\n MAGECTOR_INDEX_TIMEOUT=3600000 npx magector index`);
138
+ console.error(
139
+ `Indexing timed out after ${indexTimeout / 1000}s.\n` +
140
+ `For large codebases or CPU-constrained environments, increase the timeout:\n` +
141
+ ` MAGECTOR_INDEX_TIMEOUT=28800000 npx magector index # 8 hours\n` +
142
+ `Or reduce CPU usage with fewer threads:\n` +
143
+ ` npx magector index --threads 2`
144
+ );
110
145
  } else {
111
146
  console.error(`Indexing error: ${err.message}`);
112
147
  }
@@ -202,13 +237,22 @@ async function main() {
202
237
  await checkForUpdate(command, args);
203
238
 
204
239
  switch (command) {
205
- case 'init':
206
- await init(args[1]);
240
+ case 'init': {
241
+ const initArgv = args.slice(1);
242
+ const initTarget = initArgv.find(a => !a.startsWith('-'));
243
+ const initOpts = parseArgs(initArgv);
244
+ await init(initTarget, initOpts);
207
245
  break;
246
+ }
208
247
 
209
- case 'index':
210
- await runIndex(args[1]);
248
+ case 'index': {
249
+ // First arg after `index` that does not start with '-' is taken as the path. NOTE: a flag's value also matches (e.g. the `2` in `--threads 2` when no path is given).
250
+ const indexArgv = args.slice(1);
251
+ const targetPath = indexArgv.find(a => !a.startsWith('-'));
252
+ const indexOpts = parseArgs(indexArgv);
253
+ await runIndex(targetPath, indexOpts);
211
254
  break;
255
+ }
212
256
 
213
257
  case 'search': {
214
258
  const query = args.slice(1).filter(a => !a.startsWith('-')).join(' ');
package/src/init.js CHANGED
@@ -198,8 +198,13 @@ function updateGitignore(projectPath) {
198
198
 
199
199
  /**
200
200
  * Main init function.
201
+ *
202
+ * @param {string} [projectPath] - Magento root (defaults to cwd).
203
+ * @param {object} [opts]
204
+ * @param {number|string} [opts.threads] - Forwarded as `--threads` to magector-core.
205
+ * @param {number|string} [opts.batchSize] - Forwarded as `--batch-size`.
201
206
  */
202
- export async function init(projectPath) {
207
+ export async function init(projectPath, opts = {}) {
203
208
  projectPath = path.resolve(projectPath || process.cwd());
204
209
  mkdirSync(path.join(projectPath, '.magector'), { recursive: true });
205
210
  const dbPath = path.join(projectPath, '.magector', 'index.db');
@@ -244,21 +249,36 @@ export async function init(projectPath) {
244
249
  // 4. Run indexing
245
250
  console.log('\nIndexing codebase...');
246
251
  const startTime = Date.now();
252
+ // Default 4 hours — generous enough for ~80K-file enterprise codebases under
253
+ // CPU constraint. Override via MAGECTOR_INDEX_TIMEOUT (milliseconds).
254
+ const initTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 14400000;
247
255
  try {
248
- execFileSync(binary, [
256
+ const indexArgs = [
249
257
  'index',
250
258
  '-m', projectPath,
251
259
  '-d', dbPath,
252
260
  '-c', modelPath
253
- ], { timeout: parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 1800000, stdio: 'inherit' });
261
+ ];
262
+ if (opts.threads != null) {
263
+ indexArgs.push('--threads', String(opts.threads));
264
+ }
265
+ if (opts.batchSize != null) {
266
+ indexArgs.push('--batch-size', String(opts.batchSize));
267
+ }
268
+ execFileSync(binary, indexArgs, { timeout: initTimeout, stdio: 'inherit' });
254
269
  } catch (err) {
255
270
  if (err.status) {
256
271
  console.error('Indexing failed.');
257
272
  process.exit(err.status);
258
273
  }
259
- const initTimeout = parseInt(process.env.MAGECTOR_INDEX_TIMEOUT, 10) || 1800000;
260
274
  if (err.message && err.message.includes('ETIMEDOUT')) {
261
- console.error(`Indexing timed out after ${initTimeout / 1000}s. For large codebases, increase the timeout:\n MAGECTOR_INDEX_TIMEOUT=3600000 npx magector init ${projectPath}`);
275
+ console.error(
276
+ `Indexing timed out after ${initTimeout / 1000}s.\n` +
277
+ `For large codebases or CPU-constrained environments, increase the timeout:\n` +
278
+ ` MAGECTOR_INDEX_TIMEOUT=28800000 npx magector init ${projectPath} # 8 hours\n` +
279
+ `Or reduce CPU usage:\n` +
280
+ ` npx magector init ${projectPath} --threads 2`
281
+ );
262
282
  } else {
263
283
  console.error(`Indexing error: ${err.message}`);
264
284
  }