nex-code 0.4.38 → 0.4.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@
14
14
  * - No iteration cap by default — runs until stopped
15
15
  */
16
16
 
17
- const { execSync } = require("child_process");
17
+ const { execSync, spawn } = require("child_process");
18
18
  const fs = require("fs");
19
19
  const path = require("path");
20
20
 
@@ -44,6 +44,14 @@ function getBenchmark() {
44
44
  // Track experiment history within the session
45
45
  let experiments = [];
46
46
  let loopActive = false;
47
+ let sessionBaselineScore = null; // set on first ar_run_benchmark call
48
+
49
+ // ─── Watch Mode state ───────────────────────────────────────────
50
+ let _watchProcess = null;
51
+ let _watchCallbacks = { onFailure: null };
52
+ let _watchTestCommand = null;
53
+ let _watchDebounceTimer = null;
54
+ const WATCH_DEBOUNCE_MS = 2000;
47
55
 
48
56
  function getLogPath() {
49
57
  const dir = path.join(process.cwd(), ".nex", "autoresearch");
@@ -271,6 +279,8 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
271
279
  "- If you are blocked from reading a file, SKIP IT and make your edit based on what you already know",
272
280
  "- If 3 consecutive experiments fail to improve, change category focus",
273
281
  "- Simplicity criterion: prefer removing code over adding it — complexity cost must be justified by metric gain",
282
+ "- cli/tools/index.js uses single-quoted JS strings — when editing descriptions, use ONLY single quotes inside the text, or escape double quotes as \\\\\" — NEVER put a raw double quote inside a JS string literal or tests will fail with SyntaxError",
283
+ "- Before every Edit call, grep the EXACT old_text from the file first so it matches byte-for-byte",
274
284
  "",
275
285
  "### How the benchmark score works — READ THIS FIRST",
276
286
  "The benchmark sends nex-code's TOOL_DEFINITIONS (schemas) to external models and checks:",
@@ -362,6 +372,53 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
362
372
  console.log("Autoresearch history cleared.");
363
373
  },
364
374
  },
375
+ {
376
+ cmd: "/ar-watch",
377
+ desc: "Start/stop background file watcher that auto-runs tests on changes",
378
+ handler: (args) => {
379
+ // Check feature flag
380
+ let watchEnabled = false;
381
+ try {
382
+ const { feature } = require("../feature-flags");
383
+ watchEnabled = feature("WATCH_MODE");
384
+ } catch {
385
+ // feature-flags not available — check env
386
+ watchEnabled =
387
+ process.env.NEX_FEATURE_WATCH_MODE === "1" ||
388
+ process.env.NEX_FEATURE_WATCH_MODE === "true";
389
+ }
390
+ if (!watchEnabled) {
391
+ console.log(
392
+ "Watch mode is disabled. Enable with NEX_FEATURE_WATCH_MODE=1",
393
+ );
394
+ return;
395
+ }
396
+
397
+ const cmd = args.trim();
398
+ if (cmd === "stop" || cmd === "off") {
399
+ stopWatch();
400
+ console.log("Watch mode stopped.");
401
+ return;
402
+ }
403
+ if (_watchProcess) {
404
+ console.log(
405
+ "Watch mode is already running. Use /ar-watch stop to stop it.",
406
+ );
407
+ return;
408
+ }
409
+
410
+ // Parse: /ar-watch <test command> [--watch-path <glob>]
411
+ const testCommand = cmd || "npm test";
412
+ const watchPath = process.cwd();
413
+ _watchTestCommand = testCommand;
414
+
415
+ startWatch(watchPath, testCommand);
416
+ console.log(`Watch mode started. Monitoring ${watchPath} for changes.`);
417
+ console.log(`Test command: ${testCommand}`);
418
+ console.log("On test failure, the agent will auto-investigate.");
419
+ console.log("Use /ar-watch stop to stop.\n");
420
+ },
421
+ },
365
422
  ],
366
423
 
367
424
  tools: [
@@ -389,11 +446,12 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
389
446
  execute: async (args) => {
390
447
  // Strip any date-like suffix the model may have hallucinated, then
391
448
  // append today's real date so the branch name is always accurate.
392
- const baseTag = (args.tag || "self-improve")
393
- .replace(/[^a-zA-Z0-9_-]/g, "-")
394
- .replace(/-?\d{4,8}$/, "") // strip trailing YYYYMMDD / YYYYMM / etc.
395
- .replace(/-[a-z]{3}\d{1,2}$/i, "") // strip trailing mon## (e.g. apr15)
396
- .replace(/-+$/, "") || "self-improve";
449
+ const baseTag =
450
+ (args.tag || "self-improve")
451
+ .replace(/[^a-zA-Z0-9_-]/g, "-")
452
+ .replace(/-?\d{4,8}$/, "") // strip trailing YYYYMMDD / YYYYMM / etc.
453
+ .replace(/-[a-z]{3}\d{1,2}$/i, "") // strip trailing mon## (e.g. apr15)
454
+ .replace(/-+$/, "") || "self-improve";
397
455
  const now = new Date();
398
456
  const dateStr =
399
457
  now.toLocaleString("en", { month: "short" }).toLowerCase() +
@@ -402,13 +460,38 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
402
460
  const branchName = `autoresearch/${tag}`;
403
461
 
404
462
  try {
463
+ const currentBranch = gitBranch();
464
+ // If we're already on the target branch, nothing to do
465
+ if (currentBranch === branchName) {
466
+ return JSON.stringify({
467
+ status: "resumed",
468
+ branch: branchName,
469
+ note: "Already on autoresearch branch — continuing experiments.",
470
+ });
471
+ }
472
+
405
473
  // Check if branch already exists
474
+ let branchExists = false;
406
475
  try {
407
476
  execSync(`git rev-parse --verify ${branchName}`, {
408
477
  cwd: process.cwd(),
409
478
  stdio: ["pipe", "pipe", "pipe"],
410
479
  });
411
- // Branch exists — check it out
480
+ branchExists = true;
481
+ } catch {
482
+ // Branch doesn't exist
483
+ }
484
+
485
+ if (branchExists) {
486
+ // Stash any uncommitted changes before switching
487
+ try {
488
+ execSync(`git stash`, {
489
+ cwd: process.cwd(),
490
+ stdio: ["pipe", "pipe", "pipe"],
491
+ });
492
+ } catch {
493
+ // Ignore stash errors (nothing to stash)
494
+ }
412
495
  execSync(`git checkout ${branchName}`, {
413
496
  cwd: process.cwd(),
414
497
  stdio: ["pipe", "pipe", "pipe"],
@@ -418,8 +501,6 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
418
501
  branch: branchName,
419
502
  note: "Branch already existed — resuming experiments on it.",
420
503
  });
421
- } catch {
422
- // Branch doesn't exist — create it
423
504
  }
424
505
 
425
506
  const sourceBranch = gitBranch() || "unknown";
@@ -639,13 +720,12 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
639
720
  properties: {
640
721
  file: {
641
722
  type: "string",
642
- description:
643
- 'Path to the log file (e.g. "run.log")',
723
+ description: 'Path to the log file (e.g. "run.log")',
644
724
  },
645
725
  patterns: {
646
726
  type: "object",
647
727
  description:
648
- 'Map of metric name to regex pattern with one capture group. ' +
728
+ "Map of metric name to regex pattern with one capture group. " +
649
729
  'Example: {"val_bpb": "val_bpb:\\\\s*([\\\\d.]+)", "memory": "peak_vram_mb:\\\\s*([\\\\d.]+)"}',
650
730
  additionalProperties: { type: "string" },
651
731
  },
@@ -725,7 +805,8 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
725
805
  if (!benchmark) {
726
806
  return JSON.stringify({
727
807
  status: "unavailable",
728
- error: "Benchmark module not found. Make sure cli/benchmark.js exists.",
808
+ error:
809
+ "Benchmark module not found. Make sure cli/benchmark.js exists.",
729
810
  });
730
811
  }
731
812
 
@@ -761,6 +842,11 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
761
842
  ) / 10
762
843
  : 0;
763
844
 
845
+ // Record baseline on first run
846
+ if (sessionBaselineScore === null) {
847
+ sessionBaselineScore = avgScore;
848
+ }
849
+
764
850
  // Find weakest category across all models
765
851
  const categoryTotals = {};
766
852
  const categoryCounts = {};
@@ -777,8 +863,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
777
863
  }
778
864
 
779
865
  // Sort categories by score to find weakest
780
- const sortedCategories = Object.entries(categoryAvgs)
781
- .sort((a, b) => a[1] - b[1]);
866
+ const sortedCategories = Object.entries(categoryAvgs).sort(
867
+ (a, b) => a[1] - b[1],
868
+ );
782
869
 
783
870
  const weakestCategory =
784
871
  sortedCategories.length > 0 ? sortedCategories[0] : null;
@@ -799,8 +886,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
799
886
  return JSON.stringify({
800
887
  status: "benchmark_failed",
801
888
  error: err.message,
802
- elapsed_seconds:
803
- parseFloat(((Date.now() - start) / 1000).toFixed(1)),
889
+ elapsed_seconds: parseFloat(
890
+ ((Date.now() - start) / 1000).toFixed(1),
891
+ ),
804
892
  });
805
893
  }
806
894
  },
@@ -863,6 +951,21 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
863
951
  },
864
952
  execute: async (args) => {
865
953
  loadExperiments();
954
+
955
+ // Enforce keep/revert decision against session baseline
956
+ if (
957
+ sessionBaselineScore !== null &&
958
+ typeof args.metric === "number" &&
959
+ args.kept === true &&
960
+ args.metric < sessionBaselineScore
961
+ ) {
962
+ console.log(
963
+ `\x1b[31m ⚠ Score ${args.metric} < baseline ${sessionBaselineScore} — overriding kept=true to kept=false\x1b[0m`,
964
+ );
965
+ args.kept = false;
966
+ args.status = "discard";
967
+ }
968
+
866
969
  const commit = gitHash();
867
970
  const entry = {
868
971
  id: experiments.length + 1,
@@ -891,7 +994,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
891
994
 
892
995
  const keptCount = experiments.filter((e) => e.kept).length;
893
996
  const revertedCount = experiments.filter((e) => !e.kept).length;
894
- const statusIcon = args.kept ? "\x1b[32m✔ KEPT\x1b[0m" : "\x1b[31m✘ REVERTED\x1b[0m";
997
+ const statusIcon = args.kept
998
+ ? "\x1b[32m✔ KEPT\x1b[0m"
999
+ : "\x1b[31m✘ REVERTED\x1b[0m";
895
1000
  const delta =
896
1001
  prev != null && typeof args.metric === "number"
897
1002
  ? ` (${args.metric > prev ? "+" : ""}${(args.metric - prev).toFixed(1)} pts)`
@@ -1019,6 +1124,23 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
1019
1124
  }
1020
1125
  },
1021
1126
  },
1127
+ {
1128
+ type: "function",
1129
+ function: {
1130
+ name: "ar_watch_status",
1131
+ description:
1132
+ "Get the current status of the background file watcher (watch mode). " +
1133
+ "Returns whether watch mode is active, the test command, and recent failure count.",
1134
+ parameters: { type: "object", properties: {} },
1135
+ },
1136
+ execute: async () => {
1137
+ return JSON.stringify({
1138
+ active: !!_watchProcess,
1139
+ testCommand: _watchTestCommand,
1140
+ pid: _watchProcess ? _watchProcess.pid : null,
1141
+ });
1142
+ },
1143
+ },
1022
1144
  {
1023
1145
  type: "function",
1024
1146
  function: {
@@ -1062,3 +1184,112 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
1062
1184
  },
1063
1185
  ],
1064
1186
  };
1187
+
1188
+ // ─── Watch Mode Implementation ──────────────────────────────────
1189
+
1190
/**
 * Start a background file watcher using fs.watch (recursive).
 * On file changes, debounces for WATCH_DEBOUNCE_MS and then runs the test
 * command through _runWatchTest, passing the most recently changed file.
 *
 * Any watcher that is already active is stopped first. If fs.watch throws,
 * the error is logged and no watcher is left registered.
 *
 * @param {string} watchPath — directory to watch
 * @param {string} testCommand — shell command to run on changes
 */
function startWatch(watchPath, testCommand) {
  if (_watchProcess) stopWatch();

  const ignorePatterns = [
    /node_modules/,
    /\.git\//,
    /\.nex\//,
    /dist\//,
    /\.log$/,
    /\.tmp$/,
  ];
  const sourceFilePattern =
    /\.(js|ts|jsx|tsx|py|rb|go|rs|json|yaml|yml|toml|cfg|ini|sh|css|html)$/;

  let watcher;
  try {
    watcher = fs.watch(watchPath, { recursive: true }, (eventType, filename) => {
      if (!filename) return;
      // The ignore patterns are written with "/" separators; normalise
      // backslashes so they also apply to Windows-style relative paths.
      const relPath = String(filename).replace(/\\/g, "/");
      // Skip ignored paths
      if (ignorePatterns.some((p) => p.test(relPath))) return;
      // Skip non-source files
      if (!sourceFilePattern.test(relPath)) return;

      // Debounce: wait for changes to settle
      if (_watchDebounceTimer) clearTimeout(_watchDebounceTimer);
      _watchDebounceTimer = setTimeout(() => {
        _watchDebounceTimer = null;
        _runWatchTest(testCommand, filename);
      }, WATCH_DEBOUNCE_MS);
    });
  } catch (err) {
    console.error(`Watch mode failed to start: ${err.message}`);
    _watchProcess = null;
    return;
  }

  _watchProcess = watcher;
  // Record the command here as well: the stopWatch() call above clears it
  // when an existing watcher is being replaced.
  _watchTestCommand = testCommand;

  // Clean up on process exit. The listeners are removed again as soon as
  // this watcher closes, so repeated start/stop cycles do not pile up
  // process listeners, and the SIGINT listener does not outlive the watcher.
  const detach = () => {
    process.removeListener("exit", cleanup);
    process.removeListener("SIGINT", cleanup);
  };
  function cleanup() {
    detach();
    stopWatch();
  }
  process.on("exit", cleanup);
  process.on("SIGINT", cleanup);
  watcher.on("close", detach);

  // Without a listener, an 'error' event from the watcher would be thrown
  // as an unhandled 'error' event.
  watcher.on("error", (err) => {
    console.error(`Watch mode error: ${err.message}`);
    if (_watchProcess === watcher) stopWatch();
  });
}
1244
+
1245
/**
 * Tear down watch mode: close the active watcher (if any), cancel a pending
 * debounced test run (if any), and forget the stored test command.
 * Safe to call when watch mode is not running.
 */
function stopWatch() {
  const activeWatcher = _watchProcess;
  if (activeWatcher) {
    try {
      activeWatcher.close();
    } catch {
      /* already closed */
    } finally {
      _watchProcess = null;
    }
  }

  const pendingTimer = _watchDebounceTimer;
  if (pendingTimer) {
    clearTimeout(pendingTimer);
    _watchDebounceTimer = null;
  }

  _watchTestCommand = null;
}
1263
+
1264
/**
 * Run the test command synchronously and report a failure to the agent.
 *
 * The command runs through execSync in process.cwd() with piped stdio and a
 * 2 minute timeout. If it completes without throwing, nothing is reported.
 * If it throws, a note carrying the last 500 characters of captured output
 * (or the error message when there was no output) is passed to
 * injectMidRunNote from "../agent"; if that cannot be loaded or throws, a
 * one-line warning is written to stderr instead.
 *
 * @param {string} testCommand
 * @param {string} changedFile — file that triggered the watch
 */
function _runWatchTest(testCommand, changedFile) {
  const timeoutMs = 120000; // 2 minute timeout
  try {
    execSync(testCommand, {
      cwd: process.cwd(),
      stdio: "pipe",
      timeout: timeoutMs,
      encoding: "utf-8",
    });
    // Tests passed — no action needed
    return;
  } catch (err) {
    const output = (err.stdout || "") + (err.stderr || "");
    // A command killed by the timeout is not a test failure; say so
    // explicitly so the agent does not go looking for a failing assertion.
    const timedOut = err.code === "ETIMEDOUT";
    // Last 500 chars of output; fall back to the error message so the note
    // is never empty when the command produced no output at all.
    const details = (output || err.message || "").slice(-500);
    const headline = timedOut
      ? `Test command timed out after ${timeoutMs / 1000}s following change to ${changedFile}`
      : `Test failure detected after change to ${changedFile}`;
    const failureNote = `[WATCH MODE] ${headline}:\n${details}`;

    // Try to inject a note into the agent conversation
    try {
      const { injectMidRunNote } = require("../agent");
      injectMidRunNote(failureNote);
    } catch {
      // Agent not in active conversation — just log
      const summary = timedOut ? "tests timed out" : "tests failed";
      process.stderr.write(
        `\n\x1b[33m⚠ Watch: ${summary} after ${changedFile} changed\x1b[0m\n`,
      );
    }
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nex-code",
3
- "version": "0.4.38",
3
+ "version": "0.4.40",
4
4
  "description": "Run 400B+ open coding models on your codebase without the hardware bill. Ollama Cloud first — OpenAI, Anthropic, and Gemini when you need them.",
5
5
  "bin": {
6
6
  "nex-code": "./dist/nex-code.js"
@@ -16,8 +16,8 @@
16
16
  },
17
17
  "scripts": {
18
18
  "start": "node dist/nex-code.js",
19
- "build": "esbuild bin/nex-code.js --bundle --platform=node --target=node18 --outfile=dist/nex-code.js --minify --external:axios --external:dotenv --external:playwright && esbuild cli/benchmark.js --bundle --platform=node --target=node18 --outfile=dist/benchmark.js --minify --external:axios --external:dotenv --external:playwright && rm -rf dist/skills && cp -r cli/skills dist/skills",
20
- "dev": "esbuild bin/nex-code.js --bundle --platform=node --target=node18 --outfile=dist/nex-code.js --external:axios --external:dotenv --external:playwright --watch",
19
+ "build": "node scripts/build.js",
20
+ "dev": "node scripts/build.js --dev",
21
21
  "test": "jest --forceExit",
22
22
  "test:orchestrator": "jest tests/orchestrator.test.js --forceExit",
23
23
  "coverage": "jest --coverage --forceExit",
@@ -41,15 +41,12 @@
41
41
  "ollama-cloud",
42
42
  "openai",
43
43
  "anthropic",
44
- "claude",
45
44
  "gemini",
46
45
  "llm",
47
46
  "gpt",
48
47
  "agentic",
49
48
  "terminal",
50
49
  "coding-assistant",
51
- "claude-code-alternative",
52
- "gemini-cli-alternative",
53
50
  "open-source",
54
51
  "free",
55
52
  "qwen3",