nex-code 0.4.38 → 0.4.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -41
- package/dist/benchmark.js +419 -378
- package/dist/nex-code.js +698 -632
- package/dist/skills/autoresearch.js +249 -18
- package/package.json +3 -6
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* - No iteration cap by default — runs until stopped
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
const { execSync } = require("child_process");
|
|
17
|
+
const { execSync, spawn } = require("child_process");
|
|
18
18
|
const fs = require("fs");
|
|
19
19
|
const path = require("path");
|
|
20
20
|
|
|
@@ -44,6 +44,14 @@ function getBenchmark() {
|
|
|
44
44
|
// Track experiment history within the session
|
|
45
45
|
let experiments = [];
|
|
46
46
|
let loopActive = false;
|
|
47
|
+
let sessionBaselineScore = null; // set on first ar_run_benchmark call
|
|
48
|
+
|
|
49
|
+
// ─── Watch Mode state ───────────────────────────────────────────
|
|
50
|
+
let _watchProcess = null;
|
|
51
|
+
let _watchCallbacks = { onFailure: null };
|
|
52
|
+
let _watchTestCommand = null;
|
|
53
|
+
let _watchDebounceTimer = null;
|
|
54
|
+
const WATCH_DEBOUNCE_MS = 2000;
|
|
47
55
|
|
|
48
56
|
function getLogPath() {
|
|
49
57
|
const dir = path.join(process.cwd(), ".nex", "autoresearch");
|
|
@@ -271,6 +279,8 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
271
279
|
"- If you are blocked from reading a file, SKIP IT and make your edit based on what you already know",
|
|
272
280
|
"- If 3 consecutive experiments fail to improve, change category focus",
|
|
273
281
|
"- Simplicity criterion: prefer removing code over adding it — complexity cost must be justified by metric gain",
|
|
282
|
+
"- cli/tools/index.js uses single-quoted JS strings — when editing descriptions, use ONLY single quotes inside the text, or escape double quotes as \\\\\" — NEVER put a raw double quote inside a JS string literal or tests will fail with SyntaxError",
|
|
283
|
+
"- Before every Edit call, grep the EXACT old_text from the file first so it matches byte-for-byte",
|
|
274
284
|
"",
|
|
275
285
|
"### How the benchmark score works — READ THIS FIRST",
|
|
276
286
|
"The benchmark sends nex-code's TOOL_DEFINITIONS (schemas) to external models and checks:",
|
|
@@ -362,6 +372,53 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
362
372
|
console.log("Autoresearch history cleared.");
|
|
363
373
|
},
|
|
364
374
|
},
|
|
375
|
+
{
|
|
376
|
+
cmd: "/ar-watch",
|
|
377
|
+
desc: "Start/stop background file watcher that auto-runs tests on changes",
|
|
378
|
+
handler: (args) => {
|
|
379
|
+
// Check feature flag
|
|
380
|
+
let watchEnabled = false;
|
|
381
|
+
try {
|
|
382
|
+
const { feature } = require("../feature-flags");
|
|
383
|
+
watchEnabled = feature("WATCH_MODE");
|
|
384
|
+
} catch {
|
|
385
|
+
// feature-flags not available — check env
|
|
386
|
+
watchEnabled =
|
|
387
|
+
process.env.NEX_FEATURE_WATCH_MODE === "1" ||
|
|
388
|
+
process.env.NEX_FEATURE_WATCH_MODE === "true";
|
|
389
|
+
}
|
|
390
|
+
if (!watchEnabled) {
|
|
391
|
+
console.log(
|
|
392
|
+
"Watch mode is disabled. Enable with NEX_FEATURE_WATCH_MODE=1",
|
|
393
|
+
);
|
|
394
|
+
return;
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
const cmd = args.trim();
|
|
398
|
+
if (cmd === "stop" || cmd === "off") {
|
|
399
|
+
stopWatch();
|
|
400
|
+
console.log("Watch mode stopped.");
|
|
401
|
+
return;
|
|
402
|
+
}
|
|
403
|
+
if (_watchProcess) {
|
|
404
|
+
console.log(
|
|
405
|
+
"Watch mode is already running. Use /ar-watch stop to stop it.",
|
|
406
|
+
);
|
|
407
|
+
return;
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
// Parse: /ar-watch <test command> [--watch-path <glob>]
|
|
411
|
+
const testCommand = cmd || "npm test";
|
|
412
|
+
const watchPath = process.cwd();
|
|
413
|
+
_watchTestCommand = testCommand;
|
|
414
|
+
|
|
415
|
+
startWatch(watchPath, testCommand);
|
|
416
|
+
console.log(`Watch mode started. Monitoring ${watchPath} for changes.`);
|
|
417
|
+
console.log(`Test command: ${testCommand}`);
|
|
418
|
+
console.log("On test failure, the agent will auto-investigate.");
|
|
419
|
+
console.log("Use /ar-watch stop to stop.\n");
|
|
420
|
+
},
|
|
421
|
+
},
|
|
365
422
|
],
|
|
366
423
|
|
|
367
424
|
tools: [
|
|
@@ -389,11 +446,12 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
389
446
|
execute: async (args) => {
|
|
390
447
|
// Strip any date-like suffix the model may have hallucinated, then
|
|
391
448
|
// append today's real date so the branch name is always accurate.
|
|
392
|
-
const baseTag =
|
|
393
|
-
.
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
449
|
+
const baseTag =
|
|
450
|
+
(args.tag || "self-improve")
|
|
451
|
+
.replace(/[^a-zA-Z0-9_-]/g, "-")
|
|
452
|
+
.replace(/-?\d{4,8}$/, "") // strip trailing YYYYMMDD / YYYYMM / etc.
|
|
453
|
+
.replace(/-[a-z]{3}\d{1,2}$/i, "") // strip trailing mon## (e.g. apr15)
|
|
454
|
+
.replace(/-+$/, "") || "self-improve";
|
|
397
455
|
const now = new Date();
|
|
398
456
|
const dateStr =
|
|
399
457
|
now.toLocaleString("en", { month: "short" }).toLowerCase() +
|
|
@@ -402,13 +460,38 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
402
460
|
const branchName = `autoresearch/${tag}`;
|
|
403
461
|
|
|
404
462
|
try {
|
|
463
|
+
const currentBranch = gitBranch();
|
|
464
|
+
// If we're already on the target branch, nothing to do
|
|
465
|
+
if (currentBranch === branchName) {
|
|
466
|
+
return JSON.stringify({
|
|
467
|
+
status: "resumed",
|
|
468
|
+
branch: branchName,
|
|
469
|
+
note: "Already on autoresearch branch — continuing experiments.",
|
|
470
|
+
});
|
|
471
|
+
}
|
|
472
|
+
|
|
405
473
|
// Check if branch already exists
|
|
474
|
+
let branchExists = false;
|
|
406
475
|
try {
|
|
407
476
|
execSync(`git rev-parse --verify ${branchName}`, {
|
|
408
477
|
cwd: process.cwd(),
|
|
409
478
|
stdio: ["pipe", "pipe", "pipe"],
|
|
410
479
|
});
|
|
411
|
-
|
|
480
|
+
branchExists = true;
|
|
481
|
+
} catch {
|
|
482
|
+
// Branch doesn't exist
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
if (branchExists) {
|
|
486
|
+
// Stash any uncommitted changes before switching
|
|
487
|
+
try {
|
|
488
|
+
execSync(`git stash`, {
|
|
489
|
+
cwd: process.cwd(),
|
|
490
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
491
|
+
});
|
|
492
|
+
} catch {
|
|
493
|
+
// Ignore stash errors (nothing to stash)
|
|
494
|
+
}
|
|
412
495
|
execSync(`git checkout ${branchName}`, {
|
|
413
496
|
cwd: process.cwd(),
|
|
414
497
|
stdio: ["pipe", "pipe", "pipe"],
|
|
@@ -418,8 +501,6 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
418
501
|
branch: branchName,
|
|
419
502
|
note: "Branch already existed — resuming experiments on it.",
|
|
420
503
|
});
|
|
421
|
-
} catch {
|
|
422
|
-
// Branch doesn't exist — create it
|
|
423
504
|
}
|
|
424
505
|
|
|
425
506
|
const sourceBranch = gitBranch() || "unknown";
|
|
@@ -639,13 +720,12 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
639
720
|
properties: {
|
|
640
721
|
file: {
|
|
641
722
|
type: "string",
|
|
642
|
-
description:
|
|
643
|
-
'Path to the log file (e.g. "run.log")',
|
|
723
|
+
description: 'Path to the log file (e.g. "run.log")',
|
|
644
724
|
},
|
|
645
725
|
patterns: {
|
|
646
726
|
type: "object",
|
|
647
727
|
description:
|
|
648
|
-
|
|
728
|
+
"Map of metric name to regex pattern with one capture group. " +
|
|
649
729
|
'Example: {"val_bpb": "val_bpb:\\\\s*([\\\\d.]+)", "memory": "peak_vram_mb:\\\\s*([\\\\d.]+)"}',
|
|
650
730
|
additionalProperties: { type: "string" },
|
|
651
731
|
},
|
|
@@ -725,7 +805,8 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
725
805
|
if (!benchmark) {
|
|
726
806
|
return JSON.stringify({
|
|
727
807
|
status: "unavailable",
|
|
728
|
-
error:
|
|
808
|
+
error:
|
|
809
|
+
"Benchmark module not found. Make sure cli/benchmark.js exists.",
|
|
729
810
|
});
|
|
730
811
|
}
|
|
731
812
|
|
|
@@ -761,6 +842,11 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
761
842
|
) / 10
|
|
762
843
|
: 0;
|
|
763
844
|
|
|
845
|
+
// Record baseline on first run
|
|
846
|
+
if (sessionBaselineScore === null) {
|
|
847
|
+
sessionBaselineScore = avgScore;
|
|
848
|
+
}
|
|
849
|
+
|
|
764
850
|
// Find weakest category across all models
|
|
765
851
|
const categoryTotals = {};
|
|
766
852
|
const categoryCounts = {};
|
|
@@ -777,8 +863,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
777
863
|
}
|
|
778
864
|
|
|
779
865
|
// Sort categories by score to find weakest
|
|
780
|
-
const sortedCategories = Object.entries(categoryAvgs)
|
|
781
|
-
|
|
866
|
+
const sortedCategories = Object.entries(categoryAvgs).sort(
|
|
867
|
+
(a, b) => a[1] - b[1],
|
|
868
|
+
);
|
|
782
869
|
|
|
783
870
|
const weakestCategory =
|
|
784
871
|
sortedCategories.length > 0 ? sortedCategories[0] : null;
|
|
@@ -799,8 +886,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
799
886
|
return JSON.stringify({
|
|
800
887
|
status: "benchmark_failed",
|
|
801
888
|
error: err.message,
|
|
802
|
-
elapsed_seconds:
|
|
803
|
-
|
|
889
|
+
elapsed_seconds: parseFloat(
|
|
890
|
+
((Date.now() - start) / 1000).toFixed(1),
|
|
891
|
+
),
|
|
804
892
|
});
|
|
805
893
|
}
|
|
806
894
|
},
|
|
@@ -863,6 +951,21 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
863
951
|
},
|
|
864
952
|
execute: async (args) => {
|
|
865
953
|
loadExperiments();
|
|
954
|
+
|
|
955
|
+
// Enforce keep/revert decision against session baseline
|
|
956
|
+
if (
|
|
957
|
+
sessionBaselineScore !== null &&
|
|
958
|
+
typeof args.metric === "number" &&
|
|
959
|
+
args.kept === true &&
|
|
960
|
+
args.metric < sessionBaselineScore
|
|
961
|
+
) {
|
|
962
|
+
console.log(
|
|
963
|
+
`\x1b[31m ⚠ Score ${args.metric} < baseline ${sessionBaselineScore} — overriding kept=true to kept=false\x1b[0m`,
|
|
964
|
+
);
|
|
965
|
+
args.kept = false;
|
|
966
|
+
args.status = "discard";
|
|
967
|
+
}
|
|
968
|
+
|
|
866
969
|
const commit = gitHash();
|
|
867
970
|
const entry = {
|
|
868
971
|
id: experiments.length + 1,
|
|
@@ -891,7 +994,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
891
994
|
|
|
892
995
|
const keptCount = experiments.filter((e) => e.kept).length;
|
|
893
996
|
const revertedCount = experiments.filter((e) => !e.kept).length;
|
|
894
|
-
const statusIcon = args.kept
|
|
997
|
+
const statusIcon = args.kept
|
|
998
|
+
? "\x1b[32m✔ KEPT\x1b[0m"
|
|
999
|
+
: "\x1b[31m✘ REVERTED\x1b[0m";
|
|
895
1000
|
const delta =
|
|
896
1001
|
prev != null && typeof args.metric === "number"
|
|
897
1002
|
? ` (${args.metric > prev ? "+" : ""}${(args.metric - prev).toFixed(1)} pts)`
|
|
@@ -1019,6 +1124,23 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
1019
1124
|
}
|
|
1020
1125
|
},
|
|
1021
1126
|
},
|
|
1127
|
+
{
|
|
1128
|
+
type: "function",
|
|
1129
|
+
function: {
|
|
1130
|
+
name: "ar_watch_status",
|
|
1131
|
+
description:
|
|
1132
|
+
"Get the current status of the background file watcher (watch mode). " +
|
|
1133
|
+
"Returns whether watch mode is active, the test command, and recent failure count.",
|
|
1134
|
+
parameters: { type: "object", properties: {} },
|
|
1135
|
+
},
|
|
1136
|
+
execute: async () => {
|
|
1137
|
+
return JSON.stringify({
|
|
1138
|
+
active: !!_watchProcess,
|
|
1139
|
+
testCommand: _watchTestCommand,
|
|
1140
|
+
pid: _watchProcess ? _watchProcess.pid : null,
|
|
1141
|
+
});
|
|
1142
|
+
},
|
|
1143
|
+
},
|
|
1022
1144
|
{
|
|
1023
1145
|
type: "function",
|
|
1024
1146
|
function: {
|
|
@@ -1062,3 +1184,112 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
|
|
|
1062
1184
|
},
|
|
1063
1185
|
],
|
|
1064
1186
|
};
|
|
1187
|
+
|
|
1188
|
+
// ─── Watch Mode Implementation ──────────────────────────────────
|
|
1189
|
+
|
|
1190
|
+
/**
 * Process-level cleanup hook shared by every watch session.
 * A single stable function reference lets stopWatch() remove exactly the
 * listeners startWatch() installed, so repeated start/stop cycles do not
 * stack handlers on `process`.
 */
const _watchCleanupOnExit = () => stopWatch();

/**
 * Start a background file watcher using fs.watch (recursive).
 * On file changes, debounces and runs the test command via _runWatchTest.
 * Any watcher that is already running is stopped first.
 *
 * Failure to create the watcher is logged and leaves `_watchProcess` null;
 * it is not thrown to the caller.
 *
 * @param {string} watchPath — directory to watch
 * @param {string} testCommand — shell command to run on changes
 */
function startWatch(watchPath, testCommand) {
  if (_watchProcess) stopWatch();

  const ignorePatterns = [
    /node_modules/,
    /\.git\//,
    /\.nex\//,
    /dist\//,
    /\.log$/,
    /\.tmp$/,
  ];
  const sourceFilePattern =
    /\.(js|ts|jsx|tsx|py|rb|go|rs|json|yaml|yml|toml|cfg|ini|sh|css|html)$/;

  try {
    const watcher = fs.watch(
      watchPath,
      { recursive: true },
      (eventType, filename) => {
        if (!filename) return;

        // fs.watch reports platform-native separators; normalise them so
        // the slash-based ignore patterns also match Windows-style paths.
        const relPath = String(filename).replace(/\\/g, "/");

        // Skip ignored paths
        if (ignorePatterns.some((p) => p.test(relPath))) return;
        // Skip non-source files
        if (!sourceFilePattern.test(relPath)) return;

        // Debounce: wait for changes to settle. Only the most recent
        // changed file is reported to the test runner.
        if (_watchDebounceTimer) clearTimeout(_watchDebounceTimer);
        _watchDebounceTimer = setTimeout(() => {
          _watchDebounceTimer = null;
          _runWatchTest(testCommand, filename);
        }, WATCH_DEBOUNCE_MS);
      },
    );

    // An 'error' event with no listener would be thrown as an uncaught
    // exception; log it and shut the watcher down instead.
    watcher.on("error", (err) => {
      console.error(`Watch mode error: ${err.message}`);
      stopWatch();
    });

    _watchProcess = watcher;

    // Clean up on process exit. Remove first so the hook can never be
    // registered twice, whatever path led here.
    process.removeListener("exit", _watchCleanupOnExit);
    process.removeListener("SIGINT", _watchCleanupOnExit);
    process.on("exit", _watchCleanupOnExit);
    process.on("SIGINT", _watchCleanupOnExit);
  } catch (err) {
    console.error(`Watch mode failed to start: ${err.message}`);
    _watchProcess = null;
  }
}

/**
 * Stop the background file watcher.
 * Closes the watcher, cancels any pending debounced test run, clears the
 * remembered test command, and removes the process-level cleanup hooks.
 * Safe to call when no watcher is running.
 */
function stopWatch() {
  // Drop the hooks installed by startWatch so they do not outlive the
  // watcher (a lingering SIGINT listener keeps Node's default Ctrl+C
  // handling disabled).
  process.removeListener("exit", _watchCleanupOnExit);
  process.removeListener("SIGINT", _watchCleanupOnExit);

  if (_watchProcess) {
    try {
      _watchProcess.close();
    } catch {
      /* already closed */
    }
    _watchProcess = null;
  }
  if (_watchDebounceTimer) {
    clearTimeout(_watchDebounceTimer);
    _watchDebounceTimer = null;
  }
  _watchTestCommand = null;
}
|
|
1263
|
+
|
|
1264
|
+
/**
|
|
1265
|
+
* Run the test command and handle failures.
|
|
1266
|
+
* @param {string} testCommand
|
|
1267
|
+
* @param {string} changedFile — file that triggered the watch
|
|
1268
|
+
*/
|
|
1269
|
+
function _runWatchTest(testCommand, changedFile) {
|
|
1270
|
+
try {
|
|
1271
|
+
execSync(testCommand, {
|
|
1272
|
+
cwd: process.cwd(),
|
|
1273
|
+
stdio: "pipe",
|
|
1274
|
+
timeout: 120000, // 2 minute timeout
|
|
1275
|
+
encoding: "utf-8",
|
|
1276
|
+
});
|
|
1277
|
+
// Tests passed — no action needed
|
|
1278
|
+
} catch (err) {
|
|
1279
|
+
// Tests failed — notify the agent
|
|
1280
|
+
const output = (err.stdout || "") + (err.stderr || "");
|
|
1281
|
+
const truncatedOutput = output.slice(-500); // Last 500 chars of error
|
|
1282
|
+
const failureNote = `[WATCH MODE] Test failure detected after change to ${changedFile}:\n${truncatedOutput}`;
|
|
1283
|
+
|
|
1284
|
+
// Try to inject a note into the agent conversation
|
|
1285
|
+
try {
|
|
1286
|
+
const { injectMidRunNote } = require("../agent");
|
|
1287
|
+
injectMidRunNote(failureNote);
|
|
1288
|
+
} catch {
|
|
1289
|
+
// Agent not in active conversation — just log
|
|
1290
|
+
process.stderr.write(
|
|
1291
|
+
`\n\x1b[33m⚠ Watch: tests failed after ${changedFile} changed\x1b[0m\n`,
|
|
1292
|
+
);
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nex-code",
|
|
3
|
-
"version": "0.4.38",
|
|
3
|
+
"version": "0.4.40",
|
|
4
4
|
"description": "Run 400B+ open coding models on your codebase without the hardware bill. Ollama Cloud first — OpenAI, Anthropic, and Gemini when you need them.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"nex-code": "./dist/nex-code.js"
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
},
|
|
17
17
|
"scripts": {
|
|
18
18
|
"start": "node dist/nex-code.js",
|
|
19
|
-
"build": "
|
|
20
|
-
"dev": "
|
|
19
|
+
"build": "node scripts/build.js",
|
|
20
|
+
"dev": "node scripts/build.js --dev",
|
|
21
21
|
"test": "jest --forceExit",
|
|
22
22
|
"test:orchestrator": "jest tests/orchestrator.test.js --forceExit",
|
|
23
23
|
"coverage": "jest --coverage --forceExit",
|
|
@@ -41,15 +41,12 @@
|
|
|
41
41
|
"ollama-cloud",
|
|
42
42
|
"openai",
|
|
43
43
|
"anthropic",
|
|
44
|
-
"claude",
|
|
45
44
|
"gemini",
|
|
46
45
|
"llm",
|
|
47
46
|
"gpt",
|
|
48
47
|
"agentic",
|
|
49
48
|
"terminal",
|
|
50
49
|
"coding-assistant",
|
|
51
|
-
"claude-code-alternative",
|
|
52
|
-
"gemini-cli-alternative",
|
|
53
50
|
"open-source",
|
|
54
51
|
"free",
|
|
55
52
|
"qwen3",
|