npm - nex-code - Versions diffs - 0.4.37 → 0.4.39 - Mend

nex-code 0.4.37 → 0.4.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md +41 -41
package/dist/benchmark.js +397 -360
package/dist/nex-code.js +664 -621
package/dist/skills/autoresearch.js +252 -18
package/package.json +3 -6

package/dist/skills/autoresearch.js CHANGED Viewed

@@ -14,7 +14,7 @@
  * - No iteration cap by default — runs until stopped
  */
-const { execSync } = require("child_process");
+const { execSync, spawn } = require("child_process");
 const fs = require("fs");
 const path = require("path");
@@ -44,6 +44,14 @@ function getBenchmark() {
 // Track experiment history within the session
 let experiments = [];
 let loopActive = false;
+let sessionBaselineScore = null; // set on first ar_run_benchmark call
+// ─── Watch Mode state ───────────────────────────────────────────
+let _watchProcess = null;
+let _watchCallbacks = { onFailure: null };
+let _watchTestCommand = null;
+let _watchDebounceTimer = null;
+const WATCH_DEBOUNCE_MS = 2000;
 function getLogPath() {
   const dir = path.join(process.cwd(), ".nex", "autoresearch");
@@ -263,11 +271,16 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
           "4. If score same or worse: ar_revert, go to Step A with a different hypothesis",
           "",
           "### HARD RULES",
+          "- EVERY response must end with a tool call — NEVER output text as your final message",
+          "- After ar_revert: call ar_checkpoint immediately in the same response as your next hypothesis",
+          "- After ar_run_benchmark: call ar_log_experiment immediately in the same response",
           "- NEVER read more than 2 files per experiment cycle",
           "- NEVER use bash to create branches or run benchmarks — use ar_ tools only",
           "- If you are blocked from reading a file, SKIP IT and make your edit based on what you already know",
           "- If 3 consecutive experiments fail to improve, change category focus",
           "- Simplicity criterion: prefer removing code over adding it — complexity cost must be justified by metric gain",
+          "- cli/tools/index.js uses single-quoted JS strings — when editing descriptions, use ONLY single quotes inside the text, or escape double quotes as \\\\\" — NEVER put a raw double quote inside a JS string literal or tests will fail with SyntaxError",
+          "- Before every Edit call, grep the EXACT old_text from the file first so it matches byte-for-byte",
           "",
           "### How the benchmark score works — READ THIS FIRST",
           "The benchmark sends nex-code's TOOL_DEFINITIONS (schemas) to external models and checks:",
@@ -359,6 +372,53 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
         console.log("Autoresearch history cleared.");
       },
     },
+    {
+      cmd: "/ar-watch",
+      desc: "Start/stop background file watcher that auto-runs tests on changes",
+      handler: (args) => {
+        // Check feature flag: ask the feature-flags module for WATCH_MODE first
+        let watchEnabled = false;
+        try {
+          const { feature } = require("../feature-flags");
+          watchEnabled = feature("WATCH_MODE");
+        } catch {
+          // require or feature() threw (e.g. feature-flags not available) — fall back to the NEX_FEATURE_WATCH_MODE env var ("1" or "true")
+          watchEnabled =
+            process.env.NEX_FEATURE_WATCH_MODE === "1" ||
+            process.env.NEX_FEATURE_WATCH_MODE === "true";
+        }
+        if (!watchEnabled) {
+          console.log(
+            "Watch mode is disabled. Enable with NEX_FEATURE_WATCH_MODE=1",
+          );
+          return;
+        }
+        const cmd = args.trim(); // NOTE(review): assumes args is always a string — confirm against the command dispatcher
+        if (cmd === "stop" || cmd === "off") { // "stop" and "off" are the only sub-commands; anything else is treated as a test command
+          stopWatch();
+          console.log("Watch mode stopped.");
+          return;
+        }
+        if (_watchProcess) { // a watcher is already active — refuse to start a second one
+          console.log(
+            "Watch mode is already running. Use /ar-watch stop to stop it.",
+          );
+          return;
+        }
+        // Usage: /ar-watch <test command> — the whole argument string is the test command (default "npm test"). NOTE(review): no --watch-path option is parsed here; the watch root is always process.cwd()
+        const testCommand = cmd || "npm test";
+        const watchPath = process.cwd();
+        _watchTestCommand = testCommand; // remembered so ar_watch_status can report it
+        startWatch(watchPath, testCommand); // NOTE(review): startWatch only logs on failure and leaves _watchProcess null, yet the messages below still print
+        console.log(`Watch mode started. Monitoring ${watchPath} for changes.`);
+        console.log(`Test command: ${testCommand}`);
+        console.log("On test failure, the agent will auto-investigate.");
+        console.log("Use /ar-watch stop to stop.\n");
+      },
+    },
   ],
   tools: [
@@ -386,11 +446,12 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
       execute: async (args) => {
         // Strip any date-like suffix the model may have hallucinated, then
         // append today's real date so the branch name is always accurate.
-        const baseTag = (args.tag || "self-improve")
-          .replace(/[^a-zA-Z0-9_-]/g, "-")
-          .replace(/-?\d{4,8}$/, "") // strip trailing YYYYMMDD / YYYYMM / etc.
-          .replace(/-[a-z]{3}\d{1,2}$/i, "") // strip trailing mon## (e.g. apr15)
-          .replace(/-+$/, "") || "self-improve";
+        const baseTag =
+          (args.tag || "self-improve")
+            .replace(/[^a-zA-Z0-9_-]/g, "-")
+            .replace(/-?\d{4,8}$/, "") // strip trailing YYYYMMDD / YYYYMM / etc.
+            .replace(/-[a-z]{3}\d{1,2}$/i, "") // strip trailing mon## (e.g. apr15)
+            .replace(/-+$/, "") || "self-improve";
         const now = new Date();
         const dateStr =
           now.toLocaleString("en", { month: "short" }).toLowerCase() +
@@ -399,13 +460,38 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
         const branchName = `autoresearch/${tag}`;
         try {
+          const currentBranch = gitBranch();
+          // If we're already on the target branch, nothing to do
+          if (currentBranch === branchName) {
+            return JSON.stringify({
+              status: "resumed",
+              branch: branchName,
+              note: "Already on autoresearch branch — continuing experiments.",
+            });
+          }
           // Check if branch already exists
+          let branchExists = false;
           try {
             execSync(`git rev-parse --verify ${branchName}`, {
               cwd: process.cwd(),
               stdio: ["pipe", "pipe", "pipe"],
             });
-            // Branch exists — check it out
+            branchExists = true;
+          } catch {
+            // Branch doesn't exist
+          }
+          if (branchExists) {
+            // Stash any uncommitted changes before switching
+            try {
+              execSync(`git stash`, {
+                cwd: process.cwd(),
+                stdio: ["pipe", "pipe", "pipe"],
+              });
+            } catch {
+              // Ignore stash errors (nothing to stash)
+            }
             execSync(`git checkout ${branchName}`, {
               cwd: process.cwd(),
               stdio: ["pipe", "pipe", "pipe"],
@@ -415,8 +501,6 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
               branch: branchName,
               note: "Branch already existed — resuming experiments on it.",
             });
-          } catch {
-            // Branch doesn't exist — create it
           }
           const sourceBranch = gitBranch() || "unknown";
@@ -636,13 +720,12 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
           properties: {
             file: {
               type: "string",
-              description:
-                'Path to the log file (e.g. "run.log")',
+              description: 'Path to the log file (e.g. "run.log")',
             },
             patterns: {
               type: "object",
               description:
-                'Map of metric name to regex pattern with one capture group. ' +
+                "Map of metric name to regex pattern with one capture group. " +
                 'Example: {"val_bpb": "val_bpb:\\\\s*([\\\\d.]+)", "memory": "peak_vram_mb:\\\\s*([\\\\d.]+)"}',
               additionalProperties: { type: "string" },
             },
@@ -722,7 +805,8 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
         if (!benchmark) {
           return JSON.stringify({
             status: "unavailable",
-            error: "Benchmark module not found. Make sure cli/benchmark.js exists.",
+            error:
+              "Benchmark module not found. Make sure cli/benchmark.js exists.",
           });
         }
@@ -758,6 +842,11 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
                 ) / 10
               : 0;
+          // Record baseline on first run
+          if (sessionBaselineScore === null) {
+            sessionBaselineScore = avgScore;
+          }
           // Find weakest category across all models
           const categoryTotals = {};
           const categoryCounts = {};
@@ -774,8 +863,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
           }
           // Sort categories by score to find weakest
-          const sortedCategories = Object.entries(categoryAvgs)
-            .sort((a, b) => a[1] - b[1]);
+          const sortedCategories = Object.entries(categoryAvgs).sort(
+            (a, b) => a[1] - b[1],
+          );
           const weakestCategory =
             sortedCategories.length > 0 ? sortedCategories[0] : null;
@@ -796,8 +886,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
           return JSON.stringify({
             status: "benchmark_failed",
             error: err.message,
-            elapsed_seconds:
-              parseFloat(((Date.now() - start) / 1000).toFixed(1)),
+            elapsed_seconds: parseFloat(
+              ((Date.now() - start) / 1000).toFixed(1),
+            ),
           });
         }
       },
@@ -860,6 +951,21 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
       },
       execute: async (args) => {
         loadExperiments();
+        // Enforce keep/revert decision against session baseline
+        if (
+          sessionBaselineScore !== null &&
+          typeof args.metric === "number" &&
+          args.kept === true &&
+          args.metric < sessionBaselineScore
+        ) {
+          console.log(
+            `\x1b[31m   ⚠ Score ${args.metric} < baseline ${sessionBaselineScore} — overriding kept=true to kept=false\x1b[0m`,
+          );
+          args.kept = false;
+          args.status = "discard";
+        }
         const commit = gitHash();
         const entry = {
           id: experiments.length + 1,
@@ -888,7 +994,9 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
         const keptCount = experiments.filter((e) => e.kept).length;
         const revertedCount = experiments.filter((e) => !e.kept).length;
-        const statusIcon = args.kept ? "\x1b[32m✔ KEPT\x1b[0m" : "\x1b[31m✘ REVERTED\x1b[0m";
+        const statusIcon = args.kept
+          ? "\x1b[32m✔ KEPT\x1b[0m"
+          : "\x1b[31m✘ REVERTED\x1b[0m";
         const delta =
           prev != null && typeof args.metric === "number"
             ? ` (${args.metric > prev ? "+" : ""}${(args.metric - prev).toFixed(1)} pts)`
@@ -1016,6 +1124,23 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
         }
       },
     },
+    {
+      type: "function",
+      function: {
+        name: "ar_watch_status",
+        description: // NOTE(review): the text below promises a "recent failure count", but execute() returns no such field
+          "Get the current status of the background file watcher (watch mode). " +
+          "Returns whether watch mode is active, the test command, and recent failure count.",
+        parameters: { type: "object", properties: {} }, // takes no arguments
+      },
+      execute: async () => { // reports the module-level watch state as a JSON string; does not modify it
+        return JSON.stringify({
+          active: !!_watchProcess, // true while a watcher object is stored in _watchProcess
+          testCommand: _watchTestCommand, // null when no watch has been started or after stopWatch()
+          pid: _watchProcess ? _watchProcess.pid : null, // NOTE(review): _watchProcess is assigned the fs.watch() return value in startWatch; confirm that object exposes a pid, otherwise this is undefined and dropped by JSON.stringify
+        });
+      },
+    },
     {
       type: "function",
       function: {
@@ -1059,3 +1184,112 @@ Use ar_run_experiment with output_file to redirect, then ar_extract_metric to re
     },
   ],
 };
+// ─── Watch Mode Implementation ──────────────────────────────────
+/**
+ * Start a background file watcher using fs.watch (recursive).
+ * On file changes, debounces and runs the test command.
+ * If tests fail, injects a mid-run note into the agent conversation.
+ *
+ * Behaviour visible in this function:
+ * - Any watcher already stored in _watchProcess is stopped first.
+ * - Events without a filename, events whose filename matches one of the
+ *   ignore patterns, and events for files without one of the listed
+ *   source/config extensions are dropped.
+ * - Each accepted event resets a single shared timer (WATCH_DEBOUNCE_MS);
+ *   when it fires, _runWatchTest is called once with the filename of the
+ *   LAST accepted event only.
+ * - On success the watcher is stored in _watchProcess; if fs.watch throws,
+ *   the error is logged and _watchProcess is set to null (nothing is
+ *   rethrown, so callers cannot detect the failure from a return value).
+ * - "exit" and "SIGINT" listeners that call stopWatch are registered on
+ *   every successful call and are never removed by this module, so
+ *   repeated start/stop cycles accumulate listeners.
+ *
+ * NOTE(review): the ignore patterns use forward slashes; confirm how
+ * fs.watch reports nested paths on platforms with other separators.
+ * NOTE(review): support for { recursive: true } depends on platform and
+ * Node version — confirm against the supported runtime range.
+ * NOTE(review): registering a SIGINT listener may change what Ctrl+C does
+ * to the process — confirm another handler still exits.
+ *
+ * @param {string} watchPath — directory to watch
+ * @param {string} testCommand — shell command to run on changes
+ */
+function startWatch(watchPath, testCommand) {
+  if (_watchProcess) stopWatch(); // restart semantics: close the previous watcher before creating a new one
+  const ignorePatterns = [
+    /node_modules/,
+    /\.git\//,
+    /\.nex\//,
+    /dist\//,
+    /\.log$/,
+    /\.tmp$/,
+  ];
+  try {
+    const watcher = fs.watch(
+      watchPath,
+      { recursive: true },
+      (eventType, filename) => {
+        if (!filename) return; // no filename reported for this event — nothing to filter on
+        // Skip ignored paths (dependency, VCS, tool-state and build-output directories; log/tmp files)
+        if (ignorePatterns.some((p) => p.test(filename))) return;
+        // Skip non-source files: only the extensions listed below trigger a test run
+        if (
+          !/\.(js|ts|jsx|tsx|py|rb|go|rs|json|yaml|yml|toml|cfg|ini|sh|css|html)$/.test(
+            filename,
+          )
+        )
+          return;
+        // Debounce: wait for changes to settle — every accepted event restarts the shared timer
+        if (_watchDebounceTimer) clearTimeout(_watchDebounceTimer);
+        _watchDebounceTimer = setTimeout(() => {
+          _runWatchTest(testCommand, filename);
+        }, WATCH_DEBOUNCE_MS);
+      },
+    );
+    _watchProcess = watcher; // despite the name, this holds the fs.watch() watcher object
+    // Clean up on process exit — these listeners are added on each call and never removed
+    const cleanup = () => stopWatch();
+    process.on("exit", cleanup);
+    process.on("SIGINT", cleanup);
+  } catch (err) {
+    console.error(`Watch mode failed to start: ${err.message}`);
+    _watchProcess = null;
+  }
+}
+/**
+ * Stop the background file watcher.
+ *
+ * Closes the watcher stored in _watchProcess (errors from close() are
+ * ignored), cancels any pending debounce timer so no further test run is
+ * scheduled, and resets _watchTestCommand to null. Each step is guarded,
+ * so calling this when nothing is running only resets _watchTestCommand.
+ * A test run that is already executing is not interrupted by this function.
+ */
+function stopWatch() {
+  if (_watchProcess) {
+    try {
+      _watchProcess.close();
+    } catch {
+      /* already closed — best-effort shutdown, nothing to recover */
+    }
+    _watchProcess = null;
+  }
+  if (_watchDebounceTimer) {
+    clearTimeout(_watchDebounceTimer); // drop a scheduled-but-not-yet-started test run
+    _watchDebounceTimer = null;
+  }
+  _watchTestCommand = null; // ar_watch_status reports this value
+}
+/**
+ * Run the test command and handle failures.
+ *
+ * The command is run synchronously with execSync in process.cwd(), with
+ * piped stdio and a 120000 ms timeout, so this call does not return until
+ * the command finishes or execSync throws. A normal return is treated as
+ * "tests passed" and nothing else happens. Any thrown error is treated as
+ * a test failure: the last 500 characters of the error's stdout + stderr
+ * are put into a "[WATCH MODE]" note and passed to injectMidRunNote from
+ * ../agent. If requiring ../agent or calling injectMidRunNote throws, a
+ * short warning (without the test output) is written to stderr instead.
+ *
+ * NOTE(review): every execSync error lands in the same catch, so a timeout
+ * or a command that cannot be started is reported as a test failure too.
+ *
+ * @param {string} testCommand
+ * @param {string} changedFile — file that triggered the watch
+ */
+function _runWatchTest(testCommand, changedFile) {
+  try {
+    execSync(testCommand, {
+      cwd: process.cwd(),
+      stdio: "pipe",
+      timeout: 120000, // 2 minute timeout
+      encoding: "utf-8",
+    });
+    // Tests passed — no action needed
+  } catch (err) {
+    // Tests failed — notify the agent (any execSync error is handled here)
+    const output = (err.stdout || "") + (err.stderr || ""); // either stream may be missing on the error object
+    const truncatedOutput = output.slice(-500); // Last 500 chars of error
+    const failureNote = `[WATCH MODE] Test failure detected after change to ${changedFile}:\n${truncatedOutput}`;
+    // Try to inject a note into the agent conversation
+    try {
+      const { injectMidRunNote } = require("../agent");
+      injectMidRunNote(failureNote);
+    } catch {
+      // Agent not in active conversation (or ../agent could not be loaded) — just log
+      process.stderr.write(
+        `\n\x1b[33m⚠ Watch: tests failed after ${changedFile} changed\x1b[0m\n`,
+      );
+    }
+  }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nex-code",
-  "version": "0.4.37",
+  "version": "0.4.39",
   "description": "Run 400B+ open coding models on your codebase without the hardware bill. Ollama Cloud first — OpenAI, Anthropic, and Gemini when you need them.",
   "bin": {
     "nex-code": "./dist/nex-code.js"
@@ -16,8 +16,8 @@
   },
   "scripts": {
     "start": "node dist/nex-code.js",
-    "build": "esbuild bin/nex-code.js --bundle --platform=node --target=node18 --outfile=dist/nex-code.js --minify --external:axios --external:dotenv --external:playwright && esbuild cli/benchmark.js --bundle --platform=node --target=node18 --outfile=dist/benchmark.js --minify --external:axios --external:dotenv --external:playwright && rm -rf dist/skills && cp -r cli/skills dist/skills",
-    "dev": "esbuild bin/nex-code.js --bundle --platform=node --target=node18 --outfile=dist/nex-code.js --external:axios --external:dotenv --external:playwright --watch",
+    "build": "node scripts/build.js",
+    "dev": "node scripts/build.js --dev",
     "test": "jest --forceExit",
     "test:orchestrator": "jest tests/orchestrator.test.js --forceExit",
     "coverage": "jest --coverage --forceExit",
@@ -41,15 +41,12 @@
     "ollama-cloud",
     "openai",
     "anthropic",
-    "claude",
     "gemini",
     "llm",
     "gpt",
     "agentic",
     "terminal",
     "coding-assistant",
-    "claude-code-alternative",
-    "gemini-cli-alternative",
     "open-source",
     "free",
     "qwen3",