npm - nexus-agents - Versions diffs - 2.154.1 → 2.155.0 - Mend

nexus-agents 2.154.1 → 2.155.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/{chunk-3P5WC7AV.js → chunk-KXTOVEZS.js} +344 -2
package/dist/chunk-KXTOVEZS.js.map +1 -0
package/dist/{chunk-RW5ZE7IC.js → chunk-MJRIWG2E.js} +3 -3
package/dist/{chunk-GCZHEGDC.js → chunk-TOYEI3KJ.js} +2 -2
package/dist/cli.js +4 -4
package/dist/cli.js.map +1 -1
package/dist/index.js +2 -2
package/dist/{setup-command-EWIFNU6P.js → setup-command-X75X7ZZD.js} +3 -3
package/package.json +1 -1
package/dist/chunk-3P5WC7AV.js.map +0 -1
package/dist/{chunk-RW5ZE7IC.js.map → chunk-MJRIWG2E.js.map} +0 -0
package/dist/{chunk-GCZHEGDC.js.map → chunk-TOYEI3KJ.js.map} +0 -0
package/dist/{setup-command-EWIFNU6P.js.map → setup-command-X75X7ZZD.js.map} +0 -0

package/dist/{chunk-3P5WC7AV.js → chunk-KXTOVEZS.js} RENAMED Viewed

@@ -18,7 +18,7 @@ import {
   DEFAULT_TASK_TTL_MS,
   DEFAULT_TOOL_RATE_LIMITS,
   clampTaskTtl
-} from "./chunk-RW5ZE7IC.js";
+} from "./chunk-MJRIWG2E.js";
 import {
   executeExpert
 } from "./chunk-F5R53HDK.js";
@@ -46487,6 +46487,326 @@ function getShadowSink() {
   return singletonSink;
 }
+// src/orchestration/meta-strategy-eval.ts
+/**
+ * Buckets corpus entries by their `expectedStrategy` label.
+ *
+ * @param corpus - iterable of entries; only `entry.expectedStrategy` is read here.
+ * @returns a Map from strategy label to the array of entries carrying that label.
+ *   Map keys iterate in first-seen order and each array keeps the corpus order.
+ */ function groupByStrategy(corpus) {
+  const groups = /* @__PURE__ */ new Map();
+  for (const entry of corpus) {
+    const list = groups.get(entry.expectedStrategy) ?? []; // existing bucket for this label, or a fresh one
+    list.push(entry);
+    groups.set(entry.expectedStrategy, list); // registers a new bucket; harmless re-set for an existing one
+  }
+  return groups;
+}
+/**
+ * Splits a corpus into train and test sets, one strategy group at a time.
+ *
+ * Within each group (as returned by groupByStrategy) the leading entries go to
+ * train and the trailing entries go to test. Nothing is shuffled, so the same
+ * corpus and ratio always produce the same split.
+ *
+ * @param corpus - entries to split.
+ * @param testRatio - fraction of each group to hold out for testing.
+ * @returns `{ train, test }`, two arrays of entries.
+ */ function splitCorpus(corpus, testRatio) {
+  const train = [];
+  const test = [];
+  for (const entries of groupByStrategy(corpus).values()) {
+    const testN = Math.max(1, Math.round(entries.length * testRatio)); // at least one held-out entry per group, even when the ratio rounds to 0
+    const trainN = Math.max(0, entries.length - testN); // clamp so a testN larger than the group cannot go negative
+    train.push(...entries.slice(0, trainN));
+    test.push(...entries.slice(trainN)); // everything after the train prefix; NOTE(review): a single-entry group lands entirely in test
+  }
+  return { train, test };
+}
+/**
+ * Scores the rule-based orchestrator against the learned selector on a
+ * held-out slice of a labelled corpus.
+ *
+ * The corpus is split with splitCorpus. Each train entry's rule decision is
+ * recorded on the learned selector via `recordOutcome`. Each test entry is then
+ * scored twice: once on the rule decision itself, once on the learned
+ * selector's `predict` output for that decision.
+ *
+ * @param corpus - entries with `goal` and `expectedStrategy`.
+ * @param options - optional settings; `options.testRatio` defaults to 0.25.
+ * @returns `{ total, trainCount, testCount, rulesAccuracy, learnedAccuracy, delta }`
+ *   where `delta` is `learnedAccuracy - rulesAccuracy`. Both accuracies are 0
+ *   when the test set is empty.
+ */ function evaluateMetaStrategy(corpus, options = {}) {
+  const testRatio = options.testRatio ?? 0.25; // only null/undefined fall back to the default
+  const { train, test } = splitCorpus(corpus, testRatio);
+  const orchestrator = createMetaOrchestrator();
+  const learned = createLearnedStrategySelector();
+  for (const entry of train) {
+    const decision = orchestrator.select({ goal: entry.goal });
+    learned.recordOutcome(entry.expectedStrategy, decision, true); // NOTE(review): third argument is always true, whether or not decision.strategy matched the label; confirm against recordOutcome's contract
+  }
+  let rulesCorrect = 0;
+  let learnedCorrect = 0;
+  for (const entry of test) {
+    const decision = orchestrator.select({ goal: entry.goal });
+    if (decision.strategy === entry.expectedStrategy) rulesCorrect++; // rule-based pick matches the label
+    if (learned.predict(decision).strategy === entry.expectedStrategy) learnedCorrect++; // learned pick, derived from the rule decision, matches the label
+  }
+  const testCount = test.length;
+  const rulesAccuracy = testCount === 0 ? 0 : rulesCorrect / testCount; // report 0 instead of NaN for an empty test set
+  const learnedAccuracy = testCount === 0 ? 0 : learnedCorrect / testCount;
+  return {
+    total: corpus.length,
+    trainCount: train.length,
+    testCount,
+    rulesAccuracy,
+    learnedAccuracy,
+    delta: learnedAccuracy - rulesAccuracy
+  };
+}
+// src/orchestration/meta-strategy-corpus.ts
+/**
+ * Built-in labelled corpus: each entry pairs a natural-language `goal` with the
+ * `expectedStrategy` it should be routed to. There are 80 entries, 10 for each
+ * of 8 strategies (single-shot, dev-pipeline, pipeline, graph-workflow,
+ * orchestrate, consensus, spec, research).
+ *
+ * Entry order matters: splitCorpus holds out the trailing entries of each
+ * strategy group as the test set, so reordering entries changes which goals
+ * are used for training and which for testing.
+ */ var META_STRATEGY_CORPUS = [
+  // single-shot — trivial single-step
+  { goal: "rename the variable foo to bar in utils.ts", expectedStrategy: "single-shot" },
+  { goal: "what does the git rebase --onto flag do?", expectedStrategy: "single-shot" },
+  { goal: "format this JSON snippet", expectedStrategy: "single-shot" },
+  { goal: "add a one-line comment explaining this regex", expectedStrategy: "single-shot" },
+  { goal: "convert this value from Celsius to Fahrenheit", expectedStrategy: "single-shot" },
+  {
+    goal: "explain what the useEffect hook does in one paragraph",
+    expectedStrategy: "single-shot"
+  },
+  { goal: "convert this markdown table to CSV", expectedStrategy: "single-shot" },
+  {
+    goal: "rename the function calcTotal to computeTotal in cart.ts",
+    expectedStrategy: "single-shot"
+  },
+  { goal: "what is the time complexity of binary search?", expectedStrategy: "single-shot" },
+  { goal: "format this SQL query for readability", expectedStrategy: "single-shot" },
+  // dev-pipeline — code change needing the test/lint/typecheck gate
+  {
+    goal: "fix the off-by-one bug in pagination.ts and make sure the tests pass",
+    expectedStrategy: "dev-pipeline"
+  },
+  {
+    goal: "add input validation to the login handler with unit tests",
+    expectedStrategy: "dev-pipeline"
+  },
+  { goal: "refactor the auth module and run lint and typecheck", expectedStrategy: "dev-pipeline" },
+  { goal: "implement retry logic in the http client with tests", expectedStrategy: "dev-pipeline" },
+  {
+    goal: "patch the null-pointer in parser.ts and verify the build",
+    expectedStrategy: "dev-pipeline"
+  },
+  {
+    goal: "fix the memory leak in the websocket handler and make the tests green",
+    expectedStrategy: "dev-pipeline"
+  },
+  {
+    goal: "add a rate limiter to the API middleware with unit tests",
+    expectedStrategy: "dev-pipeline"
+  },
+  {
+    goal: "correct the timezone bug in the scheduler and run the test suite",
+    expectedStrategy: "dev-pipeline"
+  },
+  {
+    goal: "implement pagination on the users endpoint with tests and typecheck",
+    expectedStrategy: "dev-pipeline"
+  },
+  {
+    goal: "fix the flaky retry test in queue.ts and keep lint clean",
+    expectedStrategy: "dev-pipeline"
+  },
+  // pipeline — multi-stage templated audit/general
+  { goal: "run a security audit of the payments module", expectedStrategy: "pipeline" },
+  { goal: "do a full quality audit of the API layer", expectedStrategy: "pipeline" },
+  { goal: "produce an architecture review of the data pipeline", expectedStrategy: "pipeline" },
+  { goal: "run the documentation-quality audit over the docs tree", expectedStrategy: "pipeline" },
+  { goal: "perform a compliance audit of the logging subsystem", expectedStrategy: "pipeline" },
+  { goal: "run a performance audit of the checkout flow", expectedStrategy: "pipeline" },
+  { goal: "do a dependency-vulnerability audit of the whole repo", expectedStrategy: "pipeline" },
+  { goal: "produce an accessibility audit of the web frontend", expectedStrategy: "pipeline" },
+  { goal: "run a code-quality audit across the billing service", expectedStrategy: "pipeline" },
+  { goal: "perform a test-coverage audit of the core package", expectedStrategy: "pipeline" },
+  // graph-workflow — DAG / conditional-edge
+  {
+    goal: "build a conditional workflow that branches on the test result then deploys or rolls back",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "orchestrate a DAG fetch then transform then validate then load, halting at the validate gate on failure",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "create a multi-stage workflow with conditional edges depending on the lint outcome",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "wire a workflow where step C runs only if both A and B succeed",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "design a graph workflow with a fan-in join after three sequential gated branches",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "build a workflow that runs A then branches to B or C based on the security-scan verdict",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "create a DAG where build and lint run in parallel then merge into a gated deploy step",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "design a workflow with a retry-loop edge back to the fetch step on transient failure",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "wire a pipeline that skips the migration step when the schema-check passes",
+    expectedStrategy: "graph-workflow"
+  },
+  {
+    goal: "orchestrate a conditional graph: on approval go to publish, otherwise route to a rework node",
+    expectedStrategy: "graph-workflow"
+  },
+  // orchestrate — pattern-based multi-agent
+  {
+    goal: "run a multi-agent wave over these modules to refactor them in parallel",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "fan out independent subtasks across the codebase to add logging",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "orchestrate a swarm of agents to triage the open issues",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "use a multi-agent pattern to migrate each service independently",
+    expectedStrategy: "orchestrate"
+  },
+  { goal: "dispatch a wave of agents to audit each microservice", expectedStrategy: "orchestrate" },
+  {
+    goal: "spawn a swarm of agents to add type annotations across every module in parallel",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "fan out a wave of agents to update the copyright header in each file",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "use a multi-agent pattern to generate unit tests for each untested component concurrently",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "dispatch parallel agents to translate the docs into each supported language",
+    expectedStrategy: "orchestrate"
+  },
+  {
+    goal: "run a fan-out of agents to bump the dependency version in every package independently",
+    expectedStrategy: "orchestrate"
+  },
+  // consensus — multi-perspective decision/vote
+  { goal: "hold a consensus vote on whether to adopt GraphQL", expectedStrategy: "consensus" },
+  { goal: "we need a consensus decision on the database choice", expectedStrategy: "consensus" },
+  {
+    goal: "get multiple perspectives via a vote on the API redesign",
+    expectedStrategy: "consensus"
+  },
+  {
+    goal: "run a consensus panel on the proposed authentication change",
+    expectedStrategy: "consensus"
+  },
+  {
+    goal: "do a multi-perspective review and vote on the migration plan",
+    expectedStrategy: "consensus"
+  },
+  { goal: "hold a vote on which cloud provider to standardize on", expectedStrategy: "consensus" },
+  {
+    goal: "get a consensus decision on whether to drop support for Node 18",
+    expectedStrategy: "consensus"
+  },
+  {
+    goal: "run a multi-perspective panel to decide the caching strategy",
+    expectedStrategy: "consensus"
+  },
+  { goal: "we need a consensus vote on the proposed pricing model", expectedStrategy: "consensus" },
+  {
+    goal: "gather multiple expert opinions and vote on the rollout timeline",
+    expectedStrategy: "consensus"
+  },
+  // spec — greenfield from a written spec
+  { goal: "build a greenfield todo app from this written spec", expectedStrategy: "spec" },
+  {
+    goal: "implement a new microservice from scratch per the attached spec document",
+    expectedStrategy: "spec"
+  },
+  { goal: "create a brand-new CLI tool from this specification", expectedStrategy: "spec" },
+  { goal: "scaffold a greenfield REST API from the provided spec", expectedStrategy: "spec" },
+  {
+    goal: "build the new notification service from scratch following the spec",
+    expectedStrategy: "spec"
+  },
+  {
+    goal: "build a new authentication service from scratch following this specification",
+    expectedStrategy: "spec"
+  },
+  {
+    goal: "scaffold a greenfield mobile backend from the attached spec document",
+    expectedStrategy: "spec"
+  },
+  {
+    goal: "implement a brand-new billing engine from this written spec",
+    expectedStrategy: "spec"
+  },
+  {
+    goal: "create a new GraphQL gateway from scratch per the provided spec",
+    expectedStrategy: "spec"
+  },
+  {
+    goal: "build the greenfield analytics dashboard from this specification",
+    expectedStrategy: "spec"
+  },
+  // research — research-heavy investigation
+  { goal: "research the best vector database for our use case", expectedStrategy: "research" },
+  { goal: "investigate and compare OAuth libraries for Node", expectedStrategy: "research" },
+  {
+    goal: "do a deep research report on consensus algorithms for distributed systems",
+    expectedStrategy: "research"
+  },
+  { goal: "survey the landscape of LLM evaluation frameworks", expectedStrategy: "research" },
+  {
+    goal: "research which benchmarks are worth adopting for our agents",
+    expectedStrategy: "research"
+  },
+  {
+    goal: "research the current best practices for prompt caching across LLM providers",
+    expectedStrategy: "research"
+  },
+  {
+    goal: "compare and evaluate message-queue technologies for our throughput needs",
+    expectedStrategy: "research"
+  },
+  {
+    goal: "survey the state of the art in retrieval-augmented generation",
+    expectedStrategy: "research"
+  },
+  {
+    goal: "investigate which observability stack fits our microservices best",
+    expectedStrategy: "research"
+  },
+  {
+    goal: "research the tradeoffs between monorepo and polyrepo for our team",
+    expectedStrategy: "research"
+  }
+];
+// src/orchestration/meta-strategy-readiness.ts
+/**
+ * Default thresholds for evaluateMetaStrategyReadiness; used when the caller
+ * passes no `config` argument.
+ */ var DEFAULT_META_STRATEGY_READINESS_CONFIG = {
+  minTestCases: 20, // lower bound on result.testCount ("volume" criterion)
+  minDelta: 0.05, // lower bound on result.delta ("learned-beats-rules" criterion)
+  minLearnedAccuracy: 0.7 // lower bound on result.learnedAccuracy ("learned-accuracy-floor" criterion)
+};
+/**
+ * Formats a fraction as a whole-number percentage string without the "%" sign,
+ * e.g. 0.7 becomes "70".
+ *
+ * @param n - fraction to format (multiplied by 100, then rounded to an integer).
+ * @returns the rounded percentage as a string.
+ */ function pctStr(n) {
+  return String(Math.round(n * 100));
+}
+/**
+ * Turns an evaluation result into a readiness verdict by checking three
+ * thresholds: held-out volume, learned-minus-rules delta, and learned accuracy.
+ *
+ * @param result - object with `testCount`, `delta` and `learnedAccuracy`
+ *   (the shape returned by evaluateMetaStrategy).
+ * @param config - thresholds `minTestCases`, `minDelta`, `minLearnedAccuracy`;
+ *   defaults to DEFAULT_META_STRATEGY_READINESS_CONFIG.
+ * @returns `{ ready, criteria, blockers }`. `criteria` lists every check with
+ *   `name`, `met` and a human-readable `detail`. `blockers` holds the names of
+ *   the unmet checks, and `ready` is true only when `blockers` is empty.
+ */ function evaluateMetaStrategyReadiness(result, config = DEFAULT_META_STRATEGY_READINESS_CONFIG) {
+  const criteria = [
+    {
+      name: "volume",
+      met: result.testCount >= config.minTestCases, // enough held-out cases to judge on
+      detail: `${String(result.testCount)} held-out test cases (need \u2265 ${String(config.minTestCases)})`
+    },
+    {
+      name: "learned-beats-rules",
+      met: result.delta >= config.minDelta, // learned accuracy must exceed rules accuracy by the margin
+      detail: `learned\u2212rules delta ${result.delta.toFixed(2)} (need \u2265 ${config.minDelta.toFixed(2)})`
+    },
+    {
+      name: "learned-accuracy-floor",
+      met: result.learnedAccuracy >= config.minLearnedAccuracy, // absolute floor, independent of the rules baseline
+      detail: `learned accuracy ${pctStr(result.learnedAccuracy)}% (need \u2265 ${pctStr(config.minLearnedAccuracy)}%)`
+    }
+  ];
+  const blockers = criteria.filter((c) => !c.met).map((c) => c.name); // names of the failed checks, in declaration order
+  return { ready: blockers.length === 0, criteria, blockers };
+}
 // src/orchestration/meta-dispatcher.ts
 var MetaDispatchError = class extends Error {
   code;
@@ -47194,12 +47514,34 @@ function toMetaInput(input, mode) {
     ...input.forceStrategy !== void 0 ? { forceStrategy: input.forceStrategy } : {}
   };
 }
+/**
+ * Module-level flag recording whether the readiness evaluation has already
+ * been attempted; read and written only by logMetaStrategyReadinessOnce.
+ */ var readinessLogged = false;
+/**
+ * Evaluates the built-in corpus and logs the readiness verdict, at most once
+ * per module instance.
+ *
+ * Any error thrown by the evaluation is caught and reported through
+ * `logger58.warn`, so this function does not propagate evaluation failures.
+ *
+ * @param logger58 - logger whose `info` and `warn` methods are called with
+ *   `(message, fields)`.
+ */ function logMetaStrategyReadinessOnce(logger58) {
+  if (readinessLogged) return;
+  readinessLogged = true; // set before the evaluation runs, so a failed attempt is not retried on later calls
+  try {
+    const evalResult = evaluateMetaStrategy(META_STRATEGY_CORPUS);
+    const verdict = evaluateMetaStrategyReadiness(evalResult); // uses the default readiness thresholds
+    logger58.info("meta-strategy learned-selector readiness", {
+      ready: verdict.ready,
+      delta: evalResult.delta,
+      learnedAccuracy: evalResult.learnedAccuracy,
+      rulesAccuracy: evalResult.rulesAccuracy,
+      testCount: evalResult.testCount,
+      blockers: verdict.blockers
+    });
+  } catch (err2) {
+    logger58.warn("meta-strategy readiness signal failed (non-fatal)", {
+      error: getErrorMessage(err2)
+    });
+  }
+}
 /**
  * Builds a meta orchestrator wired with the shadow selector and shadow sink,
  * triggers the one-time readiness log, and returns the orchestrator's
  * selection for the converted input.
  *
  * @param input - caller input, converted via toMetaInput.
  * @param mode - passed through to toMetaInput.
  * @param logger58 - optional logger; forwarded to the orchestrator only when
  *   defined.
  * @returns the result of `meta.select(...)`.
  */ function selectDecision(input, mode, logger58) {
   const meta = createMetaOrchestrator({
     ...logger58 !== void 0 ? { logger: logger58 } : {},
     shadowSelector: getShadowSelector(),
     shadowSink: getShadowSink()
   });
+  logMetaStrategyReadinessOnce(logger58 ?? createLogger({ component: "RunTool" })); // falls back to a "RunTool" logger when none is supplied; NOTE(review): the first call runs the corpus evaluation synchronously before select
   return meta.select(toMetaInput(input, mode));
 }
 function routeGoal(input, logger58) {
@@ -51446,4 +51788,4 @@ export {
   shutdownFeedbackSubscriber,
   createEventBusBridge
 };
-//# sourceMappingURL=chunk-3P5WC7AV.js.map
+//# sourceMappingURL=chunk-KXTOVEZS.js.map