npm - @evomap/evolver - Versions diffs - 1.70.0 → 1.74.0 - Mend

@evomap/evolver 1.70.0 → 1.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/assets/gep/candidates.jsonl +1 -6
package/index.js +123 -7
package/package.json +1 -1
package/scripts/validate-suite.js +21 -6
package/src/adapters/hookAdapter.js +3 -1
package/src/adapters/kiro.js +203 -0
package/src/adapters/scripts/evolver-session-start.js +62 -0
package/src/atp/atpExecute.js +285 -0
package/src/atp/atpTaskPickup.js +233 -0
package/src/atp/autoBuyer.js +12 -6
package/src/atp/autoDeliver.js +199 -0
package/src/atp/cliAutobuyPrompt.js +4 -3
package/src/atp/hubClient.js +20 -0
package/src/atp/index.js +10 -1
package/src/atp/questionComposer.js +133 -0
package/src/evolve.js +1 -1
package/src/gep/.integrity +0 -0
package/src/gep/a2aProtocol.js +1 -1
package/src/gep/candidateEval.js +1 -1
package/src/gep/candidates.js +1 -1
package/src/gep/contentHash.js +1 -1
package/src/gep/crypto.js +1 -1
package/src/gep/curriculum.js +1 -1
package/src/gep/deviceId.js +1 -1
package/src/gep/envFingerprint.js +1 -1
package/src/gep/explore.js +1 -1
package/src/gep/hubReview.js +1 -1
package/src/gep/hubSearch.js +1 -1
package/src/gep/hubVerify.js +1 -1
package/src/gep/integrityCheck.js +1 -1
package/src/gep/learningSignals.js +1 -1
package/src/gep/memoryGraph.js +1 -1
package/src/gep/memoryGraphAdapter.js +1 -1
package/src/gep/mutation.js +1 -1
package/src/gep/narrativeMemory.js +1 -1
package/src/gep/personality.js +1 -1
package/src/gep/policyCheck.js +1 -1
package/src/gep/prompt.js +1 -1
package/src/gep/reflection.js +1 -1
package/src/gep/selector.js +1 -1
package/src/gep/shield.js +1 -1
package/src/gep/skillDistiller.js +1 -1
package/src/gep/solidify.js +1 -1
package/src/gep/strategy.js +1 -1
package/src/gep/validator/sandboxExecutor.js +11 -2
package/src/proxy/lifecycle/manager.js +5 -1
package/src/proxy/mailbox/store.js +5 -0
package/src/proxy/server/http.js +47 -4

package/src/atp/autoDeliver.js ADDED Viewed

@@ -0,0 +1,199 @@
+// ATP Auto-Deliver (opt-out, merchant-side)
+// Closes the ATP settlement loop for Evolver merchants by auto-calling
+// submitDelivery for every claimed task that carries an atp_order_id.
+//
+// Without this module, an ATP order sits in `pending` until the 7-day escrow
+// timeout refunds the buyer: the Hub routes the task to a merchant node and
+// marks it claimed, but nothing in the Evolver runtime actually calls
+// /a2a/atp/deliver. This was the root cause of the 0-settled-in-13-days
+// pipeline stall observed in prod on 2026-04-27.
+//
+// Integration contract:
+//   1) Call start({ pollMs }) once at Evolver boot. Default ON.
+//      Disable by setting EVOLVER_ATP_AUTODELIVER=off.
+//   2) The module polls /a2a/task/my every pollMs milliseconds, finds tasks
+//      with atp_order_id + a `result_asset_id` (meaning the task already
+//      completed through solidify), and submits a minimal proofPayload.
+//   3) Each submitted order is remembered in a local ledger so we never
+//      double-submit, even across restarts.
+//
+// Dedup ledger lives alongside autoBuyer's ledger under memory/.
+// Failure modes are non-fatal: network errors are logged, not thrown.
+const fs = require('fs');
+const path = require('path');
+const { getMemoryDir } = require('../gep/paths');
+const hubClient = require('./hubClient');
+// Poll cadence used by start() when neither opts.pollMs nor
+// ATP_AUTODELIVER_POLL_MS supplies a usable value.
+const DEFAULT_POLL_MS = 60 * 1000; // 1 min
+// Lower bound start() enforces on the effective poll interval.
+const MIN_POLL_MS = 15 * 1000;
+// File name of the dedup ledger; _ledgerPath() joins it onto getMemoryDir().
+const LEDGER_FILENAME = 'atp-autodeliver-ledger.json';
+// _writeLedger() trims ledger.submitted down to this many entries.
+const LEDGER_MAX_ENTRIES = 500;
+// True between a successful start() and the next stop().
+let _started = false;
+// Handle returned by setInterval in start(); null while no timer is armed.
+let _pollInterval = null;
+// Effective poll interval in milliseconds, recomputed by start().
+let _pollMs = DEFAULT_POLL_MS;
+// Re-entrancy guard: _tick() returns immediately while a previous tick is running.
+let _inflight = false;
+// Location of the dedup ledger: LEDGER_FILENAME joined onto the directory
+// reported by getMemoryDir().
+function _ledgerPath() {
+  const memoryDir = getMemoryDir();
+  return path.join(memoryDir, LEDGER_FILENAME);
+}
+// Opt-out switch. EVOLVER_ATP_AUTODELIVER is lower-cased and trimmed; only
+// 'off', '0' and 'false' disable the daemon. An unset or empty variable
+// counts as 'on'.
+function _isEnabled() {
+  const setting = (process.env.EVOLVER_ATP_AUTODELIVER || 'on').toLowerCase().trim();
+  const disabledValues = ['off', '0', 'false'];
+  return !disabledValues.includes(setting);
+}
+// Builds a brand-new ledger object (schema version 1, nothing submitted).
+// A fresh object is created on every call, so callers may mutate it freely.
+function _emptyLedger() {
+  const submitted = {};
+  return { version: 1, submitted };
+}
+/**
+ * Load the dedup ledger from disk.
+ *
+ * Never throws: a missing file, unreadable file, invalid JSON, or a payload
+ * whose `submitted` member is not a plain object all yield a fresh empty
+ * ledger. A non-object `submitted` (string, number, array) is rejected
+ * because _tick() indexes and assigns into it by order id.
+ *
+ * @returns {{version: number, submitted: Object<string, number>}} the parsed
+ *   ledger as stored, or an empty ledger
+ */
+function _readLedger() {
+  try {
+    const file = _ledgerPath();
+    if (!fs.existsSync(file)) return _emptyLedger();
+    const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
+    const submitted = parsed && typeof parsed === 'object' ? parsed.submitted : null;
+    if (!submitted || typeof submitted !== 'object' || Array.isArray(submitted)) {
+      return _emptyLedger();
+    }
+    return parsed;
+  } catch (_) {
+    // Corrupt or unreadable ledger: start over rather than fail the tick.
+    return _emptyLedger();
+  }
+}
+/**
+ * Persist the ledger to disk, best-effort (never throws).
+ *
+ * Creates the memory directory when missing, bounds `ledger.submitted` to
+ * LEDGER_MAX_ENTRIES, then writes to a `.tmp` sibling and renames it over
+ * the real file. Note that trimming replaces `ledger.submitted` on the
+ * object passed in.
+ *
+ * @param {{version: number, submitted: Object<string, number>}} ledger
+ */
+function _writeLedger(ledger) {
+  try {
+    const dir = getMemoryDir();
+    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+    // Bound the ledger size so it cannot grow without limit on long-running
+    // merchants. Keep the most-recent entries.
+    const entries = Object.entries(ledger.submitted || {});
+    if (entries.length > LEDGER_MAX_ENTRIES) {
+      // Object.entries lists integer-like keys first, in ascending numeric
+      // order, so enumeration order is not insertion order for numeric order
+      // ids. Rank by the stored timestamp instead (negative values mark
+      // terminal failures, hence the absolute value). The sort is stable, so
+      // equal timestamps keep their enumeration order.
+      const recency = function (entry) {
+        return Math.abs(Number(entry[1])) || 0;
+      };
+      entries.sort(function (a, b) {
+        return recency(a) - recency(b);
+      });
+      ledger.submitted = Object.fromEntries(entries.slice(-LEDGER_MAX_ENTRIES));
+    }
+    const target = _ledgerPath();
+    const tmp = target + '.tmp';
+    fs.writeFileSync(tmp, JSON.stringify(ledger, null, 2));
+    fs.renameSync(tmp, target);
+  } catch (_) {
+    // Non-fatal: next poll will re-attempt from Hub state. Hub-side
+    // submitDelivery is itself idempotent per order id.
+  }
+}
+/**
+ * Assemble the proof payload sent with submitDelivery for one task.
+ *
+ * The header comment of this module describes it as the minimal evidence
+ * the Hub's verifier accepts -- NOTE(review): the exact Hub schema is not
+ * visible from this file; confirm against the /a2a/atp/deliver contract.
+ *
+ * @param {object} task -- task record; reads result_asset_id, claimed_at, signals
+ * @returns {object} payload with a fixed result of 'completed', a pass_rate
+ *   of 1.0, and at most the first 10 signals
+ */
+function _buildProofPayload(task) {
+  const assetId = task.result_asset_id || null;
+  // completed_at reuses the task's claimed_at when present, else "now".
+  const completedAt = task.claimed_at || new Date().toISOString();
+  let signals = [];
+  if (Array.isArray(task.signals)) {
+    signals = task.signals.slice(0, 10);
+  }
+  return {
+    result: 'completed',
+    asset_id: assetId,
+    completed_at: completedAt,
+    pass_rate: 1.0,
+    signals,
+    submitter: 'evolver_auto_deliver',
+  };
+}
+/**
+ * One poll cycle: list this node's tasks and submit delivery for every ATP
+ * order that is ready and not yet in the ledger.
+ *
+ * Never rejects; any error is logged and the tick ends. Ledger updates made
+ * before an error are still persisted (in `finally`), so orders that were
+ * delivered earlier in the same tick are not re-submitted on the next one.
+ * Overlapping calls are dropped via the `_inflight` guard.
+ *
+ * @returns {Promise<void>}
+ */
+async function _tick() {
+  if (_inflight) return;
+  _inflight = true;
+  let ledger = null;
+  let ledgerDirty = false;
+  try {
+    const result = await hubClient.listMyTasks(20);
+    if (!result || !result.ok || !result.data) return;
+    const tasks = Array.isArray(result.data.tasks) ? result.data.tasks : [];
+    if (tasks.length === 0) return;
+    ledger = _readLedger();
+    for (const task of tasks) {
+      const orderId = task && task.atp_order_id;
+      if (!orderId) continue;
+      if (ledger.submitted[orderId]) continue;
+      // Only deliver once the task has a result asset (i.e. solidify finished).
+      if (!task.result_asset_id) continue;
+      // Deliver only when the status is absent, 'claimed' or 'completed'.
+      if (task.status && task.status !== 'claimed' && task.status !== 'completed') continue;
+      const proofPayload = _buildProofPayload(task);
+      const resp = await hubClient.submitDelivery(orderId, proofPayload);
+      if (resp && resp.ok) {
+        ledger.submitted[orderId] = Date.now();
+        ledgerDirty = true;
+        console.log('[ATP-AutoDeliver] Delivered order=' + orderId + ' asset=' + (task.result_asset_id || 'none'));
+      } else {
+        // Record terminal-ish errors in the ledger so we do not hammer the
+        // same order every minute. Everything else (transient network) is
+        // retried on the next tick.
+        const err = (resp && resp.error) || 'unknown_error';
+        const status = resp && resp.status;
+        const terminal = status === 400 || status === 404 || status === 409;
+        if (terminal) {
+          // Negative timestamp marks "gave up" as opposed to "delivered".
+          ledger.submitted[orderId] = -Date.now();
+          ledgerDirty = true;
+        }
+        console.log('[ATP-AutoDeliver] Delivery failed order=' + orderId + ' status=' + (status || 'n/a') + ' err=' + String(err).slice(0, 120));
+      }
+    }
+  } catch (err) {
+    console.log('[ATP-AutoDeliver] Tick threw (non-fatal): ' + (err && err.message || err));
+  } finally {
+    // Persist here rather than at the end of `try` so that a throw from a
+    // later submitDelivery cannot discard deliveries already recorded.
+    try {
+      if (ledgerDirty && ledger) _writeLedger(ledger);
+    } finally {
+      _inflight = false;
+    }
+  }
+}
+/**
+ * Arm the auto-deliver poll loop. No-op when already started or when
+ * disabled via _isEnabled().
+ *
+ * The poll interval comes from opts.pollMs, then ATP_AUTODELIVER_POLL_MS,
+ * then DEFAULT_POLL_MS; it is floored to an integer and never allowed below
+ * MIN_POLL_MS. The first tick is fired immediately without being awaited.
+ *
+ * @param {{pollMs?: number}} [opts]
+ */
+function start(opts) {
+  if (_started || !_isEnabled()) return;
+  const rawPollMs = (opts && opts.pollMs) || process.env.ATP_AUTODELIVER_POLL_MS || DEFAULT_POLL_MS;
+  const wholeMs = Math.floor(Number(rawPollMs)) || DEFAULT_POLL_MS;
+  _pollMs = Math.max(MIN_POLL_MS, wholeMs);
+  _started = true;
+  const runTick = function () {
+    _tick().catch(function () { /* swallowed in _tick */ });
+  };
+  _pollInterval = setInterval(runTick, _pollMs);
+  // Not awaited: start() must return immediately, like autoBuyer's start().
+  runTick();
+  console.log('[ATP-AutoDeliver] Started (pollMs=' + _pollMs + ')');
+}
+// Disarm the poll loop: clears the interval timer (if one is armed) and
+// marks the module as not started. Safe to call when already stopped.
+function stop() {
+  const timer = _pollInterval;
+  _started = false;
+  if (!timer) return;
+  clearInterval(timer);
+  _pollInterval = null;
+}
+// Reports whether start() has armed the poll loop and stop() has not been
+// called since.
+function isStarted() {
+  return _started;
+}
+// Test helper: stops the poll loop and restores the module-level poll
+// interval and in-flight guard to their initial values.
+function _resetForTests() {
+  stop();
+  _pollMs = DEFAULT_POLL_MS;
+  _inflight = false;
+}
+// Public surface: lifecycle control only. Everything under __internals is
+// exposed so tests can drive a single tick, inspect the ledger helpers, and
+// reset module state.
+module.exports = {
+  start,
+  stop,
+  isStarted,
+  __internals: {
+    // Runs one poll cycle on demand, bypassing the interval timer.
+    tick: _tick,
+    readLedger: _readLedger,
+    writeLedger: _writeLedger,
+    buildProofPayload: _buildProofPayload,
+    resetForTests: _resetForTests,
+    constants: {
+      DEFAULT_POLL_MS,
+      MIN_POLL_MS,
+      LEDGER_FILENAME,
+      LEDGER_MAX_ENTRIES,
+    },
+  },
+};

package/src/atp/cliAutobuyPrompt.js CHANGED Viewed

@@ -117,10 +117,10 @@ async function runPrompt(opts) {
   try {
     output.write("\n");
     output.write("[ATP-AutoBuyer] Your evolver can automatically place small-priced\n");
-    output.write("ATP orders when it detects a capability gap (default OFF).\n");
+    output.write("ATP orders when it detects a capability gap (default ON).\n");
     output.write("  - daily hard cap:   ATP_AUTOBUY_DAILY_CAP_CREDITS (default applies)\n");
     output.write("  - per-order cap:    ATP_AUTOBUY_PER_ORDER_CAP_CREDITS\n");
-    output.write("  - unset EVOLVER_ATP_AUTOBUY and restart to disable at any time.\n");
+    output.write("  - set EVOLVER_ATP_AUTOBUY=off and restart to disable at any time.\n");
     output.write("\n");
   } catch (_) {
     return { prompted: false, decision: null, reason: "io_error" };
@@ -128,7 +128,7 @@ async function runPrompt(opts) {
   let answer;
   try {
-    answer = await ask("Enable autoBuyer for this session? [y/n/later] ", {
+    answer = await ask("Keep autoBuyer enabled for this session? [y/n/later] ", {
       input,
       output,
     });
@@ -143,6 +143,7 @@ async function runPrompt(opts) {
   }
   if (answer === "n" || answer === "no") {
     _writeAck(false);
+    env.EVOLVER_ATP_AUTOBUY = "off";
     return { prompted: true, decision: "no", reason: "user_declined" };
   }
   return { prompted: true, decision: "later", reason: "user_postponed" };

package/src/atp/hubClient.js CHANGED Viewed

@@ -240,6 +240,25 @@ function getAtpPolicy() {
   return _get('/atp/policy', '/a2a/atp/policy');
 }
+/**
+ * GET /a2a/task/my?node_id=... -- list the tasks claimed by this node.
+ *
+ * Tasks that originate from ATP carry an `atp_order_id`, which lets the
+ * merchant side match a finished task to its DeliveryProof and call
+ * submitDelivery; other tasks do not have the field. The path is outside
+ * /atp/*, so the request never goes through the proxy passthrough.
+ *
+ * @param {number} [limit] -- sent as `limit` only when truthy
+ * @returns {*} whatever _hubGet returns for the composed path
+ */
+function listMyTasks(limit) {
+  const query = new URLSearchParams();
+  query.set('node_id', getNodeId());
+  if (limit) {
+    query.set('limit', String(limit));
+  }
+  return _hubGet('/a2a/task/my?' + query.toString());
+}
 module.exports = {
   placeOrder,
   submitDelivery,
@@ -250,6 +269,7 @@ module.exports = {
   getOrderStatus,
   listProofs,
   getAtpPolicy,
+  listMyTasks,
   // exported for tests only
   _isProxyMode: _isProxyMode,
 };

package/src/atp/index.js CHANGED Viewed

@@ -7,7 +7,10 @@
 //   consumerAgent   - ready-to-use consumer agent template
 //   serviceHelper   - service publishing helper
 //   defaultHandler  - default order handler + config helpers for auto-ATP
-//   autoBuyer       - opt-in capability-gap auto order helper with budget caps
+//   autoBuyer       - opt-out capability-gap auto order helper with budget caps
+//   autoDeliver     - opt-out merchant-side submitDelivery daemon
+//   atpTaskPickup   - merchant-side bridge from pre-claimed ATP tasks to sessions_spawn
+//   atpExecute      - end-to-end completer (publish Gene+Capsule, complete, deliver)
 //   cli             - parsers and runners for the `buy`/`orders`/`verify` subcommands
 const hubClient = require('./hubClient');
@@ -16,6 +19,9 @@ const consumerAgent = require('./consumerAgent');
 const serviceHelper = require('./serviceHelper');
 const defaultHandler = require('./defaultHandler');
 const autoBuyer = require('./autoBuyer');
+const autoDeliver = require('./autoDeliver');
+const atpTaskPickup = require('./atpTaskPickup');
+const atpExecute = require('./atpExecute');
 const cli = require('./cli');
 module.exports = {
@@ -25,5 +31,8 @@ module.exports = {
   serviceHelper,
   defaultHandler,
   autoBuyer,
+  autoDeliver,
+  atpTaskPickup,
+  atpExecute,
   cli,
 };

package/src/atp/questionComposer.js ADDED Viewed

@@ -0,0 +1,133 @@
+// ATP Question Composer
+//
+// Generates a natural-language buyer question from raw capability/signal
+// inputs. autoBuyer previously concatenated signals into a string like
+// "Capability gap detected by evolver: code_evolution,performance,..."
+// which is uninformative to the merchant and produces poor answers.
+//
+// This module maps each known capability to a list of templates that phrase
+// the request as something a real buyer might ask; signals only seed the
+// deterministic choice between a capability's templates. When no capability
+// has a template, the `general` templates are used; the generic "I would
+// like help with <caps>" sentence is only a defensive last resort.
+// Templates deliberately stay concise (under 240 chars) so buyer budgets
+// and merchant time are not wasted on padding.
+//
+// Templates are intentionally defensive: they never leak Evolver internals
+// ("signals", "cycle", "mutation") into merchant-visible text.
+// Default character cap applied to a composed question when the caller
+// does not pass a usable maxLen.
+const DEFAULT_MAX_LEN = 240;
+// Capability id -> candidate question templates. Keys are in the form
+// produced by _normalize (lower-case, underscore-separated). `general` is
+// the catch-all used when no capability has a list of its own.
+const TEMPLATES = {
+  code_evolution: [
+    'I want to improve code quality on a small module. Please suggest one concrete, minimal patch I can apply, including the exact files, the change, and why it helps.',
+    'I am iterating on a codebase and would like one high-leverage refactor suggestion. Be specific about the file, the current issue, and the proposed change.',
+  ],
+  performance: [
+    'My app has a slow hot-path and I want one concrete optimization idea. Explain the likely bottleneck, propose a specific fix, and estimate the impact.',
+    'I need help diagnosing a performance issue. Ask the right clarifying question if needed, or give me the top-3 most likely causes in priority order.',
+  ],
+  debugging: [
+    'I am stuck on a bug and need a fresh pair of eyes. Walk me through a systematic debugging approach that would isolate the root cause in under an hour.',
+    'Help me debug a tricky issue: please outline 3 reproduction strategies, each with the signals I should look for to confirm or rule out a hypothesis.',
+  ],
+  testing: [
+    'I want to add tests to an under-tested module. Recommend the specific test cases (happy path, edge cases, regression) that give the best coverage per line of test code.',
+    'Please review a typical testing gap for this kind of module and tell me the 3 test cases I probably missed.',
+  ],
+  documentation: [
+    'I need to write user-facing documentation for a feature. Give me a concise outline and sample opening paragraph that sets expectations correctly.',
+    'Help me rewrite a README section so it is clear to a first-time user. Focus on the smallest change that removes the most confusion.',
+  ],
+  refactoring: [
+    'I want to refactor a module without changing behavior. Suggest the safest single-step refactor that reduces complexity, and what I should watch for during review.',
+    'Please propose a refactoring plan I can apply in small commits, starting with the change that has the highest value/risk ratio.',
+  ],
+  security: [
+    'Review a typical security concern for this kind of service and give me one actionable hardening I should implement first.',
+    'I want a short security checklist for my app. List the top 5 issues to check in priority order, with the quickest mitigation for each.',
+  ],
+  data_analysis: [
+    'I have a dataset and want to extract one useful insight. Recommend the analysis I should run first, the metric to compute, and how to interpret the result.',
+    'Given a typical CSV of user events, which 3 analyses would most likely surface actionable patterns? Explain why for each.',
+  ],
+  architecture: [
+    'Help me think through an architectural trade-off: I need to choose between two patterns for a small service. Give me the decision factors and a recommended default.',
+    'I need a rough architecture sketch for a new feature. Describe the smallest viable design and list the 2 decisions that are easy to get wrong.',
+  ],
+  deployment: [
+    'Help me set up a safe deployment path for my app. Outline the minimum CI/CD steps and the 3 most common pitfalls to avoid.',
+    'I want to harden my deploy pipeline. Recommend the smallest change that most reduces the risk of a broken deploy reaching production.',
+  ],
+  general: [
+    'I have a small task I would like an agent to help with. Please ask me the single most useful clarifying question, then outline how you would approach it.',
+    'Please give me a concise, practical answer for a typical task in this capability. If context is needed, ask one focused clarifying question first.',
+  ],
+};
+// Canonicalize a capability id: lower-case it, collapse every run of
+// characters outside [a-z0-9_] into a single underscore, then strip leading
+// and trailing underscores. Falsy input yields ''.
+function _normalize(s) {
+  const lowered = String(s || '').toLowerCase();
+  const underscored = lowered.replace(/[^a-z0-9_]+/g, '_');
+  return underscored.replace(/^_+|_+$/g, '');
+}
+/**
+ * Choose one template for a capability key.
+ *
+ * Only TEMPLATES' own properties count as capability keys, so a key such as
+ * 'constructor' (which a plain lookup would resolve through the prototype
+ * chain) falls back to the `general` list like any other unknown key.
+ *
+ * @param {string} key -- normalized capability id
+ * @param {number} hashSeed -- seed; its absolute value modulo the list
+ *   length selects the entry, so equal seeds give equal picks
+ * @returns {string|null} the chosen template, or null when neither the key
+ *   nor `general` has any template
+ */
+function _pickTemplate(key, hashSeed) {
+  const isOwnKey = Object.prototype.hasOwnProperty.call(TEMPLATES, key);
+  const list = (isOwnKey && TEMPLATES[key]) || TEMPLATES.general;
+  if (!list || list.length === 0) return null;
+  // Deterministic pick from a seed so the same signals yield the same
+  // question across runs (plays nicely with autoBuyer's dedup hash).
+  const n = Math.abs(Number(hashSeed) || 0) % list.length;
+  return list[n];
+}
+// 32-bit rolling hash (multiplier 31) over the UTF-16 code units of the
+// input. Arrays are joined with '|' first; any other value is stringified,
+// with falsy values treated as ''. The result is a signed 32-bit integer.
+function _hashFor(parts) {
+  const text = Array.isArray(parts) ? parts.join('|') : String(parts || '');
+  return text.split('').reduce(function (acc, unit) {
+    return (acc * 31 + unit.charCodeAt(0)) | 0;
+  }, 0);
+}
+// Trim the text and cap its length. The cap is maxLen (DEFAULT_MAX_LEN when
+// maxLen is not a usable number) but never less than 40. Text over the cap
+// is cut so that, with the '...' suffix, it fits the cap; whitespace left
+// at the cut point is removed before the suffix is added.
+function _clip(s, maxLen) {
+  const trimmed = String(s || '').trim();
+  const limit = Math.max(40, Number(maxLen) || DEFAULT_MAX_LEN);
+  if (trimmed.length > limit) {
+    const head = trimmed.slice(0, limit - 3).replace(/\s+$/, '');
+    return head + '...';
+  }
+  return trimmed;
+}
+/**
+ * Build a natural-language buyer question from capabilities + signals.
+ *
+ * Capabilities are normalized; the first one that has its own entry in
+ * TEMPLATES selects the template list, otherwise `general` is used. Only
+ * own properties of TEMPLATES count, so a capability such as 'constructor'
+ * cannot shadow a later capability that does have templates. The normalized
+ * capabilities plus the first four signals seed the deterministic choice
+ * within the list.
+ *
+ * @param {object} opts
+ * @param {string[]} opts.capabilities -- buyer-side capability ids (the first one with a template picks the list)
+ * @param {string[]} [opts.signals]    -- evolver signals (seed only; never leaked verbatim)
+ * @param {string}   [opts.fallback]   -- sentence used only when no template could be picked
+ * @param {number}   [opts.maxLen=240]
+ * @returns {string} -- composed question (never empty)
+ */
+function compose(opts) {
+  const capabilities = Array.isArray(opts && opts.capabilities) ? opts.capabilities : [];
+  const signals = Array.isArray(opts && opts.signals) ? opts.signals : [];
+  const maxLen = Number(opts && opts.maxLen) || DEFAULT_MAX_LEN;
+  const keys = capabilities.map(_normalize).filter(Boolean);
+  // Own-property test: a bare TEMPLATES[k] would also match inherited
+  // members of Object.prototype.
+  const hasTemplate = function (k) {
+    return Object.prototype.hasOwnProperty.call(TEMPLATES, k) && Boolean(TEMPLATES[k]);
+  };
+  const tmplKey = keys.find(hasTemplate) || 'general';
+  const seed = _hashFor(keys.concat(signals.slice(0, 4)));
+  const tmpl = _pickTemplate(tmplKey, seed);
+  if (tmpl) return _clip(tmpl, maxLen);
+  // Generic fallback when TEMPLATES does not have `general` (defensive).
+  const capsText = capabilities.length ? capabilities.slice(0, 3).join(', ') : 'a common task';
+  const fb = (opts && opts.fallback && String(opts.fallback).trim())
+    || 'I would like help with ' + capsText + '. Please provide one concrete, actionable answer.';
+  return _clip(fb, maxLen);
+}
+// compose is the only entry point meant for callers; the helpers and the
+// TEMPLATES table are exported so tests can reach them directly.
+module.exports = {
+  compose,
+  // exported for tests
+  _normalize,
+  _pickTemplate,
+  _hashFor,
+  TEMPLATES,
+};