@evomap/evolver 1.70.0 → 1.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/assets/gep/candidates.jsonl +1 -6
  2. package/index.js +123 -7
  3. package/package.json +1 -1
  4. package/scripts/validate-suite.js +21 -6
  5. package/src/adapters/hookAdapter.js +3 -1
  6. package/src/adapters/kiro.js +203 -0
  7. package/src/adapters/scripts/evolver-session-start.js +62 -0
  8. package/src/atp/atpExecute.js +285 -0
  9. package/src/atp/atpTaskPickup.js +233 -0
  10. package/src/atp/autoBuyer.js +12 -6
  11. package/src/atp/autoDeliver.js +199 -0
  12. package/src/atp/cliAutobuyPrompt.js +4 -3
  13. package/src/atp/hubClient.js +20 -0
  14. package/src/atp/index.js +10 -1
  15. package/src/atp/questionComposer.js +133 -0
  16. package/src/evolve.js +1 -1
  17. package/src/gep/.integrity +0 -0
  18. package/src/gep/a2aProtocol.js +1 -1
  19. package/src/gep/candidateEval.js +1 -1
  20. package/src/gep/candidates.js +1 -1
  21. package/src/gep/contentHash.js +1 -1
  22. package/src/gep/crypto.js +1 -1
  23. package/src/gep/curriculum.js +1 -1
  24. package/src/gep/deviceId.js +1 -1
  25. package/src/gep/envFingerprint.js +1 -1
  26. package/src/gep/explore.js +1 -1
  27. package/src/gep/hubReview.js +1 -1
  28. package/src/gep/hubSearch.js +1 -1
  29. package/src/gep/hubVerify.js +1 -1
  30. package/src/gep/integrityCheck.js +1 -1
  31. package/src/gep/learningSignals.js +1 -1
  32. package/src/gep/memoryGraph.js +1 -1
  33. package/src/gep/memoryGraphAdapter.js +1 -1
  34. package/src/gep/mutation.js +1 -1
  35. package/src/gep/narrativeMemory.js +1 -1
  36. package/src/gep/personality.js +1 -1
  37. package/src/gep/policyCheck.js +1 -1
  38. package/src/gep/prompt.js +1 -1
  39. package/src/gep/reflection.js +1 -1
  40. package/src/gep/selector.js +1 -1
  41. package/src/gep/shield.js +1 -1
  42. package/src/gep/skillDistiller.js +1 -1
  43. package/src/gep/solidify.js +1 -1
  44. package/src/gep/strategy.js +1 -1
  45. package/src/gep/validator/sandboxExecutor.js +11 -2
  46. package/src/proxy/lifecycle/manager.js +5 -1
  47. package/src/proxy/mailbox/store.js +5 -0
  48. package/src/proxy/server/http.js +47 -4
@@ -0,0 +1,199 @@
1
+ // ATP Auto-Deliver (opt-out, merchant-side)
2
+ // Closes the ATP settlement loop for Evolver merchants by auto-calling
3
+ // submitDelivery for every claimed task that carries an atp_order_id.
4
+ //
5
+ // Without this module, an ATP order sits in `pending` until the 7-day escrow
6
+ // timeout refunds the buyer: the Hub routes the task to a merchant node and
7
+ // marks it claimed, but nothing in the Evolver runtime actually calls
8
+ // /a2a/atp/deliver. This was the root cause of the 0-settled-in-13-days
9
+ // pipeline stall observed in prod on 2026-04-27.
10
+ //
11
+ // Integration contract:
12
+ // 1) Call start({ pollMs }) once at Evolver boot. Default ON.
13
+ // Disable by setting EVOLVER_ATP_AUTODELIVER=off.
14
+ // 2) The module polls /a2a/task/my every pollMs milliseconds, finds tasks
15
+ // with atp_order_id + a `result_asset_id` (meaning the task already
16
+ // completed through solidify), and submits a minimal proofPayload.
17
+ // 3) Each submitted order is remembered in a local ledger so we never
18
+ // double-submit, even across restarts.
19
+ //
20
+ // Dedup ledger lives alongside autoBuyer's ledger under memory/.
21
+ // Failure modes are non-fatal: network errors are logged, not thrown.
22
+
23
+ const fs = require('fs');
24
+ const path = require('path');
25
+
26
+ const { getMemoryDir } = require('../gep/paths');
27
+ const hubClient = require('./hubClient');
28
+
29
// ---- Tunables ---------------------------------------------------------------
// Poll cadence used when the caller does not supply one (one minute).
const DEFAULT_POLL_MS = 60 * 1000;
// Smallest poll cadence start() will accept; lower requests are raised to this.
const MIN_POLL_MS = 15 * 1000;
// File name of the dedup ledger and the cap on how many order ids it retains.
const LEDGER_FILENAME = 'atp-autodeliver-ledger.json';
const LEDGER_MAX_ENTRIES = 500;

// ---- Module state -----------------------------------------------------------
// True between a successful start() and the next stop().
let _started = false;
// Handle returned by setInterval while the poller is running, otherwise null.
let _pollInterval = null;
// Effective poll cadence chosen by the most recent start().
let _pollMs = DEFAULT_POLL_MS;
// Re-entrancy guard: true while a _tick() is still running.
let _inflight = false;
38
+
39
/**
 * Location of the dedup ledger: LEDGER_FILENAME inside the directory
 * reported by getMemoryDir().
 *
 * @returns {string}
 */
function _ledgerPath() {
  const memoryDir = getMemoryDir();
  return path.join(memoryDir, LEDGER_FILENAME);
}
42
+
43
/**
 * Whether auto-delivery is switched on.
 *
 * Reads EVOLVER_ATP_AUTODELIVER (unset or empty counts as "on"). After
 * lower-casing and trimming, only "off", "0" and "false" disable the module;
 * every other value leaves it enabled.
 *
 * @returns {boolean}
 */
function _isEnabled() {
  const setting = process.env.EVOLVER_ATP_AUTODELIVER || 'on';
  const normalized = setting.toLowerCase().trim();
  return !['off', '0', 'false'].includes(normalized);
}
47
+
48
/**
 * Build a fresh ledger with no recorded orders.
 *
 * @returns {{version: number, submitted: Object}} a new object on every call
 */
function _emptyLedger() {
  const submitted = {};
  return { version: 1, submitted };
}
51
+
52
/**
 * Load the dedup ledger from disk.
 *
 * Never throws: a missing file, unreadable file, invalid JSON, or a payload
 * whose `submitted` field is not a plain key/value object all yield a fresh
 * empty ledger.
 *
 * @returns {{version: number, submitted: Object}} the parsed ledger, or an
 *   empty one when nothing usable is stored
 */
function _readLedger() {
  try {
    const ledgerFile = _ledgerPath();
    if (!fs.existsSync(ledgerFile)) return _emptyLedger();
    const parsed = JSON.parse(fs.readFileSync(ledgerFile, 'utf8'));
    const submitted = parsed && typeof parsed === 'object' ? parsed.submitted : null;
    // `submitted` is used as an orderId -> timestamp map. A string/boolean
    // would silently discard property writes and an array would drop them on
    // JSON serialisation, so anything but a plain object is treated as corrupt.
    if (!submitted || typeof submitted !== 'object' || Array.isArray(submitted)) {
      return _emptyLedger();
    }
    return parsed;
  } catch (_) {
    // Corrupt or unreadable ledger: start over rather than crash the poller.
    return _emptyLedger();
  }
}
63
+ }
64
+
65
/**
 * Persist the dedup ledger.
 *
 * Creates the memory directory when absent, caps `ledger.submitted` at
 * LEDGER_MAX_ENTRIES (keeping the most recently recorded orders; the trimmed
 * map is written back onto `ledger`), then writes to a `.tmp` sibling and
 * renames it over the real file.
 *
 * @param {{submitted: Object}} ledger -- orderId -> timestamp map; negative
 *   timestamps are accepted and compared by magnitude
 * @returns {void} never throws; I/O errors are swallowed on purpose
 */
function _writeLedger(ledger) {
  try {
    const dir = getMemoryDir();
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    // Bound the ledger size so it cannot grow without limit on long-running
    // merchants.
    const entries = Object.entries(ledger.submitted || {});
    if (entries.length > LEDGER_MAX_ENTRIES) {
      // Object.entries lists integer-like keys first in ascending numeric
      // order, so enumeration order is not reliably insertion order. Rank by
      // the recorded timestamp instead so the newest entries are the ones
      // kept. Array#sort is stable, so ties keep their enumeration order.
      const recency = function (entry) {
        const stamp = Math.abs(Number(entry[1]));
        return Number.isFinite(stamp) ? stamp : 0;
      };
      entries.sort(function (a, b) { return recency(a) - recency(b); });
      ledger.submitted = Object.fromEntries(entries.slice(-LEDGER_MAX_ENTRIES));
    }
    const target = _ledgerPath();
    const tmp = target + '.tmp';
    fs.writeFileSync(tmp, JSON.stringify(ledger, null, 2));
    fs.renameSync(tmp, target);
  } catch (_) {
    // Non-fatal: next poll will re-attempt from Hub state. Hub-side
    // submitDelivery is itself idempotent per order id.
  }
}
84
+
85
/**
 * Assemble the proof object sent with submitDelivery for one task.
 *
 * Intended as the minimal evidence the Hub's auto verifier accepts, following
 * the result/output/pass_rate/signals shape of /a2a/atp/deliver.
 *
 * @param {object} task -- task record; reads result_asset_id, claimed_at, signals
 * @returns {object} proof with result, asset_id, completed_at, pass_rate,
 *   signals (at most the first 10) and submitter
 */
function _buildProofPayload(task) {
  const fallbackTimestamp = new Date().toISOString();
  const assetId = task.result_asset_id || null;
  // The claim time stands in for the completion time; "now" is used when absent.
  const completedAt = task.claimed_at || fallbackTimestamp;
  const signals = Array.isArray(task.signals) ? task.signals.slice(0, 10) : [];

  const proof = {};
  proof.result = 'completed';
  proof.asset_id = assetId;
  proof.completed_at = completedAt;
  proof.pass_rate = 1.0;
  proof.signals = signals;
  proof.submitter = 'evolver_auto_deliver';
  return proof;
}
98
+
99
/**
 * Run one poll cycle: list this node's tasks and submit a delivery for every
 * eligible ATP order that is not yet in the ledger.
 *
 * A task is eligible when it has an `atp_order_id`, a `result_asset_id`, and a
 * status that is absent, 'claimed' or 'completed'. Successful deliveries are
 * recorded with a positive timestamp; 400/404/409 responses are recorded with
 * a negative timestamp so they are not retried. Any other failure (including a
 * thrown error) is left unrecorded and retried on a later tick.
 *
 * Failures are isolated per order: an exception while delivering one order no
 * longer aborts the remaining orders or skips persisting the ledger entries
 * already collected in this tick.
 *
 * @returns {Promise<void>} resolves when the cycle ends; errors are logged,
 *   not thrown. Returns immediately when a previous tick is still running.
 */
async function _tick() {
  if (_inflight) return;
  _inflight = true;
  try {
    const result = await hubClient.listMyTasks(20);
    if (!result || !result.ok || !result.data) return;
    const tasks = Array.isArray(result.data.tasks) ? result.data.tasks : [];
    if (tasks.length === 0) return;

    const ledger = _readLedger();
    let wroteLedger = false;

    for (const task of tasks) {
      const orderId = task && task.atp_order_id;
      if (!orderId) continue;
      if (ledger.submitted[orderId]) continue;
      // Only deliver once the task has a result asset (i.e. solidify finished).
      if (!task.result_asset_id) continue;
      // Don't try to deliver on already-terminal statuses.
      if (task.status && task.status !== 'claimed' && task.status !== 'completed') continue;

      try {
        const proofPayload = _buildProofPayload(task);
        const resp = await hubClient.submitDelivery(orderId, proofPayload);
        if (resp && resp.ok) {
          ledger.submitted[orderId] = Date.now();
          wroteLedger = true;
          console.log('[ATP-AutoDeliver] Delivered order=' + orderId + ' asset=' + (task.result_asset_id || 'none'));
        } else {
          // Record terminal-ish errors in the ledger so we do not hammer the
          // same order every minute. Everything else (transient network) is
          // retried on the next tick.
          const err = (resp && resp.error) || 'unknown_error';
          const status = resp && resp.status;
          const terminal = status === 400 || status === 404 || status === 409;
          if (terminal) {
            ledger.submitted[orderId] = -Date.now();
            wroteLedger = true;
          }
          console.log('[ATP-AutoDeliver] Delivery failed order=' + orderId + ' status=' + (status || 'n/a') + ' err=' + String(err).slice(0, 120));
        }
      } catch (err) {
        // A throw for one order is treated as transient: leave it out of the
        // ledger, keep going, and still persist what was recorded so far.
        console.log('[ATP-AutoDeliver] Delivery threw order=' + orderId + ' err=' + String(err && err.message || err).slice(0, 120));
      }
    }

    if (wroteLedger) _writeLedger(ledger);
  } catch (err) {
    console.log('[ATP-AutoDeliver] Tick threw (non-fatal): ' + (err && err.message || err));
  } finally {
    _inflight = false;
  }
}
148
+
149
/**
 * Start the auto-deliver poller. No-op when already started or when
 * _isEnabled() reports the feature is off.
 *
 * The poll cadence comes from `opts.pollMs`, then ATP_AUTODELIVER_POLL_MS,
 * then DEFAULT_POLL_MS. It is floored to an integer and clamped to
 * [MIN_POLL_MS, 2147483647]. The upper bound matters because Node resets any
 * timer delay above 2147483647 ms to 1 ms, which would turn a very large
 * configured interval into a busy poll.
 *
 * @param {{pollMs?: number}} [opts]
 * @returns {void}
 */
function start(opts) {
  if (_started) return;
  if (!_isEnabled()) return;
  // Largest delay setInterval honours (2^31 - 1 ms).
  const MAX_TIMER_DELAY_MS = 2147483647;
  const requested = Number((opts && opts.pollMs) || process.env.ATP_AUTODELIVER_POLL_MS || DEFAULT_POLL_MS);
  // NaN and sub-millisecond requests floor to a falsy value and use the default.
  const wanted = Math.floor(requested) || DEFAULT_POLL_MS;
  _pollMs = Math.min(MAX_TIMER_DELAY_MS, Math.max(MIN_POLL_MS, wanted));
  _started = true;
  _pollInterval = setInterval(function () {
    _tick().catch(function () { /* swallowed in _tick */ });
  }, _pollMs);
  // Do not await -- fire the first tick asynchronously so start() returns
  // immediately. This matches the autoBuyer start() semantics.
  _tick().catch(function () { /* swallowed in _tick */ });
  console.log('[ATP-AutoDeliver] Started (pollMs=' + _pollMs + ')');
}
163
+
164
/**
 * Stop the poller: mark the module as not started and cancel the interval
 * timer if one is active. Safe to call repeatedly.
 *
 * @returns {void}
 */
function stop() {
  _started = false;
  if (!_pollInterval) return;
  clearInterval(_pollInterval);
  _pollInterval = null;
}
171
+
172
/**
 * Report whether the poller is currently running.
 *
 * @returns {boolean} the module's started flag
 */
function isStarted() {
  const running = _started;
  return running;
}
175
+
176
/**
 * Test hook: stop the poller and restore the in-flight guard and the poll
 * cadence to their initial values.
 *
 * @returns {void}
 */
function _resetForTests() {
  stop();
  _pollMs = DEFAULT_POLL_MS;
  _inflight = false;
}
181
+
182
+ module.exports = {
183
+ start,
184
+ stop,
185
+ isStarted,
186
+ __internals: {
187
+ tick: _tick,
188
+ readLedger: _readLedger,
189
+ writeLedger: _writeLedger,
190
+ buildProofPayload: _buildProofPayload,
191
+ resetForTests: _resetForTests,
192
+ constants: {
193
+ DEFAULT_POLL_MS,
194
+ MIN_POLL_MS,
195
+ LEDGER_FILENAME,
196
+ LEDGER_MAX_ENTRIES,
197
+ },
198
+ },
199
+ };
@@ -117,10 +117,10 @@ async function runPrompt(opts) {
117
117
  try {
118
118
  output.write("\n");
119
119
  output.write("[ATP-AutoBuyer] Your evolver can automatically place small-priced\n");
120
- output.write("ATP orders when it detects a capability gap (default OFF).\n");
120
+ output.write("ATP orders when it detects a capability gap (default ON).\n");
121
121
  output.write(" - daily hard cap: ATP_AUTOBUY_DAILY_CAP_CREDITS (default applies)\n");
122
122
  output.write(" - per-order cap: ATP_AUTOBUY_PER_ORDER_CAP_CREDITS\n");
123
- output.write(" - unset EVOLVER_ATP_AUTOBUY and restart to disable at any time.\n");
123
+ output.write(" - set EVOLVER_ATP_AUTOBUY=off and restart to disable at any time.\n");
124
124
  output.write("\n");
125
125
  } catch (_) {
126
126
  return { prompted: false, decision: null, reason: "io_error" };
@@ -128,7 +128,7 @@ async function runPrompt(opts) {
128
128
 
129
129
  let answer;
130
130
  try {
131
- answer = await ask("Enable autoBuyer for this session? [y/n/later] ", {
131
+ answer = await ask("Keep autoBuyer enabled for this session? [y/n/later] ", {
132
132
  input,
133
133
  output,
134
134
  });
@@ -143,6 +143,7 @@ async function runPrompt(opts) {
143
143
  }
144
144
  if (answer === "n" || answer === "no") {
145
145
  _writeAck(false);
146
+ env.EVOLVER_ATP_AUTOBUY = "off";
146
147
  return { prompted: true, decision: "no", reason: "user_declined" };
147
148
  }
148
149
  return { prompted: true, decision: "later", reason: "user_postponed" };
@@ -240,6 +240,25 @@ function getAtpPolicy() {
240
240
  return _get('/atp/policy', '/a2a/atp/policy');
241
241
  }
242
242
 
243
/**
 * GET /a2a/task/my?node_id=... -- list this node's claimed tasks
 *
 * ATP-originated tasks include an `atp_order_id` field on each task so the
 * merchant side can pair a completed task with its DeliveryProof and call
 * submitDelivery. Non-ATP tasks simply omit the field. This is NOT an
 * /atp/* endpoint so it never routes through the proxy passthrough.
 *
 * The query string always carries `node_id` (from getNodeId()); `limit` is
 * appended only when a truthy value is passed.
 *
 * @param {number} [limit]
 * @returns {*} whatever _hubGet returns for the composed path
 */
function listMyTasks(limit) {
  const query = new URLSearchParams();
  query.set('node_id', getNodeId());
  if (limit) {
    query.set('limit', String(limit));
  }
  return _hubGet('/a2a/task/my?' + query.toString());
}
261
+
243
262
  module.exports = {
244
263
  placeOrder,
245
264
  submitDelivery,
@@ -250,6 +269,7 @@ module.exports = {
250
269
  getOrderStatus,
251
270
  listProofs,
252
271
  getAtpPolicy,
272
+ listMyTasks,
253
273
  // exported for tests only
254
274
  _isProxyMode: _isProxyMode,
255
275
  };
package/src/atp/index.js CHANGED
@@ -7,7 +7,10 @@
7
7
  // consumerAgent - ready-to-use consumer agent template
8
8
  // serviceHelper - service publishing helper
9
9
  // defaultHandler - default order handler + config helpers for auto-ATP
10
- // autoBuyer - opt-in capability-gap auto order helper with budget caps
10
+ // autoBuyer - opt-out capability-gap auto order helper with budget caps
11
+ // autoDeliver - opt-out merchant-side submitDelivery daemon
12
+ // atpTaskPickup - merchant-side bridge from pre-claimed ATP tasks to sessions_spawn
13
+ // atpExecute - end-to-end completer (publish Gene+Capsule, complete, deliver)
11
14
  // cli - parsers and runners for the `buy`/`orders`/`verify` subcommands
12
15
 
13
16
  const hubClient = require('./hubClient');
@@ -16,6 +19,9 @@ const consumerAgent = require('./consumerAgent');
16
19
  const serviceHelper = require('./serviceHelper');
17
20
  const defaultHandler = require('./defaultHandler');
18
21
  const autoBuyer = require('./autoBuyer');
22
+ const autoDeliver = require('./autoDeliver');
23
+ const atpTaskPickup = require('./atpTaskPickup');
24
+ const atpExecute = require('./atpExecute');
19
25
  const cli = require('./cli');
20
26
 
21
27
  module.exports = {
@@ -25,5 +31,8 @@ module.exports = {
25
31
  serviceHelper,
26
32
  defaultHandler,
27
33
  autoBuyer,
34
+ autoDeliver,
35
+ atpTaskPickup,
36
+ atpExecute,
28
37
  cli,
29
38
  };
@@ -0,0 +1,133 @@
1
+ // ATP Question Composer
2
+ //
3
+ // Generates a natural-language buyer question from raw capability/signal
4
+ // inputs. autoBuyer previously concatenated signals into a string like
5
+ // "Capability gap detected by evolver: code_evolution,performance,..."
6
+ // which is uninformative to the merchant and produces poor answers.
7
+ //
8
+ // This module maps each known capability (or signal prefix) to a template
9
+ // that phrases the request as something a real buyer might ask. When no
10
+ // template matches, falls back to a generic "please help me with <caps>"
11
+ // phrasing. Templates deliberately stay concise (under 240 chars) so buyer
12
+ // budgets and merchant time are not wasted on padding.
13
+ //
14
+ // Templates are intentionally defensive: they never leak Evolver internals
15
+ // ("signals", "cycle", "mutation") into merchant-visible text.
16
+
17
// Default upper bound (in characters) for a composed question.
const DEFAULT_MAX_LEN = 240;

// Question templates keyed by normalized capability id. Every capability
// offers two phrasings; `general` is the catch-all used when nothing matches.
const TEMPLATES = {
  // --- code-centric capabilities ---
  code_evolution: [
    'I want to improve code quality on a small module. Please suggest one concrete, minimal patch I can apply, including the exact files, the change, and why it helps.',
    'I am iterating on a codebase and would like one high-leverage refactor suggestion. Be specific about the file, the current issue, and the proposed change.',
  ],
  performance: [
    'My app has a slow hot-path and I want one concrete optimization idea. Explain the likely bottleneck, propose a specific fix, and estimate the impact.',
    'I need help diagnosing a performance issue. Ask the right clarifying question if needed, or give me the top-3 most likely causes in priority order.',
  ],
  debugging: [
    'I am stuck on a bug and need a fresh pair of eyes. Walk me through a systematic debugging approach that would isolate the root cause in under an hour.',
    'Help me debug a tricky issue: please outline 3 reproduction strategies, each with the signals I should look for to confirm or rule out a hypothesis.',
  ],
  testing: [
    'I want to add tests to an under-tested module. Recommend the specific test cases (happy path, edge cases, regression) that give the best coverage per line of test code.',
    'Please review a typical testing gap for this kind of module and tell me the 3 test cases I probably missed.',
  ],
  documentation: [
    'I need to write user-facing documentation for a feature. Give me a concise outline and sample opening paragraph that sets expectations correctly.',
    'Help me rewrite a README section so it is clear to a first-time user. Focus on the smallest change that removes the most confusion.',
  ],
  refactoring: [
    'I want to refactor a module without changing behavior. Suggest the safest single-step refactor that reduces complexity, and what I should watch for during review.',
    'Please propose a refactoring plan I can apply in small commits, starting with the change that has the highest value/risk ratio.',
  ],
  // --- system-level capabilities ---
  security: [
    'Review a typical security concern for this kind of service and give me one actionable hardening I should implement first.',
    'I want a short security checklist for my app. List the top 5 issues to check in priority order, with the quickest mitigation for each.',
  ],
  data_analysis: [
    'I have a dataset and want to extract one useful insight. Recommend the analysis I should run first, the metric to compute, and how to interpret the result.',
    'Given a typical CSV of user events, which 3 analyses would most likely surface actionable patterns? Explain why for each.',
  ],
  architecture: [
    'Help me think through an architectural trade-off: I need to choose between two patterns for a small service. Give me the decision factors and a recommended default.',
    'I need a rough architecture sketch for a new feature. Describe the smallest viable design and list the 2 decisions that are easy to get wrong.',
  ],
  deployment: [
    'Help me set up a safe deployment path for my app. Outline the minimum CI/CD steps and the 3 most common pitfalls to avoid.',
    'I want to harden my deploy pipeline. Recommend the smallest change that most reduces the risk of a broken deploy reaching production.',
  ],
  // --- catch-all ---
  general: [
    'I have a small task I would like an agent to help with. Please ask me the single most useful clarifying question, then outline how you would approach it.',
    'Please give me a concise, practical answer for a typical task in this capability. If context is needed, ask one focused clarifying question first.',
  ],
};
65
+
66
/**
 * Canonicalise a capability id for template lookup: lower-case it, collapse
 * every run of characters outside [a-z0-9_] into a single underscore, and
 * strip leading/trailing underscores. Falsy input yields ''.
 *
 * @param {*} s
 * @returns {string}
 */
function _normalize(s) {
  const lowered = String(s || '').toLowerCase();
  const underscored = lowered.replace(/[^a-z0-9_]+/g, '_');
  return underscored.replace(/^_+|_+$/g, '');
}
69
+
70
/**
 * Choose one template for a capability key.
 *
 * The pick is deterministic for a given seed so the same signals yield the
 * same question across runs (plays nicely with autoBuyer's dedup hash).
 *
 * Only TEMPLATES' own array-valued entries count as templates, so keys that
 * collide with inherited object members (e.g. "constructor") fall back to
 * `general` instead of producing `undefined`. The seed is truncated to an
 * integer before indexing; a non-numeric or non-finite seed selects index 0.
 *
 * @param {string} key -- normalized capability id
 * @param {number} hashSeed -- any number; sign and fraction are ignored
 * @returns {string|null} a template, or null when the chosen list is empty
 *   or no `general` list exists
 */
function _pickTemplate(key, hashSeed) {
  const hasList = function (k) {
    return Object.prototype.hasOwnProperty.call(TEMPLATES, k) && Array.isArray(TEMPLATES[k]);
  };
  let list = null;
  if (hasList(key)) {
    list = TEMPLATES[key];
  } else if (hasList('general')) {
    list = TEMPLATES.general;
  }
  if (!list || list.length === 0) return null;
  const seed = Number(hashSeed);
  const n = Number.isFinite(seed) ? Math.abs(Math.trunc(seed)) % list.length : 0;
  return list[n];
}
78
+
79
/**
 * Deterministic 32-bit string hash (multiply-by-31 rolling hash over UTF-16
 * code units, wrapped to a signed 32-bit integer at every step).
 *
 * @param {string[]|*} parts -- an array is joined with '|'; anything else is
 *   stringified (falsy values hash as the empty string)
 * @returns {number} signed 32-bit integer; 0 for empty input
 */
function _hashFor(parts) {
  const text = Array.isArray(parts) ? parts.join('|') : String(parts || '');
  return text.split('').reduce(function (acc, unit) {
    return (acc * 31 + unit.charCodeAt(0)) | 0;
  }, 0);
}
87
+
88
/**
 * Trim a string and cap its length, marking truncation with '...'.
 *
 * The cap is `maxLen` (DEFAULT_MAX_LEN when missing or not a usable number)
 * but never less than 40. Truncated text is cut to cap - 3 characters, has
 * trailing whitespace removed, and gets '...' appended.
 *
 * @param {*} s
 * @param {number} [maxLen]
 * @returns {string}
 */
function _clip(s, maxLen) {
  const limit = Math.max(40, Number(maxLen) || DEFAULT_MAX_LEN);
  const trimmed = String(s || '').trim();
  if (trimmed.length > limit) {
    const head = trimmed.slice(0, limit - 3).replace(/\s+$/, '');
    return head + '...';
  }
  return trimmed;
}
94
+
95
/**
 * Build a natural-language buyer question from capabilities + signals.
 *
 * The first capability that has its own entry in TEMPLATES picks the template
 * list; when none does, the `general` list is used. Only TEMPLATES' own
 * properties count, so a capability id that happens to match an inherited
 * object member (e.g. "constructor") neither selects a bogus template nor
 * shadows a later capability that does have one. The concrete phrasing is
 * chosen from a hash of the normalized capabilities plus the first four
 * signals, so identical input always yields the same question.
 *
 * @param {object} opts
 * @param {string[]} opts.capabilities -- buyer-side capability ids (first one with a template picks it)
 * @param {string[]} [opts.signals] -- evolver signals (used as tiebreaker; never leaked verbatim)
 * @param {string} [opts.fallback] -- caller-provided fallback sentence, used only when no template could be picked
 * @param {number} [opts.maxLen=240]
 * @returns {string} -- composed question (never empty)
 */
function compose(opts) {
  const capabilities = Array.isArray(opts && opts.capabilities) ? opts.capabilities : [];
  const signals = Array.isArray(opts && opts.signals) ? opts.signals : [];
  const maxLen = Number(opts && opts.maxLen) || DEFAULT_MAX_LEN;

  // Own-property check: a plain TEMPLATES[k] lookup would also "find"
  // members inherited from Object.prototype.
  const hasTemplate = function (k) {
    return Object.prototype.hasOwnProperty.call(TEMPLATES, k) && Boolean(TEMPLATES[k]);
  };

  const keys = capabilities.map(function (c) { return _normalize(c); }).filter(Boolean);
  const primary = keys.find(hasTemplate) || keys[0] || 'general';
  const tmplKey = hasTemplate(primary) ? primary : 'general';

  const seed = _hashFor(keys.concat(signals.slice(0, 4)));
  const tmpl = _pickTemplate(tmplKey, seed);

  if (tmpl) return _clip(tmpl, maxLen);

  // Generic fallback when TEMPLATES does not have `general` (defensive).
  const capsText = capabilities.length ? capabilities.slice(0, 3).join(', ') : 'a common task';
  const fb = (opts && opts.fallback && String(opts.fallback).trim())
    || 'I would like help with ' + capsText + '. Please provide one concrete, actionable answer.';
  return _clip(fb, maxLen);
}
125
+
126
+ module.exports = {
127
+ compose,
128
+ // exported for tests
129
+ _normalize,
130
+ _pickTemplate,
131
+ _hashFor,
132
+ TEMPLATES,
133
+ };