@tangle-network/agent-runtime 0.45.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/agent.d.ts +5 -5
- package/dist/agent.js +2 -2
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +5 -40
- package/dist/analyst-loop.js +2 -4
- package/dist/{chunk-IJ6FGOPO.js → chunk-5YDS7BLC.js} +12 -7
- package/dist/chunk-5YDS7BLC.js.map +1 -0
- package/dist/{chunk-KEWO4KI6.js → chunk-72JQCHOZ.js} +850 -131
- package/dist/chunk-72JQCHOZ.js.map +1 -0
- package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
- package/dist/chunk-GSUO5QS6.js.map +1 -0
- package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
- package/dist/chunk-HNUXAZIJ.js.map +1 -0
- package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
- package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
- package/dist/chunk-KADIJAD4.js.map +1 -0
- package/dist/{chunk-NYN5RTLP.js → chunk-MGFEUYOH.js} +7 -7
- package/dist/chunk-MGFEUYOH.js.map +1 -0
- package/dist/{chunk-Z2QXVBA6.js → chunk-T4OQQEE3.js} +4 -4
- package/dist/chunk-T4OQQEE3.js.map +1 -0
- package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
- package/dist/{coder-CczgMqFx.d.ts → coder-CVZNGbyg.d.ts} +1 -1
- package/dist/{dynamic-BvllHV6M.d.ts → driver-DYU2sgHr.d.ts} +6 -6
- package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -2
- package/dist/index.d.ts +8 -8
- package/dist/index.js +8 -8
- package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-51BlLlVM.d.ts} +13 -7
- package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-DEm4roYF.d.ts} +11 -11
- package/dist/loop-runner-bin.d.ts +6 -6
- package/dist/loop-runner-bin.js +6 -6
- package/dist/loops.d.ts +5 -5
- package/dist/loops.js +18 -10
- package/dist/mcp/bin.js +6 -6
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +75 -74
- package/dist/mcp/index.js +203 -31
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
- package/dist/profiles.d.ts +8 -8
- package/dist/profiles.js +1 -1
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop--hSoIknW.d.ts → run-loop-DvD4aGiE.d.ts} +2 -2
- package/dist/runtime.d.ts +244 -57
- package/dist/runtime.js +18 -10
- package/dist/{types-1HbsFa7H.d.ts → types-Cbx3dNK5.d.ts} +23 -23
- package/dist/{types-DdzkffAm.d.ts → types-nBMuollC.d.ts} +34 -5
- package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
- package/dist/workflow.d.ts +3 -3
- package/dist/workflow.js +2 -2
- package/dist/workflow.js.map +1 -1
- package/package.json +1 -1
- package/skills/agent-runtime-adoption/SKILL.md +3 -3
- package/skills/generate-eval/SKILL.md +60 -0
- package/skills/loop-writer/SKILL.md +163 -0
- package/dist/chunk-FK53TXOP.js.map +0 -1
- package/dist/chunk-IJ6FGOPO.js.map +0 -1
- package/dist/chunk-KEWO4KI6.js.map +0 -1
- package/dist/chunk-NYN5RTLP.js.map +0 -1
- package/dist/chunk-PRX45WE2.js.map +0 -1
- package/dist/chunk-QR4UUC5P.js.map +0 -1
- package/dist/chunk-Z2QXVBA6.js.map +0 -1
- /package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
- /package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0
|
@@ -3,22 +3,120 @@ import {
|
|
|
3
3
|
PlannerError,
|
|
4
4
|
RuntimeRunStateError,
|
|
5
5
|
ValidationError,
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
extractLlmCallEvent,
|
|
9
|
-
mapWithConcurrency,
|
|
10
|
-
randomSuffix,
|
|
11
|
-
randomUuid,
|
|
12
|
-
sleep,
|
|
13
|
-
stringifySafe,
|
|
14
|
-
throwAbort,
|
|
15
|
-
throwIfAborted,
|
|
16
|
-
withTimeout,
|
|
17
|
-
zeroTokenUsage
|
|
18
|
-
} from "./chunk-PRX45WE2.js";
|
|
6
|
+
extractLlmCallEvent
|
|
7
|
+
} from "./chunk-GSUO5QS6.js";
|
|
19
8
|
|
|
20
9
|
// src/durable/spawn-journal.ts
|
|
21
10
|
import { createHash } from "crypto";
|
|
11
|
+
|
|
12
|
+
// src/runtime/util.ts
|
|
13
|
+
/**
 * Best-effort teardown of a sandbox-like object; never throws.
 *
 * @param {{ delete?: Function } | null | undefined} box - Object that may expose a `delete()` method.
 * @returns {Promise<boolean>} `true` when there was nothing to delete or `delete()` completed,
 *   `false` when `delete()` threw or rejected.
 */
async function deleteBoxSafe(box) {
  const deletable = Boolean(box) && typeof box.delete === "function";
  if (!deletable) return true;
  let deleted = true;
  try {
    await box.delete();
  } catch {
    // Deliberate best-effort: the failure is reported through the return value only.
    deleted = false;
  }
  return deleted;
}
|
|
22
|
+
/**
 * Build a random lowercase base-36 suffix of exactly `len` characters.
 *
 * A single `Math.random().toString(36)` only carries a limited number of
 * digits (and sometimes very few), so chunks are concatenated until the
 * requested length is reached. Not cryptographically secure — use for
 * human-readable disambiguation only.
 *
 * @param {number} [len=8] - Desired length; non-finite or non-positive values yield "".
 * @returns {string} A string of `len` characters drawn from [0-9a-z].
 */
function randomSuffix(len = 8) {
  const target = Number.isFinite(len) ? Math.trunc(len) : 0;
  let suffix = "";
  while (suffix.length < target) {
    // slice(2) drops the leading "0." of the base-36 fraction.
    suffix += Math.random().toString(36).slice(2);
  }
  return suffix.slice(0, Math.max(0, target));
}
|
|
25
|
+
function randomUuid() {
|
|
26
|
+
return crypto.randomUUID();
|
|
27
|
+
}
|
|
28
|
+
/**
 * Create (but do not throw) the error used to signal cancellation.
 *
 * @returns {Error} An `Error` with message "aborted" and `name` set to "AbortError".
 */
function abortError() {
  return Object.assign(new Error("aborted"), { name: "AbortError" });
}
|
|
33
|
+
/**
 * Unconditionally throw the cancellation error built by `abortError()`.
 *
 * @throws {Error} Always; the thrown error has `name === "AbortError"`.
 */
function throwAbort() {
  const failure = abortError();
  throw failure;
}
|
|
36
|
+
/**
 * Throw the cancellation error when `signal` reports it has been aborted.
 *
 * @param {{ aborted?: boolean } | null | undefined} signal - Optional abort signal; a missing signal is a no-op.
 * @throws {Error} The `abortError()` value when `signal.aborted` is truthy.
 */
function throwIfAborted(signal) {
  const aborted = Boolean(signal?.aborted);
  if (aborted) {
    throw abortError();
  }
}
|
|
39
|
+
/**
 * Wait for `ms` milliseconds, waking early if `signal` aborts.
 *
 * The returned promise always resolves with `undefined`; an abort resolves
 * it early rather than rejecting it.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @param {AbortSignal} [signal] - Optional signal that cuts the wait short.
 * @returns {Promise<void>}
 */
function sleep(ms, signal) {
  return new Promise((wake) => {
    if (signal?.aborted) {
      wake();
      return;
    }
    if (!signal) {
      // No signal to watch: a plain timer is all that is needed.
      setTimeout(() => wake(), ms);
      return;
    }
    const handleAbort = () => {
      clearTimeout(handle);
      wake();
    };
    const handle = setTimeout(() => {
      // Timer won the race: detach the abort listener so it does not linger on the signal.
      signal.removeEventListener("abort", handleAbort);
      wake();
    }, ms);
    signal.addEventListener("abort", handleAbort, { once: true });
  });
}
|
|
59
|
+
/**
 * Await `promise` for at most `ms` milliseconds without ever rejecting.
 *
 * Resolves with the fulfilled value, or with `undefined` when the promise
 * rejects or the timeout elapses first. The input is normalized with
 * `Promise.resolve`, so a plain (non-thenable) value resolves to itself
 * instead of making the returned promise reject with a TypeError.
 *
 * @param {Promise<*> | *} promise - Work to wait for.
 * @param {number} ms - Timeout handed to `setTimeout`.
 * @returns {Promise<*>} The fulfilled value, or `undefined` on rejection/timeout.
 */
function withTimeout(promise, ms) {
  return new Promise((resolve) => {
    const timer = setTimeout(() => resolve(void 0), ms);
    Promise.resolve(promise).then(
      (value) => {
        clearTimeout(timer);
        resolve(value);
      },
      () => {
        // Rejections are intentionally flattened to `undefined`, same as a timeout.
        clearTimeout(timer);
        resolve(void 0);
      }
    );
  });
}
|
|
74
|
+
/**
 * Render any value as a string for logs and error messages without throwing.
 *
 * Strings pass through unchanged; everything else goes through
 * `JSON.stringify`, falling back to `String(value)` when JSON yields
 * `undefined` or throws (cycles, BigInt). If `String(value)` itself throws
 * (e.g. a null-prototype object), `Object.prototype.toString` is used as a
 * last resort so the caller's real error is never masked.
 *
 * @param {*} value - Value to render.
 * @param {{ pretty?: boolean, max?: number } | null} [opts] - `pretty` indents JSON by 2;
 *   `max` truncates to that many UTF-16 units and appends an ellipsis.
 * @returns {string}
 */
function stringifySafe(value, opts = {}) {
  // Tolerate an explicit `null` options bag; the default only covers `undefined`.
  const { pretty, max } = opts ?? {};
  let s;
  try {
    if (typeof value === "string") {
      s = value;
    } else {
      const json = pretty ? JSON.stringify(value, null, 2) : JSON.stringify(value);
      s = json ?? String(value);
    }
  } catch {
    try {
      s = String(value);
    } catch {
      // String() throws for objects with no usable toString/valueOf.
      s = Object.prototype.toString.call(value);
    }
  }
  if (max !== void 0 && s.length > max) return `${s.slice(0, max)}\u2026`;
  return s;
}
|
|
89
|
+
/**
 * Create a fresh, zeroed token-usage accumulator.
 *
 * @returns {{ input: number, output: number }} A new object on every call.
 */
function zeroTokenUsage() {
  const usage = { input: 0, output: 0 };
  return usage;
}
|
|
92
|
+
/**
 * Add `delta`'s token counts into `acc` in place.
 *
 * @param {{ input: number, output: number }} acc - Accumulator that is mutated.
 * @param {{ input?: number | null, output?: number | null }} delta - Counts to add;
 *   a missing or null field counts as 0.
 * @returns {void}
 */
function addTokenUsage(acc, delta) {
  for (const field of ["input", "output"]) {
    acc[field] += delta[field] ?? 0;
  }
}
|
|
96
|
+
/**
 * Map `items` through the async `fn` with at most `limit` calls in flight,
 * preserving input order in the result.
 *
 * The limit is floored and clamped to at least 1. A limit that is not a
 * number (NaN / `undefined`) also falls back to 1: previously it produced a
 * NaN worker count, so no worker ran and an array of unprocessed holes was
 * returned.
 *
 * On the first failure no further items are started and the returned
 * promise rejects with that error; calls already in flight are not cancelled.
 *
 * @param {Array<*>} items - Inputs to map.
 * @param {number} limit - Maximum number of concurrent `fn` calls.
 * @param {(item: *, index: number) => Promise<*> | *} fn - Mapper.
 * @returns {Promise<Array<*>>} Results aligned with `items`.
 */
async function mapWithConcurrency(items, limit, fn) {
  const requested = Math.floor(limit);
  const bound = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const results = new Array(items.length);
  let next = 0;
  let failed = false;
  const worker = async () => {
    while (!failed) {
      // Claim the next index synchronously so two workers never take the same item.
      const i = next;
      next += 1;
      if (i >= items.length) return;
      try {
        results[i] = await fn(items[i], i);
      } catch (err) {
        // Stop the other workers from claiming new items, then surface the error.
        failed = true;
        throw err;
      }
    }
  };
  const workerCount = Math.min(bound, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
118
|
+
|
|
119
|
+
// src/durable/spawn-journal.ts
|
|
22
120
|
function contentAddress(artifact) {
|
|
23
121
|
const hex = createHash("sha256").update(stableStringify(artifact), "utf-8").digest("hex");
|
|
24
122
|
return `sha256:${hex}`;
|
|
@@ -376,18 +474,100 @@ function deterministicCompletion(check) {
|
|
|
376
474
|
};
|
|
377
475
|
}
|
|
378
476
|
|
|
379
|
-
// src/runtime/
|
|
380
|
-
|
|
477
|
+
// src/runtime/personify/analyst.ts
|
|
478
|
+
// Evidence URIs that start with the word "verdict", "judge" or "score" (case-insensitive)
// are treated as judge-derived.
var judgeEvidenceUri = /^(verdict|judge|score)\b/i;
/**
 * Steer firewall: reject findings that cite judge-derived metric evidence.
 *
 * Walks every finding's `evidence_refs` in order and throws on the first
 * ref whose `kind` is "metric" and whose `uri` matches `judgeEvidenceUri`.
 *
 * @param {Iterable<{ finding_id?: *, evidence_refs?: Iterable<{ kind: *, uri: * }> | null }>} findings
 * @throws {PlannerError} On the first offending evidence ref.
 */
var assertTraceDerivedFindings = (findings) => {
  for (const finding of findings) {
    const refs = finding.evidence_refs ?? [];
    for (const ref of refs) {
      if (ref.kind !== "metric") continue;
      if (!judgeEvidenceUri.test(ref.uri)) continue;
      throw new PlannerError(
        `steer-firewall: finding ${stringifySafe(finding.finding_id)} cites judge-derived evidence (${stringifySafe(ref.uri)}); findings fed to a combinator's steer decision must be trace-derived, not judge-derived (selector \u2260 judge)`
      );
    }
  }
};
|
|
490
|
+
/**
 * Build an analyst whose `analyze()` runs `options.analyst` as a spawned
 * child of `scope` and returns its findings.
 *
 * Each `analyze(input)` call: builds a task with `options.buildTask`, spawns
 * the analyst under `options.label` (default "analyst") with
 * `options.budget`, waits for that child to settle, reads its findings and
 * runs them through the steer firewall before returning them.
 *
 * @param {object} scope - Must expose `spawn()` and `next()` as used below.
 * @param {{ analyst: { act: Function }, buildTask: Function, budget?: *, label?: string }} options
 * @returns {{ analyze(input: *): Promise<Array<*>> }}
 * @throws {AnalystError} When `options.analyst` has no `act()` method, and (from `analyze`)
 *   when the spawn is refused.
 */
function createScopeAnalyst(scope, options) {
  const hasAct = Boolean(options.analyst) && typeof options.analyst.act === "function";
  if (!hasAct) {
    throw new AnalystError("createScopeAnalyst: analyst must be an Agent with an act() method");
  }
  const label = options.label ?? "analyst";
  const analyze = async (input) => {
    const task = options.buildTask(input);
    const spawnOptions = { budget: options.budget, label };
    const spawned = scope.spawn(options.analyst, task, spawnOptions);
    if (!spawned.ok) {
      throw new AnalystError(
        `createScopeAnalyst: analyst spawn refused by the conserved pool (${spawned.reason}); cannot steer node ${stringifySafe(input.nodeId)} on an unrun analyst`
      );
    }
    const settled = await drainAnalystSettlement(scope, spawned.handle.id);
    const findings = readAnalystFindings(settled);
    // Firewall check happens before the findings leave this function.
    assertTraceDerivedFindings(findings);
    return findings;
  };
  return { analyze };
}
|
|
514
|
+
/**
 * Pull settlements from `scope.next()` until the one for `analystId` arrives.
 *
 * Settlements belonging to other handles are read and skipped by this loop.
 *
 * @param {{ next(): Promise<{ handle: { id: * } } | null> }} scope
 * @param {*} analystId - Handle id to wait for (compared with `===`).
 * @returns {Promise<object>} The settlement whose `handle.id` equals `analystId`.
 * @throws {AnalystError} When `scope.next()` yields `null` before the analyst settles.
 */
async function drainAnalystSettlement(scope, analystId) {
  let settled = await scope.next();
  while (settled !== null && settled.handle.id !== analystId) {
    settled = await scope.next();
  }
  if (settled === null) {
    throw new AnalystError(
      `createScopeAnalyst: scope drained before analyst ${stringifySafe(analystId)} settled`
    );
  }
  return settled;
}
|
|
525
|
+
/**
 * Extract the findings array from an analyst's settlement.
 *
 * @param {{ kind: string, handle: { id: * }, out?: *, infra?: *, reason?: * }} settled
 * @returns {Array<*>} `settled.out`, returned as-is when it is an array.
 * @throws {AnalystError} When the settlement's `kind` is "down".
 * @throws {PlannerError} When `settled.out` is not an array.
 */
function readAnalystFindings(settled) {
  if (settled.kind === "down") {
    throw new AnalystError(
      `createScopeAnalyst: analyst ${stringifySafe(settled.handle.id)} settled down (${settled.infra ? "infra" : "result"}): ${stringifySafe(settled.reason)}`
    );
  }
  const { out } = settled;
  if (Array.isArray(out)) return out;
  throw new PlannerError(
    `createScopeAnalyst: analyst ${stringifySafe(settled.handle.id)} must return AnalystFinding[], got ${stringifySafe(out)}`
  );
}
|
|
539
|
+
/**
 * Assemble the context object handed to a steer decision.
 *
 * Findings are firewall-checked first. `lastValidScore` is only present
 * when `observedBestScore(settledSoFar)` returned something other than
 * `undefined`.
 *
 * @param {Array<*>} findings - Analyst findings; must pass `assertTraceDerivedFindings`.
 * @param {Iterable<*>} settledSoFar - Settlements observed so far.
 * @returns {{ findings: Array<*>, settledSoFar: Iterable<*>, lastValidScore?: number }}
 * @throws {PlannerError} Propagated from the firewall check.
 */
function buildSteerContext(findings, settledSoFar) {
  assertTraceDerivedFindings(findings);
  const context = { findings, settledSoFar };
  const lastValidScore = observedBestScore(settledSoFar);
  if (lastValidScore !== void 0) {
    context.lastValidScore = lastValidScore;
  }
  return context;
}
|
|
548
|
+
/**
 * Highest score among settlements that finished ("done") with a valid verdict.
 *
 * A settlement counts only when `kind === "done"`, `verdict.valid === true`
 * and `verdict.score` is a number other than NaN. NaN is skipped because
 * once it became the running best, every later `score > NaN` comparison
 * was false and the real best score could never replace it.
 *
 * @param {Iterable<{ kind: string, verdict?: { valid?: boolean, score?: number } | null }>} settledSoFar
 * @returns {number | undefined} The best score, or `undefined` when none qualifies.
 */
function observedBestScore(settledSoFar) {
  let best;
  for (const s of settledSoFar) {
    if (s.kind !== "done") continue;
    const v = s.verdict;
    if (!v || v.valid !== true || typeof v.score !== "number" || Number.isNaN(v.score)) continue;
    if (best === void 0 || v.score > best) best = v.score;
  }
  return best;
}
|
|
558
|
+
|
|
559
|
+
// src/runtime/driver.ts
|
|
560
|
+
function createDriver(options) {
|
|
381
561
|
if (typeof options.planner !== "function") {
|
|
382
|
-
throw new ValidationError("
|
|
562
|
+
throw new ValidationError("createDriver: planner must be a function");
|
|
383
563
|
}
|
|
384
564
|
const maxIterations = options.maxIterations ?? 8;
|
|
385
565
|
if (!Number.isFinite(maxIterations) || maxIterations <= 0) {
|
|
386
|
-
throw new ValidationError("
|
|
566
|
+
throw new ValidationError("createDriver: maxIterations must be > 0");
|
|
387
567
|
}
|
|
388
568
|
const maxFanout = options.maxFanout ?? 4;
|
|
389
569
|
if (!Number.isFinite(maxFanout) || maxFanout < 1) {
|
|
390
|
-
throw new ValidationError("
|
|
570
|
+
throw new ValidationError("createDriver: maxFanout must be >= 1");
|
|
391
571
|
}
|
|
392
572
|
let pending;
|
|
393
573
|
return {
|
|
@@ -498,7 +678,7 @@ async function runAnalyze(analyze, task, history) {
|
|
|
498
678
|
const findings = await analyze({ task, history });
|
|
499
679
|
if (!Array.isArray(findings)) {
|
|
500
680
|
throw new PlannerError(
|
|
501
|
-
`
|
|
681
|
+
`createDriver: analyze hook must return AnalystFinding[], got ${stringifySafe(findings)}`
|
|
502
682
|
);
|
|
503
683
|
}
|
|
504
684
|
assertTraceDerivedFindings(findings);
|
|
@@ -508,23 +688,11 @@ async function runComplete(complete, task, history) {
|
|
|
508
688
|
const verdict = await complete.assess({ task, history });
|
|
509
689
|
if (!verdict || typeof verdict.done !== "boolean" || verdict.determinism !== "deterministic" && verdict.determinism !== "probabilistic") {
|
|
510
690
|
throw new PlannerError(
|
|
511
|
-
`
|
|
691
|
+
`createDriver: complete.assess must return a CompletionVerdict {done, determinism}, got ${stringifySafe(verdict)}`
|
|
512
692
|
);
|
|
513
693
|
}
|
|
514
694
|
return verdict;
|
|
515
695
|
}
|
|
516
|
-
var JUDGE_EVIDENCE_URI = /^(verdict|judge|score)\b/i;
|
|
517
|
-
function assertTraceDerivedFindings(findings) {
|
|
518
|
-
for (const f of findings) {
|
|
519
|
-
for (const ref of f.evidence_refs ?? []) {
|
|
520
|
-
if (ref.kind === "metric" && JUDGE_EVIDENCE_URI.test(ref.uri)) {
|
|
521
|
-
throw new PlannerError(
|
|
522
|
-
`steer-firewall: finding ${stringifySafe(f.finding_id)} cites judge-derived evidence (${stringifySafe(ref.uri)}); analyses fed to the driver must be trace-derived, not judge-derived (selector \u2260 judge)`
|
|
523
|
-
);
|
|
524
|
-
}
|
|
525
|
-
}
|
|
526
|
-
}
|
|
527
|
-
}
|
|
528
696
|
function renderAnalyses(findings) {
|
|
529
697
|
if (findings.length === 0) return "";
|
|
530
698
|
const rows = findings.map((f) => {
|
|
@@ -535,6 +703,56 @@ function renderAnalyses(findings) {
|
|
|
535
703
|
${rows.join("\n")}`;
|
|
536
704
|
}
|
|
537
705
|
|
|
706
|
+
// src/runtime/inline-sandbox-client.ts
|
|
707
|
+
/**
 * Whether `v` is a non-null object that has a `Symbol.asyncIterator` property.
 *
 * @param {*} v - Value to inspect.
 * @returns {boolean}
 */
function isAsyncIterable(v) {
  if (v === null || typeof v !== "object") return false;
  return Symbol.asyncIterator in v;
}
|
|
710
|
+
/**
 * Run `exec.execute(task, signal)` to completion and return its artifact.
 *
 * When `execute` returns an async iterable, every event is consumed and
 * discarded, then `exec.resultArtifact()` supplies the result. Otherwise the
 * value `execute` returned is used directly.
 *
 * @param {{ execute: Function, resultArtifact?: Function }} exec
 * @param {*} task - Forwarded to `exec.execute`.
 * @param {AbortSignal} signal - Forwarded to `exec.execute`.
 * @returns {Promise<*>}
 */
async function settle(exec, task, signal) {
  const outcome = exec.execute(task, signal);
  if (!isAsyncIterable(outcome)) return outcome;
  for await (const _event of outcome) {
    // Events are drained only to drive the executor to its end.
  }
  return exec.resultArtifact();
}
|
|
719
|
+
/**
 * Wrap an executor `factory` in an object that exposes a sandbox-client-like `create()`.
 *
 * Each `create()` returns a box with a sequential id (`inline-0`,
 * `inline-1`, …). `box.streamPrompt(message)` builds an executor via
 * `factory`, settles it on `message`, and yields one `"result"` event
 * carrying the final text, token usage and cost taken from the artifact.
 * The executor is torn down with `"brutalKill"` afterwards; teardown
 * rejections are ignored. `box.delete()` does nothing.
 *
 * @param {(spec: object, env: { signal: AbortSignal, seams: object }) => object} factory
 * @returns {{ create(options?: *): Promise<{ id: string, streamPrompt: Function, delete: Function }> }}
 */
function inlineSandboxClient(factory) {
  let created = 0;
  const create = async (_options) => {
    const id = `inline-${created}`;
    created += 1;
    async function* streamPrompt(message) {
      const controller = new AbortController();
      const { signal } = controller;
      const exec = factory({ profile: { name: id }, harness: null }, { signal, seams: {} });
      try {
        const artifact = await settle(exec, message, signal);
        const out = artifact.out;
        const tokenUsage = {
          inputTokens: artifact.spent.tokens.input,
          outputTokens: artifact.spent.tokens.output
        };
        yield {
          type: "result",
          data: {
            finalText: out?.content ?? "",
            tokenUsage,
            costUsd: artifact.spent.usd
          }
        };
      } finally {
        // Teardown is best-effort: a rejected teardown must not replace the real outcome.
        await exec.teardown("brutalKill").catch(() => {
        });
      }
    }
    return {
      id,
      streamPrompt,
      async delete() {
      }
    };
  };
  return { create };
}
|
|
755
|
+
|
|
538
756
|
// src/runtime/report-usage.ts
|
|
539
757
|
function reportLoopUsage(cost, result, source = "loop") {
|
|
540
758
|
cost.observe(result.costUsd, source);
|
|
@@ -648,6 +866,7 @@ async function acquireSandbox(client, options, acquire = {}) {
|
|
|
648
866
|
const sleep2 = acquire.sleep ?? ((ms) => sleep(ms, acquire.signal));
|
|
649
867
|
const pollMs = acquire.pollIntervalMs ?? 3e3;
|
|
650
868
|
const deadline = now() + (acquire.readyTimeoutMs ?? 6e5);
|
|
869
|
+
const appearScans = 5;
|
|
651
870
|
const name = options.name ?? acquire.name ?? `loop-sbx-${randomUuid()}`;
|
|
652
871
|
const createOpts = { ...options, name };
|
|
653
872
|
const c = client;
|
|
@@ -663,9 +882,12 @@ async function acquireSandbox(client, options, acquire = {}) {
|
|
|
663
882
|
if (!isRetryable(err)) throw err;
|
|
664
883
|
lastErr = err;
|
|
665
884
|
if (typeof c.list === "function") {
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
885
|
+
for (let scan = 0; scan < appearScans && now() < deadline; scan += 1) {
|
|
886
|
+
const found = (await c.list().catch(() => []))?.find((b) => b.name === name);
|
|
887
|
+
if (found)
|
|
888
|
+
return await waitReadyOrDestroy(found, deadline, pollMs, acquire.signal, now, sleep2);
|
|
889
|
+
if (scan < appearScans - 1) await sleep2(pollMs);
|
|
890
|
+
}
|
|
669
891
|
}
|
|
670
892
|
attempt += 1;
|
|
671
893
|
await sleep2(Math.min(pollMs * attempt, 15e3));
|
|
@@ -714,9 +936,13 @@ function isRetryable(err) {
|
|
|
714
936
|
if (typeof status === "number" && RETRYABLE_HTTP.has(status)) return true;
|
|
715
937
|
const name = e.name ?? "";
|
|
716
938
|
if (name === "TimeoutError" || name === "ServerError" || name === "NetworkError") return true;
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
939
|
+
const msg = e.message ?? "";
|
|
940
|
+
if (/\b(timed out|timeout|gateway|temporarily unavailable|too many requests|ECONNRESET|ETIMEDOUT|EAI_AGAIN)\b/i.test(
|
|
941
|
+
msg
|
|
942
|
+
)) {
|
|
943
|
+
return true;
|
|
944
|
+
}
|
|
945
|
+
return /provision failed|edge data plane|not reachable|failed to create sandbox/i.test(msg);
|
|
720
946
|
}
|
|
721
947
|
|
|
722
948
|
// src/runtime/sandbox-backend.ts
|
|
@@ -764,10 +990,31 @@ async function resolveCapabilities(client) {
|
|
|
764
990
|
// src/runtime/sandbox-lineage.ts
|
|
765
991
|
var TEARDOWN_TIMEOUT_MS = 15e3;
|
|
766
992
|
var DEFAULT_FORK_CONCURRENCY = 4;
|
|
993
|
+
/**
 * Non-streaming prompt path: dispatch the prompt, wait for the session's
 * final result, and emit it as a single `"result"` event.
 *
 * The abort signal is checked before dispatching and again after the
 * result arrives. The event `id` is the session id returned by
 * `dispatchPrompt`, not necessarily the `sessionId` passed in.
 *
 * @param {{ dispatchPrompt: Function, session: Function }} box
 * @param {*} prompt - Forwarded to `box.dispatchPrompt`.
 * @param {*} sessionId - Requested session id, forwarded to `box.dispatchPrompt`.
 * @param {{ aborted: boolean }} signal
 * @yields {{ type: "result", id: *, data: object }}
 */
async function* pollPromptEvents(box, prompt, sessionId, signal) {
  if (signal.aborted) throwAbort();
  const { sessionId: activeSessionId } = await box.dispatchPrompt(prompt, { sessionId, signal });
  const result = await box.session(activeSessionId).result();
  if (signal.aborted) throwAbort();
  const data = {
    finalText: result.response ?? "",
    success: result.success
  };
  // `error` and `usage` are only attached when the result carries a truthy value for them.
  if (result.error) data.error = result.error;
  if (result.usage) data.usage = result.usage;
  yield { type: "result", id: activeSessionId, data };
}
|
|
1010
|
+
/**
 * Pick the prompt transport: `"poll"` uses `pollPromptEvents`, anything
 * else calls `box.streamPrompt`.
 *
 * @param {string} streaming - Transport mode; only the exact value "poll" selects polling.
 * @param {object} box - Sandbox exposing `streamPrompt` (and the methods polling needs).
 * @param {*} prompt
 * @param {*} sessionId
 * @param {*} signal
 * @returns {*} Whatever the chosen transport returns.
 */
function promptEvents(streaming, box, prompt, sessionId, signal) {
  if (streaming === "poll") {
    return pollPromptEvents(box, prompt, sessionId, signal);
  }
  return box.streamPrompt(prompt, { sessionId, signal });
}
|
|
767
1013
|
function createSandboxLineage(client, capabilities, options = {}) {
|
|
768
1014
|
if (!client || typeof client.create !== "function") {
|
|
769
1015
|
throw new ValidationError("createSandboxLineage: client.create is required");
|
|
770
1016
|
}
|
|
1017
|
+
const streaming = options.streaming ?? "sse";
|
|
771
1018
|
const forkConcurrency = Math.max(
|
|
772
1019
|
1,
|
|
773
1020
|
Math.floor(options.maxConcurrency ?? DEFAULT_FORK_CONCURRENCY)
|
|
@@ -777,6 +1024,7 @@ function createSandboxLineage(client, capabilities, options = {}) {
|
|
|
777
1024
|
if (signal.aborted) throwAbort();
|
|
778
1025
|
const opts = buildBackendOptions(spec.profile, spec.sandboxOverrides);
|
|
779
1026
|
const box = await acquireSandbox(client, opts, { signal });
|
|
1027
|
+
await spec.prepareBox?.(box, { signal });
|
|
780
1028
|
owned.push(box);
|
|
781
1029
|
return box;
|
|
782
1030
|
};
|
|
@@ -784,13 +1032,13 @@ function createSandboxLineage(client, capabilities, options = {}) {
|
|
|
784
1032
|
async start(spec, prompt, signal) {
|
|
785
1033
|
const box = await acquireFresh(spec, signal);
|
|
786
1034
|
const sessionId = mintSessionId();
|
|
787
|
-
const events = box
|
|
1035
|
+
const events = promptEvents(streaming, box, prompt, sessionId, signal);
|
|
788
1036
|
return { handle: { box, sessionId }, events };
|
|
789
1037
|
},
|
|
790
1038
|
async continue(handle, prompt, signal) {
|
|
791
1039
|
if (signal.aborted) throwAbort();
|
|
792
1040
|
await assertSessionLive(handle.box, handle.sessionId);
|
|
793
|
-
return handle.box
|
|
1041
|
+
return promptEvents(streaming, handle.box, prompt, handle.sessionId, signal);
|
|
794
1042
|
},
|
|
795
1043
|
async fork(parent, prompts, specs, signal) {
|
|
796
1044
|
if (prompts.length === 0) {
|
|
@@ -805,17 +1053,18 @@ function createSandboxLineage(client, capabilities, options = {}) {
|
|
|
805
1053
|
if (checkpointId !== void 0) {
|
|
806
1054
|
const box2 = await forkFromCheckpoint(parent.box, checkpointId, signal);
|
|
807
1055
|
owned.push(box2);
|
|
1056
|
+
await spec.prepareBox?.(box2, { signal });
|
|
808
1057
|
const sessionId2 = mintSessionId();
|
|
809
1058
|
return {
|
|
810
1059
|
handle: { box: box2, sessionId: sessionId2 },
|
|
811
|
-
events:
|
|
1060
|
+
events: promptEvents(streaming, box2, prompt, sessionId2, signal)
|
|
812
1061
|
};
|
|
813
1062
|
}
|
|
814
1063
|
const box = await acquireFresh(spec, signal);
|
|
815
1064
|
const sessionId = mintSessionId();
|
|
816
1065
|
return {
|
|
817
1066
|
handle: { box, sessionId },
|
|
818
|
-
events: box
|
|
1067
|
+
events: promptEvents(streaming, box, prompt, sessionId, signal)
|
|
819
1068
|
};
|
|
820
1069
|
});
|
|
821
1070
|
},
|
|
@@ -884,6 +1133,7 @@ async function runLoop(options) {
|
|
|
884
1133
|
if (!Number.isFinite(maxConcurrency) || maxConcurrency <= 0) {
|
|
885
1134
|
throw new ValidationError("runLoop: maxConcurrency must be > 0");
|
|
886
1135
|
}
|
|
1136
|
+
const sandboxStreaming = options.lineage?.streaming ?? "sse";
|
|
887
1137
|
if (!options.ctx?.sandboxClient || typeof options.ctx.sandboxClient.create !== "function") {
|
|
888
1138
|
throw new ValidationError("runLoop: ctx.sandboxClient.create is required");
|
|
889
1139
|
}
|
|
@@ -1000,6 +1250,7 @@ async function runLoop(options) {
|
|
|
1000
1250
|
output: options.output,
|
|
1001
1251
|
validator: options.validator,
|
|
1002
1252
|
maxConcurrency,
|
|
1253
|
+
streaming: sandboxStreaming,
|
|
1003
1254
|
signal: controller.signal,
|
|
1004
1255
|
ctx: options.ctx,
|
|
1005
1256
|
runId,
|
|
@@ -1059,7 +1310,10 @@ async function setUpLineage(options, maxConcurrency) {
|
|
|
1059
1310
|
}
|
|
1060
1311
|
const capabilities = await probeSandboxCapabilities(options.ctx.sandboxClient);
|
|
1061
1312
|
return {
|
|
1062
|
-
lineage: createSandboxLineage(options.ctx.sandboxClient, capabilities, {
|
|
1313
|
+
lineage: createSandboxLineage(options.ctx.sandboxClient, capabilities, {
|
|
1314
|
+
maxConcurrency,
|
|
1315
|
+
streaming: lineageOpts.streaming
|
|
1316
|
+
}),
|
|
1063
1317
|
options: lineageOpts,
|
|
1064
1318
|
handles: /* @__PURE__ */ new Map(),
|
|
1065
1319
|
canPrune: typeof options.driver.describePlan !== "function"
|
|
@@ -1189,7 +1443,8 @@ async function executeIteration(args) {
|
|
|
1189
1443
|
stream = acquired.events;
|
|
1190
1444
|
} else {
|
|
1191
1445
|
box = await createSandboxForSpec(args.ctx.sandboxClient, spec, args.signal);
|
|
1192
|
-
|
|
1446
|
+
const prompt = spec.taskToPrompt(args.item.task);
|
|
1447
|
+
stream = args.streaming === "poll" ? promptEvents("poll", box, prompt, `${args.runId}-i${args.item.index}`, args.signal) : box.streamPrompt(prompt, { signal: args.signal });
|
|
1193
1448
|
}
|
|
1194
1449
|
const placement = describeSandboxPlacement(args.ctx.sandboxClient, box);
|
|
1195
1450
|
await emitTrace(args.ctx.traceEmitter, {
|
|
@@ -1222,6 +1477,7 @@ async function executeIteration(args) {
|
|
|
1222
1477
|
if (args.validator) {
|
|
1223
1478
|
slot.verdict = await args.validator.validate(slot.output, {
|
|
1224
1479
|
iteration: args.item.index,
|
|
1480
|
+
...box ? { box } : {},
|
|
1225
1481
|
signal: args.signal,
|
|
1226
1482
|
traceEmitter: args.ctx.traceEmitter
|
|
1227
1483
|
});
|
|
@@ -1318,7 +1574,9 @@ function readSandboxId(box) {
|
|
|
1318
1574
|
/**
 * Acquire a sandbox for `spec` and run its optional `prepareBox` hook.
 *
 * If `prepareBox` throws, the freshly acquired sandbox is deleted
 * (best-effort, bounded by `TEARDOWN_TIMEOUT_MS`) before the error is
 * rethrown. Without this the box would leak: the caller never receives a
 * reference to it and so cannot clean it up.
 *
 * @param {object} client - Sandbox client passed to `acquireSandbox`.
 * @param {{ profile: *, sandboxOverrides?: *, prepareBox?: Function }} spec
 * @param {AbortSignal} signal - Checked before acquiring; forwarded to acquisition and `prepareBox`.
 * @returns {Promise<object>} The prepared sandbox.
 * @throws The abort error when `signal` is already aborted, or whatever
 *   `acquireSandbox` / `prepareBox` throw.
 */
async function createSandboxForSpec(client, spec, signal) {
  const opts = buildBackendOptions(spec.profile, spec.sandboxOverrides);
  if (signal.aborted) throwAbort();
  const box = await acquireSandbox(client, opts, { signal });
  try {
    await spec.prepareBox?.(box, { signal });
  } catch (err) {
    // deleteBoxSafe never throws and withTimeout never rejects, so the original error always surfaces.
    await withTimeout(deleteBoxSafe(box), TEARDOWN_TIMEOUT_MS);
    throw err;
  }
  return box;
}
|
|
1323
1581
|
function finalize(args) {
|
|
1324
1582
|
const winner = args.options.selectWinner ? args.options.selectWinner(args.iterations) : args.options.driver.selectWinner?.(args.iterations) ?? defaultSelectWinner(args.iterations);
|
|
@@ -1479,86 +1737,160 @@ function loopDispatch(opts) {
|
|
|
1479
1737
|
return (profile, scenario, ctx) => runLoopForCell(opts, scenario, profile, ctx);
|
|
1480
1738
|
}
|
|
1481
1739
|
|
|
1482
|
-
// src/runtime/
|
|
1483
|
-
|
|
1484
|
-
var
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1740
|
+
// src/runtime/observe.ts
|
|
1741
|
+
import { makeFinding } from "@tangle-network/agent-eval";
|
|
1742
|
+
var observerId = "observe/trace";
|
|
1743
|
+
/**
 * Condense a trace into at most `maxLines` human-readable lines.
 *
 * Each event contributes at most one line, chosen in this order:
 *   - `data.part.type === "tool"`  -> `tool:<tool>(<status>)`
 *   - type contains "error"        -> `ERROR: <message|detail, first 200 chars>`
 *   - type is "status"             -> `status:<data.status>`
 *   - type contains "tool"         -> `tool-event:<type>`
 * Consecutive identical lines collapse into `<line> xN`.
 *
 * Runs are counted structurally instead of re-parsing the previous output
 * line, so a line that naturally ends in " xN" is not mistaken for a
 * counter and multi-line error messages still collapse. Null events and
 * non-string `type` values are tolerated and produce no line.
 *
 * @param {Iterable<{ type?: string, data?: object } | null | undefined>} trace
 * @param {number} maxLines - Cap applied after collapsing repeats.
 * @returns {string} Newline-joined summary, or a placeholder when nothing matched.
 */
function summarizeTrace(trace, maxLines) {
  const runs = [];
  const record = (line) => {
    const last = runs[runs.length - 1];
    if (last !== void 0 && last.line === line) last.count += 1;
    else runs.push({ line, count: 1 });
  };
  for (const ev of trace) {
    const e = ev ?? {};
    const t = typeof e.type === "string" ? e.type.toLowerCase() : "";
    const d = e.data ?? {};
    const part = d.part ?? {};
    if (part.type === "tool") {
      record(`tool:${part.tool}${part.state?.status ? `(${part.state.status})` : ""}`);
    } else if (t.includes("error")) {
      record(`ERROR: ${String(d.message ?? d.detail ?? "").slice(0, 200)}`);
    } else if (t === "status" && typeof d.status === "string") {
      record(`status:${d.status}`);
    } else if (t.includes("tool")) {
      record(`tool-event:${t}`);
    }
  }
  const out = runs.map(({ line, count }) => (count > 1 ? `${line} x${count}` : line));
  return out.slice(0, maxLines).join("\n") || "(no tool/error events in trace)";
}
|
|
1766
|
+
var findingsSchema = {
|
|
1767
|
+
name: "observer_findings",
|
|
1768
|
+
schema: {
|
|
1769
|
+
type: "object",
|
|
1770
|
+
additionalProperties: false,
|
|
1771
|
+
properties: {
|
|
1772
|
+
findings: {
|
|
1773
|
+
type: "array",
|
|
1774
|
+
items: {
|
|
1775
|
+
type: "object",
|
|
1776
|
+
additionalProperties: false,
|
|
1777
|
+
properties: {
|
|
1778
|
+
area: {
|
|
1779
|
+
type: "string",
|
|
1780
|
+
description: "tool-use | cost | verification | process | failure | latency"
|
|
1781
|
+
},
|
|
1782
|
+
severity: { type: "string", enum: ["critical", "high", "medium", "low", "info"] },
|
|
1783
|
+
claim: {
|
|
1784
|
+
type: "string",
|
|
1785
|
+
description: "what you OBSERVED in the trace (a fact, with the evidence)"
|
|
1786
|
+
},
|
|
1787
|
+
recommended_action: {
|
|
1788
|
+
type: "string",
|
|
1789
|
+
description: "the concrete change for the agent or operator"
|
|
1790
|
+
},
|
|
1791
|
+
audience: {
|
|
1792
|
+
type: "string",
|
|
1793
|
+
enum: ["agent", "operator"],
|
|
1794
|
+
description: "who should act on this"
|
|
1795
|
+
},
|
|
1796
|
+
confidence: { type: "number" }
|
|
1797
|
+
},
|
|
1798
|
+
required: ["area", "severity", "claim", "recommended_action", "audience", "confidence"]
|
|
1799
|
+
}
|
|
1491
1800
|
}
|
|
1492
|
-
}
|
|
1801
|
+
},
|
|
1802
|
+
required: ["findings"]
|
|
1493
1803
|
}
|
|
1494
1804
|
};
|
|
1495
|
-
function
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1805
|
+
async function observe(input, opts) {
|
|
1806
|
+
const traceSummary = summarizeTrace(input.trace, opts.maxTraceLines ?? 80);
|
|
1807
|
+
const res = await opts.chat.chat(
|
|
1808
|
+
{
|
|
1809
|
+
...opts.model ? { model: opts.model } : {},
|
|
1810
|
+
jsonSchema: findingsSchema,
|
|
1811
|
+
messages: [
|
|
1812
|
+
{
|
|
1813
|
+
role: "system",
|
|
1814
|
+
content: "You are a third-person OBSERVER watching an AI agent work. You see its TRACE (what it did), not its grader. From the trace, name SPECIFIC, behavior-grounded findings: wasted/duplicated tool calls, thrash/retries, token/cost waste, missing verification, failure patterns. For each, a concrete recommended_action, and whether the AGENT (fix its skills/prompt/tools) or the OPERATOR (fix framing/decomposition/config) should act. Only claim what the trace shows. No findings if the run was clean."
|
|
1815
|
+
},
|
|
1816
|
+
{
|
|
1817
|
+
role: "user",
|
|
1818
|
+
content: `TASK: ${input.task}
|
|
1819
|
+
|
|
1820
|
+
OUTCOME: ${input.outcome ?? "unknown"}
|
|
1821
|
+
|
|
1822
|
+
FINAL OUTPUT (truncated):
|
|
1823
|
+
${input.output.slice(0, 1200)}
|
|
1824
|
+
|
|
1825
|
+
TRACE (in order; "xN" = repeated):
|
|
1826
|
+
${traceSummary}`
|
|
1827
|
+
}
|
|
1828
|
+
]
|
|
1829
|
+
},
|
|
1830
|
+
{ ...opts.signal ? { signal: opts.signal } : {} }
|
|
1831
|
+
);
|
|
1832
|
+
const parsed = parseFindings(res.content);
|
|
1833
|
+
const producedAt = input.runId ? `${input.runId}` : observerId;
|
|
1834
|
+
const findings = parsed.map(
|
|
1835
|
+
(f) => makeFinding({
|
|
1836
|
+
analyst_id: observerId,
|
|
1837
|
+
area: `${f.area}`,
|
|
1838
|
+
severity: f.severity,
|
|
1839
|
+
claim: f.claim,
|
|
1840
|
+
recommended_action: f.recommended_action,
|
|
1841
|
+
confidence: typeof f.confidence === "number" ? f.confidence : 0.5,
|
|
1842
|
+
evidence_refs: [],
|
|
1843
|
+
// The observer reads BEHAVIOR, never the judge verdict — firewall provenance.
|
|
1844
|
+
derived_from_judge: false,
|
|
1845
|
+
metadata: { audience: f.audience },
|
|
1846
|
+
...input.runId ? { subject: input.runId } : {}
|
|
1847
|
+
})
|
|
1848
|
+
);
|
|
1849
|
+
const learned = [];
|
|
1850
|
+
if (opts.corpus) {
|
|
1851
|
+
for (const f of findings) {
|
|
1852
|
+
const record = {
|
|
1853
|
+
schemaVersion: "1.0.0",
|
|
1854
|
+
id: f.finding_id,
|
|
1855
|
+
runId: input.runId ?? observerId,
|
|
1856
|
+
producedAt: f.produced_at ?? producedAt,
|
|
1857
|
+
area: f.area,
|
|
1858
|
+
claim: f.recommended_action ?? f.claim,
|
|
1859
|
+
...f.claim ? { rationale: f.claim } : {},
|
|
1860
|
+
tags: [...opts.tags ?? [], `audience:${f.metadata?.audience ?? "agent"}`],
|
|
1861
|
+
confidence: f.confidence,
|
|
1862
|
+
evidence: [{ kind: "finding", uri: f.finding_id }]
|
|
1863
|
+
};
|
|
1864
|
+
const r = await opts.corpus.append(record);
|
|
1865
|
+
if (r.succeeded) learned.push(record);
|
|
1526
1866
|
}
|
|
1527
|
-
if (settled.handle.id === analystId) return settled;
|
|
1528
|
-
}
|
|
1529
|
-
}
|
|
1530
|
-
function readAnalystFindings(settled) {
|
|
1531
|
-
if (settled.kind === "down") {
|
|
1532
|
-
throw new AnalystError(
|
|
1533
|
-
`createScopeAnalyst: analyst ${stringifySafe(settled.handle.id)} settled down (${settled.infra ? "infra" : "result"}): ${stringifySafe(settled.reason)}`
|
|
1534
|
-
);
|
|
1535
|
-
}
|
|
1536
|
-
const out = settled.out;
|
|
1537
|
-
if (!Array.isArray(out)) {
|
|
1538
|
-
throw new PlannerError(
|
|
1539
|
-
`createScopeAnalyst: analyst ${stringifySafe(settled.handle.id)} must return AnalystFinding[], got ${stringifySafe(out)}`
|
|
1540
|
-
);
|
|
1541
1867
|
}
|
|
1542
|
-
return
|
|
1868
|
+
return { findings, learned, report: renderReport(findings) };
|
|
1543
1869
|
}
|
|
1544
|
-
function
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
}
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
}
|
|
1561
|
-
|
|
1870
|
+
/**
 * Extract the `findings` array from an observer model response.
 *
 * The response is first parsed as JSON. If that fails, the widest `{ … }` span
 * in the text is tried instead, since the JSON may be wrapped in surrounding
 * prose. Any response from which no `findings` array can be recovered yields
 * an empty list rather than an exception.
 *
 * @param {string} content - Raw response text.
 * @returns {unknown[]} The parsed `findings` array, or `[]` when none is recoverable.
 */
function parseFindings(content) {
  // JSON.parse never returns `undefined`, so it is a safe "could not parse" marker.
  const tryParse = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return void 0;
    }
  };
  let parsed = tryParse(content);
  if (parsed === void 0) {
    const embedded = typeof content === "string" ? content.match(/\{[\s\S]*\}/) : null;
    // The brace span may itself be invalid JSON (e.g. "use {braces} here").
    parsed = embedded ? tryParse(embedded[0]) : void 0;
  }
  // `parsed` may be null or a primitive (e.g. the JSON text "null").
  const findings = parsed?.findings;
  return Array.isArray(findings) ? findings : [];
}
|
|
1881
|
+
/**
 * Render observer findings as a markdown report grouped by audience.
 *
 * Findings whose `metadata.audience` is missing are treated as "agent".
 * Only the "agent" and "operator" audiences are rendered; a section with no
 * findings is omitted.
 *
 * @param {Array<object>} findings - Findings with `severity`, `claim`,
 *   optional `recommended_action` and optional `metadata.audience`.
 * @returns {string} The report text, or a fixed "clean run" line when there are no findings.
 */
function renderReport(findings) {
  if (findings.length === 0) {
    return "\u2713 clean run \u2014 the observer found nothing to change.";
  }
  const pick = (wanted) => findings.filter((finding) => (finding.metadata?.audience ?? "agent") === wanted);
  const renderItem = (finding) => `- [${finding.severity}] ${finding.claim}\n\u2192 ${finding.recommended_action ?? ""}`;
  const sections = [];
  const groups = [
    ["For the agent (fix skills / prompt / tools)", pick("agent")],
    ["For you (the operator)", pick("operator")]
  ];
  for (const [title, items] of groups) {
    if (items.length > 0) {
      sections.push(`**${title}**\n${items.map(renderItem).join("\n")}\n`);
    }
  }
  return sections.join("\n");
}
|
|
1563
1895
|
|
|
1564
1896
|
// src/runtime/supervise/scope.ts
|
|
@@ -1574,7 +1906,7 @@ function createScope(args) {
|
|
|
1574
1906
|
const spec = agent.executorSpec;
|
|
1575
1907
|
if (!isAgentSpec(spec)) {
|
|
1576
1908
|
throw new ValidationError(
|
|
1577
|
-
`scope.spawn: agent "${agent.name}" exposes no \`executorSpec\` (AgentSpec) to resolve a
|
|
1909
|
+
`scope.spawn: agent "${agent.name}" exposes no \`executorSpec\` (AgentSpec) to resolve a Executor`
|
|
1578
1910
|
);
|
|
1579
1911
|
}
|
|
1580
1912
|
const resolved = args.executors.resolve(spec);
|
|
@@ -1802,7 +2134,7 @@ async function runChild(live, executor, childAbort, task, opts, pool, ticket, bl
|
|
|
1802
2134
|
live.status = "running";
|
|
1803
2135
|
const ran = executor.execute(task, childAbort.signal);
|
|
1804
2136
|
let artifact;
|
|
1805
|
-
if (
|
|
2137
|
+
if (isAsyncIterable2(ran)) {
|
|
1806
2138
|
const spend = await foldStream(ran);
|
|
1807
2139
|
live.spent = spend;
|
|
1808
2140
|
artifact = executor.resultArtifact();
|
|
@@ -1924,7 +2256,7 @@ function downRecord(reason, infra) {
|
|
|
1924
2256
|
/**
 * Build a fresh all-zero spend record.
 *
 * @returns {{iterations: number, tokens: {input: number, output: number}, usd: number, ms: number}}
 *   A new object on every call, so callers may mutate it freely.
 */
function zeroSpend2() {
  const tokens = { input: 0, output: 0 };
  return { iterations: 0, tokens, usd: 0, ms: 0 };
}
|
|
1927
|
-
function
|
|
2259
|
+
/**
 * Report whether `value` is a non-null object exposing a callable `Symbol.asyncIterator`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True only for objects (not functions or primitives) with an async-iterator method.
 */
function isAsyncIterable2(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return typeof value[Symbol.asyncIterator] === "function";
}
|
|
1930
2262
|
function isAgentSpec(value) {
|
|
@@ -2463,6 +2795,7 @@ import { estimateCost, isModelPriced } from "@tangle-network/agent-eval";
|
|
|
2463
2795
|
var routerSeamKey = "router";
|
|
2464
2796
|
var sandboxSeamKey = "sandbox";
|
|
2465
2797
|
var cliSeamKey = "cli";
|
|
2798
|
+
var bridgeSeamKey = "bridge";
|
|
2466
2799
|
function contentRef(prefix, value) {
|
|
2467
2800
|
let str;
|
|
2468
2801
|
try {
|
|
@@ -2756,6 +3089,96 @@ function killWithGrace(proc, grace) {
|
|
|
2756
3089
|
}, grace);
|
|
2757
3090
|
});
|
|
2758
3091
|
}
|
|
3092
|
+
/**
 * Executor factory that runs a task as one non-streaming
 * `POST {bridgeUrl}/v1/chat/completions` request.
 *
 * The seam read from `ctx` must provide `bridgeUrl`, `bridgeBearer` and `model`.
 * `agentProfile` and `timeoutMs` are optional.
 *
 * @param {object} spec - Agent spec, passed through to `taskToMessages`.
 * @param {object} ctx - Executor context; `ctx.signal` aborts in-flight requests.
 * @returns {object} Executor exposing `runtime`, `execute`, `teardown` and `resultArtifact`.
 * @throws {ValidationError} When a required seam field is missing.
 */
var bridgeExecutor = (spec, ctx) => {
  const seam = readSeam(ctx, bridgeSeamKey, "bridge");
  if (!seam.bridgeUrl || !seam.bridgeBearer || !seam.model) {
    throw new ValidationError(
      "bridgeExecutor: BridgeSeam.bridgeUrl + bridgeBearer + model required"
    );
  }
  // One controller per executor: parent abort, timeout and teardown all funnel into it.
  const controller = new AbortController();
  const onParentAbort = () => controller.abort();
  if (ctx.signal.aborted) {
    controller.abort();
  } else {
    ctx.signal.addEventListener("abort", onParentAbort, { once: true });
  }
  let artifact;
  return {
    // NOTE(review): reports "cli" although this is the bridge backend — confirm this is intended.
    runtime: "cli",
    /**
     * Send the task to the bridge and record the resulting artifact.
     *
     * @param {unknown} task - Task converted to chat messages via `taskToMessages`.
     * @param {AbortSignal} signal - Per-call abort signal, linked with the executor's own.
     * @returns {Promise<object>} `{ outRef, out: { content, toolCalls }, spent }`.
     * @throws {ValidationError} When the bridge answers with a non-2xx status.
     */
    async execute(task, signal) {
      const messages = taskToMessages(task, spec);
      const started = Date.now();
      const linked = linkSignals(signal, controller.signal);
      // The timeout aborts the executor-wide controller, so once it fires this executor stays aborted.
      const timer = seam.timeoutMs ? setTimeout(() => controller.abort(), seam.timeoutMs) : void 0;
      try {
        const res = await fetch(`${seam.bridgeUrl.replace(/\/$/, "")}/v1/chat/completions`, {
          method: "POST",
          headers: {
            "content-type": "application/json",
            authorization: `Bearer ${seam.bridgeBearer}`
          },
          body: JSON.stringify({
            model: seam.model,
            stream: false,
            ...seam.agentProfile ? { agent_profile: seam.agentProfile } : {},
            messages
          }),
          ...linked ? { signal: linked } : {}
        });
        if (!res.ok) {
          throw new ValidationError(
            `bridgeExecutor: bridge ${res.status}: ${(await res.text()).slice(0, 300)}`
          );
        }
        const data = await res.json();
        const u = data.usage;
        // Token usage counts only when both counters are numeric; otherwise it is recorded as zero.
        const usage = u && typeof u.prompt_tokens === "number" && typeof u.completion_tokens === "number" ? { input: u.prompt_tokens, output: u.completion_tokens } : void 0;
        const msg = data.choices?.[0]?.message;
        const content = msg?.content ?? "";
        const toolCalls = (msg?.tool_calls ?? []).map((t) => t.function?.name ?? "unknown");
        const spent = {
          iterations: 1,
          tokens: usage ? { input: usage.input, output: usage.output } : zeroTokenUsage(),
          usd: typeof u?.cost === "number" ? u.cost : 0,
          ms: Date.now() - started
        };
        const out = { content, toolCalls };
        artifact = { outRef: contentRef("bridge", { model: seam.model, content }), out, spent };
        return artifact;
      } finally {
        if (timer) clearTimeout(timer);
      }
    },
    /**
     * Abort any in-flight request and detach from the parent signal.
     *
     * @returns {Promise<{destroyed: boolean}>} Always resolves `{ destroyed: true }`.
     */
    teardown(_grace) {
      // Detach so a long-lived parent signal does not keep one listener per torn-down executor.
      ctx.signal.removeEventListener("abort", onParentAbort);
      controller.abort();
      return Promise.resolve({ destroyed: true });
    },
    /**
     * Return a shallow copy of the artifact produced by the last successful `execute()`.
     *
     * @throws {ValidationError} When no `execute()` has completed yet.
     */
    resultArtifact() {
      if (!artifact) {
        throw new ValidationError("bridgeExecutor: resultArtifact() read before execute()");
      }
      return { ...artifact };
    }
  };
};
|
|
3164
|
+
/**
 * Build an executor factory bound to one backend configuration.
 *
 * Every field of `config` except `backend` is installed as the seam under
 * `ctx.seams[backend]` before delegating to that backend's executor factory.
 * For the sandbox backend, `spec.harness` wins over `config.harness`, and
 * `null` is used when neither is set.
 *
 * @param {object} config - `{ backend: "router" | "bridge" | "cli" | "sandbox", ...seamFields }`.
 * @returns {(spec: object, ctx: object) => object} Executor factory.
 * @throws {ValidationError} From the returned factory, when `config.backend` is not a known backend.
 */
function createExecutor(config) {
  return (spec, ctx) => {
    const { backend, ...seam } = config;
    const seamed = { ...ctx, seams: { ...ctx.seams, [backend]: seam } };
    switch (backend) {
      case "router":
        return routerInlineExecutor(spec, seamed);
      case "bridge":
        return bridgeExecutor(spec, seamed);
      case "cli":
        return cliExecutor(spec, seamed);
      case "sandbox": {
        const harness = spec.harness ?? config.harness ?? null;
        return sandboxExecutor({ ...spec, harness }, seamed);
      }
      default:
        // Fail loudly here instead of returning undefined and crashing later at the call site.
        throw new ValidationError(
          `createExecutor: unknown backend ${JSON.stringify(backend)}`
        );
    }
  };
}
|
|
2759
3182
|
function createExecutorRegistry() {
|
|
2760
3183
|
const factories = /* @__PURE__ */ new Map();
|
|
2761
3184
|
factories.set("router", routerInlineExecutor);
|
|
@@ -3538,6 +3961,298 @@ function requireSpend(rolled, id, root) {
|
|
|
3538
3961
|
return spend;
|
|
3539
3962
|
}
|
|
3540
3963
|
|
|
3964
|
+
// src/runtime/sandbox-run.ts
|
|
3965
|
+
/**
 * Open a multi-turn sandbox run and return a handle to drive it.
 *
 * Sandbox capabilities are probed and a lineage created up front. The returned
 * handle exposes `start(prompt)` (first turn, once only), `resume(prompt)`
 * (later turns), `close()` (teardown), and `box` / `sessionId` getters that
 * throw until `start()` has produced a handle.
 *
 * Lifecycle events are reported through `notifyRuntimeHookEvent` with targets
 * `agent.run` and `agent.turn` and phases `before` / `after` / `error`.
 *
 * @param {object} client - Sandbox client handed to the capability probe and lineage.
 * @param {object} options - Reads `agentRun`, `runId`, `scenarioId`, `hooks`, `signal`,
 *   `now`, `maxConcurrency` and `readRetryDelayMs`.
 * @param {object} deliverable - Either `{ kind: "events", fromEvents }` or an
 *   artifact deliverable with `path` and `fromArtifact`.
 * @returns {Promise<object>} The run handle described above.
 */
async function openSandboxRun(client, options, deliverable) {
  const runId = options.runId ?? `sandbox-run-${randomSuffix()}`;
  const now = options.now ?? Date.now;
  const capabilities = await probeSandboxCapabilities(client);
  const lineage = createSandboxLineage(client, capabilities, {
    ...options.maxConcurrency !== void 0 ? { maxConcurrency: options.maxConcurrency } : {}
  });
  let handle;
  let started = false;
  let runStartedAt;
  let failed = false;
  let turnCount = 0;

  // Forward one lifecycle event to the runtime hooks, stamped with this run's identity.
  function emit(event) {
    notifyRuntimeHookEvent(
      options.hooks,
      {
        id: `${runId}:${event.target}:${event.phase}${event.stepIndex === void 0 ? "" : `:${event.stepIndex}`}`,
        runId,
        scenarioId: options.scenarioId,
        target: event.target,
        phase: event.phase,
        timestamp: event.timestamp,
        stepIndex: event.stepIndex,
        payload: event.payload,
        metadata: { producer: "openSandboxRun" }
      },
      { signal: options.signal }
    );
  }

  // Payload fields shared by every event; session/sandbox ids appear once a handle exists.
  const runPayload = () => ({
    agentName: options.agentRun.name ?? options.agentRun.profile.name ?? "agent",
    profileName: options.agentRun.profile.name,
    backendType: backendType(options.agentRun),
    deliverableKind: deliverable.kind,
    ...deliverable.kind === "artifact" ? { deliverablePath: deliverable.path } : {},
    ...handle ? { sessionId: handle.sessionId, sandboxId: handle.box.id } : {}
  });

  // Turn payload; the prompt is reported only as a length and a hash, never verbatim.
  const turnPayload = (prompt, turnKind, startedAt, result, error) => ({
    ...runPayload(),
    turnKind,
    promptChars: prompt.length,
    promptHash: hashText(prompt),
    ...result !== void 0 || error !== void 0 ? { durationMs: Math.max(0, now() - startedAt) } : {},
    ...result ? {
      eventCount: result.events.length,
      eventTypes: eventTypeCounts(result.events),
      ...result.readError !== void 0 ? { readError: result.readError } : {}
    } : {},
    ...error !== void 0 ? { error: errorMessage(error) } : {}
  });

  // Drain the turn's event stream, then build the deliverable from events or from the artifact file.
  async function settle2(box, events) {
    const collected = [];
    for await (const ev of events) collected.push(ev);
    if (deliverable.kind === "events") {
      return { out: deliverable.fromEvents(collected), events: collected };
    }
    throwIfAborted(options.signal);
    let raw = "";
    let readError;
    const readAttempts = 4;
    const readDelayMs = options.readRetryDelayMs ?? 1e3;
    for (let attempt = 0; attempt < readAttempts; attempt += 1) {
      throwIfAborted(options.signal);
      try {
        raw = await box.fs.read(deliverable.path);
        readError = void 0;
        break;
      } catch (err) {
        readError = err instanceof Error ? err.message : String(err);
        // Linearly growing back-off between attempts; no sleep after the final one.
        if (attempt < readAttempts - 1 && readDelayMs > 0)
          await sleep(readDelayMs * (attempt + 1), options.signal);
      }
    }
    // If every read failed, the deliverable is still built from "" and the last error is surfaced.
    return {
      out: deliverable.fromArtifact(raw, collected),
      events: collected,
      ...readError !== void 0 ? { readError } : {}
    };
  }

  // Run one turn: emit `before`, open the stream, settle it, then emit `after` — or emit
  // turn + run `error` events and rethrow. `open` must resolve to `{ box, events }`.
  async function runTurn(prompt, turnKind, open) {
    const stepIndex = turnCount;
    const turnStartedAt = now();
    emit({
      target: "agent.turn",
      phase: "before",
      timestamp: turnStartedAt,
      stepIndex,
      payload: turnPayload(prompt, turnKind, turnStartedAt)
    });
    try {
      const { box, events } = await open();
      const result = await settle2(box, events);
      turnCount += 1;
      emit({
        target: "agent.turn",
        phase: "after",
        timestamp: now(),
        stepIndex,
        payload: turnPayload(prompt, turnKind, turnStartedAt, result)
      });
      return result;
    } catch (error) {
      failed = true;
      emit({
        target: "agent.turn",
        phase: "error",
        timestamp: now(),
        stepIndex,
        payload: turnPayload(prompt, turnKind, turnStartedAt, void 0, error)
      });
      emit({
        target: "agent.run",
        phase: "error",
        timestamp: now(),
        payload: { ...runPayload(), turnCount, error: errorMessage(error) }
      });
      throw error;
    }
  }

  return {
    get box() {
      if (!handle) throw new Error("openSandboxRun: box unavailable before start()");
      return handle.box;
    },
    get sessionId() {
      if (!handle) throw new Error("openSandboxRun: sessionId unavailable before start()");
      return handle.sessionId;
    },
    async start(prompt) {
      if (started)
        throw new Error(
          "openSandboxRun: start() already called \u2014 use resume() to continue the session"
        );
      started = true;
      runStartedAt = now();
      emit({
        target: "agent.run",
        phase: "before",
        timestamp: runStartedAt,
        payload: { ...runPayload(), turnCount: 0 }
      });
      return runTurn(prompt, "start", async () => {
        const r = await lineage.start(
          options.agentRun,
          prompt,
          options.signal
        );
        // Publish the handle before settling so turn payloads carry session/sandbox ids.
        handle = r.handle;
        return { box: handle.box, events: r.events };
      });
    },
    async resume(prompt) {
      if (!handle) throw new Error("openSandboxRun: resume() called before start()");
      return runTurn(prompt, "resume", async () => {
        const box = handle.box;
        const events = await lineage.continue(handle, prompt, options.signal);
        return { box, events };
      });
    },
    async close() {
      await lineage.teardown();
      // The closing run event is emitted only if start() was ever called.
      if (runStartedAt !== void 0) {
        emit({
          target: "agent.run",
          phase: "after",
          timestamp: now(),
          payload: {
            ...runPayload(),
            turnCount,
            status: failed ? "error" : "completed",
            durationMs: Math.max(0, now() - runStartedAt)
          }
        });
      }
    }
  };
}
|
|
4171
|
+
/**
 * Read the backend type declared in an agent spec's sandbox overrides.
 *
 * @param {object} spec - Agent run spec.
 * @returns {unknown} `spec.sandboxOverrides.backend.type`, or `undefined` when any link is absent.
 */
function backendType(spec) {
  const { sandboxOverrides } = spec;
  return sandboxOverrides?.backend?.type;
}
|
|
4175
|
+
/**
 * Count events by their `type`.
 *
 * @param {Iterable<{type: string}>} events - Events to tally.
 * @returns {Record<string, number>} Plain object mapping each type to its number of occurrences.
 */
function eventTypeCounts(events) {
  // Tally on a prototype-less object so types such as "constructor", "toString"
  // or "__proto__" count from zero instead of reading inherited members.
  const tally = Object.create(null);
  for (const event of events) {
    tally[event.type] = (tally[event.type] ?? 0) + 1;
  }
  // Copy into an ordinary object so callers still receive a normal record.
  return { ...tally };
}
|
|
4180
|
+
/**
 * Hash a string with 32-bit FNV-1a over its UTF-16 code units.
 *
 * @param {string} value - Text to hash.
 * @returns {string} Eight lowercase hex digits, zero-padded.
 */
function hashText(value) {
  const offsetBasis = 2166136261;
  const prime = 16777619;
  let acc = offsetBasis;
  let index = 0;
  while (index < value.length) {
    // XOR in the code unit first, then multiply (32-bit wrap via Math.imul).
    acc = Math.imul(acc ^ value.charCodeAt(index), prime);
    index += 1;
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  const hex = (acc >>> 0).toString(16);
  return hex.padStart(8, "0");
}
|
|
4188
|
+
/**
 * Turn a thrown value into a message string.
 *
 * @param {unknown} error - Any thrown value.
 * @returns {string} `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
4191
|
+
|
|
4192
|
+
// src/runtime/workspace.ts
|
|
4193
|
+
/**
 * Create a shell runner that executes a command on the local machine via
 * `child_process.execFile` (no shell interpolation: `args[0]` is the binary,
 * the rest are passed as its argument vector).
 *
 * The returned function never rejects on a failing command; it resolves with
 * the captured output and a numeric exit code.
 *
 * @returns {(args: string[], cwd?: string) => Promise<{stdout: string, stderr: string, code: number}>}
 *   `code` is 0 on success, the process exit status on a non-zero exit, and 1
 *   when the process could not be run to a normal exit (spawn failure, signal,
 *   output-buffer overflow).
 */
function localShell() {
  return async (args, cwd) => {
    // Loaded lazily, at first use — presumably so importing this module does not require child_process.
    const { execFile } = await import("child_process");
    const [bin, ...rest] = args;
    return new Promise((resolve) => {
      execFile(
        bin ?? "",
        rest,
        { cwd, encoding: "utf-8", maxBuffer: 64 * 1024 * 1024 },
        (err, stdout, stderr) => {
          const out = stdout ?? "";
          const errOut = stderr ?? "";
          if (!err) {
            resolve({ stdout: out, stderr: errOut, code: 0 });
            return;
          }
          if (typeof err.code === "number") {
            // Normal non-zero exit: report the status and the output untouched.
            resolve({ stdout: out, stderr: errOut, code: err.code });
            return;
          }
          // No exit status: `err.code` is null (signal) or a string such as "ENOENT".
          // Keep `code` numeric and surface the cause when the process wrote nothing to stderr.
          resolve({ stdout: out, stderr: errOut || err.message, code: 1 });
        }
      );
    });
  };
}
|
|
4213
|
+
/**
 * Build a workspace backed by a git remote.
 *
 * Every git invocation is given a fixed commit identity. Unless
 * `opts.noHooks === false`, it also sets `core.hooksPath=/dev/null`.
 *
 * @param {object} opts
 * @param {string} opts.ref - Remote URL or path, used for clone and ls-remote.
 * @param {string} [opts.branch="main"] - Branch to clone, rebase onto and push.
 * @param {boolean} [opts.noHooks] - Pass `false` to leave git hooks enabled.
 * @param {Function} [opts.shell] - `(args, cwd) => Promise<{stdout, stderr, code}>`; defaults to `localShell()`.
 * @returns {object} `{ ref, materialize(dir), commit(dir, message), head() }`.
 */
function gitWorkspace(opts) {
  const exec = opts.shell ?? localShell();
  const branch = opts.branch ?? "main";
  const hookFlags = opts.noHooks === false ? [] : ["-c", "core.hooksPath=/dev/null"];
  const identityFlags = ["-c", "user.email=workspace@tangle.local", "-c", "user.name=workspace"];

  // Run git and hand back the raw { stdout, stderr, code } without judging the exit code.
  const raw = (args, cwd) => exec(["git", ...hookFlags, ...identityFlags, ...args], cwd);

  // Run git and return stdout, throwing on any non-zero exit.
  const run = async (args, cwd) => {
    const res = await raw(args, cwd);
    if (res.code === 0) {
      return res.stdout;
    }
    throw new Error(
      `git ${args.join(" ")} failed (${res.code}): ${tail(res.stderr || res.stdout)}`
    );
  };

  const currentRev = async (dir) => {
    const out = await run(["rev-parse", "HEAD"], dir);
    return out.trim();
  };

  return {
    ref: opts.ref,

    /** Clone `opts.ref` at the configured branch into `dir`. */
    async materialize(dir) {
      await run(["clone", "--branch", branch, opts.ref, dir]);
    },

    /**
     * Stage everything in `dir`, commit it, rebase onto the remote branch and push.
     * Resolves `{ ok: true, rev }` on success (including when there was nothing
     * to commit) or `{ ok: false, conflict }` when the rebase or the push fails.
     */
    async commit(dir, message) {
      await run(["add", "-A"], dir);
      const pending = (await run(["status", "--porcelain"], dir)).trim();
      if (pending) {
        await run(["commit", "-m", message], dir);
        const pulled = await raw(["pull", "--rebase", "origin", branch], dir);
        if (pulled.code !== 0) {
          // Best effort: leave the checkout out of the half-finished rebase; a failure here is ignored.
          await exec(["git", ...hookFlags, "rebase", "--abort"], dir).catch(() => {
          });
          return { ok: false, conflict: tail(pulled.stderr || pulled.stdout) };
        }
        const pushed = await raw(["push", "origin", branch], dir);
        if (pushed.code !== 0) {
          return { ok: false, conflict: tail(pushed.stderr || pushed.stdout) };
        }
      }
      return { ok: true, rev: await currentRev(dir) };
    },

    /** Resolve the remote branch tip via `ls-remote`; "" when the output is empty. */
    async head() {
      const listing = await run(["ls-remote", opts.ref, `refs/heads/${branch}`]);
      const [sha] = listing.split(/\s+/);
      return sha ?? "";
    }
  };
}
|
|
4252
|
+
/**
 * Keep only the last 400 characters of `s`, enough to show the end of a command's output.
 *
 * @param {string} s - Text to truncate.
 * @returns {string} `s` unchanged when it has 400 characters or fewer, otherwise its final 400.
 */
function tail(s) {
  const keep = 400;
  const start = Math.max(s.length - keep, 0);
  return s.slice(start);
}
|
|
4255
|
+
|
|
3541
4256
|
export {
|
|
3542
4257
|
contentAddress,
|
|
3543
4258
|
InMemoryResultBlobStore,
|
|
@@ -3550,8 +4265,12 @@ export {
|
|
|
3550
4265
|
stopSentinel,
|
|
3551
4266
|
sentinelCompletion,
|
|
3552
4267
|
deterministicCompletion,
|
|
3553
|
-
|
|
4268
|
+
assertTraceDerivedFindings,
|
|
4269
|
+
createScopeAnalyst,
|
|
4270
|
+
buildSteerContext,
|
|
4271
|
+
createDriver,
|
|
3554
4272
|
renderAnalyses,
|
|
4273
|
+
inlineSandboxClient,
|
|
3555
4274
|
reportLoopUsage,
|
|
3556
4275
|
defineRuntimeHooks,
|
|
3557
4276
|
composeRuntimeHooks,
|
|
@@ -3565,9 +4284,8 @@ export {
|
|
|
3565
4284
|
createSandboxForSpec,
|
|
3566
4285
|
defaultSelectWinner,
|
|
3567
4286
|
loopDispatch,
|
|
3568
|
-
|
|
3569
|
-
|
|
3570
|
-
buildSteerContext,
|
|
4287
|
+
observe,
|
|
4288
|
+
renderReport,
|
|
3571
4289
|
createScope,
|
|
3572
4290
|
settledToIteration,
|
|
3573
4291
|
pipeline,
|
|
@@ -3580,9 +4298,7 @@ export {
|
|
|
3580
4298
|
InMemoryCorpus,
|
|
3581
4299
|
FileCorpus,
|
|
3582
4300
|
renderCorpusToInstructions,
|
|
3583
|
-
|
|
3584
|
-
sandboxExecutor,
|
|
3585
|
-
cliExecutor,
|
|
4301
|
+
createExecutor,
|
|
3586
4302
|
createExecutorRegistry,
|
|
3587
4303
|
spendFromUsageEvents,
|
|
3588
4304
|
createBudgetPool,
|
|
@@ -3594,6 +4310,9 @@ export {
|
|
|
3594
4310
|
definePersona,
|
|
3595
4311
|
runPersonified,
|
|
3596
4312
|
trajectoryReport,
|
|
3597
|
-
equalKOnCost
|
|
4313
|
+
equalKOnCost,
|
|
4314
|
+
openSandboxRun,
|
|
4315
|
+
localShell,
|
|
4316
|
+
gitWorkspace
|
|
3598
4317
|
};
|
|
3599
|
-
//# sourceMappingURL=chunk-
|
|
4318
|
+
//# sourceMappingURL=chunk-72JQCHOZ.js.map
|