@fusionkit/handoff 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +21 -0
- package/dist/agents.js +33 -0
- package/dist/checkpoint-manager.d.ts +14 -0
- package/dist/checkpoint-manager.js +33 -0
- package/dist/defaults.d.ts +18 -0
- package/dist/defaults.js +18 -0
- package/dist/handoff.d.ts +303 -0
- package/dist/handoff.js +593 -0
- package/dist/index.d.ts +29 -0
- package/dist/index.js +20 -0
- package/dist/isolation.d.ts +12 -0
- package/dist/isolation.js +10 -0
- package/dist/policy.d.ts +69 -0
- package/dist/policy.js +79 -0
- package/dist/review.d.ts +57 -0
- package/dist/review.js +110 -0
- package/dist/run-executor.d.ts +76 -0
- package/dist/run-executor.js +71 -0
- package/dist/run.d.ts +88 -0
- package/dist/run.js +159 -0
- package/dist/targets.d.ts +14 -0
- package/dist/targets.js +20 -0
- package/dist/test/plan.test.d.ts +1 -0
- package/dist/test/plan.test.js +93 -0
- package/dist/test/tools.test.d.ts +1 -0
- package/dist/test/tools.test.js +106 -0
- package/dist/test/triggers.test.d.ts +1 -0
- package/dist/test/triggers.test.js +114 -0
- package/dist/tool-journal.d.ts +13 -0
- package/dist/tool-journal.js +37 -0
- package/dist/tools.d.ts +22 -0
- package/dist/tools.js +99 -0
- package/dist/trace-log.d.ts +6 -0
- package/dist/trace-log.js +9 -0
- package/dist/triggers.d.ts +43 -0
- package/dist/triggers.js +64 -0
- package/package.json +32 -0
package/dist/run.js
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { isTerminalStatus } from "@fusionkit/protocol";
|
|
2
|
+
import { pullRun } from "@fusionkit/workspace";
|
|
3
|
+
import { DEFAULT_POLL_INTERVAL_MS, DEFAULT_WAIT_TIMEOUT_MS } from "./defaults.js";
|
|
4
|
+
/**
|
|
5
|
+
* A continuation that became a governed run. Wraps the plane API with the
|
|
6
|
+
* operations a continuation caller needs: wait, approve, receipt, pull.
|
|
7
|
+
*/
|
|
8
|
+
export class HandoffRun {
|
|
9
|
+
runId;
|
|
10
|
+
target;
|
|
11
|
+
envelope;
|
|
12
|
+
envelopeHash;
|
|
13
|
+
/** Human-readable planner explanation for why this continuation ran. */
|
|
14
|
+
explanation;
|
|
15
|
+
/** Isolation strategy applied at pull time. */
|
|
16
|
+
isolate;
|
|
17
|
+
client;
|
|
18
|
+
actor;
|
|
19
|
+
workspaceDir;
|
|
20
|
+
onTerminal;
|
|
21
|
+
onPulled;
|
|
22
|
+
constructor(input) {
|
|
23
|
+
this.runId = input.runId;
|
|
24
|
+
this.target = input.target;
|
|
25
|
+
this.envelope = input.envelope;
|
|
26
|
+
this.envelopeHash = input.envelopeHash;
|
|
27
|
+
this.explanation = input.explanation ?? "";
|
|
28
|
+
if (input.isolate)
|
|
29
|
+
this.isolate = input.isolate;
|
|
30
|
+
this.client = input.client;
|
|
31
|
+
this.actor = input.actor;
|
|
32
|
+
this.workspaceDir = input.workspaceDir;
|
|
33
|
+
this.onTerminal = input.onTerminal;
|
|
34
|
+
this.onPulled = input.onPulled;
|
|
35
|
+
}
|
|
36
|
+
/** The checkpoint tier this continuation carried. */
|
|
37
|
+
get tier() {
|
|
38
|
+
return this.envelope.checkpoint.tier;
|
|
39
|
+
}
|
|
40
|
+
/** Deep link to this run in the control panel. */
|
|
41
|
+
get url() {
|
|
42
|
+
return this.client.runUiUrl(this.runId);
|
|
43
|
+
}
|
|
44
|
+
/** Where the signed evidence lives: bundle download via the CLI. */
|
|
45
|
+
get auditUrl() {
|
|
46
|
+
return this.client.runBundleUrl(this.runId);
|
|
47
|
+
}
|
|
48
|
+
async status() {
|
|
49
|
+
const view = await this.client.getRun(this.runId);
|
|
50
|
+
return view.status;
|
|
51
|
+
}
|
|
52
|
+
async events() {
|
|
53
|
+
const view = await this.client.getRun(this.runId);
|
|
54
|
+
return view.events;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Poll until the run is terminal or blocked on consent. Consent is a
|
|
58
|
+
* human decision; the SDK surfaces it instead of spinning forever.
|
|
59
|
+
*/
|
|
60
|
+
async wait(options = {}) {
|
|
61
|
+
const timeoutMs = options.timeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
|
|
62
|
+
const pollMs = options.pollMs ?? DEFAULT_POLL_INTERVAL_MS;
|
|
63
|
+
const deadline = Date.now() + timeoutMs;
|
|
64
|
+
// Polling against a consistent per-iteration snapshot: terminal states
|
|
65
|
+
// are absorbing, so a status flip between polls is observed on the next
|
|
66
|
+
// iteration rather than lost. The interval and ceiling are shared with
|
|
67
|
+
// Handoff.stream via ./defaults.js and caller-tunable per wait().
|
|
68
|
+
for (;;) {
|
|
69
|
+
const view = await this.client.getRun(this.runId);
|
|
70
|
+
if (isTerminalStatus(view.status)) {
|
|
71
|
+
this.onTerminal(this.runId, view.status);
|
|
72
|
+
return { status: view.status, consentRequirements: [] };
|
|
73
|
+
}
|
|
74
|
+
if (view.status === "awaiting_approval") {
|
|
75
|
+
return {
|
|
76
|
+
status: view.status,
|
|
77
|
+
consentRequirements: view.consentRequirements
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
if (Date.now() >= deadline) {
|
|
81
|
+
throw new Error(`run ${this.runId} did not finish within ${timeoutMs}ms`);
|
|
82
|
+
}
|
|
83
|
+
await new Promise((resolve) => setTimeout(resolve, pollMs));
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* The session's combined stdout/stderr, fetched by the content hash
|
|
88
|
+
* recorded in the event chain. Empty when the session produced no output.
|
|
89
|
+
*/
|
|
90
|
+
async sessionLog() {
|
|
91
|
+
const events = await this.events();
|
|
92
|
+
// Reverse scan returns the newest log artifact, which by the harness
|
|
93
|
+
// convention supersedes earlier ones — exactly the right pick when a
|
|
94
|
+
// session emitted more than one.
|
|
95
|
+
for (let i = events.length - 1; i >= 0; i--) {
|
|
96
|
+
const entry = events[i];
|
|
97
|
+
if (!entry)
|
|
98
|
+
continue;
|
|
99
|
+
const event = entry.event;
|
|
100
|
+
if (event.type === "artifact.created" && event.kind === "log") {
|
|
101
|
+
const blob = await this.client.getBlob(event.hash);
|
|
102
|
+
return blob.toString("utf8");
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
return "";
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Exit code of the session's final harness command (the run's overall
|
|
109
|
+
* outcome by convention). Sessions that execute multiple commands surface
|
|
110
|
+
* each one as its own command.executed entry in events() for callers that
|
|
111
|
+
* need per-command results.
|
|
112
|
+
*/
|
|
113
|
+
async commandExitCode() {
|
|
114
|
+
const events = await this.events();
|
|
115
|
+
for (let i = events.length - 1; i >= 0; i--) {
|
|
116
|
+
const entry = events[i];
|
|
117
|
+
if (!entry)
|
|
118
|
+
continue;
|
|
119
|
+
if (entry.event.type === "command.executed") {
|
|
120
|
+
return entry.event.exitCode;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
return undefined;
|
|
124
|
+
}
|
|
125
|
+
/** Grant required consent as the given actor (defaults to the context actor). */
|
|
126
|
+
async approve(actor) {
|
|
127
|
+
const result = await this.client.approve(this.runId, actor ?? this.actor);
|
|
128
|
+
return result.status;
|
|
129
|
+
}
|
|
130
|
+
/** Cancel the run if it has not been claimed by a runner yet. */
|
|
131
|
+
async cancel(actor) {
|
|
132
|
+
const result = await this.client.cancel(this.runId, actor ?? this.actor);
|
|
133
|
+
return result.status;
|
|
134
|
+
}
|
|
135
|
+
/** The signed, offline-verifiable receipt bundle. */
|
|
136
|
+
receipt() {
|
|
137
|
+
return this.client.getBundle(this.runId);
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* Divergence-safe pull of the run's output into the local workspace:
|
|
141
|
+
* applied in place when the workspace is clean at the contract base ref,
|
|
142
|
+
* otherwise materialized on a dedicated branch. A `branch()` isolation
|
|
143
|
+
* strategy (set here or at continueIn/parallel time) always lands on a
|
|
144
|
+
* branch and never touches the working tree.
|
|
145
|
+
*/
|
|
146
|
+
async pull(options = {}) {
|
|
147
|
+
const bundle = await this.receipt();
|
|
148
|
+
const diffHash = bundle.receipt.workspaceOut.diffHash;
|
|
149
|
+
if (!diffHash) {
|
|
150
|
+
this.onPulled(this.runId, "empty");
|
|
151
|
+
return { mode: "empty" };
|
|
152
|
+
}
|
|
153
|
+
const isolate = options.isolate ?? this.isolate;
|
|
154
|
+
const diff = await this.client.getBlob(diffHash);
|
|
155
|
+
const result = pullRun(options.repoDir ?? this.workspaceDir, this.runId, bundle.contract.workspace.baseRef, diff, { forceBranch: isolate?.id === "branch" });
|
|
156
|
+
this.onPulled(this.runId, result.mode);
|
|
157
|
+
return result;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Typed runtime-target descriptors. No magic strings in the hot path:
 * `targets.pool("eng-prod")` instead of `"eng-prod"`.
 */
export type RuntimeTarget = {
    /** Discriminant tag; always the literal `"runtime-target"`. */
    kind: "runtime-target";
    /** Identifier of the target. */
    id: string;
    /** Execution locality; `"customer-runner"` is the only value declared. */
    locality: "customer-runner";
    /** Name of the runner pool this target refers to. */
    pool: string;
};
|
|
11
|
+
/** Factory namespace for {@link RuntimeTarget} descriptors. */
export declare const targets: {
    /**
     * A named runner pool: outbound-only runners enrolled with the plane.
     *
     * @param name Name of the pool.
     * @returns The descriptor for that pool.
     */
    pool(name: string): RuntimeTarget;
};
|
package/dist/targets.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed runtime-target descriptors. No magic strings in the hot path:
|
|
3
|
+
* `targets.pool("eng-prod")` instead of `"eng-prod"`.
|
|
4
|
+
*/
|
|
5
|
+
export const targets = {
|
|
6
|
+
/** A named runner pool: outbound-only runners enrolled with the plane. */
|
|
7
|
+
pool(name) {
|
|
8
|
+
if (!name)
|
|
9
|
+
throw new Error("a pool name is required");
|
|
10
|
+
return {
|
|
11
|
+
kind: "runtime-target",
|
|
12
|
+
id: `pool:${name}`,
|
|
13
|
+
// The only locality this runtime offers: every pool is served by
|
|
14
|
+
// customer-enrolled runners. The field exists on RuntimeTarget so new
|
|
15
|
+
// localities (managed pools) are an additive change for callers.
|
|
16
|
+
locality: "customer-runner",
|
|
17
|
+
pool: name
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "node:test";
|
|
3
|
+
import { agents } from "../agents.js";
|
|
4
|
+
import { localFirst, planContinuation } from "../policy.js";
|
|
5
|
+
import { targets } from "../targets.js";
|
|
6
|
+
// Descriptor factories return plain objects with literal tags and reject an
// empty pool name.
test("typed descriptors carry no magic strings", () => {
    const expectedPool = {
        kind: "runtime-target",
        id: "pool:eng-prod",
        locality: "customer-runner",
        pool: "eng-prod"
    };
    assert.deepEqual(targets.pool("eng-prod"), expectedPool);
    assert.throws(() => targets.pool(""));
    assert.deepEqual(agents.mock(), { kind: "mock" });
    const pinnedClaude = agents.claudeCode({ version: ">=2.1" });
    assert.deepEqual(pinnedClaude, { kind: "claude-code", version: ">=2.1" });
    assert.deepEqual(agents.codex(), { kind: "codex" });
    assert.deepEqual(agents.pi(), { kind: "pi" });
});
|
|
23
|
+
// An allow-listed pool continues, and the reasons mention both the pool and
// the requested secret.
test("planner allows within policy and explains why", () => {
    const policy = localFirst({ allowPools: ["eng-prod"] });
    const request = {
        target: targets.pool("eng-prod"),
        secrets: ["NPM_TOKEN"],
        budget: {},
        parallelism: 1
    };
    const outcome = planContinuation(policy, request);
    assert.equal(outcome.decision, "continue");
    assert.equal(outcome.tier, "workspace");
    assert.ok(outcome.reasons.some((reason) => reason.includes("eng-prod")));
    assert.ok(outcome.reasons.some((reason) => reason.includes("NPM_TOKEN")));
});
|
|
35
|
+
// Each violation (unlisted pool, denied pool, spend, duration, fan-out) must
// independently produce a "deny" decision.
test("planner fails closed on pool, budget, and parallelism violations", () => {
    const policy = localFirst({
        allowPools: ["eng-prod"],
        denyPools: ["prod-db"],
        maxSpendUsd: 10,
        maxDurationMin: 30,
        maxParallelRuns: 2
    });
    // Plans against `pool` with a baseline request, overriding selected fields.
    const planFor = (pool, overrides = {}) => planContinuation(policy, {
        target: targets.pool(pool),
        secrets: [],
        budget: {},
        parallelism: 1,
        ...overrides
    });
    const mentions = (outcome, text) => outcome.reasons.some((reason) => reason.includes(text));

    const denied = planFor("untrusted");
    assert.equal(denied.decision, "deny");
    assert.ok(mentions(denied, "not in the continuation allowlist"));

    const hardDeny = planFor("prod-db");
    assert.equal(hardDeny.decision, "deny");
    assert.ok(mentions(hardDeny, "denied by continuation policy"));

    const overBudget = planFor("eng-prod", { budget: { maxSpendUsd: 100 } });
    assert.equal(overBudget.decision, "deny");

    const overDuration = planFor("eng-prod", { budget: { maxDurationMin: 120 } });
    assert.equal(overDuration.decision, "deny");

    const tooParallel = planFor("eng-prod", { parallelism: 3 });
    assert.equal(tooParallel.decision, "deny");
    assert.ok(mentions(tooParallel, "parallel"));
});
|
|
82
|
+
// With no options the policy caps fan-out at 4, uses minimal-context
// disclosure, and lets an arbitrary pool continue at that cap.
test("default policy allows any pool with bounded fan-out", () => {
    const policy = localFirst();
    assert.equal(policy.maxParallelRuns, 4);
    assert.equal(policy.disclosure, "minimal-context");
    const request = {
        target: targets.pool("anything"),
        secrets: [],
        budget: {},
        parallelism: 4
    };
    const outcome = planContinuation(policy, request);
    assert.equal(outcome.decision, "continue");
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "node:test";
|
|
3
|
+
import { MODEL_FUSION_SCHEMA_BUNDLE_HASH } from "@fusionkit/protocol";
|
|
4
|
+
import { handoff } from "../handoff.js";
|
|
5
|
+
import { HandoffToolJournal } from "../tool-journal.js";
|
|
6
|
+
import { targets } from "../targets.js";
|
|
7
|
+
import { localFirst } from "../policy.js";
|
|
8
|
+
/**
 * Build a handoff context rooted at the current directory.
 *
 * The plane address is a placeholder. Per the original author's note, nothing
 * is sent over the network until a checkpoint or continuation moves data, so
 * tool/needs tests can run against it.
 *
 * @param policy Continuation policy; defaults to `localFirst()`.
 */
function context(policy = localFirst()) {
    const plane = { url: "http://127.0.0.1:9", adminToken: "unused" };
    return handoff({ workspace: ".", plane, policy });
}
|
|
17
|
+
// Wrapping keeps tool metadata and results intact, forwards the input
// untouched, and records exactly one hashed "tool.called" trace event.
test("h.tools wraps execute, preserves results, and journals calls", async () => {
    const h = context();
    const seen = [];
    const add = {
        description: "adds two numbers",
        execute: async (input) => {
            seen.push(input);
            return { sum: input.a + input.b };
        }
    };
    const schemaOnly = { description: "no execute; provider-executed" };
    const toolset = { add, schemaOnly };
    const wrapped = h.tools(toolset);
    assert.equal(wrapped.schemaOnly, toolset.schemaOnly, "tools without execute pass through");
    assert.equal(wrapped.add.description, "adds two numbers");

    const result = await wrapped.add.execute({ a: 2, b: 3 });
    assert.deepEqual(result, { sum: 5 });
    assert.deepEqual(seen, [{ a: 2, b: 3 }]);

    const toolEvents = h.trace().filter((e) => e.type === "tool.called");
    assert.equal(toolEvents.length, 1);
    const event = toolEvents[0];
    assert.ok(event && event.type === "tool.called");
    assert.equal(event.toolName, "add");
    assert.equal(event.ok, true);
    const sha256Hex = /^[0-9a-f]{64}$/;
    assert.match(event.inputHash, sha256Hex);
    assert.match(event.outputHash ?? "", sha256Hex);
});
|
|
45
|
+
// A throwing tool still propagates its error, and the trace records the call
// as failed with no output hash.
test("h.tools journals failures and rethrows them", async () => {
    const h = context();
    const boom = {
        execute: async () => {
            throw new Error("tool exploded");
        }
    };
    const wrapped = h.tools({ boom });
    await assert.rejects(() => Promise.resolve(wrapped.boom.execute()), /tool exploded/);

    const toolEvents = h.trace().filter((e) => e.type === "tool.called");
    assert.equal(toolEvents.length, 1);
    const event = toolEvents[0];
    assert.ok(event && event.type === "tool.called");
    assert.equal(event.ok, false);
    assert.equal(event.outputHash, undefined);
});
|
|
62
|
+
// needs() answers from policy alone and leaves the trace empty.
test("h.needs is a pure policy check that records nothing", () => {
    const policy = localFirst({ allowPools: ["eng-prod"] });
    const h = context(policy);
    const allowed = h.needs(targets.pool("eng-prod"));
    assert.equal(allowed, true);
    const rejected = h.needs(targets.pool("untrusted"));
    assert.equal(rejected, false);
    assert.equal(h.trace().length, 0, "needs() must not pollute the trace");
});
|
|
68
|
+
// Two tool calls plus one plan must show up as such in the summary, with no
// denials, checkpoints, or runs.
test("h.summary recomputes counts from the trace", async () => {
    const h = context();
    const noop = { execute: async () => "ok" };
    const wrapped = h.tools({ noop });
    await wrapped.noop.execute();
    await wrapped.noop.execute();
    h.plan(targets.pool("anywhere"));

    const summary = await h.summary();
    assert.equal(summary.toolCalls, 2);
    assert.equal(summary.continuations.planned, 1);
    assert.equal(summary.continuations.denied, 0);
    assert.equal(summary.checkpoints, 0);
    assert.deepEqual(summary.runs, []);
});
|
|
83
|
+
// Appending a ToolExecutor result adds one journal entry, and the snapshot
// exposes a 64-character lowercase hex hash.
test("tool journal can append ToolExecutor results without replacing existing wrapper", () => {
    const journal = new HandoffToolJournal();
    const record = {
        schema: "tool-execution-record.v1",
        schema_version: "v1",
        schema_bundle_hash: MODEL_FUSION_SCHEMA_BUNDLE_HASH,
        producer: "test",
        producer_version: "0.1.0",
        producer_git_sha: "0".repeat(40),
        created_at: "2026-06-16T00:00:00.000Z",
        execution_id: "exec_read",
        plan_id: "plan_read",
        status: "succeeded",
        output_hash: "sha256:" + "a".repeat(64)
    };
    const decision = { decision: "allow", reason: "test" };
    journal.appendExecutionResult({
        record,
        output: { ok: true },
        deduped: false,
        decision
    });
    assert.equal(journal.length, 1);
    const snapshot = journal.snapshot();
    assert.ok(snapshot?.hash.match(/^[0-9a-f]{64}$/));
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "node:test";
|
|
3
|
+
import { defineHandoffConfig, handoff } from "../handoff.js";
|
|
4
|
+
import { localFirst } from "../policy.js";
|
|
5
|
+
import { targets } from "../targets.js";
|
|
6
|
+
import { evaluateTriggers, triggers } from "../triggers.js";
|
|
7
|
+
/**
 * Build a handoff context rooted at the current directory with a placeholder
 * plane address.
 *
 * @param policy Continuation policy; defaults to `localFirst()`.
 */
function context(policy = localFirst()) {
    const plane = { url: "http://127.0.0.1:9", adminToken: "unused" };
    return handoff({ workspace: ".", plane, policy });
}
|
|
14
|
+
// With nothing observed no trigger fires; with every signal present all four
// fire and each carries a non-empty reason.
test("evaluateTriggers fires deterministically against observable state", () => {
    const list = [
        triggers.userRequested(),
        triggers.toolFailed(),
        triggers.slowTools({ thresholdMs: 1000 }),
        triggers.modelEscalated()
    ];
    const quietState = {
        userRequested: false,
        toolFailures: 0,
        totalToolDurationMs: 0,
        modelEscalations: 0
    };
    assert.deepEqual(evaluateTriggers(list, quietState), []);

    const busyState = {
        userRequested: true,
        toolFailures: 2,
        totalToolDurationMs: 5000,
        modelEscalations: 1
    };
    const busy = evaluateTriggers(list, busyState);
    const firedIds = busy.map((f) => f.trigger.id).sort();
    assert.deepEqual(firedIds, ["model-escalated", "slow-tools", "tool-failed", "user-requested"]);
    for (const fired of busy) {
        assert.ok(fired.reason.length > 0, "every fired trigger explains itself");
    }
});
|
|
39
|
+
// With continueWhen set, an allowed pool is not enough: a trigger must have
// fired. A disallowed pool is refused either way.
test("needs() honors continueWhen: allowed pool but no fired trigger means no", async () => {
    const policy = localFirst({
        allowPools: ["eng-prod"],
        continueWhen: [triggers.toolFailed(), triggers.userRequested()]
    });
    const h = context(policy);
    const target = targets.pool("eng-prod");
    assert.equal(h.needs(target), false, "no trigger has fired yet");
    assert.deepEqual(h.firedTriggers(), []);

    // A journaled tool failure flips the answer.
    const flaky = {
        execute: async () => {
            throw new Error("network blip");
        }
    };
    const tools = h.tools({ flaky });
    await assert.rejects(() => Promise.resolve(tools.flaky.execute()));
    assert.equal(h.needs(target), true);
    assert.equal(h.firedTriggers()[0]?.trigger.id, "tool-failed");

    // Policy still fails closed on a disallowed pool, triggers or not.
    assert.equal(h.needs(targets.pool("untrusted")), false);
});
|
|
61
|
+
// requestContinuation() fires the user-requested trigger and leaves a
// "continuation.requested" trace event carrying the given reason.
test("requestContinuation is the explicit user gesture", () => {
    const policy = localFirst({ continueWhen: [triggers.userRequested()] });
    const h = context(policy);
    assert.equal(h.needs(targets.pool("anywhere")), false);

    h.requestContinuation("user closed the laptop lid");
    assert.equal(h.needs(targets.pool("anywhere")), true);

    const isRequest = (event) => event.type === "continuation.requested";
    const requested = h.trace().find(isRequest);
    assert.ok(requested && requested.type === "continuation.requested");
    assert.equal(requested.reason, "user closed the laptop lid");
});
|
|
72
|
+
// Only an escalated model decision fires the trigger; both decisions are
// counted in the summary's route totals.
test("model escalation decisions feed triggers and the trace", async () => {
    const policy = localFirst({ continueWhen: [triggers.modelEscalated()] });
    const h = context(policy);
    assert.equal(h.needs(targets.pool("anywhere")), false);

    const localDecision = {
        model: "local-small",
        route: "local",
        escalated: false,
        reason: "local-first policy"
    };
    h.noteModelDecision(localDecision);
    assert.equal(h.needs(targets.pool("anywhere")), false, "local routes do not fire");

    const cloudDecision = {
        model: "cloud-frontier",
        route: "cloud",
        escalated: true,
        reason: "local model failed (context-overflow)"
    };
    h.noteModelDecision(cloudDecision);
    assert.equal(h.needs(targets.pool("anywhere")), true);

    const summary = await h.summary();
    assert.deepEqual(summary.modelRoutes, { local: 1, cloud: 1, escalations: 1 });
});
|
|
92
|
+
// Process-wide defaults fill in a missing plane/policy, while a policy passed
// explicitly takes precedence. Defaults are always reset afterwards so later
// tests start from an empty configuration.
test("defineHandoffConfig supplies defaults; explicit config wins", () => {
    const defaults = {
        plane: { url: "http://127.0.0.1:9", adminToken: "from-defaults" },
        policy: localFirst({ allowPools: ["from-defaults"] })
    };
    defineHandoffConfig(defaults);
    try {
        const fromDefaults = handoff({ workspace: "." });
        assert.equal(fromDefaults.needs(targets.pool("from-defaults")), true);
        assert.equal(fromDefaults.needs(targets.pool("other")), false);

        const explicitPolicy = localFirst({ allowPools: ["explicit"] });
        const explicit = handoff({ workspace: ".", policy: explicitPolicy });
        assert.equal(explicit.needs(targets.pool("explicit")), true);
        assert.equal(explicit.needs(targets.pool("from-defaults")), false);
    }
    finally {
        defineHandoffConfig({});
    }
});
|
|
112
|
+
// With no plane in the call and none in the defaults, construction throws.
test("handoff without a plane anywhere fails loudly", () => {
    const construct = () => handoff({ workspace: "." });
    assert.throws(construct, /requires a plane/);
});
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ToolCallRecord, ToolExecutionResult } from "@fusionkit/protocol";
|
|
2
|
+
/** In-memory, append-only list of tool-call records with a hashed snapshot. */
export declare class HandoffToolJournal {
    private readonly entries;
    /** Number of records appended so far. */
    get length(): number;
    /** Append an already-built tool-call record. */
    append(record: ToolCallRecord): void;
    /** Append a record derived from a tool execution result. */
    appendExecutionResult(result: ToolExecutionResult): void;
    /** Number of recorded calls that carry an error. */
    failureCount(): number;
    /** Sum of `durationMs` over all recorded calls. */
    totalDurationMs(): number;
    /** Serialized journal and its hash, or `undefined` when nothing was recorded. */
    snapshot(): undefined | {
        blob: Buffer;
        hash: string;
    };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { canonicalize, PROTOCOL_VERSIONS, sha256Hex } from "@fusionkit/protocol";
|
|
2
|
+
export class HandoffToolJournal {
|
|
3
|
+
entries = [];
|
|
4
|
+
get length() {
|
|
5
|
+
return this.entries.length;
|
|
6
|
+
}
|
|
7
|
+
append(record) {
|
|
8
|
+
this.entries.push(record);
|
|
9
|
+
}
|
|
10
|
+
appendExecutionResult(result) {
|
|
11
|
+
this.entries.push({
|
|
12
|
+
seq: this.entries.length + 1,
|
|
13
|
+
ts: result.record.created_at,
|
|
14
|
+
toolName: result.record.execution_id,
|
|
15
|
+
input: { plan_id: result.record.plan_id },
|
|
16
|
+
...(result.output !== undefined ? { output: result.output } : {}),
|
|
17
|
+
...(result.record.error?.message ? { error: result.record.error.message } : {}),
|
|
18
|
+
durationMs: 0
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
failureCount() {
|
|
22
|
+
return this.entries.filter((entry) => entry.error !== undefined).length;
|
|
23
|
+
}
|
|
24
|
+
totalDurationMs() {
|
|
25
|
+
return this.entries.reduce((total, entry) => total + entry.durationMs, 0);
|
|
26
|
+
}
|
|
27
|
+
snapshot() {
|
|
28
|
+
if (this.entries.length === 0)
|
|
29
|
+
return undefined;
|
|
30
|
+
const journal = {
|
|
31
|
+
version: PROTOCOL_VERSIONS.toolJournal,
|
|
32
|
+
entries: [...this.entries]
|
|
33
|
+
};
|
|
34
|
+
const blob = Buffer.from(canonicalize(journal), "utf8");
|
|
35
|
+
return { blob, hash: sha256Hex(blob) };
|
|
36
|
+
}
|
|
37
|
+
}
|
package/dist/tools.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { ToolCallRecord } from "@fusionkit/protocol";
|
|
2
|
+
/**
 * Structural placeholder for a tool object, such as an AI SDK tool.
 *
 * The constraint is intentionally just `object`: the wrapper only checks at
 * runtime whether an `execute` function is present, so concrete tool types
 * (including the `ai` package's `Tool`) stay assignable without this package
 * taking a dependency on them.
 */
export type ToolLike = object;
|
|
9
|
+
/** What the observer callback receives for each wrapped tool invocation. */
export type ToolCallObservation = {
    /** The full tool-call record. */
    record: ToolCallRecord;
    /** Hash of the call's input. */
    inputHash: string;
    /** Hash of the call's output, when there is one. */
    outputHash?: string;
    /** Outcome flag for the call. */
    ok: boolean;
};
|
|
15
|
+
/**
 * Wrap a toolset so that every invocation is journaled.
 *
 * Raw input and output go to the journal (carried as content-addressed
 * semantic state at the next checkpoint), while the observer is handed hashes
 * for the local trace. Description, schema and identity of each tool are
 * preserved, so the wrapped set drops into generateText unchanged.
 *
 * @param toolset Tools keyed by name.
 * @param nextSeq Supplies the sequence number for the next call.
 * @param observe Called with an observation for each invocation.
 * @returns A toolset of the same type as the input.
 */
export declare function wrapTools<T extends Record<string, ToolLike>>(
    toolset: T,
    nextSeq: () => number,
    observe: (observation: ToolCallObservation) => void
): T;
|