agentledger-runtime 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -0
- package/examples/README.md +22 -0
- package/examples/quickstart/quickstart.js +7 -0
- package/examples/travel_assistant/travel_assistant.js +396 -0
- package/package.json +21 -0
- package/src/cli.js +733 -0
- package/src/index.d.ts +235 -0
- package/src/index.js +1683 -0
- package/test/runtime.test.js +272 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import { mkdtemp, readFile, rm } from 'node:fs/promises';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import test from 'node:test';
|
|
6
|
+
import { JSONStore, LocalBlobStore, LocalWorker, RetryableAgentError, Runtime, WorkerService, exportEvidence, replay, costAttribution, failureAttribution } from '../src/index.js';
|
|
7
|
+
|
|
8
|
+
// End-to-end check of the file-backed runtime: run one step that calls a tool,
// reopen the persisted store from disk, then export evidence and replay the run.
test('runtime creates durable run, evidence, and replay summary', async () => {
  const dir = await mkdtemp(join(tmpdir(), 'agentledger-ts-'));
  try {
    const path = join(dir, 'state.json');
    const rt = await Runtime.local(path);
    rt.registerTool({ name: 'docs.echo', func: async (args) => ({ echo: args.text }) });
    const { runId } = await rt.createRun({ input: 'hello' });
    const ok = await rt.runOnce({
      runId,
      workerId: 'worker-a',
      agentRole: 'Researcher',
      agent: async (ctx, state) => {
        const result = await ctx.callTool('docs.echo', { text: state.input });
        await ctx.writeState('tool_result', result);
      },
    });
    assert.equal(ok, true);

    // readFile rejects when the path is missing, so this fails the test if no
    // state file was written.
    await readFile(path, 'utf8');

    // Assert against a store opened from the file rather than the live runtime.
    const reopened = await JSONStore.open(path);
    assert.deepEqual(reopened.finalState(runId).tool_result, { echo: 'hello' });

    const bundle = exportEvidence(reopened, runId);
    assert.equal(bundle.schema_version, 'agentledger.evidence.v1');
    assert.ok(bundle.bundle_hash);

    const summary = replay(reopened, runId);
    assert.equal(summary.replay_safe, true);
    assert.equal(summary.event_count, bundle.events.length);
    assert.equal(summary.tool_call_count, 2);
  } finally {
    // Remove the scratch directory even when an assertion above throws, so
    // repeated test runs do not accumulate temp directories.
    await rm(dir, { recursive: true, force: true });
  }
});
|
|
35
|
+
|
|
36
|
+
// Stores the same JSON value twice and checks that both writes return the same
// sha256-addressed reference, that the value reads back unchanged, and that a
// reference with an unknown scheme is rejected.
test('local blob store roundtrips JSON-compatible values', async () => {
  const dir = await mkdtemp(join(tmpdir(), 'agentledger-ts-blobs-'));
  try {
    const blobs = await LocalBlobStore.open(dir);
    const value = { hello: 'world', nested: { n: 1 } };
    const first = await blobs.putJSON(value);
    const second = await blobs.putJSON(value);

    assert.ok(first.digest.startsWith('sha256:'));
    assert.ok(first.ref.startsWith('blob://sha256/'));
    // Identical content must yield an identical descriptor on the second write.
    assert.deepEqual(first, second);
    assert.deepEqual(await blobs.getJSON(first.ref), value);
    await assert.rejects(() => blobs.getJSON('unsupported://blob'), /unsupported blob ref/);
  } finally {
    // Remove the scratch directory even when an assertion above throws, so
    // repeated test runs do not accumulate temp directories.
    await rm(dir, { recursive: true, force: true });
  }
});
|
|
48
|
+
|
|
49
|
+
// Registers a tool with input/output schemas, calls it with an empty argument
// object, and checks that the run rejects, the tool body never runs, and an
// input-validation failure event is present.
test('tool schema validation rejects invalid input before execution', async () => {
  const runtime = new Runtime(JSONStore.memory());
  let invocations = 0;

  const inputSchema = {
    type: 'object',
    required: ['text'],
    additionalProperties: false,
    properties: { text: { type: 'string', minLength: 1 } },
  };
  const outputSchema = {
    type: 'object',
    required: ['echo'],
    additionalProperties: false,
    properties: { echo: { type: 'string' } },
  };
  runtime.registerTool({
    name: 'docs.echo',
    inputSchema,
    outputSchema,
    func: async (args) => {
      invocations += 1;
      return { echo: args.text };
    },
  });

  const { runId } = await runtime.createRun({});

  // The agent omits the required `text` property on purpose.
  const agent = (ctx) => ctx.callTool('docs.echo', {});
  await assert.rejects(() => runtime.runOnce({ runId, agentRole: 'SchemaAgent', agent }), /required/);

  assert.equal(invocations, 0);
  const sawInputValidationFailure = runtime.store
    .events(runId)
    .some((event) => event.type === 'tool_call_failed' && event.payload.phase === 'input_validation');
  assert.equal(sawInputValidationFailure, true);
});
|
|
63
|
+
|
|
64
|
+
// Runs an agent that throws a retryable error after its side-effecting tool
// call on attempt 1, then runs it again, and checks that the tool body ran only
// once and the ledger holds a single SUCCEEDED entry.
test('tool ledger idempotency reuses side effect response across retry', async () => {
  const runtime = new Runtime(JSONStore.memory());
  let invocations = 0;
  runtime.registerTool({
    name: 'github.create_pr',
    sideEffect: 'external',
    idempotencyRequired: true,
    func: async (args) => {
      invocations += 1;
      return { external_id: 'pr-123', title: args.title };
    },
  });
  const { runId } = await runtime.createRun({ title: 'runtime parity' });

  const agent = async (ctx, state) => {
    const pr = await ctx.callTool('github.create_pr', { title: state.title });
    // Simulate a crash between the side effect and the state write.
    if (ctx.attempt === 1) {
      throw new RetryableAgentError('crash after side effect');
    }
    await ctx.writeState('pr', pr);
  };

  const firstAttempt = await runtime.runOnce({ runId, agent });
  assert.equal(firstAttempt, false);
  const secondAttempt = await runtime.runOnce({ runId, workerId: 'worker-b', agent });
  assert.equal(secondAttempt, true);

  assert.equal(invocations, 1);
  assert.equal(runtime.store.ledger(runId).length, 1);
  assert.equal(runtime.store.ledger(runId)[0].status, 'SUCCEEDED');
});
|
|
88
|
+
|
|
89
|
+
// Calls a tool registered with riskLevel 'high' and no approval, and checks
// that the run rejects, the tool body never runs, a denying permission event is
// recorded, and the first step ends up 'failed'.
test('policy denies unapproved high-risk tool before execution', async () => {
  const runtime = new Runtime(JSONStore.memory());
  let invocations = 0;
  runtime.registerTool({
    name: 'repo.write',
    riskLevel: 'high',
    func: async () => {
      invocations += 1;
      return { ok: true };
    },
  });
  const { runId } = await runtime.createRun({});

  const agent = (ctx) => ctx.callTool('repo.write', { path: 'README.md' });
  await assert.rejects(
    () => runtime.runOnce({ runId, agentRole: 'Reviewer', agent }),
    /high-risk tool denied/,
  );

  assert.equal(invocations, 0);
  const sawDenial = runtime.store
    .events(runId)
    .some((event) => event.type === 'tool_permission_decided' && event.payload.allowed === false);
  assert.equal(sawDenial, true);
  assert.equal(runtime.store.steps(runId)[0].status, 'failed');
});
|
|
99
|
+
|
|
100
|
+
// Covers both outcomes of an approval-gated tool: a run that waits for approval
// and completes after it is granted, and a second run whose request is denied.
test('approval pauses and resumes step', async () => {
  const runtime = new Runtime(JSONStore.memory());
  let invocations = 0;
  runtime.registerTool({
    name: 'github.create_pr',
    riskLevel: 'high',
    approvalRequired: true,
    sideEffect: 'external',
    idempotencyRequired: true,
    func: async () => {
      invocations += 1;
      return { external_id: 'pr-42' };
    },
  });
  const { runId } = await runtime.createRun({});
  const agent = async (ctx) => {
    const pr = await ctx.callTool('github.create_pr', { title: 'safe' });
    await ctx.writeState('pr', pr);
  };

  // First pass: the step stops without running the tool and leaves one pending request.
  const pausedResult = await runtime.runOnce({ runId, workerId: 'worker-a', agentRole: 'Coder', agent });
  assert.equal(pausedResult, false);
  assert.equal(invocations, 0);
  const approvals = runtime.store.approvalRequests(runId);
  assert.equal(approvals.length, 1);
  assert.equal(approvals[0].status, 'PENDING');
  assert.equal(runtime.store.steps(runId)[0].status, 'waiting_human');

  // Second pass, after approval: a different worker finishes the step and the tool runs once.
  await runtime.store.approveRequest(approvals[0].approval_id, { approver: 'alice', reason: 'reviewed' });
  const resumedResult = await runtime.runOnce({ runId, workerId: 'worker-b', agentRole: 'Coder', agent });
  assert.equal(resumedResult, true);
  assert.equal(invocations, 1);

  // Denial path on a fresh run: denying the request marks the step failed.
  const { runId: deniedRun } = await runtime.createRun({});
  const deniedResult = await runtime.runOnce({ runId: deniedRun, workerId: 'worker-c', agentRole: 'Coder', agent });
  assert.equal(deniedResult, false);
  const deniedApprovalId = runtime.store.approvalRequests(deniedRun)[0].approval_id;
  await runtime.store.denyRequest(deniedApprovalId, { approver: 'bob', reason: 'not allowed' });
  assert.equal(runtime.store.steps(deniedRun)[0].status, 'failed');
});
|
|
131
|
+
|
|
132
|
+
// Calls a tool registered with sandboxRequired on a runtime that was given no
// sandbox executor, and checks that the run rejects, the tool body never runs,
// and both 'sandbox_started' and 'tool_call_failed' events are recorded.
test('sandbox-required tool fails closed without executor', async () => {
  const runtime = new Runtime(JSONStore.memory());
  let invocations = 0;
  runtime.registerTool({
    name: 'shell.exec',
    sandboxRequired: true,
    func: async () => {
      invocations += 1;
      return { ok: true };
    },
  });
  const { runId } = await runtime.createRun({});

  const agent = (ctx) => ctx.callTool('shell.exec', { argv: ['echo', 'hi'] });
  await assert.rejects(
    () => runtime.runOnce({ runId, agentRole: 'Executor', agent }),
    /sandbox executor/,
  );

  assert.equal(invocations, 0);
  const sawSandboxStart = runtime.store.events(runId).some((event) => event.type === 'sandbox_started');
  assert.equal(sawSandboxStart, true);
  const sawToolFailure = runtime.store.events(runId).some((event) => event.type === 'tool_call_failed');
  assert.equal(sawToolFailure, true);
});
|
|
142
|
+
|
|
143
|
+
// Sets a budget of one tool call, runs an agent that records a model call and
// then attempts two tool calls, and checks the cost summary, per-agent cost
// attribution, and failure attribution after the second call is rejected.
test('cost budget and failure attribution are recorded', async () => {
  const runtime = new Runtime(JSONStore.memory());
  runtime.setBudget({ maxToolCalls: 1 });
  let invocations = 0;
  runtime.registerTool({
    name: 'docs.echo',
    func: async (args) => {
      invocations += 1;
      return { echo: args.text };
    },
  });
  const { runId } = await runtime.createRun({});

  const agent = async (ctx) => {
    await ctx.recordModelCall({ model: 'gpt-test', inputTokens: 10, outputTokens: 5, totalUsd: 0.01 });
    await ctx.callTool('docs.echo', { text: 'first' });
    // Second call goes over the one-call budget set above.
    await ctx.callTool('docs.echo', { text: 'second' });
  };
  await assert.rejects(
    () => runtime.runOnce({ runId, agentRole: 'Researcher', agent }),
    /tool call budget exceeded/,
  );
  assert.equal(invocations, 1);

  const totals = runtime.store.costSummary(runId);
  assert.equal(totals.tool_calls, 1);
  assert.equal(totals.model_tokens, 15);
  assert.equal(totals.total_usd, 0.01);

  const attribution = costAttribution(runtime.store, runId);
  assert.equal(attribution.by_agent.Researcher.tool_calls, 1);
  assert.equal(attribution.by_agent.Researcher.model_tokens, 15);

  const failure = failureAttribution(runtime.store, runId);
  assert.equal(failure.summary.failed_step_count, 1);
  const hasFailureEvent = (type) => failure.failure_events.some((event) => event.type === type);
  assert.equal(hasFailureEvent('budget_check_failed'), true);
  assert.equal(hasFailureEvent('failure_classified'), true);
});
|
|
171
|
+
|
|
172
|
+
// Creates one media artifact and one stream checkpoint inside a step, then
// checks that the evidence bundle and the replay summary both count them.
test('media and stream artifacts are indexed in evidence and replay', async () => {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({});

  const agent = async (ctx) => {
    const frameOptions = {
      uri: 's3://media/demo/frame-0001.jpg',
      mediaMetadata: { mime_type: 'image/jpeg', frame_index: 1 },
      lineage: { source_blob_refs: ['s3://media/demo/input.mp4'], tool_call_ids: ['video.extract_frames'] },
    };
    const frame = await ctx.createMediaArtifact('frame-0001', 'frame', frameOptions);

    const checkpointOptions = {
      streamId: 'camera-1',
      consumerId: 'vision-agent',
      offset: 7,
      watermark: 1.5,
      chunk: {
        streamId: 'camera-1',
        chunkId: 'chunk-7',
        offset: 7,
        contentRef: 'blob://sha256/chunk-7.json',
        sequence: 7,
      },
      backpressure: { recommended_pause_ms: 100 },
    };
    const checkpoint = await ctx.createStreamCheckpoint('camera-checkpoint', checkpointOptions);

    await ctx.writeState('artifacts', { frame, checkpoint });
  };

  const completed = await runtime.runOnce({
    runId,
    workerId: 'worker-media',
    agentRole: 'MediaAgent',
    agent,
  });
  assert.equal(completed, true);

  const bundle = exportEvidence(runtime.store, runId);
  assert.equal(bundle.summary.artifact_count, 2);
  assert.equal(bundle.summary.media_artifact_count, 1);
  assert.equal(bundle.summary.stream_checkpoint_count, 1);
  assert.equal(bundle.media_artifacts[0].kind, 'frame');
  assert.equal(bundle.stream_checkpoints[0].stream_id, 'camera-1');

  const replayed = replay(runtime.store, runId);
  assert.equal(replayed.artifact_count, 2);
  assert.equal(replayed.media_artifact_count, 1);
  assert.equal(replayed.stream_checkpoint_count, 1);
});
|
|
208
|
+
|
|
209
|
+
// Claims a step with a zero-second lease, recovers expired leases, and checks
// that the old lease token can no longer commit and a new worker can claim the
// same step.
test('lease recovery fences previous owner', async () => {
  const store = JSONStore.memory();
  const { runId, stepId } = await store.createRun({});

  // leaseSeconds: 0 presumably makes the lease expire immediately; the next
  // assertion expects exactly one lease to be recovered.
  const staleClaim = await store.claimStep({ workerId: 'stale-worker', runId, leaseSeconds: 0 });
  const recoveredCount = await store.recoverExpiredLeases();
  assert.equal(recoveredCount, 1);

  const lateCommit = () =>
    store.commitStatePatch({
      runId,
      stepId,
      leaseToken: staleClaim.lease_token,
      baseVersion: 0,
      patch: { late: true },
    });
  await assert.rejects(lateCommit, /invalid or stale lease token/);

  const freshClaim = await store.claimStep({ workerId: 'new-worker', runId, leaseSeconds: 60 });
  assert.equal(freshClaim.step_id, stepId);
});
|
|
218
|
+
|
|
219
|
+
// Claims a step, cancels the run, and checks that a commit using the lease
// token issued before cancellation is rejected.
test('cancellation fences worker', async () => {
  const store = JSONStore.memory();
  const { runId, stepId } = await store.createRun({});
  const activeClaim = await store.claimStep({ workerId: 'worker', runId, leaseSeconds: 60 });

  const cancelResult = await store.cancelRun(runId, 'operator requested');
  assert.equal(cancelResult, 1);

  const lateCommit = () =>
    store.commitStatePatch({
      runId,
      stepId,
      leaseToken: activeClaim.lease_token,
      baseVersion: 0,
      patch: { late: true },
    });
  await assert.rejects(lateCommit, /invalid or stale lease token/);
});
|
|
226
|
+
|
|
227
|
+
// Loads the runtime contract JSON and checks its version and that a
// 'typescript' language target is listed.
// NOTE(review): the path resolves two directories above this test file, i.e.
// outside the package root — confirm the contracts directory is present
// wherever this suite is run.
test('contract fixture is readable and includes TypeScript target', async () => {
  const contractUrl = new URL('../../contracts/agentledger.runtime.v1.json', import.meta.url);
  const contractText = await readFile(contractUrl, 'utf8');
  const contract = JSON.parse(contractText);

  assert.equal(contract.contract_version, '1.0');
  const hasTypeScriptTarget = contract.language_targets.some((target) => target.language === 'typescript');
  assert.ok(hasTypeScriptTarget);
});
|
|
232
|
+
|
|
233
|
+
// Loads the shared runtime-baseline conformance fixture and checks its schema
// and contract versions, that four named scenarios are present, and that every
// scenario defines at least one assertion.
// NOTE(review): the path resolves two directories above this test file, i.e.
// outside the package root — confirm the contracts directory is present
// wherever this suite is run.
test('shared runtime baseline fixture covers preview scenarios', async () => {
  const fixtureUrl = new URL('../../contracts/conformance/runtime_baseline.v1.json', import.meta.url);
  const fixture = JSON.parse(await readFile(fixtureUrl, 'utf8'));

  assert.equal(fixture.schema_version, 'agentledger.conformance.runtime_baseline.v1');
  assert.equal(fixture.contract_version, '1.0');

  const scenarioNames = new Set(fixture.required_scenarios.map((scenario) => scenario.name));
  const expectedScenarios = [
    'durable_run_evidence_replay',
    'tool_ledger_idempotent_retry',
    'lease_recovery_fences_stale_worker',
    'cancellation_fences_worker',
  ];
  for (const name of expectedScenarios) {
    assert.equal(scenarioNames.has(name), true, `missing shared fixture scenario ${name}`);
  }

  for (const scenario of fixture.required_scenarios) {
    assert.ok(scenario.required_assertions.length > 0, `scenario ${scenario.name} should define assertions`);
  }
});
|
|
241
|
+
|
|
242
|
+
// For each conformance fixture file, reads it as text and checks that the
// schema identifier and every listed scenario name occur somewhere in it.
// The check is a plain substring match on the raw file; the JSON is not parsed.
// NOTE(review): the paths resolve two directories above this test file, i.e.
// outside the package root — confirm the contracts directory is present
// wherever this suite is run.
test('shared parity fixtures cover implemented scenarios', async () => {
  // Each entry is [fixture file name, tokens that must appear in that file].
  const requiredTokensByFixture = [
    ['policy_approval_sandbox.v1.json', [
      'agentledger.conformance.policy_approval_sandbox.v1',
      'policy_denies_unapproved_high_risk_tool',
      'approval_pauses_and_resumes_step',
      'sandbox_required_tool_fails_closed',
    ]],
    ['cost_failure_attribution.v1.json', [
      'agentledger.conformance.cost_failure_attribution.v1',
      'tool_and_model_cost_attributed_to_run_step_role',
      'budget_exhaustion_blocks_execution',
      'failure_attribution_classifies_agent_tool_model_runtime',
    ]],
    ['local_persistence.v1.json', [
      'agentledger.conformance.local_persistence.v1',
      'local_store_round_trips_completed_run',
      'local_store_preserves_evidence_replay_chain',
      'local_store_uses_atomic_snapshot_write',
    ]],
    ['local_blob_store.v1.json', [
      'agentledger.conformance.local_blob_store.v1',
      'blob_roundtrip_json_value',
      'blob_content_address_is_stable',
      'blob_bad_ref_is_rejected',
    ]],
    ['tool_schema_validation.v1.json', [
      'agentledger.conformance.tool_schema_validation.v1',
      'invalid_tool_input_rejected_before_execution',
      'valid_tool_input_and_output_pass',
      'invalid_tool_output_rejected',
    ]],
    ['worker_service.v1.json', [
      'agentledger.conformance.worker_service.v1',
      'local_worker_runs_until_terminal',
      'worker_service_stops_after_idle_poll',
      'worker_loop_recovers_expired_leases',
    ]],
    ['media_stream_artifacts.v1.json', [
      'agentledger.conformance.media_stream_artifacts.v1',
      'media_artifact_ref_is_indexed_in_evidence',
      'stream_checkpoint_ref_is_indexed_in_evidence',
    ]],
    ['evidence_consumers.v1.json', [
      'agentledger.conformance.evidence_consumers.v1',
      'trace_spans_from_evidence',
      'evidence_diff_detects_state_and_event_changes',
      'divergence_report_lists_changed_dimensions',
      'static_debug_summary_is_exportable',
    ]],
    ['static_debug_html.v1.json', [
      'agentledger.conformance.static_debug_html.v1',
      'static_debug_html_contains_run_events_and_state',
    ]],
    ['ops_readiness.v1.json', [
      'agentledger.conformance.ops_readiness.v1',
      'retention_plan_is_non_destructive_and_counts_evidence',
      'backup_readiness_reports_required_checks',
    ]],
    ['storage_schema.v1.json', [
      'agentledger.conformance.storage_schema.v1',
      'latest_schema_version_and_ddl_are_available',
    ]],
    ['mcp_adapters.v1.json', [
      'agentledger.conformance.mcp_adapters.v1',
      'in_memory_mcp_tool_server_lists_and_calls_tools',
      'mcp_tool_descriptor_maps_to_tool_spec',
      'in_memory_mcp_context_server_reads_resources',
    ]],
    ['framework_adapters.v1.json', [
      'agentledger.conformance.framework_adapters.v1',
      'function_adapter_maps_run_spec_and_invokes_agent',
      'method_framework_adapter_uses_first_available_method_and_writes_output',
    ]],
    ['otlp_trace_export.v1.json', [
      'agentledger.conformance.otlp_trace_export.v1',
      'otlp_json_contains_resource_scope_and_spans',
    ]],
    ['simple_api.v1.json', [
      'agentledger.conformance.simple_api.v1',
      'simple_run_returns_output_and_state',
    ]],
    ['boundary_lint.v1.json', [
      'agentledger.conformance.boundary_lint.v1',
      'direct_shell_and_http_calls_are_reported',
      'ignored_lines_are_not_reported',
    ]],
    ['scheduler.v1.json', [
      'agentledger.conformance.scheduler.v1',
      'scheduler_status_reports_run_steps_and_cost',
      'scheduler_recover_and_cancel_delegate_to_store',
    ]],
    ['adversarial_review.v1.json', [
      'agentledger.conformance.adversarial_review.v1',
      'clean_evidence_passes_blocker_review',
      'pending_high_risk_approval_blocks_review',
      'max_total_usd_limit_blocks_review',
    ]],
    ['evidence_regression.v1.json', [
      'agentledger.conformance.evidence_regression.v1',
      'evidence_health_checks_pass_for_clean_bundle',
      'regression_detects_final_state_and_event_type_changes',
      'regression_cost_delta_limit_blocks',
    ]],
    ['failure_injection.v1.json', [
      'agentledger.conformance.failure_injection.v1',
      'retry_exhaustion_marks_run_failed',
      'lease_fencing_rejects_stale_commit',
      'cancellation_fencing_rejects_late_commit',
      'side_effect_idempotency_executes_once_across_retry',
    ]],
    ['shadow.v1.json', [
      'agentledger.conformance.shadow.v1',
      'shadow_state_diff_reports_changed_keys',
      'shadow_report_carries_source_shadow_and_ok',
    ]],
    ['repro.v1.json', [
      'agentledger.conformance.repro.v1',
      'builtin_golden_names_are_available',
      'minimal_success_golden_is_valid_evidence',
      'golden_regression_detects_changed_final_state',
    ]],
    ['time_travel.v1.json', [
      'agentledger.conformance.time_travel.v1',
      'timeline_reconstructs_state_at_selected_seq',
      'timeline_marks_state_changed_frames',
      'time_travel_report_exports_static_html',
    ]],
  ];

  // Files are read one at a time, in listed order, so the first failure
  // reported is always the earliest entry that is missing or incomplete.
  for (const [fixtureFile, tokens] of requiredTokensByFixture) {
    const fixtureUrl = new URL(`../../contracts/conformance/${fixtureFile}`, import.meta.url);
    const text = await readFile(fixtureUrl, 'utf8');
    for (const token of tokens) {
      assert.ok(text.includes(token), `fixture ${fixtureFile} missing ${token}`);
    }
  }
});
|