agentledger-runtime 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -0
- package/examples/README.md +22 -0
- package/examples/quickstart/quickstart.js +7 -0
- package/examples/travel_assistant/travel_assistant.js +396 -0
- package/package.json +21 -0
- package/src/cli.js +733 -0
- package/src/index.d.ts +235 -0
- package/src/index.js +1683 -0
- package/test/runtime.test.js +272 -0
package/src/cli.js
ADDED
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { mkdtemp, rm } from 'node:fs/promises';
|
|
4
|
+
import { dirname, join } from 'node:path';
|
|
5
|
+
import { tmpdir } from 'node:os';
|
|
6
|
+
import process from 'node:process';
|
|
7
|
+
import { FunctionAdapter, InMemoryMCPContextServer, InMemoryMCPToolServer, JSONStore, LocalBlobStore, LocalWorker, MCPContextAdapter, MCPToolAdapter, MethodFrameworkAdapter, RetryableAgentError, Runtime, RuntimeScheduler, WorkerService, checkBackupReadiness, costAttribution, debugHTML, ddlFor, debugSummary, diffEvidence, divergenceReport, exportEvidence, failureAttribution, latestSchemaVersion, migrationsFor, otlpTraceJSON, planRetention, replay, simpleRun, traceJSONL, traceSpans, scanBoundarySource, adversarialReview, evaluateEvidence, evaluateEvidenceRegression, runFailureInjectionSuite, diffStates, shadowReport, builtinGoldenNames, builtinGoldenEvidence, goldenRegression, timeTravel, timeTravelHTML, optionalAdapterCapabilities, PostgresAdapter, S3BlobStoreAdapter, OTLPTransport, DockerSandboxAdapter } from './index.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Conformance fixture file name -> tokens that must appear in that file's text.
 *
 * `validateFixtures()` reads each file from `contracts/conformance/` and fails
 * if any listed token is absent. In every entry the first token is the fixture
 * identifier and the remaining tokens are names expected inside the fixture.
 */
const FIXTURE_CHECKS = Object.fromEntries([
  ['runtime_baseline.v1.json', [
    'agentledger.conformance.runtime_baseline.v1',
    'durable_run_evidence_replay',
    'tool_ledger_idempotent_retry',
    'lease_recovery_fences_stale_worker',
    'cancellation_fences_worker',
  ]],
  ['local_persistence.v1.json', [
    'agentledger.conformance.local_persistence.v1',
    'local_store_round_trips_completed_run',
    'local_store_preserves_evidence_replay_chain',
    'local_store_uses_atomic_snapshot_write',
  ]],
  ['local_blob_store.v1.json', [
    'agentledger.conformance.local_blob_store.v1',
    'blob_roundtrip_json_value',
    'blob_content_address_is_stable',
    'blob_bad_ref_is_rejected',
  ]],
  ['tool_schema_validation.v1.json', [
    'agentledger.conformance.tool_schema_validation.v1',
    'invalid_tool_input_rejected_before_execution',
    'valid_tool_input_and_output_pass',
    'invalid_tool_output_rejected',
  ]],
  ['worker_service.v1.json', [
    'agentledger.conformance.worker_service.v1',
    'local_worker_runs_until_terminal',
    'worker_service_stops_after_idle_poll',
    'worker_loop_recovers_expired_leases',
  ]],
  ['policy_approval_sandbox.v1.json', [
    'agentledger.conformance.policy_approval_sandbox.v1',
    'policy_denies_unapproved_high_risk_tool',
    'approval_pauses_and_resumes_step',
    'sandbox_required_tool_fails_closed',
  ]],
  ['cost_failure_attribution.v1.json', [
    'agentledger.conformance.cost_failure_attribution.v1',
    'tool_and_model_cost_attributed_to_run_step_role',
    'budget_exhaustion_blocks_execution',
    'failure_attribution_classifies_agent_tool_model_runtime',
  ]],
  ['media_stream_artifacts.v1.json', [
    'agentledger.conformance.media_stream_artifacts.v1',
    'media_artifact_ref_is_indexed_in_evidence',
    'stream_checkpoint_ref_is_indexed_in_evidence',
  ]],
  ['evidence_consumers.v1.json', [
    'agentledger.conformance.evidence_consumers.v1',
    'trace_spans_from_evidence',
    'evidence_diff_detects_state_and_event_changes',
    'divergence_report_lists_changed_dimensions',
    'static_debug_summary_is_exportable',
  ]],
  ['static_debug_html.v1.json', [
    'agentledger.conformance.static_debug_html.v1',
    'static_debug_html_contains_run_events_and_state',
  ]],
  ['ops_readiness.v1.json', [
    'agentledger.conformance.ops_readiness.v1',
    'retention_plan_is_non_destructive_and_counts_evidence',
    'backup_readiness_reports_required_checks',
  ]],
  ['storage_schema.v1.json', [
    'agentledger.conformance.storage_schema.v1',
    'latest_schema_version_and_ddl_are_available',
  ]],
  ['mcp_adapters.v1.json', [
    'agentledger.conformance.mcp_adapters.v1',
    'in_memory_mcp_tool_server_lists_and_calls_tools',
    'mcp_tool_descriptor_maps_to_tool_spec',
    'in_memory_mcp_context_server_reads_resources',
  ]],
  ['framework_adapters.v1.json', [
    'agentledger.conformance.framework_adapters.v1',
    'function_adapter_maps_run_spec_and_invokes_agent',
    'method_framework_adapter_uses_first_available_method_and_writes_output',
  ]],
  ['otlp_trace_export.v1.json', [
    'agentledger.conformance.otlp_trace_export.v1',
    'otlp_json_contains_resource_scope_and_spans',
  ]],
  ['simple_api.v1.json', [
    'agentledger.conformance.simple_api.v1',
    'simple_run_returns_output_and_state',
  ]],
  ['boundary_lint.v1.json', [
    'agentledger.conformance.boundary_lint.v1',
    'direct_shell_and_http_calls_are_reported',
    'ignored_lines_are_not_reported',
  ]],
  ['scheduler.v1.json', [
    'agentledger.conformance.scheduler.v1',
    'scheduler_status_reports_run_steps_and_cost',
    'scheduler_recover_and_cancel_delegate_to_store',
  ]],
  ['adversarial_review.v1.json', [
    'agentledger.conformance.adversarial_review.v1',
    'clean_evidence_passes_blocker_review',
    'pending_high_risk_approval_blocks_review',
    'max_total_usd_limit_blocks_review',
  ]],
  ['evidence_regression.v1.json', [
    'agentledger.conformance.evidence_regression.v1',
    'evidence_health_checks_pass_for_clean_bundle',
    'regression_detects_final_state_and_event_type_changes',
    'regression_cost_delta_limit_blocks',
  ]],
  ['failure_injection.v1.json', [
    'agentledger.conformance.failure_injection.v1',
    'retry_exhaustion_marks_run_failed',
    'lease_fencing_rejects_stale_commit',
    'cancellation_fencing_rejects_late_commit',
    'side_effect_idempotency_executes_once_across_retry',
  ]],
  ['shadow.v1.json', [
    'agentledger.conformance.shadow.v1',
    'shadow_state_diff_reports_changed_keys',
    'shadow_report_carries_source_shadow_and_ok',
  ]],
  ['repro.v1.json', [
    'agentledger.conformance.repro.v1',
    'builtin_golden_names_are_available',
    'minimal_success_golden_is_valid_evidence',
    'golden_regression_detects_changed_final_state',
  ]],
  ['time_travel.v1.json', [
    'agentledger.conformance.time_travel.v1',
    'timeline_reconstructs_state_at_selected_seq',
    'timeline_marks_state_changed_frames',
    'time_travel_report_exports_static_html',
  ]],
  ['optional_adapters.v1.json', [
    'agentledger.conformance.optional_adapters.v1',
    'optional_backend_capabilities_are_discoverable',
    'postgres',
    'langgraph',
    'shadow-runner',
  ]],
  ['official_adapters.v1.json', [
    'agentledger.conformance.official_adapters.v1',
    'postgres_adapter_plans_and_applies_migrations_with_injected_client',
    's3_blob_adapter_round_trips_json_with_injected_client',
    'otlp_transport_posts_json_with_injected_client',
    'docker_sandbox_adapter_builds_manifest_without_daemon',
  ]],
]);
|
|
158
|
+
|
|
159
|
+
/**
 * Locates the AgentLedger repository root.
 *
 * A non-empty `AGENTLEDGER_REPO_ROOT` environment variable is returned as-is.
 * Otherwise the search starts at the current working directory and climbs
 * parent by parent until a directory containing
 * `contracts/agentledger.runtime.v1.json` is found.
 *
 * @returns {string} The override value or the first matching directory.
 * @throws {Error} When the filesystem root is reached without a match.
 */
function findRepoRoot() {
  const override = process.env.AGENTLEDGER_REPO_ROOT;
  if (override) return override;

  let candidate = process.cwd();
  for (;;) {
    const marker = join(candidate, 'contracts', 'agentledger.runtime.v1.json');
    if (existsSync(marker)) return candidate;

    const above = dirname(candidate);
    // dirname() of the filesystem root is the root itself: nothing left to climb.
    if (above === candidate) throw new Error('could not find AgentLedger repository root');
    candidate = above;
  }
}
|
|
169
|
+
|
|
170
|
+
/**
 * Builds the path of the runtime contract file under the repository root.
 *
 * @returns {string} `<repo root>/contracts/agentledger.runtime.v1.json`.
 * @throws {Error} Propagated from `findRepoRoot()` when no root is found.
 */
function contractPath() {
  const repoRoot = findRepoRoot();
  return join(repoRoot, 'contracts', 'agentledger.runtime.v1.json');
}
|
|
173
|
+
|
|
174
|
+
/**
 * Checks that the runtime contract file contains a fixed set of marker strings.
 *
 * The file is read as UTF-8 text and searched by plain substring match; it is
 * not parsed as JSON.
 *
 * @returns {void}
 * @throws {Error} `contract missing <token>` for the first marker not found;
 *   file-system errors from reading the contract also propagate.
 */
export function validateContract() {
  const contractText = readFileSync(contractPath(), 'utf8');
  const requiredTokens = [
    '"contract_version": "1.0"',
    '"language": "typescript"',
    '"status": "preview"',
    'media_stream_artifacts.v1.json',
  ];
  const absent = requiredTokens.find((token) => !contractText.includes(token));
  if (absent !== undefined) throw new Error(`contract missing ${absent}`);
}
|
|
180
|
+
|
|
181
|
+
/**
 * Validates the contract and then every conformance fixture listed in
 * `FIXTURE_CHECKS`.
 *
 * Each fixture is read as UTF-8 text from `<repo root>/contracts/conformance/`
 * and must contain all of its listed tokens (plain substring match).
 *
 * @returns {string[]} The fixture file names that passed, in table order.
 * @throws {Error} `fixture <file> missing <token>` for the first absent token;
 *   errors from `validateContract()` and from file reads also propagate.
 */
export function validateFixtures() {
  validateContract();
  const fixtureDir = join(findRepoRoot(), 'contracts', 'conformance');

  return Object.entries(FIXTURE_CHECKS).map(([file, tokens]) => {
    const fixtureText = readFileSync(join(fixtureDir, file), 'utf8');
    const absent = tokens.find((token) => !fixtureText.includes(token));
    if (absent !== undefined) throw new Error(`fixture ${file} missing ${absent}`);
    return file;
  });
}
|
|
194
|
+
|
|
195
|
+
/**
 * Returns the CLI help text: a version banner, the list of supported
 * `agentledger-ts` commands, and the project URL.
 *
 * @returns {string} Multi-line usage text without a trailing newline.
 */
function usage() {
  const helpLines = [
    'AgentLedger TypeScript Runtime 1.0.2',
    '',
    'Usage:',
    ' agentledger-ts doctor',
    ' agentledger-ts version',
    ' agentledger-ts quickstart',
    ' agentledger-ts conformance',
    ' agentledger-ts contract validate',
    ' agentledger-ts contract export',
    '',
    'Project: https://github.com/yaogdu/AgentLedger',
  ];
  return helpLines.join('\n');
}
|
|
198
|
+
|
|
199
|
+
/**
 * Smoke check of the in-memory runtime.
 *
 * Registers a `docs.echo` tool, creates a run with input `'hello'`, executes
 * one agent attempt that calls the tool and writes the result to state under
 * `tool_result`, then cross-checks the final state, the exported evidence
 * bundle and the replay summary.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the attempt does not report completion, `tool_result`
 *   is missing from the final state, or the evidence and replay data disagree.
 */
export async function runRuntimeSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  runtime.registerTool({
    name: 'docs.echo',
    version: 'v1',
    sideEffect: 'none',
    func: async (args) => ({ echo: args.text }),
  });

  const { runId } = await runtime.createRun({ input: 'hello' });
  const echoAgent = async (ctx, state) => {
    const echoed = await ctx.callTool('docs.echo', { text: state.input });
    await ctx.writeState('tool_result', echoed);
  };
  const completed = await runtime.runOnce({
    runId,
    workerId: 'conformance-ts',
    agentRole: 'ConformanceAgent',
    agent: echoAgent,
  });
  if (!completed) throw new Error('runtime smoke did not complete');
  if (!runtime.store.finalState(runId).tool_result) throw new Error('runtime smoke missing final state');

  const evidence = exportEvidence(runtime.store, runId);
  const replayed = replay(runtime.store, runId);
  const consistent =
    evidence.schema_version === 'agentledger.evidence.v1' &&
    replayed.replay_safe &&
    replayed.event_count === evidence.events.length;
  if (!consistent) throw new Error('runtime smoke evidence/replay mismatch');
}
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
/**
 * Runs every semantic smoke check one after another and returns the labels of
 * the checks that were executed.
 *
 * The checks run strictly in sequence; the first one that throws aborts the
 * remainder and its error propagates to the caller.
 *
 * @returns {Promise<string[]>} Fixed list of smoke labels.
 * @throws {Error} From any individual smoke, or `failure injection smoke
 *   failed` when the failure-injection suite result has a falsy `passed`.
 */
export async function runSemanticSmokes() {
  // Core runtime, storage and worker behaviour.
  await runRuntimeSmoke();
  await runLocalPersistenceSmoke();
  await runLocalBlobStoreSmoke();
  await runToolSchemaValidationSmoke();
  await runWorkerServiceSmoke();
  await runToolLedgerSmoke();
  await runPolicyApprovalSandboxSmoke();
  await runCostFailureSmoke();
  await runMediaStreamSmoke();

  // Evidence consumers, ops tooling and adapters.
  await runEvidenceConsumersSmoke();
  await runStaticDebugHTMLSmoke();
  await runOpsReadinessSmoke();
  await runStorageSchemaSmoke();
  await runMCPAdaptersSmoke();
  await runFrameworkAdaptersSmoke();
  await runOTLPTraceExportSmoke();
  await runSimpleAPISmoke();
  await runBoundaryLintSmoke();
  await runSchedulerSmoke();
  await runAdversarialReviewSmoke();
  await runEvidenceRegressionSmoke();

  const injection = await runFailureInjectionSuite();
  if (!injection.passed) throw new Error('failure injection smoke failed');

  // NOTE(review): runShadowSmoke and runOptionalAdaptersSmoke are called without
  // `await`; presumably they are synchronous - confirm against their definitions.
  runShadowSmoke();
  await runReproGoldenSmoke();
  await runTimeTravelTimelineSmoke();
  runOptionalAdaptersSmoke();
  await runOfficialAdaptersSmoke();

  return [
    'runtime_smoke_evidence_replay',
    'local_persistence_smoke',
    'local_blob_store_smoke',
    'tool_schema_validation_smoke',
    'worker_service_smoke',
    'tool_ledger_idempotent_retry',
    'policy_approval_sandbox_smoke',
    'cost_failure_attribution_smoke',
    'media_stream_artifacts_smoke',
    'evidence_consumers_smoke',
    'static_debug_html_smoke',
    'ops_readiness_smoke',
    'storage_schema_smoke',
    'mcp_adapters_smoke',
    'framework_adapters_smoke',
    'otlp_trace_export_smoke',
    'simple_api_smoke',
    'boundary_lint_smoke',
    'scheduler_smoke',
    'adversarial_review_smoke',
    'evidence_regression_smoke',
    'failure_injection_smoke',
    'shadow_smoke',
    'repro_golden_smoke',
    'time_travel_timeline_smoke',
    'optional_adapters_smoke',
    'official_adapters_smoke',
  ];
}
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
/**
 * Smoke check for the static debug HTML report.
 *
 * Executes a one-step run that writes `answer`, renders the exported evidence
 * with `debugHTML`, and verifies the output contains a set of expected
 * substrings.
 *
 * @returns {Promise<void>}
 * @throws {Error} `static debug html smoke missing <token>` for the first
 *   expected substring that is absent.
 */
async function runStaticDebugHTMLSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({ input: 'debug' });
  await runtime.runOnce({
    runId,
    workerId: 'worker-debug',
    agentRole: 'DebugAgent',
    agent: async (ctx) => ctx.writeState('answer', 'debug'),
  });

  const page = debugHTML(exportEvidence(runtime.store, runId));
  const expected = ['<!doctype html>', 'AgentLedger Debug Report', 'Run', 'Events', 'Final State', 'run_created'];
  const absent = expected.find((token) => !page.includes(token));
  if (absent !== undefined) throw new Error(`static debug html smoke missing ${absent}`);
}
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
/**
 * Smoke check for the retention plan and backup-readiness report.
 *
 * Executes a run that creates one media artifact and writes `answer`, then
 * checks that `planRetention` yields a non-destructive plan with matching
 * counts and the two expected actions, and that `checkBackupReadiness`
 * reports each required check as passed.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the retention plan or any required readiness check
 *   does not match.
 */
async function runOpsReadinessSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({ input: 'ops' });
  const opsAgent = async (ctx) => {
    await ctx.createMediaArtifact('frame-ops', 'frame', { uri: 'file://frame.png' });
    await ctx.writeState('answer', 'ops');
  };
  await runtime.runOnce({ runId, workerId: 'worker-ops', agentRole: 'OpsAgent', agent: opsAgent });

  const evidence = exportEvidence(runtime.store, runId);

  const retention = planRetention(evidence);
  const retentionOk =
    !retention.destructive &&
    retention.event_count === evidence.events.length &&
    retention.media_artifact_count === 1 &&
    retention.actions.includes('export evidence bundle before destructive retention') &&
    retention.actions.includes('snapshot final state and manifest');
  if (!retentionOk) throw new Error('ops retention plan smoke mismatch');

  const readiness = checkBackupReadiness(evidence);
  const requiredChecks = ['run_metadata_exists', 'payload_refs_resolvable', 'evidence_exportable', 'media_stream_evidence_shape'];
  for (const name of requiredChecks) {
    const passed = readiness.checks.some((check) => check.name === name && check.passed);
    if (!passed) throw new Error(`ops backup readiness missing ${name}`);
  }
}
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
/**
 * Smoke check for the storage schema helpers, run for the `sqlite` and
 * `postgres` dialects.
 *
 * For each dialect the latest version must be `'0001'`, there must be exactly
 * one migration named `initial_runtime_metadata` whose checksum starts with
 * `sha256:`, and the DDL must contain the expected table fragments.
 *
 * @returns {Promise<void>}
 * @throws {Error} Naming the dialect (and the DDL token, if applicable) for
 *   the first mismatch.
 */
async function runStorageSchemaSmoke() {
  const ddlTokens = [
    'schema_migrations',
    'CREATE TABLE IF NOT EXISTS runs',
    'CREATE TABLE IF NOT EXISTS events',
    'CREATE TABLE IF NOT EXISTS tool_ledger',
  ];

  for (const dialect of ['sqlite', 'postgres']) {
    if (latestSchemaVersion(dialect) !== '0001') throw new Error(`storage schema version mismatch for ${dialect}`);

    const migrationList = migrationsFor(dialect);
    const migrationsOk =
      migrationList.length === 1 &&
      migrationList[0].name === 'initial_runtime_metadata' &&
      migrationList[0].checksum.startsWith('sha256:');
    if (!migrationsOk) throw new Error(`storage schema migrations mismatch for ${dialect}`);

    const schemaText = ddlFor(dialect);
    const absent = ddlTokens.find((token) => !schemaText.includes(token));
    if (absent !== undefined) throw new Error(`storage schema ddl for ${dialect} missing ${absent}`);
  }
}
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
/**
 * Smoke check for the in-memory MCP servers and their adapters.
 *
 * Covers: listing two registered tools, a failing call to an unregistered
 * tool, mapping a tool descriptor to a tool spec, listing and reading a
 * context resource, and the read-tool spec of the context adapter.
 *
 * @returns {Promise<void>}
 * @throws {Error} With a message naming the first expectation that failed.
 */
async function runMCPAdaptersSmoke() {
  // --- tool server ---
  const toolServer = new InMemoryMCPToolServer();
  toolServer.addTool(
    { name: 'docs.echo', annotations: { side_effect: 'none' } },
    (name, args) => ({ name, echo: args.text }),
  );
  toolServer.addTool({ name: 'web.search' }, () => ({ ok: true }));

  const listed = toolServer.listTools();
  if (listed.length !== 2 || listed[0].name !== 'docs.echo') throw new Error('mcp tool server list mismatch');

  // Only a synchronous throw from callTool is observed here; the result is not awaited.
  let missing = false;
  try {
    toolServer.callTool('missing', {});
  } catch {
    missing = true;
  }
  if (!missing) throw new Error('mcp missing tool should fail');

  // --- tool adapter ---
  const toolAdapter = new MCPToolAdapter((name, args) => toolServer.callTool(name, args));
  const toolSpec = toolAdapter.toolSpecFromDescriptor({
    name: 'github.create_pr',
    annotations: { side_effect: 'external', risk_level: 'high' },
  });
  const toolSpecOk =
    toolSpec.name === 'github.create_pr' &&
    toolSpec.sideEffect === 'external' &&
    toolSpec.riskLevel === 'high' &&
    toolSpec.idempotencyRequired;
  if (!toolSpecOk) throw new Error('mcp tool adapter spec mismatch');

  // --- context server ---
  const contextServer = new InMemoryMCPContextServer();
  contextServer.addResource({ uri: 'docs://readme', name: 'README', reader: (uri) => ({ uri }) });

  const listedResources = contextServer.listResources();
  if (listedResources.length !== 1 || listedResources[0].mimeType !== 'application/json') throw new Error('mcp context list mismatch');

  const readResult = contextServer.readResource('docs://readme');
  if (!readResult.content) throw new Error('mcp context read mismatch');

  // --- context adapter ---
  const contextAdapter = new MCPContextAdapter((uri) => contextServer.readResource(uri));
  const readToolSpec = contextAdapter.readToolSpec();
  if (readToolSpec.name !== 'mcp.context.read' || readToolSpec.sideEffect !== 'none') throw new Error('mcp context adapter spec mismatch');
}
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
/**
 * Smoke check for `FunctionAdapter` and `MethodFrameworkAdapter`.
 *
 * For each adapter it verifies the `adapter` field of `mapRunSpec()`, runs the
 * adapter's agent once in a fresh in-memory runtime, and requires a truthy
 * `output` in the final state.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a run spec or a final-state output does not match.
 */
async function runFrameworkAdaptersSmoke() {
  // Function adapter: wraps a plain async function.
  const fnAdapter = new FunctionAdapter(async (_ctx, state) => ({ kind: 'function', input: state.input }));
  if (fnAdapter.mapRunSpec().adapter !== 'function') throw new Error('function adapter run spec mismatch');

  const fnRuntime = new Runtime(JSONStore.memory());
  const { runId: fnRunId } = await fnRuntime.createRun({ input: 'adapter' });
  await fnRuntime.runOnce({
    runId: fnRunId,
    workerId: 'worker-adapter',
    agentRole: fnAdapter.role,
    agent: fnAdapter.asAgent(),
  });
  if (!fnRuntime.store.finalState(fnRunId).output) throw new Error('function adapter missing output');

  // Method adapter: the target only implements `invoke`, the second candidate.
  const target = { invoke: (state) => ({ kind: 'method', input: state.input }) };
  const methodAdapter = new MethodFrameworkAdapter(target, { methodCandidates: ['ainvoke', 'invoke'], outputKey: 'output' });
  if (methodAdapter.mapRunSpec().adapter !== 'method-framework') throw new Error('method adapter run spec mismatch');

  const methodRuntime = new Runtime(JSONStore.memory());
  const { runId: methodRunId } = await methodRuntime.createRun({ input: 'method' });
  await methodRuntime.runOnce({
    runId: methodRunId,
    workerId: 'worker-method',
    agentRole: methodAdapter.role,
    agent: methodAdapter.asAgent(),
  });
  if (!methodRuntime.store.finalState(methodRunId).output) throw new Error('method adapter missing output');
}
|
|
326
|
+
|
|
327
|
+
/**
 * Smoke check for the OTLP trace JSON export.
 *
 * Executes a one-step run, converts its evidence with `otlpTraceJSON`, and
 * verifies the serialised result contains the expected field names.
 *
 * @returns {Promise<void>}
 * @throws {Error} `otlp trace smoke missing <token>` for the first expected
 *   substring that is absent.
 */
async function runOTLPTraceExportSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({ input: 'otlp' });
  await runtime.runOnce({
    runId,
    workerId: 'worker-otlp',
    agentRole: 'TraceAgent',
    agent: async (ctx) => ctx.writeState('answer', 'otlp'),
  });

  const evidence = exportEvidence(runtime.store, runId);
  const exported = otlpTraceJSON(evidence, { serviceName: 'agentledger-test', serviceVersion: '1.0.0' });
  const serialised = JSON.stringify(exported);

  const expected = ['resourceSpans', 'service.name', 'scopeSpans', 'traceId', 'spanId', 'agentledger.original_trace_id', 'agentledger.run_id'];
  const absent = expected.find((token) => !serialised.includes(token));
  if (absent !== undefined) throw new Error(`otlp trace smoke missing ${absent}`);
}
|
|
336
|
+
|
|
337
|
+
/**
 * Smoke check for the `simpleRun` convenience API.
 *
 * Runs a single agent function with an initial state and requires truthy
 * `ok`, `output`, `state.output` and `session_id` on the result, plus an
 * `agent_result_returned` event in the result's runtime store.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the result shape or the event log does not match.
 */
async function runSimpleAPISmoke() {
  const greet = async (_ctx, state) => ({ message: 'hello', input: state.input });
  const outcome = await simpleRun(greet, { initialState: { input: 'world' } });

  const shapeOk = outcome.ok && outcome.output && outcome.state.output && outcome.session_id;
  if (!shapeOk) throw new Error('simple api smoke result mismatch');

  const runEvents = outcome.runtime.store.events(outcome.run_id);
  const sawResult = runEvents.some((event) => event.type === 'agent_result_returned');
  if (!sawResult) throw new Error('simple api smoke missing result event');
}
|
|
342
|
+
|
|
343
|
+
/**
 * Smoke check for the evidence consumers: trace spans, evidence diff,
 * divergence report and debug summary.
 *
 * Produces a "left" evidence bundle from a real run (tool call, media
 * artifact, stream checkpoint, state write) and a "right" bundle that is a
 * JSON copy of it with a different run id, bundle hash and final answer, one
 * extra event, and no media artifacts or stream checkpoints.
 *
 * @returns {Promise<void>}
 * @throws {Error} With a message naming the first consumer whose output does
 *   not match.
 */
async function runEvidenceConsumersSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  runtime.registerTool({
    name: 'docs.echo',
    version: 'v1',
    sideEffect: 'none',
    func: async (args) => ({ echo: args.text }),
  });
  const { runId } = await runtime.createRun({ input: 'left' });
  const evidenceAgent = async (ctx, state) => {
    await ctx.callTool('docs.echo', { text: state.input });
    await ctx.createMediaArtifact('frame-0001', 'frame', { uri: 'file://frame.png', lineage: { source: 'camera' } });
    await ctx.createStreamCheckpoint('audio-checkpoint', {
      streamId: 'audio-stream',
      consumerId: 'asr',
      offset: 1,
      watermark: '00:00:01',
      chunk: { chunkId: 'c1', streamId: 'audio-stream', offset: 1 },
    });
    await ctx.writeState('answer', 'left');
  };
  await runtime.runOnce({ runId, agentRole: 'EvidenceAgent', agent: evidenceAgent });

  // Build the diverging bundle from a JSON copy so `left` is never mutated.
  const left = exportEvidence(runtime.store, runId);
  const right = JSON.parse(JSON.stringify(left));
  const lastEvent = right.events[right.events.length - 1];
  Object.assign(right, {
    run: { ...right.run, run_id: `${left.run.run_id}-shadow` },
    bundle_hash: 'different',
    final_state: { ...right.final_state, answer: 'right' },
    events: [...right.events, { ...lastEvent, seq: right.events.length + 1, type: 'shadow_event' }],
    media_artifacts: [],
    stream_checkpoints: [],
  });

  // Trace spans.
  const spans = traceSpans(left);
  const hasFirstEventSpan = spans.some((span) => span.span_id === 'evt-000001' && span.attributes['agentledger.run_id'] === runId);
  if (!hasFirstEventSpan) throw new Error('trace span event smoke mismatch');
  const artifactSpansOk =
    spans.some((span) => span.name === 'media_artifact') &&
    spans.some((span) => span.name === 'stream_checkpoint') &&
    traceJSONL(left).includes('evt-000001');
  if (!artifactSpansOk) throw new Error('trace span artifact smoke mismatch');

  // Evidence diff: every listed section must report at least one change.
  const diff = diffEvidence(left, right);
  const sections = ['final_state', 'event_types', 'media_artifacts', 'stream_checkpoints'];
  if (diff.same || sections.some((section) => diff.changes[section].changed_count < 1)) throw new Error('evidence diff smoke mismatch');

  // Divergence report.
  const divergence = divergenceReport(left, right);
  for (const dimension of ['events', 'state', 'media_artifacts', 'stream_checkpoints']) {
    if (!divergence.changed_dimensions.includes(dimension)) throw new Error(`divergence smoke missing ${dimension}`);
  }

  // Debug summary.
  const summary = debugSummary(left);
  const summaryOk = summary.run_id === runId && summary.event_count === left.events.length && summary.final_state.answer;
  if (!summaryOk) throw new Error('debug summary smoke mismatch');
}
|
|
371
|
+
|
|
372
|
+
/**
 * Smoke check for the file-backed local store.
 *
 * Runs an agent against `Runtime.local(<tmp>/state.json)`, reopens the same
 * file with `JSONStore.open`, and verifies the reopened store still yields the
 * final state, an evidence bundle, a replay summary and one ledger entry.
 * The temporary directory is removed even when a check fails.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the run does not complete or the reopened data does
 *   not match.
 */
async function runLocalPersistenceSmoke() {
  const workDir = await mkdtemp(join(tmpdir(), 'agentledger-ts-conformance-'));
  try {
    const statePath = join(workDir, 'state.json');
    const runtime = await Runtime.local(statePath);
    runtime.registerTool({
      name: 'docs.persist',
      version: 'v1',
      sideEffect: 'external',
      idempotencyRequired: true,
      func: async (args) => ({ external_id: 'persist-1', echo: args.text }),
    });

    const { runId } = await runtime.createRun({ input: 'persist' });
    const persistAgent = async (ctx, state) => {
      const persisted = await ctx.callTool('docs.persist', { text: state.input });
      await ctx.writeState('tool_result', persisted);
    };
    const completed = await runtime.runOnce({
      runId,
      workerId: 'worker-persist',
      agentRole: 'PersistenceAgent',
      agent: persistAgent,
    });
    if (!completed) throw new Error('local persistence smoke did not complete');

    // Everything below reads through a second store opened on the same file.
    const reopened = await JSONStore.open(statePath);
    if (!reopened.finalState(runId).tool_result) throw new Error('local persistence smoke missing reopened final state');

    const evidence = exportEvidence(reopened, runId);
    const replayed = replay(reopened, runId);
    const consistent =
      evidence.bundle_hash &&
      replayed.replay_safe &&
      replayed.event_count === evidence.events.length &&
      reopened.ledger(runId).length === 1 &&
      replayed.tool_call_count !== 0;
    if (!consistent) throw new Error('local persistence evidence/replay mismatch');
  } finally {
    await rm(workDir, { recursive: true, force: true });
  }
}
|
|
398
|
+
|
|
399
|
+
/**
 * Smoke check for `LocalBlobStore` in a temporary directory.
 *
 * Stores the same JSON value twice and verifies the digest/ref prefixes, that
 * both writes yield the same digest and ref, that reading the ref returns an
 * equal value, and that a ref with an unsupported scheme is rejected.
 * The temporary directory is removed even when a check fails.
 *
 * @returns {Promise<void>}
 * @throws {Error} With a message naming the first expectation that failed.
 */
async function runLocalBlobStoreSmoke() {
  const blobDir = await mkdtemp(join(tmpdir(), 'agentledger-ts-blobs-'));
  try {
    const store = await LocalBlobStore.open(blobDir);
    const payload = { hello: 'world', nested: { n: 1 } };

    const firstPut = await store.putJSON(payload);
    const secondPut = await store.putJSON(payload);

    const wellFormed = firstPut.digest.startsWith('sha256:') && firstPut.ref.startsWith('blob://sha256/');
    if (!wellFormed) throw new Error('local blob store invalid digest/ref');

    const stable = firstPut.digest === secondPut.digest && firstPut.ref === secondPut.ref;
    if (!stable) throw new Error('local blob store ref was not stable');

    assertDeepEqual(await store.getJSON(firstPut.ref), payload, 'local blob store roundtrip mismatch');

    // Any error from getJSON counts as a rejection of the bad ref.
    const rejected = await store.getJSON('unsupported://blob').then(
      () => false,
      () => true,
    );
    if (!rejected) throw new Error('local blob store accepted unsupported ref');
  } finally {
    await rm(blobDir, { recursive: true, force: true });
  }
}
|
|
420
|
+
|
|
421
|
+
/**
 * Throws unless `left` and `right` have the same JSON representation.
 *
 * Keys of plain objects are sorted before the comparison, so two values that
 * differ only in property insertion order (for example after a store has
 * re-serialised them) compare equal. Array order still matters. Like the plain
 * `JSON.stringify` comparison it replaces, anything JSON drops or normalises
 * (`undefined` properties, functions, `NaN`) is ignored or normalised here too.
 *
 * @param {unknown} left - First value.
 * @param {unknown} right - Second value.
 * @param {string} message - Message of the error thrown on mismatch.
 * @returns {void}
 * @throws {Error} `Error(message)` when the canonical JSON forms differ.
 */
function assertDeepEqual(left, right, message) {
  const canonical = (value) =>
    JSON.stringify(value, (_key, item) => {
      if (item === null || typeof item !== 'object' || Array.isArray(item)) return item;
      // Leave non-plain objects (boxed primitives, class instances) to the
      // default serialisation; only plain records get their keys reordered.
      const proto = Object.getPrototypeOf(item);
      if (proto !== Object.prototype && proto !== null) return item;
      return Object.fromEntries(Object.keys(item).sort().map((key) => [key, item[key]]));
    });

  if (canonical(left) !== canonical(right)) throw new Error(message);
}
|
|
424
|
+
|
|
425
|
+
/**
 * Smoke check for tool input/output schema validation.
 *
 * Three scenarios:
 *  1. calling `docs.echo` with `{}` must fail with a message matching
 *     `required|expected` and must not invoke the tool function;
 *  2. a valid call must complete and invoke the tool function exactly once;
 *  3. a tool whose result violates its output schema must fail with a message
 *     matching `required|not allowed|expected` and leave no
 *     `tool_call_completed` event.
 *
 * @returns {Promise<void>}
 * @throws {Error} With a message naming the scenario that did not match.
 */
async function runToolSchemaValidationSmoke() {
  const inputSchema = {
    type: 'object',
    required: ['text'],
    additionalProperties: false,
    properties: { text: { type: 'string', minLength: 1 } },
  };
  const outputSchema = {
    type: 'object',
    required: ['echo'],
    additionalProperties: false,
    properties: { echo: { type: 'string' } },
  };

  // Scenario 1: invalid input is rejected before the tool function runs.
  const runtime = new Runtime(JSONStore.memory());
  let calls = 0;
  runtime.registerTool({
    name: 'docs.echo',
    inputSchema,
    outputSchema,
    func: async (args) => {
      calls += 1;
      return { echo: args.text };
    },
  });
  const { runId: badRun } = await runtime.createRun({});
  let inputRejected = false;
  try {
    await runtime.runOnce({ runId: badRun, agentRole: 'SchemaAgent', agent: (ctx) => ctx.callTool('docs.echo', {}) });
  } catch (error) {
    inputRejected = /required|expected/.test(error.message ?? String(error));
  }
  if (!inputRejected || calls !== 0) throw new Error('tool schema input validation smoke mismatch');

  // Scenario 2: valid input and output pass.
  const { runId: goodRun } = await runtime.createRun({ text: 'hello' });
  const validAgent = async (ctx, state) => {
    const echoed = await ctx.callTool('docs.echo', { text: state.text });
    await ctx.writeState('result', echoed);
  };
  const completed = await runtime.runOnce({ runId: goodRun, agentRole: 'SchemaAgent', agent: validAgent });
  if (!completed || calls !== 1) throw new Error('tool schema valid call smoke mismatch');

  // Scenario 3: invalid output is rejected.
  const badOutput = new Runtime(JSONStore.memory());
  badOutput.registerTool({ name: 'docs.bad', outputSchema, func: async () => ({ bad: true }) });
  const { runId: badOutRun } = await badOutput.createRun({});
  let outputRejected = false;
  try {
    await badOutput.runOnce({ runId: badOutRun, agentRole: 'SchemaAgent', agent: (ctx) => ctx.callTool('docs.bad', {}) });
  } catch (error) {
    outputRejected = /required|not allowed|expected/.test(error.message ?? String(error));
  }
  if (!outputRejected || badOutput.store.events(badOutRun).some((event) => event.type === 'tool_call_completed')) {
    throw new Error('tool schema output validation smoke mismatch');
  }
}
|
|
455
|
+
|
|
456
|
+
/**
 * Smoke check for `LocalWorker` and `WorkerService`.
 *
 * Verifies that `runUntilIdle` finishes a run in one successful attempt with
 * stop reason `terminal_status`, that serving the same finished run also stops
 * with `terminal_status`, and that serving with `runId: null` stops as `idle`
 * after one idle poll and zero attempts.
 *
 * @returns {Promise<void>}
 * @throws {Error} With a message naming the scenario that did not match.
 */
async function runWorkerServiceSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({ input: 'worker' });
  const agent = async (ctx) => ctx.writeState('done', true);
  const worker = new LocalWorker(runtime, { workerId: 'worker-service', agentRole: 'WorkerAgent' });

  const drained = await worker.runUntilIdle({ runId, maxIterations: 3, agent });
  const drainedOk =
    drained.attempts === 1 &&
    drained.succeeded_attempts === 1 &&
    drained.final_status === 'completed' &&
    drained.stopped_reason === 'terminal_status';
  if (!drainedOk) throw new Error('worker terminal smoke mismatch');

  const finishedRunService = new WorkerService(worker);
  const terminal = await finishedRunService.serve({ runId, maxLoops: 3, maxIdlePolls: 1, agent });
  if (terminal.stopped_reason !== 'terminal_status') throw new Error('worker service terminal smoke mismatch');

  const noRunService = new WorkerService(worker);
  const idle = await noRunService.serve({ runId: null, maxLoops: 3, maxIdlePolls: 1, agent });
  const idleOk = idle.stopped_reason === 'idle' && idle.idle_polls === 1 && idle.attempts === 0;
  if (!idleOk) throw new Error('worker service idle smoke mismatch');
}
|
|
470
|
+
|
|
471
|
+
/**
 * Smoke check for idempotent tool execution across a retried attempt.
 *
 * The agent calls an external, idempotency-required tool and then throws a
 * `RetryableAgentError` on its first attempt. A second `runOnce` must complete,
 * while the tool function has run only once and the ledger holds a single
 * `SUCCEEDED` entry.
 *
 * @returns {Promise<void>}
 * @throws {Error} When an attempt outcome or the ledger does not match.
 */
async function runToolLedgerSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  let calls = 0;
  runtime.registerTool({
    name: 'github.create_pr',
    sideEffect: 'external',
    idempotencyRequired: true,
    func: async (args) => {
      calls += 1;
      return { external_id: 'pr-123', title: args.title };
    },
  });

  const { runId } = await runtime.createRun({ title: 'runtime parity' });
  const agent = async (ctx, state) => {
    const created = await ctx.callTool('github.create_pr', { title: state.title });
    // Simulate a crash after the side effect but before the state write.
    if (ctx.attempt === 1) throw new RetryableAgentError('crash after side effect');
    await ctx.writeState('pr', created);
  };

  const firstAttempt = await runtime.runOnce({ runId, workerId: 'worker-a', agent });
  if (firstAttempt) throw new Error('tool ledger smoke first attempt should retry');

  const secondAttempt = await runtime.runOnce({ runId, workerId: 'worker-b', agent });
  if (!secondAttempt) throw new Error('tool ledger smoke second attempt should complete');

  const entries = runtime.store.ledger(runId);
  const ledgerOk = calls === 1 && entries.length === 1 && entries[0].status === 'SUCCEEDED';
  if (!ledgerOk) throw new Error('tool ledger smoke mismatch');
}
|
|
486
|
+
|
|
487
|
+
/**
 * Runs the policy, approval and sandbox smokes in that order, one at a time.
 *
 * @returns {Promise<void>}
 * @throws {Error} Propagated from the first smoke that fails.
 */
async function runPolicyApprovalSandboxSmoke() {
  const stages = [runPolicySmoke, runApprovalSmoke, runSandboxSmoke];
  for (const stage of stages) {
    await stage();
  }
}
|
|
492
|
+
|
|
493
|
+
/**
 * Smoke check that a high-risk tool call is denied.
 *
 * Registers `repo.write` with `riskLevel: 'high'` and runs an agent that calls
 * it. The run must fail with a message matching `high-risk tool denied`, the
 * tool function must never run, and a `tool_permission_decided` event with
 * `payload.allowed === false` must be recorded.
 *
 * @returns {Promise<void>}
 * @throws {Error} `policy smoke mismatch` when any of those conditions fails.
 */
async function runPolicySmoke() {
  const runtime = new Runtime(JSONStore.memory());
  let calls = 0;
  runtime.registerTool({
    name: 'repo.write',
    riskLevel: 'high',
    func: async () => {
      calls += 1;
      return { ok: true };
    },
  });

  const { runId } = await runtime.createRun({});
  let denied = false;
  try {
    await runtime.runOnce({
      runId,
      agentRole: 'Reviewer',
      agent: (ctx) => ctx.callTool('repo.write', { path: 'README.md' }),
    });
  } catch (error) {
    denied = /high-risk tool denied/.test(error.message ?? String(error));
  }

  const policyOk =
    denied &&
    calls === 0 &&
    runtime.store.events(runId).some((event) => event.type === 'tool_permission_decided' && event.payload.allowed === false);
  if (!policyOk) throw new Error('policy smoke mismatch');
}
|
|
506
|
+
|
|
507
|
+
/**
 * Smoke check for human approval: a tool registered with
 * `approvalRequired: true` must not run on the first attempt, which is
 * expected to leave one PENDING approval request and a step in
 * `waiting_human`. After the request is approved, a second attempt must
 * complete and invoke the tool exactly once.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the pause, the pending state, or the resume differs from expectations.
 */
async function runApprovalSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  let invocations = 0;
  runtime.registerTool({
    name: 'github.create_pr',
    riskLevel: 'high',
    approvalRequired: true,
    sideEffect: 'external',
    idempotencyRequired: true,
    func: async () => {
      invocations += 1;
      return { external_id: 'pr-42' };
    },
  });
  const { runId } = await runtime.createRun({});

  const agent = async (ctx) => {
    const pr = await ctx.callTool('github.create_pr', { title: 'safe' });
    await ctx.writeState('pr', pr);
  };

  const firstCompleted = await runtime.runOnce({ runId, workerId: 'worker-a', agentRole: 'Coder', agent });
  if (firstCompleted) {
    throw new Error('approval smoke should pause');
  }

  const requests = runtime.store.approvalRequests(runId);
  const awaitingHuman =
    invocations === 0 &&
    requests.length === 1 &&
    requests[0].status === 'PENDING' &&
    runtime.store.steps(runId)[0].status === 'waiting_human';
  if (!awaitingHuman) {
    throw new Error('approval smoke pending mismatch');
  }

  await runtime.store.approveRequest(requests[0].approval_id, { approver: 'alice', reason: 'reviewed' });
  const resumed = await runtime.runOnce({ runId, workerId: 'worker-b', agentRole: 'Coder', agent });
  if (!resumed || invocations !== 1) {
    throw new Error('approval smoke resume mismatch');
  }
}
|
|
522
|
+
|
|
523
|
+
/**
 * Smoke check for sandbox enforcement: an agent with role 'Executor' calls a
 * tool registered with `sandboxRequired: true`. The run is expected to throw
 * an error whose message matches /sandbox executor/, the tool function must
 * never be invoked, and both `sandbox_started` and `tool_call_failed` events
 * must be recorded.
 *
 * @returns {Promise<void>}
 * @throws {Error} 'sandbox smoke mismatch' when any expectation fails; the
 *   error thrown by the run (if any) is attached as `cause` so the root
 *   problem is not lost.
 */
async function runSandboxSmoke() {
  const rt = new Runtime(JSONStore.memory());
  let calls = 0;
  rt.registerTool({ name: 'shell.exec', sandboxRequired: true, func: async () => { calls += 1; return { ok: true }; } });
  const { runId } = await rt.createRun({});
  let failed = false;
  let caught;
  try {
    await rt.runOnce({ runId, agentRole: 'Executor', agent: (ctx) => ctx.callTool('shell.exec', { argv: ['echo', 'hi'] }) });
  } catch (error) {
    caught = error;
    // Optional chaining keeps a thrown null/undefined from crashing the check itself.
    failed = /sandbox executor/.test(error?.message ?? String(error));
  }
  const events = rt.store.events(runId);
  const hasEvent = (type) => events.some((event) => event.type === type);
  if (!failed || calls !== 0 || !hasEvent('sandbox_started') || !hasEvent('tool_call_failed')) {
    throw new Error('sandbox smoke mismatch', caught === undefined ? undefined : { cause: caught });
  }
}
|
|
537
|
+
|
|
538
|
+
/**
 * Smoke check for budgets and cost/failure attribution: with a budget of
 * `maxToolCalls: 1`, an agent records one model call and then calls a tool
 * twice. The run is expected to throw an error whose message matches
 * /tool call budget exceeded/ after exactly one tool invocation, and the cost
 * summary, per-agent cost attribution, and failure attribution must reflect
 * that single call and the failed budget check.
 *
 * @returns {Promise<void>}
 * @throws {Error} 'cost/failure smoke mismatch' when any expectation fails;
 *   the error thrown by the run (if any) is attached as `cause` so the root
 *   problem is not lost.
 */
async function runCostFailureSmoke() {
  const rt = new Runtime(JSONStore.memory());
  rt.setBudget({ maxToolCalls: 1 });
  let calls = 0;
  rt.registerTool({ name: 'docs.echo', func: async (args) => { calls += 1; return { echo: args.text }; } });
  const { runId } = await rt.createRun({});
  let failed = false;
  let caught;
  try {
    await rt.runOnce({ runId, agentRole: 'Researcher', agent: async (ctx) => {
      await ctx.recordModelCall({ model: 'gpt-test', inputTokens: 10, outputTokens: 5, totalUsd: 0.01 });
      await ctx.callTool('docs.echo', { text: 'first' });
      // The second call exceeds the one-call budget set above.
      await ctx.callTool('docs.echo', { text: 'second' });
    } });
  } catch (error) {
    caught = error;
    // Optional chaining keeps a thrown null/undefined from crashing the check itself.
    failed = /tool call budget exceeded/.test(error?.message ?? String(error));
  }
  const summary = rt.store.costSummary(runId);
  const cost = costAttribution(rt.store, runId);
  const failure = failureAttribution(rt.store, runId);
  const mismatch =
    !failed ||
    calls !== 1 ||
    summary.tool_calls !== 1 ||
    summary.model_tokens !== 15 ||
    summary.total_usd !== 0.01 ||
    cost.by_agent.Researcher.tool_calls !== 1 ||
    failure.summary.failed_step_count !== 1 ||
    !failure.failure_events.some((event) => event.type === 'budget_check_failed');
  if (mismatch) {
    throw new Error('cost/failure smoke mismatch', caught === undefined ? undefined : { cause: caught });
  }
}
|
|
559
|
+
|
|
560
|
+
/**
 * Smoke check for media artifacts and stream checkpoints: an agent creates
 * one media artifact and one stream checkpoint, then the exported evidence
 * summary and the replay summary must both report those counts.
 *
 * @returns {Promise<void>}
 * @throws {Error} 'media/stream smoke mismatch' when the run does not complete or a count differs.
 */
async function runMediaStreamSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({});

  const agent = async (ctx) => {
    const frame = await ctx.createMediaArtifact('frame-0001', 'frame', {
      uri: 's3://media/demo/frame-0001.jpg',
      mediaMetadata: { mime_type: 'image/jpeg' },
    });
    const checkpoint = await ctx.createStreamCheckpoint('camera-checkpoint', {
      streamId: 'camera-1',
      consumerId: 'vision-agent',
      offset: 7,
      chunk: { streamId: 'camera-1', chunkId: 'chunk-7', offset: 7 },
    });
    await ctx.writeState('artifacts', { frame, checkpoint });
  };
  const completed = await runtime.runOnce({ runId, workerId: 'worker-media', agentRole: 'MediaAgent', agent });

  const evidence = exportEvidence(runtime.store, runId);
  const replayed = replay(runtime.store, runId);
  const countsMatch =
    completed &&
    evidence.summary.artifact_count === 2 &&
    evidence.summary.media_artifact_count === 1 &&
    evidence.summary.stream_checkpoint_count === 1 &&
    replayed.media_artifact_count === 1 &&
    replayed.stream_checkpoint_count === 1;
  if (!countsMatch) {
    throw new Error('media/stream smoke mismatch');
  }
}
|
|
572
|
+
|
|
573
|
+
/**
 * CLI entry point. Dispatches on the argument list and returns the process
 * exit code instead of exiting directly.
 *
 * Recognised invocations: no arguments / `--help` / `help`, `version`,
 * `doctor`, `quickstart`, `conformance`, `contract validate`, and
 * `contract export`. Anything else prints an "unknown command" message to
 * stderr.
 *
 * @param {string[]} [args] Command-line arguments; defaults to `process.argv.slice(2)`.
 * @returns {Promise<number>} 0 for a recognised command, 1 for an unknown one.
 */
export async function main(args = process.argv.slice(2)) {
  const argCount = args.length;
  const command = args[0];
  const subcommand = args[1];

  if (argCount === 0) {
    console.log(usage());
    return 0;
  }

  if (argCount === 1) {
    switch (command) {
      case '--help':
      case 'help':
        console.log(usage());
        return 0;
      case 'version':
        console.log('agentledger-ts 1.0.2');
        return 0;
      case 'doctor': {
        const report = { language: 'typescript', version: '1.0.2', status: 'ok', runtime_core_parity: true };
        console.log(JSON.stringify(report, null, 2));
        return 0;
      }
      case 'quickstart': {
        const result = await simpleRun(
          async (_ctx, state) => ({ message: 'hello from typescript', input: state.input }),
          { initialState: { input: 'world' } },
        );
        console.log(JSON.stringify({ run_id: result.run_id, output: result.output, state: result.state }, null, 2));
        return 0;
      }
      case 'conformance': {
        const checks = validateFixtures();
        const semanticChecks = await runSemanticSmokes();
        const report = {
          language: 'typescript',
          suite: 'agentledger_runtime_core',
          passed: true,
          checks: ['contract_validate', ...checks, ...semanticChecks],
        };
        console.log(JSON.stringify(report, null, 2));
        return 0;
      }
      default:
        break;
    }
  }

  if (argCount === 2 && command === 'contract') {
    if (subcommand === 'validate') {
      validateContract();
      return 0;
    }
    if (subcommand === 'export') {
      process.stdout.write(readFileSync(contractPath(), 'utf8'));
      return 0;
    }
  }

  console.error(`unknown command ${args.join(' ')}; run agentledger-ts --help`);
  return 1;
}
|
|
608
|
+
|
|
609
|
+
// Run the CLI only when this module is the process entry point.
// A plain `file://${process.argv[1]}` comparison misses three cases: launches
// through a symlink (such as an npm `.bin` shim), paths with characters that
// are percent-encoded in URLs, and Windows paths. So the module URL is
// compared against properly built file URLs for both the literal entry path
// and its symlink-resolved form.
{
  const entryPath = process.argv[1];
  let isEntryPoint = false;
  if (entryPath) {
    const [{ realpathSync }, { pathToFileURL }] = await Promise.all([import('node:fs'), import('node:url')]);
    const candidates = [pathToFileURL(entryPath).href];
    try {
      candidates.push(pathToFileURL(realpathSync(entryPath)).href);
    } catch {
      // The entry path could not be resolved on disk; fall back to the literal path only.
    }
    isEntryPoint = candidates.includes(import.meta.url);
  }
  if (isEntryPoint) {
    try {
      process.exitCode = await main();
    } catch (error) {
      console.error(error instanceof Error ? error.message : String(error));
      process.exitCode = 1;
    }
  }
}
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
/**
 * Smoke check for the boundary linter: a Python snippet with a direct
 * `os.system` call and a direct `requests.post` call must yield exactly two
 * findings with the expected rule ids, in that order, and an
 * `# agentledger: ignore-next-line` comment must suppress the finding.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the finding count, the rule ids, or the ignore handling differ.
 */
async function runBoundaryLintSmoke() {
  const unsafeSource = "import os\nimport requests\nos.system('echo unsafe')\nrequests.post('https://example.com')\n";
  const unsafeReport = scanBoundarySource('agent.py', unsafeSource);
  if (unsafeReport.passed || unsafeReport.finding_count !== 2) {
    throw new Error('boundary lint finding count mismatch');
  }

  const rulesMatch =
    unsafeReport.findings[0].rule_id === 'direct-shell-os-system' &&
    unsafeReport.findings[1].rule_id === 'direct-http-requests';
  if (!rulesMatch) {
    throw new Error('boundary lint rule mismatch');
  }

  const suppressedSource = "import os\n# agentledger: ignore-next-line\nos.system('echo intentional')\n";
  const suppressedReport = scanBoundarySource('agent.py', suppressedSource);
  const suppressed = suppressedReport.passed && suppressedReport.finding_count === 0;
  if (!suppressed) {
    throw new Error('boundary lint ignore mismatch');
  }
}
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
/**
 * Smoke check for the scheduler: verifies the initial status of a new run,
 * claims its step with a zero-second lease, expects lease recovery to report
 * one recovered step, then cancels the run and verifies the cancelled status.
 *
 * @returns {Promise<void>}
 * @throws {Error} at the first scheduler expectation that does not hold.
 */
async function runSchedulerSmoke() {
  const ensure = (ok, message) => {
    if (!ok) throw new Error(message);
  };

  const store = JSONStore.memory();
  const runtime = new Runtime(store);
  const { runId } = await runtime.createRun({ input: 'scheduler' });
  const scheduler = new RuntimeScheduler(store);

  const initial = scheduler.status(runId);
  ensure(
    initial.run_id === runId && initial.run_status === 'pending' && initial.steps.length === 1 && Boolean(initial.cost_summary),
    'scheduler status mismatch',
  );

  // A zero-second lease is requested so the recovery call below has a claim to recover.
  const claim = await store.claimStep({ workerId: 'scheduler-stale', runId, leaseSeconds: 0 });
  ensure(Boolean(claim), 'scheduler claim failed');

  const recovery = await scheduler.recoverExpiredLeases();
  ensure(recovery.recovered_steps === 1, 'scheduler recovered steps mismatch');

  const cancelledCount = await scheduler.cancelRun(runId, 'scheduler smoke');
  ensure(cancelledCount === 1, 'scheduler cancelled steps mismatch');

  const final = scheduler.status(runId);
  ensure(final.run_status === 'cancelled' && final.steps[0].status === 'cancelled', 'scheduler cancelled status mismatch');
}
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
/**
 * Smoke check for adversarial review: evidence from a simple run must pass,
 * while a copy with a pending high-risk approval and a copy whose total cost
 * exceeds the supplied `maxTotalUsd` must both fail.
 *
 * @returns {Promise<void>}
 * @throws {Error} when any of the three review outcomes differs from expectations.
 */
async function runAdversarialReviewSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({ input: 'review' });
  await runtime.runOnce({
    runId,
    workerId: 'review-worker',
    agentRole: 'ReviewAgent',
    agent: async (ctx) => ctx.writeState('answer', 'ok'),
  });

  const baseline = exportEvidence(runtime.store, runId);
  const cleanReport = adversarialReview(baseline);
  if (!cleanReport.passed) {
    throw new Error('clean adversarial review should pass');
  }

  // Mutate deep copies so the baseline bundle stays untouched.
  const withPendingApproval = structuredClone(baseline);
  withPendingApproval.summary.has_pending_approvals = true;
  withPendingApproval.approvals = [{ risk_level: 'high', status: 'PENDING' }];
  if (adversarialReview(withPendingApproval).passed) {
    throw new Error('pending high-risk approval should block');
  }

  const overBudget = structuredClone(baseline);
  overBudget.cost_summary.total_usd = 1.0;
  if (adversarialReview(overBudget, { maxTotalUsd: 0.5 }).passed) {
    throw new Error('cost limit should block');
  }
}
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
/**
 * Smoke check for evidence evaluation: evidence from a simple run must pass
 * the health check, while regression evaluation must fail for a copy with a
 * changed final state plus an extra event, and for a copy whose cost exceeds
 * the supplied `maxTotalUsdDelta`.
 *
 * @returns {Promise<void>}
 * @throws {Error} when any of the three evaluation outcomes differs from expectations.
 */
async function runEvidenceRegressionSmoke() {
  const runtime = new Runtime(JSONStore.memory());
  const { runId } = await runtime.createRun({ input: 'eval' });
  await runtime.runOnce({
    runId,
    workerId: 'eval-worker',
    agentRole: 'EvalAgent',
    agent: async (ctx) => ctx.writeState('answer', 'ok'),
  });

  const baseline = exportEvidence(runtime.store, runId);
  if (!evaluateEvidence(baseline).passed) {
    throw new Error('clean evidence health check should pass');
  }

  // Mutate deep copies so the baseline bundle stays untouched.
  const drifted = structuredClone(baseline);
  drifted.final_state = { answer: 'changed' };
  drifted.events.push({ seq: drifted.events.length + 1, type: 'extra_event' });
  if (evaluateEvidenceRegression(baseline, drifted).passed) {
    throw new Error('regression changes should fail');
  }

  const overBudget = structuredClone(baseline);
  overBudget.cost_summary.total_usd = 1.0;
  if (evaluateEvidenceRegression(baseline, overBudget, { maxTotalUsdDelta: 0.5 }).passed) {
    throw new Error('cost delta should fail');
  }
}
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
/**
 * Smoke check for shadow-run helpers: `diffStates` must report two changed
 * keys for the sample states, and `shadowReport` must echo the run ids it was
 * given and report `ok`.
 *
 * @returns {void}
 * @throws {Error} when the diff count or the report fields differ from expectations.
 */
function runShadowSmoke() {
  const sourceState = { answer: 'old', stable: true };
  const shadowState = { answer: 'new', stable: true, extra: 1 };
  const stateDiff = diffStates(sourceState, shadowState);
  if (stateDiff.changed_count !== 2) {
    throw new Error('shadow diff changed_count mismatch');
  }

  const summary = shadowReport('run_source', 'run_shadow', true, { answer: 'old' }, { answer: 'new' });
  const reportMatches = summary.source_run_id === 'run_source' && summary.shadow_run_id === 'run_shadow' && summary.ok;
  if (!reportMatches) {
    throw new Error('shadow report mismatch');
  }
}
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
/**
 * Smoke check for built-in golden evidence: the list of golden names must
 * match the expected three, the 'minimal-success' bundle must carry the
 * expected schema version and final answer, and golden regression must fail
 * when the final state is altered.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the names, the bundle contents, or the regression outcome differ.
 */
async function runReproGoldenSmoke() {
  const expectedNames = 'media-stream-checkpoint,minimal-success,tool-ledger-success';
  const actualNames = builtinGoldenNames().join(',');
  if (actualNames !== expectedNames) {
    throw new Error('builtin golden names mismatch');
  }

  const golden = await builtinGoldenEvidence('minimal-success');
  const goldenIsValid = golden.schema_version === 'agentledger.evidence.v1' && golden.final_state.answer === 'ok';
  if (!goldenIsValid) {
    throw new Error('minimal golden evidence mismatch');
  }

  const altered = structuredClone(golden);
  altered.final_state = { answer: 'changed' };
  if (goldenRegression(golden, altered).passed) {
    throw new Error('golden regression should detect final state change');
  }
}
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
/**
 * Smoke check for time travel: a report built from the 'minimal-success'
 * golden bundle at `atSeq: 999` must expose the final answer, count every
 * event, contain at least one state-changing frame, and render to HTML that
 * includes the expected headings.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the report state, the timeline, or the rendered HTML differ from expectations.
 */
async function runTimeTravelTimelineSmoke() {
  const golden = await builtinGoldenEvidence('minimal-success');
  const report = timeTravel(golden, { atSeq: 999, includeStates: true });

  const stateMatches = report.state_at_seq.answer === 'ok' && report.event_count === golden.events.length;
  if (!stateMatches) {
    throw new Error('time travel state mismatch');
  }

  const hasChangedFrame = report.timeline.some((frame) => frame.state_changed);
  if (!hasChangedFrame) {
    throw new Error('time travel missing changed frame');
  }

  const rendered = timeTravelHTML(report);
  const requiredTokens = ['AgentLedger Time Travel Report', 'State At Selected Point', 'Selected Event'];
  const missingToken = requiredTokens.find((token) => !rendered.includes(token));
  if (missingToken !== undefined) {
    throw new Error(`time travel html missing ${missingToken}`);
  }
}
|
|
705
|
+
|
|
706
|
+
/**
 * Smoke check for the optional adapter capability list: every capability
 * must be flagged optional and fail-closed, must not have
 * `core_imports_heavy_sdks` set, and must have a non-empty contract surface;
 * the list must also include each expected adapter name.
 *
 * @returns {void}
 * @throws {Error} naming the first invalid capability or the first missing adapter.
 */
function runOptionalAdaptersSmoke() {
  const capabilities = optionalAdapterCapabilities();
  const knownNames = new Set(capabilities.map((capability) => capability.name));

  const isValid = (capability) =>
    !capability.core_imports_heavy_sdks &&
    capability.adapter_is_optional &&
    capability.fail_closed_without_adapter &&
    capability.contract_surface?.length;
  const invalid = capabilities.find((capability) => !isValid(capability));
  if (invalid !== undefined) {
    throw new Error(`invalid optional adapter capability: ${invalid.name}`);
  }

  const requiredNames = ['postgres', 's3', 'docker', 'langgraph', 'mcp-transport', 'shadow-runner'];
  const missingName = requiredNames.find((name) => !knownNames.has(name));
  if (missingName !== undefined) {
    throw new Error(`missing optional adapter capability: ${missingName}`);
  }
}
|
|
716
|
+
|
|
717
|
+
/**
 * Smoke check for the official adapters, each driven by an in-memory stub
 * client: the Postgres migration plan and apply, an S3 JSON round trip, the
 * OTLP transport's content type, and the Docker sandbox manifest.
 *
 * @returns {Promise<void>}
 * @throws {Error} at the first adapter expectation that does not hold.
 */
async function runOfficialAdaptersSmoke() {
  // Postgres: the stub only counts how many statements were executed.
  const sqlClient = {
    count: 0,
    async exec() {
      this.count += 1;
    },
  };
  const postgres = new PostgresAdapter(sqlClient);
  if (postgres.migrationPlan()[0].dialect !== 'postgres') {
    throw new Error('postgres adapter plan failed');
  }
  await postgres.applyMigrations();
  if (sqlClient.count < 2) {
    throw new Error('postgres adapter apply failed');
  }

  // S3: objects are kept in a Map keyed by "<bucket>/<key>".
  const stored = new Map();
  const s3Client = {
    async putObject(input) {
      stored.set(`${input.Bucket}/${input.Key}`, input);
    },
    async getObject(bucket, key) {
      return { Body: stored.get(`${bucket}/${key}`).Body };
    },
  };
  const blobStore = new S3BlobStoreAdapter(s3Client, { bucket: 'agentledger-test' });
  const written = await blobStore.putJSON({ answer: 'ok' });
  if (!written.ref.startsWith('s3://agentledger-test/agentledger/blobs/sha256/')) {
    throw new Error('s3 ref mismatch');
  }
  const readBack = await blobStore.getJSON(written.ref);
  if (readBack.answer !== 'ok') {
    throw new Error('s3 roundtrip failed');
  }

  // OTLP: the stub records the content type it was asked to post with.
  const httpClient = {
    contentType: null,
    async postJSON(_endpoint, _payload, contentType) {
      this.contentType = contentType;
    },
  };
  const transport = new OTLPTransport(httpClient, { endpoint: 'http://collector' });
  await transport.export({});
  if (httpClient.contentType !== 'application/json') {
    throw new Error('otlp transport failed');
  }

  // Docker: only the generated manifest is inspected; nothing is executed here.
  const sandboxManifest = new DockerSandboxAdapter().manifest({ network: 'deny' }, ['echo', 'ok']);
  const manifestIsLockedDown =
    sandboxManifest.network === 'none' &&
    sandboxManifest.read_only_root === true &&
    sandboxManifest.requires_explicit_execution === true;
  if (!manifestIsLockedDown) {
    throw new Error('docker manifest failed');
  }
}
|