@lumenflow/surfaces 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +40 -0
- package/cli/__tests__/gates.test.ts +97 -0
- package/cli/__tests__/inspect.test.ts +184 -0
- package/cli/__tests__/task-lifecycle.test.ts +203 -0
- package/cli/gates.ts +46 -0
- package/cli/index.ts +6 -0
- package/cli/inspect.ts +138 -0
- package/cli/task-lifecycle.ts +46 -0
- package/http/__tests__/agent-runtime-remote-controls.test.ts +249 -0
- package/http/__tests__/auth-boundary.test.ts +57 -0
- package/http/__tests__/channel-send-governance.test.ts +158 -0
- package/http/__tests__/event-stream.test.ts +340 -0
- package/http/__tests__/phone-device-tool-api.test.ts +177 -0
- package/http/__tests__/remote-exposure.test.ts +212 -0
- package/http/__tests__/run-agent.test.ts +447 -0
- package/http/__tests__/scope-enforcement.test.ts +349 -0
- package/http/__tests__/sidecar-entry.test.ts +158 -0
- package/http/__tests__/tool-api-schema-validation.test.ts +213 -0
- package/http/__tests__/tool-api.test.ts +491 -0
- package/http/__tests__/tool-discovery.test.ts +384 -0
- package/http/ag-ui-adapter.ts +352 -0
- package/http/auth.ts +294 -0
- package/http/control-plane-event-subscriber.ts +233 -0
- package/http/event-stream.ts +216 -0
- package/http/index.ts +10 -0
- package/http/run-agent.ts +416 -0
- package/http/server.ts +329 -0
- package/http/sidecar-entry.ts +218 -0
- package/http/task-api.ts +307 -0
- package/http/tool-api.ts +373 -0
- package/http/tool-discovery.ts +159 -0
- package/mcp/__tests__/server.test.ts +554 -0
- package/mcp/index.ts +4 -0
- package/mcp/server.ts +250 -0
- package/package.json +51 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
// Copyright (c) 2026 Hellmai Ltd
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import { mkdtemp, mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
|
5
|
+
import { tmpdir } from 'node:os';
|
|
6
|
+
import { join } from 'node:path';
|
|
7
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
8
|
+
import type {
|
|
9
|
+
ClaimTaskResult,
|
|
10
|
+
CompleteTaskResult,
|
|
11
|
+
CreateTaskResult,
|
|
12
|
+
KernelRuntime,
|
|
13
|
+
TaskSpec,
|
|
14
|
+
TaskInspection,
|
|
15
|
+
ToolOutput,
|
|
16
|
+
} from '@lumenflow/kernel';
|
|
17
|
+
import { createTaskLifecycleCommands, initializeTaskLifecycleCommands } from '../task-lifecycle.js';
|
|
18
|
+
|
|
19
|
+
/**
 * Builds the task spec fixture shared by the lifecycle tests.
 *
 * Only `id`, `title` and `description` depend on `taskId`; every other field
 * is a fixed literal that lines up with the workspace fixture (workspace id,
 * lane id and the `software-delivery` domain).
 *
 * @param taskId Identifier to stamp on the generated spec.
 * @returns A fresh spec object on every call.
 */
function createTaskSpec(taskId: string): TaskSpec {
  const spec: TaskSpec = {
    id: taskId,
    workspace_id: 'workspace-surfaces-cli',
    lane_id: 'framework-cli-wu-commands',
    domain: 'software-delivery',
    title: `Task ${taskId}`,
    description: `Task for ${taskId}`,
    acceptance: ['CLI lifecycle flow works'],
    declared_scopes: [{ type: 'path', pattern: '**', access: 'read' }],
    risk: 'medium',
    type: 'feature',
    priority: 'P1',
    created: '2026-02-16',
  };
  return spec;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Writes the minimal on-disk workspace used by the end-to-end lifecycle test.
 *
 * Creates `<root>/packs/software-delivery/` and writes two files:
 *   - `<root>/workspace.yaml` – workspace id, one local pack, one lane and the
 *     security section;
 *   - `<root>/packs/software-delivery/manifest.yaml` – the pack manifest with a
 *     single `on_completion` allow policy.
 *
 * The YAML is emitted with explicit block indentation so that list items and
 * their sibling keys nest correctly (a list entry's follow-up keys must be
 * indented deeper than the `-` that opens it).
 *
 * @param root Directory that becomes the workspace root; parent directories of
 *   the pack folder are created recursively.
 */
async function writeWorkspaceFixture(root: string): Promise<void> {
  const packRoot = join(root, 'packs', 'software-delivery');
  await mkdir(packRoot, { recursive: true });

  const workspaceYaml = [
    'id: workspace-surfaces-cli',
    'name: Surfaces CLI Workspace',
    'packs:',
    '  - id: software-delivery',
    '    version: 1.0.0',
    '    integrity: dev',
    '    source: local',
    'lanes:',
    '  - id: framework-cli-wu-commands',
    '    title: Framework CLI WU Commands',
    '    allowed_scopes:',
    '      - type: path',
    '        pattern: "**"',
    '        access: read',
    'security:',
    '  allowed_scopes:',
    '    - type: path',
    '      pattern: "**"',
    '      access: read',
    '  network_default: off',
    '  deny_overlays: []',
    'software_delivery: {}',
    'memory_namespace: mem',
    'event_namespace: evt',
  ].join('\n');

  const manifestYaml = [
    'id: software-delivery',
    'version: 1.0.0',
    'task_types:',
    '  - work-unit',
    'tools: []',
    'policies:',
    '  - id: runtime.completion.allow',
    '    trigger: on_completion',
    '    decision: allow',
    'state_aliases:',
    '  active: in_progress',
    'evidence_types: []',
    'lane_templates: []',
    'config_key: software_delivery',
  ].join('\n');

  // Written one after the other, workspace file first.
  await writeFile(join(root, 'workspace.yaml'), workspaceYaml, 'utf8');
  await writeFile(join(packRoot, 'manifest.yaml'), manifestYaml, 'utf8');
}
|
|
93
|
+
|
|
94
|
+
// Tracer-bullet suite for the CLI task lifecycle surface:
//   1. command -> KernelRuntime method routing, using mocks;
//   2. a create/claim/status/complete round trip against a runtime initialised
//      from an on-disk fixture workspace;
//   3. a source-level check of which runtime methods the surface calls.
describe('surfaces/cli task lifecycle tracer bullet', () => {
  let tempRoot: string;

  // Each test gets its own throw-away workspace directory.
  beforeEach(async () => {
    tempRoot = await mkdtemp(join(tmpdir(), 'lumenflow-surfaces-cli-'));
    await writeWorkspaceFixture(tempRoot);
  });

  afterEach(async () => {
    await rm(tempRoot, { recursive: true, force: true });
  });

  it('routes task commands to KernelRuntime methods', async () => {
    const createTask = vi.fn<(task: TaskSpec) => Promise<CreateTaskResult>>();
    const claimTask =
      vi.fn<
        (input: { task_id: string; by: string; session_id: string }) => Promise<ClaimTaskResult>
      >();
    const inspectTask = vi.fn<(taskId: string) => Promise<TaskInspection>>();
    const completeTask = vi.fn<(input: { task_id: string }) => Promise<CompleteTaskResult>>();
    const executeTool = vi.fn<(name: string, input: unknown, ctx: never) => Promise<ToolOutput>>();

    // Partial stand-in for the runtime: only the members listed here exist.
    const runtime = {
      createTask,
      claimTask,
      inspectTask,
      completeTask,
      executeTool,
      getToolHost: vi.fn(),
      getPolicyEngine: vi.fn(),
    } as unknown as KernelRuntime;

    const commands = createTaskLifecycleCommands(runtime);
    const taskSpec = createTaskSpec('WU-1736-routing');

    await commands['task:create'](taskSpec);
    await commands['task:claim']({
      task_id: taskSpec.id,
      by: 'maintainer@example.com',
      session_id: 'session-routing',
    });
    await commands['task:status'](taskSpec.id);
    await commands['task:complete']({ task_id: taskSpec.id });

    expect(createTask).toHaveBeenCalledWith(taskSpec);
    expect(claimTask).toHaveBeenCalledWith({
      task_id: taskSpec.id,
      by: 'maintainer@example.com',
      session_id: 'session-routing',
    });
    expect(inspectTask).toHaveBeenCalledWith(taskSpec.id);
    expect(completeTask).toHaveBeenCalledWith({ task_id: taskSpec.id });
  });

  it('executes create -> claim -> status -> complete -> status through KernelRuntime', async () => {
    const initialized = await initializeTaskLifecycleCommands({
      workspaceRoot: tempRoot,
      packsRoot: join(tempRoot, 'packs'),
      taskSpecRoot: join(tempRoot, 'tasks'),
      eventsFilePath: join(tempRoot, 'events.jsonl'),
      eventLockFilePath: join(tempRoot, 'events.lock'),
      evidenceRoot: join(tempRoot, 'evidence'),
    });

    const commands = initialized.commands;
    const taskSpec = createTaskSpec('WU-1736-e2e');

    await commands['task:create'](taskSpec);
    await commands['task:claim']({
      task_id: taskSpec.id,
      by: 'maintainer@example.com',
      session_id: 'session-e2e',
    });

    const activeStatus = await commands['task:status'](taskSpec.id);
    expect(activeStatus.state.status).toBe('active');

    await commands['task:complete']({ task_id: taskSpec.id });

    const doneStatus = await commands['task:status'](taskSpec.id);
    expect(doneStatus.state.status).toBe('done');

    const eventKinds = doneStatus.events.map((event) => event.kind);
    expect(eventKinds).toEqual([
      'task_created',
      'task_claimed',
      'run_started',
      'run_succeeded',
      'task_completed',
    ]);
  });

  it('surface implementation avoids direct @lumenflow/core imports', async () => {
    // Resolve the implementation relative to this test file rather than
    // process.cwd(), so the check also works when the suite is started from a
    // directory other than the monorepo root.
    const sourceUrl = new URL('../task-lifecycle.ts', import.meta.url);
    const source = await readFile(sourceUrl, 'utf8');

    expect(source.includes('@lumenflow/core')).toBe(false);
    expect(source.includes('runtime.createTask')).toBe(true);
    expect(source.includes('runtime.claimTask')).toBe(true);
    expect(source.includes('runtime.inspectTask')).toBe(true);
    expect(source.includes('runtime.completeTask')).toBe(true);
  });
});
|
package/cli/gates.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Copyright (c) 2026 Hellmai Ltd
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import {
|
|
5
|
+
POLICY_TRIGGERS,
|
|
6
|
+
type KernelRuntime,
|
|
7
|
+
type PolicyEvaluationResult,
|
|
8
|
+
type TaskInspection,
|
|
9
|
+
} from '@lumenflow/kernel';
|
|
10
|
+
|
|
11
|
+
/**
 * Input accepted by the `gates:run` command.
 *
 * All four fields are passed through unchanged to the policy engine's
 * evaluation call; `lane_id` and `pack_id` may be omitted.
 */
export interface GateRunInput {
  /** Task the gates are evaluated for. */
  task_id: string;
  /** Run the gates are evaluated for. */
  run_id: string;
  /** Optional lane identifier forwarded to the policy engine. */
  lane_id?: string;
  /** Optional pack identifier forwarded to the policy engine. */
  pack_id?: string;
}
|
|
17
|
+
|
|
18
|
+
/** Command table exposing policy-gate evaluation, keyed by CLI command name. */
export interface GateCommands {
  /** Evaluates the gates for the given task/run and resolves with the policy result. */
  'gates:run': (input: GateRunInput) => Promise<PolicyEvaluationResult>;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Command table for orchestration status queries, keyed by CLI command name.
 * Both commands take a task id and resolve with that task's inspection.
 */
export interface OrchestrationCommands {
  /** Resolves with the inspection of `taskId`. */
  'orchestration:init-status': (taskId: string) => Promise<TaskInspection>;
  /** Resolves with the inspection of `taskId`. */
  'orchestration:monitor': (taskId: string) => Promise<TaskInspection>;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds the gate command table on top of a kernel runtime.
 *
 * `gates:run` fetches the policy engine from the runtime on every invocation
 * and evaluates it with the `ON_COMPLETION` trigger plus the ids from the
 * input (absent optional ids are passed as `undefined`).
 *
 * @param runtime Runtime that supplies the policy engine.
 * @returns The command table; nothing is evaluated until a command is called.
 */
export function createGateCommands(runtime: KernelRuntime): GateCommands {
  const runGates: GateCommands['gates:run'] = async ({ task_id, run_id, lane_id, pack_id }) => {
    const engine = runtime.getPolicyEngine();
    return engine.evaluate({
      trigger: POLICY_TRIGGERS.ON_COMPLETION,
      task_id,
      run_id,
      lane_id,
      pack_id,
    });
  };

  return { 'gates:run': runGates };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Builds the orchestration command table on top of a kernel runtime.
 *
 * Both commands are thin pass-throughs to `runtime.inspectTask`; they differ
 * only in the command name they are registered under.
 *
 * @param runtime Runtime whose `inspectTask` serves both commands.
 */
export function createOrchestrationCommands(runtime: KernelRuntime): OrchestrationCommands {
  return {
    async 'orchestration:init-status'(taskId) {
      return runtime.inspectTask(taskId);
    },
    async 'orchestration:monitor'(taskId) {
      return runtime.inspectTask(taskId);
    },
  };
}
|
package/cli/index.ts
ADDED
package/cli/inspect.ts
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
// Copyright (c) 2026 Hellmai Ltd
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import { readFile } from 'node:fs/promises';
|
|
5
|
+
import type {
|
|
6
|
+
KernelRuntime,
|
|
7
|
+
TaskInspection,
|
|
8
|
+
TaskInspection as KernelTaskInspection,
|
|
9
|
+
ToolOutput,
|
|
10
|
+
} from '@lumenflow/kernel';
|
|
11
|
+
|
|
12
|
+
/**
 * A kernel task inspection extended with a flat list of evidence references.
 */
export interface InspectView extends KernelTaskInspection {
  /** Evidence references gathered from the inspection's events and receipts. */
  evidence: string[];
}
|
|
15
|
+
|
|
16
|
+
/** Command table for task inspection, keyed by CLI command name. */
export interface InspectCommands {
  /** Resolves with the inspection of `taskId` plus its collected evidence list. */
  'task:inspect': (taskId: string) => Promise<InspectView>;
}
|
|
19
|
+
|
|
20
|
+
/** Input accepted by the `task:replay` command. */
export interface ReplayRunInput {
  /** Task whose receipts are inspected. */
  task_id: string;
  /** Run whose started tool calls are replayed. */
  run_id: string;
  /** Session id placed in the execution context of each replayed tool call. */
  session_id: string;
}
|
|
25
|
+
|
|
26
|
+
/** Result of replaying a single recorded tool call. */
export interface ReplayOutput {
  /** Id of the receipt the replay was driven from. */
  receipt_id: string;
  /** Name of the tool that was re-executed. */
  tool_name: string;
  /** Output returned by the re-execution. */
  output: ToolOutput;
}
|
|
31
|
+
|
|
32
|
+
/** Aggregate result of the `task:replay` command. */
export interface ReplayRunResult {
  /** Task id echoed from the input. */
  task_id: string;
  /** Original run id echoed from the input. */
  run_id: string;
  /** Run id under which the replayed tool calls were executed. */
  replay_run_id: string;
  /** One entry per replayed tool call, in replay order. */
  outputs: ReplayOutput[];
}
|
|
38
|
+
|
|
39
|
+
/** Command table for run replay, keyed by CLI command name. */
export interface ReplayCommands {
  /** Re-executes the recorded tool calls of a run and resolves with their outputs. */
  'task:replay': (input: ReplayRunInput) => Promise<ReplayRunResult>;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Collects the distinct evidence references found in a task inspection.
 *
 * Sources, in order:
 *   1. every entry of an event's `evidence_refs`, when the event has that
 *      property and it is an array;
 *   2. for each receipt: `input_ref` when its kind is `tool_call_started`,
 *      otherwise `output_ref` when that value is truthy.
 *
 * Duplicates are dropped; first-seen order is kept.
 *
 * @param inspection Inspection whose events and receipts are scanned.
 * @returns The de-duplicated references.
 */
function collectEvidence(inspection: TaskInspection): string[] {
  const unique = new Set<string>();

  for (const event of inspection.events) {
    const refs = 'evidence_refs' in event ? event.evidence_refs : undefined;
    if (Array.isArray(refs)) {
      for (const ref of refs) {
        unique.add(ref);
      }
    }
  }

  for (const receipt of inspection.receipts) {
    if (receipt.kind === 'tool_call_started') {
      unique.add(receipt.input_ref);
    } else if (receipt.output_ref) {
      unique.add(receipt.output_ref);
    }
  }

  return Array.from(unique);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Builds the inspect command table on top of a kernel runtime.
 *
 * `task:inspect` fetches the task inspection from the runtime and returns a
 * shallow copy of it with an added `evidence` list derived from that same
 * inspection.
 *
 * @param runtime Runtime used to inspect tasks.
 */
export function createInspectCommands(runtime: KernelRuntime): InspectCommands {
  const inspect = async (taskId: string): Promise<InspectView> => {
    const base = await runtime.inspectTask(taskId);
    const evidence = collectEvidence(base);
    return { ...base, evidence };
  };

  return { 'task:inspect': inspect };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Loads a recorded tool input from disk.
 *
 * The file is read as UTF-8. If its content parses as JSON the parsed value
 * is returned; otherwise the raw text is returned unchanged. Read failures
 * are not caught and reject the returned promise.
 *
 * @param inputRef Path of the file holding the recorded input.
 */
async function loadReplayInput(inputRef: string): Promise<unknown> {
  const raw = await readFile(inputRef, 'utf8');

  let value: unknown;
  try {
    value = JSON.parse(raw);
  } catch {
    // Not JSON: hand back the text as-is.
    value = raw;
  }
  return value;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Builds the replay command table on top of a kernel runtime.
 *
 * `task:replay` inspects the task, selects the `tool_call_started` receipts
 * that belong to `input.run_id`, and re-executes each one in order, one at a
 * time. Every re-execution:
 *   - loads its input from the receipt's `input_ref`;
 *   - runs under the derived run id `<run_id>:replay`;
 *   - reuses the receipt's `scope_enforced` for `allowed_scopes` and for the
 *     workspace/lane/task scope entries in `metadata`;
 *   - copies the receipt's config hash, runtime version and pack identity
 *     into `metadata`.
 *
 * A failing load or tool execution rejects the whole command; outputs gathered
 * so far are not returned.
 *
 * @param runtime Runtime used to inspect the task and execute tools.
 */
export function createReplayCommands(runtime: KernelRuntime): ReplayCommands {
  const replay: ReplayCommands['task:replay'] = async (input) => {
    const { receipts } = await runtime.inspectTask(input.task_id);
    const replayRunId = `${input.run_id}:replay`;

    const toReplay = receipts.filter((receipt) => {
      return receipt.kind === 'tool_call_started' && receipt.run_id === input.run_id;
    });

    const outputs: ReplayOutput[] = [];
    for (const receipt of toReplay) {
      const recordedInput = await loadReplayInput(receipt.input_ref);
      const context = {
        run_id: replayRunId,
        task_id: input.task_id,
        session_id: input.session_id,
        allowed_scopes: receipt.scope_enforced,
        metadata: {
          workspace_allowed_scopes: receipt.scope_enforced,
          lane_allowed_scopes: receipt.scope_enforced,
          task_declared_scopes: receipt.scope_enforced,
          workspace_config_hash: receipt.workspace_config_hash,
          runtime_version: receipt.runtime_version,
          pack_id: receipt.pack_id,
          pack_version: receipt.pack_version,
          pack_integrity: receipt.pack_integrity,
        },
      };
      const output = await runtime.executeTool(receipt.tool_name, recordedInput, context);

      outputs.push({
        receipt_id: receipt.receipt_id,
        tool_name: receipt.tool_name,
        output,
      });
    }

    return {
      task_id: input.task_id,
      run_id: input.run_id,
      replay_run_id: replayRunId,
      outputs,
    };
  };

  return { 'task:replay': replay };
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Copyright (c) 2026 Hellmai Ltd
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import {
|
|
5
|
+
initializeKernelRuntime,
|
|
6
|
+
type ClaimTaskInput,
|
|
7
|
+
type ClaimTaskResult,
|
|
8
|
+
type CompleteTaskInput,
|
|
9
|
+
type CompleteTaskResult,
|
|
10
|
+
type CreateTaskResult,
|
|
11
|
+
type InitializeKernelRuntimeOptions,
|
|
12
|
+
type KernelRuntime,
|
|
13
|
+
type TaskSpec,
|
|
14
|
+
type TaskInspection,
|
|
15
|
+
} from '@lumenflow/kernel';
|
|
16
|
+
|
|
17
|
+
/**
 * Command table for the task lifecycle, keyed by CLI command name.
 * Each command resolves with the result of the matching kernel runtime call.
 */
export interface TaskLifecycleCommands {
  /** Creates a task from the given spec. */
  'task:create': (taskSpec: TaskSpec) => Promise<CreateTaskResult>;
  /** Claims a task. */
  'task:claim': (input: ClaimTaskInput) => Promise<ClaimTaskResult>;
  /** Resolves with the inspection of `taskId`. */
  'task:status': (taskId: string) => Promise<TaskInspection>;
  /** Completes a task. */
  'task:complete': (input: CompleteTaskInput) => Promise<CompleteTaskResult>;
}
|
|
23
|
+
|
|
24
|
+
/** A freshly initialised runtime together with the command table bound to it. */
export interface InitializedTaskLifecycleCommands {
  /** The runtime the commands operate on. */
  runtime: KernelRuntime;
  /** Lifecycle commands bound to `runtime`. */
  commands: TaskLifecycleCommands;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Builds the task lifecycle command table on top of a kernel runtime.
 *
 * Every command is a one-to-one pass-through: the argument is handed to the
 * runtime method unchanged and the runtime's result is returned.
 *
 * @param runtime Runtime that performs the actual work.
 */
export function createTaskLifecycleCommands(runtime: KernelRuntime): TaskLifecycleCommands {
  return {
    async 'task:create'(taskSpec) {
      return runtime.createTask(taskSpec);
    },
    async 'task:claim'(input) {
      return runtime.claimTask(input);
    },
    async 'task:status'(taskId) {
      return runtime.inspectTask(taskId);
    },
    async 'task:complete'(input) {
      return runtime.completeTask(input);
    },
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Initialises a kernel runtime from `options` and binds the task lifecycle
 * commands to it.
 *
 * @param options Passed unchanged to the kernel's runtime initialiser.
 * @returns The runtime and the command table built on it. Rejects if runtime
 *   initialisation rejects.
 */
export async function initializeTaskLifecycleCommands(
  options: InitializeKernelRuntimeOptions,
): Promise<InitializedTaskLifecycleCommands> {
  const runtime = await initializeKernelRuntime(options);
  const commands = createTaskLifecycleCommands(runtime);
  return { runtime, commands };
}
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
// Copyright (c) 2026 Hellmai Ltd
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
//
|
|
4
|
+
// WU-2732 (INIT-060 Phase 3, ADR-013 §1 + §6 tracer bullet):
|
|
5
|
+
// End-to-end wire-contract proof for the six agent-runtime
|
|
6
|
+
// remote-control tools. Dispatches POST /tools/:name through the
|
|
7
|
+
// HTTP tool-api router into the real mock tool implementations and
|
|
8
|
+
// asserts:
|
|
9
|
+
// 1. 200 round-trip for every tool on the allowlist.
|
|
10
|
+
// 2. Response JSON carries the mock shape (tool_name, session_id,
|
|
11
|
+
// plus cleanup_status/recovery_action for abort_turn).
|
|
12
|
+
// 3. agent-runtime:tool_called is emitted on each invocation.
|
|
13
|
+
//
|
|
14
|
+
// The test pins the wire contract BEFORE WU-2733 ships the real
|
|
15
|
+
// kernel-side pause/resume/abort logic. WU-2733 replaces the mock
|
|
16
|
+
// entries without changing names, scopes, approvals, or response
|
|
17
|
+
// shapes — this suite runs unchanged against the real implementations.
|
|
18
|
+
|
|
19
|
+
import type { IncomingHttpHeaders, IncomingMessage, ServerResponse } from 'node:http';
|
|
20
|
+
import { EventEmitter } from 'node:events';
|
|
21
|
+
import { PassThrough } from 'node:stream';
|
|
22
|
+
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
23
|
+
import { AGENT_RUNTIME_REMOTE_CALLABLE_TOOLS } from '../../../packs/agent-runtime/manifest.js';
|
|
24
|
+
import {
|
|
25
|
+
AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES,
|
|
26
|
+
type AgentRuntimeRemoteControlToolName,
|
|
27
|
+
} from '../../../packs/agent-runtime/remote-controls/types.js';
|
|
28
|
+
import {
|
|
29
|
+
AGENT_RUNTIME_EVENT_KINDS,
|
|
30
|
+
resetAgentRuntimeSeqCounter,
|
|
31
|
+
type AgentRuntimeEvent,
|
|
32
|
+
} from '../../../packs/agent-runtime/turn-lifecycle-events.js';
|
|
33
|
+
import {
|
|
34
|
+
abortTurnMockTool,
|
|
35
|
+
approveInflightMockTool,
|
|
36
|
+
elevateAutonomyMockTool,
|
|
37
|
+
lowerAutonomyMockTool,
|
|
38
|
+
pauseTurnMockTool,
|
|
39
|
+
resumeWorkflowMockTool,
|
|
40
|
+
setMockRemoteControlEventSink,
|
|
41
|
+
} from '../../../packs/agent-runtime/tool-impl/remote-controls.mock.js';
|
|
42
|
+
import { createToolApiRouter } from '../tool-api.js';
|
|
43
|
+
|
|
44
|
+
/** HTTP verbs used by this suite. */
const HTTP_METHOD = {
  POST: 'POST',
} as const;

/** HTTP status codes asserted by this suite. */
const HTTP_STATUS = {
  OK: 200,
} as const;

/** Content-Type header value attached to every mock request. */
const CONTENT_TYPE_JSON = 'application/json; charset=utf-8';
|
|
47
|
+
|
|
48
|
+
/** Call signature shared by the mock remote-control tools. */
type MockRemoteControlTool = typeof resumeWorkflowMockTool;

/**
 * Maps each remote-control tool name to its mock implementation.
 * The `Record` key type makes the compiler require an entry for every tool name.
 */
const MOCK_TOOL_DISPATCH: Record<AgentRuntimeRemoteControlToolName, MockRemoteControlTool> = {
  [AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.RESUME_WORKFLOW]: resumeWorkflowMockTool,
  [AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.PAUSE_TURN]: pauseTurnMockTool,
  [AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.ABORT_TURN]: abortTurnMockTool,
  [AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.ELEVATE_AUTONOMY]: elevateAutonomyMockTool,
  [AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.LOWER_AUTONOMY]: lowerAutonomyMockTool,
  [AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.APPROVE_INFLIGHT]: approveInflightMockTool,
};
|
|
57
|
+
|
|
58
|
+
/**
 * Minimal in-memory stand-in for an HTTP server response.
 *
 * Records the status code, the lower-cased headers and the concatenated body
 * text, and emits `finish` when `end()` is called.
 */
class MockResponse extends EventEmitter {
  /** Status code; starts at 200 and is overwritten by whoever handles the request. */
  statusCode = HTTP_STATUS.OK;
  /** Everything written so far, decoded as UTF-8. */
  body = '';
  private readonly headers = new Map<string, string>();

  /** Stores a header under its lower-cased name, stringifying the value. */
  setHeader(name: string, value: string | number | readonly string[]): this {
    const key = name.toLowerCase();
    this.headers.set(key, String(value));
    return this;
  }

  /** Appends a chunk to `body`; always reports `true`. */
  write(chunk: string | Buffer): boolean {
    if (Buffer.isBuffer(chunk)) {
      this.body += chunk.toString('utf8');
    } else {
      this.body += chunk;
    }
    return true;
  }

  /** Writes the optional final chunk, then emits `finish`. */
  end(chunk?: string | Buffer): this {
    if (chunk !== undefined) {
      this.write(chunk);
    }
    this.emit('finish');
    return this;
  }
}
|
|
81
|
+
|
|
82
|
+
/** Options for building a mock incoming request. */
interface CreateRequestOptions {
  /** HTTP method assigned to the request. */
  method: string;
  /** Optional payload; when present it is JSON-serialised into the request body. */
  body?: unknown;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Builds a mock incoming request backed by a `PassThrough` stream.
 *
 * The request always targets `/`, carries a JSON content type and a fixed
 * bearer token, and its body stream is already ended when it is returned:
 * it contains the JSON-serialised `options.body`, or nothing when no body was
 * given.
 *
 * @param options Method and optional body for the request.
 */
function createRequest(options: CreateRequestOptions): IncomingMessage {
  const stream = new PassThrough();
  const request = stream as unknown as IncomingMessage & {
    method: string;
    url: string;
    headers: IncomingHttpHeaders;
  };

  request.method = options.method;
  request.url = '/';
  request.headers = {
    'content-type': CONTENT_TYPE_JSON,
    // WU-2779: POST /tools/:name now requires an Authorization header. Use a
    // legacy opaque token — this suite exercises the agent-runtime wire
    // contract, not auth; scope enforcement is covered elsewhere.
    authorization: 'Bearer legacy-opaque-agent-runtime-test-token',
  };

  // Write the whole payload and close the stream in one step.
  stream.end(options.body === undefined ? '' : JSON.stringify(options.body));
  return request;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Builds the execution context sent with every request in this suite:
 * fixed run/task/session ids and a single read scope over all paths.
 * A new object is produced on each call.
 */
function createContext() {
  const readAllPaths = { type: 'path' as const, pattern: '**', access: 'read' as const };
  return {
    run_id: 'run-wu-2732',
    task_id: 'WU-2732',
    session_id: 'session-http-e2e',
    allowed_scopes: [readAllPaths],
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * The slice of the runtime this suite's harness implements: tool execution only.
 */
interface RouterRuntime {
  /**
   * Executes the named tool with the given input and execution context and
   * resolves with whatever the tool produces.
   */
  executeTool: (
    name: string,
    input: unknown,
    ctx: ReturnType<typeof createContext>,
  ) => Promise<unknown>;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Builds a runtime whose `executeTool` dispatches to the mock remote-control
 * tool implementations.
 *
 * Only names that are own keys of the dispatch table are accepted. A plain
 * bracket lookup would also resolve inherited members such as `toString` or
 * `constructor` and call them as if they were tools, so the name is checked
 * with an own-property test first.
 *
 * @returns A runtime exposing `executeTool` only.
 * @throws Error (as a rejected promise) with `Test harness: unknown tool "<name>"`
 *   when the name has no mock implementation.
 */
function createRemoteControlMockRuntime(): RouterRuntime {
  return {
    async executeTool(name, input, ctx) {
      const isKnown = Object.prototype.hasOwnProperty.call(MOCK_TOOL_DISPATCH, name);
      const tool = isKnown
        ? MOCK_TOOL_DISPATCH[name as AgentRuntimeRemoteControlToolName]
        : undefined;
      if (!tool) {
        throw new Error(`Test harness: unknown tool "${name}"`);
      }
      // The mocks declare their own context type; the harness context is passed through as-is.
      return tool(input, ctx as unknown as never);
    },
  };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Parses a response body as JSON and types it as the `{ success, data }`
 * envelope. The shape is asserted, not validated; malformed JSON throws.
 *
 * @param body Raw response text.
 */
function parseJsonBody(body: string): { success: boolean; data: unknown } {
  const parsed: unknown = JSON.parse(body);
  return parsed as { success: boolean; data: unknown };
}
|
|
139
|
+
|
|
140
|
+
// Wire-contract suite: every allowlisted remote-control tool is dispatched
// through the HTTP tool router into its mock implementation.
describe('WU-2732 E2E: POST /tools/agent-runtime:* round-trips the wire contract', () => {
  let capturedEvents: AgentRuntimeEvent[];

  /**
   * Sends one POST for `toolName` through a freshly built router backed by the
   * mock runtime and returns the recorded response.
   */
  async function postTool(toolName: string, input: Record<string, unknown>): Promise<MockResponse> {
    const runtime = createRemoteControlMockRuntime();
    const router = createToolApiRouter(runtime as never, {
      allowlistedTools: [...AGENT_RUNTIME_REMOTE_CALLABLE_TOOLS],
    });
    const request = createRequest({
      method: HTTP_METHOD.POST,
      body: { input, context: createContext() },
    });
    const response = new MockResponse();

    await router.handleRequest(request, response as unknown as ServerResponse<IncomingMessage>, [
      toolName,
    ]);
    return response;
  }

  beforeEach(() => {
    // Start every test with an empty capture buffer and a reset sequence counter.
    capturedEvents = [];
    resetAgentRuntimeSeqCounter();
    setMockRemoteControlEventSink((event) => capturedEvents.push(event));
  });

  afterEach(() => {
    setMockRemoteControlEventSink(undefined);
  });

  it.each(AGENT_RUNTIME_REMOTE_CALLABLE_TOOLS)(
    'POST /tools/%s returns 200 with success=true and the mock-shape payload',
    async (toolName) => {
      const response = await postTool(toolName, { session_id: 'session-from-cloud' });

      expect(response.statusCode).toBe(HTTP_STATUS.OK);
      const body = parseJsonBody(response.body);
      expect(body.success).toBe(true);

      const data = body.data as { tool_name: string; session_id: string; status: string };
      expect(data.tool_name).toBe(toolName);
      expect(data.session_id).toBe('session-from-cloud');
      expect(data.status).toBe('ok');
    },
  );

  it('POST /tools/agent-runtime:abort_turn returns the ADR-013 §1 abort payload', async () => {
    const response = await postTool(AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.ABORT_TURN, {
      session_id: 'session-abort-e2e',
      reason: 'operator-requested',
    });

    expect(response.statusCode).toBe(HTTP_STATUS.OK);
    const body = parseJsonBody(response.body);
    const data = body.data as {
      tool_name: string;
      session_id: string;
      cleanup_status: string;
      recovery_action: string | null;
    };
    expect(data.tool_name).toBe(AGENT_RUNTIME_REMOTE_CONTROL_TOOL_NAMES.ABORT_TURN);
    expect(data.session_id).toBe('session-abort-e2e');
    expect(data.cleanup_status).toBe('clean');
    expect(data.recovery_action).toBeNull();
  });

  it.each(AGENT_RUNTIME_REMOTE_CALLABLE_TOOLS)(
    'invoking %s over HTTP emits exactly one agent-runtime:tool_called event',
    async (toolName) => {
      await postTool(toolName, { session_id: 'session-emit-e2e' });

      const toolCalledEvents = capturedEvents.filter(
        (event) => event.kind === AGENT_RUNTIME_EVENT_KINDS.TOOL_CALLED,
      );
      expect(toolCalledEvents).toHaveLength(1);

      const [event] = toolCalledEvents;
      // Narrows the event union for the property checks below.
      if (event?.kind !== AGENT_RUNTIME_EVENT_KINDS.TOOL_CALLED) {
        throw new Error('expected a tool_called event');
      }
      expect(event.tool_name).toBe(toolName);
      expect(event.session_id).toBe('session-emit-e2e');
    },
  );
});
|