vibeman 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.js +88180 -80376
- package/dist/apps/api/resources/templates/task.md +48 -0
- package/dist/commit.txt +1 -1
- package/dist/index.js +2021 -35
- package/dist/prisma/dev.db +0 -0
- package/dist/prisma/schema.prisma +0 -1
- package/dist/prisma.config.ts +8 -0
- package/dist/scripts/init-test-repo.mjs +234 -0
- package/dist/scripts/lib/test-fixtures.mjs +901 -0
- package/dist/scripts/seed-test-fixtures.mjs +281 -0
- package/dist/ui/assets/{index-BbfDsh-D.js → index-5kvaa7VH.js} +1 -1
- package/dist/ui/assets/{index-Bb69tSxN.js → index-B2YlQpV0.js} +1 -1
- package/dist/ui/assets/{index-C-OxxnxE.js → index-BFpKdhc4.js} +1 -1
- package/dist/ui/assets/{index-qDf_Uo6N.js → index-BKcn2ir8.js} +1 -1
- package/dist/ui/assets/{index-CW8VfVIC.js → index-BmCH7Zkp.js} +88 -94
- package/dist/ui/assets/{index-EHnTtRu0.js → index-BnA5v3sz.js} +1 -1
- package/dist/ui/assets/{index-DqEtuI-j.js → index-BtgmEMNX.js} +1 -1
- package/dist/ui/assets/{index-B7PC3UwW.js → index-Buod5MG9.js} +1 -1
- package/dist/ui/assets/{index-Dhrbisdh.js → index-BxFJT2l4.js} +1 -1
- package/dist/ui/assets/{index-BEEMkSga.js → index-CCzed9cx.js} +1 -1
- package/dist/ui/assets/{index-SowqJRTb.js → index-CJ0FVxY4.js} +1 -1
- package/dist/ui/assets/{index-ZYq2otFn.js → index-CaM8gf-6.js} +1 -1
- package/dist/ui/assets/{index-DQTqMEWL.js → index-CfiNAuWd.js} +1 -1
- package/dist/ui/assets/{index-Cb3j0p9y.js → index-Ck0eDlqj.js} +1 -1
- package/dist/ui/assets/{index-F3UrAKk1.js → index-CmzQ8vUy.js} +1 -1
- package/dist/ui/assets/{index-Du9IbowE.js → index-CoX8THvk.js} +1 -1
- package/dist/ui/assets/{index-BZ4ywKS-.js → index-D4lRQ9OU.js} +1 -1
- package/dist/ui/assets/{index-Bu9gi8CT.js → index-DCwTMEKA.js} +1 -1
- package/dist/ui/assets/{index-hJzy_UWf.js → index-D_p2Z3lg.js} +1 -1
- package/dist/ui/assets/{index-DrA24vfU.js → index-DlPVzvxz.js} +1 -1
- package/dist/ui/assets/{index-CwR-eksU.js → index-Q46jjFaN.js} +1 -1
- package/dist/ui/assets/{index-CH876Txt.js → index-XVbgp8h-.js} +1 -1
- package/dist/ui/assets/{index-iMeltPfH.js → index-xr3-NPcF.js} +1 -1
- package/dist/ui/index.html +1 -1
- package/package.json +10 -3
|
@@ -0,0 +1,901 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { dirname, resolve } from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
|
|
5
|
+
/** Absolute path of the directory that contains this module file. */
const moduleDir = dirname(fileURLToPath(import.meta.url));

/** Directory two levels above `moduleDir`. */
const repoRoot = resolve(moduleDir, '../..');

// Model ids handed to the coding-agent fixture executors defined below.
// NOTE(review): executor ids/descriptions in this file mention "codex mini" and
// "Flash-Lite", which do not obviously match these model ids — confirm intent.
const DEFAULT_CODEX_MODEL = 'gpt-5.3-codex';
const LOW_COST_GEMINI_MODEL = 'gemini-3-flash-preview';
const LOW_COST_CLAUDE_MODEL = 'claude-haiku';
|
|
11
|
+
|
|
12
|
+
/**
 * Ids of the fixture executors, keyed by a short symbolic name.
 *
 * The table is frozen: the same ids are used both when the executors are
 * defined and when workflow nodes reference them, so an accidental write
 * would silently break that link. In an ES module (strict mode) a write to a
 * frozen object throws a TypeError instead.
 */
const EXECUTOR_IDS = Object.freeze({
  // Coding-agent executors.
  CODEX_PING: 'executor_fixture_codex_mini_ping',
  CODEX_FIX: 'executor_fixture_codex_mini_fix',
  CODEX_FORCED_FAIL: 'executor_fixture_codex_mini_forced_fail',
  CLAUDE_PING: 'executor_fixture_claude_haiku_ping',
  GEMINI_PING: 'executor_fixture_gemini_flash_lite_ping',
  // Shell executors.
  SHELL_SUCCESS: 'executor_fixture_shell_success',
  SHELL_LONG_RUNNING: 'executor_fixture_shell_long_running',
  SHELL_FAIL_ALWAYS: 'executor_fixture_shell_fail_always',
  SHELL_FAIL_ONCE: 'executor_fixture_shell_fail_once',
  SHELL_RESTART_GATE: 'executor_fixture_shell_restart_gate',
  SHELL_RANDOM_REVIEW: 'executor_fixture_shell_random_review',
  SHELL_NOISY_LOGS: 'executor_fixture_shell_noisy_logs',
  // Human-input executor.
  HUMAN_GATE: 'executor_fixture_human_gate',
});
|
|
27
|
+
|
|
28
|
+
/**
 * Ids of the fixture workflows, keyed by test-case label.
 *
 * Frozen for the same reason as any shared lookup table: workflow definitions
 * and the tasks that point at them read the id from here, so a stray write
 * must fail loudly (TypeError in strict mode) rather than desynchronise them.
 *
 * Some workflows are shared by several test cases; their id strings carry
 * every case number (e.g. `t10_t11_t14`) while the key uses only the first.
 */
const WORKFLOW_IDS = Object.freeze({
  T00_CONN_CODEX_BASIC: 'workflow_test_t00_conn_codex_basic',
  T01_CONN_CLAUDE_BASIC: 'workflow_test_t01_conn_claude_basic',
  T02_CONN_GEMINI_BASIC: 'workflow_test_t02_conn_gemini_basic',
  T03_CROSS_AGENT_HANDOFF: 'workflow_test_t03_cross_agent_handoff',
  T04_CROSS_AGENT_REVIEW_HANDOFF: 'workflow_test_t04_cross_agent_review_handoff',
  T10_LONG_RUNNING_CANCEL: 'workflow_test_t10_t11_t14_long_running_cancel',
  T12_PAUSE_RESUME_GATE: 'workflow_test_t12_t15_pause_resume_gate',
  T13_RESTART_MARKER: 'workflow_test_t13_restart_marker',
  T17_RETRY_EXHAUSTED: 'workflow_test_t17_retry_exhausted',
  T19_THREAD_REUSE: 'workflow_test_t19_thread_reuse',
  T30_LIFECYCLE_HAPPY_MERGED: 'workflow_test_t30_lifecycle_happy_merged',
  T31_FAILURE_RECOVERY_MERGED: 'workflow_test_t31_failure_recovery_merged',
  T32_REVIEW_RANDOMNESS_MERGED: 'workflow_test_t32_review_randomness_merged',
  T33_OBSERVABILITY_MERGED: 'workflow_test_t33_observability_merged',
});
|
|
44
|
+
|
|
45
|
+
/**
 * Build the definition object of a `coding_agent` fixture executor.
 *
 * Every agent fixture shares the same permission mode, system prompt and
 * prompt-assembly flags; only the fields passed in vary.
 *
 * @param {object} spec
 * @param {string} spec.id - Executor id.
 * @param {string} spec.name - Display name.
 * @param {string} spec.provider - Provider key. For `'codex-cli'` the config
 *   additionally carries `agentThreadOptions.modelReasoningEffort = 'low'`.
 * @param {string} spec.model - Model id passed through to the config.
 * @param {string} spec.prompt - Prompt text passed through to the config.
 * @param {string[]} [spec.tags=[]] - Extra tags appended after the fixed
 *   `'fixture'` and `'test'` tags. Omitting it (or passing `undefined`) is
 *   allowed and yields only the two fixed tags.
 * @returns {{id: string, name: string, type: string, config: object, tags: string[]}}
 */
function makeAgentExecutor({ id, name, provider, model, prompt, tags = [] }) {
  const config = {
    provider,
    model,
    permissionMode: 'full-auto',
    systemPrompt:
      'You are running a vibeman workflow fixture. Keep responses short and deterministic.',
    prompt,
    includeTaskFile: false,
    includeRawContextJson: false,
    includeFailureContract: false,
    promptMode: 'full',
  };

  // Only the codex provider receives thread options; the key is absent (not
  // undefined) for every other provider.
  if (provider === 'codex-cli') {
    config.agentThreadOptions = { modelReasoningEffort: 'low' };
  }

  return {
    id,
    name,
    type: 'coding_agent',
    config,
    tags: ['fixture', 'test', ...tags],
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Build the definition object of a `shell` fixture executor.
 *
 * @param {object} spec
 * @param {string} spec.id - Executor id.
 * @param {string} spec.name - Display name.
 * @param {string[]} spec.commands - Shell command lines, stored as-is under
 *   `config.commands`.
 * @param {string[]} [spec.tags=[]] - Extra tags appended after the fixed
 *   `'fixture'` and `'test'` tags. Omitting it (or passing `undefined`) is
 *   allowed and yields only the two fixed tags.
 * @returns {{id: string, name: string, type: string, config: {commands: string[]}, tags: string[]}}
 */
function makeShellExecutor({ id, name, commands, tags = [] }) {
  const allTags = ['fixture', 'test', ...tags];
  return {
    id,
    name,
    type: 'shell',
    config: { commands },
    tags: allTags,
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Executor definitions exported for the test fixtures: five coding-agent
 * executors, seven shell executors and one human-input gate.
 */
export const TEST_FIXTURE_EXECUTORS = [
  // ---- Coding-agent executors -------------------------------------------
  makeAgentExecutor({
    id: EXECUTOR_IDS.CODEX_PING,
    name: 'Fixture Codex Ping',
    provider: 'codex-cli',
    model: DEFAULT_CODEX_MODEL,
    prompt: 'Output exactly one line: FIXTURE_OK_CODEX',
    tags: ['codex', 'connectivity'],
  }),
  makeAgentExecutor({
    id: EXECUTOR_IDS.CODEX_FIX,
    name: 'Fixture Codex Fix',
    provider: 'codex-cli',
    model: DEFAULT_CODEX_MODEL,
    prompt: 'Output exactly one line: FIXTURE_FIX_APPLIED',
    tags: ['codex'],
  }),
  // Asks the agent to answer with a result payload whose `success` is false.
  makeAgentExecutor({
    id: EXECUTOR_IDS.CODEX_FORCED_FAIL,
    name: 'Fixture Codex Forced Fail',
    provider: 'codex-cli',
    model: DEFAULT_CODEX_MODEL,
    prompt:
      'Respond exactly with this JSON and nothing else: {"vibeman_task_result": {"success": false, "reason": "Intentional failure for retry exhaustion fixture"}}',
    tags: ['codex', 'failure'],
  }),
  makeAgentExecutor({
    id: EXECUTOR_IDS.CLAUDE_PING,
    name: 'Fixture Claude Haiku Ping',
    provider: 'claude-code',
    model: LOW_COST_CLAUDE_MODEL,
    prompt: 'Output exactly one line: FIXTURE_OK_CLAUDE',
    tags: ['claude', 'connectivity'],
  }),
  makeAgentExecutor({
    id: EXECUTOR_IDS.GEMINI_PING,
    name: 'Fixture Gemini Flash-Lite Ping',
    provider: 'gemini-cli',
    model: LOW_COST_GEMINI_MODEL,
    prompt: 'Output exactly one line: FIXTURE_OK_GEMINI',
    tags: ['gemini', 'connectivity'],
  }),

  // ---- Shell executors ---------------------------------------------------
  // Succeeds only when $VIBEMAN_TASK_PATH is non-empty and names an existing file.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_SUCCESS,
    name: 'Fixture Shell Success',
    commands: [
      'echo "fixture shell success"',
      'test -n "$VIBEMAN_TASK_PATH"',
      'test -f "$VIBEMAN_TASK_PATH"',
      'echo "task file verified: $VIBEMAN_TASK_PATH"',
    ],
    tags: ['shell', 'success'],
  }),
  // Prints its message after a 30 000 ms timer.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_LONG_RUNNING,
    name: 'Fixture Shell Long Running',
    commands: [`node -e "setTimeout(() => console.log('fixture long-running done'), 30000)"`],
    tags: ['shell', 'cancel'],
  }),
  // Always exits with status 1.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_FAIL_ALWAYS,
    name: 'Fixture Shell Fail Always',
    commands: [`node -e "console.error('fixture forced failure'); process.exit(1)"`],
    tags: ['shell', 'failure'],
  }),
  // Exits 1 and writes `.vibeman/.fixtures/retry-once.marker` when the marker
  // is missing; exits 0 when it already exists. The path is relative, so it is
  // resolved against the command's working directory.
  // NOTE(review): nothing in this command removes the marker — confirm it is
  // cleaned up elsewhere between independent runs.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_FAIL_ONCE,
    name: 'Fixture Shell Fail Once',
    commands: [
      `node -e "const fs=require('node:fs'); const d='.vibeman/.fixtures'; const p=d+'/retry-once.marker'; fs.mkdirSync(d,{recursive:true}); if (fs.existsSync(p)) { console.log('retry-once-pass'); process.exit(0); } fs.writeFileSync(p,'1'); console.error('retry-once-fail'); process.exit(1);"`,
    ],
    tags: ['shell', 'retry'],
  }),
  // Same marker-file mechanism as above, using `restart-gate.marker`.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_RESTART_GATE,
    name: 'Fixture Shell Restart Gate',
    commands: [
      `node -e "const fs=require('node:fs'); const d='.vibeman/.fixtures'; const p=d+'/restart-gate.marker'; fs.mkdirSync(d,{recursive:true}); if (fs.existsSync(p)) { console.log('restart-gate-pass'); process.exit(0); } fs.writeFileSync(p,'1'); console.error('restart-gate-first-run-fail'); process.exit(1);"`,
    ],
    tags: ['shell', 'restart'],
  }),
  // Exits 0 when Math.random() >= 0.5, otherwise exits 1.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_RANDOM_REVIEW,
    name: 'Fixture Shell Random Review',
    commands: [
      `node -e "const ok=Math.random()>=0.5; if (ok) { console.log('review-approve'); process.exit(0); } console.error('review-reject'); process.exit(1);"`,
    ],
    tags: ['shell', 'review', 'random'],
  }),
  // Writes two lines to stdout and two to stderr, interleaved.
  makeShellExecutor({
    id: EXECUTOR_IDS.SHELL_NOISY_LOGS,
    name: 'Fixture Shell Noisy Logs',
    commands: [
      `node -e "console.log('stdout-line-1'); console.error('stderr-line-1'); console.log('stdout-line-2'); console.error('stderr-line-2')"`,
    ],
    tags: ['shell', 'logs'],
  }),

  // ---- Human-input executor ----------------------------------------------
  // Written as a literal because no factory exists for this executor type.
  {
    id: EXECUTOR_IDS.HUMAN_GATE,
    name: 'Fixture Human Gate',
    type: 'human_input',
    config: {
      message: 'Fixture human checkpoint: provide approval to continue.',
    },
    tags: ['fixture', 'test', 'human'],
  },
];
|
|
184
|
+
|
|
185
|
+
/**
 * Build one workflow node entry.
 *
 * @param {string} nodeKey - Key of the node inside its workflow.
 * @param {string} executorId - Id of the executor the node runs.
 * @param {object} [options]
 * @param {?string} [options.next=null] - Stored as `transitionMap.next`.
 * @param {?string} [options.onFailure=null] - Stored as `transitionMap.onFailure`.
 * @param {number} [options.maxRetry=0]
 * @param {number} [options.timeoutMs=120000]
 * @param {object} [options.nodeConfig={}]
 * @param {*} [options.routes] - Added as `transitionMap.routes` only when truthy.
 * @returns {object} The node definition.
 */
function node(nodeKey, executorId, options = {}) {
  const {
    next = null,
    onFailure = null,
    maxRetry = 0,
    timeoutMs = 120000,
    nodeConfig = {},
    routes = undefined,
  } = options;

  const transitionMap = { next, onFailure };
  if (routes) {
    // The `routes` key is left out entirely when no routes were given.
    transitionMap.routes = routes;
  }

  return { nodeKey, executorId, nodeConfig, transitionMap, maxRetry, timeoutMs };
}
|
|
210
|
+
|
|
211
|
+
/**
 * Build a fixture workflow definition.
 *
 * The given fields are copied through unchanged; `type` is always `'test'`
 * and `tag` is always `'test-fixture'`.
 *
 * @param {object} spec
 * @param {string} spec.id
 * @param {string} spec.name
 * @param {string} spec.description
 * @param {string} spec.startNodeKey - Key of the node the workflow starts at.
 * @param {object[]} spec.nodes - Node definitions.
 * @returns {object} The workflow definition.
 */
function workflow(spec) {
  const { id, name, description, startNodeKey, nodes } = spec;
  const fixedFields = { type: 'test', tag: 'test-fixture' };
  return { id, name, ...fixedFields, description, startNodeKey, nodes };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Workflow definitions exported for the test fixtures. Each entry wires the
 * fixture executors into a small node graph.
 */
export const TEST_FIXTURE_WORKFLOWS = [
  // ---- T00-T02: single-node connectivity checks, one per provider ---------
  workflow({
    id: WORKFLOW_IDS.T00_CONN_CODEX_BASIC,
    name: 'T00 Connectivity: Codex Basic',
    description: 'Basic codex-cli connectivity using low-cost codex mini model.',
    startNodeKey: 'connect_codex',
    nodes: [node('connect_codex', EXECUTOR_IDS.CODEX_PING)],
  }),
  workflow({
    id: WORKFLOW_IDS.T01_CONN_CLAUDE_BASIC,
    name: 'T01 Connectivity: Claude Basic',
    description: 'Basic claude-code connectivity using low-cost haiku model.',
    startNodeKey: 'connect_claude',
    nodes: [node('connect_claude', EXECUTOR_IDS.CLAUDE_PING)],
  }),
  workflow({
    id: WORKFLOW_IDS.T02_CONN_GEMINI_BASIC,
    name: 'T02 Connectivity: Gemini Basic',
    description: 'Basic gemini-cli connectivity using low-cost flash-lite model.',
    startNodeKey: 'connect_gemini',
    nodes: [node('connect_gemini', EXECUTOR_IDS.GEMINI_PING)],
  }),

  // ---- T03-T04: two-node handoffs between providers ------------------------
  workflow({
    id: WORKFLOW_IDS.T03_CROSS_AGENT_HANDOFF,
    name: 'T03 Cross Agent Handoff',
    description: 'Codex step followed by Claude step to validate cross-provider handoff.',
    startNodeKey: 'codex_step',
    nodes: [
      node('codex_step', EXECUTOR_IDS.CODEX_PING, { next: 'claude_step' }),
      node('claude_step', EXECUTOR_IDS.CLAUDE_PING),
    ],
  }),
  workflow({
    id: WORKFLOW_IDS.T04_CROSS_AGENT_REVIEW_HANDOFF,
    name: 'T04 Cross Agent Review Handoff',
    description:
      'Codex step followed by Gemini review step to validate handoff and provider change.',
    startNodeKey: 'codex_step',
    nodes: [
      node('codex_step', EXECUTOR_IDS.CODEX_PING, { next: 'gemini_review' }),
      node('gemini_review', EXECUTOR_IDS.GEMINI_PING),
    ],
  }),

  // ---- T10-T19: lifecycle actions (cancel, pause/resume, restart, retry) --
  // Node timeout (90 000 ms) is longer than the command's 30 000 ms timer.
  workflow({
    id: WORKFLOW_IDS.T10_LONG_RUNNING_CANCEL,
    name: 'T10/T11/T14 Long Running Cancel Fixture',
    description: 'Long-running shell node for run/node cancel behavior validation.',
    startNodeKey: 'long_running',
    nodes: [node('long_running', EXECUTOR_IDS.SHELL_LONG_RUNNING, { timeoutMs: 90000 })],
  }),
  // The human gate leads to `finalize` on both its success and failure edge.
  workflow({
    id: WORKFLOW_IDS.T12_PAUSE_RESUME_GATE,
    name: 'T12/T15 Pause Resume Gate',
    description: 'Pause on human input then continue to completion.',
    startNodeKey: 'prepare',
    nodes: [
      node('prepare', EXECUTOR_IDS.CODEX_PING, { next: 'await_input' }),
      node('await_input', EXECUTOR_IDS.HUMAN_GATE, { next: 'finalize', onFailure: 'finalize' }),
      node('finalize', EXECUTOR_IDS.SHELL_SUCCESS),
    ],
  }),
  workflow({
    id: WORKFLOW_IDS.T13_RESTART_MARKER,
    name: 'T13 Restart Marker',
    description: 'First run fails, restart succeeds due marker file.',
    startNodeKey: 'restart_gate',
    nodes: [node('restart_gate', EXECUTOR_IDS.SHELL_RESTART_GATE)],
  }),
  workflow({
    id: WORKFLOW_IDS.T17_RETRY_EXHAUSTED,
    name: 'T17 Retry Exhausted',
    description: 'Codex node intentionally reports failure to test retry exhaustion behavior.',
    startNodeKey: 'failing_codex',
    nodes: [
      node('failing_codex', EXECUTOR_IDS.CODEX_FORCED_FAIL, {
        next: null,
        onFailure: 'fallback_if_retry_allowed',
        maxRetry: 1,
      }),
      node('fallback_if_retry_allowed', EXECUTOR_IDS.CODEX_PING),
    ],
  }),
  workflow({
    id: WORKFLOW_IDS.T19_THREAD_REUSE,
    name: 'T19 Thread Reuse',
    description: 'Codex -> human pause -> codex to validate resume and thread reuse behavior.',
    startNodeKey: 'codex_first',
    nodes: [
      node('codex_first', EXECUTOR_IDS.CODEX_PING, { next: 'await_input' }),
      node('await_input', EXECUTOR_IDS.HUMAN_GATE, {
        next: 'codex_second',
        onFailure: 'codex_second',
      }),
      node('codex_second', EXECUTOR_IDS.CODEX_FIX),
    ],
  }),

  // ---- T30-T33: merged multi-step scenarios --------------------------------
  // `retry_once` names itself as its own failure target.
  workflow({
    id: WORKFLOW_IDS.T30_LIFECYCLE_HAPPY_MERGED,
    name: 'T30 Lifecycle Happy Merged',
    description: 'Merged happy-path fixture for pause/resume, retry-once, and final completion.',
    startNodeKey: 'kickoff',
    nodes: [
      node('kickoff', EXECUTOR_IDS.CODEX_PING, { next: 'await_input' }),
      node('await_input', EXECUTOR_IDS.HUMAN_GATE, {
        next: 'retry_once',
        onFailure: 'retry_once',
      }),
      node('retry_once', EXECUTOR_IDS.SHELL_FAIL_ONCE, {
        next: 'emit_logs',
        onFailure: 'retry_once',
        maxRetry: 2,
      }),
      node('emit_logs', EXECUTOR_IDS.SHELL_NOISY_LOGS, { next: 'done' }),
      node('done', EXECUTOR_IDS.SHELL_SUCCESS),
    ],
  }),
  // `fail_to_fallback` uses the always-failing executor, so its `next: 'done'`
  // edge is only taken if that command ever succeeds.
  workflow({
    id: WORKFLOW_IDS.T31_FAILURE_RECOVERY_MERGED,
    name: 'T31 Failure Recovery Merged',
    description:
      'Merged failure-recovery fixture for transient failure, fallback, and codex finalize.',
    startNodeKey: 'transient_retry',
    nodes: [
      node('transient_retry', EXECUTOR_IDS.SHELL_FAIL_ONCE, {
        next: 'fail_to_fallback',
        onFailure: 'transient_retry',
        maxRetry: 2,
      }),
      node('fail_to_fallback', EXECUTOR_IDS.SHELL_FAIL_ALWAYS, {
        next: 'done',
        onFailure: 'fallback_success',
        maxRetry: 2,
      }),
      node('fallback_success', EXECUTOR_IDS.SHELL_SUCCESS, { next: 'codex_finalize' }),
      node('codex_finalize', EXECUTOR_IDS.CODEX_FIX),
      node('done', EXECUTOR_IDS.SHELL_SUCCESS),
    ],
  }),
  // `fix` points back at `review_random`, forming a review/fix loop.
  workflow({
    id: WORKFLOW_IDS.T32_REVIEW_RANDOMNESS_MERGED,
    name: 'T32 Review Randomness Merged',
    description: 'Random review outcomes with codex fix loop to validate branch behavior.',
    startNodeKey: 'implement',
    nodes: [
      node('implement', EXECUTOR_IDS.CODEX_PING, { next: 'review_random' }),
      node('review_random', EXECUTOR_IDS.SHELL_RANDOM_REVIEW, {
        next: 'finalize',
        onFailure: 'fix',
        maxRetry: 2,
      }),
      node('fix', EXECUTOR_IDS.CODEX_FIX, { next: 'review_random' }),
      node('finalize', EXECUTOR_IDS.SHELL_SUCCESS),
    ],
  }),
  workflow({
    id: WORKFLOW_IDS.T33_OBSERVABILITY_MERGED,
    name: 'T33 Observability Merged',
    description: 'Codex + noisy shell logs for usage/log validation in one run.',
    startNodeKey: 'codex_start',
    nodes: [
      node('codex_start', EXECUTOR_IDS.CODEX_PING, { next: 'noisy_logs' }),
      node('noisy_logs', EXECUTOR_IDS.SHELL_NOISY_LOGS, { next: 'done' }),
      node('done', EXECUTOR_IDS.SHELL_SUCCESS),
    ],
  }),
];
|
|
392
|
+
|
|
393
|
+
/**
 * Build a fixture task definition.
 *
 * The given fields are copied through unchanged. Every task gets the fixed
 * values `type: 'test'`, `status: 'backlog'` and `priority: 'low'`.
 *
 * @param {object} spec
 * @param {string} spec.id
 * @param {string} spec.title
 * @param {string} spec.workflowId - Id of the workflow the task refers to.
 * @param {string[]} spec.tags
 * @param {string} spec.goal
 * @param {string} spec.context
 * @param {string[]} spec.requirements
 * @param {string[]} spec.notes
 * @param {string[]} spec.steps
 * @param {string[]} spec.acceptance
 * @returns {object} The task definition.
 */
function makeTask(spec) {
  const {
    id,
    title,
    workflowId,
    tags,
    goal,
    context,
    requirements,
    notes,
    steps,
    acceptance,
  } = spec;

  // Identity first, then the fixed fields, then the descriptive content.
  const identity = { id, title, workflowId };
  const fixedFields = { type: 'test', status: 'backlog', priority: 'low' };
  const content = { tags, goal, context, requirements, notes, steps, acceptance };

  return { ...identity, ...fixedFields, ...content };
}
|
|
421
|
+
|
|
422
|
+
export const TEST_FIXTURE_TASKS = [
|
|
423
|
+
makeTask({
|
|
424
|
+
id: 'test-t00-conn-codex-basic',
|
|
425
|
+
title: 'T00 Connectivity Codex Basic',
|
|
426
|
+
workflowId: WORKFLOW_IDS.T00_CONN_CODEX_BASIC,
|
|
427
|
+
tags: ['fixture', 'workflow', 'codex', 'connectivity'],
|
|
428
|
+
goal: 'Validate codex-cli can run a minimal workflow step with default codex model settings.',
|
|
429
|
+
context: 'Use this as the first sanity check before any other fixture execution.',
|
|
430
|
+
requirements: [
|
|
431
|
+
'Start the workflow successfully.',
|
|
432
|
+
'Node output indicates codex connectivity passed.',
|
|
433
|
+
],
|
|
434
|
+
notes: [
|
|
435
|
+
'No repository edits are required.',
|
|
436
|
+
'Expected to finish quickly and deterministically.',
|
|
437
|
+
],
|
|
438
|
+
steps: [
|
|
439
|
+
'Start workflow `workflow_test_t00_conn_codex_basic` for this task.',
|
|
440
|
+
'Confirm run status becomes `completed`.',
|
|
441
|
+
],
|
|
442
|
+
acceptance: ['Run reaches `completed`.', 'Terminal logs contain `FIXTURE_OK_CODEX`.'],
|
|
443
|
+
}),
|
|
444
|
+
makeTask({
|
|
445
|
+
id: 'test-t01-conn-claude-basic',
|
|
446
|
+
title: 'T01 Connectivity Claude Basic',
|
|
447
|
+
workflowId: WORKFLOW_IDS.T01_CONN_CLAUDE_BASIC,
|
|
448
|
+
tags: ['fixture', 'workflow', 'claude', 'connectivity'],
|
|
449
|
+
goal: 'Validate claude-code can run a minimal workflow step.',
|
|
450
|
+
context: 'Nightly connectivity coverage for non-codex provider.',
|
|
451
|
+
requirements: [
|
|
452
|
+
'Start the workflow successfully.',
|
|
453
|
+
'Node output indicates claude connectivity passed.',
|
|
454
|
+
],
|
|
455
|
+
notes: ['Keep this as a minimal one-step smoke check.', 'Use low-cost Claude model.'],
|
|
456
|
+
steps: [
|
|
457
|
+
'Start workflow `workflow_test_t01_conn_claude_basic`.',
|
|
458
|
+
'Confirm run completes and output is present.',
|
|
459
|
+
],
|
|
460
|
+
acceptance: ['Run reaches `completed`.', 'Terminal logs contain `FIXTURE_OK_CLAUDE`.'],
|
|
461
|
+
}),
|
|
462
|
+
makeTask({
|
|
463
|
+
id: 'test-t02-conn-gemini-basic',
|
|
464
|
+
title: 'T02 Connectivity Gemini Basic',
|
|
465
|
+
workflowId: WORKFLOW_IDS.T02_CONN_GEMINI_BASIC,
|
|
466
|
+
tags: ['fixture', 'workflow', 'gemini', 'connectivity'],
|
|
467
|
+
goal: 'Validate gemini-cli can run a minimal workflow step.',
|
|
468
|
+
context: 'Nightly connectivity coverage for non-codex provider.',
|
|
469
|
+
requirements: [
|
|
470
|
+
'Start the workflow successfully.',
|
|
471
|
+
'Node output indicates gemini connectivity passed.',
|
|
472
|
+
],
|
|
473
|
+
notes: ['Keep this as a minimal one-step smoke check.', 'Use low-cost Gemini model.'],
|
|
474
|
+
steps: [
|
|
475
|
+
'Start workflow `workflow_test_t02_conn_gemini_basic`.',
|
|
476
|
+
'Confirm run completes and output is present.',
|
|
477
|
+
],
|
|
478
|
+
acceptance: ['Run reaches `completed`.', 'Terminal logs contain `FIXTURE_OK_GEMINI`.'],
|
|
479
|
+
}),
|
|
480
|
+
makeTask({
|
|
481
|
+
id: 'test-t03-cross-agent-handoff',
|
|
482
|
+
title: 'T03 Cross Agent Handoff',
|
|
483
|
+
workflowId: WORKFLOW_IDS.T03_CROSS_AGENT_HANDOFF,
|
|
484
|
+
tags: ['fixture', 'workflow', 'cross-agent'],
|
|
485
|
+
goal: 'Validate codex-to-claude handoff works in one workflow.',
|
|
486
|
+
context: 'Covers provider switch in sequential nodes.',
|
|
487
|
+
requirements: ['Both nodes execute in order.', 'Run completes without manual intervention.'],
|
|
488
|
+
notes: ['No code edits required.', 'Focus on provider transition stability.'],
|
|
489
|
+
steps: [
|
|
490
|
+
'Start workflow `workflow_test_t03_cross_agent_handoff`.',
|
|
491
|
+
'Verify both node logs are present.',
|
|
492
|
+
],
|
|
493
|
+
acceptance: ['Run reaches `completed`.', 'Logs include both codex and claude outputs.'],
|
|
494
|
+
}),
|
|
495
|
+
makeTask({
|
|
496
|
+
id: 'test-t04-cross-agent-review-handoff',
|
|
497
|
+
title: 'T04 Cross Agent Review Handoff',
|
|
498
|
+
workflowId: WORKFLOW_IDS.T04_CROSS_AGENT_REVIEW_HANDOFF,
|
|
499
|
+
tags: ['fixture', 'workflow', 'cross-agent', 'review'],
|
|
500
|
+
goal: 'Validate codex-to-gemini handoff in a review-like sequence.',
|
|
501
|
+
context: 'Covers second cross-agent path used for review routing checks.',
|
|
502
|
+
requirements: ['Codex node runs first.', 'Gemini node runs after codex node.'],
|
|
503
|
+
notes: ['Keep output short and deterministic.', 'This is a handoff verification fixture.'],
|
|
504
|
+
steps: [
|
|
505
|
+
'Start workflow `workflow_test_t04_cross_agent_review_handoff`.',
|
|
506
|
+
'Confirm node execution order in timeline.',
|
|
507
|
+
],
|
|
508
|
+
acceptance: ['Run reaches `completed`.', 'Timeline shows codex then gemini.'],
|
|
509
|
+
}),
|
|
510
|
+
makeTask({
|
|
511
|
+
id: 'test-t10-run-cancel-pending',
|
|
512
|
+
title: 'T10 Run Cancel Pending',
|
|
513
|
+
workflowId: WORKFLOW_IDS.T10_LONG_RUNNING_CANCEL,
|
|
514
|
+
tags: ['fixture', 'workflow', 'cancel', 'run'],
|
|
515
|
+
goal: 'Validate run-level cancel works when cancel is triggered immediately after start.',
|
|
516
|
+
context: 'Use long-running shell node so cancel window is easy to hit.',
|
|
517
|
+
requirements: [
|
|
518
|
+
'Run starts and enters active state.',
|
|
519
|
+
'Cancel action moves run to terminal cancelled state.',
|
|
520
|
+
],
|
|
521
|
+
notes: [
|
|
522
|
+
'Trigger cancel quickly after start.',
|
|
523
|
+
'Same workflow is reused for running/node cancel tests.',
|
|
524
|
+
],
|
|
525
|
+
steps: [
|
|
526
|
+
'Start workflow `workflow_test_t10_t11_t14_long_running_cancel`.',
|
|
527
|
+
'Immediately click run cancel.',
|
|
528
|
+
],
|
|
529
|
+
acceptance: ['Run status becomes `cancelled`.', 'No extra downstream nodes run.'],
|
|
530
|
+
}),
|
|
531
|
+
makeTask({
|
|
532
|
+
id: 'test-t11-run-cancel-running',
|
|
533
|
+
title: 'T11 Run Cancel Running',
|
|
534
|
+
workflowId: WORKFLOW_IDS.T10_LONG_RUNNING_CANCEL,
|
|
535
|
+
tags: ['fixture', 'workflow', 'cancel', 'run'],
|
|
536
|
+
goal: 'Validate run-level cancel during active execution.',
|
|
537
|
+
context: 'Cancel after node is clearly in running state.',
|
|
538
|
+
requirements: ['Node starts running.', 'Cancel stops run and marks it terminal.'],
|
|
539
|
+
notes: ['Wait until running state is visible.', 'Cancel should stop the active node.'],
|
|
540
|
+
steps: [
|
|
541
|
+
'Start workflow `workflow_test_t10_t11_t14_long_running_cancel`.',
|
|
542
|
+
'Cancel when node state is `running`.',
|
|
543
|
+
],
|
|
544
|
+
acceptance: [
|
|
545
|
+
'Run status becomes `cancelled`.',
|
|
546
|
+
'Node does not continue to natural completion.',
|
|
547
|
+
],
|
|
548
|
+
}),
|
|
549
|
+
makeTask({
|
|
550
|
+
id: 'test-t12-run-resume-paused',
|
|
551
|
+
title: 'T12 Run Resume Paused',
|
|
552
|
+
workflowId: WORKFLOW_IDS.T12_PAUSE_RESUME_GATE,
|
|
553
|
+
tags: ['fixture', 'workflow', 'resume', 'run'],
|
|
554
|
+
goal: 'Validate run-level resume from paused human gate.',
|
|
555
|
+
context: 'Workflow intentionally pauses at `await_input` node.',
|
|
556
|
+
requirements: ['Run pauses waiting for input.', 'Resume continues and completes workflow.'],
|
|
557
|
+
notes: ['Provide a positive approval payload.', 'Observe paused -> running transition.'],
|
|
558
|
+
steps: [
|
|
559
|
+
'Start workflow `workflow_test_t12_t15_pause_resume_gate`.',
|
|
560
|
+
'Submit human input and resume run.',
|
|
561
|
+
],
|
|
562
|
+
acceptance: [
|
|
563
|
+
'Run transitions from `paused` to `completed`.',
|
|
564
|
+
'Final shell node executes successfully.',
|
|
565
|
+
],
|
|
566
|
+
}),
|
|
567
|
+
makeTask({
|
|
568
|
+
id: 'test-t13-run-restart-from-initial',
|
|
569
|
+
title: 'T13 Run Restart From Initial',
|
|
570
|
+
workflowId: WORKFLOW_IDS.T13_RESTART_MARKER,
|
|
571
|
+
tags: ['fixture', 'workflow', 'restart', 'run'],
|
|
572
|
+
goal: 'Validate run-level restart from initial payload/state.',
|
|
573
|
+
context: 'First run fails by design; restart should pass because marker file now exists.',
|
|
574
|
+
requirements: ['First run ends in failure.', 'Restarted run reaches completion.'],
|
|
575
|
+
notes: [
|
|
576
|
+
'Do not delete marker between first run and restart.',
|
|
577
|
+
'Use restart action on the same run context.',
|
|
578
|
+
],
|
|
579
|
+
steps: [
|
|
580
|
+
'Start workflow `workflow_test_t13_restart_marker` and wait for fail.',
|
|
581
|
+
'Use run restart action and observe second run.',
|
|
582
|
+
],
|
|
583
|
+
acceptance: ['First run is `failed`.', 'Restarted run is `completed`.'],
|
|
584
|
+
}),
|
|
585
|
+
makeTask({
|
|
586
|
+
id: 'test-t14-node-cancel-running',
|
|
587
|
+
title: 'T14 Node Cancel Running',
|
|
588
|
+
workflowId: WORKFLOW_IDS.T10_LONG_RUNNING_CANCEL,
|
|
589
|
+
tags: ['fixture', 'workflow', 'cancel', 'node'],
|
|
590
|
+
goal: 'Validate node-level cancel while current node is running.',
|
|
591
|
+
context: 'Long-running node provides enough time for node cancel action.',
|
|
592
|
+
requirements: ['Node enters running state.', 'Node cancel action interrupts execution.'],
|
|
593
|
+
notes: [
|
|
594
|
+
'Use node-level action instead of run-level action.',
|
|
595
|
+
'Observe node status transition in timeline.',
|
|
596
|
+
],
|
|
597
|
+
steps: [
|
|
598
|
+
'Start workflow `workflow_test_t10_t11_t14_long_running_cancel`.',
|
|
599
|
+
'Click cancel on the active node execution.',
|
|
600
|
+
],
|
|
601
|
+
acceptance: [
|
|
602
|
+
'Node is terminally cancelled/failed-as-cancelled per UI behavior.',
|
|
603
|
+
'Run reflects interrupted execution state.',
|
|
604
|
+
],
|
|
605
|
+
}),
|
|
606
|
+
makeTask({
|
|
607
|
+
id: 'test-t15-node-resume-paused',
|
|
608
|
+
title: 'T15 Node Resume Paused',
|
|
609
|
+
workflowId: WORKFLOW_IDS.T12_PAUSE_RESUME_GATE,
|
|
610
|
+
tags: ['fixture', 'workflow', 'resume', 'node'],
|
|
611
|
+
goal: 'Validate node-level resume from paused human input node.',
|
|
612
|
+
context: 'Pause is deterministic at `await_input` node.',
|
|
613
|
+
requirements: ['Node pauses awaiting response.', 'Node resume continues to final node.'],
|
|
614
|
+
notes: ['Provide decision payload in human input.', 'Verify node-level state transitions.'],
|
|
615
|
+
steps: [
|
|
616
|
+
'Start workflow `workflow_test_t12_t15_pause_resume_gate`.',
|
|
617
|
+
'Resume paused `await_input` node with input payload.',
|
|
618
|
+
],
|
|
619
|
+
acceptance: ['Paused node resumes and completes.', 'Workflow run reaches `completed`.'],
|
|
620
|
+
}),
|
|
621
|
+
makeTask({
|
|
622
|
+
id: 'test-t16-node-restart',
|
|
623
|
+
title: 'T16 Node Restart',
|
|
624
|
+
workflowId: WORKFLOW_IDS.T30_LIFECYCLE_HAPPY_MERGED,
|
|
625
|
+
tags: ['fixture', 'workflow', 'restart', 'node'],
|
|
626
|
+
goal: 'Validate node-level restart in a multi-node workflow.',
|
|
627
|
+
context: 'Use merged lifecycle workflow to inspect downstream reset behavior.',
|
|
628
|
+
requirements: [
|
|
629
|
+
'Restart from a selected node is accepted.',
|
|
630
|
+
'Downstream nodes re-run after restart.',
|
|
631
|
+
],
|
|
632
|
+
notes: [
|
|
633
|
+
'Prefer restarting from `retry_once` or `emit_logs` node.',
|
|
634
|
+
'Watch timeline reset scope carefully.',
|
|
635
|
+
],
|
|
636
|
+
steps: [
|
|
637
|
+
'Run `workflow_test_t30_lifecycle_happy_merged` once to completion.',
|
|
638
|
+
'Trigger node restart and observe second execution path.',
|
|
639
|
+
],
|
|
640
|
+
acceptance: [
|
|
641
|
+
'Selected node and downstream nodes execute again.',
|
|
642
|
+
'Workflow reaches `completed` after restart.',
|
|
643
|
+
],
|
|
644
|
+
}),
|
|
645
|
+
makeTask({
|
|
646
|
+
id: 'test-t17-node-retry-exhausted',
|
|
647
|
+
title: 'T17 Node Retry Exhausted',
|
|
648
|
+
workflowId: WORKFLOW_IDS.T17_RETRY_EXHAUSTED,
|
|
649
|
+
tags: ['fixture', 'workflow', 'retry', 'node'],
|
|
650
|
+
goal: 'Validate failure behavior when maxRetry cap is reached.',
|
|
651
|
+
context: 'Codex node intentionally reports failure on every attempt.',
|
|
652
|
+
requirements: ['Node fails on first attempt.', 'Run fails with retry exhaustion message.'],
|
|
653
|
+
notes: ['Max retry is intentionally low.', 'Fallback node should not run once cap is reached.'],
|
|
654
|
+
steps: [
|
|
655
|
+
'Start workflow `workflow_test_t17_retry_exhausted`.',
|
|
656
|
+
'Check run/node failure messages after terminal fail.',
|
|
657
|
+
],
|
|
658
|
+
acceptance: ['Run status is `failed`.', 'Failure message references retry cap/attempt.'],
|
|
659
|
+
}),
|
|
660
|
+
makeTask({
|
|
661
|
+
id: 'test-t18-state-guard-rules',
|
|
662
|
+
title: 'T18 State Guard Rules',
|
|
663
|
+
workflowId: WORKFLOW_IDS.T12_PAUSE_RESUME_GATE,
|
|
664
|
+
tags: ['fixture', 'workflow', 'guards', 'lifecycle'],
|
|
665
|
+
goal: 'Validate invalid lifecycle actions are rejected in guarded states.',
|
|
666
|
+
context: 'Use paused and active states from existing fixtures to test guard behavior.',
|
|
667
|
+
requirements: [
|
|
668
|
+
'Invalid resume/retry/restart actions are blocked.',
|
|
669
|
+
'Error messages are explicit and stable.',
|
|
670
|
+
],
|
|
671
|
+
notes: [
|
|
672
|
+
'Try operations on active, paused, and terminal runs.',
|
|
673
|
+
'Reuse paused gate and long-running fixtures as needed.',
|
|
674
|
+
],
|
|
675
|
+
steps: [
|
|
676
|
+
'Create paused run from `workflow_test_t12_t15_pause_resume_gate`.',
|
|
677
|
+
'Attempt invalid lifecycle actions and capture responses.',
|
|
678
|
+
],
|
|
679
|
+
acceptance: [
|
|
680
|
+
'Guarded actions are rejected consistently.',
|
|
681
|
+
'No invalid transition mutates run state.',
|
|
682
|
+
],
|
|
683
|
+
}),
|
|
684
|
+
makeTask({
|
|
685
|
+
id: 'test-t19-thread-reuse-resume-retry',
|
|
686
|
+
title: 'T19 Thread Reuse Resume Retry',
|
|
687
|
+
workflowId: WORKFLOW_IDS.T19_THREAD_REUSE,
|
|
688
|
+
tags: ['fixture', 'workflow', 'thread', 'resume', 'retry'],
|
|
689
|
+
goal: 'Validate thread continuity across pause/resume and subsequent codex node execution.',
|
|
690
|
+
context: 'Workflow has codex node before and after human pause.',
|
|
691
|
+
requirements: [
|
|
692
|
+
'First codex node stores thread/session context.',
|
|
693
|
+
'Second codex node reuses resumable context where applicable.',
|
|
694
|
+
],
|
|
695
|
+
notes: [
|
|
696
|
+
'Inspect prompt state metadata in run payload/logs.',
|
|
697
|
+
'Use node retry once to observe thread behavior.',
|
|
698
|
+
],
|
|
699
|
+
steps: [
|
|
700
|
+
'Run `workflow_test_t19_thread_reuse` until paused, then resume.',
|
|
701
|
+
'Inspect second codex node logs for resume context.',
|
|
702
|
+
],
|
|
703
|
+
acceptance: [
|
|
704
|
+
'Workflow completes successfully.',
|
|
705
|
+
'Thread reuse behavior matches configured resume rules.',
|
|
706
|
+
],
|
|
707
|
+
}),
|
|
708
|
+
makeTask({
|
|
709
|
+
id: 'test-t30-lifecycle-happy-merged',
|
|
710
|
+
title: 'T30 Lifecycle Happy Merged',
|
|
711
|
+
workflowId: WORKFLOW_IDS.T30_LIFECYCLE_HAPPY_MERGED,
|
|
712
|
+
tags: ['fixture', 'workflow', 'merged', 'happy-path'],
|
|
713
|
+
goal: 'Run one merged flow covering pause/resume, retry-once, and final completion.',
|
|
714
|
+
context: 'This is the primary low-cost lifecycle happy-path regression fixture.',
|
|
715
|
+
requirements: [
|
|
716
|
+
'Human pause/resume succeeds.',
|
|
717
|
+
'Retry-once node eventually succeeds and flow completes.',
|
|
718
|
+
],
|
|
719
|
+
notes: [
|
|
720
|
+
'This fixture intentionally emits logs near the end.',
|
|
721
|
+
'Use this as default manual regression path.',
|
|
722
|
+
],
|
|
723
|
+
steps: [
|
|
724
|
+
'Start workflow `workflow_test_t30_lifecycle_happy_merged`.',
|
|
725
|
+
'Approve human gate and let workflow finish.',
|
|
726
|
+
],
|
|
727
|
+
acceptance: [
|
|
728
|
+
'Run status becomes `completed`.',
|
|
729
|
+
'Timeline shows pause/resume and retry behavior.',
|
|
730
|
+
],
|
|
731
|
+
}),
|
|
732
|
+
makeTask({
|
|
733
|
+
id: 'test-t31-failure-recovery-merged',
|
|
734
|
+
title: 'T31 Failure Recovery Merged',
|
|
735
|
+
workflowId: WORKFLOW_IDS.T31_FAILURE_RECOVERY_MERGED,
|
|
736
|
+
tags: ['fixture', 'workflow', 'merged', 'failure-recovery'],
|
|
737
|
+
goal: 'Run one merged flow covering transient retry and onFailure fallback recovery.',
|
|
738
|
+
context: 'Combines multiple failure recovery paths to reduce total fixture count.',
|
|
739
|
+
requirements: [
|
|
740
|
+
'Transient failure path recovers via retry.',
|
|
741
|
+
'Hard failure path recovers via fallback node.',
|
|
742
|
+
],
|
|
743
|
+
notes: [
|
|
744
|
+
'Observe node attempts and transitions closely.',
|
|
745
|
+
'Codex final node confirms post-recovery continuation.',
|
|
746
|
+
],
|
|
747
|
+
steps: [
|
|
748
|
+
'Start workflow `workflow_test_t31_failure_recovery_merged`.',
|
|
749
|
+
'Verify retry and fallback transitions in timeline.',
|
|
750
|
+
],
|
|
751
|
+
acceptance: [
|
|
752
|
+
'Run reaches terminal completion.',
|
|
753
|
+
'Timeline contains both retry and fallback events.',
|
|
754
|
+
],
|
|
755
|
+
}),
|
|
756
|
+
makeTask({
|
|
757
|
+
id: 'test-t32-review-randomness-merged',
|
|
758
|
+
title: 'T32 Review Randomness Merged',
|
|
759
|
+
workflowId: WORKFLOW_IDS.T32_REVIEW_RANDOMNESS_MERGED,
|
|
760
|
+
tags: ['fixture', 'workflow', 'merged', 'review', 'random'],
|
|
761
|
+
goal: 'Validate stochastic review branch behavior with bounded retries.',
|
|
762
|
+
context: 'Random review node may approve or reject; rejection routes to codex fix.',
|
|
763
|
+
requirements: [
|
|
764
|
+
'Both approve/reject branches can be observed over multiple runs.',
|
|
765
|
+
'Retry cap prevents unbounded loops.',
|
|
766
|
+
],
|
|
767
|
+
notes: [
|
|
768
|
+
'Run this fixture multiple times for branch coverage.',
|
|
769
|
+
'Keep focus on route behavior, not model output quality.',
|
|
770
|
+
],
|
|
771
|
+
steps: [
|
|
772
|
+
'Start workflow `workflow_test_t32_review_randomness_merged` repeatedly.',
|
|
773
|
+
'Record branch outcomes from review node.',
|
|
774
|
+
],
|
|
775
|
+
acceptance: [
|
|
776
|
+
'At least one run shows fix loop path.',
|
|
777
|
+
'At least one run shows direct approval path.',
|
|
778
|
+
],
|
|
779
|
+
}),
|
|
780
|
+
makeTask({
|
|
781
|
+
id: 'test-t33-observability-merged',
|
|
782
|
+
title: 'T33 Observability Merged',
|
|
783
|
+
workflowId: WORKFLOW_IDS.T33_OBSERVABILITY_MERGED,
|
|
784
|
+
tags: ['fixture', 'workflow', 'merged', 'observability'],
|
|
785
|
+
goal: 'Validate token/log observability in a single low-cost merged run.',
|
|
786
|
+
context: 'Combines codex token usage with mixed stdout/stderr logging.',
|
|
787
|
+
requirements: [
|
|
788
|
+
'Codex node usage metadata is recorded.',
|
|
789
|
+
'Shell node stdout/stderr entries are persisted and readable.',
|
|
790
|
+
],
|
|
791
|
+
notes: [
|
|
792
|
+
'Use log pagination views after completion.',
|
|
793
|
+
'This fixture is intended for observability checks only.',
|
|
794
|
+
],
|
|
795
|
+
steps: [
|
|
796
|
+
'Start workflow `workflow_test_t33_observability_merged`.',
|
|
797
|
+
'Inspect run logs and usage data after completion.',
|
|
798
|
+
],
|
|
799
|
+
acceptance: ['Run reaches `completed`.', 'Expected logs and usage metadata are present.'],
|
|
800
|
+
}),
|
|
801
|
+
];
|
|
802
|
+
|
|
803
|
+
// The first entry of TEST_FIXTURE_TASKS is the primary fixture: its task id
// and workflow id are exported under dedicated names.
export const {
  id: PRIMARY_TEST_TASK_ID,
  workflowId: PRIMARY_TEST_WORKFLOW_ID,
} = TEST_FIXTURE_TASKS[0];
|
|
805
|
+
|
|
806
|
+
/**
 * Build the markdown filename for a task.
 *
 * @param {string} taskId - Task identifier; it is interpolated as-is.
 * @returns {string} The task id followed by the `.md` extension.
 */
export function taskFilename(taskId) {
  const filename = `${taskId}.md`;
  return filename;
}
|
|
809
|
+
|
|
810
|
+
/**
 * Build the path of a task file under the `.vibeman/tasks/` directory.
 *
 * @param {string} taskId - Task identifier, forwarded to `taskFilename`.
 * @returns {string} `.vibeman/tasks/` followed by the task's filename.
 */
export function taskRelativePath(taskId) {
  const filename = taskFilename(taskId);
  return `.vibeman/tasks/${filename}`;
}
|
|
813
|
+
|
|
814
|
+
/**
 * Load the task markdown template from disk.
 *
 * The template path is resolved against the module-level `repoRoot`.
 *
 * @returns {string} The template file's contents decoded as UTF-8.
 */
export function readTaskTemplate() {
  const templatePath = resolve(repoRoot, 'apps/api/resources/templates/task.md');
  return readFileSync(templatePath, 'utf8');
}
|
|
817
|
+
|
|
818
|
+
/**
 * Substitute `to` for the first match of `from` in `input`.
 *
 * A non-function `to` is inserted verbatim. It is handed to
 * `String.prototype.replace` through a replacer callback so that sequences
 * such as `$&`, `$1` or `$$` inside fixture text are not expanded as
 * replacement patterns.
 *
 * @param {string} input - Text to search.
 * @param {string|RegExp} from - Placeholder to look for. A RegExp carrying the
 *   `g` flag still replaces every match.
 * @param {string|Function} to - Replacement text, or a replacer function,
 *   which is passed through unchanged.
 * @returns {string} `input` with the substitution applied, or unchanged when
 *   `from` does not match.
 */
function replaceOnce(input, from, to) {
  // Wrapping the value in a callback disables `$`-pattern expansion.
  const replacer = typeof to === 'function' ? to : () => to;
  return input.replace(from, replacer);
}
|
|
821
|
+
|
|
822
|
+
/**
 * Produce exactly two entries from the first two positions of `items`.
 *
 * Each position yields the trimmed item when it is a string that is non-empty
 * after trimming; otherwise the fallback for that position is used. Entries
 * beyond the second are ignored.
 *
 * @param {Array|undefined|null} items - Candidate entries.
 * @param {*} fallbackA - Value used when the first entry is unusable.
 * @param {*} fallbackB - Value used when the second entry is unusable.
 * @returns {Array} A two-element array `[first, second]`.
 */
function ensureTwo(items, fallbackA, fallbackB) {
  return [fallbackA, fallbackB].map((fallback, position) => {
    const candidate = items?.[position];
    if (typeof candidate !== 'string') {
      return fallback;
    }
    const trimmed = candidate.trim();
    return trimmed || fallback;
  });
}
|
|
827
|
+
|
|
828
|
+
/**
 * Render the markdown document for one fixture task.
 *
 * The task template is loaded with `readTaskTemplate()` and its placeholders
 * are filled in order via `replaceOnce`. A placeholder that is not present in
 * the template is left untouched.
 *
 * @param {object} taskDef - Fixture definition. Reads `id`, `title`, `goal`,
 *   `context` and `workflowId`, plus the optional `type` (default `'test'`),
 *   `status` (default `'backlog'`), `priority` (default `'low'`), `tags`,
 *   `notes`, `requirements`, `steps` and `acceptance`. Only the first two
 *   entries of `requirements`, `steps` and `acceptance` are used.
 * @param {string} [nowIso] - Timestamp written to both `created_at` and
 *   `updated_at`; defaults to the current time as an ISO string.
 * @returns {string} The populated task markdown.
 */
export function buildTaskMarkdown(taskDef, nowIso = new Date().toISOString()) {
  const template = readTaskTemplate();

  const hasEntries = (list) => Array.isArray(list) && list.length > 0;

  const [requirementA, requirementB] = ensureTwo(
    taskDef.requirements,
    'Execute the target workflow successfully.',
    'Verify expected run and node states.',
  );
  const [stepA, stepB] = ensureTwo(
    taskDef.steps,
    `Start workflow \`${taskDef.workflowId}\` for this task.`,
    'Confirm the final state matches expectation.',
  );
  const [acceptanceA, acceptanceB] = ensureTwo(
    taskDef.acceptance,
    'Workflow completes with expected status.',
    'Observed behavior matches fixture intent.',
  );

  let notes = '- Keep the run focused and deterministic.';
  if (hasEntries(taskDef.notes)) {
    notes = taskDef.notes.map((note) => `- ${note}`).join('\n');
  }

  let tags = 'fixture, workflow';
  if (hasEntries(taskDef.tags)) {
    tags = taskDef.tags.join(', ');
  }

  // Ordered [placeholder, replacement] pairs. Order matters: the two
  // "Actionable TODO" entries rely on the first occurrence already having been
  // replaced when the second pair is applied.
  const substitutions = [
    // Header fields.
    [/id:\s*\[type\]-\[short-id\]/, `id: ${taskDef.id}`],
    [/title:\s*\[Short, descriptive title\]/, `title: ${taskDef.title}`],
    [/type:\s*feature\b/, `type: ${taskDef.type ?? 'test'}`],
    [/status:\s*backlog\b/, `status: ${taskDef.status ?? 'backlog'}`],
    [/tags:\s*\[tag1, tag2\]/, `tags: [${tags}]`],
    [/priority:\s*medium\b/, `priority: ${taskDef.priority ?? 'low'}`],
    // Timestamps go immediately before the `---` line that precedes `## Goal`.
    [/---\n\n## Goal/, `created_at: '${nowIso}'\nupdated_at: '${nowIso}'\n---\n\n## Goal`],

    // Goal and context.
    [
      '[What outcome should exist when this is done? Keep it concrete and short.]',
      taskDef.goal,
    ],
    [
      '[Key background, links, or constraints that matter to implementation.]',
      `${taskDef.context}\n\nTarget workflow: \`${taskDef.workflowId}\`.`,
    ],

    // Requirements.
    ['- [ ] Primary requirement #1', `- [ ] ${requirementA}`],
    ['- [ ] Primary requirement #2', `- [ ] ${requirementB}`],

    // Notes.
    [
      `[Notes for AI or human. Include only what matters:\n\n- target files or directories\n- desired approach or patterns to follow\n- tests/commands to run\n- any do-not-change or out-of-scope items]`,
      notes,
    ],

    // Steps: the same placeholder text appears twice in the template.
    ['- [ ] Actionable TODO', `- [ ] ${stepA}`],
    ['- [ ] Actionable TODO', `- [ ] ${stepB}`],

    // Acceptance criteria.
    ['- [ ] Verifiable outcome #1', `- [ ] ${acceptanceA}`],
    ['- [ ] Verifiable outcome #2', `- [ ] ${acceptanceB}`],
  ];

  return substitutions.reduce((text, [from, to]) => replaceOnce(text, from, to), template);
}
|
|
898
|
+
|
|
899
|
+
/**
 * Accessor for the module-level `repoRoot` value.
 *
 * @returns {*} Whatever `repoRoot` currently holds; the same value is used by
 *   `readTaskTemplate` as the base for resolving the template path.
 */
export function getRepoRoot() {
  return repoRoot;
}
|