oh-my-opencode 3.5.4 → 3.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/prometheus/identity-constraints.d.ts +1 -1
- package/dist/agents/prometheus/plan-template.d.ts +1 -1
- package/dist/agents/prometheus/system-prompt.d.ts +1 -1
- package/dist/cli/index.js +15 -14
- package/dist/hooks/todo-continuation-enforcer/constants.d.ts +2 -0
- package/dist/hooks/todo-continuation-enforcer/types.d.ts +1 -0
- package/dist/index.js +273 -248
- package/dist/tools/delegate-task/constants.d.ts +1 -1
- package/package.json +8 -8
package/dist/index.js
CHANGED
|
@@ -4963,6 +4963,16 @@ Approach:
|
|
|
4963
4963
|
- Draft with care
|
|
4964
4964
|
- Polish for clarity and impact
|
|
4965
4965
|
- Documentation, READMEs, articles, technical writing
|
|
4966
|
+
|
|
4967
|
+
ANTI-AI-SLOP RULES (NON-NEGOTIABLE):
|
|
4968
|
+
- NEVER use em dashes (\u2014) or en dashes (\u2013). Use commas, periods, ellipses, or line breaks instead. Zero tolerance.
|
|
4969
|
+
- Remove AI-sounding phrases: "delve", "it's important to note", "I'd be happy to", "certainly", "please don't hesitate", "leverage", "utilize", "in order to", "moving forward", "circle back", "at the end of the day", "robust", "streamline", "facilitate"
|
|
4970
|
+
- Pick plain words. "Use" not "utilize". "Start" not "commence". "Help" not "facilitate".
|
|
4971
|
+
- Use contractions naturally: "don't" not "do not", "it's" not "it is".
|
|
4972
|
+
- Vary sentence length. Don't make every sentence the same length.
|
|
4973
|
+
- NEVER start consecutive sentences with the same word.
|
|
4974
|
+
- No filler openings: skip "In today's world...", "As we all know...", "It goes without saying..."
|
|
4975
|
+
- Write like a human, not a corporate template.
|
|
4966
4976
|
</Category_Context>`, DEEP_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
|
4967
4977
|
You are working on GOAL-ORIENTED AUTONOMOUS tasks.
|
|
4968
4978
|
|
|
@@ -5238,14 +5248,14 @@ WHY THIS FORMAT IS MANDATORY:
|
|
|
5238
5248
|
`, PLAN_AGENT_NAMES, PLAN_FAMILY_NAMES;
|
|
5239
5249
|
var init_constants = __esm(() => {
|
|
5240
5250
|
DEFAULT_CATEGORIES = {
|
|
5241
|
-
"visual-engineering": { model: "google/gemini-3-pro" },
|
|
5251
|
+
"visual-engineering": { model: "google/gemini-3-pro", variant: "high" },
|
|
5242
5252
|
ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" },
|
|
5243
5253
|
deep: { model: "openai/gpt-5.3-codex", variant: "medium" },
|
|
5244
5254
|
artistry: { model: "google/gemini-3-pro", variant: "high" },
|
|
5245
5255
|
quick: { model: "anthropic/claude-haiku-4-5" },
|
|
5246
5256
|
"unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
|
|
5247
5257
|
"unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" },
|
|
5248
|
-
writing: { model: "
|
|
5258
|
+
writing: { model: "kimi-for-coding/k2p5" }
|
|
5249
5259
|
};
|
|
5250
5260
|
CATEGORY_PROMPT_APPENDS = {
|
|
5251
5261
|
"visual-engineering": VISUAL_CATEGORY_PROMPT_APPEND,
|
|
@@ -12230,6 +12240,8 @@ var TOAST_DURATION_MS = 900;
|
|
|
12230
12240
|
var COUNTDOWN_GRACE_PERIOD_MS = 500;
|
|
12231
12241
|
var ABORT_WINDOW_MS = 3000;
|
|
12232
12242
|
var CONTINUATION_COOLDOWN_MS = 30000;
|
|
12243
|
+
var MAX_CONSECUTIVE_FAILURES = 5;
|
|
12244
|
+
var FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000;
|
|
12233
12245
|
|
|
12234
12246
|
// src/hooks/todo-continuation-enforcer/handler.ts
|
|
12235
12247
|
init_logger();
|
|
@@ -12454,11 +12466,14 @@ ${todoList}`;
|
|
|
12454
12466
|
if (injectionState) {
|
|
12455
12467
|
injectionState.inFlight = false;
|
|
12456
12468
|
injectionState.lastInjectedAt = Date.now();
|
|
12469
|
+
injectionState.consecutiveFailures = 0;
|
|
12457
12470
|
}
|
|
12458
12471
|
} catch (error) {
|
|
12459
12472
|
log(`[${HOOK_NAME}] Injection failed`, { sessionID, error: String(error) });
|
|
12460
12473
|
if (injectionState) {
|
|
12461
12474
|
injectionState.inFlight = false;
|
|
12475
|
+
injectionState.lastInjectedAt = Date.now();
|
|
12476
|
+
injectionState.consecutiveFailures = (injectionState.consecutiveFailures ?? 0) + 1;
|
|
12462
12477
|
}
|
|
12463
12478
|
}
|
|
12464
12479
|
}
|
|
@@ -12577,8 +12592,28 @@ async function handleSessionIdle(args) {
|
|
|
12577
12592
|
log(`[${HOOK_NAME}] Skipped: injection in flight`, { sessionID });
|
|
12578
12593
|
return;
|
|
12579
12594
|
}
|
|
12580
|
-
if (state.lastInjectedAt && Date.now() - state.lastInjectedAt
|
|
12581
|
-
|
|
12595
|
+
if (state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES && state.lastInjectedAt && Date.now() - state.lastInjectedAt >= FAILURE_RESET_WINDOW_MS) {
|
|
12596
|
+
state.consecutiveFailures = 0;
|
|
12597
|
+
log(`[${HOOK_NAME}] Reset consecutive failures after recovery window`, {
|
|
12598
|
+
sessionID,
|
|
12599
|
+
failureResetWindowMs: FAILURE_RESET_WINDOW_MS
|
|
12600
|
+
});
|
|
12601
|
+
}
|
|
12602
|
+
if (state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
12603
|
+
log(`[${HOOK_NAME}] Skipped: max consecutive failures reached`, {
|
|
12604
|
+
sessionID,
|
|
12605
|
+
consecutiveFailures: state.consecutiveFailures,
|
|
12606
|
+
maxConsecutiveFailures: MAX_CONSECUTIVE_FAILURES
|
|
12607
|
+
});
|
|
12608
|
+
return;
|
|
12609
|
+
}
|
|
12610
|
+
const effectiveCooldown = CONTINUATION_COOLDOWN_MS * Math.pow(2, Math.min(state.consecutiveFailures, 5));
|
|
12611
|
+
if (state.lastInjectedAt && Date.now() - state.lastInjectedAt < effectiveCooldown) {
|
|
12612
|
+
log(`[${HOOK_NAME}] Skipped: cooldown active`, {
|
|
12613
|
+
sessionID,
|
|
12614
|
+
effectiveCooldown,
|
|
12615
|
+
consecutiveFailures: state.consecutiveFailures
|
|
12616
|
+
});
|
|
12582
12617
|
return;
|
|
12583
12618
|
}
|
|
12584
12619
|
let resolvedInfo;
|
|
@@ -12767,7 +12802,9 @@ function createSessionStateStore() {
|
|
|
12767
12802
|
existing.lastAccessedAt = Date.now();
|
|
12768
12803
|
return existing.state;
|
|
12769
12804
|
}
|
|
12770
|
-
const state = {
|
|
12805
|
+
const state = {
|
|
12806
|
+
consecutiveFailures: 0
|
|
12807
|
+
};
|
|
12771
12808
|
sessions.set(sessionID, { state, lastAccessedAt: Date.now() });
|
|
12772
12809
|
return state;
|
|
12773
12810
|
}
|
|
@@ -28285,11 +28322,17 @@ function createRecoveryState() {
|
|
|
28285
28322
|
function createAnthropicContextWindowLimitRecoveryHook(ctx, options) {
|
|
28286
28323
|
const autoCompactState = createRecoveryState();
|
|
28287
28324
|
const experimental = options?.experimental;
|
|
28325
|
+
const pendingCompactionTimeoutBySession = new Map;
|
|
28288
28326
|
const eventHandler = async ({ event }) => {
|
|
28289
28327
|
const props = event.properties;
|
|
28290
28328
|
if (event.type === "session.deleted") {
|
|
28291
28329
|
const sessionInfo = props?.info;
|
|
28292
28330
|
if (sessionInfo?.id) {
|
|
28331
|
+
const timeoutID = pendingCompactionTimeoutBySession.get(sessionInfo.id);
|
|
28332
|
+
if (timeoutID !== undefined) {
|
|
28333
|
+
clearTimeout(timeoutID);
|
|
28334
|
+
pendingCompactionTimeoutBySession.delete(sessionInfo.id);
|
|
28335
|
+
}
|
|
28293
28336
|
autoCompactState.pendingCompact.delete(sessionInfo.id);
|
|
28294
28337
|
autoCompactState.errorDataBySession.delete(sessionInfo.id);
|
|
28295
28338
|
autoCompactState.retryStateBySession.delete(sessionInfo.id);
|
|
@@ -28324,9 +28367,11 @@ function createAnthropicContextWindowLimitRecoveryHook(ctx, options) {
|
|
|
28324
28367
|
duration: 3000
|
|
28325
28368
|
}
|
|
28326
28369
|
}).catch(() => {});
|
|
28327
|
-
setTimeout(() => {
|
|
28370
|
+
const timeoutID = setTimeout(() => {
|
|
28371
|
+
pendingCompactionTimeoutBySession.delete(sessionID);
|
|
28328
28372
|
executeCompact(sessionID, { providerID, modelID }, autoCompactState, ctx.client, ctx.directory, experimental);
|
|
28329
28373
|
}, 300);
|
|
28374
|
+
pendingCompactionTimeoutBySession.set(sessionID, timeoutID);
|
|
28330
28375
|
}
|
|
28331
28376
|
return;
|
|
28332
28377
|
}
|
|
@@ -28352,6 +28397,11 @@ function createAnthropicContextWindowLimitRecoveryHook(ctx, options) {
|
|
|
28352
28397
|
return;
|
|
28353
28398
|
if (!autoCompactState.pendingCompact.has(sessionID))
|
|
28354
28399
|
return;
|
|
28400
|
+
const timeoutID = pendingCompactionTimeoutBySession.get(sessionID);
|
|
28401
|
+
if (timeoutID !== undefined) {
|
|
28402
|
+
clearTimeout(timeoutID);
|
|
28403
|
+
pendingCompactionTimeoutBySession.delete(sessionID);
|
|
28404
|
+
}
|
|
28355
28405
|
const errorData = autoCompactState.errorDataBySession.get(sessionID);
|
|
28356
28406
|
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory);
|
|
28357
28407
|
if (lastAssistant?.summary === true) {
|
|
@@ -33130,9 +33180,10 @@ var AGENT_MODEL_REQUIREMENTS = {
|
|
|
33130
33180
|
var CATEGORY_MODEL_REQUIREMENTS = {
|
|
33131
33181
|
"visual-engineering": {
|
|
33132
33182
|
fallbackChain: [
|
|
33133
|
-
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
|
33183
|
+
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
|
33184
|
+
{ providers: ["zai-coding-plan"], model: "glm-5" },
|
|
33134
33185
|
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
|
33135
|
-
{ providers: ["
|
|
33186
|
+
{ providers: ["kimi-for-coding"], model: "k2p5" }
|
|
33136
33187
|
]
|
|
33137
33188
|
},
|
|
33138
33189
|
ultrabrain: {
|
|
@@ -33181,10 +33232,9 @@ var CATEGORY_MODEL_REQUIREMENTS = {
|
|
|
33181
33232
|
},
|
|
33182
33233
|
writing: {
|
|
33183
33234
|
fallbackChain: [
|
|
33235
|
+
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
|
33184
33236
|
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
|
33185
|
-
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }
|
|
33186
|
-
{ providers: ["zai-coding-plan"], model: "glm-4.7" },
|
|
33187
|
-
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }
|
|
33237
|
+
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }
|
|
33188
33238
|
]
|
|
33189
33239
|
}
|
|
33190
33240
|
};
|
|
@@ -44976,6 +45026,7 @@ function createUnstableAgentBabysitterHook(ctx, options) {
|
|
|
44976
45026
|
};
|
|
44977
45027
|
}
|
|
44978
45028
|
// src/hooks/preemptive-compaction.ts
|
|
45029
|
+
init_logger();
|
|
44979
45030
|
var DEFAULT_ACTUAL_LIMIT = 200000;
|
|
44980
45031
|
var ANTHROPIC_ACTUAL_LIMIT3 = process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true" ? 1e6 : DEFAULT_ACTUAL_LIMIT;
|
|
44981
45032
|
var PREEMPTIVE_COMPACTION_THRESHOLD = 0.78;
|
|
@@ -45007,7 +45058,9 @@ function createPreemptiveCompactionHook(ctx) {
|
|
|
45007
45058
|
query: { directory: ctx.directory }
|
|
45008
45059
|
});
|
|
45009
45060
|
compactedSessions.add(sessionID);
|
|
45010
|
-
} catch
|
|
45061
|
+
} catch (error45) {
|
|
45062
|
+
log("[preemptive-compaction] Compaction failed", { sessionID, error: String(error45) });
|
|
45063
|
+
} finally {
|
|
45011
45064
|
compactionInProgress.delete(sessionID);
|
|
45012
45065
|
}
|
|
45013
45066
|
};
|
|
@@ -51135,6 +51188,7 @@ Task ID: ${task.id}`;
|
|
|
51135
51188
|
const pollStart = Date.now();
|
|
51136
51189
|
let lastMsgCount = 0;
|
|
51137
51190
|
let stablePolls = 0;
|
|
51191
|
+
let terminalStatus;
|
|
51138
51192
|
while (Date.now() - pollStart < timingCfg.MAX_POLL_TIME_MS) {
|
|
51139
51193
|
if (ctx.abort?.aborted) {
|
|
51140
51194
|
return `Task aborted (was running in background mode).
|
|
@@ -51142,6 +51196,11 @@ Task ID: ${task.id}`;
|
|
|
51142
51196
|
Session ID: ${sessionID}`;
|
|
51143
51197
|
}
|
|
51144
51198
|
await new Promise((resolve10) => setTimeout(resolve10, timingCfg.POLL_INTERVAL_MS));
|
|
51199
|
+
const currentTask = manager.getTask(task.id);
|
|
51200
|
+
if (currentTask && (currentTask.status === "interrupt" || currentTask.status === "error" || currentTask.status === "cancelled")) {
|
|
51201
|
+
terminalStatus = { status: currentTask.status, error: currentTask.error };
|
|
51202
|
+
break;
|
|
51203
|
+
}
|
|
51145
51204
|
const statusResult = await client2.session.status();
|
|
51146
51205
|
const allStatuses = statusResult.data ?? {};
|
|
51147
51206
|
const sessionStatus = allStatuses[sessionID];
|
|
@@ -51164,6 +51223,23 @@ Session ID: ${sessionID}`;
|
|
|
51164
51223
|
lastMsgCount = currentMsgCount;
|
|
51165
51224
|
}
|
|
51166
51225
|
}
|
|
51226
|
+
if (terminalStatus) {
|
|
51227
|
+
const duration4 = formatDuration2(startTime);
|
|
51228
|
+
return `SUPERVISED TASK FAILED (${terminalStatus.status})
|
|
51229
|
+
|
|
51230
|
+
Task was interrupted/failed while running in monitored background mode.
|
|
51231
|
+
${terminalStatus.error ? `Error: ${terminalStatus.error}` : ""}
|
|
51232
|
+
|
|
51233
|
+
Duration: ${duration4}
|
|
51234
|
+
Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
|
|
51235
|
+
Model: ${actualModel}
|
|
51236
|
+
|
|
51237
|
+
The task session may contain partial results.
|
|
51238
|
+
|
|
51239
|
+
<task_metadata>
|
|
51240
|
+
session_id: ${sessionID}
|
|
51241
|
+
</task_metadata>`;
|
|
51242
|
+
}
|
|
51167
51243
|
const messagesResult = await client2.session.messages({ path: { id: sessionID } });
|
|
51168
51244
|
const messages = messagesResult.data ?? messagesResult;
|
|
51169
51245
|
const assistantMessages = messages.filter((m) => m.info?.role === "assistant").sort((a, b) => (b.info?.time?.created ?? 0) - (a.info?.time?.created ?? 0));
|
|
@@ -53497,6 +53573,11 @@ class BackgroundManager {
|
|
|
53497
53573
|
});
|
|
53498
53574
|
return existingTask;
|
|
53499
53575
|
}
|
|
53576
|
+
const completionTimer = this.completionTimers.get(existingTask.id);
|
|
53577
|
+
if (completionTimer) {
|
|
53578
|
+
clearTimeout(completionTimer);
|
|
53579
|
+
this.completionTimers.delete(existingTask.id);
|
|
53580
|
+
}
|
|
53500
53581
|
const concurrencyKey = existingTask.concurrencyGroup ?? existingTask.agent;
|
|
53501
53582
|
await this.concurrencyManager.acquire(concurrencyKey);
|
|
53502
53583
|
existingTask.concurrencyKey = concurrencyKey;
|
|
@@ -53600,7 +53681,7 @@ class BackgroundManager {
|
|
|
53600
53681
|
}
|
|
53601
53682
|
handleEvent(event) {
|
|
53602
53683
|
const props = event.properties;
|
|
53603
|
-
if (event.type === "message.part.updated") {
|
|
53684
|
+
if (event.type === "message.part.updated" || event.type === "message.part.delta") {
|
|
53604
53685
|
if (!props || typeof props !== "object" || !("sessionID" in props))
|
|
53605
53686
|
return;
|
|
53606
53687
|
const partInfo = props;
|
|
@@ -53704,6 +53785,10 @@ class BackgroundManager {
|
|
|
53704
53785
|
this.cleanupPendingByParent(task);
|
|
53705
53786
|
this.tasks.delete(task.id);
|
|
53706
53787
|
this.clearNotificationsForTask(task.id);
|
|
53788
|
+
const toastManager = getTaskToastManager();
|
|
53789
|
+
if (toastManager) {
|
|
53790
|
+
toastManager.removeTask(task.id);
|
|
53791
|
+
}
|
|
53707
53792
|
if (task.sessionID) {
|
|
53708
53793
|
subagentSessions.delete(task.sessionID);
|
|
53709
53794
|
}
|
|
@@ -53746,6 +53831,10 @@ class BackgroundManager {
|
|
|
53746
53831
|
this.cleanupPendingByParent(task);
|
|
53747
53832
|
this.tasks.delete(task.id);
|
|
53748
53833
|
this.clearNotificationsForTask(task.id);
|
|
53834
|
+
const toastManager = getTaskToastManager();
|
|
53835
|
+
if (toastManager) {
|
|
53836
|
+
toastManager.removeTask(task.id);
|
|
53837
|
+
}
|
|
53749
53838
|
if (task.sessionID) {
|
|
53750
53839
|
subagentSessions.delete(task.sessionID);
|
|
53751
53840
|
}
|
|
@@ -53860,6 +53949,10 @@ class BackgroundManager {
|
|
|
53860
53949
|
}).catch(() => {});
|
|
53861
53950
|
}
|
|
53862
53951
|
if (options?.skipNotification) {
|
|
53952
|
+
const toastManager = getTaskToastManager();
|
|
53953
|
+
if (toastManager) {
|
|
53954
|
+
toastManager.removeTask(task.id);
|
|
53955
|
+
}
|
|
53863
53956
|
log(`[background-agent] Task cancelled via ${source} (notification skipped):`, task.id);
|
|
53864
53957
|
return true;
|
|
53865
53958
|
}
|
|
@@ -54037,11 +54130,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
|
|
54037
54130
|
}
|
|
54038
54131
|
} catch (error45) {
|
|
54039
54132
|
if (this.isAbortedSessionError(error45)) {
|
|
54040
|
-
log("[background-agent] Parent session aborted
|
|
54133
|
+
log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
|
|
54041
54134
|
taskId: task.id,
|
|
54042
54135
|
parentSessionID: task.parentSessionID
|
|
54043
54136
|
});
|
|
54044
|
-
return;
|
|
54045
54137
|
}
|
|
54046
54138
|
const messageDir = getMessageDir12(task.parentSessionID);
|
|
54047
54139
|
const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null;
|
|
@@ -54071,13 +54163,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
|
|
54071
54163
|
});
|
|
54072
54164
|
} catch (error45) {
|
|
54073
54165
|
if (this.isAbortedSessionError(error45)) {
|
|
54074
|
-
log("[background-agent] Parent session aborted
|
|
54166
|
+
log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
|
|
54075
54167
|
taskId: task.id,
|
|
54076
54168
|
parentSessionID: task.parentSessionID
|
|
54077
54169
|
});
|
|
54078
|
-
|
|
54170
|
+
} else {
|
|
54171
|
+
log("[background-agent] Failed to send notification:", error45);
|
|
54079
54172
|
}
|
|
54080
|
-
log("[background-agent] Failed to send notification:", error45);
|
|
54081
54173
|
}
|
|
54082
54174
|
if (allComplete) {
|
|
54083
54175
|
for (const completedTask of completedTasks) {
|
|
@@ -54190,6 +54282,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
|
|
54190
54282
|
}
|
|
54191
54283
|
}
|
|
54192
54284
|
this.clearNotificationsForTask(taskId);
|
|
54285
|
+
const toastManager = getTaskToastManager();
|
|
54286
|
+
if (toastManager) {
|
|
54287
|
+
toastManager.removeTask(taskId);
|
|
54288
|
+
}
|
|
54193
54289
|
this.tasks.delete(taskId);
|
|
54194
54290
|
if (task.sessionID) {
|
|
54195
54291
|
subagentSessions.delete(task.sessionID);
|
|
@@ -54225,7 +54321,8 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
|
|
54225
54321
|
const sessionID = task.sessionID;
|
|
54226
54322
|
if (!startedAt || !sessionID)
|
|
54227
54323
|
continue;
|
|
54228
|
-
const
|
|
54324
|
+
const sessionStatus = allStatuses[sessionID]?.type;
|
|
54325
|
+
const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle";
|
|
54229
54326
|
const runtime = now - startedAt.getTime();
|
|
54230
54327
|
if (!task.progress?.lastUpdate) {
|
|
54231
54328
|
if (sessionIsRunning)
|
|
@@ -64430,7 +64527,21 @@ Your ONLY valid output locations are \`.sisyphus/plans/*.md\` and \`.sisyphus/dr
|
|
|
64430
64527
|
|
|
64431
64528
|
Example: \`.sisyphus/plans/auth-refactor.md\`
|
|
64432
64529
|
|
|
64433
|
-
### 5.
|
|
64530
|
+
### 5. MAXIMUM PARALLELISM PRINCIPLE (NON-NEGOTIABLE)
|
|
64531
|
+
|
|
64532
|
+
Your plans MUST maximize parallel execution. This is a core planning quality metric.
|
|
64533
|
+
|
|
64534
|
+
**Granularity Rule**: One task = one module/concern = 1-3 files.
|
|
64535
|
+
If a task touches 4+ files or 2+ unrelated concerns, SPLIT IT.
|
|
64536
|
+
|
|
64537
|
+
**Parallelism Target**: Aim for 5-8 tasks per wave.
|
|
64538
|
+
If any wave has fewer than 3 tasks (except the final integration), you under-split.
|
|
64539
|
+
|
|
64540
|
+
**Dependency Minimization**: Structure tasks so shared dependencies
|
|
64541
|
+
(types, interfaces, configs) are extracted as early Wave-1 tasks,
|
|
64542
|
+
unblocking maximum parallelism in subsequent waves.
|
|
64543
|
+
|
|
64544
|
+
### 6. SINGLE PLAN MANDATE (CRITICAL)
|
|
64434
64545
|
**No matter how large the task, EVERYTHING goes into ONE work plan.**
|
|
64435
64546
|
|
|
64436
64547
|
**NEVER:**
|
|
@@ -64453,7 +64564,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`
|
|
|
64453
64564
|
|
|
64454
64565
|
**The plan can have 50+ TODOs. That's OK. ONE PLAN.**
|
|
64455
64566
|
|
|
64456
|
-
###
|
|
64567
|
+
### 6.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)
|
|
64457
64568
|
|
|
64458
64569
|
<write_protocol>
|
|
64459
64570
|
**The Write tool OVERWRITES files. It does NOT append.**
|
|
@@ -64496,7 +64607,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`
|
|
|
64496
64607
|
- [ ] File already exists with my content? \u2192 Use Edit to append, NOT Write
|
|
64497
64608
|
</write_protocol>
|
|
64498
64609
|
|
|
64499
|
-
###
|
|
64610
|
+
### 7. DRAFT AS WORKING MEMORY (MANDATORY)
|
|
64500
64611
|
**During interview, CONTINUOUSLY record decisions to a draft file.**
|
|
64501
64612
|
|
|
64502
64613
|
**Draft Location**: \`.sisyphus/drafts/{name}.md\`
|
|
@@ -65303,108 +65414,25 @@ Generate plan to: \`.sisyphus/plans/{name}.md\`
|
|
|
65303
65414
|
|
|
65304
65415
|
## Verification Strategy (MANDATORY)
|
|
65305
65416
|
|
|
65306
|
-
> **
|
|
65307
|
-
>
|
|
65308
|
-
> ALL tasks in this plan MUST be verifiable WITHOUT any human action.
|
|
65309
|
-
> This is NOT conditional \u2014 it applies to EVERY task, regardless of test strategy.
|
|
65310
|
-
>
|
|
65311
|
-
> **FORBIDDEN** \u2014 acceptance criteria that require:
|
|
65312
|
-
> - "User manually tests..." / "\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uD14C\uC2A4\uD2B8..."
|
|
65313
|
-
> - "User visually confirms..." / "\uC0AC\uC6A9\uC790\uAC00 \uB208\uC73C\uB85C \uD655\uC778..."
|
|
65314
|
-
> - "User interacts with..." / "\uC0AC\uC6A9\uC790\uAC00 \uC9C1\uC811 \uC870\uC791..."
|
|
65315
|
-
> - "Ask user to verify..." / "\uC0AC\uC6A9\uC790\uC5D0\uAC8C \uD655\uC778 \uC694\uCCAD..."
|
|
65316
|
-
> - ANY step where a human must perform an action
|
|
65317
|
-
>
|
|
65318
|
-
> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.
|
|
65417
|
+
> **ZERO HUMAN INTERVENTION** \u2014 ALL verification is agent-executed. No exceptions.
|
|
65418
|
+
> Acceptance criteria requiring "user manually tests/confirms" are FORBIDDEN.
|
|
65319
65419
|
|
|
65320
65420
|
### Test Decision
|
|
65321
65421
|
- **Infrastructure exists**: [YES/NO]
|
|
65322
65422
|
- **Automated tests**: [TDD / Tests-after / None]
|
|
65323
65423
|
- **Framework**: [bun test / vitest / jest / pytest / none]
|
|
65424
|
+
- **If TDD**: Each task follows RED (failing test) \u2192 GREEN (minimal impl) \u2192 REFACTOR
|
|
65324
65425
|
|
|
65325
|
-
###
|
|
65326
|
-
|
|
65327
|
-
|
|
65328
|
-
|
|
65329
|
-
**Task Structure:**
|
|
65330
|
-
1. **RED**: Write failing test first
|
|
65331
|
-
- Test file: \`[path].test.ts\`
|
|
65332
|
-
- Test command: \`bun test [file]\`
|
|
65333
|
-
- Expected: FAIL (test exists, implementation doesn't)
|
|
65334
|
-
2. **GREEN**: Implement minimum code to pass
|
|
65335
|
-
- Command: \`bun test [file]\`
|
|
65336
|
-
- Expected: PASS
|
|
65337
|
-
3. **REFACTOR**: Clean up while keeping green
|
|
65338
|
-
- Command: \`bun test [file]\`
|
|
65339
|
-
- Expected: PASS (still)
|
|
65340
|
-
|
|
65341
|
-
**Test Setup Task (if infrastructure doesn't exist):**
|
|
65342
|
-
- [ ] 0. Setup Test Infrastructure
|
|
65343
|
-
- Install: \`bun add -d [test-framework]\`
|
|
65344
|
-
- Config: Create \`[config-file]\`
|
|
65345
|
-
- Verify: \`bun test --help\` \u2192 shows help
|
|
65346
|
-
- Example: Create \`src/__tests__/example.test.ts\`
|
|
65347
|
-
- Verify: \`bun test\` \u2192 1 test passes
|
|
65348
|
-
|
|
65349
|
-
### Agent-Executed QA Scenarios (MANDATORY \u2014 ALL tasks)
|
|
65350
|
-
|
|
65351
|
-
> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.
|
|
65352
|
-
> - **With TDD**: QA scenarios complement unit tests at integration/E2E level
|
|
65353
|
-
> - **Without TDD**: QA scenarios are the PRIMARY verification method
|
|
65354
|
-
>
|
|
65355
|
-
> These describe how the executing agent DIRECTLY verifies the deliverable
|
|
65356
|
-
> by running it \u2014 opening browsers, executing commands, sending API requests.
|
|
65357
|
-
> The agent performs what a human tester would do, but automated via tools.
|
|
65358
|
-
|
|
65359
|
-
**Verification Tool by Deliverable Type:**
|
|
65360
|
-
|
|
65361
|
-
| Type | Tool | How Agent Verifies |
|
|
65362
|
-
|------|------|-------------------|
|
|
65363
|
-
| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
|
|
65364
|
-
| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output |
|
|
65365
|
-
| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |
|
|
65366
|
-
| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |
|
|
65367
|
-
| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |
|
|
65368
|
-
|
|
65369
|
-
**Each Scenario MUST Follow This Format:**
|
|
65370
|
-
|
|
65371
|
-
\`\`\`
|
|
65372
|
-
Scenario: [Descriptive name \u2014 what user action/flow is being verified]
|
|
65373
|
-
Tool: [Playwright / interactive_bash / Bash]
|
|
65374
|
-
Preconditions: [What must be true before this scenario runs]
|
|
65375
|
-
Steps:
|
|
65376
|
-
1. [Exact action with specific selector/command/endpoint]
|
|
65377
|
-
2. [Next action with expected intermediate state]
|
|
65378
|
-
3. [Assertion with exact expected value]
|
|
65379
|
-
Expected Result: [Concrete, observable outcome]
|
|
65380
|
-
Failure Indicators: [What would indicate failure]
|
|
65381
|
-
Evidence: [Screenshot path / output capture / response body path]
|
|
65382
|
-
\`\`\`
|
|
65426
|
+
### QA Policy
|
|
65427
|
+
Every task MUST include agent-executed QA scenarios (see TODO template below).
|
|
65428
|
+
Evidence saved to \`.sisyphus/evidence/task-{N}-{scenario-slug}.{ext}\`.
|
|
65383
65429
|
|
|
65384
|
-
|
|
65385
|
-
|
|
65386
|
-
|
|
65387
|
-
|
|
65388
|
-
|
|
65389
|
-
|
|
65390
|
-
- **Evidence Paths**: Specific file paths (\`.sisyphus/evidence/task-N-scenario-name.png\`)
|
|
65391
|
-
|
|
65392
|
-
**Anti-patterns (NEVER write scenarios like this):**
|
|
65393
|
-
- \u274C "Verify the login page works correctly"
|
|
65394
|
-
- \u274C "Check that the API returns the right data"
|
|
65395
|
-
- \u274C "Test the form validation"
|
|
65396
|
-
- \u274C "User opens browser and confirms..."
|
|
65397
|
-
|
|
65398
|
-
**Write scenarios like this instead:**
|
|
65399
|
-
- \u2705 \`Navigate to /login \u2192 Fill input[name="email"] with "test@example.com" \u2192 Fill input[name="password"] with "Pass123!" \u2192 Click button[type="submit"] \u2192 Wait for /dashboard \u2192 Assert h1 contains "Welcome"\`
|
|
65400
|
-
- \u2705 \`POST /api/users {"name":"Test","email":"new@test.com"} \u2192 Assert status 201 \u2192 Assert response.id is UUID \u2192 GET /api/users/{id} \u2192 Assert name equals "Test"\`
|
|
65401
|
-
- \u2705 \`Run ./cli --config test.yaml \u2192 Wait for "Loaded" in stdout \u2192 Send "q" \u2192 Assert exit code 0 \u2192 Assert stdout contains "Goodbye"\`
|
|
65402
|
-
|
|
65403
|
-
**Evidence Requirements:**
|
|
65404
|
-
- Screenshots: \`.sisyphus/evidence/\` for all UI verifications
|
|
65405
|
-
- Terminal output: Captured for CLI/TUI verifications
|
|
65406
|
-
- Response bodies: Saved for API verifications
|
|
65407
|
-
- All evidence referenced by specific file path in acceptance criteria
|
|
65430
|
+
| Deliverable Type | Verification Tool | Method |
|
|
65431
|
+
|------------------|-------------------|--------|
|
|
65432
|
+
| Frontend/UI | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
|
|
65433
|
+
| TUI/CLI | interactive_bash (tmux) | Run command, send keystrokes, validate output |
|
|
65434
|
+
| API/Backend | Bash (curl) | Send requests, assert status + response fields |
|
|
65435
|
+
| Library/Module | Bash (bun/node REPL) | Import, call functions, compare output |
|
|
65408
65436
|
|
|
65409
65437
|
---
|
|
65410
65438
|
|
|
@@ -65414,49 +65442,82 @@ Scenario: [Descriptive name \u2014 what user action/flow is being verified]
|
|
|
65414
65442
|
|
|
65415
65443
|
> Maximize throughput by grouping independent tasks into parallel waves.
|
|
65416
65444
|
> Each wave completes before the next begins.
|
|
65445
|
+
> Target: 5-8 tasks per wave. Fewer than 3 per wave (except final) = under-splitting.
|
|
65417
65446
|
|
|
65418
65447
|
\`\`\`
|
|
65419
|
-
Wave 1 (Start Immediately):
|
|
65420
|
-
\u251C\u2500\u2500 Task 1: [
|
|
65421
|
-
\
|
|
65422
|
-
|
|
65423
|
-
|
|
65424
|
-
\u251C\u2500\u2500 Task
|
|
65425
|
-
\u251C\u2500\u2500 Task
|
|
65426
|
-
\u2514\u2500\u2500 Task
|
|
65427
|
-
|
|
65428
|
-
Wave
|
|
65429
|
-
\
|
|
65430
|
-
|
|
65431
|
-
|
|
65432
|
-
|
|
65448
|
+
Wave 1 (Start Immediately \u2014 foundation + scaffolding):
|
|
65449
|
+
\u251C\u2500\u2500 Task 1: Project scaffolding + config [quick]
|
|
65450
|
+
\u251C\u2500\u2500 Task 2: Design system tokens [quick]
|
|
65451
|
+
\u251C\u2500\u2500 Task 3: Type definitions [quick]
|
|
65452
|
+
\u251C\u2500\u2500 Task 4: Schema definitions [quick]
|
|
65453
|
+
\u251C\u2500\u2500 Task 5: Storage interface + in-memory impl [quick]
|
|
65454
|
+
\u251C\u2500\u2500 Task 6: Auth middleware [quick]
|
|
65455
|
+
\u2514\u2500\u2500 Task 7: Client module [quick]
|
|
65456
|
+
|
|
65457
|
+
Wave 2 (After Wave 1 \u2014 core modules, MAX PARALLEL):
|
|
65458
|
+
\u251C\u2500\u2500 Task 8: Core business logic (depends: 3, 5, 7) [deep]
|
|
65459
|
+
\u251C\u2500\u2500 Task 9: API endpoints (depends: 4, 5) [unspecified-high]
|
|
65460
|
+
\u251C\u2500\u2500 Task 10: Secondary storage impl (depends: 5) [unspecified-high]
|
|
65461
|
+
\u251C\u2500\u2500 Task 11: Retry/fallback logic (depends: 8) [deep]
|
|
65462
|
+
\u251C\u2500\u2500 Task 12: UI layout + navigation (depends: 2) [visual-engineering]
|
|
65463
|
+
\u251C\u2500\u2500 Task 13: API client + hooks (depends: 4) [quick]
|
|
65464
|
+
\u2514\u2500\u2500 Task 14: Telemetry middleware (depends: 5, 10) [unspecified-high]
|
|
65465
|
+
|
|
65466
|
+
Wave 3 (After Wave 2 \u2014 integration + UI):
|
|
65467
|
+
\u251C\u2500\u2500 Task 15: Main route combining modules (depends: 6, 11, 14) [deep]
|
|
65468
|
+
\u251C\u2500\u2500 Task 16: UI data visualization (depends: 12, 13) [visual-engineering]
|
|
65469
|
+
\u251C\u2500\u2500 Task 17: Deployment config A (depends: 15) [quick]
|
|
65470
|
+
\u251C\u2500\u2500 Task 18: Deployment config B (depends: 15) [quick]
|
|
65471
|
+
\u251C\u2500\u2500 Task 19: Deployment config C (depends: 15) [quick]
|
|
65472
|
+
\u2514\u2500\u2500 Task 20: UI request log + build (depends: 16) [visual-engineering]
|
|
65473
|
+
|
|
65474
|
+
Wave 4 (After Wave 3 \u2014 verification):
|
|
65475
|
+
\u251C\u2500\u2500 Task 21: Integration tests (depends: 15) [deep]
|
|
65476
|
+
\u251C\u2500\u2500 Task 22: UI QA - Playwright (depends: 20) [unspecified-high]
|
|
65477
|
+
\u251C\u2500\u2500 Task 23: E2E QA (depends: 21) [deep]
|
|
65478
|
+
\u2514\u2500\u2500 Task 24: Git cleanup + tagging (depends: 21) [git]
|
|
65479
|
+
|
|
65480
|
+
Wave FINAL (After ALL tasks \u2014 independent review, 4 parallel):
|
|
65481
|
+
\u251C\u2500\u2500 Task F1: Plan compliance audit (oracle)
|
|
65482
|
+
\u251C\u2500\u2500 Task F2: Code quality review (unspecified-high)
|
|
65483
|
+
\u251C\u2500\u2500 Task F3: Real manual QA (unspecified-high)
|
|
65484
|
+
\u2514\u2500\u2500 Task F4: Scope fidelity check (deep)
|
|
65485
|
+
|
|
65486
|
+
Critical Path: Task 1 \u2192 Task 5 \u2192 Task 8 \u2192 Task 11 \u2192 Task 15 \u2192 Task 21 \u2192 F1-F4
|
|
65487
|
+
Parallel Speedup: ~70% faster than sequential
|
|
65488
|
+
Max Concurrent: 7 (Waves 1 & 2)
|
|
65433
65489
|
\`\`\`
|
|
65434
65490
|
|
|
65435
|
-
### Dependency Matrix
|
|
65491
|
+
### Dependency Matrix (abbreviated \u2014 show ALL tasks in your generated plan)
|
|
65436
65492
|
|
|
65437
|
-
| Task | Depends On | Blocks |
|
|
65438
|
-
|
|
65439
|
-
| 1 |
|
|
65440
|
-
|
|
|
65441
|
-
|
|
|
65442
|
-
|
|
|
65443
|
-
|
|
|
65444
|
-
|
|
|
65493
|
+
| Task | Depends On | Blocks | Wave |
|
|
65494
|
+
|------|------------|--------|------|
|
|
65495
|
+
| 1-7 | \u2014 | 8-14 | 1 |
|
|
65496
|
+
| 8 | 3, 5, 7 | 11, 15 | 2 |
|
|
65497
|
+
| 11 | 8 | 15 | 2 |
|
|
65498
|
+
| 14 | 5, 10 | 15 | 2 |
|
|
65499
|
+
| 15 | 6, 11, 14 | 17-19, 21 | 3 |
|
|
65500
|
+
| 21 | 15 | 23, 24 | 4 |
|
|
65501
|
+
|
|
65502
|
+
> This is abbreviated for reference. YOUR generated plan must include the FULL matrix for ALL tasks.
|
|
65445
65503
|
|
|
65446
65504
|
### Agent Dispatch Summary
|
|
65447
65505
|
|
|
65448
|
-
| Wave |
|
|
65449
|
-
|
|
65450
|
-
| 1 |
|
|
65451
|
-
| 2 |
|
|
65452
|
-
| 3 |
|
|
65506
|
+
| Wave | # Parallel | Tasks \u2192 Agent Category |
|
|
65507
|
+
|------|------------|----------------------|
|
|
65508
|
+
| 1 | **7** | T1-T4 \u2192 \`quick\`, T5 \u2192 \`quick\`, T6 \u2192 \`quick\`, T7 \u2192 \`quick\` |
|
|
65509
|
+
| 2 | **7** | T8 \u2192 \`deep\`, T9 \u2192 \`unspecified-high\`, T10 \u2192 \`unspecified-high\`, T11 \u2192 \`deep\`, T12 \u2192 \`visual-engineering\`, T13 \u2192 \`quick\`, T14 \u2192 \`unspecified-high\` |
|
|
65510
|
+
| 3 | **6** | T15 \u2192 \`deep\`, T16 \u2192 \`visual-engineering\`, T17-T19 \u2192 \`quick\`, T20 \u2192 \`visual-engineering\` |
|
|
65511
|
+
| 4 | **4** | T21 \u2192 \`deep\`, T22 \u2192 \`unspecified-high\`, T23 \u2192 \`deep\`, T24 \u2192 \`git\` |
|
|
65512
|
+
| FINAL | **4** | F1 \u2192 \`oracle\`, F2 \u2192 \`unspecified-high\`, F3 \u2192 \`unspecified-high\`, F4 \u2192 \`deep\` |
|
|
65453
65513
|
|
|
65454
65514
|
---
|
|
65455
65515
|
|
|
65456
65516
|
## TODOs
|
|
65457
65517
|
|
|
65458
65518
|
> Implementation + Test = ONE Task. Never separate.
|
|
65459
|
-
> EVERY task MUST have: Recommended Agent Profile + Parallelization info.
|
|
65519
|
+
> EVERY task MUST have: Recommended Agent Profile + Parallelization info + QA Scenarios.
|
|
65520
|
+
> **A task WITHOUT QA Scenarios is INCOMPLETE. No exceptions.**
|
|
65460
65521
|
|
|
65461
65522
|
- [ ] 1. [Task Title]
|
|
65462
65523
|
|
|
@@ -65490,22 +65551,15 @@ Parallel Speedup: ~40% faster than sequential
|
|
|
65490
65551
|
|
|
65491
65552
|
**Pattern References** (existing code to follow):
|
|
65492
65553
|
- \`src/services/auth.ts:45-78\` - Authentication flow pattern (JWT creation, refresh token handling)
|
|
65493
|
-
- \`src/hooks/useForm.ts:12-34\` - Form validation pattern (Zod schema + react-hook-form integration)
|
|
65494
65554
|
|
|
65495
65555
|
**API/Type References** (contracts to implement against):
|
|
65496
65556
|
- \`src/types/user.ts:UserDTO\` - Response shape for user endpoints
|
|
65497
|
-
- \`src/api/schema.ts:createUserSchema\` - Request validation schema
|
|
65498
65557
|
|
|
65499
65558
|
**Test References** (testing patterns to follow):
|
|
65500
65559
|
- \`src/__tests__/auth.test.ts:describe("login")\` - Test structure and mocking patterns
|
|
65501
65560
|
|
|
65502
|
-
**Documentation References** (specs and requirements):
|
|
65503
|
-
- \`docs/api-spec.md#authentication\` - API contract details
|
|
65504
|
-
- \`ARCHITECTURE.md:Database Layer\` - Database access patterns
|
|
65505
|
-
|
|
65506
65561
|
**External References** (libraries and frameworks):
|
|
65507
65562
|
- Official docs: \`https://zod.dev/?id=basic-usage\` - Zod validation syntax
|
|
65508
|
-
- Example repo: \`github.com/example/project/src/auth\` - Reference implementation
|
|
65509
65563
|
|
|
65510
65564
|
**WHY Each Reference Matters** (explain the relevance):
|
|
65511
65565
|
- Don't just list files - explain what pattern/information the executor should extract
|
|
@@ -65516,113 +65570,60 @@ Parallel Speedup: ~40% faster than sequential
|
|
|
65516
65570
|
|
|
65517
65571
|
> **AGENT-EXECUTABLE VERIFICATION ONLY** \u2014 No human action permitted.
|
|
65518
65572
|
> Every criterion MUST be verifiable by running a command or using a tool.
|
|
65519
|
-
> REPLACE all placeholders with actual values from task context.
|
|
65520
65573
|
|
|
65521
65574
|
**If TDD (tests enabled):**
|
|
65522
65575
|
- [ ] Test file created: src/auth/login.test.ts
|
|
65523
|
-
- [ ] Test covers: successful login returns JWT token
|
|
65524
65576
|
- [ ] bun test src/auth/login.test.ts \u2192 PASS (3 tests, 0 failures)
|
|
65525
65577
|
|
|
65526
|
-
**
|
|
65527
|
-
|
|
65528
|
-
> Write MULTIPLE named scenarios per task: happy path AND failure cases.
|
|
65529
|
-
> Each scenario = exact tool + steps with real selectors/data + evidence path.
|
|
65530
|
-
|
|
65531
|
-
**Example \u2014 Frontend/UI (Playwright):**
|
|
65578
|
+
**QA Scenarios (MANDATORY \u2014 task is INCOMPLETE without these):**
|
|
65532
65579
|
|
|
65533
|
-
|
|
65534
|
-
|
|
65535
|
-
|
|
65536
|
-
|
|
65537
|
-
|
|
65538
|
-
|
|
65539
|
-
|
|
65540
|
-
|
|
65541
|
-
4. Fill: input[name="password"] \u2192 "ValidPass123!"
|
|
65542
|
-
5. Click: button[type="submit"]
|
|
65543
|
-
6. Wait for: navigation to /dashboard (timeout: 10s)
|
|
65544
|
-
7. Assert: h1 text contains "Welcome back"
|
|
65545
|
-
8. Assert: cookie "session_token" exists
|
|
65546
|
-
9. Screenshot: .sisyphus/evidence/task-1-login-success.png
|
|
65547
|
-
Expected Result: Dashboard loads with welcome message
|
|
65548
|
-
Evidence: .sisyphus/evidence/task-1-login-success.png
|
|
65549
|
-
|
|
65550
|
-
Scenario: Login fails with invalid credentials
|
|
65551
|
-
Tool: Playwright (playwright skill)
|
|
65552
|
-
Preconditions: Dev server running, no valid user with these credentials
|
|
65553
|
-
Steps:
|
|
65554
|
-
1. Navigate to: http://localhost:3000/login
|
|
65555
|
-
2. Fill: input[name="email"] \u2192 "wrong@example.com"
|
|
65556
|
-
3. Fill: input[name="password"] \u2192 "WrongPass"
|
|
65557
|
-
4. Click: button[type="submit"]
|
|
65558
|
-
5. Wait for: .error-message visible (timeout: 5s)
|
|
65559
|
-
6. Assert: .error-message text contains "Invalid credentials"
|
|
65560
|
-
7. Assert: URL is still /login (no redirect)
|
|
65561
|
-
8. Screenshot: .sisyphus/evidence/task-1-login-failure.png
|
|
65562
|
-
Expected Result: Error message shown, stays on login page
|
|
65563
|
-
Evidence: .sisyphus/evidence/task-1-login-failure.png
|
|
65564
|
-
\\\`\\\`\\\`
|
|
65565
|
-
|
|
65566
|
-
**Example \u2014 API/Backend (curl):**
|
|
65580
|
+
> **This is NOT optional. A task without QA scenarios WILL BE REJECTED.**
|
|
65581
|
+
>
|
|
65582
|
+
> Write scenario tests that verify the ACTUAL BEHAVIOR of what you built.
|
|
65583
|
+
> Minimum: 1 happy path + 1 failure/edge case per task.
|
|
65584
|
+
> Each scenario = exact tool + exact steps + exact assertions + evidence path.
|
|
65585
|
+
>
|
|
65586
|
+
> **The executing agent MUST run these scenarios after implementation.**
|
|
65587
|
+
> **The orchestrator WILL verify evidence files exist before marking task complete.**
|
|
65567
65588
|
|
|
65568
65589
|
\\\`\\\`\\\`
|
|
65569
|
-
Scenario:
|
|
65570
|
-
Tool: Bash (curl)
|
|
65571
|
-
Preconditions:
|
|
65590
|
+
Scenario: [Happy path \u2014 what SHOULD work]
|
|
65591
|
+
Tool: [Playwright / interactive_bash / Bash (curl)]
|
|
65592
|
+
Preconditions: [Exact setup state]
|
|
65572
65593
|
Steps:
|
|
65573
|
-
1.
|
|
65574
|
-
|
|
65575
|
-
|
|
65576
|
-
|
|
65577
|
-
|
|
65578
|
-
|
|
65579
|
-
|
|
65580
|
-
|
|
65581
|
-
|
|
65582
|
-
|
|
65583
|
-
Tool: Bash (curl)
|
|
65584
|
-
Preconditions: User with email "new@test.com" already exists
|
|
65594
|
+
1. [Exact action \u2014 specific command/selector/endpoint, no vagueness]
|
|
65595
|
+
2. [Next action \u2014 with expected intermediate state]
|
|
65596
|
+
3. [Assertion \u2014 exact expected value, not "verify it works"]
|
|
65597
|
+
Expected Result: [Concrete, observable, binary pass/fail]
|
|
65598
|
+
Failure Indicators: [What specifically would mean this failed]
|
|
65599
|
+
Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}.{ext}
|
|
65600
|
+
|
|
65601
|
+
Scenario: [Failure/edge case \u2014 what SHOULD fail gracefully]
|
|
65602
|
+
Tool: [same format]
|
|
65603
|
+
Preconditions: [Invalid input / missing dependency / error state]
|
|
65585
65604
|
Steps:
|
|
65586
|
-
1.
|
|
65587
|
-
2. Assert
|
|
65588
|
-
|
|
65589
|
-
|
|
65590
|
-
Evidence: Response body captured
|
|
65605
|
+
1. [Trigger the error condition]
|
|
65606
|
+
2. [Assert error is handled correctly]
|
|
65607
|
+
Expected Result: [Graceful failure with correct error message/code]
|
|
65608
|
+
Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}-error.{ext}
|
|
65591
65609
|
\\\`\\\`\\\`
|
|
65592
65610
|
|
|
65593
|
-
**
|
|
65594
|
-
|
|
65595
|
-
|
|
65596
|
-
|
|
65597
|
-
|
|
65598
|
-
|
|
65599
|
-
|
|
65600
|
-
|
|
65601
|
-
|
|
65602
|
-
|
|
65603
|
-
|
|
65604
|
-
|
|
65605
|
-
6. Assert: Process exited with code 0
|
|
65606
|
-
Expected Result: CLI starts, shows menu, exits cleanly
|
|
65607
|
-
Evidence: Terminal output captured
|
|
65608
|
-
|
|
65609
|
-
Scenario: CLI handles missing config gracefully
|
|
65610
|
-
Tool: interactive_bash (tmux)
|
|
65611
|
-
Preconditions: No config file at ./nonexistent.yaml
|
|
65612
|
-
Steps:
|
|
65613
|
-
1. tmux new-session: ./my-cli --config nonexistent.yaml
|
|
65614
|
-
2. Wait for: output (timeout: 3s)
|
|
65615
|
-
3. Assert: stderr contains "Config file not found"
|
|
65616
|
-
4. Assert: Process exited with code 1
|
|
65617
|
-
Expected Result: Meaningful error, non-zero exit
|
|
65618
|
-
Evidence: Error output captured
|
|
65619
|
-
\\\`\\\`\\\`
|
|
65611
|
+
> **Specificity requirements \u2014 every scenario MUST use:**
|
|
65612
|
+
> - **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
|
|
65613
|
+
> - **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
|
|
65614
|
+
> - **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
|
|
65615
|
+
> - **Timing**: Wait conditions where relevant (\`timeout: 10s\`)
|
|
65616
|
+
> - **Negative**: At least ONE failure/error scenario per task
|
|
65617
|
+
>
|
|
65618
|
+
> **Anti-patterns (your scenario is INVALID if it looks like this):**
|
|
65619
|
+
> - \u274C "Verify it works correctly" \u2014 HOW? What does "correctly" mean?
|
|
65620
|
+
> - \u274C "Check the API returns data" \u2014 WHAT data? What fields? What values?
|
|
65621
|
+
> - \u274C "Test the component renders" \u2014 WHERE? What selector? What content?
|
|
65622
|
+
> - \u274C Any scenario without an evidence path
|
|
65620
65623
|
|
|
65621
65624
|
**Evidence to Capture:**
|
|
65622
|
-
- [ ] Screenshots in .sisyphus/evidence/ for UI scenarios
|
|
65623
|
-
- [ ] Terminal output for CLI/TUI scenarios
|
|
65624
|
-
- [ ] Response bodies for API scenarios
|
|
65625
65625
|
- [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}
|
|
65626
|
+
- [ ] Screenshots for UI, terminal output for CLI, response bodies for API
|
|
65626
65627
|
|
|
65627
65628
|
**Commit**: YES | NO (groups with N)
|
|
65628
65629
|
- Message: \`type(scope): desc\`
|
|
@@ -65631,6 +65632,28 @@ Parallel Speedup: ~40% faster than sequential
|
|
|
65631
65632
|
|
|
65632
65633
|
---
|
|
65633
65634
|
|
|
65635
|
+
## Final Verification Wave (MANDATORY \u2014 after ALL implementation tasks)
|
|
65636
|
+
|
|
65637
|
+
> 4 review agents run in PARALLEL. ALL must APPROVE. Rejection \u2192 fix \u2192 re-run.
|
|
65638
|
+
|
|
65639
|
+
- [ ] F1. **Plan Compliance Audit** \u2014 \`oracle\`
|
|
65640
|
+
Read the plan end-to-end. For each "Must Have": verify implementation exists (read file, curl endpoint, run command). For each "Must NOT Have": search codebase for forbidden patterns \u2014 reject with file:line if found. Check evidence files exist in .sisyphus/evidence/. Compare deliverables against plan.
|
|
65641
|
+
Output: \`Must Have [N/N] | Must NOT Have [N/N] | Tasks [N/N] | VERDICT: APPROVE/REJECT\`
|
|
65642
|
+
|
|
65643
|
+
- [ ] F2. **Code Quality Review** \u2014 \`unspecified-high\`
|
|
65644
|
+
Run \`tsc --noEmit\` + linter + \`bun test\`. Review all changed files for: \`as any\`/\`@ts-ignore\`, empty catches, console.log in prod, commented-out code, unused imports. Check for AI slop: excessive comments, over-abstraction, generic names (data/result/item/temp).
|
|
65645
|
+
Output: \`Build [PASS/FAIL] | Lint [PASS/FAIL] | Tests [N pass/N fail] | Files [N clean/N issues] | VERDICT\`
|
|
65646
|
+
|
|
65647
|
+
- [ ] F3. **Real Manual QA** \u2014 \`unspecified-high\` (+ \`playwright\` skill if UI)
|
|
65648
|
+
Start from clean state. Execute EVERY QA scenario from EVERY task \u2014 follow exact steps, capture evidence. Test cross-task integration (features working together, not in isolation). Test edge cases: empty state, invalid input, rapid actions. Save to \`.sisyphus/evidence/final-qa/\`.
|
|
65649
|
+
Output: \`Scenarios [N/N pass] | Integration [N/N] | Edge Cases [N tested] | VERDICT\`
|
|
65650
|
+
|
|
65651
|
+
- [ ] F4. **Scope Fidelity Check** \u2014 \`deep\`
|
|
65652
|
+
For each task: read "What to do", read actual diff (git log/diff). Verify 1:1 \u2014 everything in spec was built (nothing missing), nothing beyond spec was built (no scope creep). Check "Must NOT do" compliance. Detect cross-task contamination: Task N touching Task M's files. Flag unaccounted changes.
|
|
65653
|
+
Output: \`Tasks [N/N compliant] | Contamination [CLEAN/N issues] | Unaccounted [CLEAN/N files] | VERDICT\`
|
|
65654
|
+
|
|
65655
|
+
---
|
|
65656
|
+
|
|
65634
65657
|
## Commit Strategy
|
|
65635
65658
|
|
|
65636
65659
|
| After Task | Message | Files | Verification |
|
|
@@ -67540,9 +67563,11 @@ function createChatMessageHandler2(args) {
|
|
|
67540
67563
|
}
|
|
67541
67564
|
const message = output.message;
|
|
67542
67565
|
if (firstMessageVariantGate.shouldOverride(input.sessionID)) {
|
|
67543
|
-
|
|
67544
|
-
|
|
67545
|
-
|
|
67566
|
+
if (message["variant"] === undefined) {
|
|
67567
|
+
const variant = input.model && input.agent ? resolveVariantForModel(pluginConfig, input.agent, input.model) : resolveAgentVariant(pluginConfig, input.agent);
|
|
67568
|
+
if (variant !== undefined) {
|
|
67569
|
+
message["variant"] = variant;
|
|
67570
|
+
}
|
|
67546
67571
|
}
|
|
67547
67572
|
firstMessageVariantGate.markApplied(input.sessionID);
|
|
67548
67573
|
} else {
|