agent-conveyor 0.1.13 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -11
- package/dist/cli/typescript-runtime.js +554 -12
- package/dist/cli/typescript-runtime.js.map +1 -1
- package/dist/runtime/app-autonomy.d.ts +1 -0
- package/dist/runtime/app-autonomy.js +16 -0
- package/dist/runtime/app-autonomy.js.map +1 -1
- package/dist/runtime/manager-permissions.js +1 -1
- package/dist/runtime/manager-permissions.js.map +1 -1
- package/docs/manager-recipes.md +85 -0
- package/package.json +1 -1
- package/skills/manage-codex-workers/SKILL.md +76 -8
|
@@ -5,7 +5,7 @@ import { homedir, tmpdir } from "node:os";
|
|
|
5
5
|
import { dirname, join, relative, resolve } from "node:path";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { taskAuditSync } from "../runtime/audit.js";
|
|
8
|
-
import { appAutopilotPlanSync, appLoopStatusSync, appWakeupDispatchPlanSync, appWakeupPlanSync, directInboxPollCommand, } from "../runtime/app-autonomy.js";
|
|
8
|
+
import { appAutopilotPlanSync, appLoopStatusSync, appWakeupDispatchPlanSync, appWakeupPlanSync, directInboxPollCommand, visibleSessionProtocolLines, } from "../runtime/app-autonomy.js";
|
|
9
9
|
import { classifyBusyWait, classifyStartupOutput } from "../runtime/classify.js";
|
|
10
10
|
import { exportTaskSync } from "../runtime/export.js";
|
|
11
11
|
import { ingestSessionSync } from "../runtime/ingest.js";
|
|
@@ -368,10 +368,10 @@ function commandHelpText(program, command) {
|
|
|
368
368
|
` ${program} criteria my-task --satisfy 1 --proof "File exists" --evidence-json '{"artifact":{"path":"docs/note.md"}}' --path /tmp/work/workerctl.db`,
|
|
369
369
|
],
|
|
370
370
|
"finish-task": [
|
|
371
|
-
`usage: ${program} finish-task <task> --reason <reason> [--require-criteria-audit] ${path}`,
|
|
371
|
+
`usage: ${program} finish-task <task> --reason <reason> [--require-criteria-audit] ${path} [--json]`,
|
|
372
372
|
"",
|
|
373
373
|
"Examples:",
|
|
374
|
-
` ${program} finish-task my-task --reason "Accepted criteria satisfied" --require-criteria-audit --path /tmp/work/workerctl.db`,
|
|
374
|
+
` ${program} finish-task my-task --reason "Accepted criteria satisfied" --require-criteria-audit --path /tmp/work/workerctl.db --json`,
|
|
375
375
|
],
|
|
376
376
|
"manager-ack": [
|
|
377
377
|
`usage: ${program} manager-ack <task> --from-stdin ${path}`,
|
|
@@ -2854,7 +2854,7 @@ function parseRuntimeArgs(args, env) {
|
|
|
2854
2854
|
}
|
|
2855
2855
|
}
|
|
2856
2856
|
else if (command === "loop-evidence" && flags.subtype === null) {
|
|
2857
|
-
if (!["add", "visual-diff", "visual_diff", "adversarial-check", "adversarial_check"].includes(arg)) {
|
|
2857
|
+
if (!["add", "visual-diff", "visual_diff", "build-passed", "build_passed", "adversarial-check", "adversarial_check"].includes(arg)) {
|
|
2858
2858
|
return { command, enabled, error: `Unsupported loop-evidence action: ${arg}`, explicit, flags, task };
|
|
2859
2859
|
}
|
|
2860
2860
|
flags.subtype = arg;
|
|
@@ -3164,7 +3164,7 @@ function runLoopEvidenceCommand(parsed, options) {
|
|
|
3164
3164
|
}
|
|
3165
3165
|
const action = parsed.flags.subtype;
|
|
3166
3166
|
if (!action) {
|
|
3167
|
-
return unsupportedRuntimeResult(parsed, "loop-evidence requires an action: add, visual-diff, or adversarial-check.");
|
|
3167
|
+
return unsupportedRuntimeResult(parsed, "loop-evidence requires an action: add, visual-diff, build-passed, or adversarial-check.");
|
|
3168
3168
|
}
|
|
3169
3169
|
const task = requireTask(parsed);
|
|
3170
3170
|
if (!parsed.flags.loopRun) {
|
|
@@ -3191,6 +3191,24 @@ function runLoopEvidenceCommand(parsed, options) {
|
|
|
3191
3191
|
});
|
|
3192
3192
|
return jsonResult(result);
|
|
3193
3193
|
}
|
|
3194
|
+
if (action === "build-passed" || action === "build_passed") {
|
|
3195
|
+
if (parsed.flags.evidenceType && parsed.flags.evidenceType !== "build_passed") {
|
|
3196
|
+
return errorResult("loop-evidence build-passed records evidence_type=build_passed; omit --evidence-type or use build_passed.");
|
|
3197
|
+
}
|
|
3198
|
+
const result = recordLoopEvidenceSync(database, {
|
|
3199
|
+
artifactPath: parsed.flags.output,
|
|
3200
|
+
correlationId: parsed.flags.correlationId,
|
|
3201
|
+
evidenceType: "build_passed",
|
|
3202
|
+
iteration: parsed.flags.currentIteration,
|
|
3203
|
+
loopRunId: parsed.flags.loopRun,
|
|
3204
|
+
metadata: jsonObjectArg(parsed.flags.metadataJson, "--metadata-json"),
|
|
3205
|
+
proof: parsed.flags.proof,
|
|
3206
|
+
source,
|
|
3207
|
+
status: parsed.flags.statusState ?? "pass",
|
|
3208
|
+
task,
|
|
3209
|
+
});
|
|
3210
|
+
return jsonResult(result);
|
|
3211
|
+
}
|
|
3194
3212
|
if (action === "adversarial-check" || action === "adversarial_check") {
|
|
3195
3213
|
const result = recordAdversarialLoopEvidenceSync(database, {
|
|
3196
3214
|
artifactPath: parsed.flags.output,
|
|
@@ -4049,6 +4067,7 @@ const QA_PLAN_SCENARIOS = new Set([
|
|
|
4049
4067
|
"tmux-errors",
|
|
4050
4068
|
"dispatch-completion",
|
|
4051
4069
|
"ralph-loop",
|
|
4070
|
+
"ship-it-loop",
|
|
4052
4071
|
"adversarial-triggers",
|
|
4053
4072
|
"goalbuddy-conveyor",
|
|
4054
4073
|
]);
|
|
@@ -4182,6 +4201,52 @@ function qaPlan(scenario) {
|
|
|
4182
4201
|
],
|
|
4183
4202
|
};
|
|
4184
4203
|
}
|
|
4204
|
+
if (scenario === "ship-it-loop") {
|
|
4205
|
+
return {
|
|
4206
|
+
authority_boundaries: [
|
|
4207
|
+
"Do not push a branch before repo.push_branch is permitted.",
|
|
4208
|
+
"Do not open or update a PR before repo.open_pr is permitted.",
|
|
4209
|
+
"Do not treat CI monitoring as CI truth; record explicit ci_green evidence.",
|
|
4210
|
+
"Do not resolve conflicts without a bounded manager instruction and retry limit.",
|
|
4211
|
+
"Do not merge before repo.merge_green_pr, ci_green, mergeability, manager_merge_decision, merge, post_merge_verification, and adversarial_check evidence exist.",
|
|
4212
|
+
],
|
|
4213
|
+
correlation_markers: [
|
|
4214
|
+
{ correlation_id: "ship-it-push-permission", purpose: "push branch permission gate" },
|
|
4215
|
+
{ correlation_id: "ship-it-open-pr-permission", purpose: "open PR permission gate" },
|
|
4216
|
+
{ correlation_id: "ship-it-merge-permission", purpose: "merge permission gate" },
|
|
4217
|
+
{ correlation_id: "ship-it-missing-evidence", purpose: "missing lifecycle evidence block" },
|
|
4218
|
+
{ correlation_id: "ship-it-conflict-block", purpose: "conflict retry limit proof" },
|
|
4219
|
+
{ correlation_id: "ship-it-allowed-closeout", purpose: "allowed closeout after all lifecycle evidence" },
|
|
4220
|
+
],
|
|
4221
|
+
evidence_template: {
|
|
4222
|
+
branch_ready: { branch: "<branch>", commit_sha: "<sha>" },
|
|
4223
|
+
branch_pushed: { remote: "origin", branch: "<branch>" },
|
|
4224
|
+
pr_url: { url: "<pull request URL>" },
|
|
4225
|
+
ci_green: { command: "gh pr checks --required", status: "green" },
|
|
4226
|
+
mergeability_clean: { conflicts: false, mergeable_state: "clean" },
|
|
4227
|
+
manager_merge_decision: { decision: "merge_ready", manager_verified: true },
|
|
4228
|
+
merge: { merge_sha: "<sha>" },
|
|
4229
|
+
post_merge_verification: { command: "<post-merge check>", status: "pass" },
|
|
4230
|
+
adversarial_check: { failure_mode: "<risk>", check: "<proof>", result: "<outcome>" },
|
|
4231
|
+
},
|
|
4232
|
+
expected_observations: [
|
|
4233
|
+
"push, PR creation, and merge commands fail closed until their manager permissions are granted",
|
|
4234
|
+
"missing lifecycle evidence blocks a continue_iteration before worker delivery",
|
|
4235
|
+
"unresolved conflicts are represented as bounded blockers, not hidden behind CI green",
|
|
4236
|
+
"a fresh retry delivers only after branch, PR, CI, mergeability, manager decision, merge, post-merge, and adversarial evidence exists",
|
|
4237
|
+
"the recipe and prompts keep merge readiness as a manager decision, not a worker claim",
|
|
4238
|
+
],
|
|
4239
|
+
scenario,
|
|
4240
|
+
steps: [
|
|
4241
|
+
"Create a disposable no-tmux task with the ship_it_loop template.",
|
|
4242
|
+
"Run the permission-gate checks for repo.push_branch, repo.open_pr, and repo.merge_green_pr.",
|
|
4243
|
+
"Attempt a lifecycle continuation before evidence and verify missing evidence blocks before worker delivery.",
|
|
4244
|
+
"Record partial PR/CI evidence and verify mergeability/manager-decision/merge/post-merge proof is still required.",
|
|
4245
|
+
"Record conflict retry-limit evidence as blocked when unresolved.",
|
|
4246
|
+
"Record all lifecycle receipts plus structured adversarial proof and verify a fresh retry reaches the worker inbox.",
|
|
4247
|
+
],
|
|
4248
|
+
};
|
|
4249
|
+
}
|
|
4185
4250
|
if (scenario === "adversarial-triggers") {
|
|
4186
4251
|
return {
|
|
4187
4252
|
correlation_markers: [
|
|
@@ -4282,6 +4347,7 @@ const SUPPORTED_QA_RUN_SCENARIOS = new Set([
|
|
|
4282
4347
|
"generic-loop-template",
|
|
4283
4348
|
"generic-loop-template-browser",
|
|
4284
4349
|
"ralph-loop-guardrails",
|
|
4350
|
+
"ship-it-loop",
|
|
4285
4351
|
"test-coverage-loop",
|
|
4286
4352
|
]);
|
|
4287
4353
|
function isSupportedQaRunScenario(scenario) {
|
|
@@ -4324,6 +4390,9 @@ function runQaScenario(scenario, context) {
|
|
|
4324
4390
|
if (scenario === "build-clear-loop") {
|
|
4325
4391
|
return qaRunBuildClearLoop(context);
|
|
4326
4392
|
}
|
|
4393
|
+
if (scenario === "ship-it-loop") {
|
|
4394
|
+
return qaRunShipItLoop(context);
|
|
4395
|
+
}
|
|
4327
4396
|
if (scenario === "adversarial-triggers") {
|
|
4328
4397
|
return qaRunAdversarialTriggers(context);
|
|
4329
4398
|
}
|
|
@@ -4669,6 +4738,166 @@ function qaRunBuildClearLoop(context) {
|
|
|
4669
4738
|
template_metadata: templateMetadata,
|
|
4670
4739
|
};
|
|
4671
4740
|
}
|
|
4741
|
+
function qaRunShipItLoop(context) {
|
|
4742
|
+
const slug = randomUUID().slice(0, 8);
|
|
4743
|
+
const checks = [];
|
|
4744
|
+
const generatedTasks = [];
|
|
4745
|
+
const pushTask = createQaBoundTask(context, slug, "ship-it-push-permission");
|
|
4746
|
+
generatedTasks.push(generatedTask(pushTask, "ship-it-push-permission"));
|
|
4747
|
+
checks.push(qaRunPermissionGate(context, pushTask, {
|
|
4748
|
+
checkName: "ship_it_push_branch_requires_repo_push_branch",
|
|
4749
|
+
correlationId: "ship-it-push-permission-denied",
|
|
4750
|
+
message: "Push branch origin/codex/ship-it-loop.",
|
|
4751
|
+
permission: "repo.push_branch",
|
|
4752
|
+
}));
|
|
4753
|
+
qaConfigureManagerPermissions(context, pushTask, ["repo.push_branch"]);
|
|
4754
|
+
checks.push(qaRunPermissionGate(context, pushTask, {
|
|
4755
|
+
checkName: "ship_it_push_branch_delivers_after_permission",
|
|
4756
|
+
correlationId: "ship-it-push-permission-allowed",
|
|
4757
|
+
expectAllowed: true,
|
|
4758
|
+
message: "Push branch origin/codex/ship-it-loop after manager permission.",
|
|
4759
|
+
permission: "repo.push_branch",
|
|
4760
|
+
}));
|
|
4761
|
+
const prTask = createQaBoundTask(context, slug, "ship-it-open-pr-permission");
|
|
4762
|
+
generatedTasks.push(generatedTask(prTask, "ship-it-open-pr-permission"));
|
|
4763
|
+
checks.push(qaRunPermissionGate(context, prTask, {
|
|
4764
|
+
checkName: "ship_it_open_pr_requires_repo_open_pr",
|
|
4765
|
+
correlationId: "ship-it-open-pr-permission-denied",
|
|
4766
|
+
message: "Open PR for ship-it loop.",
|
|
4767
|
+
permission: "repo.open_pr",
|
|
4768
|
+
}));
|
|
4769
|
+
qaConfigureManagerPermissions(context, prTask, ["repo.open_pr"]);
|
|
4770
|
+
checks.push(qaRunPermissionGate(context, prTask, {
|
|
4771
|
+
checkName: "ship_it_open_pr_delivers_after_permission",
|
|
4772
|
+
correlationId: "ship-it-open-pr-permission-allowed",
|
|
4773
|
+
expectAllowed: true,
|
|
4774
|
+
message: "Open PR for ship-it loop after manager permission.",
|
|
4775
|
+
permission: "repo.open_pr",
|
|
4776
|
+
}));
|
|
4777
|
+
const mergeTask = createQaBoundTask(context, slug, "ship-it-merge-permission");
|
|
4778
|
+
generatedTasks.push(generatedTask(mergeTask, "ship-it-merge-permission"));
|
|
4779
|
+
checks.push(qaRunPermissionGate(context, mergeTask, {
|
|
4780
|
+
checkName: "ship_it_merge_requires_repo_merge_green_pr",
|
|
4781
|
+
correlationId: "ship-it-merge-permission-denied",
|
|
4782
|
+
message: "Merge PR after verified closeout.",
|
|
4783
|
+
permission: "repo.merge_green_pr",
|
|
4784
|
+
}));
|
|
4785
|
+
qaConfigureManagerPermissions(context, mergeTask, ["repo.merge_green_pr"]);
|
|
4786
|
+
checks.push(qaRunPermissionGate(context, mergeTask, {
|
|
4787
|
+
checkName: "ship_it_merge_delivers_after_permission",
|
|
4788
|
+
correlationId: "ship-it-merge-permission-allowed",
|
|
4789
|
+
expectAllowed: true,
|
|
4790
|
+
message: "Merge PR after verified closeout and manager permission.",
|
|
4791
|
+
permission: "repo.merge_green_pr",
|
|
4792
|
+
}));
|
|
4793
|
+
const lifecycleTask = createQaBoundTask(context, slug, "ship-it-lifecycle");
|
|
4794
|
+
generatedTasks.push(generatedTask(lifecycleTask, "ship-it-lifecycle"));
|
|
4795
|
+
const templateMetadata = loopTemplateMetadata("ship_it_loop", {
|
|
4796
|
+
currentIteration: 1,
|
|
4797
|
+
maxIterations: 2,
|
|
4798
|
+
seedPromptSha256: "qa-run-ship-it-seed",
|
|
4799
|
+
});
|
|
4800
|
+
const run = createQaRalphLoopRun(context, lifecycleTask, {
|
|
4801
|
+
currentIteration: 1,
|
|
4802
|
+
maxIterations: 2,
|
|
4803
|
+
metadata: templateMetadata,
|
|
4804
|
+
preset: "ship_it_loop",
|
|
4805
|
+
requiredBeforeContinue: asStringArray(templateMetadata.required_before_continue),
|
|
4806
|
+
seedPromptSha256: "qa-run-ship-it-seed",
|
|
4807
|
+
stopConditions: asStringArray(templateMetadata.stop_conditions),
|
|
4808
|
+
});
|
|
4809
|
+
enqueueQaContinue(context, lifecycleTask, run.id, "ship-it-missing-evidence", "Run ship-it continuation before lifecycle evidence.");
|
|
4810
|
+
const missing = qaDispatchContinueOnce(context, "ship-it-missing-evidence");
|
|
4811
|
+
const missingCounts = qaDeliveryCounts(context, lifecycleTask);
|
|
4812
|
+
qaExpectBlocked(missing, missingCounts, {
|
|
4813
|
+
message: "ship_it_loop missing lifecycle evidence",
|
|
4814
|
+
missingEvidence: asStringArray(templateMetadata.required_before_continue),
|
|
4815
|
+
reason: "missing_required_evidence",
|
|
4816
|
+
});
|
|
4817
|
+
checks.push(qaCheck("ship_it_lifecycle_blocks_before_any_evidence", missing, missingCounts));
|
|
4818
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "branch_ready", "ship-it-branch-ready", {
|
|
4819
|
+
metadata: { branch: "codex/ship-it-loop", commit_sha: "1111111111111111111111111111111111111111" },
|
|
4820
|
+
});
|
|
4821
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "branch_pushed", "ship-it-branch-pushed", {
|
|
4822
|
+
metadata: { branch: "codex/ship-it-loop", remote: "origin" },
|
|
4823
|
+
});
|
|
4824
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "pr_url", "ship-it-pr-url", {
|
|
4825
|
+
metadata: { url: "https://github.example.test/acme/repo/pull/42" },
|
|
4826
|
+
});
|
|
4827
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "ci_green", "ship-it-ci-green", {
|
|
4828
|
+
metadata: { command: "gh pr checks 42 --required", status: "green" },
|
|
4829
|
+
status: "green",
|
|
4830
|
+
});
|
|
4831
|
+
enqueueQaContinue(context, lifecycleTask, run.id, "ship-it-partial-evidence", "Run ship-it continuation after PR and CI but before merge readiness.");
|
|
4832
|
+
const partial = qaDispatchContinueOnce(context, "ship-it-partial-evidence");
|
|
4833
|
+
const partialCounts = qaDeliveryCounts(context, lifecycleTask);
|
|
4834
|
+
qaExpectBlocked(partial, partialCounts, {
|
|
4835
|
+
message: "ship_it_loop partial lifecycle evidence",
|
|
4836
|
+
missingEvidence: ["mergeability_clean", "manager_merge_decision", "merge", "post_merge_verification", "adversarial_check"],
|
|
4837
|
+
reason: "missing_required_evidence",
|
|
4838
|
+
});
|
|
4839
|
+
checks.push(qaCheck("ship_it_lifecycle_blocks_before_mergeability_and_manager_decision", partial, partialCounts));
|
|
4840
|
+
const artifactDir = qaArtifactDir(context, "ship-it-loop", slug, run.id);
|
|
4841
|
+
const conflictReceipt = join(artifactDir, "conflict-blocked.json");
|
|
4842
|
+
mkdirSync(dirname(conflictReceipt), { recursive: true });
|
|
4843
|
+
const conflictPayload = {
|
|
4844
|
+
conflict_state: "unresolved",
|
|
4845
|
+
max_retries: 2,
|
|
4846
|
+
retry_count: 2,
|
|
4847
|
+
status: "blocked",
|
|
4848
|
+
stop_reason: "conflict_retry_limit_reached",
|
|
4849
|
+
};
|
|
4850
|
+
writeFileSync(conflictReceipt, `${JSON.stringify(sortJson(conflictPayload), null, 2)}\n`);
|
|
4851
|
+
checks.push({
|
|
4852
|
+
artifact_path: conflictReceipt,
|
|
4853
|
+
conflict: conflictPayload,
|
|
4854
|
+
name: "ship_it_conflict_retry_blocks_after_limit",
|
|
4855
|
+
status: "passed",
|
|
4856
|
+
});
|
|
4857
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "mergeability_clean", "ship-it-mergeability-clean", {
|
|
4858
|
+
metadata: { conflicts: false, mergeable_state: "clean" },
|
|
4859
|
+
});
|
|
4860
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "manager_merge_decision", "ship-it-manager-merge-decision", {
|
|
4861
|
+
metadata: { decision: "merge_ready", manager_verified: true },
|
|
4862
|
+
});
|
|
4863
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "merge", "ship-it-merge", {
|
|
4864
|
+
metadata: { merge_sha: "2222222222222222222222222222222222222222" },
|
|
4865
|
+
});
|
|
4866
|
+
qaRecordLoopEvidence(context, lifecycleTask, run.id, "post_merge_verification", "ship-it-post-merge-verification", {
|
|
4867
|
+
metadata: { command: "git rev-parse HEAD && npm test -- --runInBand", status: "pass" },
|
|
4868
|
+
});
|
|
4869
|
+
qaRecordAdversarialEvidence(context, lifecycleTask, run.id, "ship-it-adversarial-proof", {
|
|
4870
|
+
check: "Inspect permission denials, missing-evidence blocks, conflict retry receipt, and final evidence set.",
|
|
4871
|
+
failure_mode: "A ship-it loop could merge after CI green while conflicts, manager decision, or post-merge proof are missing.",
|
|
4872
|
+
result: "Dispatch stayed blocked until mergeability, manager decision, merge, post-merge, and adversarial receipts were present.",
|
|
4873
|
+
});
|
|
4874
|
+
enqueueQaContinue(context, lifecycleTask, run.id, "ship-it-allowed-closeout", "Run ship-it continuation after all lifecycle evidence.");
|
|
4875
|
+
const allowed = qaDispatchContinueOnce(context, "ship-it-allowed-closeout");
|
|
4876
|
+
const allowedCounts = qaDeliveryCounts(context, lifecycleTask);
|
|
4877
|
+
qaExpectDelivered(allowed, allowedCounts, "ship_it_loop allowed closeout");
|
|
4878
|
+
checks.push(qaCheck("ship_it_lifecycle_retry_delivers_after_all_evidence", allowed, allowedCounts));
|
|
4879
|
+
return {
|
|
4880
|
+
artifacts: { conflict_receipt: conflictReceipt, db_path: context.dbPath },
|
|
4881
|
+
checks,
|
|
4882
|
+
generated_at: new Date().toISOString(),
|
|
4883
|
+
generated_tasks: generatedTasks,
|
|
4884
|
+
replay_commands: [
|
|
4885
|
+
"conveyor loop-templates --show ship_it_loop --json",
|
|
4886
|
+
"conveyor manager-recipes --show ship-it-loop --json",
|
|
4887
|
+
"conveyor manager-permission <task> repo.push_branch --require",
|
|
4888
|
+
"conveyor manager-permission <task> repo.open_pr --require",
|
|
4889
|
+
"conveyor manager-permission <task> repo.merge_green_pr --require",
|
|
4890
|
+
"conveyor loop-evidence add <task> --loop-run <run-id> --iteration 1 --evidence-type branch_ready",
|
|
4891
|
+
"conveyor loop-evidence add <task> --loop-run <run-id> --iteration 1 --evidence-type ci_green",
|
|
4892
|
+
"conveyor loop-evidence adversarial-check <task> --loop-run <run-id> --iteration 1 --failure-mode <failure> --check <check> --result <result>",
|
|
4893
|
+
`conveyor dispatch --once --type continue_iteration --dispatcher-id ${context.dispatcherId} --path ${context.dbPath}`,
|
|
4894
|
+
],
|
|
4895
|
+
result: "passed",
|
|
4896
|
+
scenario: "ship-it-loop",
|
|
4897
|
+
template: "ship_it_loop",
|
|
4898
|
+
template_metadata: templateMetadata,
|
|
4899
|
+
};
|
|
4900
|
+
}
|
|
4672
4901
|
function qaRunAdversarialTriggers(context) {
|
|
4673
4902
|
const slug = randomUUID().slice(0, 8);
|
|
4674
4903
|
const triggerDefinitions = listLoopTriggers();
|
|
@@ -4966,24 +5195,27 @@ function enqueueQaContinue(context, task, runId, correlationId, message) {
|
|
|
4966
5195
|
}
|
|
4967
5196
|
}
|
|
4968
5197
|
function qaDispatchContinueOnce(context, expectedCorrelationId) {
|
|
5198
|
+
return qaDispatchCommandOnce(context, "continue_iteration", expectedCorrelationId);
|
|
5199
|
+
}
|
|
5200
|
+
function qaDispatchCommandOnce(context, commandType, expectedCorrelationId) {
|
|
4969
5201
|
const before = openDatabaseSync(context.dbPath);
|
|
4970
5202
|
try {
|
|
4971
5203
|
initializeDatabaseSync(before);
|
|
4972
5204
|
const rows = before.prepare(`
|
|
4973
5205
|
select correlation_id, state
|
|
4974
5206
|
from commands
|
|
4975
|
-
where type =
|
|
5207
|
+
where type = ? and state in ('pending', 'attempted')
|
|
4976
5208
|
order by created_at, id
|
|
4977
|
-
`).all();
|
|
5209
|
+
`).all(commandType);
|
|
4978
5210
|
const seen = rows.map((row) => `${row.correlation_id}:${row.state}`);
|
|
4979
5211
|
if (rows.length !== 1 || rows[0]?.correlation_id !== expectedCorrelationId || rows[0]?.state !== "pending") {
|
|
4980
|
-
throw new Error(`qa-run
|
|
5212
|
+
throw new Error(`qa-run ${commandType} dispatch queue is not clean; expected only ${expectedCorrelationId}, found ${JSON.stringify(seen)}`);
|
|
4981
5213
|
}
|
|
4982
5214
|
}
|
|
4983
5215
|
finally {
|
|
4984
5216
|
before.close();
|
|
4985
5217
|
}
|
|
4986
|
-
const parsed = parseRuntimeArgs(["dispatch", "--type",
|
|
5218
|
+
const parsed = parseRuntimeArgs(["dispatch", "--type", commandType, "--path", context.dbPath], {
|
|
4987
5219
|
AGENT_CONVEYOR_TS_RUNTIME: "1",
|
|
4988
5220
|
});
|
|
4989
5221
|
const processed = dispatchOncePass(parsed, context.runtimeOptions, {
|
|
@@ -4993,13 +5225,68 @@ function qaDispatchContinueOnce(context, expectedCorrelationId) {
|
|
|
4993
5225
|
limit: 1,
|
|
4994
5226
|
});
|
|
4995
5227
|
if (processed.length !== 1) {
|
|
4996
|
-
throw new Error(`expected exactly one
|
|
5228
|
+
throw new Error(`expected exactly one ${commandType} dispatch item, got ${processed.length}`);
|
|
4997
5229
|
}
|
|
4998
5230
|
if (processed[0]?.correlation_id !== expectedCorrelationId) {
|
|
4999
5231
|
throw new Error(`qa-run dispatched unexpected command ${String(processed[0]?.correlation_id)}`);
|
|
5000
5232
|
}
|
|
5001
5233
|
return processed[0] ?? {};
|
|
5002
5234
|
}
|
|
5235
|
+
function qaConfigureManagerPermissions(context, task, permissions) {
|
|
5236
|
+
const result = runTypescriptRuntimeCommand({
|
|
5237
|
+
...context.runtimeOptions,
|
|
5238
|
+
args: [
|
|
5239
|
+
"manager-config",
|
|
5240
|
+
task.task_name,
|
|
5241
|
+
"--mode",
|
|
5242
|
+
"strict",
|
|
5243
|
+
"--objective",
|
|
5244
|
+
"Ship-it lifecycle QA permission contract.",
|
|
5245
|
+
...permissions.flatMap((permission) => ["--permit", permission]),
|
|
5246
|
+
"--path",
|
|
5247
|
+
context.dbPath,
|
|
5248
|
+
],
|
|
5249
|
+
env: {
|
|
5250
|
+
...(context.runtimeOptions.env ?? {}),
|
|
5251
|
+
AGENT_CONVEYOR_TS_RUNTIME: "1",
|
|
5252
|
+
},
|
|
5253
|
+
});
|
|
5254
|
+
qaRequire(result.exitCode === 0, `manager-config permission setup failed: ${result.stderr ?? result.stdout ?? ""}`);
|
|
5255
|
+
}
|
|
5256
|
+
function qaRunPermissionGate(context, task, options) {
|
|
5257
|
+
const database = openDatabaseSync(context.dbPath);
|
|
5258
|
+
try {
|
|
5259
|
+
initializeDatabaseSync(database);
|
|
5260
|
+
createCommandSync(database, {
|
|
5261
|
+
commandType: "nudge_worker",
|
|
5262
|
+
correlationId: options.correlationId,
|
|
5263
|
+
payload: { message: options.message, ship_it: { required_permission: options.permission } },
|
|
5264
|
+
requiredPermission: options.permission,
|
|
5265
|
+
taskId: task.task_id,
|
|
5266
|
+
});
|
|
5267
|
+
}
|
|
5268
|
+
finally {
|
|
5269
|
+
database.close();
|
|
5270
|
+
}
|
|
5271
|
+
const dispatch = qaDispatchCommandOnce(context, "nudge_worker", options.correlationId);
|
|
5272
|
+
const counts = qaDeliveryCounts(context, task);
|
|
5273
|
+
if (options.expectAllowed === true) {
|
|
5274
|
+
qaExpectDelivered(dispatch, counts, `${options.permission} permission gate`);
|
|
5275
|
+
}
|
|
5276
|
+
else {
|
|
5277
|
+
qaRequire(dispatch.state === "failed", `${options.permission} gate did not fail without permission`);
|
|
5278
|
+
qaRequire(String(dispatch.error ?? "").includes("manager permission required"), `${options.permission} gate failed for the wrong reason`);
|
|
5279
|
+
qaRequire(counts.worker_inbox_count === 0, `${options.permission} denied gate left worker inbox mail`);
|
|
5280
|
+
}
|
|
5281
|
+
return {
|
|
5282
|
+
...counts,
|
|
5283
|
+
command_type: "nudge_worker",
|
|
5284
|
+
dispatch,
|
|
5285
|
+
name: options.checkName,
|
|
5286
|
+
permission: options.permission,
|
|
5287
|
+
status: "passed",
|
|
5288
|
+
};
|
|
5289
|
+
}
|
|
5003
5290
|
function qaDeliveryCounts(context, task) {
|
|
5004
5291
|
const database = openDatabaseSync(context.dbPath);
|
|
5005
5292
|
try {
|
|
@@ -11942,8 +12229,10 @@ const MANAGER_PERMISSION_ACTION_NAMES = new Set([
|
|
|
11942
12229
|
"context.fetch_prs",
|
|
11943
12230
|
"context.spawn_reviewer",
|
|
11944
12231
|
"repo.merge_green_pr",
|
|
12232
|
+
"repo.monitor_ci",
|
|
11945
12233
|
"repo.open_pr",
|
|
11946
12234
|
"repo.push_branch",
|
|
12235
|
+
"repo.resolve_conflicts",
|
|
11947
12236
|
"verification.run_cargo",
|
|
11948
12237
|
"verification.run_playwright",
|
|
11949
12238
|
"verification.run_pytest",
|
|
@@ -14100,6 +14389,7 @@ const DEFERRED_HEADING_RE = /\b(follow[- ]?up|deferred)\b/i;
|
|
|
14100
14389
|
const LIST_ITEM_RE = /^\s*(?:[-*+]|\d+[.)]|\[[ xX]\])\s+(?<text>.+?)\s*$/;
|
|
14101
14390
|
const EMPTY_ITEM_RE = /^(?:n\/?a|none|no follow[- ]?ups?|no deferred(?: criteria)?|nothing)$/i;
|
|
14102
14391
|
const INDENTED_CONTINUATION_RE = /^\s+\S/;
|
|
14392
|
+
const CLOSEOUT_CRITERION_RE = /\b(?:finish-task|require-criteria-audit|task (?:is )?(?:marked )?done|mark(?:ed)? (?:the )?task done|terminal closeout|verified task closeout|heartbeat teardown|final manager (?:report|decision)|manager final (?:report|handoff)|closeout proof|control-plane closeout)\b/i;
|
|
14103
14393
|
function planCriteriaCommands(task, text, options) {
|
|
14104
14394
|
const { suggestions, warnings } = parseWorkerCriteriaResponse(text);
|
|
14105
14395
|
return {
|
|
@@ -14161,6 +14451,11 @@ function parseWorkerCriteriaResponse(text) {
|
|
|
14161
14451
|
else if (suggestions.length === 0) {
|
|
14162
14452
|
warnings.push("Clear criteria headings were found, but no bullet or numbered criteria items were detected.");
|
|
14163
14453
|
}
|
|
14454
|
+
for (const suggestion of suggestions) {
|
|
14455
|
+
if (suggestion.classification?.kind === "manager_closeout_proof") {
|
|
14456
|
+
warnings.push(`Criterion "${suggestion.criterion}" appears to describe manager closeout/control-plane proof. Keep closeout proof in the manager final report, audit, replay, or epilogue evidence instead of accepted worker/task criteria unless this task is explicitly Conveyor closeout QA.`);
|
|
14457
|
+
}
|
|
14458
|
+
}
|
|
14164
14459
|
return { suggestions, warnings };
|
|
14165
14460
|
}
|
|
14166
14461
|
function headingStatus(line) {
|
|
@@ -14182,12 +14477,23 @@ function makeCriteriaSuggestion(text, status) {
|
|
|
14182
14477
|
return null;
|
|
14183
14478
|
}
|
|
14184
14479
|
return {
|
|
14480
|
+
classification: classifyCriteriaSuggestion(criterion),
|
|
14185
14481
|
criterion,
|
|
14186
14482
|
rationale: status === "deferred" ? DEFAULT_DEFERRED_RATIONALE : null,
|
|
14187
14483
|
source: "worker_proposed",
|
|
14188
14484
|
status,
|
|
14189
14485
|
};
|
|
14190
14486
|
}
|
|
14487
|
+
function classifyCriteriaSuggestion(criterion) {
|
|
14488
|
+
if (!CLOSEOUT_CRITERION_RE.test(criterion)) {
|
|
14489
|
+
return null;
|
|
14490
|
+
}
|
|
14491
|
+
return {
|
|
14492
|
+
kind: "manager_closeout_proof",
|
|
14493
|
+
reason: "The criterion names manager closeout mechanics rather than the worker/task outcome being accepted.",
|
|
14494
|
+
recommendation: "keep_out_of_acceptance_criteria",
|
|
14495
|
+
};
|
|
14496
|
+
}
|
|
14191
14497
|
function suggestionToArgv(task, suggestion, options) {
|
|
14192
14498
|
const argv = [
|
|
14193
14499
|
"conveyor",
|
|
@@ -14723,7 +15029,6 @@ function unsupportedLifecycleTaskOptions(parsed, finish) {
|
|
|
14723
15029
|
|| parsed.flags.includeFullTranscripts
|
|
14724
15030
|
|| parsed.flags.includeLegacy
|
|
14725
15031
|
|| parsed.flags.includeTranscripts
|
|
14726
|
-
|| parsed.flags.json
|
|
14727
15032
|
|| parsed.flags.limit !== null
|
|
14728
15033
|
|| parsed.flags.names.length > 0
|
|
14729
15034
|
|| parsed.flags.output !== null
|
|
@@ -15879,7 +16184,91 @@ const ADVERSARIAL_CHECK_REQUIREMENT = {
|
|
|
15879
16184
|
required: ["failure_mode", "check", "result"],
|
|
15880
16185
|
type: "object",
|
|
15881
16186
|
};
|
|
16187
|
+
const SHIP_IT_ARTIFACT_REQUIREMENTS = {
|
|
16188
|
+
adversarial_check: ADVERSARIAL_CHECK_REQUIREMENT,
|
|
16189
|
+
branch_pushed: {
|
|
16190
|
+
description: "Receipt that the worker branch was pushed only after repo.push_branch was permitted.",
|
|
16191
|
+
properties: {
|
|
16192
|
+
branch: { type: "string" },
|
|
16193
|
+
remote: { type: "string" },
|
|
16194
|
+
},
|
|
16195
|
+
required: ["branch", "remote"],
|
|
16196
|
+
type: "object",
|
|
16197
|
+
},
|
|
16198
|
+
branch_ready: {
|
|
16199
|
+
description: "Branch and commit evidence for the candidate ship-it change.",
|
|
16200
|
+
properties: {
|
|
16201
|
+
branch: { type: "string" },
|
|
16202
|
+
commit_sha: { type: "string" },
|
|
16203
|
+
},
|
|
16204
|
+
required: ["branch", "commit_sha"],
|
|
16205
|
+
type: "object",
|
|
16206
|
+
},
|
|
16207
|
+
ci_green: {
|
|
16208
|
+
description: "Explicit CI/check evidence. Prefer gh pr checks --required, or record why no required checks exist.",
|
|
16209
|
+
properties: {
|
|
16210
|
+
command: { type: "string" },
|
|
16211
|
+
status: { type: "string" },
|
|
16212
|
+
},
|
|
16213
|
+
required: ["command", "status"],
|
|
16214
|
+
type: "object",
|
|
16215
|
+
},
|
|
16216
|
+
manager_merge_decision: {
|
|
16217
|
+
description: "Manager-owned decision that all required evidence has been independently verified and merge is allowed.",
|
|
16218
|
+
properties: {
|
|
16219
|
+
decision: { type: "string" },
|
|
16220
|
+
manager_verified: { type: "boolean" },
|
|
16221
|
+
},
|
|
16222
|
+
required: ["decision", "manager_verified"],
|
|
16223
|
+
type: "object",
|
|
16224
|
+
},
|
|
16225
|
+
merge: {
|
|
16226
|
+
description: "Merge receipt recorded only after repo.merge_green_pr, CI, mergeability, and manager decision gates pass.",
|
|
16227
|
+
properties: {
|
|
16228
|
+
merge_sha: { type: "string" },
|
|
16229
|
+
},
|
|
16230
|
+
required: ["merge_sha"],
|
|
16231
|
+
type: "object",
|
|
16232
|
+
},
|
|
16233
|
+
mergeability_clean: {
|
|
16234
|
+
description: "Evidence that the PR is mergeable or conflicts were resolved within the manager-approved retry limit.",
|
|
16235
|
+
properties: {
|
|
16236
|
+
conflicts: { type: "boolean" },
|
|
16237
|
+
mergeable_state: { type: "string" },
|
|
16238
|
+
},
|
|
16239
|
+
required: ["conflicts", "mergeable_state"],
|
|
16240
|
+
type: "object",
|
|
16241
|
+
},
|
|
16242
|
+
post_merge_verification: {
|
|
16243
|
+
description: "Post-merge or main-branch verification receipt.",
|
|
16244
|
+
properties: {
|
|
16245
|
+
command: { type: "string" },
|
|
16246
|
+
status: { type: "string" },
|
|
16247
|
+
},
|
|
16248
|
+
required: ["command", "status"],
|
|
16249
|
+
type: "object",
|
|
16250
|
+
},
|
|
16251
|
+
pr_url: {
|
|
16252
|
+
description: "Pull request URL recorded only after repo.open_pr was permitted.",
|
|
16253
|
+
properties: {
|
|
16254
|
+
url: { type: "string" },
|
|
16255
|
+
},
|
|
16256
|
+
required: ["url"],
|
|
16257
|
+
type: "object",
|
|
16258
|
+
},
|
|
16259
|
+
};
|
|
15882
16260
|
const LOOP_TEMPLATES = {
|
|
16261
|
+
app_visible_build_loop: {
|
|
16262
|
+
artifactRequirements: { adversarial_check: ADVERSARIAL_CHECK_REQUIREMENT },
|
|
16263
|
+
cleanupPolicy: "off",
|
|
16264
|
+
description: "Require build evidence and adversarial proof between visible Codex app iterations without a cleanup gate.",
|
|
16265
|
+
maxIterations: 2,
|
|
16266
|
+
name: "app_visible_build_loop",
|
|
16267
|
+
recommendedTools: ["verification.run_tests"],
|
|
16268
|
+
requiredBeforeContinue: ["build_passed", "adversarial_check"],
|
|
16269
|
+
stopConditions: ["max_iterations", "required_evidence"],
|
|
16270
|
+
tags: ["build", "codex_app", "visible_session"],
|
|
16271
|
+
},
|
|
15883
16272
|
build_then_clear: {
|
|
15884
16273
|
artifactRequirements: {},
|
|
15885
16274
|
cleanupPolicy: "clear",
|
|
@@ -15913,6 +16302,27 @@ const LOOP_TEMPLATES = {
|
|
|
15913
16302
|
stopConditions: ["max_iterations", "required_evidence"],
|
|
15914
16303
|
tags: ["repo", "ci"],
|
|
15915
16304
|
},
|
|
16305
|
+
ship_it_loop: {
|
|
16306
|
+
artifactRequirements: SHIP_IT_ARTIFACT_REQUIREMENTS,
|
|
16307
|
+
cleanupPolicy: "clear",
|
|
16308
|
+
description: "Require branch, push, PR, CI, mergeability, manager merge decision, merge, post-merge, and adversarial evidence before ship-it continuation.",
|
|
16309
|
+
maxIterations: 2,
|
|
16310
|
+
name: "ship_it_loop",
|
|
16311
|
+
recommendedTools: ["gh", "verification.run_tests", "git"],
|
|
16312
|
+
requiredBeforeContinue: [
|
|
16313
|
+
"branch_ready",
|
|
16314
|
+
"branch_pushed",
|
|
16315
|
+
"pr_url",
|
|
16316
|
+
"ci_green",
|
|
16317
|
+
"mergeability_clean",
|
|
16318
|
+
"manager_merge_decision",
|
|
16319
|
+
"merge",
|
|
16320
|
+
"post_merge_verification",
|
|
16321
|
+
"adversarial_check",
|
|
16322
|
+
],
|
|
16323
|
+
stopConditions: ["max_iterations", "required_evidence", "manager_accepts"],
|
|
16324
|
+
tags: ["repo", "ci", "merge", "ship_it"],
|
|
16325
|
+
},
|
|
15916
16326
|
test_coverage_loop: {
|
|
15917
16327
|
artifactRequirements: { adversarial_check: ADVERSARIAL_CHECK_REQUIREMENT },
|
|
15918
16328
|
cleanupPolicy: "clear",
|
|
@@ -16035,6 +16445,9 @@ const MANAGER_RECIPES = {
|
|
|
16035
16445
|
"PR/CI/merge or satisfied_on_main proof",
|
|
16036
16446
|
"parent receipt update before the next child",
|
|
16037
16447
|
],
|
|
16448
|
+
finalReportRequirements: [
|
|
16449
|
+
"Record manager closeout proof, including final task state and any finish-task/heartbeat teardown receipt, in the final report instead of accepted worker criteria.",
|
|
16450
|
+
],
|
|
16038
16451
|
guidelines: [
|
|
16039
16452
|
"Keep exactly one child board active at a time.",
|
|
16040
16453
|
"Before activating the next child, update the parent receipt.",
|
|
@@ -16058,6 +16471,9 @@ const MANAGER_RECIPES = {
|
|
|
16058
16471
|
displayName: "Nudge / What's Next Manager",
|
|
16059
16472
|
epilogues: [],
|
|
16060
16473
|
evidenceGates: ["manager decision", "worker receipt", "accepted criteria closure"],
|
|
16474
|
+
finalReportRequirements: [
|
|
16475
|
+
"Record status, residual risk, and any finish-task or terminal closeout proof in the final report, not as worker acceptance criteria.",
|
|
16476
|
+
],
|
|
16061
16477
|
guidelines: [
|
|
16062
16478
|
"Prefer wait over nudge while the worker is active.",
|
|
16063
16479
|
"Ask for must-have current-task criteria versus follow-ups when scope changes.",
|
|
@@ -16085,6 +16501,9 @@ const MANAGER_RECIPES = {
|
|
|
16085
16501
|
displayName: "PR/CI/Merge Ralph Loop",
|
|
16086
16502
|
epilogues: ["draft-pr", "record-handoff"],
|
|
16087
16503
|
evidenceGates: ["pr_url", "ci_green", "merge", "adversarial_check"],
|
|
16504
|
+
finalReportRequirements: [
|
|
16505
|
+
"Record PR URL, CI, merge, handoff, finish-task, and cleanup receipts in the final report; keep accepted criteria focused on deliverable proof.",
|
|
16506
|
+
],
|
|
16088
16507
|
guidelines: ["Merge only after green CI and recorded manager decision evidence."],
|
|
16089
16508
|
loopTemplate: "pr_ci_merge_loop",
|
|
16090
16509
|
mode: "strict",
|
|
@@ -16094,6 +16513,57 @@ const MANAGER_RECIPES = {
|
|
|
16094
16513
|
supportPatterns: ["Inbox / No-Tmux App Loop", "Recovery / Resume / Handoff"],
|
|
16095
16514
|
tools: ["verification.run_tests", "context.fetch_prs"],
|
|
16096
16515
|
},
|
|
16516
|
+
"ship-it-loop": {
|
|
16517
|
+
acceptance: [
|
|
16518
|
+
"Branch, push, PR URL, CI-green, mergeability, manager merge decision, merge, post-merge verification, and adversarial proof are recorded.",
|
|
16519
|
+
"Push, PR creation, conflict resolution, and merge actions are each gated by explicit manager permissions.",
|
|
16520
|
+
"Merge readiness is a manager decision after independent verification, not a worker claim or CI-green shortcut.",
|
|
16521
|
+
],
|
|
16522
|
+
cleanup: "clear after saved handoff",
|
|
16523
|
+
description: "Drive a visible manager-worker ship-it loop through branch push, PR, CI, conflict handling, manager merge decision, merge, and post-merge receipts.",
|
|
16524
|
+
disallowedActions: [
|
|
16525
|
+
"Do not push branches before repo.push_branch is permitted.",
|
|
16526
|
+
"Do not open or update PRs before repo.open_pr is permitted.",
|
|
16527
|
+
"Do not resolve conflicts before repo.resolve_conflicts is permitted and retry bounds are recorded.",
|
|
16528
|
+
"Do not merge before repo.merge_green_pr is permitted, CI is green, mergeability is clean, and the manager records merge_ready.",
|
|
16529
|
+
],
|
|
16530
|
+
displayName: "Autonomous Ship-It Loop",
|
|
16531
|
+
epilogues: ["draft-pr", "record-handoff"],
|
|
16532
|
+
evidenceGates: [
|
|
16533
|
+
"branch_ready",
|
|
16534
|
+
"branch_pushed",
|
|
16535
|
+
"pr_url",
|
|
16536
|
+
"ci_green",
|
|
16537
|
+
"mergeability_clean",
|
|
16538
|
+
"manager_merge_decision",
|
|
16539
|
+
"merge",
|
|
16540
|
+
"post_merge_verification",
|
|
16541
|
+
"adversarial_check",
|
|
16542
|
+
],
|
|
16543
|
+
finalReportRequirements: [
|
|
16544
|
+
"Record branch, PR URL, CI/check output, mergeability/conflict status, manager merge decision, merge SHA, post-merge verification, finish-task, and heartbeat teardown proof in the final report.",
|
|
16545
|
+
],
|
|
16546
|
+
guidelines: [
|
|
16547
|
+
"Keep all PR lifecycle phases visible in the manager and worker sessions.",
|
|
16548
|
+
"Treat CI-green, mergeability, and worker receipts as claims until the manager verifies them.",
|
|
16549
|
+
"Use a bounded conflict retry and block with evidence when conflicts remain unresolved.",
|
|
16550
|
+
],
|
|
16551
|
+
loopTemplate: "ship_it_loop",
|
|
16552
|
+
mode: "strict",
|
|
16553
|
+
name: "ship-it-loop",
|
|
16554
|
+
objective: "Supervise a worker from implementation through explicit branch, PR, CI, conflict, merge, and post-merge evidence gates.",
|
|
16555
|
+
permissions: [
|
|
16556
|
+
"repo.push_branch",
|
|
16557
|
+
"repo.open_pr",
|
|
16558
|
+
"repo.monitor_ci",
|
|
16559
|
+
"repo.resolve_conflicts",
|
|
16560
|
+
"repo.merge_green_pr",
|
|
16561
|
+
"worker_session.compact",
|
|
16562
|
+
"worker_session.clear",
|
|
16563
|
+
],
|
|
16564
|
+
supportPatterns: ["Inbox / No-Tmux App Loop", "Recovery / Resume / Handoff"],
|
|
16565
|
+
tools: ["gh", "git", "verification.run_tests", "context.fetch_prs"],
|
|
16566
|
+
},
|
|
16097
16567
|
"test-coverage-loop": {
|
|
16098
16568
|
acceptance: [
|
|
16099
16569
|
"Coverage or targeted test evidence is recorded before another worker pass.",
|
|
@@ -16105,6 +16575,9 @@ const MANAGER_RECIPES = {
|
|
|
16105
16575
|
displayName: "Test Coverage Loop",
|
|
16106
16576
|
epilogues: [],
|
|
16107
16577
|
evidenceGates: ["test_coverage", "adversarial_check"],
|
|
16578
|
+
finalReportRequirements: [
|
|
16579
|
+
"Record final closeout and finish-task proof in the manager final report; do not make closeout mechanics a test-coverage criterion.",
|
|
16580
|
+
],
|
|
16108
16581
|
guidelines: ["Record coverage evidence before asking for another worker pass."],
|
|
16109
16582
|
loopTemplate: "test_coverage_loop",
|
|
16110
16583
|
mode: "strict",
|
|
@@ -16131,6 +16604,9 @@ const MANAGER_RECIPES = {
|
|
|
16131
16604
|
"diff_below_threshold",
|
|
16132
16605
|
"adversarial_check",
|
|
16133
16606
|
],
|
|
16607
|
+
finalReportRequirements: [
|
|
16608
|
+
"Record final visual decision, closeout, and cleanup proof in the manager final report; keep accepted criteria focused on visible-output evidence.",
|
|
16609
|
+
],
|
|
16134
16610
|
guidelines: ["Compare visible output against references before requesting another pass."],
|
|
16135
16611
|
loopTemplate: "visual_diff_loop",
|
|
16136
16612
|
mode: "guided",
|
|
@@ -16149,6 +16625,9 @@ const MANAGER_RECIPE_ALIASES = {
|
|
|
16149
16625
|
"pr ci merge ralph loop": "pr-ci-merge-ralph-loop",
|
|
16150
16626
|
"pr/ci/merge ralph loop": "pr-ci-merge-ralph-loop",
|
|
16151
16627
|
"ralph loop": "pr-ci-merge-ralph-loop",
|
|
16628
|
+
"ship it": "ship-it-loop",
|
|
16629
|
+
"ship it loop": "ship-it-loop",
|
|
16630
|
+
"ship-it": "ship-it-loop",
|
|
16152
16631
|
"test coverage": "test-coverage-loop",
|
|
16153
16632
|
"test coverage loop": "test-coverage-loop",
|
|
16154
16633
|
"ux polish": "ux-polish-loop",
|
|
@@ -16182,6 +16661,7 @@ function managerRecipeSummary(name) {
|
|
|
16182
16661
|
display_name: recipe.displayName,
|
|
16183
16662
|
epilogues: [...recipe.epilogues],
|
|
16184
16663
|
evidence_gates: [...recipe.evidenceGates],
|
|
16664
|
+
final_report_requirements: [...recipe.finalReportRequirements],
|
|
16185
16665
|
guidelines: [...recipe.guidelines],
|
|
16186
16666
|
locked_summary_template: lockedManagerRecipeSummary(recipe),
|
|
16187
16667
|
loop_template: recipe.loopTemplate,
|
|
@@ -16236,6 +16716,7 @@ function lockedManagerRecipeSummary(recipe) {
|
|
|
16236
16716
|
`Epilogues: ${recipe.epilogues.length > 0 ? recipe.epilogues.join(", ") : "none"}`,
|
|
16237
16717
|
`Cleanup: ${recipe.cleanup}`,
|
|
16238
16718
|
`Evidence gates: ${recipe.evidenceGates.length > 0 ? recipe.evidenceGates.join(", ") : "manager-reviewed evidence"}`,
|
|
16719
|
+
`Final report: ${recipe.finalReportRequirements.join("; ")}`,
|
|
16239
16720
|
`Not allowed: ${recipe.disallowedActions.length > 0 ? recipe.disallowedActions.join("; ") : "unconfirmed custom actions"}`,
|
|
16240
16721
|
"User confirmed: <yes|no>",
|
|
16241
16722
|
].join("\n");
|
|
@@ -16542,7 +17023,7 @@ function loopStatusSummarySync(database, options) {
|
|
|
16542
17023
|
const matchingCommands = commandRows.filter((row) => commandRowMatchesRun(row, options.run.id));
|
|
16543
17024
|
const commandStates = countBy(matchingCommands.map((row) => row.state));
|
|
16544
17025
|
const notificationRows = database.prepare(`
|
|
16545
|
-
select state, payload_json
|
|
17026
|
+
select consumed_at, state, payload_json
|
|
16546
17027
|
from routed_notifications
|
|
16547
17028
|
where task_id = ?
|
|
16548
17029
|
order by created_at, id
|
|
@@ -16575,6 +17056,12 @@ function loopStatusSummarySync(database, options) {
|
|
|
16575
17056
|
.filter((value) => typeof value === "string" && value.length > 0))].sort();
|
|
16576
17057
|
const telemetryEvents = telemetryEventsForRunSync(database, { runId: options.run.id, taskId: options.task.id });
|
|
16577
17058
|
const telemetryByType = countBy(telemetryEvents.map((event) => event.event_type));
|
|
17059
|
+
const appTaskDispatch = appTaskDispatchSummarySync(database, {
|
|
17060
|
+
commandRows,
|
|
17061
|
+
notificationRows,
|
|
17062
|
+
runScopedActivityTotal: matchingCommands.length + matchingNotifications.length + telemetryEvents.length,
|
|
17063
|
+
taskId: options.task.id,
|
|
17064
|
+
});
|
|
16578
17065
|
const failedCommandCount = commandStates.failed ?? 0;
|
|
16579
17066
|
const failureCounts = loopFailureCountsSync(database, {
|
|
16580
17067
|
failedCommandCount,
|
|
@@ -16595,6 +17082,7 @@ function loopStatusSummarySync(database, options) {
|
|
|
16595
17082
|
total: evidenceItems.length,
|
|
16596
17083
|
types: evidenceTypes,
|
|
16597
17084
|
},
|
|
17085
|
+
app_task_dispatch: appTaskDispatch,
|
|
16598
17086
|
failures: failureCounts,
|
|
16599
17087
|
inbox: {
|
|
16600
17088
|
worker_unconsumed: workerInbox,
|
|
@@ -16638,6 +17126,52 @@ function telemetryEventsForRunSync(database, options) {
|
|
|
16638
17126
|
limit 1000
|
|
16639
17127
|
`).all(options.taskId, options.runId);
|
|
16640
17128
|
}
|
|
17129
|
+
/**
 * Build the task-level "app Dispatch" section of a loop status summary.
 *
 * Telemetry is queried here by task id for a fixed list of Dispatch-related
 * event types; command and notification rows are supplied by the caller and
 * are only counted, never re-queried.
 *
 * @param {object} database - Handle exposing `prepare(sql).all(...params)`.
 * @param {object} options
 * @param {Array<object>} options.commandRows - Rows whose `state` is tallied.
 * @param {Array<object>} options.notificationRows - Rows whose `state` and
 *   `consumed_at` are inspected.
 * @param {number} options.runScopedActivityTotal - Number of run-scoped
 *   records the caller found; zero triggers the explanatory `note`.
 * @param {*} options.taskId - Value bound to the `task_id = ?` placeholder.
 * @returns {object} Summary with `commands`, `latest_event_at`, `note`,
 *   `notifications`, `records_total`, and `telemetry` keys (in that order).
 */
function appTaskDispatchSummarySync(database, options) {
    const dispatchEventTypes = [
        "app_autopilot_started",
        "app_autopilot_stopped",
        "app_heartbeat",
        "app_wakeup_delivery_recorded",
        "app_wakeup_dispatch_planned",
        "command_created",
        "dispatch_inbox_consumed",
    ];
    // One "?" per event type so every value is bound rather than spliced into the SQL.
    const eventTypePlaceholders = dispatchEventTypes.map(() => "?").join(", ");
    const dispatchTelemetryQuery = database.prepare(`
select event_type, timestamp
from telemetry_events
where task_id = ?
and event_type in (${eventTypePlaceholders})
order by timestamp, id
`);
    const dispatchEvents = dispatchTelemetryQuery.all(options.taskId, ...dispatchEventTypes);

    const eventTypeCounts = countBy(dispatchEvents.map((event) => event.event_type));
    const commandStateCounts = countBy(options.commandRows.map((command) => command.state));
    const notificationStateCounts = countBy(options.notificationRows.map((notification) => notification.state));

    const commandTotal = options.commandRows.length;
    const notificationTotal = options.notificationRows.length;
    const telemetryTotal = dispatchEvents.length;
    const recordsTotal = commandTotal + notificationTotal + telemetryTotal;

    // The requested run shows nothing, yet the task as a whole has Dispatch records.
    const runSeesNothing = options.runScopedActivityTotal === 0;
    const note = runSeesNothing && recordsTotal > 0
        ? "Requested run has no run-scoped activity, but task-level app Dispatch records exist."
        : null;

    const commands = {
        states: sortJson(commandStateCounts),
        total: commandTotal,
    };

    // Rows are ordered by timestamp, so the last one is the most recent event.
    const newestEvent = dispatchEvents.at(-1);
    const latestEventAt = newestEvent?.timestamp ?? null;

    let deliveredUnconsumed = 0;
    for (const notification of options.notificationRows) {
        if (notification.state === "delivered" && notification.consumed_at === null) {
            deliveredUnconsumed += 1;
        }
    }
    const notifications = {
        delivered_unconsumed: deliveredUnconsumed,
        states: sortJson(notificationStateCounts),
        total: notificationTotal,
    };

    const telemetry = {
        by_event_type: sortJson(eventTypeCounts),
        command_created: eventTypeCounts.command_created ?? 0,
        dispatch_inbox_consumed: eventTypeCounts.dispatch_inbox_consumed ?? 0,
        total: telemetryTotal,
    };

    return {
        commands,
        latest_event_at: latestEventAt,
        note,
        notifications,
        records_total: recordsTotal,
        telemetry,
    };
}
|
|
16641
17175
|
function loopFailureCountsSync(database, options) {
|
|
16642
17176
|
const failedCycles = database.prepare(`
|
|
16643
17177
|
select count(distinct mc.id) as count
|
|
@@ -16727,6 +17261,8 @@ function renderLoopStatusText(result) {
|
|
|
16727
17261
|
const notifications = result.notifications;
|
|
16728
17262
|
const inbox = result.inbox;
|
|
16729
17263
|
const telemetry = result.telemetry;
|
|
17264
|
+
const appTaskDispatch = result.app_task_dispatch;
|
|
17265
|
+
const appTaskDispatchTelemetry = appTaskDispatch?.telemetry;
|
|
16730
17266
|
return [
|
|
16731
17267
|
`task: ${task.name} (${task.state})`,
|
|
16732
17268
|
`run: ${run.name || run.id} (${run.status})`,
|
|
@@ -16735,6 +17271,7 @@ function renderLoopStatusText(result) {
|
|
|
16735
17271
|
`notifications: ${notifications.delivered}/${notifications.total} delivered`,
|
|
16736
17272
|
`worker_unconsumed: ${inbox.worker_unconsumed}`,
|
|
16737
17273
|
`dispatch_inbox_consumed: ${telemetry.dispatch_inbox_consumed}`,
|
|
17274
|
+
`app_task_dispatch: ${appTaskDispatch?.records_total ?? 0} records ${JSON.stringify(appTaskDispatchTelemetry?.by_event_type ?? {})}${appTaskDispatch?.note ? ` (${appTaskDispatch.note})` : ""}`,
|
|
16738
17275
|
`failures: ${JSON.stringify(result.failures ?? {})}`,
|
|
16739
17276
|
`recommendation: ${result.recommendation}`,
|
|
16740
17277
|
].join("\n") + "\n";
|
|
@@ -17250,6 +17787,7 @@ function disposableWorkerHandoff(taskName, runName, dbPath) {
|
|
|
17250
17787
|
"",
|
|
17251
17788
|
`You are the worker for task ${taskName}${loopClause}.`,
|
|
17252
17789
|
"Keep polling your Conveyor worker inbox until there are no items left or the loop reaches max_iterations. Consume the next item now, treat each consumed item as the manager's next instruction, complete the requested work, and report changed files, exact commands run, evidence, and any residual risk.",
|
|
17790
|
+
...visibleSessionProtocolLines("worker"),
|
|
17253
17791
|
"After completing or blocking on a consumed item, send the manager a durable Conveyor notification before your final answer. A direct app-thread final answer is not a manager receipt and is not task completion.",
|
|
17254
17792
|
`Run: ${notifyCommand}`,
|
|
17255
17793
|
`Then run: ${dispatchCommand}`,
|
|
@@ -17332,9 +17870,11 @@ function disposableHeartbeatRecommendations(taskName, dbPath) {
|
|
|
17332
17870
|
`After a successful app-thread send, record it with: ${deliveryReceiptCommands.sent}`,
|
|
17333
17871
|
`For healthy skipped actions, record: ${deliveryReceiptCommands.skipped}`,
|
|
17334
17872
|
`For missing-thread blocked actions, record: ${deliveryReceiptCommands.blocked}`,
|
|
17873
|
+
...visibleSessionProtocolLines("manager"),
|
|
17335
17874
|
"If an item is consumed, execute only that manager instruction, verify worker claims before recording conclusions, update Conveyor state as appropriate, and produce exactly one next worker task.",
|
|
17336
17875
|
"If no item is consumed, stop after a one-line idle receipt.",
|
|
17337
17876
|
"Do not delete, pause, or disable manager or worker heartbeat automation after an idle poll; an idle poll is only a quiet interval.",
|
|
17877
|
+
"Keep manager closeout/control-plane proof out of accepted worker criteria; record finish-task, final task state, and heartbeat teardown proof in the manager final report or audit receipts.",
|
|
17338
17878
|
`If all accepted criteria are satisfied, deferred, or rejected and there is no next worker task, record the terminal manager decision, run or report the result of: ${terminalCloseoutCommand}`,
|
|
17339
17879
|
"After verified task closeout, explicitly report heartbeat teardown status; if the task remains managed/active, report that as a control-plane blocker instead of calling the loop complete.",
|
|
17340
17880
|
].join("\n"),
|
|
@@ -17348,6 +17888,7 @@ function disposableHeartbeatRecommendations(taskName, dbPath) {
|
|
|
17348
17888
|
`Run the worker app heartbeat for task ${taskName}.`,
|
|
17349
17889
|
`Run: ${workerHeartbeatCommand}`,
|
|
17350
17890
|
`If the heartbeat output asks for direct inbox polling, run: ${workerInboxCommand}`,
|
|
17891
|
+
...visibleSessionProtocolLines("worker"),
|
|
17351
17892
|
"If an item is consumed, execute only that single worker instruction and return exact commands, compact evidence for any completion claim, blockers/residual risk, and exactly one next recommended worker task.",
|
|
17352
17893
|
"Before your final answer after any consumed item, notify the manager durably; a direct app-thread final answer is not a manager receipt and is not task completion.",
|
|
17353
17894
|
`Run: ${workerNotifyCommand}`,
|
|
@@ -17583,6 +18124,7 @@ function startManagerBootstrapPrompt(database, options) {
|
|
|
17583
18124
|
"- Treat acceptance criteria as living supervision state.",
|
|
17584
18125
|
"- Inspect `manager_context.acceptance_criteria` each cycle.",
|
|
17585
18126
|
"- If worker progress reveals new edge cases, tests, polish, or scope boundaries, ask the worker to propose must-have vs follow-up criteria.",
|
|
18127
|
+
"- Keep manager closeout/control-plane proof out of accepted worker criteria; record finish-task, final task state, teardown, and final-report proof in manager closeout evidence instead.",
|
|
17586
18128
|
"- Before finishing, compare worker receipts/verification against accepted open criteria.",
|
|
17587
18129
|
`- For each accepted criterion that is proven, record evidence with \`${satisfyCriterionCommand}\`.`,
|
|
17588
18130
|
`- When all accepted criteria are satisfied, deferred, or rejected, finish the task with \`${workerctl} finish-task ${taskLine} --reason "Accepted criteria satisfied" --require-criteria-audit${pathSuffix}\`.`,
|