ralphctl 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,7 +52,7 @@ import {
52
52
  updateTask,
53
53
  updateTaskStatus,
54
54
  validateImportTasks
55
- } from "./chunk-3HJNVQ7N.mjs";
55
+ } from "./chunk-TKPTT2UG.mjs";
56
56
  import {
57
57
  fetchIssueFromUrl,
58
58
  formatIssueContext,
@@ -177,7 +177,7 @@ import {
177
177
  // package.json
178
178
  var package_default = {
179
179
  name: "ralphctl",
180
- version: "0.4.1",
180
+ version: "0.4.2",
181
181
  description: "Agent harness for long-running AI coding tasks \u2014 orchestrates Claude Code & GitHub Copilot across repositories",
182
182
  homepage: "https://github.com/lukas-grigis/ralphctl",
183
183
  type: "module",
@@ -4239,9 +4239,12 @@ function composePrompt(template, substitutions) {
4239
4239
  }
4240
4240
  return result;
4241
4241
  }
4242
+ var CHECK_GATE_EXAMPLE = "Run the project's check gate \u2014 all pass (omit this step when the project has no check script)";
4242
4243
  function buildPlanCommon(projectToolingSection) {
4243
4244
  return composePrompt(loadPartial("plan-common"), {
4244
- PROJECT_TOOLING: projectToolingSection
4245
+ PLAN_COMMON_EXAMPLES: loadPartial("plan-common-examples"),
4246
+ PROJECT_TOOLING: projectToolingSection,
4247
+ CHECK_GATE_EXAMPLE
4245
4248
  });
4246
4249
  }
4247
4250
  function buildPlannerBase(projectToolingSection) {
@@ -4249,7 +4252,8 @@ function buildPlannerBase(projectToolingSection) {
4249
4252
  HARNESS_CONTEXT: loadPartial("harness-context"),
4250
4253
  COMMON: buildPlanCommon(projectToolingSection),
4251
4254
  VALIDATION: loadPartial("validation-checklist"),
4252
- SIGNALS: loadPartial("signals-planning")
4255
+ SIGNALS: loadPartial("signals-planning"),
4256
+ CHECK_GATE_EXAMPLE
4253
4257
  };
4254
4258
  }
4255
4259
  function buildInteractivePrompt(context, outputFile, schema, projectToolingSection) {
@@ -4268,9 +4272,13 @@ function buildAutoPrompt(context, schema, projectToolingSection) {
4268
4272
  });
4269
4273
  }
4270
4274
  function buildTaskExecutionPrompt(progressFilePath, noCommit, contextFileName, projectToolingSection = "") {
4271
- const template = loadTemplate("task-execution");
4272
- const commitStep = noCommit ? "" : "\n - **Before continuing:** Create a git commit with a descriptive message for the changes made.";
4273
- const commitConstraint = noCommit ? "" : "- **Must commit** \u2014 Create a git commit before signaling completion.\n";
4275
+ let template = loadTemplate("task-execution");
4276
+ if (noCommit) {
4277
+ template = template.replace(/^[ \t]*\{\{COMMIT_STEP\}\}\n/m, "\n");
4278
+ template = template.replace(/^[ \t]*\{\{COMMIT_CONSTRAINT\}\}\n/m, "");
4279
+ }
4280
+ const commitStep = noCommit ? "" : " - **Before continuing:** Create a git commit with a descriptive message for the changes made.";
4281
+ const commitConstraint = noCommit ? "" : "- **Must commit** \u2014 Create a git commit before signaling completion.";
4274
4282
  return composePrompt(template, {
4275
4283
  HARNESS_CONTEXT: loadPartial("harness-context"),
4276
4284
  SIGNALS: loadPartial("signals-task"),
@@ -4283,11 +4291,16 @@ function buildTaskExecutionPrompt(progressFilePath, noCommit, contextFileName, p
4283
4291
  }
4284
4292
  function buildTicketRefinePrompt(ticketContent, outputFile, schema, issueContext = "") {
4285
4293
  const template = loadTemplate("ticket-refine");
4294
+ const issueContextSection = issueContext ? `<context>
4295
+
4296
+ ${issueContext}
4297
+
4298
+ </context>` : "";
4286
4299
  return composePrompt(template, {
4287
4300
  TICKET: ticketContent,
4288
4301
  OUTPUT_FILE: outputFile,
4289
4302
  SCHEMA: schema,
4290
- ISSUE_CONTEXT: issueContext
4303
+ ISSUE_CONTEXT: issueContextSection
4291
4304
  });
4292
4305
  }
4293
4306
  function buildIdeatePrompt(ideaTitle, ideaDescription, projectName, repositories, outputFile, schema, projectToolingSection) {
@@ -4316,9 +4329,10 @@ function renderExtraDimensions(extras) {
4316
4329
  return { section: "", passBar: "", assessment: "" };
4317
4330
  }
4318
4331
  const section = extras.map(
4319
- (name, i) => `
4320
- **Dimension ${String(5 + i)} \u2014 ${name}**
4332
+ (name) => `
4333
+ <dimension name="${name}" floor="false">
4321
4334
  Additional task-specific dimension flagged by the planner. Apply judgment to whether the implementation satisfies this dimension given the task's verification criteria and steps.
4335
+ </dimension>
4322
4336
  `
4323
4337
  ).join("");
4324
4338
  const passBar = extras.map((name) => `
package/dist/cli.mjs CHANGED
@@ -41,7 +41,7 @@ import {
41
41
  ticketRefineCommand,
42
42
  ticketRemoveCommand,
43
43
  ticketShowCommand
44
- } from "./chunk-SM4GGZSU.mjs";
44
+ } from "./chunk-GL7MKLLS.mjs";
45
45
  import {
46
46
  projectAddCommand
47
47
  } from "./chunk-D2YGPLIV.mjs";
@@ -55,7 +55,7 @@ import "./chunk-NUYQK5MN.mjs";
55
55
  import {
56
56
  getTasks,
57
57
  sprintStartCommand
58
- } from "./chunk-3HJNVQ7N.mjs";
58
+ } from "./chunk-TKPTT2UG.mjs";
59
59
  import {
60
60
  truncate
61
61
  } from "./chunk-JOQO4HMM.mjs";
@@ -705,7 +705,7 @@ async function main() {
705
705
  const isBare = argv.length <= 2;
706
706
  const isInteractive = argv[2] === "interactive";
707
707
  if (isBare || isInteractive) {
708
- const { mountInkApp } = await import("./mount-2N6H5CWA.mjs");
708
+ const { mountInkApp } = await import("./mount-ISHZM36X.mjs");
709
709
  const { fallback } = await mountInkApp({ initialView: "repl" });
710
710
  if (!fallback) return;
711
711
  printBanner();
@@ -716,10 +716,10 @@ async function main() {
716
716
  return;
717
717
  }
718
718
  if (argv[2] === "sprint" && argv[3] === "start") {
719
- const { parseSprintStartArgs } = await import("./start-IUDCXIEA.mjs");
719
+ const { parseSprintStartArgs } = await import("./start-76JKJQIH.mjs");
720
720
  const parsed = parseSprintStartArgs(argv.slice(4));
721
721
  if (parsed.ok) {
722
- const { mountInkApp } = await import("./mount-2N6H5CWA.mjs");
722
+ const { mountInkApp } = await import("./mount-ISHZM36X.mjs");
723
723
  const { getSharedDeps } = await import("./bootstrap-FMHG6DRY.mjs");
724
724
  let sprintId;
725
725
  try {
@@ -62,7 +62,7 @@ import {
62
62
  ticketShowCommand,
63
63
  useCurrentPrompt,
64
64
  validateConfigValue
65
- } from "./chunk-SM4GGZSU.mjs";
65
+ } from "./chunk-GL7MKLLS.mjs";
66
66
  import {
67
67
  PromptCancelledError,
68
68
  projectAddCommand
@@ -109,7 +109,7 @@ import {
109
109
  sprintStartCommand,
110
110
  updateTaskStatus,
111
111
  withSuspendedTui
112
- } from "./chunk-3HJNVQ7N.mjs";
112
+ } from "./chunk-TKPTT2UG.mjs";
113
113
  import {
114
114
  addTicket,
115
115
  allRequirementsApproved,
@@ -11,6 +11,27 @@ When finished, emit a signal from the `<signals>` block below.
11
11
 
12
12
  ## Two-Phase Protocol
13
13
 
14
+ ### Phase 0: Think Before Writing
15
+
16
+ Before emitting any JSON, write your reasoning in a `<thinking>…</thinking>` block. Use it to interrogate the idea —
17
+ surface hidden assumptions, identify the real user problem, sketch requirements, and reason about which repositories
18
+ and dependencies the work touches. Explicit reasoning produces sharper output than jumping straight to JSON.
19
+
20
+ The harness's JSON extractor skips everything before the first `{`, so the `<thinking>` block is stripped
21
+ automatically — but the JSON object itself must still be emitted without markdown fences or commentary after it.
22
+
23
+ ```
24
+ <thinking>
25
+ The idea says "webhook notifications" but doesn't say which events. Reviewing the API, the natural candidates are
26
+ task-status transitions. Scope = status-change webhooks only; other event types are out of scope.
27
+ Acceptance: POST to configured URL with JSON payload on task status change; retries on 5xx.
28
+
29
+ </thinking>
30
+ {
31
+ … JSON object …
32
+ }
33
+ ```
34
+
14
35
  ### Phase 1: Refine Requirements (WHAT)
15
36
 
16
37
  Analyze the idea and produce complete, implementation-agnostic requirements:
@@ -87,6 +108,8 @@ If you cannot produce a valid plan, signal the issue instead of outputting incom
87
108
 
88
109
  - `<planning-blocked>reason</planning-blocked>`
89
110
 
111
+ <context>
112
+
90
113
  ## Idea to Implement
91
114
 
92
115
  **Title:** {{IDEA_TITLE}}
@@ -107,6 +130,8 @@ You have access to these repositories:
107
130
 
108
131
  {{COMMON}}
109
132
 
133
+ </context>
134
+
110
135
  {{VALIDATION}}
111
136
 
112
137
  ## Output Format
@@ -148,7 +173,7 @@ If you cannot produce a valid plan, output `<planning-blocked>reason</planning-b
148
173
  "Update src/repositories/export.ts findExports() to add WHERE clause for date filtering",
149
174
  "Add unit tests in src/schemas/__tests__/date-range.test.ts covering valid ranges, invalid formats, and reversed dates",
150
175
  "Add integration test in src/controllers/__tests__/export.test.ts for filtered and unfiltered queries",
151
- "Run pnpm typecheck && pnpm lint && pnpm test — all pass"
176
+ "{{CHECK_GATE_EXAMPLE}}"
152
177
  ],
153
178
  "verificationCriteria": [
154
179
  "TypeScript compiles with no errors",
@@ -118,6 +118,8 @@ Focus: Determine HOW to implement the approved requirements
118
118
 
119
119
  {{VALIDATION}}
120
120
 
121
+ <context>
122
+
121
123
  ## Idea to Refine and Plan
122
124
 
123
125
  **Title:** {{IDEA_TITLE}}
@@ -141,6 +143,8 @@ mention it as an observation.
141
143
 
142
144
  {{COMMON}}
143
145
 
146
+ </context>
147
+
144
148
  ## Output Format
145
149
 
146
150
  When BOTH phases are approved by the user, write the JSON to: {{OUTPUT_FILE}}
@@ -169,7 +173,7 @@ Use this exact JSON Schema:
169
173
  "Update ExportController.getExport() in src/controllers/export.ts to parse and validate date range params",
170
174
  "Add date range filtering to ExportRepository.findRecords() in src/repositories/export.ts",
171
175
  "Write tests in src/controllers/__tests__/export.test.ts for: no dates, valid range, invalid range, start > end",
172
- "Run pnpm typecheck && pnpm lint && pnpm test — all pass"
176
+ "{{CHECK_GATE_EXAMPLE}}"
173
177
  ],
174
178
  "verificationCriteria": [
175
179
  "TypeScript compiles with no errors",
@@ -12,6 +12,27 @@ When finished, emit a signal from the `<signals>` block below.
12
12
 
13
13
  ## Protocol
14
14
 
15
+ ### Step 0: Think Before Writing
16
+
17
+ Before emitting any JSON, write your reasoning in a `<thinking>…</thinking>` block. Use it to work through the problem
18
+ — map tickets to repositories, reason about dependencies, identify risks, and decide on task boundaries. Explicit
19
+ reasoning produces sharper plans than jumping straight to output.
20
+
21
+ The harness's JSON extractor skips everything before the first `[`, so the `<thinking>` block is stripped
22
+ automatically — but the JSON array itself must still be emitted without markdown fences or commentary after it.
23
+
24
+ ```
25
+ <thinking>
26
+ Ticket 1 touches both the API and the worker repo — split into two tasks with a blockedBy edge.
27
+ The shared schema change must land first so the worker can import it.
28
+ Verification criterion for the API task: a contract test against the new schema.
29
+
30
+ </thinking>
31
+ [
32
+ { … JSON array … }
33
+ ]
34
+ ```
35
+
15
36
  ### Step 1: Explore the Project
16
37
 
17
38
  Scope exploration to what will change the plan — read instruction files first, then only the specific files you need
@@ -55,10 +76,14 @@ The sprint contains:
55
76
  - **Existing Tasks**: Tasks from a previous planning run (your output replaces all existing tasks)
56
77
  - **Projects**: Each ticket belongs to a project which may have multiple repository paths
57
78
 
79
+ <context>
80
+
58
81
  {{CONTEXT}}
59
82
 
60
83
  {{COMMON}}
61
84
 
85
+ </context>
86
+
62
87
  ### Step 5: Handle Blockers
63
88
 
64
89
  If you cannot produce a valid task breakdown, signal the issue instead of outputting incomplete JSON:
@@ -73,6 +98,9 @@ If you cannot produce a valid task breakdown, signal the issue instead of output
73
98
 
74
99
  ## Output
75
100
 
101
+ Your output MAY begin with a `<thinking>…</thinking>` block — the harness's JSON extractor skips everything before the
102
+ first `[`. The JSON array itself must still be emitted without markdown fences or surrounding prose.
103
+
76
104
  Output only the JSON document matching the schema below — the harness parses your raw output directly as JSON, so emit
77
105
  it without markdown fences, commentary, or surrounding prose. If you cannot produce tasks, output a
78
106
  `<planning-blocked>` signal instead.
@@ -102,7 +130,7 @@ JSON Schema:
102
130
  "steps": [
103
131
  "Create src/utils/validation.ts with validateEmail(), validatePhone(), validateDateRange()",
104
132
  "Add corresponding unit tests in src/utils/__tests__/validation.test.ts covering valid inputs, invalid inputs, and edge cases (empty strings, unicode)",
105
- "Run pnpm typecheck && pnpm lint && pnpm test — all pass"
133
+ "{{CHECK_GATE_EXAMPLE}}"
106
134
  ],
107
135
  "verificationCriteria": [
108
136
  "TypeScript compiles with no errors",
@@ -123,7 +151,7 @@ JSON Schema:
123
151
  "Wire up validation from src/utils/validation.ts with inline error messages",
124
152
  "Add form submission handler that calls POST /api/users",
125
153
  "Write component tests in src/components/__tests__/RegistrationForm.test.ts for valid submission, validation errors, and API failure",
126
- "Run pnpm typecheck && pnpm lint && pnpm test — all pass"
154
+ "{{CHECK_GATE_EXAMPLE}}"
127
155
  ],
128
156
  "verificationCriteria": [
129
157
  "TypeScript compiles with no errors",
@@ -0,0 +1,82 @@
1
+ <examples>
2
+
3
+ The illustrations below are non-normative — they show good/bad shapes for the rules stated in `plan-common.md`. Use
4
+ them as calibration, not templates to copy literally.
5
+
6
+ ## Verification Criteria — good vs bad
7
+
8
+ > **Good criteria (verifiable, unambiguous):**
9
+ >
10
+ > - "TypeScript compiles with no errors"
11
+ > - "All existing tests pass plus new tests for the added feature"
12
+ > - "GET /api/users returns 200 with paginated user list"
13
+ > - "GET /api/users?page=-1 returns 400 with validation error"
14
+ > - "Component renders without console errors in browser"
15
+ > - "Playwright e2e: login flow completes without errors" _(UI tasks with Playwright configured)_
16
+
17
+ > **Bad criteria (vague, not independently verifiable):**
18
+ >
19
+ > - "Code is clean and well-structured"
20
+ > - "Error handling is appropriate"
21
+ > - "Performance is acceptable"
22
+
23
+ ## Dependency Graph — good vs bad
24
+
25
+ ### Good Dependency Graph
26
+
27
+ ```
28
+ Task 1: Add shared validation utilities (no deps)
29
+ Task 2: Implement user registration form (blockedBy: [1])
30
+ Task 3: Implement user profile editor (blockedBy: [1])
31
+ Task 4: Add form submission analytics (blockedBy: [2, 3])
32
+ ```
33
+
34
+ Tasks 2 and 3 run in parallel (both depend only on 1). Task 4 waits for both.
35
+
36
+ ### Bad Dependency Graph
37
+
38
+ ```
39
+ Task 1: Add validation utilities (no deps)
40
+ Task 2: Implement registration form (blockedBy: [1])
41
+ Task 3: Implement profile editor (blockedBy: [2]) <-- WRONG
42
+ Task 4: Add submission analytics (blockedBy: [3]) <-- WRONG
43
+ ```
44
+
45
+ Task 3 does not actually need Task 2 — it only needs Task 1. This creates a false serial chain that prevents parallel
46
+ execution.
47
+
48
+ ## Precise Steps — good vs bad
49
+
50
+ Bad — vague steps that force the agent to guess:
51
+
52
+ ```json
53
+ {
54
+ "name": "Add user authentication",
55
+ "steps": ["Implement auth", "Add tests", "Update docs"]
56
+ }
57
+ ```
58
+
59
+ Good — precise steps with file paths and pattern references:
60
+
61
+ ```json
62
+ {
63
+ "name": "Add user authentication",
64
+ "projectPath": "/Users/dev/my-app",
65
+ "steps": [
66
+ "Create auth service in src/services/auth.ts with login(), logout(), getCurrentUser() — follow the pattern in src/services/user.ts for error handling and return types",
67
+ "Add AuthContext provider in src/contexts/AuthContext.tsx wrapping the app — follow existing ThemeContext pattern",
68
+ "Create useAuth hook in src/hooks/useAuth.ts exposing auth state and actions",
69
+ "Add ProtectedRoute wrapper component in src/components/ProtectedRoute.tsx",
70
+ "Write unit tests in src/services/__tests__/auth.test.ts — follow test patterns in src/services/__tests__/user.test.ts",
71
+ "{{CHECK_GATE_EXAMPLE}}"
72
+ ],
73
+ "verificationCriteria": [
74
+ "TypeScript compiles with no errors",
75
+ "All existing tests pass plus new auth tests",
76
+ "ProtectedRoute redirects unauthenticated users to /login",
77
+ "useAuth hook exposes isAuthenticated, user, login, and logout"
78
+ ]
79
+ }
80
+ ```
81
+
82
+ </examples>
@@ -1,17 +1,22 @@
1
1
  ## Project Resources
2
2
 
3
- Each repository may ship with project-specific instruction files at its root and a `.claude/` configuration directory.
4
- Read them during exploration and reference them throughout planning:
3
+ During exploration, check for project instruction files if present. Treat whichever files exist as authoritative for
4
+ that codebase; skip silently when absent.
5
+
6
+ **Instruction files (any ecosystem):**
7
+
8
+ - **`CLAUDE.md` / `AGENTS.md`** — when present: project-level rules, conventions, and persistent memory
9
+ - **`.github/copilot-instructions.md`** — when present: GitHub Copilot-specific repository instructions
10
+ - **`README.md`** and manifest files (`package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod`, `pom.xml`, …) — setup,
11
+ scripts, and dependencies
12
+
13
+ **Claude-specific configuration (only when the repo has a `.claude/` directory):**
5
14
 
6
- - **`CLAUDE.md` / `AGENTS.md`** — project-level rules, conventions, and persistent memory
7
- - **`.github/copilot-instructions.md`** — GitHub Copilot-specific repository instructions, when present
8
15
  - **`.mcp.json`** — MCP servers the project ships with (Playwright, database inspection, etc.)
9
16
  - **`.claude/agents/`** — subagent definitions for Task-tool delegation
10
17
  - **`.claude/skills/`** — custom skills invokable with the Skill tool for project-specific workflows
11
18
  - **`.claude/settings.json`** / **`.claude/settings.local.json`** — tool permissions, model preferences, hooks
12
19
 
13
- When repository instruction files exist, treat their instructions as authoritative for that codebase.
14
-
15
20
  ## What Makes a Great Task
16
21
 
17
22
  A great task can be picked up cold by an AI agent, implemented independently, and verified as done — by a _different_ AI
@@ -63,6 +68,8 @@ Right size (one task covering the full change):
63
68
 
64
69
  ### Verification Criteria (The Evaluator Contract)
65
70
 
71
+ _See the `<examples>` block at the end of this page for good/bad pairs._
72
+
66
73
  Every task must include a `verificationCriteria` array — these are the **done contract** between the generator (task
67
74
  executor) and the evaluator (independent reviewer). The evaluator grades each criterion as pass/fail across four
68
75
  floor dimensions: correctness, completeness, safety, and consistency. If ANY dimension fails, the task fails
@@ -86,21 +93,6 @@ Write criteria that are:
86
93
  - **Unambiguous** — two reviewers would agree on pass/fail
87
94
  - **Outcome-oriented** — describe WHAT is true when done, not HOW to get there
88
95
 
89
- > **Good criteria (verifiable, unambiguous):**
90
- >
91
- > - "TypeScript compiles with no errors"
92
- > - "All existing tests pass plus new tests for the added feature"
93
- > - "GET /api/users returns 200 with paginated user list"
94
- > - "GET /api/users?page=-1 returns 400 with validation error"
95
- > - "Component renders without console errors in browser"
96
- > - "Playwright e2e: login flow completes without errors" _(UI tasks with Playwright configured)_
97
-
98
- > **Bad criteria (vague, not independently verifiable):**
99
- >
100
- > - "Code is clean and well-structured"
101
- > - "Error handling is appropriate"
102
- > - "Performance is acceptable"
103
-
104
96
  Aim for 2-4 criteria per task. Include at least one criterion that is computationally checkable (test pass, type check,
105
97
  lint clean). For **UI/frontend tasks**, if the project has Playwright configured, add a browser-verifiable criterion —
106
98
  the evaluator will attempt visual verification using Playwright or browser tools when the project supports it.
@@ -108,7 +100,8 @@ the evaluator will attempt visual verification using Playwright or browser tools
108
100
  ### Guidelines
109
101
 
110
102
  1. **Outcome-oriented** — Each task delivers a testable result
111
- 2. **Merge create+use** — Never separate "create X" from "use X" — that is one task
103
+ 2. **Merge create+use** — Keep "create X" and "use X" in one task except when a stable contract makes them
104
+ independently testable (e.g. schema + migration lands first, consumer wiring lands after)
112
105
  3. **Let scope drive task count** — do not aim for a specific number. Fewer, larger coherent tasks beat many
113
106
  micro-tasks; split only when parallelism or a clean boundary justifies it
114
107
  4. **Merge serial chains** — If tasks only make sense when run in sequence, fold them into one task
@@ -134,6 +127,8 @@ the evaluator will attempt visual verification using Playwright or browser tools
134
127
 
135
128
  ## Dependency Graph
136
129
 
130
+ _See the `<examples>` block at the end of this page for good/bad pairs._
131
+
137
132
  Tasks execute in dependency order — foundations before dependents.
138
133
 
139
134
  ### Guidelines
@@ -143,29 +138,6 @@ Tasks execute in dependency order — foundations before dependents.
143
138
  3. **Maximize parallelism** — Only add `blockedBy` when there is a real code dependency
144
139
  4. **Validate the DAG** — No cycles; earlier tasks cannot depend on later ones
145
140
 
146
- ### Good Dependency Graph
147
-
148
- ```
149
- Task 1: Add shared validation utilities (no deps)
150
- Task 2: Implement user registration form (blockedBy: [1])
151
- Task 3: Implement user profile editor (blockedBy: [1])
152
- Task 4: Add form submission analytics (blockedBy: [2, 3])
153
- ```
154
-
155
- Tasks 2 and 3 run in parallel (both depend only on 1). Task 4 waits for both.
156
-
157
- ### Bad Dependency Graph
158
-
159
- ```
160
- Task 1: Add validation utilities (no deps)
161
- Task 2: Implement registration form (blockedBy: [1])
162
- Task 3: Implement profile editor (blockedBy: [2]) <-- WRONG
163
- Task 4: Add submission analytics (blockedBy: [3]) <-- WRONG
164
- ```
165
-
166
- Task 3 does not actually need Task 2 — it only needs Task 1. This creates a false serial chain that prevents parallel
167
- execution.
168
-
169
141
  **Dependency test**: For each `blockedBy` entry, ask: "Does this task literally use code produced by the blocker?" If
170
142
  not, remove the dependency.
171
143
 
@@ -177,10 +149,14 @@ Each task must specify which repository it executes in via `projectPath`:
177
149
  2. **Split by repo** — If a ticket affects multiple repos, create separate tasks per repo with dependencies
178
150
  3. **Use exact paths** — `projectPath` must be one of the absolute paths from the project's Repositories section
179
151
 
180
- Never create a task that modifies files in multiple repos — split it.
152
+ Split cross-repo work into one task per repo with `blockedBy` — except when atomicity is genuinely required (a
153
+ single commit must land in both repos to avoid broken state), in which case flag the task and surface the need for
154
+ human coordination.
181
155
 
182
156
  ## Precise Step Declarations
183
157
 
158
+ _See the `<examples>` block at the end of this page for good/bad pairs._
159
+
184
160
  Every task must include explicit, actionable steps — the implementation checklist.
185
161
 
186
162
  ### Step Requirements
@@ -194,38 +170,6 @@ Every task must include explicit, actionable steps — the implementation checkl
194
170
  instruction files
195
171
  5. **No ambiguity** — Another developer should be able to follow steps without guessing
196
172
 
197
- Bad — vague steps that force the agent to guess:
198
-
199
- ```json
200
- {
201
- "name": "Add user authentication",
202
- "steps": ["Implement auth", "Add tests", "Update docs"]
203
- }
204
- ```
205
-
206
- Good — precise steps with file paths and pattern references:
207
-
208
- ```json
209
- {
210
- "name": "Add user authentication",
211
- "projectPath": "/Users/dev/my-app",
212
- "steps": [
213
- "Create auth service in src/services/auth.ts with login(), logout(), getCurrentUser() — follow the pattern in src/services/user.ts for error handling and return types",
214
- "Add AuthContext provider in src/contexts/AuthContext.tsx wrapping the app — follow existing ThemeContext pattern",
215
- "Create useAuth hook in src/hooks/useAuth.ts exposing auth state and actions",
216
- "Add ProtectedRoute wrapper component in src/components/ProtectedRoute.tsx",
217
- "Write unit tests in src/services/__tests__/auth.test.ts — follow test patterns in src/services/__tests__/user.test.ts",
218
- "Run pnpm typecheck && pnpm lint && pnpm test — all pass"
219
- ],
220
- "verificationCriteria": [
221
- "TypeScript compiles with no errors",
222
- "All existing tests pass plus new auth tests",
223
- "ProtectedRoute redirects unauthenticated users to /login",
224
- "useAuth hook exposes isAuthenticated, user, login, and logout"
225
- ]
226
- }
227
- ```
228
-
229
173
  Use actual file paths discovered during exploration. Reference the repository instruction files for verification
230
174
  commands.
231
175
 
@@ -234,6 +178,10 @@ commands.
234
178
  Start with an action verb (Add, Create, Update, Fix, Refactor, Remove, Migrate). Include the feature/concept, not files.
235
179
  Keep under 60 characters. Avoid vague verbs (Improve, Enhance, Handle).
236
180
 
181
+ See `<examples>` below for concrete good/bad pairs.
182
+
183
+ {{PLAN_COMMON_EXAMPLES}}
184
+
237
185
  ## Delegation to Available Tooling
238
186
 
239
187
  The "Project Tooling" section below (when present) lists subagents, skills, and MCP servers detected in the target
@@ -72,7 +72,7 @@ before the plan is finalized.
72
72
  **Steps:**
73
73
  1. Create src/utils/csvExport.ts with column formatters for date, number, and string types
74
74
  2. Add unit tests in src/utils/__tests__/csvExport.test.ts covering empty data, special characters, and large datasets
75
- 3. Run `pnpm typecheck && pnpm lint && pnpm test` — all pass
75
+ 3. Run the project's check/test/build gate — all pass
76
76
  ```
77
77
 
78
78
  2. **Show the dependency graph** — Make it obvious which tasks run in parallel vs sequentially, and why each dependency
@@ -123,10 +123,14 @@ The sprint contains:
123
123
  - **Existing Tasks**: Tasks from a previous planning run (your output replaces all existing tasks)
124
124
  - **Projects**: Each ticket belongs to a project which may have multiple repository paths
125
125
 
126
+ <context>
127
+
126
128
  {{CONTEXT}}
127
129
 
128
130
  {{COMMON}}
129
131
 
132
+ </context>
133
+
130
134
  ### Repository Assignment
131
135
 
132
136
  Repositories have been pre-selected by the user. Only create tasks targeting these repositories — the harness executes
@@ -166,7 +170,7 @@ Use this exact JSON Schema:
166
170
  "Update ExportController.getExport() in src/controllers/export.ts to parse and validate date range params",
167
171
  "Add date range filtering to ExportRepository.findRecords() in src/repositories/export.ts",
168
172
  "Write tests in src/controllers/__tests__/export.test.ts for: no dates, valid range, invalid range, start > end",
169
- "Run pnpm typecheck && pnpm lint && pnpm test — all pass"
173
+ "{{CHECK_GATE_EXAMPLE}}"
170
174
  ],
171
175
  "verificationCriteria": [
172
176
  "TypeScript compiles with no errors",
@@ -19,16 +19,20 @@ something entirely new (create a file, add a feature, tweak a script), do exactl
19
19
 
20
20
  ## User Feedback — Implement this
21
21
 
22
+ <task-specification>
23
+
22
24
  {{FEEDBACK}}
23
25
 
26
+ </task-specification>
27
+
24
28
  ## Protocol
25
29
 
26
30
  1. **Parse the feedback as an instruction** — Identify the concrete change(s) requested. If it says "create X", create
27
31
  X. If it says "change Y", change Y. Do not ask for clarification unless the instruction is genuinely contradictory.
28
32
  2. **Implement the change** — Create or edit the files required to satisfy the feedback. Make the smallest change that
29
33
  fully carries out the instruction.
30
- 3. **Run verification** — If the project has a check script (e.g., `pnpm test`, `pnpm typecheck`), run it and confirm
31
- it passes. If no check script is configured, skip this step.
34
+ 3. **Run verification** — If the project has a check script (test, typecheck, lint, or build command), run it and
35
+ confirm it passes. If no check script is configured, skip this step.
32
36
  4. **Output verification results** — Wrap any verification output in `<task-verified>...</task-verified>`. If you
33
37
  skipped step 3, emit `<task-verified>no check script configured; change applied</task-verified>`.
34
38
  5. **Commit your work** — Stage the modified files and create a git commit with a descriptive message summarising the
@@ -58,10 +58,16 @@ Now apply semantic judgment to what the computational checks cannot catch:
58
58
  2. **Read the changed files carefully** — understand the full implementation, not just the diff.
59
59
  3. **Read surrounding code** — check that the implementation follows existing patterns and conventions.
60
60
  4. **Augment the Project Tooling section above** — the section lists detected subagents, skills, and MCP servers.
61
- Additionally skim `package.json` scripts, `playwright.config.*`, `cypress.config.*`, `vitest.config.*`, `.storybook/`,
62
- `CLAUDE.md`, and `.github/copilot-instructions.md` for the test/verification stack and any conventions the section
63
- didn't surface. Note which application type this is (backend API / CLI / frontend SPA / fullstack / library) — it
64
- determines which verification methods apply.
61
+ Additionally skim repository config for the test/verification stack and any conventions the section didn't surface.
62
+ Note which application type this is (backend API / CLI / frontend SPA / fullstack / library) — it determines which
63
+ verification methods apply.
64
+
65
+ <examples>
66
+ Representative files to scan when present — not an exhaustive list, adapt to the ecosystem:
67
+ `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod`, `playwright.config.*`, `cypress.config.*`,
68
+ `vitest.config.*`, `.storybook/`, `CLAUDE.md`, `AGENTS.md`, `.github/copilot-instructions.md`.
69
+ </examples>
70
+
65
71
  5. **Run extended verification when the detected tooling makes it cheap and deterministic:**
66
72
  - **Frontend/UI tasks** — if Playwright or Cypress is configured, run a targeted e2e test or use a browser MCP to
67
73
  verify the changed UI renders correctly (console errors, layout, interactive behaviour).
@@ -78,14 +84,15 @@ Evaluate the implementation across the dimensions below. Each dimension is pass/
78
84
  dimension fails, the overall evaluation fails. The first four are the floor — every task is graded on them. The
79
85
  planner may have flagged additional task-specific dimensions; when present, they are graded on top of the floor.
80
86
 
81
- **Dimension 1 — Correctness**
87
+ <dimension name="Correctness" floor="true">
82
88
  Does the implementation do what the specification says? Check for:
83
89
 
84
90
  - Logical errors, off-by-one, race conditions, type issues
85
91
  - Behavior matches each verification criterion (grade each one explicitly)
86
92
  - Edge cases handled where specified
93
+ </dimension>
87
94
 
88
- **Dimension 2 — Completeness**
95
+ <dimension name="Completeness" floor="true">
89
96
  Is the full specification implemented? Check for:
90
97
 
91
98
  - Every verification criterion is satisfied (not just most)
@@ -93,25 +100,29 @@ Is the full specification implemented? Check for:
93
100
  - No TODO/FIXME/HACK markers left behind that indicate unfinished work
94
101
  - Uncommitted changes that look like incomplete work (WIP diffs, stashed edits) — committing is expected unless the
95
102
  task's contract says otherwise
103
+ </dimension>
96
104
 
97
- **Dimension 3 — Safety**
105
+ <dimension name="Safety" floor="true">
98
106
  Are there security or reliability issues? Check for:
99
107
 
100
108
  - Injection vulnerabilities (SQL, command, XSS)
101
109
  - Validation gaps on external input
102
110
  - Exposed secrets, hardcoded credentials
103
111
  - Unsafe error handling that leaks internals
112
+ </dimension>
104
113
 
105
- **Dimension 4 — Consistency**
114
+ <dimension name="Consistency" floor="true">
106
115
  Does the implementation fit the codebase? Check for:
107
116
 
108
117
  - Follows existing patterns and conventions (naming, structure, error handling)
109
118
  - Uses existing utilities instead of reinventing them
110
119
  - No unnecessary changes outside the task scope — spec drift
111
120
  - Test patterns match the project's existing test style
121
+ </dimension>
112
122
  {{EXTRA_DIMENSIONS_SECTION}}
113
- Evaluate only what was asked vs what was delivered — suggesting improvements beyond the task scope creates noise that
114
- distracts from the actual pass/fail decision.
123
+
124
+ Evaluate only what was asked vs what was delivered — suggesting improvements beyond the task scope creates noise that
125
+ distracts from the actual pass/fail decision.
115
126
 
116
127
  ### Pass Bar
117
128
 
@@ -165,6 +176,8 @@ Each issue must reference which dimension it violates.]
165
176
 
166
177
  ### Calibration Examples
167
178
 
179
+ <examples>
180
+
168
181
  **Example of a correct PASS:**
169
182
 
170
183
  > Task: "Add date validation to export endpoint"
@@ -193,6 +206,8 @@ Each issue must reference which dimension it violates.]
193
206
  > 2. [Safety] `src/repositories/users.ts:23` — `WHERE name LIKE '%${query}%'` is SQL injection. Use parameterized
194
207
  > query: `WHERE name LIKE $1` with `%${query}%` as parameter.
195
208
 
209
+ </examples>
210
+
196
211
  Be direct and specific — point to files, lines, and concrete problems.
197
212
 
198
213
  {{SIGNALS}}
@@ -15,16 +15,17 @@ When finished, emit a signal from the `<signals>` block below.
15
15
  - **Respect task boundaries** — complete exactly the declared steps for this one task, then stop. Other agents may be
16
16
  working on neighboring tasks in parallel; skipping steps, improvising, or editing files outside the declared set
17
17
  causes merge conflicts with their work.
18
- - **Prefer fixing the code over the test** — a failing test usually indicates a bug in the implementation. Update a
19
- test only when the declared steps intentionally change the behaviour it asserts (e.g. a regression fix, a contract
20
- change). Do not remove, skip, or weaken a test to make a failure go away — that masks real bugs. If the right move
21
- is genuinely ambiguous, signal `<task-blocked>` so a human can decide.
18
+ - **Prefer fixing the code over the test** — a failing test usually indicates a bug in the implementation. Update
19
+ tests only when the declared steps intentionally change the asserted behaviour (e.g. a contract change, a regression
20
+ fix). If the right move is genuinely ambiguous, signal `<task-blocked>` so a human can decide — do not silently
21
+ weaken a test to make a failure go away.
22
22
  - **Verify before completing** — the harness runs a post-task check gate; unverified work will be caught and rejected.
23
23
  - **Append progress, never overwrite** — append each progress entry at the end of the progress file. Overwriting
24
24
  erases context that downstream tasks depend on.
25
25
  - **Leave {{CONTEXT_FILE}} and task definitions alone** — the context file is cleaned up by the harness (committing it
26
26
  pollutes the repo); the task name, description, steps, and other task files are immutable.
27
- {{COMMIT_CONSTRAINT}}
27
+
28
+ {{COMMIT_CONSTRAINT}}
28
29
 
29
30
  </constraints>
30
31
 
@@ -93,7 +94,8 @@ Complete these steps IN ORDER:
93
94
  1. **Confirm all steps done** — Every task step has been completed
94
95
  2. **Run ALL verification commands** — Execute every verification command (see Check Script section in the context file
95
96
  or project instructions). Fix any failures before proceeding. The harness runs the check script as a post-task
96
- gate — your task is not marked done unless it passes.{{COMMIT_STEP}}
97
+ gate — your task is not marked done unless it passes.
98
+ {{COMMIT_STEP}}
97
99
  3. **Update progress file** — Append to {{PROGRESS_FILE}} using this format:
98
100
 
99
101
  ```markdown
@@ -142,17 +144,15 @@ Complete these steps IN ORDER:
142
144
  - The WHERE clause builder in src/repositories/base.ts can be extended for future filters
143
145
  ```
144
146
 
145
- 4. **Output verification results:**
147
+ 4. **Output verification results** — use the actual commands the harness ran; the examples below are illustrative:
146
148
 
147
149
  <!-- prettier-ignore -->
148
150
  ```
149
151
  <task-verified>
150
- $ pnpm typecheck
151
- No type errors
152
- $ pnpm lint
153
- No lint errors
154
- $ pnpm test
155
- 47 tests passed
152
+ $ <check-command-1>
153
+ <output>
154
+ $ <check-command-2>
155
+ <output>
156
156
  </task-verified>
157
157
  ```
158
158
 
@@ -223,10 +223,14 @@ The `ref` field should match either:
223
223
  - The ticket's internal ID
224
224
  - The exact ticket title
225
225
 
226
+ <task-specification>
227
+
226
228
  ## Ticket to Refine
227
229
 
228
230
  {{TICKET}}
229
231
 
232
+ </task-specification>
233
+
230
234
  {{ISSUE_CONTEXT}}
231
235
 
232
236
  ---
@@ -1,3 +1,5 @@
1
+ <validation-checklist>
2
+
1
3
  ## Pre-Output Validation
2
4
 
3
5
  Before writing the JSON output, verify EVERY item:
@@ -12,3 +14,5 @@ Before writing the JSON output, verify EVERY item:
12
14
  8. **`projectPath` assigned** — every task uses a path from the available repositories
13
15
  9. **Verification criteria** — every task has 2-4 `verificationCriteria` that are testable and unambiguous
14
16
  10. **Raw JSON output** — the output is valid JSON matching the schema exactly; the harness parses the output directly as JSON, so emit it without markdown fences, commentary, or surrounding prose
17
+
18
+ </validation-checklist>
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  parseSprintStartArgs,
4
4
  sprintStartCommand
5
- } from "./chunk-3HJNVQ7N.mjs";
5
+ } from "./chunk-TKPTT2UG.mjs";
6
6
  import "./chunk-JOQO4HMM.mjs";
7
7
  import "./chunk-CFUVE2BP.mjs";
8
8
  import "./chunk-747KW2RW.mjs";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralphctl",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "description": "Agent harness for long-running AI coding tasks — orchestrates Claude Code & GitHub Copilot across repositories",
5
5
  "homepage": "https://github.com/lukas-grigis/ralphctl",
6
6
  "type": "module",