ralphctl 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/dist/{add-MG26JWBP.mjs → add-DVPVHENV.mjs} +7 -7
  2. package/dist/{add-ZZYL4BSF.mjs → add-YVXM34RP.mjs} +6 -6
  3. package/dist/{chunk-LDSG7G2T.mjs → chunk-BSB4EDGR.mjs} +2 -2
  4. package/dist/{chunk-RQGD5WS6.mjs → chunk-CBMFRQ4Y.mjs} +3 -3
  5. package/dist/{chunk-Q4AVHUZL.mjs → chunk-FNAAA32W.mjs} +3 -3
  6. package/dist/{chunk-EGUFQNRB.mjs → chunk-GQ2WFKBN.mjs} +3 -3
  7. package/dist/{chunk-LCY32RW4.mjs → chunk-OFILN7QL.mjs} +183 -39
  8. package/dist/{chunk-MDE6KPJQ.mjs → chunk-OGEXYSFS.mjs} +5 -5
  9. package/dist/{chunk-TDBEEHTS.mjs → chunk-PYZEQ2VK.mjs} +5 -5
  10. package/dist/{chunk-57UWLHRH.mjs → chunk-VAZ3LJBI.mjs} +12 -1
  11. package/dist/{chunk-D2HWXEHH.mjs → chunk-WDMLPXOD.mjs} +2 -2
  12. package/dist/{chunk-WZTY77GY.mjs → chunk-XN2UIHBY.mjs} +10 -3
  13. package/dist/{chunk-WOMGKKZY.mjs → chunk-XPLYLRIM.mjs} +319 -15
  14. package/dist/{chunk-2FT37OZX.mjs → chunk-ZLWSPLWI.mjs} +53 -7
  15. package/dist/cli.mjs +19 -17
  16. package/dist/create-Z635FQKO.mjs +15 -0
  17. package/dist/{handle-SYVCFI6Y.mjs → handle-23EFF3BE.mjs} +1 -1
  18. package/dist/{mount-2ANLHHQE.mjs → mount-H2IH3MWE.mjs} +1455 -1193
  19. package/dist/{project-JF47ZWMF.mjs → project-DQHF4ISP.mjs} +3 -3
  20. package/dist/prompts/sprint-feedback.md +4 -0
  21. package/dist/prompts/task-evaluation.md +44 -2
  22. package/dist/prompts/task-execution.md +5 -0
  23. package/dist/{resolver-PG2DZEBX.mjs → resolver-OVPYVW6Q.mjs} +3 -3
  24. package/dist/{sprint-54DOSIJK.mjs → sprint-4E26AB5F.mjs} +4 -4
  25. package/dist/start-2WH4BTDB.mjs +19 -0
  26. package/package.json +1 -1
  27. package/dist/create-PQK6KKRD.mjs +0 -15
  28. package/dist/start-2SZTBKGF.mjs +0 -19
@@ -11,12 +11,12 @@ import {
11
11
  removeProjectRepo,
12
12
  resolveRepoPath,
13
13
  updateProject
14
- } from "./chunk-LDSG7G2T.mjs";
15
- import "./chunk-D2HWXEHH.mjs";
14
+ } from "./chunk-BSB4EDGR.mjs";
15
+ import "./chunk-WDMLPXOD.mjs";
16
16
  import {
17
17
  ProjectExistsError,
18
18
  ProjectNotFoundError
19
- } from "./chunk-57UWLHRH.mjs";
19
+ } from "./chunk-VAZ3LJBI.mjs";
20
20
  export {
21
21
  ProjectExistsError,
22
22
  ProjectNotFoundError,
@@ -49,6 +49,10 @@ interpretation and proceed.
49
49
  - **The feedback is the authoritative instruction** — implement it even if it seems unrelated to the completed tasks.
50
50
  - **Do the smallest change that fully satisfies the feedback** — no speculative refactors, no adjacent cleanup.
51
51
  - **Make the edits — don't just describe them** — the harness does not apply edits for you; you must write the files.
52
+ - **Never reference sprint-local identifiers in code** — do not mention acceptance-criterion labels (`AC1`, `AC2`,
53
+ `AC1–AC6`), ticket numbers, task IDs, or sprint IDs in source files, comments, docstrings, test names, commit
54
+ messages, or any committed artefact. These identifiers are ephemeral sprint metadata and become stale. Describe
55
+ the underlying invariant or constraint directly instead.
52
56
  - **Must commit** — Create a git commit before signaling completion. Uncommitted changes leave the sprint branch dirty
53
57
  and block sprint close.
54
58
 
@@ -50,13 +50,19 @@ Computational results are ground truth. If the check script fails, stop early
50
50
 
51
51
  ### Phase 2: Inferential Investigation (reason about the changes)
52
52
 
53
- Now apply semantic judgment to what the computational checks cannot catch:
53
+ Now apply semantic judgment to what the computational checks cannot catch. Every finding you emit
54
+ must be traceable to a concrete observation from this phase — a file path, a line, a function name, a
55
+ specific value, a tool output, or a quoted snippet. Generic approval language ("looks good", "appears
56
+ correct", "seems fine", "looks clean", "should be OK") is **insufficient** and MUST be treated as a
57
+ rubber stamp — flag it as a Completeness failure rather than emitting it yourself.
54
58
 
55
59
  1. **Diff the task's commit range** — derive the base from the branch's divergence point (`git merge-base HEAD main`
56
60
  or the closest equivalent) and run `git diff <base>..HEAD`. Tasks may produce multiple commits; do not assume
57
61
  a single commit.
58
- 2. **Read the changed files carefully** — understand the full implementation, not just the diff.
62
+ 2. **Read the changed files carefully** — understand the full implementation, not just the diff. Note
63
+ specific constructs worth citing later (new functions, changed signatures, edge-case branches).
59
64
  3. **Read surrounding code** — check that the implementation follows existing patterns and conventions.
65
+ Cite a specific sibling file or function when the comparison matters.
60
66
  4. **Augment the Project Tooling section above** — the section lists detected subagents, skills, and MCP servers.
61
67
  Additionally skim repository config for the test/verification stack and any conventions the section didn't surface.
62
68
  Note which application type this is (backend API / CLI / frontend SPA / fullstack / library) — it determines which
@@ -84,6 +90,13 @@ Evaluate the implementation across the dimensions below. Each dimension is pass/
84
90
  dimension fails, the overall evaluation fails. The first four are the floor — every task is graded on them. The
85
91
  planner may have flagged additional task-specific dimensions; when present, they are graded on top of the floor.
86
92
 
93
+ **Evidence rule — load-bearing:** Every dimension line, PASS or FAIL, MUST cite a concrete observation
94
+ from Phase 1 or Phase 2. A PASS without evidence is not a PASS — it is a rubber stamp. Good evidence
95
+ names something specific: a file path, a line number, a test count, a command output, a function
96
+ name, a verification criterion that was graded, a pattern from a sibling file. Evidence that only
97
+ restates the criterion in different words ("all tests pass", "implementation matches the spec", "no
98
+ issues found") is still generic and does NOT satisfy this rule.
99
+
87
100
  <dimension name="Correctness" floor="true">
88
101
  Does the implementation do what the specification says? Check for:
89
102
 
@@ -137,6 +150,25 @@ Fail only on missed verification criteria, skipped steps, safety issues, or genu
137
150
  not style preferences, naming opinions, or improvements beyond the task scope. When verification criteria are provided,
138
151
  grade primarily against them — they are the contract.
139
152
 
153
+ ### Anti-Rubber-Stamp Guard
154
+
155
+ Before you decide the verdict, answer both questions honestly:
156
+
157
+ 1. **Did you actually run the Phase 1 verification commands?** If the check script exists and you did
158
+ not execute it, or you did not run `git status` / `git log`, you lack the ground truth that
159
+ authoritatively settles Correctness and Completeness.
160
+ 2. **Can you name a specific observation for each dimension?** For every PASS and FAIL line you are
161
+ about to emit, point to a concrete piece of evidence — a file path, a line number, a test count,
162
+ a tool output, a function name, a verification criterion you graded. "Looks good" / "appears
163
+ correct" / "no issues found" are NOT specific observations.
164
+
165
+ If the answer to either question is **no**, you MUST FAIL Completeness with a one-line finding
166
+ explaining what you skipped, and emit `<evaluation-failed>` — even if everything else seems fine. A
167
+ rubber-stamp PASS is worse than a real FAIL because it misleads the harness into marking work done
168
+ when it was never audited. This guard exists because the evaluator is the last line of defense
169
+ against silent-pass regressions; the cost of a false FAIL is one extra fix iteration, the cost of a
170
+ false PASS is a shipped bug.
171
+
140
172
  ## Output
141
173
 
142
174
  Structure your output as a dimension assessment followed by a verdict signal.
@@ -144,8 +176,18 @@ Structure your output as a dimension assessment followed by a verdict signal.
144
176
  **Format rule:** Each dimension MUST be a single line: `**Dimension**: PASS/FAIL — one-line summary`. Put detailed
145
177
  findings in the critique section below, not in the dimension line.
146
178
 
179
+ **Justification rule (enforced):** The `— one-line summary` after the verdict is required, not
180
+ decorative. A bare `**Dimension**: PASS` with no em-dash and no finding is invalid — it parses as a
181
+ rubber stamp and the harness will treat the evaluation as failed. Every dimension line needs an
182
+ em-dash (or hyphen) followed by a non-empty, concrete finding.
183
+
147
184
  ### If the implementation passes all dimensions:
148
185
 
186
+ Emit `<evaluation-passed>` ONLY when every dimension has a one-line justification that cites
187
+ concrete evidence. A `<evaluation-passed>` signal after bare `PASS` lines or after generic approval
188
+ phrasing is a contract violation — in that case, emit `<evaluation-failed>` instead with a
189
+ Completeness finding that you could not justify the pass.
190
+
149
191
  ```
150
192
  ## Assessment
151
193
 
@@ -24,6 +24,11 @@ When finished, emit a signal from the `<signals>` block below.
24
24
  erases context that downstream tasks depend on.
25
25
  - **Leave {{CONTEXT_FILE}} and task definitions alone** — the context file is cleaned up by the harness (committing it
26
26
  pollutes the repo); the task name, description, steps, and other task files are immutable.
27
+ - **Never reference sprint-local identifiers in code** — do not mention acceptance-criterion labels (`AC1`, `AC2`,
28
+ `AC1–AC6`), ticket numbers, task IDs, or sprint IDs in source files, comments, docstrings, test names, commit
29
+ messages, or any committed artefact. These identifiers are ephemeral sprint metadata and become stale as tickets
30
+ close. If a comment needs to explain WHY, state the underlying invariant or constraint directly (e.g. "exactly one
31
+ confirmation per destructive action") rather than citing the AC that mandates it.
27
32
 
28
33
  {{COMMIT_CONSTRAINT}}
29
34
 
@@ -4,14 +4,14 @@ import {
4
4
  } from "./chunk-IWXBJD2D.mjs";
5
5
  import {
6
6
  IOError
7
- } from "./chunk-57UWLHRH.mjs";
7
+ } from "./chunk-VAZ3LJBI.mjs";
8
8
 
9
9
  // src/integration/cli/completion/resolver.ts
10
10
  var dynamicResolvers = {
11
11
  "--project": async () => {
12
12
  const result = await wrapAsync(
13
13
  async () => {
14
- const { listProjects } = await import("./project-JF47ZWMF.mjs");
14
+ const { listProjects } = await import("./project-DQHF4ISP.mjs");
15
15
  return listProjects();
16
16
  },
17
17
  (err) => new IOError("Failed to load projects for completion", err instanceof Error ? err : void 0)
@@ -45,7 +45,7 @@ var configValueCompletions = {
45
45
  async function getSprintCompletions() {
46
46
  const result = await wrapAsync(
47
47
  async () => {
48
- const { listSprints } = await import("./sprint-54DOSIJK.mjs");
48
+ const { listSprints } = await import("./sprint-4E26AB5F.mjs");
49
49
  return listSprints();
50
50
  },
51
51
  (err) => new IOError("Failed to load sprints for completion", err instanceof Error ? err : void 0)
@@ -11,15 +11,15 @@ import {
11
11
  logSprintBaselines,
12
12
  resolveSprintId,
13
13
  saveSprint
14
- } from "./chunk-RQGD5WS6.mjs";
15
- import "./chunk-WZTY77GY.mjs";
14
+ } from "./chunk-CBMFRQ4Y.mjs";
15
+ import "./chunk-XN2UIHBY.mjs";
16
16
  import "./chunk-IWXBJD2D.mjs";
17
- import "./chunk-D2HWXEHH.mjs";
17
+ import "./chunk-WDMLPXOD.mjs";
18
18
  import {
19
19
  NoCurrentSprintError,
20
20
  SprintNotFoundError,
21
21
  SprintStatusError
22
- } from "./chunk-57UWLHRH.mjs";
22
+ } from "./chunk-VAZ3LJBI.mjs";
23
23
  export {
24
24
  NoCurrentSprintError,
25
25
  SprintNotFoundError,
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ parseSprintStartArgs,
4
+ sprintStartCommand
5
+ } from "./chunk-OFILN7QL.mjs";
6
+ import "./chunk-ZLWSPLWI.mjs";
7
+ import "./chunk-GQ2WFKBN.mjs";
8
+ import "./chunk-CFUVE2BP.mjs";
9
+ import "./chunk-747KW2RW.mjs";
10
+ import "./chunk-BSB4EDGR.mjs";
11
+ import "./chunk-CBMFRQ4Y.mjs";
12
+ import "./chunk-XN2UIHBY.mjs";
13
+ import "./chunk-IWXBJD2D.mjs";
14
+ import "./chunk-WDMLPXOD.mjs";
15
+ import "./chunk-VAZ3LJBI.mjs";
16
+ export {
17
+ parseSprintStartArgs,
18
+ sprintStartCommand
19
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralphctl",
3
- "version": "0.4.3",
3
+ "version": "0.4.5",
4
4
  "description": "Agent harness for long-running AI coding tasks — orchestrates Claude Code & GitHub Copilot across repositories",
5
5
  "homepage": "https://github.com/lukas-grigis/ralphctl",
6
6
  "type": "module",
@@ -1,15 +0,0 @@
1
- #!/usr/bin/env node
2
- import {
3
- sprintCreateCommand
4
- } from "./chunk-Q4AVHUZL.mjs";
5
- import "./chunk-CFUVE2BP.mjs";
6
- import "./chunk-747KW2RW.mjs";
7
- import "./chunk-LDSG7G2T.mjs";
8
- import "./chunk-RQGD5WS6.mjs";
9
- import "./chunk-WZTY77GY.mjs";
10
- import "./chunk-IWXBJD2D.mjs";
11
- import "./chunk-D2HWXEHH.mjs";
12
- import "./chunk-57UWLHRH.mjs";
13
- export {
14
- sprintCreateCommand
15
- };
@@ -1,19 +0,0 @@
1
- #!/usr/bin/env node
2
- import {
3
- parseSprintStartArgs,
4
- sprintStartCommand
5
- } from "./chunk-LCY32RW4.mjs";
6
- import "./chunk-2FT37OZX.mjs";
7
- import "./chunk-EGUFQNRB.mjs";
8
- import "./chunk-CFUVE2BP.mjs";
9
- import "./chunk-747KW2RW.mjs";
10
- import "./chunk-LDSG7G2T.mjs";
11
- import "./chunk-RQGD5WS6.mjs";
12
- import "./chunk-WZTY77GY.mjs";
13
- import "./chunk-IWXBJD2D.mjs";
14
- import "./chunk-D2HWXEHH.mjs";
15
- import "./chunk-57UWLHRH.mjs";
16
- export {
17
- parseSprintStartArgs,
18
- sprintStartCommand
19
- };