ralphctl 0.6.3 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -138
- package/dist/cli.mjs +20665 -21131
- package/dist/manifest.json +17 -19
- package/dist/prompts/_partials/signals-evaluation.md +14 -0
- package/dist/prompts/_partials/signals-task.md +26 -0
- package/dist/prompts/_partials/validation-checklist.md +24 -0
- package/dist/prompts/apply-feedback/template.md +118 -0
- package/dist/prompts/detect-scripts/template.md +118 -0
- package/dist/prompts/detect-skills/template.md +136 -0
- package/dist/prompts/evaluate/template.md +236 -0
- package/dist/prompts/ideate/template.md +172 -0
- package/dist/prompts/implement/template.md +203 -0
- package/dist/prompts/plan/template.md +347 -0
- package/dist/prompts/readiness/template.md +132 -0
- package/dist/prompts/refine/template.md +254 -0
- package/dist/skills/{default/abstraction-first → ralphctl-abstraction-first}/SKILL.md +1 -1
- package/dist/skills/{default/alignment → ralphctl-alignment}/SKILL.md +1 -1
- package/dist/skills/{default/iterative-review → ralphctl-iterative-review}/SKILL.md +1 -1
- package/package.json +25 -28
- package/dist/absolute-path-WUTZQ37D.mjs +0 -8
- package/dist/chunk-6RDMCLWU.mjs +0 -108
- package/dist/chunk-HIU74KTO.mjs +0 -1046
- package/dist/chunk-S3PTDH57.mjs +0 -78
- package/dist/chunk-WV4D2CPG.mjs +0 -26
- package/dist/prompt-adapter-JQICGVX7.mjs +0 -7
- package/dist/prompts/ideate.md +0 -204
- package/dist/prompts/plan-auto.md +0 -182
- package/dist/prompts/plan-common-examples.md +0 -82
- package/dist/prompts/plan-common.md +0 -200
- package/dist/prompts/plan-interactive.md +0 -212
- package/dist/prompts/repo-onboard.md +0 -201
- package/dist/prompts/signals-evaluation.md +0 -6
- package/dist/prompts/signals-planning.md +0 -5
- package/dist/prompts/signals-task.md +0 -10
- package/dist/prompts/sprint-feedback.md +0 -64
- package/dist/prompts/task-evaluation.md +0 -276
- package/dist/prompts/task-execution.md +0 -233
- package/dist/prompts/ticket-refine.md +0 -242
- package/dist/prompts/validation-checklist.md +0 -19
- package/dist/skills/exec/.gitkeep +0 -0
- package/dist/skills/plan/.gitkeep +0 -0
- package/dist/skills/refine/.gitkeep +0 -0
- package/dist/storage-paths-IPNZZM5D.mjs +0 -15
- package/dist/validation-error-QT6Q7FYU.mjs +0 -7
- package/dist/prompts/{harness-context.md → _partials/harness-context.md} +0 -0
package/dist/manifest.json
CHANGED
|
@@ -1,24 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 1,
|
|
3
|
-
"generatedAt": "2026-05-
|
|
3
|
+
"generatedAt": "2026-05-19T05:25:59.916Z",
|
|
4
4
|
"assets": [
|
|
5
|
-
"prompts/harness-context.md",
|
|
6
|
-
"prompts/
|
|
7
|
-
"prompts/
|
|
8
|
-
"prompts/
|
|
9
|
-
"prompts/
|
|
10
|
-
"prompts/
|
|
11
|
-
"prompts/
|
|
12
|
-
"prompts/
|
|
13
|
-
"prompts/
|
|
14
|
-
"prompts/
|
|
15
|
-
"prompts/
|
|
16
|
-
"prompts/
|
|
17
|
-
"prompts/
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"skills/
|
|
21
|
-
"skills/default/alignment/SKILL.md",
|
|
22
|
-
"skills/default/iterative-review/SKILL.md"
|
|
5
|
+
"prompts/_partials/harness-context.md",
|
|
6
|
+
"prompts/_partials/signals-evaluation.md",
|
|
7
|
+
"prompts/_partials/signals-task.md",
|
|
8
|
+
"prompts/_partials/validation-checklist.md",
|
|
9
|
+
"prompts/apply-feedback/template.md",
|
|
10
|
+
"prompts/detect-scripts/template.md",
|
|
11
|
+
"prompts/detect-skills/template.md",
|
|
12
|
+
"prompts/evaluate/template.md",
|
|
13
|
+
"prompts/ideate/template.md",
|
|
14
|
+
"prompts/implement/template.md",
|
|
15
|
+
"prompts/plan/template.md",
|
|
16
|
+
"prompts/readiness/template.md",
|
|
17
|
+
"prompts/refine/template.md",
|
|
18
|
+
"skills/ralphctl-abstraction-first/SKILL.md",
|
|
19
|
+
"skills/ralphctl-alignment/SKILL.md",
|
|
20
|
+
"skills/ralphctl-iterative-review/SKILL.md"
|
|
23
21
|
]
|
|
24
22
|
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
<signals>
|
|
2
|
+
|
|
3
|
+
Emit exactly one of the verdict signals below at the end of your evaluation. The harness records this as the
|
|
4
|
+
authoritative outcome and resumes the generator with the critique on failure.
|
|
5
|
+
|
|
6
|
+
- `<evaluation-passed>` — Every dimension scored 4 or 5; the implementation matches the specification.
|
|
7
|
+
- `<evaluation-failed>critique</evaluation-failed>` — At least one dimension scored 1, 2, or 3. The critique is
|
|
8
|
+
the actionable summary the generator will see — be specific about what is wrong and what needs to change. Do
|
|
9
|
+
not write generic praise or hedged language; the critique must point at concrete files, lines, or behaviours.
|
|
10
|
+
|
|
11
|
+
Per-dimension findings belong in your markdown body above the verdict signal so a human reviewer can audit your
|
|
12
|
+
reasoning. The signal itself is the verdict only.
|
|
13
|
+
|
|
14
|
+
</signals>
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
<signals>
|
|
2
|
+
|
|
3
|
+
Use these signals to communicate task outcome to the harness. The harness parses your output for these tags; nothing
|
|
4
|
+
else in your message is treated as a control signal.
|
|
5
|
+
|
|
6
|
+
- `<task-verified>output</task-verified>` — Records the verification commands you ran and their output. Required
|
|
7
|
+
before completion so the harness has on-disk evidence of what passed.
|
|
8
|
+
- `<task-complete>` — Marks the task as done. Emit ONLY after `<task-verified>` and only when every declared step
|
|
9
|
+
has been completed and every verification command passes.
|
|
10
|
+
- `<task-blocked>reason</task-blocked>` — Marks the task as blocked. Use when you cannot proceed: missing
|
|
11
|
+
dependency, ambiguous step, pre-existing failure, scope mismatch with the ticket. Be concrete in the reason —
|
|
12
|
+
the harness surfaces this verbatim to the operator.
|
|
13
|
+
|
|
14
|
+
Optional progress signals you may emit during long-running work:
|
|
15
|
+
|
|
16
|
+
- `<progress>short summary</progress>` — A one-line status update; the harness streams these to the live UI.
|
|
17
|
+
- `<note>text</note>` — Incidental observations that future tasks should be aware of (patterns, gotchas).
|
|
18
|
+
- `<change>text</change>` — A concrete change you made during this task. Granular ("added X", "renamed Y to Z", "deleted Z"). The harness appends these inline to the task's section in `progress.md`.
|
|
19
|
+
- `<learning>text</learning>` — Non-obvious project knowledge worth carrying across tasks (a hidden constraint, an undocumented convention, a gotcha you hit and resolved). The harness pins these under `## Learnings` at the top of `progress.md` so future tasks see them. Use sparingly; only the kind of insight you'd want a fresh agent to read first.
|
|
20
|
+
- `<decision>text</decision>` — An architectural or design choice with rationale ("chose path A over B because <reason>"). Higher signal than `<learning>`. The harness pins these under `## Decisions` in `progress.md`. Use only for choices a future maintainer would want explained.
|
|
21
|
+
|
|
22
|
+
Commit message — the harness owns the commit; you propose the wording (emit on every turn that produced edits):
|
|
23
|
+
|
|
24
|
+
- `<commit-message><subject>type(scope): imperative present tense, ≤72 chars</subject><body>WHY this change, what was considered, follow-ups — wrap lines at 72 chars; multiple paragraphs allowed</body></commit-message>` — Proposed message for the per-task `git commit` the harness runs after this turn. **Emit this on every task that touched a file.** The subject is required and should follow a Conventional Commits shape (`feat(scope): …`, `fix(scope): …`, `refactor(scope): …`, `chore(scope): …`, `docs(scope): …`). The body is required for anything beyond a trivial rename — explain WHY the change exists, what alternatives you considered, what follow-ups remain. The diff already shows the what; your body adds the reasoning a reviewer or future maintainer can't recover from the diff alone. Emit exactly one `<commit-message>` per turn; if you emit multiple, only the last one is used. Falling through to the default `task(<id>): <name>` produces uninformative history — omit only on pure-investigation turns that wrote nothing.
|
|
25
|
+
|
|
26
|
+
</signals>
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
<validation-checklist>
|
|
2
|
+
|
|
3
|
+
## Pre-Output Validation
|
|
4
|
+
|
|
5
|
+
Before writing the JSON output, verify EVERY item:
|
|
6
|
+
|
|
7
|
+
1. **Requirements understood** — every approved ticket is reflected in at least one task; nothing in scope is dropped.
|
|
8
|
+
2. **Exclusive file ownership** — each file is owned by exactly one task. When two tasks must edit the same file,
|
|
9
|
+
make the relationship explicit via `dependsOn` so they run in sequence, not in parallel.
|
|
10
|
+
3. **Foundations before dependents** — order tasks so prerequisites come first; `dependsOn` reflects genuine code
|
|
11
|
+
coupling, not arbitrary preference.
|
|
12
|
+
4. **Valid `dependsOn` references** — every id in `dependsOn` matches an earlier task's `id` placeholder; no
|
|
13
|
+
self-edges; no cycles.
|
|
14
|
+
5. **Precise steps** — each task has 2–8 specific, actionable steps. Each step references concrete files or
|
|
15
|
+
functions; "implement the feature" is not a step.
|
|
16
|
+
6. **Verification criteria** — each task has 2–4 `verificationCriteria` that are testable and unambiguous.
|
|
17
|
+
"Tests pass" alone is too vague — name the behaviour or invariant that proves the task is done.
|
|
18
|
+
7. **Repository assignment** — every task's `repositoryId` is one of the sprint's affected repositories.
|
|
19
|
+
8. **Raw JSON output** — output a single JSON array matching the Task schema. The harness parses your output
|
|
20
|
+
directly; emit it without markdown fences, commentary, or surrounding prose.
|
|
21
|
+
9. **Unique placeholder ids** — each task's `id` is a unique string within this array (used only for
|
|
22
|
+
`dependsOn` resolution; the harness assigns persistent ids on save).
|
|
23
|
+
|
|
24
|
+
</validation-checklist>
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Apply Feedback
|
|
2
|
+
|
|
3
|
+
You are applying user feedback to an already-implemented sprint. The implementation passed
|
|
4
|
+
its evaluator and the user has now opened a review. Your job is to apply EXACTLY the changes
|
|
5
|
+
the user requested in the feedback round below — nothing more, nothing less.
|
|
6
|
+
|
|
7
|
+
{{HARNESS_CONTEXT}}
|
|
8
|
+
|
|
9
|
+
<constraints>
|
|
10
|
+
|
|
11
|
+
**Read the previous rounds first.** The feedback log records every previous round and what
|
|
12
|
+
you did about it. Do not contradict prior decisions unless the latest round explicitly reverses them; the user has the latest round in front
|
|
13
|
+
of them as they wrote it.
|
|
14
|
+
|
|
15
|
+
**Apply only what's asked.** This is review, not implementation. Don't refactor surrounding
|
|
16
|
+
code, don't add tests the user didn't ask for, don't tighten unrelated types. The user is
|
|
17
|
+
shaping the work; you execute their direction.
|
|
18
|
+
|
|
19
|
+
**The harness commits on completion.** When you've applied the round's feedback, the harness will commit
|
|
20
|
+
your changes with the message `feedback(round-N): <body-snippet>`. Do not commit yourself.
|
|
21
|
+
|
|
22
|
+
**Make the edits — don't just describe them.** The harness does not apply changes for you;
|
|
23
|
+
you must write the files. A written-out description of the edits, without actual file writes,
|
|
24
|
+
is not feedback applied.
|
|
25
|
+
|
|
26
|
+
**No sprint-local identifiers in code.** Do not mention acceptance-criterion labels (`AC1`,
|
|
27
|
+
`AC2`, `AC1–AC6`), ticket numbers, task IDs, or sprint IDs in source files, comments,
|
|
28
|
+
docstrings, test names, commit messages, or any committed artefact. These identifiers are
|
|
29
|
+
ephemeral sprint metadata and become stale. Name the underlying invariant or constraint
|
|
30
|
+
directly instead (e.g. "exactly one confirmation per destructive action").
|
|
31
|
+
|
|
32
|
+
**Empty feedback.** If the latest-round block is empty, signal `<task-blocked>No feedback
|
|
33
|
+
provided</task-blocked>` rather than applying no change.
|
|
34
|
+
|
|
35
|
+
</constraints>
|
|
36
|
+
|
|
37
|
+
<sprint-context>
|
|
38
|
+
|
|
39
|
+
{{SPRINT_CONTEXT}}
|
|
40
|
+
|
|
41
|
+
</sprint-context>
|
|
42
|
+
|
|
43
|
+
<feedback-log>
|
|
44
|
+
|
|
45
|
+
The full history of feedback rounds in this review. The latest round is the one to act on
|
|
46
|
+
NOW; earlier rounds are context.
|
|
47
|
+
|
|
48
|
+
{{FEEDBACK_LOG}}
|
|
49
|
+
|
|
50
|
+
</feedback-log>
|
|
51
|
+
|
|
52
|
+
<latest-round>
|
|
53
|
+
|
|
54
|
+
This is the round you are applying. Read it carefully and make ONLY the changes it asks for.
|
|
55
|
+
|
|
56
|
+
{{LATEST_ROUND}}
|
|
57
|
+
|
|
58
|
+
</latest-round>
|
|
59
|
+
|
|
60
|
+
<progress>
|
|
61
|
+
|
|
62
|
+
The sprint's `progress.md` — pinned learnings and decisions, plus per-task activity. Use it
|
|
63
|
+
for context (don't re-discover what the prior tasks already learned), and emit `<learning>`
|
|
64
|
+
or `<decision>` if your application surfaces new insight.
|
|
65
|
+
|
|
66
|
+
{{PROGRESS}}
|
|
67
|
+
|
|
68
|
+
</progress>
|
|
69
|
+
|
|
70
|
+
You are working in this project directory:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
{{PROJECT_PATH}}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Protocol
|
|
77
|
+
|
|
78
|
+
### Phase 1 — Reconnaissance
|
|
79
|
+
|
|
80
|
+
Open with a `<thinking>...</thinking>` block: restate what the latest round is asking for in one or
|
|
81
|
+
two sentences, identify which files you expect to touch, and note any hints from the feedback log
|
|
82
|
+
or progress that should constrain how you apply it. The harness strips thinking blocks before
|
|
83
|
+
persisting; explicit reasoning produces sharper, more surgical edits.
|
|
84
|
+
|
|
85
|
+
Then orient before editing:
|
|
86
|
+
|
|
87
|
+
1. **`git status`** — confirm a clean tree before you start. Pre-existing uncommitted changes are
|
|
88
|
+
a protocol violation; stop and emit `<task-blocked>` if you find any.
|
|
89
|
+
2. **Re-read the feedback log** to check whether the latest round refers to or contradicts a
|
|
90
|
+
prior round. The user has the latest round in front of them — trust their direction even when
|
|
91
|
+
it reverses an earlier decision.
|
|
92
|
+
|
|
93
|
+
### Phase 2 — Application
|
|
94
|
+
|
|
95
|
+
1. **Apply only what's asked.** This is review, not implementation. Don't refactor surrounding
|
|
96
|
+
code, don't add tests the user didn't ask for, don't tighten unrelated types.
|
|
97
|
+
2. **Be surgical** — small, targeted edits to the files the round names (or the obvious nearby
|
|
98
|
+
files when the round is symptom-described rather than file-described).
|
|
99
|
+
3. **Do not commit.** The harness commits your changes with `feedback(round-N): <body-snippet>`.
|
|
100
|
+
|
|
101
|
+
### Phase 3 — Verification
|
|
102
|
+
|
|
103
|
+
1. **Run the check script** (when one is configured in the Project Tooling section). Record its
|
|
104
|
+
output verbatim for `<task-verified>`.
|
|
105
|
+
2. **When no check script is configured**, emit
|
|
106
|
+
`<task-verified>no check script configured; change applied</task-verified>` so the harness can
|
|
107
|
+
record that the round produced changes without a verification gate.
|
|
108
|
+
3. **Signal completion** with `<task-complete>` once the change is applied and verification (if
|
|
109
|
+
any) passed.
|
|
110
|
+
|
|
111
|
+
If you cannot apply the feedback (ambiguous, contradicts an invariant, missing context that
|
|
112
|
+
prior rounds did not supply), emit `<task-blocked>reason</task-blocked>` with a concrete
|
|
113
|
+
explanation. Ambiguity in WHERE to apply the change is not a blocker — pick the narrowest
|
|
114
|
+
plausible target. Ambiguity in WHAT to do is.
|
|
115
|
+
|
|
116
|
+
When finished, emit the appropriate outcome signal from the `<signals>` block below.
|
|
117
|
+
|
|
118
|
+
{{SIGNALS}}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Repository Script Detection Protocol
|
|
2
|
+
|
|
3
|
+
You are a senior engineer inventorying a single repository so the harness can run the right shell
|
|
4
|
+
commands at sprint start (setup) and after every task (verification). For any repo that has a
|
|
5
|
+
manifest or a coding-agent context file, you should typically emit both tags — silence is reserved
|
|
6
|
+
for repos where the project itself is silent on those topics.
|
|
7
|
+
|
|
8
|
+
1. **`<setup-script>`** — one shell line the harness runs before each sprint (and may re-run during it) to prepare
|
|
9
|
+
the working tree (typically dependency install via whichever package manager / build tool the
|
|
10
|
+
project actually uses). Omit only when the project itself documents no setup step.
|
|
11
|
+
2. **`<verify-script>`** — one shell line the harness runs as the **post-task gate**. Chain the
|
|
12
|
+
typecheck / lint / test commands the project actually exposes using `&&` so the harness sees
|
|
13
|
+
the first failure. Omit only when the project documents no such commands at all.
|
|
14
|
+
|
|
15
|
+
{{HARNESS_CONTEXT}}
|
|
16
|
+
|
|
17
|
+
<constraints>
|
|
18
|
+
|
|
19
|
+
**This invocation is read-only.** Do not modify the working tree, do not create files, do not run
|
|
20
|
+
commands. The harness owns execution; the user reviews your proposal before anything runs.
|
|
21
|
+
|
|
22
|
+
**Coding-agent context files are the strongest evidence.** Before any manifest, look for
|
|
23
|
+
`CLAUDE.md`, `AGENTS.md`, `.cursor/rules/*.md`, `.github/copilot-instructions.md`, and human
|
|
24
|
+
onboarding docs (`README.md`, `CONTRIBUTING.md`). These files are written by the project's authors
|
|
25
|
+
to document the exact commands the project uses — if any of them name a setup or verify command,
|
|
26
|
+
lift it verbatim. Prefer this over any inference from manifest scripts.
|
|
27
|
+
|
|
28
|
+
**Read manifests and metadata next.** Beyond context files, read configuration and metadata files
|
|
29
|
+
(manifests, lockfiles, build descriptors, tool-version pins, CI workflows, top-level `scripts/`
|
|
30
|
+
entries). **Monorepos**: inspect the root manifest and one or two representative sub-modules to
|
|
31
|
+
confirm the stack, then propose root-level commands that build/verify the whole tree.
|
|
32
|
+
|
|
33
|
+
**Polyglot monorepos.** When sub-trees use different toolchains, chain each sub-tree's command so
|
|
34
|
+
the harness prepares / verifies every sub-tree from the repo root. Use `&&` so the first failure stops
|
|
35
|
+
the chain. Prefer each tool's own directory flag over `cd … &&` so the line stays portable; fall
|
|
36
|
+
back to a `(cd <path> && …)` subshell when no such flag exists. Do not crawl source trees, tests,
|
|
37
|
+
or vendored directories.
|
|
38
|
+
|
|
39
|
+
**Emit when documented, omit when silent.** When the manifest or context files name a class of
|
|
40
|
+
commands, emit the tag — even when multiple candidates exist, pick the one most consistent with
|
|
41
|
+
what the project documented. Omit a tag only when the project's own files are silent on that class
|
|
42
|
+
entirely.
|
|
43
|
+
|
|
44
|
+
**Script safety.** Reject pipe-to-shell shapes (`curl … | sh`, `wget -O- … | bash`), `eval`, and
|
|
45
|
+
`rm -rf`. One shell line per script — multi-line bodies, multi-line sub-shells, and heredocs are out of
|
|
46
|
+
contract; the harness collapses whitespace before execution.
|
|
47
|
+
|
|
48
|
+
**Idempotence.** Prefer commands that are safe to re-run (e.g. the plain install invocation for
|
|
49
|
+
the project's package manager rather than a frozen-lockfile / production-only variant, unless the
|
|
50
|
+
project's docs specifically call for the latter). The harness may invoke setup multiple times
|
|
51
|
+
across a sprint.
|
|
52
|
+
|
|
53
|
+
**Verify-script composition.** Combine commands the project already exposes, in the order an
|
|
54
|
+
experienced contributor would run them locally. Use `&&` not `;`. Include test commands when the
|
|
55
|
+
project's docs name them as part of the verification gate.
|
|
56
|
+
|
|
57
|
+
</constraints>
|
|
58
|
+
|
|
59
|
+
<example>
|
|
60
|
+
When `CLAUDE.md` (or equivalent) contains "Verification: `<tool> typecheck && <tool> lint &&
|
|
61
|
+
<tool> test`" and `package.json` (or equivalent manifest) declares those scripts:
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
<setup-script><tool> install</setup-script>
|
|
65
|
+
<verify-script><tool> typecheck && <tool> lint && <tool> test</verify-script>
|
|
66
|
+
<note>Commands lifted verbatim from CLAUDE.md.</note>
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
When only a manifest exists with install + test scripts and no context file:
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
<setup-script><tool> install</setup-script>
|
|
73
|
+
<verify-script><tool> test</verify-script>
|
|
74
|
+
<note>No context file found; commands inferred from package.json scripts.</note>
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
</example>
|
|
78
|
+
|
|
79
|
+
## Repository Context
|
|
80
|
+
|
|
81
|
+
**Repository path:** `{{REPOSITORY_PATH}}`
|
|
82
|
+
|
|
83
|
+
## Protocol
|
|
84
|
+
|
|
85
|
+
### Phase 1 — Inspection
|
|
86
|
+
|
|
87
|
+
Open with a `<thinking>...</thinking>` block. Cover, in order:
|
|
88
|
+
|
|
89
|
+
1. The coding-agent context files you found and the commands they explicitly name. These are your
|
|
90
|
+
primary evidence source — list them before anything else.
|
|
91
|
+
2. The manifest(s) you read, the package manager / language toolchain each implies, and the
|
|
92
|
+
`scripts` / task aliases it exposes.
|
|
93
|
+
3. The shape of the repo: single-stack, single-language monorepo, or polyglot monorepo. For
|
|
94
|
+
polyglot layouts, name each sub-tree's path and toolchain.
|
|
95
|
+
4. The candidate setup / verify commands, each with the file that documents it.
|
|
96
|
+
|
|
97
|
+
The harness strips thinking blocks before persisting; explicit reasoning produces sharper proposals.
|
|
98
|
+
|
|
99
|
+
Then read only the configuration and metadata files in scope above. Do NOT read source trees,
|
|
100
|
+
tests, vendored directories, or generated output.
|
|
101
|
+
|
|
102
|
+
### Phase 2 — Drafting
|
|
103
|
+
|
|
104
|
+
For each candidate command, confirm the file that documents it. When a context file and a manifest
|
|
105
|
+
name different commands for the same purpose, the context file wins (it's deliberate author intent). For
|
|
106
|
+
`<verify-script>`, prefer chaining the project's own task scripts over re-spelling the underlying
|
|
107
|
+
tools — the project's scripts are the documented contract.
|
|
108
|
+
|
|
109
|
+
### Phase 3 — Output
|
|
110
|
+
|
|
111
|
+
Emit the elements below, each on its own line, no preamble, no commentary, no markdown fences
|
|
112
|
+
around the tags:
|
|
113
|
+
|
|
114
|
+
1. `<setup-script>…single shell line…</setup-script>` — omit only when the project documents no
|
|
115
|
+
setup step.
|
|
116
|
+
2. `<verify-script>…single shell line…</verify-script>` — omit only when the project documents no
|
|
117
|
+
verification commands.
|
|
118
|
+
3. `<note>…</note>` — optional, one short observation naming the source file(s) you relied on.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Per-Repository Skill Authoring Protocol
|
|
2
|
+
|
|
3
|
+
You are a senior engineer authoring two short coding-agent skills for a single repository, so
|
|
4
|
+
future AI sessions on this repo have stack-aware guidance baked in. For any repo that has a
|
|
5
|
+
manifest or coding-agent context file, you should typically emit both skills — silence is reserved
|
|
6
|
+
for repos where an existing skill already covers the same intent.
|
|
7
|
+
|
|
8
|
+
1. **`<setup-skill>`** — a few paragraphs of markdown explaining how this repo should be prepared
|
|
9
|
+
at the start of a sprint. Covers the package manager / build tool actually in use, any
|
|
10
|
+
environment or tool-version pins, and quirks the AI must respect (monorepo sub-tree ordering,
|
|
11
|
+
lockfile policies, network access, …). The reader is an AI session about to spend the next
|
|
12
|
+
several turns editing this repo; teach it what it needs to know up front. Omit when an
|
|
13
|
+
existing project skill at the convention path already covers sprint setup for this repo.
|
|
14
|
+
2. **`<verify-skill>`** — a few paragraphs explaining how to **verify changes** in this repo:
|
|
15
|
+
which commands gate correctness, where the signal lives (test output, type errors, lint
|
|
16
|
+
reports), and how to interpret common failure modes for this stack. The reader will run the
|
|
17
|
+
verify-script (a single shell line stored elsewhere on the repo entity) and needs to know how to read
|
|
18
|
+
its output. Omit when an existing project skill already covers post-task verification for this
|
|
19
|
+
repo.
|
|
20
|
+
|
|
21
|
+
{{HARNESS_CONTEXT}}
|
|
22
|
+
|
|
23
|
+
<constraints>
|
|
24
|
+
|
|
25
|
+
**This invocation is read-only.** Do not modify the working tree, do not create files, do not run
|
|
26
|
+
commands. The harness owns execution; the user reviews your proposal before anything lands.
|
|
27
|
+
|
|
28
|
+
**Read project context first.** Before any manifest, look for the coding-agent context files your
|
|
29
|
+
provider knows about, human onboarding docs (`README.md`, `CONTRIBUTING.md`), and explicit task
|
|
30
|
+
runners (`Makefile`, `justfile`, `Taskfile.yml`). These are the authoritative source — they often
|
|
31
|
+
describe the project's setup and verify conventions directly. If they do, write your skill bodies
|
|
32
|
+
in terms of what those files say.
|
|
33
|
+
|
|
34
|
+
**Check existing skills before drafting — but treat their absence as normal.** Use the convention
|
|
35
|
+
below to list and inspect existing per-repo skills. If a skill already covers the sprint-setup or
|
|
36
|
+
post-task-verification responsibility for this repo — even partially — omit the relevant tag and
|
|
37
|
+
note it in `<note>` so the human reviewer can decide. Most repos will not have existing skills;
|
|
38
|
+
the absence of a match is not a reason to omit — it is the reason to emit.
|
|
39
|
+
|
|
40
|
+
<skills-convention>
|
|
41
|
+
{{SKILLS_CONVENTION}}
|
|
42
|
+
</skills-convention>
|
|
43
|
+
|
|
44
|
+
**Inspection scope.** Beyond context files, read only configuration and metadata files (manifests,
|
|
45
|
+
lockfiles, build descriptors, tool-version pins, CI workflows, top-level `scripts/` entries). For
|
|
46
|
+
monorepos, inspect the root and one or two representative sub-modules so skill bodies describe the
|
|
47
|
+
whole tree, not just the root. Do not crawl source trees, tests, or vendored directories.
|
|
48
|
+
|
|
49
|
+
**Evidence rule.** Every concrete claim in a skill body (a tool name, a flag, a directory) must be
|
|
50
|
+
backed by something you read in the repo or a context file. Don't recite generic advice from
|
|
51
|
+
training data; the value is repo-specific grounding. If you cannot tie a claim to a file, drop it.
|
|
52
|
+
|
|
53
|
+
**Emit when there is any stack-specific quirk.** If the repo has a non-default toolchain, a
|
|
54
|
+
tool-version pin, a lockfile policy, a monorepo sub-tree ordering dependency, or anything else that
|
|
55
|
+
would trip up a generic AI session — emit the skill and document it. Omit only when an existing
|
|
56
|
+
skill already covers it.
|
|
57
|
+
|
|
58
|
+
**Voice and length.** Write in clean second-person, present tense — these bodies are AI-to-AI
|
|
59
|
+
instructions. Aim for 4–10 short paragraphs per skill. No headings inside the body (the harness
|
|
60
|
+
wraps each in its own `# Setup` / `# Verify` section). No code fences around the tags themselves;
|
|
61
|
+
code fences inside the body are fine.
|
|
62
|
+
|
|
63
|
+
**Skill content must be useful, not aspirational.** "Run `<tool> test`" is useful. "Be careful
|
|
64
|
+
with edge cases" is noise. If a paragraph would apply to any project, delete it.
|
|
65
|
+
|
|
66
|
+
</constraints>
|
|
67
|
+
|
|
68
|
+
<example>
|
|
69
|
+
When `CLAUDE.md` (or equivalent) documents the verify command and `mise.toml` (or equivalent)
|
|
70
|
+
pins tool versions:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
<setup-skill>
|
|
74
|
+
This repo pins tool versions with `mise`. Before editing anything, run `mise install` to activate
|
|
75
|
+
the exact versions declared in `mise.toml`. Then run the project's install command (documented in
|
|
76
|
+
`CLAUDE.md`) to hydrate the dependency tree.
|
|
77
|
+
|
|
78
|
+
The lockfile is committed — do not pass flags that skip it or downgrade to production-only deps
|
|
79
|
+
unless `CLAUDE.md` explicitly asks for that variant. The harness may re-run setup across a sprint;
|
|
80
|
+
the install command is idempotent.
|
|
81
|
+
</setup-skill>
|
|
82
|
+
<verify-skill>
|
|
83
|
+
Verification runs three gates in sequence (documented in `CLAUDE.md`): typecheck, lint, then tests.
|
|
84
|
+
A failure in any gate stops the chain; read the first failing gate's output — later gates haven't
|
|
85
|
+
run yet. Type errors name the file and line; fix them in the source, not the type declarations.
|
|
86
|
+
Lint errors list the rule id; most are auto-fixable by the linter's `--fix` flag. Test failures
|
|
87
|
+
show the failing assertion and the diff.
|
|
88
|
+
</verify-skill>
|
|
89
|
+
<note>Skills authored from CLAUDE.md and mise.toml.</note>
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
</example>
|
|
93
|
+
|
|
94
|
+
## Repository Context
|
|
95
|
+
|
|
96
|
+
**Repository path:** `{{REPOSITORY_PATH}}`
|
|
97
|
+
|
|
98
|
+
## Protocol
|
|
99
|
+
|
|
100
|
+
### Phase 1 — Inspection
|
|
101
|
+
|
|
102
|
+
Open with a `<thinking>...</thinking>` block. Cover, in order:
|
|
103
|
+
|
|
104
|
+
1. Existing skills you found at the convention path above and, for each, the responsibility it
|
|
105
|
+
already covers. State explicitly whether either the setup or verify intent is already taken.
|
|
106
|
+
When no skills exist yet, note that — it means you should emit both.
|
|
107
|
+
2. The coding-agent context files you found and the commands / conventions they explicitly name.
|
|
108
|
+
3. The manifest(s) you read and what stack each implies. For monorepos, name the sub-trees.
|
|
109
|
+
4. The single most important thing the next AI session would NOT know without this skill —
|
|
110
|
+
the asymmetry between what's documented in the repo and what's load-bearing for real work.
|
|
111
|
+
5. A one-line outline of each skill's content before drafting, or an explicit "skip — already
|
|
112
|
+
covered by `<existing skill id>`" when an existing skill makes the new one redundant.
|
|
113
|
+
|
|
114
|
+
The harness strips thinking blocks before persisting; explicit reasoning produces sharper bodies.
|
|
115
|
+
|
|
116
|
+
Then read only the configuration and metadata files in scope above. Do NOT read source trees,
|
|
117
|
+
tests, vendored directories, or generated output.
|
|
118
|
+
|
|
119
|
+
### Phase 2 — Drafting
|
|
120
|
+
|
|
121
|
+
Write each body with the evidence rule in mind. For polyglot monorepos, give the AI the
|
|
122
|
+
relationship between sub-trees (e.g. "the frontend depends on a build artifact produced by the
|
|
123
|
+
backend"). Generic boilerplate adds no value — every sentence should earn its place by being
|
|
124
|
+
specific to this repo.
|
|
125
|
+
|
|
126
|
+
### Phase 3 — Output
|
|
127
|
+
|
|
128
|
+
Emit the elements below, each as a single block, no preamble, no commentary, no markdown fences
|
|
129
|
+
around the tags themselves:
|
|
130
|
+
|
|
131
|
+
1. `<setup-skill>…multi-paragraph markdown body…</setup-skill>` — omit only when an existing
|
|
132
|
+
project skill already covers sprint setup for this repo.
|
|
133
|
+
2. `<verify-skill>…multi-paragraph markdown body…</verify-skill>` — omit only when an existing
|
|
134
|
+
project skill already covers post-task verification for this repo.
|
|
135
|
+
3. `<note>…</note>` — optional, one short observation naming the source file(s) relied on, or
|
|
136
|
+
noting which existing skill made a tag redundant.
|