executant 1.4.6 → 1.6.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/dist/index.js +3 -3
- package/dist/prompts/plan-decompose.txt +55 -8
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -129,3 +129,13 @@ executant --step <name|n> wf.yaml # run one step by name or index
|
|
|
129
129
|
executant --from-step <n> wf.yaml # resume from step n
|
|
130
130
|
executant update # upgrade to latest version
|
|
131
131
|
```
|
|
132
|
+
|
|
133
|
+
## Development
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
npm test # run tests
|
|
137
|
+
npm run eval evals/plan-decompose.eval.yaml # score prompt templates
|
|
138
|
+
npm run eval -- --refine evals/plan-decompose.eval.yaml # refine until all cases pass
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The eval system tests and iteratively refines the prompt templates in `src/prompts/`. Eval definitions live in `evals/*.eval.yaml`; see `AGENTS.md` for the full format.
|
package/dist/index.js
CHANGED
|
@@ -1322,14 +1322,14 @@ function normalizeWorkflow(workflow2) {
|
|
|
1322
1322
|
const isNumeric = isNumericSequence(arr);
|
|
1323
1323
|
const labeledN = !isNumeric ? isLabeledSequence(arr) : null;
|
|
1324
1324
|
if (isNumeric || labeledN !== null) {
|
|
1325
|
-
const { forEach, ...rest } = step;
|
|
1325
|
+
const { forEach: _forEach, ...rest } = step;
|
|
1326
1326
|
return { ...rest, repeat: isNumeric ? arr.length : labeledN };
|
|
1327
1327
|
}
|
|
1328
1328
|
}
|
|
1329
1329
|
if (typeof step.forEach === "string") {
|
|
1330
1330
|
const n = parseSeqCommand(step.forEach);
|
|
1331
1331
|
if (n !== null) {
|
|
1332
|
-
const { forEach, ...rest } = step;
|
|
1332
|
+
const { forEach: _forEach, ...rest } = step;
|
|
1333
1333
|
return { ...rest, repeat: n };
|
|
1334
1334
|
}
|
|
1335
1335
|
}
|
|
@@ -1366,7 +1366,7 @@ function collapseSequentialSteps(steps) {
|
|
|
1366
1366
|
i++;
|
|
1367
1367
|
continue;
|
|
1368
1368
|
}
|
|
1369
|
-
const { name, ...rest } = step;
|
|
1369
|
+
const { name: _name, ...rest } = step;
|
|
1370
1370
|
result.push({ ...rest, name: `${prefix}_{{item}}`, repeat: n });
|
|
1371
1371
|
i += n;
|
|
1372
1372
|
}
|
|
package/dist/prompts/plan-decompose.txt
CHANGED
|
@@ -93,15 +93,48 @@ or commands.
|
|
|
93
93
|
`vars` MUST appear before `steps` in the JSON output.
|
|
94
94
|
|
|
95
95
|
**Pre-Output Self-Review — Vars (MANDATORY):**
|
|
96
|
-
Before finalising your JSON, scan every `prompt` and `command` field you wrote.
|
|
97
|
-
For each field,
|
|
98
|
-
|
|
96
|
+
Before finalising your JSON, scan every `prompt` and `command` field you wrote — every sentence, every numbered instruction, every parenthetical.
|
|
97
|
+
For each field, identify ALL occurrences of paths, including:
|
|
98
|
+
- Direct path references (e.g., `src/middleware/rate-limit.ts`)
|
|
99
|
+
- Paths mentioned in narrative context (e.g., "match the style of tests in `src/tests/`")
|
|
100
|
+
- Relative import paths used as examples (e.g., `../models/User`, `./utils`)
|
|
101
|
+
- Any string segment containing `/` that represents a file or directory location
|
|
102
|
+
|
|
103
|
+
For EVERY path found in ANY context, extract it to `vars` and replace ALL occurrences with `{{var_name}}`. There are no exceptions — even paths used only as style references or examples must use `{{var_name}}`.
|
|
104
|
+
|
|
105
|
+
**Pay special attention to `command` fields in script steps.** Short package/directory paths like `packages/api` or `packages/web` appearing in commands are paths and MUST be in `vars`.
|
|
106
|
+
|
|
107
|
+
❌ WRONG — hardcoded directory path in a command:
|
|
108
|
+
```json
|
|
109
|
+
{"name": "test_api", "type": "script", "command": "cd packages/api && npm test"}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
✅ CORRECT — directory path extracted to vars:
|
|
113
|
+
```json
|
|
114
|
+
{"name": "test_api", "type": "script", "command": "cd {{api_package}} && npm test"}
|
|
115
|
+
```
|
|
116
|
+
(with `"api_package": "packages/api"` declared in `vars`)
|
|
99
117
|
|
|
100
118
|
**Pre-Output Self-Review — Repeat (MANDATORY):**
|
|
101
119
|
Scan every `forEach` field you wrote.
|
|
102
120
|
Ask: "Is this array just sequential numbers like `["1","2","3"]` with no meaningful items?"
|
|
103
121
|
If yes, replace the entire `forEach` with `repeat: N` where N is the count. Sequential-number forEach arrays are ALWAYS wrong — they are a misuse of forEach and must be converted to `repeat: N`.
|
|
104
122
|
|
|
123
|
+
**Pre-Output Self-Review — Verification (MANDATORY):**
|
|
124
|
+
Before finalising your JSON, check your last steps.
|
|
125
|
+
Ask: "Do my final steps include `"type": "script"` steps that run the lint, test, and/or build commands from the research document's Verification Plan?"
|
|
126
|
+
If no, add them now. An `llm_as_judge: true` prompt step does NOT count as a verification step and does NOT replace them.
|
|
127
|
+
Verification steps MUST be `"type": "script"` — not prompt steps.
|
|
128
|
+
|
|
129
|
+
Example of correct verification steps at the end of `steps`:
|
|
130
|
+
```json
|
|
131
|
+
{"name": "lint", "type": "script", "command": "npm run lint"},
|
|
132
|
+
{"name": "test", "type": "script", "command": "npm test"},
|
|
133
|
+
{"name": "typecheck", "type": "script", "command": "npm run build"}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Use the EXACT commands from the research document. Only skip a category if the research document explicitly says "none found" for it.
|
|
137
|
+
|
|
105
138
|
## When to Use Each Step Type
|
|
106
139
|
|
|
107
140
|
**Use `prompt` steps (AI-assisted) for:**
|
|
@@ -133,7 +166,7 @@ If yes, replace the entire `forEach` with `repeat: N` where N is the count. Sequ
|
|
|
133
166
|
|
|
134
167
|
## Atomicity (MANDATORY)
|
|
135
168
|
|
|
136
|
-
Each step must do ONE focused thing. If a step description contains "and" — split it.
|
|
169
|
+
Each step must do ONE focused thing. If a step description contains "and" connecting two distinct actions — split it.
|
|
137
170
|
|
|
138
171
|
❌ WRONG — too many concerns in one step:
|
|
139
172
|
```json
|
|
@@ -148,6 +181,18 @@ Each step must do ONE focused thing. If a step description contains "and" — sp
|
|
|
148
181
|
]
|
|
149
182
|
```
|
|
150
183
|
|
|
184
|
+
This rule also applies within numbered sub-instructions inside a prompt. Each numbered instruction must describe a single action. If a numbered instruction uses "and" to connect two distinct actions, split it into two separate numbered instructions.
|
|
185
|
+
|
|
186
|
+
❌ WRONG — "and" connects distinct actions inside a numbered instruction:
|
|
187
|
+
```
|
|
188
|
+
"1. Create and export the configured limiter as the default export"
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
✅ CORRECT — each numbered instruction is a single action:
|
|
192
|
+
```
|
|
193
|
+
"1. Create the configured limiter with the required options\n2. Export the limiter as the default export"
|
|
194
|
+
```
|
|
195
|
+
|
|
151
196
|
Prefer 8 small, focused steps over 3 large, vague ones.
|
|
152
197
|
|
|
153
198
|
## Verification Enforcement (MANDATORY)
|
|
@@ -166,6 +211,8 @@ Required verification step order (include each that the research document confir
|
|
|
166
211
|
Use the EXACT commands from the "Verification Plan" section of the research document.
|
|
167
212
|
Do NOT invent commands. If the research document says "none found" for a category, skip it.
|
|
168
213
|
|
|
214
|
+
**These steps MUST be `"type": "script"` steps.** A prompt step with `llm_as_judge: true` is not a verification step and does not satisfy this requirement.
|
|
215
|
+
|
|
169
216
|
If the project has no verified lint/test/build commands, include at least one visual check
|
|
170
217
|
prompt step as the final step (with `llm_as_judge: true`) to review the changes.
|
|
171
218
|
|
|
@@ -177,12 +224,12 @@ Generate a JSON object that:
|
|
|
177
224
|
3. Names steps with descriptive snake_case identifiers (unique within the task)
|
|
178
225
|
4. Structures prompts with numbered instructions for clarity (use \n for newlines)
|
|
179
226
|
5. Decomposes to the smallest logical unit — one concern per step
|
|
180
|
-
6. Ends with ALL verification steps confirmed in the research document
|
|
227
|
+
6. Ends with ALL verification steps confirmed in the research document as `"type": "script"` steps
|
|
181
228
|
7. Adds `llm_as_judge: true` to quality-critical implementation and writing steps
|
|
182
229
|
8. Adds `self_healing: true` to script steps where auto-recovery is safe (opt-in, not default)
|
|
183
230
|
9. Uses `continue_on_error: true` for non-critical script steps
|
|
184
231
|
10. Uses `output:` + `context:` to pass script step results to downstream prompt steps
|
|
185
|
-
11. Declares ALL file paths in `vars` — no hardcoded paths in prompts or commands
|
|
232
|
+
11. Declares ALL file paths in `vars` — no hardcoded paths in prompts or commands, including paths in narrative or example context
|
|
186
233
|
12. Places `vars` before `steps` in the JSON output
|
|
187
234
|
|
|
188
235
|
## Critical Rules
|
|
@@ -192,8 +239,8 @@ Generate a JSON object that:
|
|
|
192
239
|
- Step names MUST be unique within the task
|
|
193
240
|
- Prompt steps are default — only specify `"type": "script"` for script steps
|
|
194
241
|
- `vars` MUST appear before `steps` in the output JSON
|
|
195
|
-
- The final steps MUST be the verification steps (lint, test, build) from the research document
|
|
196
|
-
- NEVER hardcode file paths in `prompt` or `command` fields
|
|
242
|
+
- The final steps MUST be the verification steps (lint, test, build) from the research document, each as `"type": "script"`
|
|
243
|
+
- NEVER hardcode file paths in `prompt` or `command` fields — this includes paths mentioned as style references, examples, or relative imports
|
|
197
244
|
|
|
198
245
|
## Output Format
|
|
199
246
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "executant",
|
|
3
|
-
"version": "1.4.6",
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "Harness for YAML-defined workflows that enables stepping through Claude sessions and bash commands",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
"dev": "tsx src/index.ts",
|
|
21
21
|
"start": "node dist/index.js",
|
|
22
22
|
"test": "env -u NODE_TEST_CONTEXT node --import tsx/esm --test src/tests/*.test.ts",
|
|
23
|
+
"eval": "tsx src/eval/index.ts",
|
|
23
24
|
"lint": "eslint src",
|
|
24
25
|
"knip": "knip"
|
|
25
26
|
},
|