@flumecode/runner 0.22.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mcp-stdio.js CHANGED
@@ -57,6 +57,14 @@ var stepSchema = z.object({
57
57
  "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
58
58
  )
59
59
  });
60
+ var requirementSchema = z.object({
61
+ requirement: z.string().min(1).describe(
62
+ "A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
63
+ ),
64
+ acceptanceCriteria: z.array(z.string().min(1)).min(1).describe(
65
+ "Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
66
+ )
67
+ });
60
68
  var planInputSchema = {
61
69
  title: z.string().min(1).max(120).describe(
62
70
  "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
@@ -70,13 +78,10 @@ var planInputSchema = {
70
78
  "Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
71
79
  ),
72
80
  assumptions: z.array(z.string()).describe("Anything decided during planning, including unanswered defaults."),
73
- requirements: z.array(z.string().min(1)).min(1).describe(
74
- "Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
81
+ requirements: z.array(requirementSchema).min(1).describe(
82
+ "Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
75
83
  ),
76
84
  steps: z.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
77
- acceptanceCriteria: z.array(z.string().min(1)).min(2).describe(
78
- "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
79
- ),
80
85
  risks: z.array(z.string()).describe("Anything that could change the approach."),
81
86
  outOfScope: z.array(z.string()).describe("What is deliberately not being done.")
82
87
  };
@@ -91,7 +96,21 @@ function requireRootCauseForFix(schema) {
91
96
  }
92
97
  });
93
98
  }
94
- var planSchema = requireRootCauseForFix(z.object(planInputSchema));
99
+ function requireAtLeastTwoCriteria(schema) {
100
+ return schema.superRefine((plan, ctx) => {
101
+ const total = plan.requirements.reduce((sum, r) => sum + r.acceptanceCriteria.length, 0);
102
+ if (total < 2) {
103
+ ctx.addIssue({
104
+ code: z.ZodIssueCode.custom,
105
+ path: ["requirements"],
106
+ message: "At least 2 acceptance criteria total across all requirements are required."
107
+ });
108
+ }
109
+ });
110
+ }
111
+ var planSchema = requireAtLeastTwoCriteria(
112
+ requireRootCauseForFix(z.object(planInputSchema))
113
+ );
95
114
  function renderPlan(plan) {
96
115
  const lines = [];
97
116
  lines.push(`# ${plan.title}`);
@@ -118,8 +137,8 @@ function renderPlan(plan) {
118
137
  }
119
138
  lines.push("");
120
139
  lines.push("## Requirements");
121
- for (const requirement of plan.requirements) {
122
- lines.push(`- ${requirement}`);
140
+ for (const req of plan.requirements) {
141
+ lines.push(`- ${req.requirement}`);
123
142
  }
124
143
  lines.push("");
125
144
  lines.push("## Steps");
@@ -144,8 +163,11 @@ function renderPlan(plan) {
144
163
  }
145
164
  lines.push("");
146
165
  lines.push("## Acceptance criteria");
147
- for (const criterion of plan.acceptanceCriteria) {
148
- lines.push(`- [ ] ${criterion}`);
166
+ for (const req of plan.requirements) {
167
+ lines.push(`### ${req.requirement}`);
168
+ for (const criterion of req.acceptanceCriteria) {
169
+ lines.push(`- [ ] ${criterion}`);
170
+ }
149
171
  }
150
172
  if (plan.risks.length > 0) {
151
173
  lines.push("");
@@ -166,7 +188,7 @@ function renderPlan(plan) {
166
188
  return lines.join("\n");
167
189
  }
168
190
  var submitPlanInputSchema = {
169
- plans: z.array(requireRootCauseForFix(z.object(planInputSchema))).min(1).refine(
191
+ plans: z.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z.object(planInputSchema)))).min(1).refine(
170
192
  (arr) => {
171
193
  const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
172
194
  return new Set(titles).size === titles.length;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.22.0",
3
+ "version": "0.23.1",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -28,11 +28,17 @@
28
28
  "dependencies": {
29
29
  "@anthropic-ai/claude-agent-sdk": "^0.3.0",
30
30
  "@modelcontextprotocol/sdk": "^1",
31
+ "ink": "^5.2.1",
32
+ "ink-text-input": "^6.0.0",
33
+ "playwright": "^1.52.0",
34
+ "react": "^18.3.1",
31
35
  "zod": "4.4.3"
32
36
  },
33
37
  "devDependencies": {
34
38
  "@types/node": "^22.10.5",
39
+ "@types/react": "^19.0.0",
35
40
  "esbuild": "^0.24.2",
41
+ "ink-testing-library": "^4.0.0",
36
42
  "tsx": "^4.19.2",
37
43
  "typescript": "^5.7.3"
38
44
  }
@@ -184,6 +184,7 @@ The report subagent calls `submit_report` with these fields:
184
184
  verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
185
185
  optionally explains it). Never include a hunk that isn't in the actual diff. Cite
186
186
  the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
187
+ - `requirement` — the verbatim text of the `### <requirement>` heading this criterion appeared under in the plan (from the `## Acceptance criteria` section). Set this for every criterion from a structured plan. Omit only for resolve runs (no plan) or legacy plans without requirement headings.
187
188
  - **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
188
189
 
189
190
  ## Always
@@ -0,0 +1,127 @@
1
+ ---
2
+ name: preview-ui
3
+ description: >-
4
+ Author an ephemeral fake-data showcase page for changed UI components so the
5
+ runner can screenshot them in a headless browser. Use after a UI-touching
6
+ implementation, when the runner provides a tmpRoute directory. Writes the
7
+ showcase files there and records the URL path in a sentinel file. Never
8
+ commits, pushes, or modifies application code outside the tmpRoute directory.
9
+ ---
10
+
11
+ # preview-ui
12
+
13
+ You author a **temporary showcase page** that imports the recently-changed UI
14
+ components and fills them with realistic fake data, so the runner can start the
15
+ repo's dev server and take headless screenshots.
16
+
17
+ You write only inside the `<tmpRoute>` directory the runner hands you. You never
18
+ modify production code, commit, or push.
19
+
20
+ ## What you receive
21
+
22
+ The runner injects these into your prompt:
23
+
24
+ - **`<tmpRoute>`** — an absolute path to an empty temp directory inside the repo
25
+ (already git-excluded). Write your showcase files here.
26
+ - **Committed UI files** — the list of files changed by the implementation, so
27
+ you know which components to showcase.
28
+
29
+ ## Step 1 — Detect the framework
30
+
31
+ Read the repo's `package.json` (and `package.json` files in workspaces, if any)
32
+ to determine the framework:
33
+
34
+ | Framework | Key `dependencies` / `devDependencies` |
35
+ | ------------- | --------------------------------------- |
36
+ | Next.js | `next` |
37
+ | Vite + React | `vite` + `react` |
38
+ | Vite + Vue | `vite` + `vue` |
39
+ | Vite + Svelte | `vite` + `svelte` (no SvelteKit) |
40
+ | SvelteKit | `@sveltejs/kit` |
41
+ | Nuxt | `nuxt` |
42
+ | Astro | `astro` |
43
+ | CRA | `react-scripts` |
44
+ | Remix | `@remix-run/react` or `@remix-run/node` |
45
+
46
+ If the framework is not recognisable or the file list contains no importable
47
+ components, first write `/__flumecode_preview` to `<tmpRoute>/.showcase-path`,
48
+ then write a single plain `<tmpRoute>/index.html` file with a message like
49
+ "No supported framework detected". Then stop — do not create a route file.
50
+
51
+ ## Step 2 — Determine the entry path
52
+
53
+ Choose a URL path unlikely to collide with real routes: `/__flumecode_preview`.
54
+ Write that string (exactly, no trailing newline) to `<tmpRoute>/.showcase-path`.
55
+
56
+ ## Step 3 — Author the showcase entry file
57
+
58
+ Create a single route/page file at the correct location under `<tmpRoute>` for
59
+ the detected framework:
60
+
61
+ | Framework | File to create |
62
+ | ---------------------- | ----------------------------------------------------- |
63
+ | Next.js (App Router) | `<tmpRoute>/page.tsx` (if the project uses `app/`) |
64
+ | Next.js (Pages Router) | `<tmpRoute>/index.tsx` (if the project uses `pages/`) |
65
+ | Vite + React | `<tmpRoute>/App.tsx` (or `.jsx`) |
66
+ | Vite + Vue | `<tmpRoute>/App.vue` |
67
+ | Vite + Svelte | `<tmpRoute>/App.svelte` |
68
+ | SvelteKit | `<tmpRoute>/+page.svelte` |
69
+ | Nuxt | `<tmpRoute>/index.vue` |
70
+ | Astro | `<tmpRoute>/index.astro` |
71
+ | CRA | `<tmpRoute>/index.tsx` |
72
+ | Remix | `<tmpRoute>/route.tsx` |
73
+
74
+ **Next.js App Router note:** The runner mounts the showcase at
75
+ `app/__flumecode_preview/page.tsx` by symlinking or copying `<tmpRoute>` to
76
+ `app/__flumecode_preview/`. You only need to produce `<tmpRoute>/page.tsx` — the
77
+ runner handles the mount.
78
+
79
+ ### Content rules
80
+
81
+ - Import the changed components using their real relative paths (calculate the
82
+ path from `<tmpRoute>` to the component's source location).
83
+ - Fill every prop with **realistic fake data** — use hardcoded literals, not
84
+ calls to external APIs or databases.
85
+ - If a component requires a provider (React context, Vuex store, Pinia, etc.),
86
+ wrap it with a minimal in-file stub provider — do not import the real app
87
+ store or data layer.
88
+ - If a component calls a route handler or fetch endpoint, stub the relevant
89
+ function or hook at the top of the file with a mock that returns realistic
90
+ hard-coded data. Do NOT import MSW or any test library; keep stubs as plain
91
+ module-level overrides.
92
+ - Export the showcase page as the default export (except Astro, Svelte/SvelteKit,
93
+ and Vue `<script setup>` components, which don't require an explicit default export).
94
+ - Keep the file short: one `export default` function that renders all changed
95
+ components in a flex column, with a small amount of padding.
96
+
97
+ ### What NOT to do
98
+
99
+ - Do not call `fetch`, `axios`, `prisma`, `supabase`, or any I/O in the
100
+ showcase.
101
+ - Do not import from test utilities, MSW, Storybook, or Cypress.
102
+ - Do not add new npm dependencies.
103
+ - Do not edit any file outside `<tmpRoute>`.
104
+ - Do not commit or push.
105
+
106
+ ## Step 4 — Verify your output
107
+
108
+ Before finishing, confirm:
109
+
110
+ 1. `<tmpRoute>/.showcase-path` exists and contains the URL path string.
111
+ 2. The showcase entry file exists at the expected path under `<tmpRoute>`.
112
+ 3. The file imports only modules that already exist in the repo (no invented
113
+ paths).
114
+
115
+ If you cannot produce a valid showcase (e.g. the components have complex
116
+ dependencies you cannot stub), write only `<tmpRoute>/.showcase-path` with the
117
+ URL string and leave the entry file absent — the runner will detect the missing
118
+ file and skip the screenshot step gracefully.
119
+
120
+ ## Always
121
+
122
+ - Write the URL path to `<tmpRoute>/.showcase-path` first, before any other
123
+ file — the runner reads it even if something else fails.
124
+ - These files are ephemeral and git-excluded. They will be deleted by the runner
125
+ after screenshots are taken. Never commit or push them.
126
+ - Your final reply should be one short sentence confirming what you created (e.g.
127
+ "Wrote `<tmpRoute>/page.tsx` showcasing `Button` and `Card` with fake data.").
@@ -72,18 +72,11 @@ Field-by-field guidance:
72
72
  - **`motivation`** — optional. The user's stated or asked-for reason for making this request — the underlying motivation or problem the change addresses. Fill this when the request content/context does NOT already state the why (ask during Phase 1 — Clarify if needed); omit when there is no additional motivation to record. Useful for future understanding of the system.
73
73
  - **`assumptions`** — anything you decided during investigation (including
74
74
  unanswered defaults from Phase 1).
75
- - **`requirements`** — **required; at least 1 item.** Plain-language statements of what this change must accomplish and why, written so a non-technical reader can follow them. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof. At least 1 item required.
76
- - **`steps`** — an ordered list. For each step provide:
77
- - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
78
- - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
79
- - **`pseudoCode`** an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
80
- - **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
81
- be a concrete, deterministically-checkable condition that a third party can verify
82
- without knowing the author's intent. Write each as a trigger/precondition and the
83
- exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`.
84
- No vague adjectives (`robust`, `clean`, `properly`, `works correctly`). The set
85
- must be **collectively exhaustive** — every step's intended change is covered by
86
- at least one AC. Do **not** restate a step as a criterion.
75
+ - **`requirements`** — **required; at least 1 item.** An array of objects, each with:
76
+ - **`requirement`** — a plain-language statement of what this change must accomplish and why, written so a non-technical reader can follow it. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof.
77
+ - **`acceptanceCriteria`** — **required; at least 1 item per requirement.** Concrete, deterministically-checkable conditions that prove this specific requirement is satisfied. The total count across all requirements must be **at least 2**. Write each criterion as a trigger/precondition and the exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`. No vague adjectives (`robust`, `clean`, `properly`, `works correctly`). Do **not** restate a step as a criterion. Across all requirements, every step's intended change must be covered by at least one acceptance criterion.
78
+
79
+ The link between each requirement and its criteria is established at plan time. When the agent reports back, each `submit_report` verdict must carry the `requirement` field identifying which `### <requirement>` heading the criterion appeared under in the plan.
87
80
 
88
81
  **Good vs bad examples:**
89
82
  - ✅ `grep -rn "What changed" apps/runner/src/report.ts` produces no matches.
@@ -91,6 +84,11 @@ Field-by-field guidance:
91
84
  - ✅ `pnpm test` in the repo root exits 0 and report.test.ts output contains no failures.
92
85
  - ❌ Tests pass correctly. _(no trigger, no observable result)_
93
86
 
87
+ - **`steps`** — an ordered list. For each step provide:
88
+ - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
89
+ - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
90
+ - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
91
+
94
92
  - **`risks`** — anything that could change the approach or surface a problem.
95
93
  - **`outOfScope`** — what you are deliberately not doing.
96
94
 
@@ -41,7 +41,7 @@ actual code. Pick exactly one:
41
41
  - **Re-plan** — the request meaningfully changes scope or direction, enough that a
42
42
  fresh plan should be agreed before building. Call **`submit_plan`** with a `plans[]` array
43
43
  containing the revised structured fields (same per-plan shape as the request-to-plan skill:
44
- `scope`, `goal`, `assumptions`, `requirements` — at least 1 —, `steps`, `acceptanceCriteria` at least 2 —, `risks`,
44
+ `scope`, `goal`, `assumptions`, `requirements` — at least 1, each with its own `acceptanceCriteria` array; at least 2 criteria total across all requirements —, `steps`, `risks`,
45
45
  `outOfScope`). Include only one entry for a revise turn. The runner posts it as a revision
46
46
  the user can accept; make no code changes this turn.
47
47
  - **Implement** — the request is clear and reasonable. Make the change (via
@@ -82,8 +82,9 @@ user:
82
82
 
83
83
  - **Implemented:** call **`submit_report`** with the structured report, exactly as
84
84
  `implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
85
- met / not_met / unclear verdict and the diff hunk(s) that prove it), plus the four
86
- required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
85
+ met / not_met / unclear verdict, the diff hunk(s) that prove it, and a `requirement`
86
+ field set to the verbatim requirement heading the criterion appeared under in the plan),
87
+ plus the four required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
87
88
  Include `cicd` from the Verify results (one entry per check, same shape as
88
89
  `implement-plan`; omit when no verification setup).
89
90
  Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what