npm - @flumecode/runner - Versions diffs - 0.22.0 → 0.23.1 - Mend

@flumecode/runner 0.22.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/cli.js +1109 -62
package/dist/mcp-stdio.js +33 -11
package/package.json +7 -1
package/skills-plugin/skills/implement-plan/SKILL.md +1 -0
package/skills-plugin/skills/preview-ui/SKILL.md +127 -0
package/skills-plugin/skills/request-to-plan/SKILL.md +10 -12
package/skills-plugin/skills/revise-implementation/SKILL.md +4 -3

package/dist/mcp-stdio.js CHANGED Viewed

@@ -57,6 +57,14 @@ var stepSchema = z.object({
     "Per-file pseudo code. Provide an entry for every non-documentation file this step touches. Each entry contains the file path and pseudo code describing the changes to that file."
   )
 });
+var requirementSchema = z.object({
+  requirement: z.string().min(1).describe(
+    "A human-readable statement of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. " + INLINE_CODE_HINT
+  ),
+  acceptanceCriteria: z.array(z.string().min(1)).min(1).describe(
+    "Concrete, deterministically-checkable conditions that prove this requirement is satisfied. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. " + INLINE_CODE_HINT
+  )
+});
 var planInputSchema = {
   title: z.string().min(1).max(120).describe(
     "A concise, descriptive name for THIS plan. Must be distinct from the request title and from any sibling plans on the same request. Keep it under 120 characters."
@@ -70,13 +78,10 @@ var planInputSchema = {
     "Why the user is making this request \u2014 the underlying motivation or problem the change addresses. Fill this especially when the request content/context does NOT already state the why (ask the user in the Clarify phase); omit when there is no additional motivation to record. Useful for future understanding of the system. " + INLINE_CODE_HINT
   ),
   assumptions: z.array(z.string()).describe("Anything decided during planning, including unanswered defaults."),
-  requirements: z.array(z.string().min(1)).min(1).describe(
-    "Required, human-readable statements of what this change must accomplish and why, in plain language a non-technical reader can follow. Distinct from acceptanceCriteria: requirements explain intent/rationale; acceptance criteria are the machine-checkable proof. At least 1 required. " + INLINE_CODE_HINT
+  requirements: z.array(requirementSchema).min(1).describe(
+    "Required, human-readable statements of what this change must accomplish and why, each carrying its own acceptanceCriteria. At least 1 requirement required; at least 2 acceptance criteria total across all requirements. " + INLINE_CODE_HINT
   ),
   steps: z.array(stepSchema).min(1).describe("Ordered list of changes. Each step says what and why, with file references."),
-  acceptanceCriteria: z.array(z.string().min(1)).min(2).describe(
-    "Concrete, deterministically-checkable conditions that together define done. Each names a trigger/precondition and the exact observable result (run X -> output Y; file Z contains W; f(a) returns b) \u2014 no vague adjectives, not a restatement of a step. The set must collectively cover every step's change. At least 2 required. " + INLINE_CODE_HINT
-  ),
   risks: z.array(z.string()).describe("Anything that could change the approach."),
   outOfScope: z.array(z.string()).describe("What is deliberately not being done.")
 };
@@ -91,7 +96,21 @@ function requireRootCauseForFix(schema) {
     }
   });
 }
-var planSchema = requireRootCauseForFix(z.object(planInputSchema));
+function requireAtLeastTwoCriteria(schema) {
+  return schema.superRefine((plan, ctx) => {
+    const total = plan.requirements.reduce((sum, r) => sum + r.acceptanceCriteria.length, 0);
+    if (total < 2) {
+      ctx.addIssue({
+        code: z.ZodIssueCode.custom,
+        path: ["requirements"],
+        message: "At least 2 acceptance criteria total across all requirements are required."
+      });
+    }
+  });
+}
+var planSchema = requireAtLeastTwoCriteria(
+  requireRootCauseForFix(z.object(planInputSchema))
+);
 function renderPlan(plan) {
   const lines = [];
   lines.push(`# ${plan.title}`);
@@ -118,8 +137,8 @@ function renderPlan(plan) {
   }
   lines.push("");
   lines.push("## Requirements");
-  for (const requirement of plan.requirements) {
-    lines.push(`- ${requirement}`);
+  for (const req of plan.requirements) {
+    lines.push(`- ${req.requirement}`);
   }
   lines.push("");
   lines.push("## Steps");
@@ -144,8 +163,11 @@ function renderPlan(plan) {
   }
   lines.push("");
   lines.push("## Acceptance criteria");
-  for (const criterion of plan.acceptanceCriteria) {
-    lines.push(`- [ ] ${criterion}`);
+  for (const req of plan.requirements) {
+    lines.push(`### ${req.requirement}`);
+    for (const criterion of req.acceptanceCriteria) {
+      lines.push(`- [ ] ${criterion}`);
+    }
   }
   if (plan.risks.length > 0) {
     lines.push("");
@@ -166,7 +188,7 @@ function renderPlan(plan) {
   return lines.join("\n");
 }
 var submitPlanInputSchema = {
-  plans: z.array(requireRootCauseForFix(z.object(planInputSchema))).min(1).refine(
+  plans: z.array(requireAtLeastTwoCriteria(requireRootCauseForFix(z.object(planInputSchema)))).min(1).refine(
     (arr) => {
       const titles = arr.map((p) => p.title.trim()).filter((t) => t.length > 0);
       return new Set(titles).size === titles.length;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@flumecode/runner",
-  "version": "0.22.0",
+  "version": "0.23.1",
   "type": "module",
   "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
   "bin": {
@@ -28,11 +28,17 @@
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.3.0",
     "@modelcontextprotocol/sdk": "^1",
+    "ink": "^5.2.1",
+    "ink-text-input": "^6.0.0",
+    "playwright": "^1.52.0",
+    "react": "^18.3.1",
     "zod": "4.4.3"
   },
   "devDependencies": {
     "@types/node": "^22.10.5",
+    "@types/react": "^19.0.0",
     "esbuild": "^0.24.2",
+    "ink-testing-library": "^4.0.0",
     "tsx": "^4.19.2",
     "typescript": "^5.7.3"
   }

package/skills-plugin/skills/implement-plan/SKILL.md CHANGED Viewed

@@ -184,6 +184,7 @@ The report subagent calls `submit_report` with these fields:
     verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
     optionally explains it). Never include a hunk that isn't in the actual diff. Cite
     the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
+  - `requirement` — the verbatim text of the `### <requirement>` heading this criterion appeared under in the plan (from the `## Acceptance criteria` section). Set this for every criterion from a structured plan. Omit only for resolve runs (no plan) or legacy plans without requirement headings.
 - **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
 ## Always

package/skills-plugin/skills/preview-ui/SKILL.md ADDED Viewed

@@ -0,0 +1,127 @@
+---
+name: preview-ui
+description: >-
+  Author an ephemeral fake-data showcase page for changed UI components so the
+  runner can screenshot them in a headless browser. Use after a UI-touching
+  implementation, when the runner provides a tmpRoute directory. Writes the
+  showcase files there and records the URL path in a sentinel file. Never
+  commits, pushes, or modifies application code outside the tmpRoute directory.
+---
+# preview-ui
+You author a **temporary showcase page** that imports the recently-changed UI
+components and fills them with realistic fake data, so the runner can start the
+repo's dev server and take headless screenshots.
+You write only inside the `<tmpRoute>` directory the runner hands you. You never
+modify production code, commit, or push.
+## What you receive
+The runner injects these into your prompt:
+- **`<tmpRoute>`** — an absolute path to an empty temp directory inside the repo
+  (already git-excluded). Write your showcase files here.
+- **Committed UI files** — the list of files changed by the implementation, so
+  you know which components to showcase.
+## Step 1 — Detect the framework
+Read the repo's `package.json` (and `package.json` files in workspaces, if any)
+to determine the framework:
+| Framework     | Key `dependencies` / `devDependencies`  |
+| ------------- | --------------------------------------- |
+| Next.js       | `next`                                  |
+| Vite + React  | `vite` + `react`                        |
+| Vite + Vue    | `vite` + `vue`                          |
+| Vite + Svelte | `vite` + `svelte` (no SvelteKit)        |
+| SvelteKit     | `@sveltejs/kit`                         |
+| Nuxt          | `nuxt`                                  |
+| Astro         | `astro`                                 |
+| CRA           | `react-scripts`                         |
+| Remix         | `@remix-run/react` or `@remix-run/node` |
+If the framework is not recognisable or the file list contains no importable
+components, write `/__flumecode_preview` to `<tmpRoute>/.showcase-path` first,
+then write a single plain `<tmpRoute>/index.html` file with a message like
+"No supported framework detected". Then stop — do not create a route file.
+## Step 2 — Determine the entry path
+Choose a URL path unlikely to collide with real routes: `/__flumecode_preview`.
+Write that string (exactly, no trailing newline) to `<tmpRoute>/.showcase-path`.
+## Step 3 — Author the showcase entry file
+Create a single route/page file at the correct location under `<tmpRoute>` for
+the detected framework:
+| Framework              | File to create                                        |
+| ---------------------- | ----------------------------------------------------- |
+| Next.js (App Router)   | `<tmpRoute>/page.tsx` (if the project uses `app/`)    |
+| Next.js (Pages Router) | `<tmpRoute>/index.tsx` (if the project uses `pages/`) |
+| Vite + React           | `<tmpRoute>/App.tsx` (or `.jsx`)                      |
+| Vite + Vue             | `<tmpRoute>/App.vue`                                  |
+| Vite + Svelte          | `<tmpRoute>/App.svelte`                               |
+| SvelteKit              | `<tmpRoute>/+page.svelte`                             |
+| Nuxt                   | `<tmpRoute>/index.vue`                                |
+| Astro                  | `<tmpRoute>/index.astro`                              |
+| CRA                    | `<tmpRoute>/index.tsx`                                |
+| Remix                  | `<tmpRoute>/route.tsx`                                |
+**Next.js App Router note:** The runner mounts the showcase at
+`app/__flumecode_preview/page.tsx` by symlinking or copying `<tmpRoute>` to
+`app/__flumecode_preview/`. You only need to produce `<tmpRoute>/page.tsx` — the
+runner handles the mount.
+### Content rules
+- Import the changed components using their real relative paths (calculate the
+  path from `<tmpRoute>` to the component's source location).
+- Fill every prop with **realistic fake data** — use hardcoded literals, not
+  calls to external APIs or databases.
+- If a component requires a provider (React context, Vuex store, Pinia, etc.),
+  wrap it with a minimal in-file stub provider — do not import the real app
+  store or data layer.
+- If a component calls a route handler or fetch endpoint, stub the relevant
+  function or hook at the top of the file with a mock that returns realistic
+  hard-coded data. Do NOT import MSW or any test library; keep stubs as plain
+  module-level overrides.
+- Export the showcase page as the default export (except Astro/SvelteKit, which
+  don't require a default export).
+- Keep the file short: one `export default` function that renders all changed
+  components in a flex column, with a small amount of padding.
+### What NOT to do
+- Do not call `fetch`, `axios`, `prisma`, `supabase`, or any I/O in the
+  showcase.
+- Do not import from test utilities, MSW, Storybook, or Cypress.
+- Do not add new npm dependencies.
+- Do not edit any file outside `<tmpRoute>`.
+- Do not commit or push.
+## Step 4 — Verify your output
+Before finishing, confirm:
+1. `<tmpRoute>/.showcase-path` exists and contains exactly the URL path string (no trailing newline).
+2. The showcase entry file exists at the expected path under `<tmpRoute>`.
+3. The file imports only modules that already exist in the repo (no invented
+   paths).
+If you cannot produce a valid showcase (e.g. the components have complex
+dependencies you cannot stub), write only `<tmpRoute>/.showcase-path` with the
+URL string and leave the entry file absent — the runner will detect the missing
+file and skip the screenshot step gracefully.
+## Always
+- Write the URL path to `<tmpRoute>/.showcase-path` first, before any other
+  file — the runner reads it even if something else fails.
+- These files are ephemeral and git-excluded. They will be deleted by the runner
+  after screenshots are taken. Never commit or push them.
+- Your final reply should be one short sentence confirming what you created (e.g.
+  "Wrote `<tmpRoute>/page.tsx` showcasing `Button` and `Card` with fake data.").

package/skills-plugin/skills/request-to-plan/SKILL.md CHANGED Viewed

@@ -72,18 +72,11 @@ Field-by-field guidance:
 - **`motivation`** — optional. The user's stated or asked-for reason for making this request — the underlying motivation or problem the change addresses. Fill this when the request content/context does NOT already state the why (ask during Phase 1 — Clarify if needed); omit when there is no additional motivation to record. Useful for future understanding of the system.
 - **`assumptions`** — anything you decided during investigation (including
   unanswered defaults from Phase 1).
-- **`requirements`** — **required; at least 1 item.** Plain-language statements of what this change must accomplish and why, written so a non-technical reader can follow them. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof. At least 1 item required.
-- **`steps`** — an ordered list. For each step provide:
-  - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
-  - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
-  - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
-- **`acceptanceCriteria`** — **required; at least 2 items.** Each criterion must
-  be a concrete, deterministically-checkable condition that a third party can verify
-  without knowing the author's intent. Write each as a trigger/precondition and the
-  exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`.
-  No vague adjectives (`robust`, `clean`, `properly`, `works correctly`). The set
-  must be **collectively exhaustive** — every step's intended change is covered by
-  at least one AC. Do **not** restate a step as a criterion.
+- **`requirements`** — **required; at least 1 item.** An array of objects, each with:
+  - **`requirement`** — a plain-language statement of what this change must accomplish and why, written so a non-technical reader can follow it. Distinct from `acceptanceCriteria`: requirements explain intent and rationale; acceptance criteria are the machine-checkable proof.
+  - **`acceptanceCriteria`** — **required; at least 1 item per requirement.** Concrete, deterministically-checkable conditions that prove this specific requirement is satisfied. The total count across all requirements must be **at least 2**. Write each criterion as a trigger/precondition and the exact observable result: `run X → output Y`, `file Z contains W`, `calling f(a) returns b`. No vague adjectives. Every step's intended change must be covered by at least one AC.
+  The link between each requirement and its criteria is established at plan time. When the agent reports back, each `submit_report` verdict must carry the `requirement` field identifying which `### <requirement>` heading the criterion appeared under in the plan.
   **Good vs bad examples:**
   - ✅ `grep -rn "What changed" apps/runner/src/report.ts` produces no matches.
@@ -91,6 +84,11 @@ Field-by-field guidance:
   - ✅ `pnpm test` in the repo root exits 0 and report.test.ts output contains no failures.
   - ❌ Tests pass correctly. _(no trigger, no observable result)_
+- **`steps`** — an ordered list. For each step provide:
+  - **`title`** — a concise imperative phrase naming the step (e.g. "Add submit_plan schema to plan.ts").
+  - **`description`** — an array of bullet points that help the reviewer understand the upcoming `pseudoCode` and decide whether the plan and design are correct. Each item is a distinct, self-contained point about what is changing and why — not a single paragraph, and not a line-by-line restatement of the pseudo code. Use concrete file references (`path/to/file.ts`) and name the functions/symbols involved. Apply inline-code formatting to all identifiers.
+  - **`pseudoCode`** — an array of `{ file, pseudoCode }` entries. Provide an entry for every file the step touches **except** documentation files (SKILL.md, README.md, wiki pages, etc.). `pseudoCode` is optional in the schema but expected for all non-documentation files. Each entry names the file path and contains pseudo code that precisely describes the changes to make in that file.
 - **`risks`** — anything that could change the approach or surface a problem.
 - **`outOfScope`** — what you are deliberately not doing.

package/skills-plugin/skills/revise-implementation/SKILL.md CHANGED Viewed

@@ -41,7 +41,7 @@ actual code. Pick exactly one:
 - **Re-plan** — the request meaningfully changes scope or direction, enough that a
   fresh plan should be agreed before building. Call **`submit_plan`** with a `plans[]` array
   containing the revised structured fields (same per-plan shape as the request-to-plan skill:
-  `scope`, `goal`, `assumptions`, `requirements` — at least 1 —, `steps`, `acceptanceCriteria` — at least 2 —, `risks`,
+  `scope`, `goal`, `assumptions`, `requirements` (at least 1, each with its own `acceptanceCriteria` array; at least 2 criteria total across all requirements), `steps`, `risks`,
   `outOfScope`). Include only one entry for a revise turn. The runner posts it as a revision
   the user can accept; make no code changes this turn.
 - **Implement** — the request is clear and reasonable. Make the change (via
@@ -82,8 +82,9 @@ user:
 - **Implemented:** call **`submit_report`** with the structured report, exactly as
   `implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
-  met / not_met / unclear verdict and the diff hunk(s) that prove it), plus the four
-  required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
+  met / not_met / unclear verdict, the diff hunk(s) that prove it, and a `requirement`
+  field set to the verbatim requirement heading the criterion appeared under in the plan),
+  plus the four required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
   Include `cicd` from the Verify results (one entry per check, same shape as
   `implement-plan`; omit when no verification setup).
   Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what