@flumecode/runner 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -188,6 +188,12 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
188
188
  import { randomUUID } from "node:crypto";
189
189
  import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk";
190
190
  import { z } from "zod";
191
+
192
+ // src/schema-hints.ts
193
+ var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
194
+ var WIDGET_LANGUAGE_HINT = "Write this in the same natural language as the incoming thread (the request body and the user's messages). If the thread is in English, keep it in English; do not switch languages. Keep code identifiers, file paths, and quoted code verbatim.";
195
+
196
+ // src/widgets.ts
191
197
  var SERVER_NAME = "flume_widgets";
192
198
  var SINGLE_SELECT = "single_select";
193
199
  var MULTI_SELECT = "multi_select";
@@ -195,15 +201,15 @@ var WIDGET_TOOL_NAMES = [
195
201
  `mcp__${SERVER_NAME}__${SINGLE_SELECT}`,
196
202
  `mcp__${SERVER_NAME}__${MULTI_SELECT}`
197
203
  ];
198
- var optionsSchema = z.array(z.string().min(1)).min(2).max(8).describe("2\u20138 short, distinct choices for the user to pick from.");
199
- var TAIL = "Do NOT add an 'Other' or 'None of these' catch-all \u2014 the UI always offers an 'Other' free-text option automatically. After calling this, END YOUR TURN and wait: the user's answer arrives as their next message and starts a fresh run.";
204
+ var optionsSchema = z.array(z.string().min(1)).min(2).max(8).describe("2\u20138 short, distinct choices for the user to pick from. " + WIDGET_LANGUAGE_HINT);
205
+ var TAIL = "Do NOT add an 'Other' or 'None of these' catch-all \u2014 the UI always offers an 'Other' free-text option automatically. " + WIDGET_LANGUAGE_HINT + " After calling this, END YOUR TURN and wait: the user's answer arrives as their next message and starts a fresh run.";
200
206
  function createWidgetTooling() {
201
207
  const collected = [];
202
208
  const singleSelect = tool(
203
209
  SINGLE_SELECT,
204
210
  "Ask the user a single-select (radio-button) question \u2014 exactly one answer. Use this for a genuine either/or choice (competing approaches, scope decisions, yes/no) instead of writing the options as prose. " + TAIL,
205
211
  {
206
- question: z.string().min(1).describe("The question to ask the user."),
212
+ question: z.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT),
207
213
  body: z.string().optional().describe(
208
214
  "Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
209
215
  ),
@@ -226,7 +232,7 @@ function createWidgetTooling() {
226
232
  MULTI_SELECT,
227
233
  "Ask the user a multi-select (checkbox) question \u2014 they may pick any number of options, including none of the presets if they use 'Other'. Use this for 'select all that apply' questions (which features to include, which files to touch). " + TAIL,
228
234
  {
229
- question: z.string().min(1).describe("The question to ask the user."),
235
+ question: z.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT),
230
236
  body: z.string().optional().describe(
231
237
  "Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
232
238
  ),
@@ -266,8 +272,27 @@ function widgetPosted(kind) {
266
272
  import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
267
273
  import { z as z2 } from "zod";
268
274
 
269
- // src/schema-hints.ts
270
- var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
275
+ // src/code-lang.ts
276
/**
 * Maps a lowercase file extension (without the leading dot) to the language
 * tag appended to the opening code fence when a plan entry is rendered.
 */
var EXT_TO_LANG = {
  ts: "typescript",
  tsx: "tsx",
  js: "javascript",
  jsx: "jsx",
  json: "json",
  css: "css",
  md: "markdown",
  sh: "bash",
  py: "python",
  yaml: "yaml",
  yml: "yaml",
  html: "markup",
  xml: "markup",
  sql: "sql"
};

/**
 * Resolves the code-fence language tag for a file path from its extension.
 *
 * @param {string} path - File path; only the text after the last `.` of the
 *   final `/`-separated segment is considered, case-insensitively.
 * @returns {string | undefined} The language tag from `EXT_TO_LANG`, or
 *   `undefined` when the file has no extension or the extension is unmapped.
 */
function langFromPath(path) {
  // Look at the basename only, so a dot in a directory name is never
  // mistaken for the start of an extension.
  const base = path.slice(path.lastIndexOf("/") + 1);
  const dot = base.lastIndexOf(".");
  // No dot means no extension: a file literally named "ts" is not TypeScript.
  if (dot === -1) return void 0;
  const ext = base.slice(dot + 1).toLowerCase();
  // Own-property check: a plain lookup would return inherited members such as
  // `constructor` or `__proto__`, which are truthy non-strings.
  return Object.prototype.hasOwnProperty.call(EXT_TO_LANG, ext) ? EXT_TO_LANG[ext] : void 0;
}
271
296
 
272
297
  // src/plan.ts
273
298
  var SERVER_NAME2 = "flume_plan";
@@ -357,7 +382,8 @@ function renderPlan(plan) {
357
382
  lines2.push("");
358
383
  lines2.push(`\`${entry.file}\``);
359
384
  lines2.push("");
360
- lines2.push("```");
385
+ const lang = langFromPath(entry.file);
386
+ lines2.push(lang ? "```" + lang : "```");
361
387
  lines2.push(entry.pseudoCode);
362
388
  lines2.push("```");
363
389
  }
@@ -446,6 +472,15 @@ var STATUS_ICON = {
446
472
  not_met: "\u274C",
447
473
  unclear: "\u26A0\uFE0F"
448
474
  };
475
+ var CICD_STATUS_ICON = {
476
+ passed: "\u2705",
477
+ failed: "\u274C"
478
+ };
479
+ var cicdCheckSchema = z3.object({
480
+ command: z3.string().min(1).describe("The exact verification command run, e.g. `pnpm typecheck`."),
481
+ status: z3.enum(["passed", "failed"]).describe("Whether the command passed or failed."),
482
+ output: z3.string().optional().describe("Short excerpt of failing output; include on failure.")
483
+ });
449
484
  var evidenceSchema = z3.object({
450
485
  file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
451
486
  hunk: z3.string().min(1).describe(
@@ -477,6 +512,9 @@ var reportInputSchema = {
477
512
  ),
478
513
  conflictResolution: z3.string().optional().describe(
479
514
  "Markdown: present ONLY when a merge conflict was actually resolved. Explain, per conflicted file, how ours/theirs were integrated. Rendered under '## Conflict resolution'. Omit entirely when no conflict occurred."
515
+ ),
516
+ cicd: z3.array(cicdCheckSchema).optional().describe(
517
+ "Verify-phase build/typecheck/lint/test results. Omit when the repo has no verification setup. Rendered under '## CI/CD'."
480
518
  )
481
519
  };
482
520
  var reportSchema = z3.object(reportInputSchema);
@@ -504,6 +542,15 @@ function renderReport(report) {
504
542
  if (report.conflictResolution?.trim()) {
505
543
  lines2.push("", "## Conflict resolution", "", report.conflictResolution.trim());
506
544
  }
545
+ if (report.cicd && report.cicd.length > 0) {
546
+ lines2.push("", "## CI/CD");
547
+ for (const check of report.cicd) {
548
+ lines2.push("", `- ${CICD_STATUS_ICON[check.status]} \`${check.command}\``);
549
+ if (check.status === "failed" && check.output?.trim()) {
550
+ lines2.push("", "```", check.output.trim(), "```");
551
+ }
552
+ }
553
+ }
507
554
  lines2.push("", "## Code quality", "", report.codeQuality.trim());
508
555
  lines2.push("", "## Caveats / follow-ups", "", report.caveats.trim());
509
556
  return lines2.join("\n");
@@ -512,7 +559,7 @@ function createReportTooling() {
512
559
  let submittedReport = null;
513
560
  const submitReport = tool3(
514
561
  SUBMIT_REPORT,
515
- "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. Do NOT include a PR link \u2014 the runner appends it.",
562
+ "Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
516
563
  reportInputSchema,
517
564
  async (args) => {
518
565
  submittedReport = reportSchema.parse(args);
@@ -729,6 +776,7 @@ function appendRule(lines2, intro, ruleName) {
729
776
  lines2.push("", intro, "", loadRule(ruleName));
730
777
  }
731
778
  var WRITING_INTRO = "These technical-writing guidelines apply to the plan and report prose you author in this run:";
779
+ var LANGUAGE_DIRECTIVE = "First, determine the dominant natural language of the incoming thread (the request title/body and the user's messages). Use that one language for EVERYTHING you author this run - your reply body, any plan or report fields, AND every clarifying question and its widget options. Never mix languages: if the thread is in English, your questions and options must be in English too. Keep code identifiers, file paths, and quoted code verbatim.";
732
780
  function turnHeading(turn, agentName) {
733
781
  if (turn.role === "user") return "User";
734
782
  if (turn.failed) return `${agentName} (this run ended in an error)`;
@@ -752,7 +800,8 @@ function buildPrompt(ctx) {
752
800
  `The repository ${ctx.repo.fullName} is checked out in your current working directory on branch "${ctx.repo.checkoutBranch}" at commit ${ctx.repo.checkoutSha.slice(0, 7)}.`,
753
801
  task,
754
802
  orient,
755
- widgets
803
+ widgets,
804
+ LANGUAGE_DIRECTIVE
756
805
  ];
757
806
  if (ctx.permissionMode !== "plan") {
758
807
  lines2.push(
@@ -784,6 +833,7 @@ function buildRevisePrompt(ctx) {
784
833
  task,
785
834
  orient,
786
835
  widgets,
836
+ LANGUAGE_DIRECTIVE,
787
837
  "",
788
838
  "These coding guidelines apply to all code produced in this run:",
789
839
  "",
@@ -899,6 +949,7 @@ function buildReleasePrompt(ctx, baseChecks) {
899
949
  task,
900
950
  orient,
901
951
  widgets,
952
+ LANGUAGE_DIRECTIVE,
902
953
  "",
903
954
  "These coding guidelines apply to all code produced in this run:",
904
955
  "",
@@ -926,6 +977,14 @@ function buildReleasePrompt(ctx, baseChecks) {
926
977
  "```"
927
978
  );
928
979
  }
980
+ if (ctx.prerelease) {
981
+ lines2.push(
982
+ "",
983
+ "# Pre-release",
984
+ "",
985
+ "This is a PRE-RELEASE. When proposing and applying versions, use a semver pre-release version string (e.g. `0.9.0-beta.1`): take the next stable version you would otherwise pick and append `-beta.N`, where N is the next unused beta number for that version (check existing `v<version>-beta.*` tags). Offer these pre-release strings in the version-confirmation widgets, and write them to package.json, CHANGELOG.md, and the `flumecode:versions` comment as usual."
986
+ );
987
+ }
929
988
  appendThread(lines2, ctx);
930
989
  lines2.push(
931
990
  "",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flumecode/runner",
3
- "version": "0.17.0",
3
+ "version": "0.19.0",
4
4
  "type": "module",
5
5
  "description": "FlumeCode local runner — claims jobs and drives your local Claude Code against a real checkout.",
6
6
  "bin": {
@@ -1,8 +1,9 @@
1
1
  ---
2
2
  name: technical-writing
3
3
  description: >-
4
- Inline-code conventions for agent-authored plan and report prose: wrap code
5
- identifiers in backticks so they render as inline code.
4
+ Inline-code and output-language conventions for agent-authored plan and report
5
+ prose: wrap code identifiers in backticks, and write prose in the same natural
6
+ language as the user's request.
6
7
  ---
7
8
 
8
9
  # Technical Writing
@@ -12,3 +13,12 @@ description: >-
12
13
  Wrap code identifiers — function names, variable names, type names, file names, commands, and flags — in inline backticks so they render as inline code. For example: `getCodingSessionsForRequest`, not getCodingSessionsForRequest.
13
14
 
14
15
  This convention applies to all free-text fields in plans and reports: goals, step descriptions, acceptance criteria, summaries, code-quality notes, and caveats.
16
+
17
+ ## Output language
18
+
19
+ Before writing anything, determine the dominant natural language of the incoming thread (the
20
+ request title/body and the user's messages). Use that one language for all free-text prose in
21
+ this run — your reply body, plan goals/steps/risks, report summaries, clarifying questions,
22
+ widget options, and push-backs. Never switch languages mid-response. Keep code identifiers, file
23
+ paths, commands, and quoted code/diffs verbatim; only the surrounding prose follows the thread
24
+ language.
@@ -183,6 +183,32 @@ version did not change.
183
183
  silence them.
184
184
  - **Never commit, push, or open a PR** — the runner does that.
185
185
 
186
+ ## Pre-release
187
+
188
+ When the prompt contains a `# Pre-release` section, this release uses semver
189
+ pre-release version strings instead of stable ones:
190
+
191
+ - **Compute versions:** take the next stable version you would otherwise propose
192
+ (patch or minor bump), then append `-beta.N`, where N is the next unused beta
193
+ number for that base version. Check existing tags with:
194
+
195
+ ```
196
+ git tag -l --sort=-version:refname 'v<version>-beta.*' | head -1
197
+ ```
198
+
199
+ If no beta tags exist for that base version, start at `-beta.1`.
200
+
201
+ - **Phase 1 (propose):** offer the pre-release version string (e.g.
202
+ `0.9.0-beta.1`) in the version-confirmation widgets instead of the stable
203
+ version.
204
+
205
+ - **Phase 2 (apply):** write the pre-release version string (e.g.
206
+ `0.9.0-beta.1`) to `package.json`, `CHANGELOG.md`, and the
207
+ `<!-- flumecode:versions {...} -->` comment — exactly as you would for a
208
+ stable release, just with the pre-release suffix included.
209
+
210
+ ---
211
+
186
212
  ## Pre-release checks
187
213
 
188
214
  We cannot release code with failing checks. Before this turn, the runner ran the
@@ -88,11 +88,26 @@ contain, in this order:
88
88
 
89
89
  ### Every page: front-load an "At a glance" block
90
90
 
91
+ Before the "At a glance" block on **every** page (component pages, README,
92
+ architecture, glossary), place a TL;DR blockquote immediately after the H1:
93
+
94
+ ```
95
+ > **TL;DR** — one plain-language sentence on what this page covers.
96
+ ```
97
+
98
+ Then a blank line, then the existing `> **Purpose**` / "At a glance" block
99
+ (where applicable). For `README.md`, place the TL;DR after the
100
+ `<!-- wiki-synced-to -->` marker and H1. The blank line between the TL;DR and
101
+ the next blockquote is required — without it, markdown merges the two
102
+ blockquotes into one. This rule applies in both Bootstrap and Update modes.
103
+
91
104
  So an agent can grab context in seconds, begin each component page with:
92
105
 
93
106
  ```
94
107
  # <component>
95
108
 
109
+ > **TL;DR** — one plain-language sentence on what this page covers.
110
+
96
111
  > **Purpose** — one or two sentences.
97
112
  > **Key files** — `path/a.ts`, `path/b.ts` (the entry points worth opening).
98
113
  > **Depends on** — what it relies on. **Used by** — what relies on it.
@@ -138,7 +138,11 @@ the next step.
138
138
  7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
139
139
  verdicts (with criterion text, from step 4), the Verify results (from step 3),
140
140
  and the quality findings, and tell it to run `git --no-pager diff` itself as
141
- the **single source of truth** for the report. Do not pass the full plan the
141
+ the **single source of truth** for the report. Pass the Verify results as the
142
+ `cicd` field — one entry per check with `command`, `status` (`passed`/`failed`),
143
+ and (on failure) a short `output` excerpt. Omit `cicd` when no verification
144
+ setup exists. A failing check does NOT block the report — include the failing
145
+ entry and continue. Do not pass the full plan — the
142
146
  AC verdicts carry each criterion verbatim, and the live `git --no-pager diff`
143
147
  is the authoritative source for evidence; re-inlining the full plan is
144
148
  redundant. Keep each subagent prompt to the minimal self-contained slice it
@@ -181,6 +185,7 @@ The report subagent calls `submit_report` with these fields:
181
185
  verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
182
186
  optionally explains it). Never include a hunk that isn't in the actual diff. Cite
183
187
  the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
188
+ - **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
184
189
 
185
190
  ## Always
186
191
 
@@ -108,6 +108,13 @@ own independently-acceptable "Accept as plan" draft. After a plan is accepted th
108
108
  keep commenting to refine it; treat a later turn as a fresh **Plan** phase and call
109
109
  `submit_plan` again with a `plans[]` array containing the revised fields.
110
110
 
111
+ Before adding an entry to `plans[]`, apply this right-sizing checklist — if a plan fails any criterion, split it into separate entries:
112
+
113
+ - **Single, clear outcome** — one bug fixed, one feature increment, one refactor. If the `title` needs "and", consider splitting.
114
+ - **Fits in a sprint comfortably** — if it can't fit in one iteration, it's likely an epic that needs breaking down.
115
+ - **Reviewable PR** — small enough that a reviewer can hold it in their head (a common guideline is at most ~200–400 lines of diff, though this varies).
116
+ - **Testable acceptance criteria** — you can state up front what "done" looks like; use the `acceptanceCriteria` field to capture this.
117
+
111
118
  ## Always
112
119
 
113
120
  - Stay read-only. Propose; do not edit.
@@ -104,5 +104,6 @@ Call **`submit_report`** with the structured report. Fields:
104
104
  file, explaining which side you kept and why (or how you merged both intents). Wrap file names
105
105
  and code identifiers in inline backticks. This is what the user reads to understand how each
106
106
  conflict was integrated.
107
+ - `cicd` (optional): array of Verify-phase check results from Step 3, each with `command`, `status` (`"passed"`/`"failed"`), and `output` on failure. Omit when no build/test setup exists.
107
108
 
108
109
  The runner renders the report and appends the pull-request link — do not add one yourself.
@@ -84,6 +84,8 @@ user:
84
84
  `implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
85
85
  met / not_met / unclear verdict and the diff hunk(s) that prove it), plus the four
86
86
  required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
87
+ Include `cicd` from the Verify results (one entry per check, same shape as
88
+ `implement-plan`; omit `cicd` when the repo has no verification setup).
87
89
  Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what
88
90
  a subagent claimed; if the diff is empty, say nothing was changed. The runner
89
91
  renders the report and appends the pull-request link — do not add one yourself.