@flumecode/runner 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +68 -9
- package/package.json +1 -1
- package/skills-plugin/rules/technical-writing.md +12 -2
- package/skills-plugin/skills/create-release/SKILL.md +26 -0
- package/skills-plugin/skills/document/SKILL.md +15 -0
- package/skills-plugin/skills/implement-plan/SKILL.md +6 -1
- package/skills-plugin/skills/request-to-plan/SKILL.md +7 -0
- package/skills-plugin/skills/resolve-merge-conflict/SKILL.md +1 -0
- package/skills-plugin/skills/revise-implementation/SKILL.md +2 -0
package/dist/cli.js
CHANGED
|
@@ -188,6 +188,12 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
|
188
188
|
import { randomUUID } from "node:crypto";
|
|
189
189
|
import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk";
|
|
190
190
|
import { z } from "zod";
|
|
191
|
+
|
|
192
|
+
// src/schema-hints.ts
|
|
193
|
+
// Prompt hint text: asks the author to wrap code identifiers (function, variable,
// type and file names, commands, flags) in inline backticks.
var INLINE_CODE_HINT = "Wrap code identifiers (function, variable, type, and file names, commands, and flags) in inline backticks, e.g. `getCodingSessionsForRequest`.";
|
|
194
|
+
// Prompt hint text: write in the same natural language as the incoming thread while
// keeping code identifiers, file paths and quoted code verbatim. It is concatenated
// into the widget schema descriptions (`optionsSchema`, `question`) and into `TAIL`.
var WIDGET_LANGUAGE_HINT = "Write this in the same natural language as the incoming thread (the request body and the user's messages). If the thread is in English, keep it in English; do not switch languages. Keep code identifiers, file paths, and quoted code verbatim.";
|
|
195
|
+
|
|
196
|
+
// src/widgets.ts
|
|
191
197
|
var SERVER_NAME = "flume_widgets";
|
|
192
198
|
var SINGLE_SELECT = "single_select";
|
|
193
199
|
var MULTI_SELECT = "multi_select";
|
|
@@ -195,15 +201,15 @@ var WIDGET_TOOL_NAMES = [
|
|
|
195
201
|
`mcp__${SERVER_NAME}__${SINGLE_SELECT}`,
|
|
196
202
|
`mcp__${SERVER_NAME}__${MULTI_SELECT}`
|
|
197
203
|
];
|
|
198
|
-
var optionsSchema = z.array(z.string().min(1)).min(2).max(8).describe("2\u20138 short, distinct choices for the user to pick from.");
|
|
199
|
-
var TAIL = "Do NOT add an 'Other' or 'None of these' catch-all \u2014 the UI always offers an 'Other' free-text option automatically. After calling this, END YOUR TURN and wait: the user's answer arrives as their next message and starts a fresh run.";
|
|
204
|
+
// Choice list shared by the select widgets: 2 to 8 non-empty strings. The
// description ends with the shared language hint.
var optionsSchema = z
  .array(z.string().min(1))
  .min(2)
  .max(8)
  .describe(`2\u20138 short, distinct choices for the user to pick from. ${WIDGET_LANGUAGE_HINT}`);
|
|
205
|
+
// Common suffix appended to both select-widget tool descriptions: no manual
// catch-all option, the shared language hint, then the end-your-turn instruction.
var TAIL = `Do NOT add an 'Other' or 'None of these' catch-all \u2014 the UI always offers an 'Other' free-text option automatically. ${WIDGET_LANGUAGE_HINT} After calling this, END YOUR TURN and wait: the user's answer arrives as their next message and starts a fresh run.`;
|
|
200
206
|
function createWidgetTooling() {
|
|
201
207
|
const collected = [];
|
|
202
208
|
const singleSelect = tool(
|
|
203
209
|
SINGLE_SELECT,
|
|
204
210
|
"Ask the user a single-select (radio-button) question \u2014 exactly one answer. Use this for a genuine either/or choice (competing approaches, scope decisions, yes/no) instead of writing the options as prose. " + TAIL,
|
|
205
211
|
{
|
|
206
|
-
question: z.string().min(1).describe("The question to ask the user."),
|
|
212
|
+
question: z.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT),
|
|
207
213
|
body: z.string().optional().describe(
|
|
208
214
|
"Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
|
|
209
215
|
),
|
|
@@ -226,7 +232,7 @@ function createWidgetTooling() {
|
|
|
226
232
|
MULTI_SELECT,
|
|
227
233
|
"Ask the user a multi-select (checkbox) question \u2014 they may pick any number of options, including none of the presets if they use 'Other'. Use this for 'select all that apply' questions (which features to include, which files to touch). " + TAIL,
|
|
228
234
|
{
|
|
229
|
-
question: z.string().min(1).describe("The question to ask the user."),
|
|
235
|
+
question: z.string().min(1).describe("The question to ask the user. " + WIDGET_LANGUAGE_HINT),
|
|
230
236
|
body: z.string().optional().describe(
|
|
231
237
|
"Optional markdown shown above the question so the user can read the context they're confirming (e.g. the drafted release notes). Omit for plain questions."
|
|
232
238
|
),
|
|
@@ -266,8 +272,27 @@ function widgetPosted(kind) {
|
|
|
266
272
|
import { createSdkMcpServer as createSdkMcpServer2, tool as tool2 } from "@anthropic-ai/claude-agent-sdk";
|
|
267
273
|
import { z as z2 } from "zod";
|
|
268
274
|
|
|
269
|
-
// src/
|
|
270
|
-
var
|
|
275
|
+
// src/code-lang.ts
|
|
276
|
+
/**
 * Maps a lowercase file extension (without the leading dot) to the language
 * tag placed after the opening fence of a rendered code block.
 */
var EXT_TO_LANG = {
  ts: "typescript",
  tsx: "tsx",
  js: "javascript",
  jsx: "jsx",
  json: "json",
  css: "css",
  md: "markdown",
  sh: "bash",
  py: "python",
  yaml: "yaml",
  yml: "yaml",
  html: "markup",
  xml: "markup",
  sql: "sql"
};

/**
 * Resolves the fence language for a file path from its extension.
 *
 * @param {string} path - File path; only the final `/`-separated segment is inspected.
 * @returns {string | undefined} The mapped language tag, or `undefined` when the
 *   file name has no extension or the extension is not in `EXT_TO_LANG`.
 */
function langFromPath(path) {
  // Look at the file name only, so a dot in a directory name is never
  // mistaken for the start of an extension.
  const base = path.slice(path.lastIndexOf("/") + 1);
  const dot = base.lastIndexOf(".");
  // A name without a dot has no extension (a file literally called `sh` is not bash).
  if (dot === -1) return void 0;
  const ext = base.slice(dot + 1).toLowerCase();
  // Own-property check: a bare `EXT_TO_LANG[ext]` would also resolve inherited
  // keys such as `constructor` or `__proto__` and return a non-string value.
  return Object.prototype.hasOwnProperty.call(EXT_TO_LANG, ext) ? EXT_TO_LANG[ext] : void 0;
}
|
|
271
296
|
|
|
272
297
|
// src/plan.ts
|
|
273
298
|
var SERVER_NAME2 = "flume_plan";
|
|
@@ -357,7 +382,8 @@ function renderPlan(plan) {
|
|
|
357
382
|
lines2.push("");
|
|
358
383
|
lines2.push(`\`${entry.file}\``);
|
|
359
384
|
lines2.push("");
|
|
360
|
-
|
|
385
|
+
const lang = langFromPath(entry.file);
|
|
386
|
+
lines2.push(lang ? "```" + lang : "```");
|
|
361
387
|
lines2.push(entry.pseudoCode);
|
|
362
388
|
lines2.push("```");
|
|
363
389
|
}
|
|
@@ -446,6 +472,15 @@ var STATUS_ICON = {
|
|
|
446
472
|
not_met: "\u274C",
|
|
447
473
|
unclear: "\u26A0\uFE0F"
|
|
448
474
|
};
|
|
475
|
+
// Icon placed in front of each CI/CD check line in the rendered report, keyed by
// the check's `status` value ("passed" or "failed").
var CICD_STATUS_ICON = {
|
|
476
|
+
passed: "\u2705",
|
|
477
|
+
failed: "\u274C"
|
|
478
|
+
};
|
|
479
|
+
// Schema for a single verification check result: the command that was run, its
// passed/failed status, and an optional excerpt of the output. It is the element
// type of the report's optional `cicd` array.
var cicdCheckSchema = z3.object({
|
|
480
|
+
command: z3.string().min(1).describe("The exact verification command run, e.g. `pnpm typecheck`."),
|
|
481
|
+
status: z3.enum(["passed", "failed"]).describe("Whether the command passed or failed."),
|
|
482
|
+
output: z3.string().optional().describe("Short excerpt of failing output; include on failure.")
|
|
483
|
+
});
|
|
449
484
|
var evidenceSchema = z3.object({
|
|
450
485
|
file: z3.string().min(1).describe("Repo-relative path the hunk comes from."),
|
|
451
486
|
hunk: z3.string().min(1).describe(
|
|
@@ -477,6 +512,9 @@ var reportInputSchema = {
|
|
|
477
512
|
),
|
|
478
513
|
conflictResolution: z3.string().optional().describe(
|
|
479
514
|
"Markdown: present ONLY when a merge conflict was actually resolved. Explain, per conflicted file, how ours/theirs were integrated. Rendered under '## Conflict resolution'. Omit entirely when no conflict occurred."
|
|
515
|
+
),
|
|
516
|
+
cicd: z3.array(cicdCheckSchema).optional().describe(
|
|
517
|
+
"Verify-phase build/typecheck/lint/test results. Omit when the repo has no verification setup. Rendered under '## CI/CD'."
|
|
480
518
|
)
|
|
481
519
|
};
|
|
482
520
|
var reportSchema = z3.object(reportInputSchema);
|
|
@@ -504,6 +542,15 @@ function renderReport(report) {
|
|
|
504
542
|
if (report.conflictResolution?.trim()) {
|
|
505
543
|
lines2.push("", "## Conflict resolution", "", report.conflictResolution.trim());
|
|
506
544
|
}
|
|
545
|
+
if (report.cicd && report.cicd.length > 0) {
|
|
546
|
+
lines2.push("", "## CI/CD");
|
|
547
|
+
for (const check of report.cicd) {
|
|
548
|
+
lines2.push("", `- ${CICD_STATUS_ICON[check.status]} \`${check.command}\``);
|
|
549
|
+
if (check.status === "failed" && check.output?.trim()) {
|
|
550
|
+
lines2.push("", "```", check.output.trim(), "```");
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
}
|
|
507
554
|
lines2.push("", "## Code quality", "", report.codeQuality.trim());
|
|
508
555
|
lines2.push("", "## Caveats / follow-ups", "", report.caveats.trim());
|
|
509
556
|
return lines2.join("\n");
|
|
@@ -512,7 +559,7 @@ function createReportTooling() {
|
|
|
512
559
|
let submittedReport = null;
|
|
513
560
|
const submitReport = tool3(
|
|
514
561
|
SUBMIT_REPORT,
|
|
515
|
-
"Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. Do NOT include a PR link \u2014 the runner appends it.",
|
|
562
|
+
"Submit the final implementation report as structured data. Call this exactly once, at the end of the run. `acceptanceCriteria` must contain one entry per plan criterion, each with a met / not_met / unclear verdict and the diff hunk(s) that prove it. `summary`, `filesChanged`, `codeQuality`, and `caveats` are the four named markdown sections. `cicd` (optional) holds Verify-phase check results (one entry per command with `command`, `status` `passed`/`failed`, and `output` on failure); omit when no verification setup exists. Do NOT include a PR link \u2014 the runner appends it.",
|
|
516
563
|
reportInputSchema,
|
|
517
564
|
async (args) => {
|
|
518
565
|
submittedReport = reportSchema.parse(args);
|
|
@@ -729,6 +776,7 @@ function appendRule(lines2, intro, ruleName) {
|
|
|
729
776
|
lines2.push("", intro, "", loadRule(ruleName));
|
|
730
777
|
}
|
|
731
778
|
var WRITING_INTRO = "These technical-writing guidelines apply to the plan and report prose you author in this run:";
|
|
779
|
+
// Prompt directive: pick the thread's dominant natural language and use it for
// everything authored in the run (reply body, plan/report fields, clarifying
// questions and widget options), keeping code identifiers, paths and quoted code
// verbatim. It is listed right after `widgets` in the prompt line arrays.
var LANGUAGE_DIRECTIVE = "First, determine the dominant natural language of the incoming thread (the request title/body and the user's messages). Use that one language for EVERYTHING you author this run - your reply body, any plan or report fields, AND every clarifying question and its widget options. Never mix languages: if the thread is in English, your questions and options must be in English too. Keep code identifiers, file paths, and quoted code verbatim.";
|
|
732
780
|
function turnHeading(turn, agentName) {
|
|
733
781
|
if (turn.role === "user") return "User";
|
|
734
782
|
if (turn.failed) return `${agentName} (this run ended in an error)`;
|
|
@@ -752,7 +800,8 @@ function buildPrompt(ctx) {
|
|
|
752
800
|
`The repository ${ctx.repo.fullName} is checked out in your current working directory on branch "${ctx.repo.checkoutBranch}" at commit ${ctx.repo.checkoutSha.slice(0, 7)}.`,
|
|
753
801
|
task,
|
|
754
802
|
orient,
|
|
755
|
-
widgets
|
|
803
|
+
widgets,
|
|
804
|
+
LANGUAGE_DIRECTIVE
|
|
756
805
|
];
|
|
757
806
|
if (ctx.permissionMode !== "plan") {
|
|
758
807
|
lines2.push(
|
|
@@ -784,6 +833,7 @@ function buildRevisePrompt(ctx) {
|
|
|
784
833
|
task,
|
|
785
834
|
orient,
|
|
786
835
|
widgets,
|
|
836
|
+
LANGUAGE_DIRECTIVE,
|
|
787
837
|
"",
|
|
788
838
|
"These coding guidelines apply to all code produced in this run:",
|
|
789
839
|
"",
|
|
@@ -899,6 +949,7 @@ function buildReleasePrompt(ctx, baseChecks) {
|
|
|
899
949
|
task,
|
|
900
950
|
orient,
|
|
901
951
|
widgets,
|
|
952
|
+
LANGUAGE_DIRECTIVE,
|
|
902
953
|
"",
|
|
903
954
|
"These coding guidelines apply to all code produced in this run:",
|
|
904
955
|
"",
|
|
@@ -926,6 +977,14 @@ function buildReleasePrompt(ctx, baseChecks) {
|
|
|
926
977
|
"```"
|
|
927
978
|
);
|
|
928
979
|
}
|
|
980
|
+
if (ctx.prerelease) {
|
|
981
|
+
lines2.push(
|
|
982
|
+
"",
|
|
983
|
+
"# Pre-release",
|
|
984
|
+
"",
|
|
985
|
+
"This is a PRE-RELEASE. When proposing and applying versions, use a semver pre-release version string (e.g. `0.9.0-beta.1`): take the next stable version you would otherwise pick and append `-beta.N`, where N is the next unused beta number for that version (check existing `v<version>-beta.*` tags). Offer these pre-release strings in the version-confirmation widgets, and write them to package.json, CHANGELOG.md, and the `flumecode:versions` comment as usual."
|
|
986
|
+
);
|
|
987
|
+
}
|
|
929
988
|
appendThread(lines2, ctx);
|
|
930
989
|
lines2.push(
|
|
931
990
|
"",
|
package/package.json
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: technical-writing
|
|
3
3
|
description: >-
|
|
4
|
-
Inline-code conventions for agent-authored plan and report
|
|
5
|
-
identifiers in backticks
|
|
4
|
+
Inline-code and output-language conventions for agent-authored plan and report
|
|
5
|
+
prose: wrap code identifiers in backticks, and write prose in the same natural
|
|
6
|
+
language as the user's request.
|
|
6
7
|
---
|
|
7
8
|
|
|
8
9
|
# Technical Writing
|
|
@@ -12,3 +13,12 @@ description: >-
|
|
|
12
13
|
Wrap code identifiers — function names, variable names, type names, file names, commands, and flags — in inline backticks so they render as inline code. For example: `getCodingSessionsForRequest`, not getCodingSessionsForRequest.
|
|
13
14
|
|
|
14
15
|
This convention applies to all free-text fields in plans and reports: goals, step descriptions, acceptance criteria, summaries, code-quality notes, and caveats.
|
|
16
|
+
|
|
17
|
+
## Output language
|
|
18
|
+
|
|
19
|
+
Before writing anything, determine the dominant natural language of the incoming thread (the
|
|
20
|
+
request title/body and the user's messages). Use that one language for all free-text prose in
|
|
21
|
+
this run — your reply body, plan goals/steps/risks, report summaries, clarifying questions,
|
|
22
|
+
widget options, and push-backs. Never switch languages mid-response. Keep code identifiers, file
|
|
23
|
+
paths, commands, and quoted code/diffs verbatim; only the surrounding prose follows the thread
|
|
24
|
+
language.
|
|
@@ -183,6 +183,32 @@ version did not change.
|
|
|
183
183
|
silence them.
|
|
184
184
|
- **Never commit, push, or open a PR** — the runner does that.
|
|
185
185
|
|
|
186
|
+
## Pre-release
|
|
187
|
+
|
|
188
|
+
When the prompt contains a `# Pre-release` section, this release uses semver
|
|
189
|
+
pre-release version strings instead of stable ones:
|
|
190
|
+
|
|
191
|
+
- **Compute versions:** take the next stable version you would otherwise propose
|
|
192
|
+
(patch or minor bump), then append `-beta.N`, where N is the next unused beta
|
|
193
|
+
number for that base version. Check existing tags with:
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
git tag -l --sort=-version:refname 'v<version>-beta.*' | head -1
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
If no beta tags exist for that base version, start at `-beta.1`.
|
|
200
|
+
|
|
201
|
+
- **Phase 1 (propose):** offer the pre-release version string (e.g.
|
|
202
|
+
`0.9.0-beta.1`) in the version-confirmation widgets instead of the stable
|
|
203
|
+
version.
|
|
204
|
+
|
|
205
|
+
- **Phase 2 (apply):** write the pre-release version string (e.g.
|
|
206
|
+
`0.9.0-beta.1`) to `package.json`, `CHANGELOG.md`, and the
|
|
207
|
+
`<!-- flumecode:versions {...} -->` comment — exactly as you would for a
|
|
208
|
+
stable release, just with the pre-release suffix included.
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
186
212
|
## Pre-release checks
|
|
187
213
|
|
|
188
214
|
We cannot release code with failing checks. Before this turn, the runner ran the
|
|
@@ -88,11 +88,26 @@ contain, in this order:
|
|
|
88
88
|
|
|
89
89
|
### Every page: front-load an "At a glance" block
|
|
90
90
|
|
|
91
|
+
Before the "At a glance" block on **every** page (component pages, README,
|
|
92
|
+
architecture, glossary), place a TL;DR blockquote immediately after the H1:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
> **TL;DR** — one plain-language sentence on what this page covers.
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Then a blank line, then the existing `> **Purpose**` / "At a glance" block
|
|
99
|
+
(where applicable). For `README.md`, place the TL;DR after the
|
|
100
|
+
`<!-- wiki-synced-to -->` marker and H1. The blank line between the TL;DR and
|
|
101
|
+
the next blockquote is required — without it, markdown merges the two
|
|
102
|
+
blockquotes into one. This rule applies in both Bootstrap and Update modes.
|
|
103
|
+
|
|
91
104
|
So an agent can grab context in seconds, begin each component page with:
|
|
92
105
|
|
|
93
106
|
```
|
|
94
107
|
# <component>
|
|
95
108
|
|
|
109
|
+
> **TL;DR** — one plain-language sentence on what this page covers.
|
|
110
|
+
|
|
96
111
|
> **Purpose** — one or two sentences.
|
|
97
112
|
> **Key files** — `path/a.ts`, `path/b.ts` (the entry points worth opening).
|
|
98
113
|
> **Depends on** — what it relies on. **Used by** — what relies on it.
|
|
@@ -138,7 +138,11 @@ the next step.
|
|
|
138
138
|
7. **Report** — Task, `model: "opus"`, read-only. Give the subagent the AC
|
|
139
139
|
verdicts (with criterion text, from step 4), the Verify results (from step 3),
|
|
140
140
|
and the quality findings, and tell it to run `git --no-pager diff` itself as
|
|
141
|
-
the **single source of truth** for the report.
|
|
141
|
+
the **single source of truth** for the report. Pass the Verify results as the
|
|
142
|
+
`cicd` field — one entry per check with `command`, `status` (`passed`/`failed`),
|
|
143
|
+
and (on failure) a short `output` excerpt. Omit `cicd` when no verification
|
|
144
|
+
setup exists. A failing check does NOT block the report — include the failing
|
|
145
|
+
entry and continue. Do not pass the full plan — the
|
|
142
146
|
AC verdicts carry each criterion verbatim, and the live `git --no-pager diff`
|
|
143
147
|
is the authoritative source for evidence; re-inlining the full plan is
|
|
144
148
|
redundant. Keep each subagent prompt to the minimal self-contained slice it
|
|
@@ -181,6 +185,7 @@ The report subagent calls `submit_report` with these fields:
|
|
|
181
185
|
verbatim from the live `git --no-pager diff`, including each hunk's `@@ -a,b +c,d @@` header line(s) (do not strip them — the report renders file line numbers from them), and proves the verdict (`note`
|
|
182
186
|
optionally explains it). Never include a hunk that isn't in the actual diff. Cite
|
|
183
187
|
the supporting hunk(s) for a met criterion; may be empty for not_met / unclear.
|
|
188
|
+
- **`cicd`** (optional) — array of Verify-phase check results. Each entry: `command` (exact command run), `status` (`"passed"` / `"failed"`), `output` (short failing-output excerpt, on failure only). Omit when the repo has no verification setup. Rendered under `## CI/CD`. A failing check does not block the report.
|
|
184
189
|
|
|
185
190
|
## Always
|
|
186
191
|
|
|
@@ -108,6 +108,13 @@ own independently-acceptable "Accept as plan" draft. After a plan is accepted th
|
|
|
108
108
|
keep commenting to refine it; treat a later turn as a fresh **Plan** phase and call
|
|
109
109
|
`submit_plan` again with a `plans[]` array containing the revised fields.
|
|
110
110
|
|
|
111
|
+
Before adding an entry to `plans[]`, apply this right-sizing checklist — if a plan fails any criterion, split it into separate entries:
|
|
112
|
+
|
|
113
|
+
- **Single, clear outcome** — one bug fixed, one feature increment, one refactor. If the `title` needs "and", consider splitting.
|
|
114
|
+
- **Fits in a sprint comfortably** — if it can't fit in one iteration, it's likely an epic that needs breaking down.
|
|
115
|
+
- **Reviewable PR** — small enough that a reviewer can hold it in their head (often cited as under ~200–400 lines of diff, though this varies).
|
|
116
|
+
- **Testable acceptance criteria** — you can state up front what "done" looks like; use the `acceptanceCriteria` field to capture this.
|
|
117
|
+
|
|
111
118
|
## Always
|
|
112
119
|
|
|
113
120
|
- Stay read-only. Propose; do not edit.
|
|
@@ -104,5 +104,6 @@ Call **`submit_report`** with the structured report. Fields:
|
|
|
104
104
|
file, explaining which side you kept and why (or how you merged both intents). Wrap file names
|
|
105
105
|
and code identifiers in inline backticks. This is what the user reads to understand how each
|
|
106
106
|
conflict was integrated.
|
|
107
|
+
- `cicd` (optional): array of Verify-phase check results from Step 3, each with `command`, `status` (`"passed"`/`"failed"`), and `output` on failure. Omit when no build/test setup exists.
|
|
107
108
|
|
|
108
109
|
The runner renders the report and appends the pull-request link — do not add one yourself.
|
|
@@ -84,6 +84,8 @@ user:
|
|
|
84
84
|
`implement-plan` does. Include one `acceptanceCriteria` entry per plan AC (with a
|
|
85
85
|
met / not_met / unclear verdict and the diff hunk(s) that prove it), plus the four
|
|
86
86
|
required markdown sections (`summary`, `filesChanged`, `codeQuality`, `caveats`).
|
|
87
|
+
Include `cicd` from the Verify results (one entry per check, same shape as
|
|
88
|
+
`implement-plan`; omit when no verification setup exists).
|
|
87
89
|
Base `filesChanged` and evidence on the actual `git --no-pager diff`, not on what
|
|
88
90
|
a subagent claimed; if the diff is empty, say nothing was changed. The runner
|
|
89
91
|
renders the report and appends the pull-request link — do not add one yourself.
|