agent-state-machine 2.2.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +78 -2
- package/lib/remote/client.js +37 -8
- package/lib/runtime/agent.js +6 -2
- package/lib/runtime/interaction.js +2 -1
- package/lib/runtime/prompt.js +37 -1
- package/lib/runtime/runtime.js +67 -5
- package/package.json +1 -1
- package/templates/project-builder/README.md +304 -56
- package/templates/project-builder/agents/code-fixer.md +50 -0
- package/templates/project-builder/agents/code-writer.md +3 -0
- package/templates/project-builder/agents/sanity-checker.md +6 -0
- package/templates/project-builder/agents/sanity-runner.js +3 -1
- package/templates/project-builder/agents/test-planner.md +3 -1
- package/templates/project-builder/config.js +4 -4
- package/templates/project-builder/scripts/workflow-helpers.js +104 -2
- package/templates/project-builder/workflow.js +151 -14
- package/templates/starter/README.md +291 -42
- package/templates/starter/config.js +1 -1
- package/vercel-server/api/submit/[token].js +2 -13
- package/vercel-server/api/ws/cli.js +40 -2
- package/vercel-server/local-server.js +32 -22
- package/vercel-server/public/remote/assets/index-BsJsLDKc.css +1 -0
- package/vercel-server/public/remote/assets/index-CmtT6ADh.js +168 -0
- package/vercel-server/public/remote/index.html +2 -2
- package/vercel-server/ui/src/App.jsx +69 -62
- package/vercel-server/ui/src/components/ChoiceInteraction.jsx +69 -18
- package/vercel-server/ui/src/components/ConfirmInteraction.jsx +7 -7
- package/vercel-server/ui/src/components/ContentCard.jsx +600 -104
- package/vercel-server/ui/src/components/EventsLog.jsx +20 -13
- package/vercel-server/ui/src/components/Footer.jsx +9 -4
- package/vercel-server/ui/src/components/Header.jsx +12 -3
- package/vercel-server/ui/src/components/SendingCard.jsx +33 -0
- package/vercel-server/ui/src/components/TextInteraction.jsx +8 -8
- package/vercel-server/ui/src/index.css +82 -10
- package/vercel-server/public/remote/assets/index-BOKpYANC.js +0 -148
- package/vercel-server/public/remote/assets/index-DHL_iHQW.css +0 -1
|
@@ -1,60 +1,97 @@
|
|
|
1
|
-
#
|
|
1
|
+
# agent-state-machine
|
|
2
2
|
|
|
3
|
-
A workflow
|
|
3
|
+
A workflow runner for building **linear, stateful agent workflows** in plain JavaScript.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
You write normal `async/await` code. The runtime handles:
|
|
6
|
+
- **Auto-persisted** `memory` (saved to disk on mutation)
|
|
7
|
+
- **Auto-tracked** `fileTree` (detects file changes made by agents via Git)
|
|
8
|
+
- **Human-in-the-loop** blocking via `askHuman()` or agent-driven interactions
|
|
9
|
+
- Local **JS agents** + **Markdown agents** (LLM-powered)
|
|
10
|
+
- **Agent retries** with history logging for failures
|
|
6
11
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
├── agents/ # Custom agents (.js/.mjs/.cjs or .md)
|
|
13
|
-
├── interactions/ # Human-in-the-loop inputs (created at runtime)
|
|
14
|
-
├── state/ # Runtime state (current.json, history.jsonl)
|
|
15
|
-
└── steering/ # Steering configuration
|
|
16
|
-
\`\`\`
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
You need to install the package **globally** to get the CLI, and **locally** in your project so your workflow can import the library.
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
### Global CLI
|
|
19
|
+
Provides the `state-machine` command.
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
```bash
|
|
22
|
+
# npm
|
|
23
|
+
npm i -g agent-state-machine
|
|
21
24
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
\`\`\`
|
|
25
|
+
# pnpm
|
|
26
|
+
pnpm add -g agent-state-machine
|
|
27
|
+
```
|
|
26
28
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
state-machine status project-builder
|
|
30
|
-
\`\`\`
|
|
29
|
+
### Local Library
|
|
30
|
+
Required so your `workflow.js` can `import { agent, memory, fileTree } from 'agent-state-machine'`.
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
state-machine
|
|
35
|
-
\`\`\`
|
|
32
|
+
```bash
|
|
33
|
+
# npm
|
|
34
|
+
npm i agent-state-machine
|
|
36
35
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
# pnpm (for monorepos/turbo, install in root)
|
|
37
|
+
pnpm add agent-state-machine -w
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Requirements: Node.js >= 16.
|
|
41
|
+
|
|
42
|
+
---
|
|
41
43
|
|
|
42
|
-
|
|
43
|
-
\`\`\`bash
|
|
44
|
-
state-machine reset project-builder
|
|
45
|
-
\`\`\`
|
|
44
|
+
## CLI
|
|
46
45
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
state-machine
|
|
50
|
-
|
|
46
|
+
```bash
|
|
47
|
+
state-machine --setup <workflow-name>
|
|
48
|
+
state-machine --setup <workflow-name> --template <template-name>
|
|
49
|
+
state-machine run <workflow-name>
|
|
50
|
+
state-machine run <workflow-name> -reset
|
|
51
|
+
state-machine run <workflow-name> -reset-hard
|
|
51
52
|
|
|
52
|
-
|
|
53
|
+
state-machine -reset <workflow-name>
|
|
54
|
+
state-machine -reset-hard <workflow-name>
|
|
53
55
|
|
|
54
|
-
|
|
56
|
+
state-machine history <workflow-name> [limit]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Templates live in `templates/`; the `starter` template is used when `--template` is omitted.
|
|
60
|
+
|
|
61
|
+
Workflows live in:
|
|
62
|
+
|
|
63
|
+
```text
|
|
64
|
+
workflows/<name>/
|
|
65
|
+
├── workflow.js # Native JS workflow (async/await)
|
|
66
|
+
├── config.js # Model/API key configuration
|
|
67
|
+
├── package.json # Sets "type": "module" for this workflow folder
|
|
68
|
+
├── agents/ # Custom agents (.js/.mjs/.cjs or .md)
|
|
69
|
+
├── interactions/ # Human-in-the-loop files (auto-created)
|
|
70
|
+
├── state/ # current.json, history.jsonl
|
|
71
|
+
└── steering/ # global.md + config.json
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Writing workflows (native JS)
|
|
77
|
+
|
|
78
|
+
Edit `config.js` to set models and API keys for the workflow.
|
|
79
|
+
|
|
80
|
+
```js
|
|
81
|
+
/**
|
|
82
|
+
 *
|
|
83
|
+
* project-builder Workflow
|
|
84
|
+
*
|
|
85
|
+
* Native JavaScript workflow - write normal async/await code!
|
|
86
|
+
*
|
|
87
|
+
* Features:
|
|
88
|
+
* - memory object auto-persists to disk (use memory guards for idempotency)
|
|
89
|
+
* - Use standard JS control flow (if, for, etc.)
|
|
90
|
+
* - Interactive prompts pause and wait for user input
|
|
91
|
+
*/
|
|
55
92
|
|
|
56
|
-
\`\`\`js
|
|
57
93
|
import { agent, memory, askHuman, parallel } from 'agent-state-machine';
|
|
94
|
+
import { notify } from './scripts/mac-notification.js';
|
|
58
95
|
|
|
59
96
|
export default async function() {
|
|
60
97
|
console.log('Starting project-builder workflow...');
|
|
@@ -71,8 +108,8 @@ export default async function() {
|
|
|
71
108
|
|
|
72
109
|
console.log('Example agent memory.userInfo:', memory.userInfo || userInfo);
|
|
73
110
|
|
|
74
|
-
// Context is
|
|
75
|
-
const { greeting } = await agent('yoda-greeter', { userLocation });
|
|
111
|
+
// Context is explicit: pass what the agent needs
|
|
112
|
+
const { greeting } = await agent('yoda-greeter', { userLocation, memory });
|
|
76
113
|
console.log('Example agent greeting:', greeting);
|
|
77
114
|
|
|
78
115
|
// Or you can provide context manually
|
|
@@ -93,27 +130,238 @@ export default async function() {
|
|
|
93
130
|
|
|
94
131
|
console.log('Workflow completed!');
|
|
95
132
|
}
|
|
96
|
-
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Resuming workflows
|
|
136
|
+
|
|
137
|
+
`state-machine run` restarts your workflow from the top, loading the persisted state.
|
|
138
|
+
|
|
139
|
+
If the workflow needs human input, it will **block inline** in the terminal. You can answer in the terminal, edit `interactions/<slug>.md`, or respond in the browser.
|
|
140
|
+
|
|
141
|
+
If the process is interrupted, running `state-machine run <workflow-name>` again re-runs the workflow from the top with the persisted `memory` loaded; already-completed steps are skipped only if your workflow guards them with `memory` checks.
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Core API
|
|
146
|
+
|
|
147
|
+
### `agent(name, params?, options?)`
|
|
148
|
+
|
|
149
|
+
Runs `workflows/<name>/agents/<agent>.(js|mjs|cjs)` or `<agent>.md`.
|
|
150
|
+
|
|
151
|
+
```js
|
|
152
|
+
const out = await agent('review', { file: 'src/app.js' });
|
|
153
|
+
memory.lastReview = out;
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Options:
|
|
157
|
+
- `retry` (number | false): default `2` (3 total attempts). Use `false` to disable retries.
|
|
158
|
+
- `steering` (string | string[]): extra steering files to load from `workflows/<name>/steering/`.
|
|
159
|
+
|
|
160
|
+
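Context is explicit: an agent receives only the `params` you pass to `agent()` (plus the runtime-provided `_steering` and `_config`). `memory` is not shared automatically, so pass it — or the specific values needed — when the agent requires it.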
|
|
161
|
+
|
|
162
|
+
### `memory`
|
|
163
|
+
|
|
164
|
+
A persisted object for your workflow.
|
|
165
|
+
|
|
166
|
+
- Mutations auto-save to `workflows/<name>/state/current.json`.
|
|
167
|
+
- Use it as your "long-lived state" between runs.
|
|
168
|
+
|
|
169
|
+
```js
|
|
170
|
+
memory.count = (memory.count || 0) + 1;
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### `fileTree`
|
|
174
|
+
|
|
175
|
+
Auto-tracked file changes made by agents.
|
|
176
|
+
|
|
177
|
+
- Before each `await agent(...)`, the runtime captures a Git baseline
|
|
178
|
+
- After the agent completes, it detects created/modified/deleted files
|
|
179
|
+
- Changes are stored in `memory.fileTree` and persisted to `current.json`
|
|
180
|
+
|
|
181
|
+
```js
|
|
182
|
+
// Files are auto-tracked when agents create them
|
|
183
|
+
await agent('code-writer', { task: 'Create auth module' });
|
|
184
|
+
|
|
185
|
+
// Access tracked files
|
|
186
|
+
console.log(memory.fileTree);
|
|
187
|
+
// { "src/auth.js": { status: "created", createdBy: "code-writer", ... } }
|
|
188
|
+
|
|
189
|
+
// Pass file context to other agents
|
|
190
|
+
await agent('code-reviewer', { fileTree: memory.fileTree });
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Configuration in `config.js`:
|
|
194
|
+
|
|
195
|
+
```js
|
|
196
|
+
export const config = {
|
|
197
|
+
// ... models and apiKeys ...
|
|
198
|
+
projectRoot: process.env.PROJECT_ROOT, // defaults to ../.. from workflow
|
|
199
|
+
fileTracking: true, // enable/disable (default: true)
|
|
200
|
+
fileTrackingIgnore: ['node_modules/**', '.git/**', 'dist/**'],
|
|
201
|
+
fileTrackingKeepDeleted: false // keep deleted files in tree
|
|
202
|
+
};
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### `trackFile(path, options?)` / `untrackFile(path)`
|
|
206
|
+
|
|
207
|
+
Manual file tracking utilities:
|
|
208
|
+
|
|
209
|
+
```js
|
|
210
|
+
import { trackFile, getFileTree, untrackFile } from 'agent-state-machine';
|
|
211
|
+
|
|
212
|
+
trackFile('README.md', { caption: 'Project docs' });
|
|
213
|
+
const tree = getFileTree();
|
|
214
|
+
untrackFile('old-file.js');
|
|
215
|
+
```
|
|
97
216
|
|
|
98
|
-
|
|
217
|
+
### `askHuman(question, options?)`
|
|
99
218
|
|
|
100
|
-
|
|
219
|
+
Gets user input.
|
|
101
220
|
|
|
102
|
-
|
|
221
|
+
- In a TTY, it prompts in the terminal (or via the browser when remote follow is enabled).
|
|
222
|
+
- Otherwise it creates `interactions/<slug>.md` and blocks until you confirm in the terminal (or respond in the browser).
|
|
223
|
+
|
|
224
|
+
```js
|
|
225
|
+
const repo = await askHuman('What repo should I work on?', { slug: 'repo' });
|
|
226
|
+
memory.repo = repo;
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### `parallel([...])` / `parallelLimit([...], limit)`
|
|
230
|
+
|
|
231
|
+
Run multiple `agent()` calls concurrently:
|
|
232
|
+
|
|
233
|
+
```js
|
|
234
|
+
import { agent, parallel, parallelLimit } from 'agent-state-machine';
|
|
235
|
+
|
|
236
|
+
const [a, b] = await parallel([
|
|
237
|
+
agent('review', { file: 'src/a.js' }),
|
|
238
|
+
agent('review', { file: 'src/b.js' }),
|
|
239
|
+
]);
|
|
240
|
+
|
|
241
|
+
const results = await parallelLimit(
|
|
242
|
+
['a.js', 'b.js', 'c.js'].map(f => agent('review', { file: f })),
|
|
243
|
+
2
|
|
244
|
+
);
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## Agents
|
|
250
|
+
|
|
251
|
+
Agents live in `workflows/<workflow>/agents/`.
|
|
252
|
+
|
|
253
|
+
### JavaScript agents
|
|
254
|
+
|
|
255
|
+
**ESM (`.js` / `.mjs`)**:
|
|
256
|
+
|
|
257
|
+
```js
|
|
258
|
+
// workflows/<name>/agents/example.js
|
|
103
259
|
import { llm } from 'agent-state-machine';
|
|
104
260
|
|
|
105
261
|
export default async function handler(context) {
|
|
106
|
-
|
|
107
|
-
|
|
262
|
+
// context includes:
|
|
263
|
+
// - params passed to agent(name, params)
|
|
264
|
+
// - context._steering (global + optional additional steering content)
|
|
265
|
+
// - context._config (models/apiKeys/workflowDir/projectRoot)
|
|
266
|
+
|
|
267
|
+
// Optionally return _files to annotate tracked files
|
|
268
|
+
return {
|
|
269
|
+
ok: true,
|
|
270
|
+
_files: [{ path: 'src/example.js', caption: 'Example module' }]
|
|
271
|
+
};
|
|
272
|
+
}
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
**CommonJS (`.cjs`)** (only if you prefer CJS):
|
|
276
|
+
|
|
277
|
+
```js
|
|
278
|
+
// workflows/<name>/agents/example.cjs
|
|
279
|
+
async function handler(context) {
|
|
280
|
+
return { ok: true };
|
|
108
281
|
}
|
|
109
|
-
\`\`\`
|
|
110
282
|
|
|
111
|
-
|
|
283
|
+
module.exports = handler;
|
|
284
|
+
module.exports.handler = handler;
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
If you need to request human input from a JS agent, return an `_interaction` payload:
|
|
288
|
+
|
|
289
|
+
```js
|
|
290
|
+
return {
|
|
291
|
+
_interaction: {
|
|
292
|
+
slug: 'approval',
|
|
293
|
+
targetKey: 'approval',
|
|
294
|
+
content: 'Please approve this change (yes/no).'
|
|
295
|
+
}
|
|
296
|
+
};
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
The runtime will block execution and wait for your response in the terminal.
|
|
300
|
+
|
|
301
|
+
### Markdown agents (`.md`)
|
|
112
302
|
|
|
113
|
-
|
|
303
|
+
Markdown agents are LLM-backed prompt templates with optional frontmatter.
|
|
304
|
+
Frontmatter can include `steering` to load additional files from `workflows/<name>/steering/`.
|
|
305
|
+
|
|
306
|
+
```md
|
|
114
307
|
---
|
|
115
|
-
model:
|
|
308
|
+
model: smart
|
|
116
309
|
output: greeting
|
|
310
|
+
steering: tone, product
|
|
311
|
+
---
|
|
312
|
+
Generate a friendly greeting for {{name}}.
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
Calling it:
|
|
316
|
+
|
|
317
|
+
```js
|
|
318
|
+
const { greeting } = await agent('greeter', { name: 'Sam' });
|
|
319
|
+
memory.greeting = greeting;
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## Models & LLM execution
|
|
325
|
+
|
|
326
|
+
In your workflow’s `export const config = { models: { ... } }`, each model value can be one of the following:
|
|
327
|
+
|
|
328
|
+
### CLI command
|
|
329
|
+
|
|
330
|
+
```js
|
|
331
|
+
export const config = {
|
|
332
|
+
models: {
|
|
333
|
+
smart: "claude -m claude-sonnet-4-20250514 -p"
|
|
334
|
+
}
|
|
335
|
+
};
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
### API target
|
|
339
|
+
|
|
340
|
+
Format: `api:<provider>:<model>`
|
|
341
|
+
|
|
342
|
+
```js
|
|
343
|
+
export const config = {
|
|
344
|
+
models: {
|
|
345
|
+
smart: "api:openai:gpt-4.1-mini"
|
|
346
|
+
},
|
|
347
|
+
apiKeys: {
|
|
348
|
+
openai: process.env.OPENAI_API_KEY
|
|
349
|
+
}
|
|
350
|
+
};
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
The runtime captures the fully-built prompt in `state/history.jsonl`. You can view it in the browser, with live updates, when running with the `--local` flag or via the remote URL. Remote follow links persist across runs (stored in `config.js`) unless you pass `-n`/`--new` to regenerate them.
|
|
354
|
+
|
|
117
355
|
---
|
|
118
|
-
|
|
119
|
-
|
|
356
|
+
|
|
357
|
+
## State & persistence
|
|
358
|
+
|
|
359
|
+
Native JS workflows persist to:
|
|
360
|
+
|
|
361
|
+
- `workflows/<name>/state/current.json` — status, memory (includes fileTree), pending interaction
|
|
362
|
+
- `workflows/<name>/state/history.jsonl` — event log (newest entries first, includes agent retry/failure entries)
|
|
363
|
+
- `workflows/<name>/interactions/*.md` — human input files (when paused)
|
|
364
|
+
|
|
365
|
+
## License
|
|
366
|
+
|
|
367
|
+
MIT
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
model: high
|
|
3
|
+
format: json
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Code Fixer Agent
|
|
7
|
+
|
|
8
|
+
You fix specific issues in existing code based on sanity check failures.
|
|
9
|
+
|
|
10
|
+
## Critical Guidelines
|
|
11
|
+
|
|
12
|
+
**DO NOT** disable, skip, or remove failing tests to make them pass.
|
|
13
|
+
Your fixes must address the actual underlying code issues that cause tests to fail.
|
|
14
|
+
|
|
15
|
+
- ❌ Never add `.skip()`, `.todo()`, or comment out tests
|
|
16
|
+
- ❌ Never modify test expectations to match broken behavior
|
|
17
|
+
- ❌ Never delete test files or test cases
|
|
18
|
+
- ❌ Never wrap tests in `try/catch` to swallow errors
|
|
19
|
+
- ✅ Fix the implementation code to pass existing tests
|
|
20
|
+
- ✅ Fix test setup/teardown issues if the tests themselves are misconfigured
|
|
21
|
+
- ✅ Update tests ONLY if the original requirements were misunderstood
|
|
22
|
+
|
|
23
|
+
If the issue truly cannot be fixed within the current architecture, set `"confidence": "low"` and explain why in the analysis.
|
|
24
|
+
|
|
25
|
+
## Input
|
|
26
|
+
- task: Task definition
|
|
27
|
+
- originalImplementation: Current code-writer output
|
|
28
|
+
- sanityCheckResults: Failed checks with specific errors
|
|
29
|
+
- testPlan: Test plan for context
|
|
30
|
+
- previousAttempts: Number of quick-fix attempts so far
|
|
31
|
+
|
|
32
|
+
## Output Format
|
|
33
|
+
|
|
34
|
+
{
|
|
35
|
+
"analysis": {
|
|
36
|
+
"rootCauses": ["What caused each failure"],
|
|
37
|
+
"fixApproach": "Strategy for fixing"
|
|
38
|
+
},
|
|
39
|
+
"fixes": [
|
|
40
|
+
{
|
|
41
|
+
"path": "src/feature.js",
|
|
42
|
+
"operation": "replace",
|
|
43
|
+
"code": "// Full corrected file content"
|
|
44
|
+
}
|
|
45
|
+
],
|
|
46
|
+
"expectedResolutions": ["Which checks should now pass"],
|
|
47
|
+
"confidence": "high|medium|low"
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
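Focus on minimal, targeted fixes: change only what is needed to resolve the failing checks. Each entry in `fixes` carries the full corrected file content, but leave unrelated code untouched and don't rewrite files from scratch unless necessary.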
|
|
@@ -27,6 +27,9 @@ Implement the task following these principles:
|
|
|
27
27
|
- Implement to satisfy the test plan
|
|
28
28
|
- Ensure all test cases can pass
|
|
29
29
|
- Consider edge cases identified in testing
|
|
30
|
+
- Write runnable test files, not just descriptions
|
|
31
|
+
- Use appropriate test locations (e.g. *.test.js, *.spec.js, __tests__/)
|
|
32
|
+
- Tests must import and exercise real implementation functions
|
|
30
33
|
|
|
31
34
|
## Output Format
|
|
32
35
|
|
|
@@ -9,6 +9,7 @@ Input:
|
|
|
9
9
|
- task: { title, description, doneDefinition, sanityCheck }
|
|
10
10
|
- implementation: code-writer output
|
|
11
11
|
- testPlan: test-planner output
|
|
12
|
+
- testFramework: { framework, command }
|
|
12
13
|
|
|
13
14
|
Return JSON only in this shape:
|
|
14
15
|
{
|
|
@@ -34,6 +35,8 @@ Guidelines:
|
|
|
34
35
|
- If the task describes a server endpoint, include a curl check.
|
|
35
36
|
- Keep checks short, clear, and runnable.
|
|
36
37
|
- Include at least one file_exists or file_contains check when files are created/modified.
|
|
38
|
+
- If tests exist (from testPlan or implementation), include a check of type "test_suite".
|
|
39
|
+
- Use testFramework.command for running tests (optionally target specific files when possible).
|
|
37
40
|
|
|
38
41
|
Task:
|
|
39
42
|
{{task}}
|
|
@@ -43,3 +46,6 @@ Implementation:
|
|
|
43
46
|
|
|
44
47
|
Test Plan:
|
|
45
48
|
{{testPlan}}
|
|
49
|
+
|
|
50
|
+
Test Framework:
|
|
51
|
+
{{testFramework}}
|
|
@@ -6,7 +6,9 @@ const DEFAULT_TIMEOUT_MS = 30000;
|
|
|
6
6
|
|
|
7
7
|
export default async function sanityRunner(context) {
|
|
8
8
|
const { checks = [], setup, teardown } = context;
|
|
9
|
-
const
|
|
9
|
+
const workflowDir = context?._config?.workflowDir || process.cwd();
|
|
10
|
+
const projectRoot = context?._config?.projectRoot || workflowDir;
|
|
11
|
+
const cwd = projectRoot;
|
|
10
12
|
const results = [];
|
|
11
13
|
|
|
12
14
|
let setupError = null;
|
|
@@ -22,6 +22,7 @@ Create a comprehensive test plan for the task. Include:
|
|
|
22
22
|
- Cover happy path and error cases
|
|
23
23
|
- Include tests for security concerns flagged in review
|
|
24
24
|
- Prioritize tests by risk and importance
|
|
25
|
+
- Provide expected test file paths for planned tests
|
|
25
26
|
|
|
26
27
|
## Output Format
|
|
27
28
|
|
|
@@ -59,7 +60,8 @@ Return a valid JSON object:
|
|
|
59
60
|
"scenario": "Empty input handling",
|
|
60
61
|
"expectedBehavior": "Return validation error"
|
|
61
62
|
}
|
|
62
|
-
]
|
|
63
|
+
],
|
|
64
|
+
"testFilePaths": ["src/feature.test.js"]
|
|
63
65
|
},
|
|
64
66
|
"testingNotes": "Any special considerations or setup needed"
|
|
65
67
|
}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
export const config = {
|
|
2
2
|
models: {
|
|
3
|
-
fast: "gemini",
|
|
4
|
-
low: "gemini",
|
|
5
|
-
med: "gemini",
|
|
6
|
-
high: "gemini",
|
|
3
|
+
fast: "gemini -m gemini-2.5-flash-lite",
|
|
4
|
+
low: "gemini -m gemini-2.5-flash-lite",
|
|
5
|
+
med: "gemini -m gemini-2.5-flash-lite",
|
|
6
|
+
high: "gemini -m gemini-2.5-flash-lite",
|
|
7
7
|
},
|
|
8
8
|
apiKeys: {
|
|
9
9
|
gemini: process.env.GEMINI_API_KEY,
|
|
@@ -1,6 +1,39 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
|
-
import { memory } from 'agent-state-machine';
|
|
3
|
+
import { memory, getCurrentRuntime } from 'agent-state-machine';
|
|
4
|
+
|
|
5
|
+
// Write implementation files from code-writer agent output
|
|
6
|
+
function writeImplementationFiles(implementation) {
|
|
7
|
+
const runtime = getCurrentRuntime();
|
|
8
|
+
if (!runtime) {
|
|
9
|
+
throw new Error('writeImplementationFiles must be called within a workflow context');
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
const projectRoot = runtime.workflowConfig.projectRoot;
|
|
13
|
+
const files = implementation?.implementation?.files || implementation?.files || [];
|
|
14
|
+
const written = [];
|
|
15
|
+
|
|
16
|
+
for (const file of files) {
|
|
17
|
+
if (!file.path || !file.code) {
|
|
18
|
+
console.warn(` [File] Skipping invalid file entry: ${JSON.stringify(file)}`);
|
|
19
|
+
continue;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const fullPath = path.resolve(projectRoot, file.path);
|
|
23
|
+
|
|
24
|
+
// Ensure directory exists
|
|
25
|
+
const dir = path.dirname(fullPath);
|
|
26
|
+
if (!fs.existsSync(dir)) {
|
|
27
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
fs.writeFileSync(fullPath, file.code);
|
|
31
|
+
written.push(file.path);
|
|
32
|
+
console.log(` [File] Created: ${file.path}`);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
return written;
|
|
36
|
+
}
|
|
4
37
|
|
|
5
38
|
// Write markdown file to workflow state directory
|
|
6
39
|
function writeMarkdownFile(stateDir, filename, content) {
|
|
@@ -109,8 +142,72 @@ function setTaskData(phaseIndex, taskId, dataKey, value) {
|
|
|
109
142
|
memory[key] = value;
|
|
110
143
|
}
|
|
111
144
|
|
|
145
|
+
function clearPartialTaskData(phaseIndex, taskId, keepKeys = []) {
|
|
146
|
+
const allKeys = [
|
|
147
|
+
'security_pre',
|
|
148
|
+
'tests',
|
|
149
|
+
'code',
|
|
150
|
+
'review',
|
|
151
|
+
'security_post',
|
|
152
|
+
'sanity_checks',
|
|
153
|
+
'sanity_results'
|
|
154
|
+
];
|
|
155
|
+
for (const key of allKeys) {
|
|
156
|
+
if (!keepKeys.includes(key)) {
|
|
157
|
+
setTaskData(phaseIndex, taskId, key, null);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
function getQuickFixAttempts(phaseIndex, taskId) {
|
|
163
|
+
return getTaskData(phaseIndex, taskId, 'quick_fix_attempts') || 0;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
function incrementQuickFixAttempts(phaseIndex, taskId) {
|
|
167
|
+
const current = getQuickFixAttempts(phaseIndex, taskId);
|
|
168
|
+
setTaskData(phaseIndex, taskId, 'quick_fix_attempts', current + 1);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
function resetQuickFixAttempts(phaseIndex, taskId) {
|
|
172
|
+
setTaskData(phaseIndex, taskId, 'quick_fix_attempts', 0);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
function detectTestFramework() {
|
|
176
|
+
const runtime = getCurrentRuntime();
|
|
177
|
+
const projectRoot = runtime?.workflowConfig?.projectRoot || process.cwd();
|
|
178
|
+
const pkgPath = path.join(projectRoot, 'package.json');
|
|
179
|
+
|
|
180
|
+
if (!fs.existsSync(pkgPath)) {
|
|
181
|
+
return { framework: 'vitest', command: 'npx vitest run', isDefault: true };
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
let pkg;
|
|
185
|
+
try {
|
|
186
|
+
pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
|
|
187
|
+
} catch (error) {
|
|
188
|
+
console.warn(` [Test] Failed to parse package.json: ${error.message}`);
|
|
189
|
+
return { framework: 'vitest', command: 'npx vitest run', isDefault: true };
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const deps = { ...pkg.dependencies, ...pkg.devDependencies };
|
|
193
|
+
const testScript = pkg.scripts?.test || '';
|
|
194
|
+
|
|
195
|
+
if (testScript.includes('vitest') || deps.vitest) {
|
|
196
|
+
return { framework: 'vitest', command: 'npm test' };
|
|
197
|
+
}
|
|
198
|
+
if (testScript.includes('jest') || deps.jest) {
|
|
199
|
+
return { framework: 'jest', command: 'npm test' };
|
|
200
|
+
}
|
|
201
|
+
if (testScript.includes('mocha') || deps.mocha) {
|
|
202
|
+
return { framework: 'mocha', command: 'npm test' };
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
return { framework: 'vitest', command: 'npx vitest run', isDefault: true };
|
|
206
|
+
}
|
|
207
|
+
|
|
112
208
|
export {
|
|
113
209
|
writeMarkdownFile,
|
|
210
|
+
writeImplementationFiles,
|
|
114
211
|
isApproval,
|
|
115
212
|
renderRoadmapMarkdown,
|
|
116
213
|
renderTasksMarkdown,
|
|
@@ -118,5 +215,10 @@ export {
|
|
|
118
215
|
getTaskStage,
|
|
119
216
|
setTaskStage,
|
|
120
217
|
getTaskData,
|
|
121
|
-
setTaskData
|
|
218
|
+
setTaskData,
|
|
219
|
+
clearPartialTaskData,
|
|
220
|
+
getQuickFixAttempts,
|
|
221
|
+
incrementQuickFixAttempts,
|
|
222
|
+
resetQuickFixAttempts,
|
|
223
|
+
detectTestFramework
|
|
122
224
|
};
|