codemini-cli 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/deployment.md +5 -5
- package/package.json +1 -2
- package/skills/grill-me/SKILL.md +30 -0
- package/skills/project-requirements/SKILL.md +245 -0
- package/skills/superpowers-lite/SKILL.md +5 -1
- package/src/commands/run.js +5 -4
- package/src/core/agent-loop.js +8 -8
- package/src/core/chat-runtime.js +220 -31
- package/src/core/config-store.js +6 -3
- package/src/core/fff-adapter.js +1 -1
- package/src/core/provider/anthropic.js +2 -2
- package/src/core/provider/openai-compatible.js +2 -2
- package/src/core/shell.js +1 -1
- package/src/core/tools.js +116 -39
- package/src/tui/chat-app.js +52 -22
- package/src/tui/tool-activity/presenters/system.js +6 -0
package/README.md
CHANGED
|
@@ -110,7 +110,8 @@ Skills are reusable workflow patterns that guide how the agent approaches differ
|
|
|
110
110
|
|
|
111
111
|
| Skill | Trigger | Description |
|
|
112
112
|
|-------|---------|-------------|
|
|
113
|
-
| **superpowers-lite** | Default for all coding work | Lightweight operating style: prefer structured tools, keep context tight, use sub-agents, verify before claiming success |
|
|
113
|
+
| **superpowers-lite** | Default for all coding work | Lightweight operating style: prefer structured tools, keep context tight, use sub-agents, verify before claiming success; asks 1-3 sharp questions only for high-risk decisions |
|
|
114
|
+
| **grill-me** | Explicit pressure-test requests | Optional scrutiny mode for plans, PRs, launches, and ideas; challenges assumptions without changing the default workflow |
|
|
114
115
|
| **brainstorm** | Multiple reasonable approaches exist | Explores options and tradeoffs before coding; asks one question at a time to resolve uncertainty |
|
|
115
116
|
| **writing-plans** | Non-trivial implementation task | Creates a step-by-step plan with exact file paths, code, and verification steps before touching code |
|
|
116
117
|
|
|
@@ -382,7 +383,8 @@ Skill 是可复用的工作流模式,指导 agent 如何处理不同类型的
|
|
|
382
383
|
|
|
383
384
|
| Skill | 触发条件 | 说明 |
|
|
384
385
|
|-------|----------|------|
|
|
385
|
-
| **superpowers-lite** | 所有编码工作的默认 skill | 轻量操作风格:优先结构化工具、保持上下文精简、使用 sub-agent、验证后再报告完成 |
|
|
386
|
+
| **superpowers-lite** | 所有编码工作的默认 skill | 轻量操作风格:优先结构化工具、保持上下文精简、使用 sub-agent、验证后再报告完成;仅在高风险决策中提出 1-3 个尖锐问题 |
|
|
387
|
+
| **grill-me** | 明确要求压力测试或拷问时 | 可选审查模式,用于方案、PR、发布和想法;挑战假设但不改变默认协作流程 |
|
|
386
388
|
| **brainstorm** | 存在多种合理方案时 | 在编码前探索选项和权衡;每次只问一个问题来消除不确定性 |
|
|
387
389
|
| **writing-plans** | 非平凡的实现任务 | 在动手之前创建包含精确文件路径、代码和验证步骤的分步计划 |
|
|
388
390
|
|
package/deployment.md
CHANGED
|
@@ -13,13 +13,13 @@ npm pack
|
|
|
13
13
|
Expected output:
|
|
14
14
|
|
|
15
15
|
```text
|
|
16
|
-
codemini-cli-0.4.2.tgz
|
|
16
|
+
codemini-cli-0.4.3.tgz
|
|
17
17
|
```
|
|
18
18
|
|
|
19
19
|
If you want to verify the package contents:
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
tar -tf codemini-cli-0.4.2.tgz
|
|
22
|
+
tar -tf codemini-cli-0.4.3.tgz
|
|
23
23
|
```
|
|
24
24
|
|
|
25
25
|
## 2. Copy To The Target Machine
|
|
@@ -34,7 +34,7 @@ Copy the generated `.tgz` file to the Win10 machine by one of these methods:
|
|
|
34
34
|
Recommended target path:
|
|
35
35
|
|
|
36
36
|
```powershell
|
|
37
|
-
C:\temp\codemini-cli-0.4.2.tgz
|
|
37
|
+
C:\temp\codemini-cli-0.4.3.tgz
|
|
38
38
|
```
|
|
39
39
|
|
|
40
40
|
## 3. Environment Requirements
|
|
@@ -58,7 +58,7 @@ npm -v
|
|
|
58
58
|
Global install:
|
|
59
59
|
|
|
60
60
|
```powershell
|
|
61
|
-
npm install -g C:\temp\codemini-cli-0.4.2.tgz
|
|
61
|
+
npm install -g C:\temp\codemini-cli-0.4.3.tgz
|
|
62
62
|
```
|
|
63
63
|
|
|
64
64
|
If global install is blocked by company policy, install in a working directory instead:
|
|
@@ -66,7 +66,7 @@ If global install is blocked by company policy, install in a working directory i
|
|
|
66
66
|
```powershell
|
|
67
67
|
mkdir C:\temp\coder-test
|
|
68
68
|
cd C:\temp\coder-test
|
|
69
|
-
npm install C:\temp\codemini-cli-0.4.2.tgz
|
|
69
|
+
npm install C:\temp\codemini-cli-0.4.3.tgz
|
|
70
70
|
```
|
|
71
71
|
|
|
72
72
|
## 5. Confirm Installation
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codemini-cli",
|
|
3
|
-
"version": "0.4.2",
|
|
3
|
+
"version": "0.4.3",
|
|
4
4
|
"description": "Coding CLI optimized for small-model workflows and Windows PowerShell",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"cli",
|
|
@@ -49,7 +49,6 @@
|
|
|
49
49
|
"@cursorless/tree-sitter-wasms": "^0.8.1",
|
|
50
50
|
"cheerio": "^1.1.2",
|
|
51
51
|
"cli-truncate": "^6.0.0",
|
|
52
|
-
"duck-duck-scrape": "^2.2.7",
|
|
53
52
|
"ink": "^7.0.0",
|
|
54
53
|
"react": "^19.2.5",
|
|
55
54
|
"strip-ansi": "^7.2.0",
|
|
package/skills/grill-me/SKILL.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: grill-me
|
|
3
|
+
description: Optional pressure-test mode for plans, architecture choices, PRs, launches, and product ideas: challenge assumptions without changing the default collaborative workflow.
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Use this skill only when the user explicitly asks to be grilled, challenged, pressure-tested, stress-tested, or reviewed with unusually direct scrutiny.
|
|
8
|
+
|
|
9
|
+
## Stance
|
|
10
|
+
|
|
11
|
+
Be direct, but keep the target clear: challenge the work, not the person. The goal is better judgment, not dominance or theater.
|
|
12
|
+
|
|
13
|
+
## Process
|
|
14
|
+
|
|
15
|
+
1. Identify the claim, plan, design, PR, launch, or decision under review.
|
|
16
|
+
2. State the highest-risk assumption first.
|
|
17
|
+
3. Ask 3-7 pointed questions, ordered by risk.
|
|
18
|
+
4. Call out missing evidence, weak verification, unclear ownership, rollback gaps, and hidden dependencies.
|
|
19
|
+
5. End with a short verdict:
|
|
20
|
+
- `Ship`: risks are understood and verification is credible.
|
|
21
|
+
- `Revise`: the direction is good, but one or more issues should be fixed first.
|
|
22
|
+
- `Stop`: a core assumption is unproven or the blast radius is too high.
|
|
23
|
+
|
|
24
|
+
## Boundaries
|
|
25
|
+
|
|
26
|
+
- Do not insult, mock, or psychoanalyze the user.
|
|
27
|
+
- Do not turn every normal coding task into a cross-examination.
|
|
28
|
+
- Do not invent requirements. If context is missing, ask for the missing artifact or state the assumption.
|
|
29
|
+
- Prefer concrete tests, rollback paths, and observable acceptance criteria over vague caution.
|
|
30
|
+
|
|
package/skills/project-requirements/SKILL.md
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: project-requirements
|
|
3
|
+
description: Generate an interactive project requirements report from an existing codebase. Use when the user asks for a PRD, requirements document, API-by-API breakdown, business flow, architecture map, dependency graph, flowchart, product requirements reverse-engineering, or detailed project demand analysis.
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Use this skill to reverse-engineer a project into a requirements document that product, engineering, and QA can navigate.
|
|
8
|
+
|
|
9
|
+
Default to an HTML report with lightweight interactions. Produce Markdown only when the user asks for a text-first artifact, a PR-friendly source document, or an additional companion file.
|
|
10
|
+
|
|
11
|
+
User request:
|
|
12
|
+
|
|
13
|
+
```text
|
|
14
|
+
{{args}}
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Honor any concrete user request above, such as output format, report path, focus area, API subset, diagram style, or language. If it is empty, generate the default HTML requirements report for the current workspace.
|
|
18
|
+
|
|
19
|
+
## Output
|
|
20
|
+
|
|
21
|
+
Create the primary report at:
|
|
22
|
+
|
|
23
|
+
```text
|
|
24
|
+
docs/requirements/YYYY-MM-DD-project-requirements.html
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
If a companion Markdown file is useful, create:
|
|
28
|
+
|
|
29
|
+
```text
|
|
30
|
+
docs/requirements/YYYY-MM-DD-project-requirements.md
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
The HTML should be self-contained: inline CSS, inline JavaScript, no build step, no required external assets.
|
|
34
|
+
|
|
35
|
+
Diagrams must be visible when the HTML is opened directly from disk:
|
|
36
|
+
|
|
37
|
+
- Prefer inline SVG for architecture maps, dependency graphs, sequence summaries, and state diagrams.
|
|
38
|
+
- Use semantic SVG groups, `<title>`/`<desc>`, readable labels, arrow markers, and stable element ids so sections can link to diagram nodes.
|
|
39
|
+
- For simple hierarchy diagrams, CSS grid/flex boxes with connector lines are also acceptable.
|
|
40
|
+
- Do not rely on Mermaid rendering as the only visible diagram. Mermaid source may be included in a collapsible `<details>` block as an editable source-of-truth companion.
|
|
41
|
+
- Use Mermaid CDN rendering only as optional progressive enhancement when the user accepts network access. The static inline SVG or CSS diagram must remain the fallback and primary offline view.
|
|
42
|
+
- Avoid showing only raw Mermaid code blocks in the final HTML unless the user explicitly asks for source-only diagrams.
|
|
43
|
+
|
|
44
|
+
For medium or large projects, do not generate the entire HTML document in one model response or one huge `write` call. Create the report incrementally:
|
|
45
|
+
|
|
46
|
+
1. Write a complete HTML shell first: `doctype`, `<head>`, inline CSS, navigation container, empty main sections, inline script, and closing tags.
|
|
47
|
+
2. Add each major section with smaller `edit` insertions before a stable marker such as `<!-- REQUIREMENTS_SECTIONS -->`.
|
|
48
|
+
3. Keep each write/edit chunk focused: one section, one API group, or one diagram at a time.
|
|
49
|
+
4. After each chunk, preserve valid HTML and keep the marker in place until the final cleanup.
|
|
50
|
+
5. In the final pass, remove unused markers and verify the file can be opened directly from disk.
|
|
51
|
+
|
|
52
|
+
This chunked approach is required for HTML reports because inline CSS, JavaScript, diagrams, and API cards can become much larger than Markdown. It also gives the user immediate visible tool progress instead of waiting for one giant generated tool call.
|
|
53
|
+
|
|
54
|
+
## Process
|
|
55
|
+
|
|
56
|
+
1. Inspect the project before writing:
|
|
57
|
+
- Read top-level docs such as `README.md`, `OPERATIONS.md`, `docs/`, and deployment notes.
|
|
58
|
+
- Identify the stack from package manifests, route files, command handlers, API clients, database modules, schemas, and tests.
|
|
59
|
+
- Search with `rg` for routes, handlers, controllers, commands, schemas, migrations, HTTP verbs, RPC methods, queue handlers, and CLI subcommands.
|
|
60
|
+
2. Build an evidence map:
|
|
61
|
+
- `EXTRACTED`: behavior directly supported by source code, docs, tests, config, or schemas.
|
|
62
|
+
- `INFERRED`: reasonable product requirement inferred from code relationships.
|
|
63
|
+
- `UNKNOWN`: requirement, owner, actor, edge case, or business rule that needs user confirmation.
|
|
64
|
+
3. Decompose by API or interface first:
|
|
65
|
+
- HTTP API endpoints.
|
|
66
|
+
- CLI commands and subcommands.
|
|
67
|
+
- Tool calls, MCP handlers, RPC methods, queue jobs, scheduled tasks, or exported SDK functions.
|
|
68
|
+
- UI flows only after the backend/interface layer is mapped, unless the project is frontend-only.
|
|
69
|
+
4. Connect each API/interface to requirements:
|
|
70
|
+
- User goal and actor.
|
|
71
|
+
- Trigger and entry point.
|
|
72
|
+
- Request/input shape.
|
|
73
|
+
- Response/output shape.
|
|
74
|
+
- Validation and permission rules.
|
|
75
|
+
- Data read/write behavior.
|
|
76
|
+
- Internal modules called.
|
|
77
|
+
- External services or files touched.
|
|
78
|
+
- Error cases and retry/rollback behavior.
|
|
79
|
+
- Observability, audit, and security notes.
|
|
80
|
+
- Acceptance criteria.
|
|
81
|
+
5. Generate diagrams:
|
|
82
|
+
- Product flowchart for the main user journey.
|
|
83
|
+
- API dependency graph linking endpoints/commands to modules, data stores, and external services.
|
|
84
|
+
- Sequence diagram for at least one high-value flow.
|
|
85
|
+
- State or lifecycle diagram when the domain has clear states.
|
|
86
|
+
- Render each diagram as static inline SVG or CSS boxes in the HTML, with optional Mermaid source hidden in a collapsible details block.
|
|
87
|
+
6. Write the report and preserve traceability:
|
|
88
|
+
- Link sections with stable anchors.
|
|
89
|
+
- Include code file paths for evidence.
|
|
90
|
+
- Mark inferred or unknown content visibly.
|
|
91
|
+
- Avoid pretending uncertain requirements are confirmed.
|
|
92
|
+
- For HTML output, write the shell first, then append/insert sections incrementally instead of producing one large complete file in a single tool call.
|
|
93
|
+
|
|
94
|
+
## HTML Structure
|
|
95
|
+
|
|
96
|
+
Use this structure unless the project suggests a better one:
|
|
97
|
+
|
|
98
|
+
1. Executive summary.
|
|
99
|
+
2. System map with a high-level static SVG or CSS architecture diagram.
|
|
100
|
+
3. API/interface inventory with filters or grouped navigation.
|
|
101
|
+
4. Per-API requirement cards.
|
|
102
|
+
5. Core user flows with diagrams.
|
|
103
|
+
6. Domain model and data ownership.
|
|
104
|
+
7. Permissions, security, and compliance notes.
|
|
105
|
+
8. Error handling and edge cases.
|
|
106
|
+
9. Non-functional requirements.
|
|
107
|
+
10. Open questions and `UNKNOWN` items.
|
|
108
|
+
11. Source evidence index.
|
|
109
|
+
|
|
110
|
+
## Interaction Guidelines
|
|
111
|
+
|
|
112
|
+
Implement useful interactions with plain JavaScript:
|
|
113
|
+
|
|
114
|
+
- Sticky table of contents.
|
|
115
|
+
- Search/filter input for APIs, modules, and tags.
|
|
116
|
+
- Expand/collapse details for each API.
|
|
117
|
+
- Anchor links for every API and flow.
|
|
118
|
+
- Evidence tags: `EXTRACTED`, `INFERRED`, `UNKNOWN`.
|
|
119
|
+
- Back-to-top links for long reports.
|
|
120
|
+
- Optional "show only open questions" toggle.
|
|
121
|
+
|
|
122
|
+
Keep interactions accessible:
|
|
123
|
+
|
|
124
|
+
- Use semantic headings, buttons, tables, and lists.
|
|
125
|
+
- Make controls keyboard reachable.
|
|
126
|
+
- Do not hide critical content behind JavaScript-only rendering.
|
|
127
|
+
- Ensure the document remains readable if JavaScript is disabled.
|
|
128
|
+
|
|
129
|
+
## API Section Template
|
|
130
|
+
|
|
131
|
+
For each API, command, handler, or externally visible interface, include:
|
|
132
|
+
|
|
133
|
+
```text
|
|
134
|
+
Name:
|
|
135
|
+
Type:
|
|
136
|
+
Route/command/function:
|
|
137
|
+
Evidence:
|
|
138
|
+
Actor:
|
|
139
|
+
Goal:
|
|
140
|
+
Inputs:
|
|
141
|
+
Outputs:
|
|
142
|
+
Preconditions:
|
|
143
|
+
Main flow:
|
|
144
|
+
Alternative flows:
|
|
145
|
+
Validation:
|
|
146
|
+
Permissions:
|
|
147
|
+
Data reads:
|
|
148
|
+
Data writes:
|
|
149
|
+
Internal dependencies:
|
|
150
|
+
External dependencies:
|
|
151
|
+
Errors:
|
|
152
|
+
Observability:
|
|
153
|
+
Acceptance criteria:
|
|
154
|
+
Open questions:
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Diagram Patterns
|
|
158
|
+
|
|
159
|
+
Use static diagrams when diagrams help compress complexity. In HTML output, render the visible diagram as inline SVG or CSS boxes. Include Mermaid only as optional source text when it helps future editing.
|
|
160
|
+
|
|
161
|
+
Inline SVG architecture map:
|
|
162
|
+
|
|
163
|
+
```html
|
|
164
|
+
<figure class="diagram" id="system-architecture">
|
|
165
|
+
<figcaption>System architecture</figcaption>
|
|
166
|
+
<svg viewBox="0 0 960 520" role="img" aria-labelledby="arch-title arch-desc">
|
|
167
|
+
<title id="arch-title">System architecture</title>
|
|
168
|
+
<desc id="arch-desc">CLI commands call runtime services, which use tools and data stores.</desc>
|
|
169
|
+
<defs>
|
|
170
|
+
<marker id="arrow" markerWidth="10" markerHeight="10" refX="8" refY="3" orient="auto">
|
|
171
|
+
<path d="M0,0 L0,6 L9,3 z"></path>
|
|
172
|
+
</marker>
|
|
173
|
+
</defs>
|
|
174
|
+
<g id="cli-layer">
|
|
175
|
+
<rect x="40" y="40" width="220" height="90" rx="8"></rect>
|
|
176
|
+
<text x="60" y="90">CLI Entry</text>
|
|
177
|
+
</g>
|
|
178
|
+
<g id="runtime-layer">
|
|
179
|
+
<rect x="370" y="40" width="240" height="90" rx="8"></rect>
|
|
180
|
+
<text x="390" y="90">Runtime</text>
|
|
181
|
+
</g>
|
|
182
|
+
<line x1="260" y1="85" x2="370" y2="85" marker-end="url(#arrow)"></line>
|
|
183
|
+
</svg>
|
|
184
|
+
</figure>
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
CSS box architecture map:
|
|
188
|
+
|
|
189
|
+
```html
|
|
190
|
+
<section class="arch-map" aria-label="System architecture">
|
|
191
|
+
<a class="arch-node" href="#api-chat">Chat command</a>
|
|
192
|
+
<span class="arch-edge" aria-hidden="true">-></span>
|
|
193
|
+
<a class="arch-node" href="#runtime-agent-loop">Agent loop</a>
|
|
194
|
+
<span class="arch-edge" aria-hidden="true">-></span>
|
|
195
|
+
<a class="arch-node" href="#tools-write">Tools</a>
|
|
196
|
+
</section>
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Optional Mermaid companion:
|
|
200
|
+
|
|
201
|
+
Product flow:
|
|
202
|
+
|
|
203
|
+
```mermaid
|
|
204
|
+
flowchart TD
|
|
205
|
+
A[User starts task] --> B[System validates input]
|
|
206
|
+
B --> C[System performs core action]
|
|
207
|
+
C --> D[User receives result]
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
API dependency map:
|
|
211
|
+
|
|
212
|
+
```mermaid
|
|
213
|
+
graph LR
|
|
214
|
+
API[API or command] --> Handler[Handler]
|
|
215
|
+
Handler --> Service[Service]
|
|
216
|
+
Service --> Store[(Data store)]
|
|
217
|
+
Service --> External[External service]
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
Sequence flow:
|
|
221
|
+
|
|
222
|
+
```mermaid
|
|
223
|
+
sequenceDiagram
|
|
224
|
+
participant User
|
|
225
|
+
participant API
|
|
226
|
+
participant Service
|
|
227
|
+
participant Store
|
|
228
|
+
User->>API: Request
|
|
229
|
+
API->>Service: Validate and execute
|
|
230
|
+
Service->>Store: Read/write data
|
|
231
|
+
Store-->>Service: Result
|
|
232
|
+
Service-->>API: Domain result
|
|
233
|
+
API-->>User: Response
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Quality Bar
|
|
237
|
+
|
|
238
|
+
The report is complete when:
|
|
239
|
+
|
|
240
|
+
- A reader can find every major API or user-facing interface from the navigation.
|
|
241
|
+
- Each interface has at least one source evidence path.
|
|
242
|
+
- Main flows and dependencies are represented both in text and diagrams.
|
|
243
|
+
- Inferred requirements are labeled instead of stated as facts.
|
|
244
|
+
- Open questions are grouped so the user can resolve them later.
|
|
245
|
+
- The HTML can be opened directly from disk in a browser.
|
|
package/skills/superpowers-lite/SKILL.md
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: superpowers-lite
|
|
3
3
|
description: Concise workflow skill tuned for 30B-class models: prefer structured code tools first, keep context tight, use sub-agents for narrow tasks, and verify before claiming success.
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
Use this skill as the default lightweight operating style for all coding work.
|
|
8
8
|
|
|
9
|
+
This is the default, not an interrogation mode. Keep help calm and direct. For high-risk decisions only, add a light Grill Me pass: ask 1-3 sharp questions about assumptions, failure modes, or verification before proceeding. Challenge the plan, not the person.
|
|
10
|
+
|
|
9
11
|
**Announce when using a skill:** Before following any route below, say "Using [skill name] to [purpose]" in your response. This signals intent and prevents silent skill skipping.
|
|
10
12
|
|
|
11
13
|
## Mandatory Skill Check
|
|
@@ -83,6 +85,8 @@ Evaluate the user's request and YOU MUST follow exactly one route:
|
|
|
83
85
|
|
|
84
86
|
5. **Verify before claiming success.** Run the relevant test or command before saying work is done.
|
|
85
87
|
|
|
88
|
+
6. **Use sharp questions sparingly.** For high-risk work, ask 1-3 sharp questions that expose assumptions or likely failure modes. For ordinary tasks, stay lightweight and keep moving.
|
|
89
|
+
|
|
86
90
|
## Sub-agent Guidance
|
|
87
91
|
|
|
88
92
|
- `planner`: break work into steps, risks, and checks
|
package/src/commands/run.js
CHANGED
|
@@ -11,6 +11,7 @@ import path from 'node:path';
|
|
|
11
11
|
|
|
12
12
|
const ROLE_TOOL_POLICY = {
|
|
13
13
|
planner: ['read', 'grep', 'list', 'query_project_index', 'tool_search', 'glob', 'ast_query', 'read_ast_node', 'read_plan', 'update_plan'],
|
|
14
|
+
advisor: ['read', 'grep', 'list', 'query_project_index', 'tool_search', 'read_plan'],
|
|
14
15
|
coder: ['read', 'grep', 'list', 'edit', 'write', 'run', 'ast_query', 'read_ast_node', 'glob', 'tool_search', 'update_todos', 'read_plan', 'update_plan'],
|
|
15
16
|
reviewer: ['read', 'grep', 'list', 'glob', 'tool_search', 'ast_query', 'read_ast_node', 'read_plan'],
|
|
16
17
|
tester: ['read', 'grep', 'list', 'run', 'glob', 'tool_search', 'read_plan']
|
|
@@ -70,7 +71,7 @@ function makeCompletionFn(config) {
|
|
|
70
71
|
model,
|
|
71
72
|
messages,
|
|
72
73
|
tools,
|
|
73
|
-
timeoutMs: config.gateway.timeout_ms ||
|
|
74
|
+
timeoutMs: config.gateway.timeout_ms || 1800000,
|
|
74
75
|
maxRetries: config.gateway.max_retries ?? 2
|
|
75
76
|
});
|
|
76
77
|
}
|
|
@@ -142,11 +143,11 @@ function normalizePlan(parsed, goal) {
|
|
|
142
143
|
async function planPipeline({ goal, config, systemPrompt, model }) {
|
|
143
144
|
const plannerPrompt = [
|
|
144
145
|
'Create an execution plan and assign the best sub-agent role for each step.',
|
|
145
|
-
'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|coder|reviewer|tester","task":"..."}]}. No markdown.',
|
|
146
|
+
'Return strict JSON only with shape {"summary":"...","steps":[{"title":"...","role":"planner|advisor|coder|reviewer|tester","task":"..."}]}. No markdown.',
|
|
146
147
|
`Available roles: ${HARNESS_ROLES.join(', ')}.`,
|
|
147
148
|
'Prefer 3-5 steps total. The first step should usually inspect the target area.',
|
|
148
149
|
'For implementation goals, include a reviewer or tester step near the end.',
|
|
149
|
-
'For advisory/analysis goals, keep it lean with planner/coder
|
|
150
|
+
'For advisory/analysis goals, keep it lean with planner/advisor only; do not use coder unless code or files will be modified.'
|
|
150
151
|
].join('\n');
|
|
151
152
|
|
|
152
153
|
const planning = await createChatCompletion({
|
|
@@ -158,7 +159,7 @@ async function planPipeline({ goal, config, systemPrompt, model }) {
|
|
|
158
159
|
{ role: 'system', content: `${systemPrompt}\n${plannerPrompt}` },
|
|
159
160
|
{ role: 'user', content: `Plan the following task:\n${goal}` }
|
|
160
161
|
],
|
|
161
|
-
timeoutMs: config.gateway.timeout_ms ||
|
|
162
|
+
timeoutMs: config.gateway.timeout_ms || 1800000,
|
|
162
163
|
maxRetries: config.gateway.max_retries ?? 2
|
|
163
164
|
});
|
|
164
165
|
|
package/src/core/agent-loop.js
CHANGED
|
@@ -179,6 +179,10 @@ const DREAM_AUTO_CAPTURE_TOOLS = new Set([
|
|
|
179
179
|
const DREAM_AUTO_CAPTURE_COOLDOWN_MS = 60_000;
|
|
180
180
|
const lastAutoCaptureByTool = new Map();
|
|
181
181
|
|
|
182
|
+
function isAutoCaptureEnabled(config = {}) {
|
|
183
|
+
return config?.memory?.enabled !== false && config?.memory?.auto_capture !== false;
|
|
184
|
+
}
|
|
185
|
+
|
|
182
186
|
function shouldAutoCaptureError(toolName, message) {
|
|
183
187
|
if (!DREAM_AUTO_CAPTURE_TOOLS.has(toolName)) return false;
|
|
184
188
|
const now = Date.now();
|
|
@@ -196,10 +200,6 @@ function shouldAutoCaptureError(toolName, message) {
|
|
|
196
200
|
/command not found/i,
|
|
197
201
|
/permission denied/i,
|
|
198
202
|
/args\?\s/i,
|
|
199
|
-
/Raw tool arguments/i,
|
|
200
|
-
/edit requires/i,
|
|
201
|
-
/write requires/i,
|
|
202
|
-
/requires file/i,
|
|
203
203
|
/path.*outside workspace/i,
|
|
204
204
|
/escapes workspace/i
|
|
205
205
|
];
|
|
@@ -209,7 +209,7 @@ function shouldAutoCaptureError(toolName, message) {
|
|
|
209
209
|
}
|
|
210
210
|
|
|
211
211
|
async function captureToolFailure(toolName, message, args, config = {}) {
|
|
212
|
-
if (config
|
|
212
|
+
if (!isAutoCaptureEnabled(config)) return;
|
|
213
213
|
const summary = `[${toolName}] ${String(message).slice(0, 120)}`;
|
|
214
214
|
const details = args
|
|
215
215
|
? `Tool: ${toolName}\nError: ${message}\nArgs: ${JSON.stringify(args).slice(0, 300)}`
|
|
@@ -805,7 +805,7 @@ export async function runAgentLoop({
|
|
|
805
805
|
if (onEvent) {
|
|
806
806
|
onEvent({ type: 'tool:error', name: displayName, id: call.id, arguments: effectiveArgs, durationMs, summary: trimInline(message, 120) });
|
|
807
807
|
}
|
|
808
|
-
if (shouldAutoCaptureError(toolName, message)) {
|
|
808
|
+
if (isAutoCaptureEnabled(config) && shouldAutoCaptureError(toolName, message)) {
|
|
809
809
|
await captureToolFailure(toolName, message, effectiveArgs, config).catch(() => {});
|
|
810
810
|
}
|
|
811
811
|
return {
|
|
@@ -828,13 +828,13 @@ export async function runAgentLoop({
|
|
|
828
828
|
const stderr = String(toolResult.stderr || '');
|
|
829
829
|
if (typeof exitCode === 'number' && exitCode !== 0 && stderr) {
|
|
830
830
|
const failMsg = `exit ${exitCode}: ${stderr.slice(0, 120)}`;
|
|
831
|
-
if (shouldAutoCaptureError(toolName, failMsg)) {
|
|
831
|
+
if (isAutoCaptureEnabled(config) && shouldAutoCaptureError(toolName, failMsg)) {
|
|
832
832
|
await captureToolFailure(toolName, failMsg, effectiveArgs, config).catch(() => {});
|
|
833
833
|
}
|
|
834
834
|
}
|
|
835
835
|
if (toolResult.error) {
|
|
836
836
|
const errMsg = String(toolResult.error).slice(0, 120);
|
|
837
|
-
if (shouldAutoCaptureError(toolName, errMsg)) {
|
|
837
|
+
if (isAutoCaptureEnabled(config) && shouldAutoCaptureError(toolName, errMsg)) {
|
|
838
838
|
await captureToolFailure(toolName, errMsg, effectiveArgs, config).catch(() => {});
|
|
839
839
|
}
|
|
840
840
|
}
|