@zhijiewang/openharness 2.18.0 → 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -12
- package/README.zh-CN.md +45 -12
- package/dist/commands/index.d.ts +23 -0
- package/dist/commands/index.js +64 -0
- package/dist/commands/info.js +46 -3
- package/dist/harness/config.d.ts +12 -0
- package/dist/harness/hooks.d.ts +23 -1
- package/dist/harness/rules.js +18 -2
- package/dist/harness/submit-handler.js +14 -1
- package/dist/main.js +90 -14
- package/dist/mcp/client.d.ts +23 -0
- package/dist/mcp/client.js +37 -0
- package/dist/mcp/loader.d.ts +20 -0
- package/dist/mcp/loader.js +27 -0
- package/dist/providers/ollama.d.ts +13 -0
- package/dist/providers/ollama.js +41 -0
- package/dist/query/tools.js +50 -10
- package/dist/tools/TaskCreateTool/index.js +5 -0
- package/dist/tools/TaskUpdateTool/index.js +11 -0
- package/dist/utils/parse-budget.d.ts +20 -0
- package/dist/utils/parse-budget.js +12 -0
- package/package.json +12 -6
package/README.md
CHANGED
|
@@ -32,8 +32,8 @@ AI coding agent in your terminal. Works with any LLM -- free local models or clo
|
|
|
32
32
|
- [Quick Start](#quick-start)
|
|
33
33
|
- [Why OpenHarness?](#why-openharness)
|
|
34
34
|
- [Terminal UI](#terminal-ui)
|
|
35
|
-
- [Tools (
|
|
36
|
-
- [Slash Commands
|
|
35
|
+
- [Tools (43)](#tools-43)
|
|
36
|
+
- [Slash Commands](#slash-commands)
|
|
37
37
|
- [Permission Modes](#permission-modes)
|
|
38
38
|
- [Hooks](#hooks)
|
|
39
39
|
- [Checkpoints & Rewind](#checkpoints--rewind)
|
|
@@ -61,6 +61,8 @@ That's it. OpenHarness auto-detects Ollama and starts chatting. No API key neede
|
|
|
61
61
|
|
|
62
62
|
**Python SDK:** there's also an official Python SDK for driving `oh` from Python programs (notebooks, batch scripts, ML pipelines). Install with `pip install openharness-sdk` after the npm install (the PyPI distribution is `openharness-sdk` because the unqualified name is taken), then `from openharness import query`. See [`python/README.md`](python/README.md).
|
|
63
63
|
|
|
64
|
+
**TypeScript SDK:** drive `oh` from Node.js (VS Code extensions, Electron apps, build scripts) with `@zhijiewang/openharness-sdk` — `npm install @zhijiewang/openharness-sdk`, then `import { query, OpenHarnessClient, tool } from "@zhijiewang/openharness-sdk"`. Mirrors the Python SDK surface (streaming events, stateful sessions, custom tools, permission callback, session resume). See [`packages/sdk/README.md`](packages/sdk/README.md).
|
|
65
|
+
|
|
64
66
|
```bash
|
|
65
67
|
oh init # interactive setup wizard (provider + cybergotchi)
|
|
66
68
|
oh # auto-detect local model
|
|
@@ -142,12 +144,13 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
142
144
|
|
|
143
145
|
Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XXXX), `{ctx}` (context usage bar). Empty sections are automatically collapsed.
|
|
144
146
|
|
|
145
|
-
## Tools (
|
|
147
|
+
## Tools (43)
|
|
146
148
|
|
|
147
149
|
| Tool | Risk | Description |
|
|
148
150
|
|------|------|-------------|
|
|
149
151
|
| **Core** | | |
|
|
150
152
|
| Bash | high | Execute shell commands with live streaming output (AST safety analysis) |
|
|
153
|
+
| PowerShell | high | Execute PowerShell commands (Windows-native scripting) |
|
|
151
154
|
| Read | low | Read files with line ranges, PDF support |
|
|
152
155
|
| ImageRead | low | Read images/PDFs for multimodal analysis |
|
|
153
156
|
| Write | medium | Create or overwrite files |
|
|
@@ -167,6 +170,7 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
|
|
|
167
170
|
| TaskGet | low | Get task details |
|
|
168
171
|
| TaskStop | low | Stop a running task |
|
|
169
172
|
| TaskOutput | low | Get task output |
|
|
173
|
+
| TodoWrite | low | Manage session task checklist (Claude Code-compatible) |
|
|
170
174
|
| **Agents** | | |
|
|
171
175
|
| Agent | medium | Spawn a sub-agent (with role specialization) |
|
|
172
176
|
| ParallelAgent | medium | Dispatch multiple agents with DAG dependencies |
|
|
@@ -176,9 +180,12 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
|
|
|
176
180
|
| CronCreate | medium | Schedule recurring tasks |
|
|
177
181
|
| CronDelete | medium | Remove scheduled tasks |
|
|
178
182
|
| CronList | low | List all scheduled tasks |
|
|
183
|
+
| ScheduleWakeup | low | Self-pace the next /loop iteration (cache-aware) |
|
|
179
184
|
| **Planning** | | |
|
|
180
185
|
| EnterPlanMode | low | Enter structured planning mode |
|
|
181
186
|
| ExitPlanMode | low | Exit planning mode |
|
|
187
|
+
| **Pipelines** | | |
|
|
188
|
+
| Pipeline | medium | Run a sequence of tasks with output passed between steps |
|
|
182
189
|
| **Code Intelligence** | | |
|
|
183
190
|
| Diagnostics | low | LSP-based code diagnostics |
|
|
184
191
|
| NotebookEdit | medium | Edit Jupyter notebooks |
|
|
@@ -186,6 +193,7 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
|
|
|
186
193
|
| Memory | low | Save/list/search persistent memories |
|
|
187
194
|
| Skill | low | Invoke a skill from .oh/skills/ |
|
|
188
195
|
| ToolSearch | low | Find tools by description |
|
|
196
|
+
| SessionSearch | low | Search prior sessions for relevant context |
|
|
189
197
|
| **MCP** | | |
|
|
190
198
|
| ListMcpResources | low | List resources from connected MCP servers |
|
|
191
199
|
| ReadMcpResource | low | Read a specific MCP resource by URI |
|
|
@@ -194,12 +202,13 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
|
|
|
194
202
|
| ExitWorktree | medium | Remove a git worktree |
|
|
195
203
|
| **Process** | | |
|
|
196
204
|
| KillProcess | high | Stop processes by PID or name |
|
|
205
|
+
| Monitor | medium | Run a background command and stream each output line back to the agent |
|
|
197
206
|
|
|
198
207
|
Low-risk read-only tools auto-approve. Medium and high risk tools require confirmation in `ask` mode. Use `--trust` or `--auto` to skip prompts.
|
|
199
208
|
|
|
200
|
-
## Slash Commands
|
|
209
|
+
## Slash Commands
|
|
201
210
|
|
|
202
|
-
|
|
211
|
+
Over 80 commands are registered. The most-used ones are grouped below; see `/help` in-session for the full list. Aliases: `/q` exit, `/h` help, `/c` commit, `/m` model, `/s` status.
|
|
203
212
|
|
|
204
213
|
**Session:**
|
|
205
214
|
| Command | Description |
|
|
@@ -289,11 +298,29 @@ hooks:
|
|
|
289
298
|
command: "scripts/cleanup.sh"
|
|
290
299
|
```
|
|
291
300
|
|
|
292
|
-
**Event types
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
301
|
+
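**Event types** (17 listed below; the `HookEvent` type also defines `postToolBatch`, `userPromptExpansion`, `permissionDenied`, `taskCreated`, `taskCompleted`, and `instructionsLoaded`):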
|
|
302
|
+
|
|
303
|
+
| Event | When it fires | Can block? |
|
|
304
|
+
|-------|---------------|------------|
|
|
305
|
+
| `sessionStart` | Session begins | — |
|
|
306
|
+
| `sessionEnd` | Session ends | — |
|
|
307
|
+
| `turnStart` | Top-level agent turn begins (after user prompt accepted) | — |
|
|
308
|
+
| `turnStop` | Top-level agent turn ends (mirrors Claude Code's `Stop`) | — |
|
|
309
|
+
| `userPromptSubmit` | Before user prompt reaches the LLM | yes — `decision: deny` |
|
|
310
|
+
| `preToolUse` | Before each tool call | yes — exit code 1 / `decision: deny` |
|
|
311
|
+
| `postToolUse` | After successful tool execution | — |
|
|
312
|
+
| `postToolUseFailure` | After tool throws or returns `isError: true` | — |
|
|
313
|
+
| `permissionRequest` | When a tool needs approval (between `preToolUse` and the prompt) | yes — `decision: allow\|deny\|ask` |
|
|
314
|
+
| `fileChanged` | After a tool modifies a file | — |
|
|
315
|
+
| `cwdChanged` | After working directory changes | — |
|
|
316
|
+
| `subagentStart` | A sub-agent is spawned | — |
|
|
317
|
+
| `subagentStop` | A sub-agent completes | — |
|
|
318
|
+
| `preCompact` | Before conversation compaction | — |
|
|
319
|
+
| `postCompact` | After conversation compaction | — |
|
|
320
|
+
| `configChange` | `.oh/config.yaml` is modified during the session | — |
|
|
321
|
+
| `notification` | A notification is dispatched | — |
|
|
322
|
+
|
|
323
|
+
Live introspection: run `/hooks` in-session to see exactly which hooks are loaded, grouped by event.
|
|
297
324
|
|
|
298
325
|
**Environment variables** available to hook scripts:
|
|
299
326
|
|
|
@@ -303,10 +330,16 @@ hooks:
|
|
|
303
330
|
| `OH_TOOL_NAME` | Name of the tool being called (tool events only) |
|
|
304
331
|
| `OH_TOOL_ARGS` | JSON-encoded tool arguments (tool events only) |
|
|
305
332
|
| `OH_TOOL_OUTPUT` | JSON-encoded tool output (`postToolUse` only) |
|
|
333
|
+
| `OH_TOOL_INPUT_JSON` | Full JSON tool input (tool events only) |
|
|
334
|
+
| `OH_SESSION_ID` / `OH_MODEL` / `OH_PROVIDER` / `OH_PERMISSION_MODE` | Current session context |
|
|
335
|
+
| `OH_COST` / `OH_TOKENS` | Running cost and token totals |
|
|
336
|
+
| `OH_FILE_PATH` | Path that changed (`fileChanged` only) |
|
|
337
|
+
| `OH_NEW_CWD` | New working directory (`cwdChanged` only) |
|
|
338
|
+
| `OH_TURN_NUMBER` / `OH_TURN_REASON` | Turn boundary context (`turnStart` / `turnStop`) |
|
|
306
339
|
|
|
307
|
-
Use `match` to restrict a hook to a specific tool name (e.g., `match: Bash` only triggers for the Bash tool).
|
|
340
|
+
Use `match` to restrict a hook to a specific tool name (e.g., `match: Bash` only triggers for the Bash tool). Substring, glob (`Cron*`), and `/regex/flags` patterns are all supported.
|
|
308
341
|
|
|
309
|
-
See [docs/hooks.md](docs/hooks.md) for the full
|
|
342
|
+
Set `jsonIO: true` on a `command` hook to opt into structured JSON I/O — the harness sends `{event, ...context}` on stdin and reads `{decision, reason, hookSpecificOutput}` from stdout. HTTP hooks accept the same response shape. See [docs/hooks.md](docs/hooks.md) for the full reference.
|
|
310
343
|
|
|
311
344
|
## Cybergotchi
|
|
312
345
|
|
package/README.zh-CN.md
CHANGED
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
- [快速开始](#快速开始)
|
|
33
33
|
- [为什么选择 OpenHarness?](#为什么选择-openharness)
|
|
34
34
|
- [终端界面](#终端界面)
|
|
35
|
-
- [工具(
|
|
36
|
-
- [
|
|
35
|
+
- [工具(43 个)](#工具43-个)
|
|
36
|
+
- [斜杠命令](#斜杠命令)
|
|
37
37
|
- [权限模式](#权限模式)
|
|
38
38
|
- [钩子](#钩子)
|
|
39
39
|
- [检查点与回滚](#检查点与回滚)
|
|
@@ -61,6 +61,8 @@ oh
|
|
|
61
61
|
|
|
62
62
|
**Python SDK:** 我们还提供了官方的 Python SDK,可以在 Python 程序中驱动 `oh`(笔记本、批处理脚本、ML 流水线)。在 npm 安装之后,使用 `pip install openharness-sdk` 安装(PyPI 分发名为 `openharness-sdk`,因为未加后缀的名称已被占用),然后 `from openharness import query`。详见 [`python/README.md`](python/README.md)。
|
|
63
63
|
|
|
64
|
+
**TypeScript SDK:** 同样有官方的 TypeScript SDK,可以在 Node.js(VS Code 插件、Electron 应用、构建脚本等)中驱动 `oh`:使用 `@zhijiewang/openharness-sdk` —— 通过 `npm install @zhijiewang/openharness-sdk` 安装,然后 `import { query, OpenHarnessClient, tool } from "@zhijiewang/openharness-sdk"`。功能与 Python SDK 对等(流式事件、有状态会话、自定义工具、权限回调、会话恢复)。详见 [`packages/sdk/README.md`](packages/sdk/README.md)。
|
|
65
|
+
|
|
64
66
|
```bash
|
|
65
67
|
oh init # 交互式安装向导(模型提供商 + 电子宠物)
|
|
66
68
|
oh # 自动检测本地模型
|
|
@@ -142,12 +144,13 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
142
144
|
|
|
143
145
|
可用变量:`{model}`、`{tokens}`(输入↑ 输出↓)、`{cost}`($X.XXXX)、`{ctx}`(上下文占用条)。空片段会自动折叠。
|
|
144
146
|
|
|
145
|
-
## 工具(
|
|
147
|
+
## 工具(43 个)
|
|
146
148
|
|
|
147
149
|
| 工具 | 风险 | 描述 |
|
|
148
150
|
|------|------|-------------|
|
|
149
151
|
| **核心** | | |
|
|
150
152
|
| Bash | 高 | 执行 shell 命令并实时流式输出(AST 安全分析) |
|
|
153
|
+
| PowerShell | 高 | 执行 PowerShell 命令(Windows 原生脚本) |
|
|
151
154
|
| Read | 低 | 按行范围读取文件,支持 PDF |
|
|
152
155
|
| ImageRead | 低 | 读取图片/PDF 以进行多模态分析 |
|
|
153
156
|
| Write | 中 | 创建或覆盖文件 |
|
|
@@ -167,6 +170,7 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
167
170
|
| TaskGet | 低 | 获取任务详情 |
|
|
168
171
|
| TaskStop | 低 | 停止正在运行的任务 |
|
|
169
172
|
| TaskOutput | 低 | 获取任务输出 |
|
|
173
|
+
| TodoWrite | 低 | 管理会话级 todo 列表(兼容 Claude Code) |
|
|
170
174
|
| **代理** | | |
|
|
171
175
|
| Agent | 中 | 派生一个子代理(可指定角色) |
|
|
172
176
|
| ParallelAgent | 中 | 派发多个代理并支持 DAG 依赖 |
|
|
@@ -176,9 +180,12 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
176
180
|
| CronCreate | 中 | 创建定时任务 |
|
|
177
181
|
| CronDelete | 中 | 删除定时任务 |
|
|
178
182
|
| CronList | 低 | 列出所有定时任务 |
|
|
183
|
+
| ScheduleWakeup | 低 | 在 /loop 中自适应安排下一次触发(缓存感知) |
|
|
179
184
|
| **规划** | | |
|
|
180
185
|
| EnterPlanMode | 低 | 进入结构化规划模式 |
|
|
181
186
|
| ExitPlanMode | 低 | 退出规划模式 |
|
|
187
|
+
| **流水线** | | |
|
|
188
|
+
| Pipeline | 中 | 顺序执行一连串子任务,把每一步的输出作为下一步的输入 |
|
|
182
189
|
| **代码智能** | | |
|
|
183
190
|
| Diagnostics | 低 | 基于 LSP 的代码诊断 |
|
|
184
191
|
| NotebookEdit | 中 | 编辑 Jupyter notebook |
|
|
@@ -186,6 +193,7 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
186
193
|
| Memory | 低 | 保存/列出/搜索持久化记忆 |
|
|
187
194
|
| Skill | 低 | 调用 .oh/skills/ 下的技能 |
|
|
188
195
|
| ToolSearch | 低 | 按描述查找工具 |
|
|
196
|
+
| SessionSearch | 低 | 在历史会话中搜索相关上下文 |
|
|
189
197
|
| **MCP** | | |
|
|
190
198
|
| ListMcpResources | 低 | 列出已连接 MCP 服务器上的资源 |
|
|
191
199
|
| ReadMcpResource | 低 | 按 URI 读取指定的 MCP 资源 |
|
|
@@ -194,12 +202,13 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
194
202
|
| ExitWorktree | 中 | 移除一个 git worktree |
|
|
195
203
|
| **进程** | | |
|
|
196
204
|
| KillProcess | 高 | 按 PID 或名称停止进程 |
|
|
205
|
+
| Monitor | 中 | 在后台运行命令,并把每一行输出实时反馈给代理 |
|
|
197
206
|
|
|
198
207
|
低风险只读工具会自动批准。在 `ask` 模式下,中高风险工具需要确认。使用 `--trust` 或 `--auto` 可跳过提示。
|
|
199
208
|
|
|
200
|
-
##
|
|
209
|
+
## 斜杠命令
|
|
201
210
|
|
|
202
|
-
|
|
211
|
+
OH 注册了 80+ 个斜杠命令;下表只列出最常用的一部分。在会话中运行 `/help` 可以看到完整列表。别名:`/q` 退出、`/h` 帮助、`/c` 提交、`/m` 模型、`/s` 状态。
|
|
203
212
|
|
|
204
213
|
**会话:**
|
|
205
214
|
| 命令 | 描述 |
|
|
@@ -289,11 +298,29 @@ hooks:
|
|
|
289
298
|
command: "scripts/cleanup.sh"
|
|
290
299
|
```
|
|
291
300
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
301
|
+
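**事件类型**(下表列出 17 个;`HookEvent` 类型另外还定义了 `postToolBatch`、`userPromptExpansion`、`permissionDenied`、`taskCreated`、`taskCompleted` 和 `instructionsLoaded`):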
|
|
302
|
+
|
|
303
|
+
| 事件 | 触发时机 | 是否可阻止 |
|
|
304
|
+
|-------|---------------|------------|
|
|
305
|
+
| `sessionStart` | 会话开始 | — |
|
|
306
|
+
| `sessionEnd` | 会话结束 | — |
|
|
307
|
+
| `turnStart` | 顶层代理回合开始(用户提示词被接受后) | — |
|
|
308
|
+
| `turnStop` | 顶层代理回合结束(对应 Claude Code 的 `Stop`) | — |
|
|
309
|
+
| `userPromptSubmit` | 用户提示词到达 LLM 之前 | 是 —— `decision: deny` |
|
|
310
|
+
| `preToolUse` | 工具调用之前 | 是 —— 退出码 1 / `decision: deny` |
|
|
311
|
+
| `postToolUse` | 工具成功执行之后 | — |
|
|
312
|
+
| `postToolUseFailure` | 工具抛错或返回 `isError: true` | — |
|
|
313
|
+
| `permissionRequest` | 工具需要授权时(`preToolUse` 与询问之间) | 是 —— `decision: allow\|deny\|ask` |
|
|
314
|
+
| `fileChanged` | 工具修改文件之后 | — |
|
|
315
|
+
| `cwdChanged` | 工作目录变更之后 | — |
|
|
316
|
+
| `subagentStart` | 子代理被派生 | — |
|
|
317
|
+
| `subagentStop` | 子代理完成 | — |
|
|
318
|
+
| `preCompact` | 对话压缩之前 | — |
|
|
319
|
+
| `postCompact` | 对话压缩之后 | — |
|
|
320
|
+
| `configChange` | 会话过程中 `.oh/config.yaml` 被修改 | — |
|
|
321
|
+
| `notification` | 通知被派发 | — |
|
|
322
|
+
|
|
323
|
+
实时查看:在会话中运行 `/hooks` 可以按事件分组查看当前已加载的钩子。
|
|
297
324
|
|
|
298
325
|
**环境变量**(钩子脚本可用):
|
|
299
326
|
|
|
@@ -303,10 +330,16 @@ hooks:
|
|
|
303
330
|
| `OH_TOOL_NAME` | 正在调用的工具名(仅工具类事件) |
|
|
304
331
|
| `OH_TOOL_ARGS` | JSON 编码的工具参数(仅工具类事件) |
|
|
305
332
|
| `OH_TOOL_OUTPUT` | JSON 编码的工具输出(仅 `postToolUse`) |
|
|
333
|
+
| `OH_TOOL_INPUT_JSON` | 完整的 JSON 工具输入(仅工具类事件) |
|
|
334
|
+
| `OH_SESSION_ID` / `OH_MODEL` / `OH_PROVIDER` / `OH_PERMISSION_MODE` | 当前会话上下文 |
|
|
335
|
+
| `OH_COST` / `OH_TOKENS` | 累计费用与 token 数 |
|
|
336
|
+
| `OH_FILE_PATH` | 变更的文件路径(仅 `fileChanged`) |
|
|
337
|
+
| `OH_NEW_CWD` | 新的工作目录(仅 `cwdChanged`) |
|
|
338
|
+
| `OH_TURN_NUMBER` / `OH_TURN_REASON` | 回合边界上下文(`turnStart` / `turnStop`) |
|
|
306
339
|
|
|
307
|
-
使用 `match` 将钩子限定到特定工具名(例如 `match: Bash` 仅对 Bash
|
|
340
|
+
使用 `match` 将钩子限定到特定工具名(例如 `match: Bash` 仅对 Bash 工具触发)。支持子串、glob(如 `Cron*`)和 `/regex/flags` 三种匹配方式。
|
|
308
341
|
|
|
309
|
-
|
|
342
|
+
将 `command` 钩子设置 `jsonIO: true` 即可启用结构化 JSON I/O —— 框架在 stdin 上发送 `{event, ...context}`,并从 stdout 读取 `{decision, reason, hookSpecificOutput}`。HTTP 钩子接受同样的响应格式。完整参考见 [docs/hooks.md](docs/hooks.md)。
|
|
310
343
|
|
|
311
344
|
## 电子宠物 Cybergotchi
|
|
312
345
|
|
package/dist/commands/index.d.ts
CHANGED
|
@@ -26,4 +26,27 @@ export declare function getCommandEntries(): Array<{
|
|
|
26
26
|
name: string;
|
|
27
27
|
description: string;
|
|
28
28
|
}>;
|
|
29
|
+
import type { McpPromptHandle } from "../mcp/loader.js";
/**
 * Register MCP-server prompts as `/server:prompt` slash commands. Called from
 * main.tsx after `loadMcpTools()` + `loadMcpPrompts()` so the connections are
 * warm. Each handler invokes the prompt's `render()` and returns the result
 * as a `prependToPrompt` so the next user prompt carries it as context.
 *
 * Argument syntax: `/server:prompt key=value key2=value2 ...`. Quoted values
 * (`key="value with spaces"`) are supported. Args declared as `required` on
 * the prompt template that aren't supplied surface as a usage error.
 *
 * Re-registering replaces any prior MCP prompt commands — safe to call again
 * after `/reload-plugins` triggers a re-discover.
 *
 * @param prompts Prompt handles to expose; each is registered under its
 *   lower-cased `qualifiedName`.
 */
export declare function registerMcpPromptCommands(prompts: readonly McpPromptHandle[]): void;
/**
 * Parse `key=value key2="value with spaces"` style args into a map. Bare
 * tokens (no `=`) are dropped — MCP prompt arguments are always named.
 * When a key is repeated, the last occurrence wins.
 * Exposed for tests.
 *
 * @param raw The argument text that follows the slash command name.
 * @returns A map from argument name to its (unquoted) string value; empty
 *   when `raw` is blank.
 *
 * @internal
 */
export declare function parseMcpPromptArgs(raw: string): Record<string, string>;
|
|
29
52
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/commands/index.js
CHANGED
|
@@ -67,4 +67,68 @@ export function getCommandNames() {
|
|
|
67
67
|
export function getCommandEntries() {
|
|
68
68
|
return [...commands.entries()].map(([name, { description }]) => ({ name, description }));
|
|
69
69
|
}
|
|
70
|
+
// Keys of the commands added by the most recent registerMcpPromptCommands()
// call, remembered so the next call can remove them before re-registering.
let mcpPromptKeys = [];
/**
 * Register MCP-server prompts as slash commands in the shared `commands` map.
 *
 * Every command previously added by this function is removed first, so calling
 * it again replaces the earlier set instead of accumulating stale entries.
 * Each prompt is stored under its lower-cased `qualifiedName`.
 *
 * The registered handler:
 *  - parses `key=value` arguments with `parseMcpPromptArgs`,
 *  - returns a usage message (`handled: true`) when a required argument is absent,
 *  - otherwise awaits `handle.render(parsed)` and returns the rendered text as
 *    `prependToPrompt` (`handled: false`), or an "empty prompt" / "failed"
 *    message (`handled: true`) when rendering yields only whitespace or throws.
 *
 * @param prompts Iterable of prompt handles exposing `qualifiedName`,
 *   `description`, optional `arguments` (`{ name, required }`) and `render()`.
 */
export function registerMcpPromptCommands(prompts) {
    for (const key of mcpPromptKeys)
        commands.delete(key);
    mcpPromptKeys = [];
    for (const handle of prompts) {
        const key = handle.qualifiedName.toLowerCase();
        const declared = handle.arguments ?? [];
        const required = declared.filter((a) => a.required).map((a) => a.name);
        const optional = declared.filter((a) => !a.required).map((a) => a.name);
        const usageBits = [...required.map((n) => `${n}=<value>`), ...optional.map((n) => `[${n}=<value>]`)].join(" ");
        commands.set(key, {
            description: handle.description,
            handler: async (args) => {
                const parsed = parseMcpPromptArgs(args);
                // Own-property check on purpose: `n in parsed` would also match
                // names inherited from Object.prototype (`constructor`,
                // `toString`, ...), so a required argument with such a name
                // would never be reported as missing.
                const missing = required.filter((n) => !Object.prototype.hasOwnProperty.call(parsed, n));
                if (missing.length > 0) {
                    return {
                        output: `/${handle.qualifiedName}: missing required argument(s): ${missing.join(", ")}\nUsage: /${handle.qualifiedName}${usageBits ? ` ${usageBits}` : ""}`,
                        handled: true,
                    };
                }
                try {
                    const rendered = await handle.render(parsed);
                    if (!rendered.trim()) {
                        return { output: `/${handle.qualifiedName} returned an empty prompt.`, handled: true };
                    }
                    // handled: false lets the caller continue to the model with
                    // the rendered prompt prepended.
                    return {
                        output: `[mcp-prompt] ${handle.qualifiedName}`,
                        handled: false,
                        prependToPrompt: rendered,
                    };
                }
                catch (err) {
                    // A failing render is reported to the user, never rethrown.
                    return {
                        output: `/${handle.qualifiedName} failed: ${err instanceof Error ? err.message : String(err)}`,
                        handled: true,
                    };
                }
            },
        });
        mcpPromptKeys.push(key);
    }
}
|
|
113
|
+
/**
 * Parse `key=value key2="value with spaces"` style args into a map. Bare
 * tokens (no `=`) are dropped — MCP prompt arguments are always named.
 * Exposed for tests.
 *
 * Values may be double-quoted, single-quoted, or an unquoted run of
 * non-whitespace characters; surrounding quotes are stripped. When a key
 * appears more than once, the last value wins.
 *
 * @param {string} raw Argument text following the slash command name.
 * @returns {Record<string, string>} Parsed arguments; `{}` when `raw` is blank.
 *
 * @internal
 */
export function parseMcpPromptArgs(raw) {
    if (!raw.trim())
        return {};
    // Match key=value or key="value with spaces" or key='value'
    const re = /(\w[\w.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))/g;
    const pairs = [];
    for (const m of raw.matchAll(re)) {
        pairs.push([m[1], m[2] ?? m[3] ?? m[4] ?? ""]);
    }
    // Build via Object.fromEntries rather than `out[key] = value`: plain
    // assignment silently discards a key named `__proto__` (the legacy setter
    // ignores string values), whereas fromEntries defines it as an ordinary
    // own property. Duplicate keys still resolve to the last value.
    return Object.fromEntries(pairs);
}
|
|
70
134
|
//# sourceMappingURL=index.js.map
|
package/dist/commands/info.js
CHANGED
|
@@ -5,12 +5,15 @@ import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from
|
|
|
5
5
|
import { homedir } from "node:os";
|
|
6
6
|
import { join } from "node:path";
|
|
7
7
|
import { gitBranch, isGitRepo, isInMergeOrRebase } from "../git/index.js";
|
|
8
|
-
import { readOhConfig } from "../harness/config.js";
|
|
8
|
+
import { invalidateConfigCache, readOhConfig } from "../harness/config.js";
|
|
9
9
|
import { estimateMessageTokens } from "../harness/context-warning.js";
|
|
10
10
|
import { getContextWindow } from "../harness/cost.js";
|
|
11
|
-
import { getHooks } from "../harness/hooks.js";
|
|
11
|
+
import { getHooks, invalidateHookCache } from "../harness/hooks.js";
|
|
12
|
+
import { discoverPlugins, discoverSkills } from "../harness/plugins.js";
|
|
13
|
+
import { invalidateSandboxCache } from "../harness/sandbox.js";
|
|
14
|
+
import { invalidateVerificationCache } from "../harness/verification.js";
|
|
12
15
|
import { normalizeMcpConfig } from "../mcp/config-normalize.js";
|
|
13
|
-
import { connectedMcpServers } from "../mcp/loader.js";
|
|
16
|
+
import { connectedMcpServers, disconnectMcpClients, loadMcpTools } from "../mcp/loader.js";
|
|
14
17
|
import { getAuthStatus } from "../mcp/oauth.js";
|
|
15
18
|
import { getRouteSelection } from "../providers/router.js";
|
|
16
19
|
import { formatHooksReport } from "./hooks-report.js";
|
|
@@ -721,6 +724,46 @@ export function registerInfoCommands(register, getCommandMap) {
|
|
|
721
724
|
}
|
|
722
725
|
return { output: lines.join("\n"), handled: true };
|
|
723
726
|
});
|
|
727
|
+
register("reload-plugins", "Hot-reload plugins, skills, hooks, MCP servers and config without restarting the session.", async () => {
    // Drop every cache-backed source first (config, hooks, sandbox,
    // verification). Skills and plugins are re-discovered below purely so the
    // report reflects what is on disk now.
    const invalidators = [
        invalidateConfigCache,
        invalidateHookCache,
        invalidateSandboxCache,
        invalidateVerificationCache,
    ];
    for (const invalidate of invalidators) {
        invalidate();
    }
    // MCP connections are live rather than cached, so they are torn down and
    // re-established explicitly. A failure while reloading tools is captured
    // and reported instead of aborting the rest of the reload.
    disconnectMcpClients();
    const mcp = { tools: 0, error: null };
    try {
        mcp.tools = (await loadMcpTools()).length;
    }
    catch (err) {
        mcp.error = err instanceof Error ? err.message : String(err);
    }
    // Gather the figures for the summary.
    const skillsCount = discoverSkills().length;
    const pluginsCount = discoverPlugins().length;
    const hookEvents = Object.keys(getHooks() ?? {}).length;
    const mcpServers = connectedMcpServers().length;
    const errorSuffix = mcp.error ? ` (error: ${mcp.error})` : "";
    const report = [];
    report.push("Hot reload complete:");
    report.push("  - config + hooks + sandbox + verification: caches invalidated");
    report.push(`  - hook events configured: ${hookEvents}`);
    report.push(`  - MCP servers connected: ${mcpServers}${errorSuffix}`);
    report.push(`  - MCP tools loaded: ${mcp.tools}`);
    report.push(`  - skills discovered: ${skillsCount}`);
    report.push(`  - plugins discovered: ${pluginsCount}`);
    report.push("");
    report.push("Note: in-flight tool registries (held by the agent loop) refresh on the next prompt.");
    return { output: report.join("\n"), handled: true };
});
|
|
724
767
|
register("benchmark", "Run SWE-bench benchmark suite", (args) => {
|
|
725
768
|
const task = args.trim();
|
|
726
769
|
if (!task) {
|
package/dist/harness/config.d.ts
CHANGED
|
@@ -66,6 +66,18 @@ export type HooksConfig = {
|
|
|
66
66
|
turnStart?: HookDef[];
|
|
67
67
|
/** Fires at the end of each top-level agent turn (after the model either completes or errors). Matches Claude Code's Stop hook. */
|
|
68
68
|
turnStop?: HookDef[];
|
|
69
|
+
/** Fires after a slash command expands into a model prompt (`prependToPrompt`), between expansion and userPromptSubmit. Useful for audit trails. */
|
|
70
|
+
userPromptExpansion?: HookDef[];
|
|
71
|
+
/** Fires after a turn's full set of tool calls have all resolved, before the next model call. Sees the batch as a whole; postToolUse fires per-tool. */
|
|
72
|
+
postToolBatch?: HookDef[];
|
|
73
|
+
/** Fires when a tool call is denied (auto-mode policy block, hook-driven deny, headless fail-closed, or user "no"). Symmetric to permissionRequest. */
|
|
74
|
+
permissionDenied?: HookDef[];
|
|
75
|
+
/** Fires when a TaskCreate tool call has just persisted a new task. */
|
|
76
|
+
taskCreated?: HookDef[];
|
|
77
|
+
/** Fires when a TaskUpdate tool call transitions a task to status "completed". */
|
|
78
|
+
taskCompleted?: HookDef[];
|
|
79
|
+
/** Fires once per system-prompt build after CLAUDE.md / global-rules / project RULES.md / user profile have been concatenated. Useful for audit trails. */
|
|
80
|
+
instructionsLoaded?: HookDef[];
|
|
69
81
|
};
|
|
70
82
|
export type ToolPermissionRule = {
|
|
71
83
|
tool: string;
|
package/dist/harness/hooks.d.ts
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* - prompt: LLM yes/no check via provider.complete()
|
|
11
11
|
*/
|
|
12
12
|
import type { HookDef, HooksConfig } from "./config.js";
|
|
13
|
-
export type HookEvent = "sessionStart" | "sessionEnd" | "preToolUse" | "postToolUse" | "postToolUseFailure" | "userPromptSubmit" | "permissionRequest" | "fileChanged" | "cwdChanged" | "subagentStart" | "subagentStop" | "preCompact" | "postCompact" | "configChange" | "notification" | "turnStart" | "turnStop";
|
|
13
|
+
export type HookEvent = "sessionStart" | "sessionEnd" | "preToolUse" | "postToolUse" | "postToolUseFailure" | "postToolBatch" | "userPromptSubmit" | "userPromptExpansion" | "permissionRequest" | "permissionDenied" | "fileChanged" | "cwdChanged" | "subagentStart" | "subagentStop" | "preCompact" | "postCompact" | "configChange" | "notification" | "turnStart" | "turnStop" | "taskCreated" | "taskCompleted" | "instructionsLoaded";
|
|
14
14
|
export type HookContext = {
|
|
15
15
|
toolName?: string;
|
|
16
16
|
toolArgs?: string;
|
|
@@ -42,6 +42,28 @@ export type HookContext = {
|
|
|
42
42
|
turnNumber?: string;
|
|
43
43
|
/** For turnStop: reason the turn ended ("completed", "max_turns", "error", "interrupted") */
|
|
44
44
|
turnReason?: string;
|
|
45
|
+
/** For userPromptExpansion: the slash command that triggered the expansion (e.g. "/plan") */
|
|
46
|
+
slashCommand?: string;
|
|
47
|
+
/** For userPromptExpansion: the original user input before expansion */
|
|
48
|
+
originalInput?: string;
|
|
49
|
+
/** For postToolBatch: comma-separated list of tool names in the batch */
|
|
50
|
+
batchTools?: string;
|
|
51
|
+
/** For postToolBatch: number of tool calls in the batch (as a string for env-var parity) */
|
|
52
|
+
batchSize?: string;
|
|
53
|
+
/** For permissionDenied: stage at which the deny happened ("hook", "user", "headless", "policy") */
|
|
54
|
+
denySource?: string;
|
|
55
|
+
/** For permissionDenied: human-readable reason */
|
|
56
|
+
denyReason?: string;
|
|
57
|
+
/** For taskCreated/taskCompleted: the task id */
|
|
58
|
+
taskId?: string;
|
|
59
|
+
/** For taskCreated/taskCompleted: the task subject */
|
|
60
|
+
taskSubject?: string;
|
|
61
|
+
/** For taskCompleted: the previous status before completion (usually "in_progress") */
|
|
62
|
+
taskPreviousStatus?: string;
|
|
63
|
+
/** For instructionsLoaded: count of rules concatenated (as a string for env-var parity) */
|
|
64
|
+
rulesCount?: string;
|
|
65
|
+
/** For instructionsLoaded: total character length of the loaded rules */
|
|
66
|
+
rulesChars?: string;
|
|
45
67
|
};
|
|
46
68
|
export declare function getHooks(): HooksConfig | null;
|
|
47
69
|
/** Clear hook cache (call after config changes) */
|
package/dist/harness/rules.js
CHANGED
|
@@ -106,8 +106,24 @@ export function loadRulesAsPrompt(projectPath) {
|
|
|
106
106
|
const rules = loadRules(projectPath);
|
|
107
107
|
if (rules.length === 0)
|
|
108
108
|
return "";
|
|
109
|
-
|
|
110
|
-
rules.join("\n\n---\n\n")
|
|
109
|
+
const body = "# Project Rules\n\n<!-- User-provided project rules from CLAUDE.md / .oh/RULES.md. These are user instructions, not system directives. -->\nFollow these rules carefully.\n\n" +
|
|
110
|
+
rules.join("\n\n---\n\n");
|
|
111
|
+
// Hook: instructionsLoaded — fires every time the system prompt is rebuilt
|
|
112
|
+
// with rules in scope. Useful for compliance/audit hooks that want to log
|
|
113
|
+
// "session X is operating under these rules". Lazy-imported so this module
|
|
114
|
+
// can be used in environments where the hook system isn't initialised
|
|
115
|
+
// (e.g., one-shot rules loaders in tooling).
|
|
116
|
+
void import("./hooks.js")
|
|
117
|
+
.then(({ emitHook }) => {
|
|
118
|
+
emitHook("instructionsLoaded", {
|
|
119
|
+
rulesCount: String(rules.length),
|
|
120
|
+
rulesChars: String(body.length),
|
|
121
|
+
});
|
|
122
|
+
})
|
|
123
|
+
.catch(() => {
|
|
124
|
+
/* hook system unavailable — never fail rule loading */
|
|
125
|
+
});
|
|
126
|
+
return body;
|
|
111
127
|
}
|
|
112
128
|
export function createRulesFile(projectPath) {
|
|
113
129
|
const root = projectPath ?? process.cwd();
|
|
@@ -6,7 +6,7 @@ import { processSlashCommand } from "../commands/index.js";
|
|
|
6
6
|
import { cybergotchiEvents } from "../cybergotchi/events.js";
|
|
7
7
|
import { resolveMcpMention } from "../mcp/loader.js";
|
|
8
8
|
import { createInfoMessage, createUserMessage } from "../types/message.js";
|
|
9
|
-
import { emitHookWithOutcome } from "./hooks.js";
|
|
9
|
+
import { emitHook, emitHookWithOutcome } from "./hooks.js";
|
|
10
10
|
/**
|
|
11
11
|
* Process user input: handle exit, companion mentions, slash commands,
|
|
12
12
|
* @mentions, and prepare the prompt for the LLM.
|
|
@@ -80,6 +80,19 @@ export async function handleUserInput(input, ctx) {
|
|
|
80
80
|
if (result.prependToPrompt) {
|
|
81
81
|
messages = [...messages, createUserMessage(input)];
|
|
82
82
|
const prependPrompt = result.prependToPrompt;
|
|
83
|
+
// Slash command produced an expanded prompt — fire userPromptExpansion
|
|
84
|
+
// before userPromptSubmit so audit hooks can see the (input → expanded)
|
|
85
|
+
// boundary that's otherwise hidden from observers.
|
|
86
|
+
const slashCommand = trimmed.split(/\s/)[0] ?? trimmed;
|
|
87
|
+
emitHook("userPromptExpansion", {
|
|
88
|
+
slashCommand,
|
|
89
|
+
originalInput: input.slice(0, 1000),
|
|
90
|
+
prompt: prependPrompt.slice(0, 1000),
|
|
91
|
+
sessionId: ctx.sessionId,
|
|
92
|
+
model: ctx.currentModel,
|
|
93
|
+
provider: ctx.providerName,
|
|
94
|
+
permissionMode: ctx.permissionMode,
|
|
95
|
+
});
|
|
83
96
|
const prependOutcome = await emitHookWithOutcome("userPromptSubmit", {
|
|
84
97
|
prompt: prependPrompt,
|
|
85
98
|
sessionId: ctx.sessionId,
|
package/dist/main.js
CHANGED
|
@@ -23,10 +23,11 @@ import { detectProject, projectContextToPrompt } from "./harness/onboarding.js";
|
|
|
23
23
|
import { discoverSkills, skillsToPrompt } from "./harness/plugins.js";
|
|
24
24
|
import { createRulesFile, loadRules, loadRulesAsPrompt } from "./harness/rules.js";
|
|
25
25
|
import { listSessions } from "./harness/session.js";
|
|
26
|
-
import { connectedMcpServers, disconnectMcpClients, getMcpInstructions, loadMcpTools } from "./mcp/loader.js";
|
|
26
|
+
import { connectedMcpServers, disconnectMcpClients, getMcpInstructions, loadMcpPrompts, loadMcpTools, } from "./mcp/loader.js";
|
|
27
27
|
import { loadOutputStyle } from "./outputStyles/index.js";
|
|
28
28
|
import { getAllTools } from "./tools.js";
|
|
29
29
|
import { validateAgainstJsonSchema } from "./utils/json-schema.js";
|
|
30
|
+
import { parseMaxBudgetUsd } from "./utils/parse-budget.js";
|
|
30
31
|
const _require = createRequire(import.meta.url);
|
|
31
32
|
const VERSION = _require("../package.json").version;
|
|
32
33
|
const BANNER = ` ___
|
|
@@ -74,6 +75,20 @@ You have access to tools for reading, writing, and searching files, running shel
|
|
|
74
75
|
- When referencing code, include file_path:line_number.
|
|
75
76
|
- Do not restate what the user said. Do not add trailing summaries unless asked.
|
|
76
77
|
- Keep responses short and direct. If you can say it in one sentence, don't use three.`;
|
|
78
|
+
/**
 * CLI-facing wrapper around `parseMaxBudgetUsd` (imported from
 * `./utils/parse-budget.js`) for the `--max-budget-usd` option.
 *
 * On a successful parse the numeric amount is returned. On failure the
 * parser's message is written to stderr prefixed with `Error: ` and the
 * process exits with status 2. Keeping the side effect here lets the parser
 * itself stay pure.
 *
 * @param {string} raw - The option value exactly as received from the CLI.
 * @returns {number} The parsed budget amount.
 */
function parseMaxBudgetUsdOrExit(raw) {
    const outcome = parseMaxBudgetUsd(raw);
    if (outcome.ok) {
        return outcome.value;
    }
    process.stderr.write(`Error: ${outcome.message}\n`);
    process.exit(2);
    // Only reached when process.exit does not terminate (e.g. it is stubbed).
    return outcome.value;
}
|
|
77
92
|
function buildSystemPrompt(model) {
|
|
78
93
|
const cfg = readOhConfig();
|
|
79
94
|
// Output-style preface (first — sets personality for everything that follows).
|
|
@@ -136,6 +151,7 @@ program
|
|
|
136
151
|
.option("--disallowed-tools <tools>", "Comma-separated list of disallowed tools")
|
|
137
152
|
.option("--resume <id>", "Resume a saved session (replays its message history before this prompt)")
|
|
138
153
|
.option("--setting-sources <sources>", "Comma-separated list of setting sources to merge (e.g. 'user,project,local'). Mirrors Claude Code's setting_sources.")
|
|
154
|
+
.option("--max-budget-usd <amount>", "Hard cap on session cost in USD. The agent halts with reason 'budget_exceeded' once totalCost reaches this amount. Mirrors Claude Code's --max-budget-usd.")
|
|
139
155
|
.action(async (promptArg, opts) => {
|
|
140
156
|
// Read from stdin if prompt is "-" or omitted and stdin is not a TTY
|
|
141
157
|
let prompt;
|
|
@@ -201,6 +217,7 @@ program
|
|
|
201
217
|
permissionMode,
|
|
202
218
|
maxTurns: parseInt(opts.maxTurns, 10),
|
|
203
219
|
model,
|
|
220
|
+
...(opts.maxBudgetUsd !== undefined ? { maxCost: parseMaxBudgetUsdOrExit(opts.maxBudgetUsd) } : {}),
|
|
204
221
|
};
|
|
205
222
|
const outputFormat = opts.json ? "json" : (opts.outputFormat ?? "text");
|
|
206
223
|
let fullOutput = "";
|
|
@@ -210,26 +227,36 @@ program
|
|
|
210
227
|
// history into the conversation before the new prompt. If the session can't
|
|
211
228
|
// be loaded (missing file, malformed JSON), fail early with a clear error
|
|
212
229
|
// rather than silently starting fresh.
|
|
230
|
+
//
|
|
231
|
+
// When --resume is NOT passed, mint a fresh session record so SDK callers
|
|
232
|
+
// can capture its id from the session_start event and pass it back as
|
|
233
|
+
// --resume <id> on a later run. Without this, every fresh `oh run` was
|
|
234
|
+
// a programmatic dead-end for resumption (issue #60).
|
|
235
|
+
const { createSession, loadSession, saveSession } = await import("./harness/session.js");
|
|
213
236
|
let priorMessages;
|
|
214
237
|
let sessionId;
|
|
238
|
+
let sessionRecord;
|
|
215
239
|
if (opts.resume) {
|
|
216
|
-
const { loadSession } = await import("./harness/session.js");
|
|
217
240
|
try {
|
|
218
|
-
|
|
219
|
-
priorMessages =
|
|
220
|
-
sessionId =
|
|
241
|
+
sessionRecord = loadSession(opts.resume);
|
|
242
|
+
priorMessages = sessionRecord.messages;
|
|
243
|
+
sessionId = sessionRecord.id;
|
|
221
244
|
}
|
|
222
245
|
catch {
|
|
223
246
|
process.stderr.write(`Error: could not load session '${opts.resume}'\n`);
|
|
224
247
|
process.exit(1);
|
|
225
248
|
}
|
|
226
249
|
}
|
|
250
|
+
else {
|
|
251
|
+
sessionRecord = createSession(provider.name, model);
|
|
252
|
+
sessionId = sessionRecord.id;
|
|
253
|
+
saveSession(sessionRecord);
|
|
254
|
+
}
|
|
227
255
|
if (outputFormat === "stream-json") {
|
|
228
256
|
// Emit a session_start event so SDK callers can capture the id for
|
|
229
|
-
// later resume (fires once, before turnStart).
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
}
|
|
257
|
+
// later resume (fires once, before turnStart). Always emitted now —
|
|
258
|
+
// fresh runs mint a sessionId above.
|
|
259
|
+
console.log(JSON.stringify({ type: "session_start", sessionId }));
|
|
233
260
|
setHookDecisionObserver((n) => {
|
|
234
261
|
console.log(JSON.stringify({
|
|
235
262
|
type: "hook_decision",
|
|
@@ -320,6 +347,22 @@ program
|
|
|
320
347
|
else if (outputFormat === "text") {
|
|
321
348
|
process.stdout.write("\n");
|
|
322
349
|
}
|
|
350
|
+
// Persist this run's contribution so a later --resume <sessionId> finds
|
|
351
|
+
// the user/assistant pair. Tool details are intentionally elided —
|
|
352
|
+
// they're per-tool ephemerals; the assistant's final text is what
|
|
353
|
+
// matters for context resumption. Mirrors the REPL's save-on-exit pattern
|
|
354
|
+
// (src/components/REPL.tsx:120) but at one-shot scope.
|
|
355
|
+
try {
|
|
356
|
+
const { createUserMessage, createAssistantMessage } = await import("./types/message.js");
|
|
357
|
+
const newMessages = [...(priorMessages ?? []), createUserMessage(prompt)];
|
|
358
|
+
if (fullOutput)
|
|
359
|
+
newMessages.push(createAssistantMessage(fullOutput));
|
|
360
|
+
sessionRecord.messages = newMessages;
|
|
361
|
+
saveSession(sessionRecord);
|
|
362
|
+
}
|
|
363
|
+
catch {
|
|
364
|
+
/* persistence is best-effort — never fail the user's run on a save error */
|
|
365
|
+
}
|
|
323
366
|
});
|
|
324
367
|
// ── `oh session`: long-lived stateful session for the Python SDK ──
|
|
325
368
|
program
|
|
@@ -335,6 +378,7 @@ program
|
|
|
335
378
|
.option("--system-prompt <prompt>", "Override the system prompt")
|
|
336
379
|
.option("--resume <id>", "Resume a saved session (seeds the conversation with its prior message history)")
|
|
337
380
|
.option("--setting-sources <sources>", "Comma-separated list of setting sources to merge (mirrors Claude Code's setting_sources).")
|
|
381
|
+
.option("--max-budget-usd <amount>", "Hard cap on session cost in USD. Each prompt's cost accumulates; the agent halts with reason 'budget_exceeded' once totalCost reaches this amount.")
|
|
338
382
|
.action(async (opts) => {
|
|
339
383
|
const settingSources = parseSettingSources(opts.settingSources);
|
|
340
384
|
const savedConfig = readOhConfig(undefined, settingSources);
|
|
@@ -368,23 +412,32 @@ program
|
|
|
368
412
|
permissionMode,
|
|
369
413
|
maxTurns: parseInt(opts.maxTurns, 10),
|
|
370
414
|
model,
|
|
415
|
+
...(opts.maxBudgetUsd !== undefined ? { maxCost: parseMaxBudgetUsdOrExit(opts.maxBudgetUsd) } : {}),
|
|
371
416
|
};
|
|
372
417
|
// Conversation history, shared across all prompts for this process.
|
|
373
|
-
// Seeded from a prior session when --resume <id> is passed
|
|
418
|
+
// Seeded from a prior session when --resume <id> is passed; otherwise a
|
|
419
|
+
// fresh session is minted so the SDK can capture the id from the `ready`
|
|
420
|
+
// event for later resume (issue #60).
|
|
374
421
|
const conversation = [];
|
|
422
|
+
const { createSession, loadSession, saveSession } = await import("./harness/session.js");
|
|
375
423
|
let sessionId;
|
|
424
|
+
let sessionRecord;
|
|
376
425
|
if (opts.resume) {
|
|
377
|
-
const { loadSession } = await import("./harness/session.js");
|
|
378
426
|
try {
|
|
379
|
-
|
|
380
|
-
conversation.push(...
|
|
381
|
-
sessionId =
|
|
427
|
+
sessionRecord = loadSession(opts.resume);
|
|
428
|
+
conversation.push(...sessionRecord.messages);
|
|
429
|
+
sessionId = sessionRecord.id;
|
|
382
430
|
}
|
|
383
431
|
catch {
|
|
384
432
|
console.log(JSON.stringify({ type: "error", message: `could not load session '${opts.resume}'` }));
|
|
385
433
|
return;
|
|
386
434
|
}
|
|
387
435
|
}
|
|
436
|
+
else {
|
|
437
|
+
sessionRecord = createSession(provider.name, model);
|
|
438
|
+
sessionId = sessionRecord.id;
|
|
439
|
+
saveSession(sessionRecord);
|
|
440
|
+
}
|
|
388
441
|
let turnCounter = 0;
|
|
389
442
|
// Will be set to the current prompt id before each turn so hook_decision
|
|
390
443
|
// events can be demultiplexed by the client.
|
|
@@ -494,6 +547,15 @@ program
|
|
|
494
547
|
for (const tr of toolResults) {
|
|
495
548
|
conversation.push(createToolResultMessage({ callId: tr.callId, output: tr.output, isError: tr.isError }));
|
|
496
549
|
}
|
|
550
|
+
// Persist after every completed turn so a later --resume picks up the
|
|
551
|
+
// history. Best-effort — a save failure shouldn't break the live session.
|
|
552
|
+
try {
|
|
553
|
+
sessionRecord.messages = conversation.slice();
|
|
554
|
+
saveSession(sessionRecord);
|
|
555
|
+
}
|
|
556
|
+
catch {
|
|
557
|
+
/* save errors don't propagate to the client */
|
|
558
|
+
}
|
|
497
559
|
}
|
|
498
560
|
});
|
|
499
561
|
// ── Default command: just run `openharness` to start chatting ──
|
|
@@ -591,6 +653,20 @@ program
|
|
|
591
653
|
if (mcpNames.length > 0) {
|
|
592
654
|
console.log(`[mcp] Connected: ${mcpNames.join(", ")}`);
|
|
593
655
|
}
|
|
656
|
+
// Surface MCP-server prompts (`prompts/list`) as `/server:prompt` slash
|
|
657
|
+
// commands. Errors are swallowed inside loadMcpPrompts — servers that
|
|
658
|
+
// don't implement the prompts capability return [] without throwing.
|
|
659
|
+
try {
|
|
660
|
+
const { registerMcpPromptCommands } = await import("./commands/index.js");
|
|
661
|
+
const prompts = await loadMcpPrompts();
|
|
662
|
+
registerMcpPromptCommands(prompts);
|
|
663
|
+
if (prompts.length > 0) {
|
|
664
|
+
console.log(`[mcp] Prompts: ${prompts.map((p) => `/${p.qualifiedName}`).join(", ")}`);
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
catch {
|
|
668
|
+
/* prompt registration is best-effort; never block the REPL */
|
|
669
|
+
}
|
|
594
670
|
const tools = [...getAllTools(), ...mcpTools];
|
|
595
671
|
process.on("exit", () => disconnectMcpClients());
|
|
596
672
|
// Compute working directory and git branch
|
package/dist/mcp/client.d.ts
CHANGED
|
@@ -31,6 +31,29 @@ export declare class McpClient {
|
|
|
31
31
|
description?: string;
|
|
32
32
|
}>>;
|
|
33
33
|
readResource(uri: string): Promise<string>;
|
|
34
|
+
/**
|
|
35
|
+
* List the prompts an MCP server exposes. Returns `[]` for servers that
|
|
36
|
+
* don't implement the `prompts/list` capability — this is a normal case
|
|
37
|
+
* (most non-prompt-aware MCP servers throw a method-not-found error).
|
|
38
|
+
*
|
|
39
|
+
* Each prompt may declare named arguments; surfaced via `arguments`.
|
|
40
|
+
*/
|
|
41
|
+
listPrompts(): Promise<Array<{
|
|
42
|
+
name: string;
|
|
43
|
+
description?: string;
|
|
44
|
+
arguments?: Array<{
|
|
45
|
+
name: string;
|
|
46
|
+
description?: string;
|
|
47
|
+
required?: boolean;
|
|
48
|
+
}>;
|
|
49
|
+
}>>;
|
|
50
|
+
/**
|
|
51
|
+
* Get the rendered text of an MCP prompt. Server-side templates are
|
|
52
|
+
* applied with the supplied arguments. Multiple message turns are
|
|
53
|
+
* concatenated with double-newline separators — same shape OH uses for
|
|
54
|
+
* other prepended prompts.
|
|
55
|
+
*/
|
|
56
|
+
getPrompt(name: string, args?: Record<string, string>): Promise<string>;
|
|
34
57
|
callTool(name: string, args: Record<string, unknown>): Promise<string>;
|
|
35
58
|
disconnect(): void;
|
|
36
59
|
}
|
package/dist/mcp/client.js
CHANGED
|
@@ -91,6 +91,43 @@ export class McpClient {
|
|
|
91
91
|
.map((c) => c.text)
|
|
92
92
|
.join("\n");
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* List the prompts an MCP server exposes. Returns `[]` for servers that
|
|
96
|
+
* don't implement the `prompts/list` capability — this is a normal case
|
|
97
|
+
* (most non-prompt-aware MCP servers throw a method-not-found error).
|
|
98
|
+
*
|
|
99
|
+
* Each prompt may declare named arguments; surfaced via `arguments`.
|
|
100
|
+
*/
|
|
101
|
+
async listPrompts() {
|
|
102
|
+
try {
|
|
103
|
+
const res = await this.sdk.listPrompts();
|
|
104
|
+
return (res?.prompts ?? []);
|
|
105
|
+
}
|
|
106
|
+
catch {
|
|
107
|
+
return [];
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Get the rendered text of an MCP prompt. Server-side templates are
|
|
112
|
+
* applied with the supplied arguments. Multiple message turns are
|
|
113
|
+
* concatenated with double-newline separators — same shape OH uses for
|
|
114
|
+
* other prepended prompts.
|
|
115
|
+
*/
|
|
116
|
+
async getPrompt(name, args = {}) {
|
|
117
|
+
const res = await this.sdk.getPrompt({ name, arguments: args });
|
|
118
|
+
const messages = (res?.messages ?? []);
|
|
119
|
+
const parts = [];
|
|
120
|
+
for (const m of messages) {
|
|
121
|
+
const content = m.content;
|
|
122
|
+
if (typeof content === "string") {
|
|
123
|
+
parts.push(content);
|
|
124
|
+
}
|
|
125
|
+
else if (content && content.type === "text" && typeof content.text === "string") {
|
|
126
|
+
parts.push(content.text);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
return parts.join("\n\n");
|
|
130
|
+
}
|
|
94
131
|
async callTool(name, args) {
|
|
95
132
|
// Retry up to 2 times on transport-closed / timeout errors
|
|
96
133
|
let lastErr = null;
|
package/dist/mcp/loader.d.ts
CHANGED
|
@@ -5,6 +5,26 @@ export declare function loadMcpTools(): Promise<Tool[]>;
|
|
|
5
5
|
export declare function disconnectMcpClients(): void;
|
|
6
6
|
/** Names of connected MCP servers */
|
|
7
7
|
export declare function connectedMcpServers(): string[];
|
|
8
|
+
export type McpPromptHandle = {
|
|
9
|
+
/** `<server>:<prompt>` qualified name — the slash command is `/<server>:<prompt>`. */
|
|
10
|
+
qualifiedName: string;
|
|
11
|
+
description: string;
|
|
12
|
+
/** List of named arguments the prompt template expects. */
|
|
13
|
+
arguments?: Array<{
|
|
14
|
+
name: string;
|
|
15
|
+
description?: string;
|
|
16
|
+
required?: boolean;
|
|
17
|
+
}>;
|
|
18
|
+
/** Render the prompt with the supplied named arguments. */
|
|
19
|
+
render(args?: Record<string, string>): Promise<string>;
|
|
20
|
+
};
|
|
21
|
+
/**
|
|
22
|
+
* Enumerate prompts on every already-connected MCP server. Servers that don't
|
|
23
|
+
* implement the `prompts/list` capability return an empty list (handled
|
|
24
|
+
* inside `client.listPrompts`). Call AFTER `loadMcpTools()` so the client
|
|
25
|
+
* connections are warm.
|
|
26
|
+
*/
|
|
27
|
+
export declare function loadMcpPrompts(): Promise<McpPromptHandle[]>;
|
|
8
28
|
/** Get MCP server instructions to inject into system prompt (sandboxed with origin markers) */
|
|
9
29
|
export declare function getMcpInstructions(): string[];
|
|
10
30
|
/** List all available resources across connected MCP servers */
|
package/dist/mcp/loader.js
CHANGED
|
@@ -78,6 +78,33 @@ export function disconnectMcpClients() {
|
|
|
78
78
|
export function connectedMcpServers() {
|
|
79
79
|
return connectedClients.map((c) => c.name);
|
|
80
80
|
}
|
|
81
|
+
/**
 * Enumerate prompts on every already-connected MCP server.
 *
 * All servers are queried concurrently, so one slow server does not delay
 * the others; the returned handles still follow the order of
 * `connectedClients`, then each server's own prompt order. A server whose
 * `listPrompts()` rejects, or returns something other than an array,
 * contributes no handles.
 *
 * Call AFTER `loadMcpTools()` so the client connections are warm.
 *
 * @returns {Promise<Array<{qualifiedName: string, description: string,
 *   arguments?: Array, render: (args?: Record<string, string>) => Promise<string>}>>}
 *   One handle per prompt; `qualifiedName` is `<server>:<prompt>` and
 *   `render` delegates to that server's `getPrompt`.
 */
export async function loadMcpPrompts() {
    const perServer = await Promise.all(connectedClients.map(async (client) => {
        let prompts;
        try {
            prompts = await client.listPrompts();
        }
        catch {
            return []; // Defensive: listPrompts already swallows its own errors
        }
        if (!Array.isArray(prompts)) {
            return [];
        }
        return prompts.map((p) => ({
            qualifiedName: `${client.name}:${p.name}`,
            description: p.description ?? `MCP prompt from ${client.name}`,
            ...(p.arguments ? { arguments: p.arguments } : {}),
            render: (args = {}) => client.getPrompt(p.name, args),
        }));
    }));
    return perServer.flat();
}
|
|
81
108
|
const MAX_MCP_INSTRUCTION_LENGTH = 2000;
|
|
82
109
|
/** Get MCP server instructions to inject into system prompt (sandboxed with origin markers) */
|
|
83
110
|
export function getMcpInstructions() {
|
|
@@ -9,6 +9,19 @@ export declare class OllamaProvider implements Provider {
|
|
|
9
9
|
private baseUrl;
|
|
10
10
|
private defaultModel;
|
|
11
11
|
constructor(config: ProviderConfig);
|
|
12
|
+
/**
|
|
13
|
+
* Estimate the prompt size and pick a `num_ctx` for Ollama. Without this
|
|
14
|
+
* Ollama defaults to a 2048-token context window — anything bigger gets
|
|
15
|
+
* silently truncated server-side. OH's typical system prompt + tool list
|
|
16
|
+
* already pushes ~4 K, so multi-turn chats lose prior turns and the model
|
|
17
|
+
* appears to "forget" what was just said. See issue #61.
|
|
18
|
+
*
|
|
19
|
+
* Strategy: rough char/4 token estimate, padded by 25 % plus 1 K headroom for the response,
|
|
20
|
+
* then round up to the next power of 2 ≥ 8192. Capped at 32 K to keep KV
|
|
21
|
+
* cache bounded; users with bigger models can override via
|
|
22
|
+
* `OLLAMA_NUM_CTX`.
|
|
23
|
+
*/
|
|
24
|
+
private computeNumCtx;
|
|
12
25
|
private convertMessages;
|
|
13
26
|
private convertTools;
|
|
14
27
|
stream(messages: Message[], systemPrompt: string, tools?: APIToolDef[], model?: string): AsyncGenerator<StreamEvent, void>;
|
package/dist/providers/ollama.js
CHANGED
|
@@ -10,6 +10,45 @@ export class OllamaProvider {
|
|
|
10
10
|
this.baseUrl = (config.baseUrl ?? "http://localhost:11434").replace(/\/$/, "");
|
|
11
11
|
this.defaultModel = config.defaultModel ?? "llama3.1";
|
|
12
12
|
}
|
|
13
|
+
/**
|
|
14
|
+
* Estimate the prompt size and pick a `num_ctx` for Ollama. Without this
|
|
15
|
+
* Ollama defaults to a 2048-token context window — anything bigger gets
|
|
16
|
+
* silently truncated server-side. OH's typical system prompt + tool list
|
|
17
|
+
* already pushes ~4 K, so multi-turn chats lose prior turns and the model
|
|
18
|
+
* appears to "forget" what was just said. See issue #61.
|
|
19
|
+
*
|
|
20
|
+
* Strategy: rough char/4 token estimate, +1 K headroom for the response,
|
|
21
|
+
* then round up to the next power of 2 ≥ 8192. Capped at 32 K to keep KV
|
|
22
|
+
* cache bounded; users with bigger models can override via
|
|
23
|
+
* `OLLAMA_NUM_CTX`.
|
|
24
|
+
*/
|
|
25
|
+
computeNumCtx(messages, systemPrompt, tools) {
|
|
26
|
+
const override = process.env.OLLAMA_NUM_CTX;
|
|
27
|
+
if (override) {
|
|
28
|
+
const parsed = Number(override);
|
|
29
|
+
if (Number.isFinite(parsed) && parsed > 0)
|
|
30
|
+
return Math.floor(parsed);
|
|
31
|
+
}
|
|
32
|
+
const estimate = (s) => Math.ceil(s.length / 4);
|
|
33
|
+
let total = systemPrompt ? estimate(systemPrompt) : 0;
|
|
34
|
+
for (const m of messages) {
|
|
35
|
+
total += estimate(m.content);
|
|
36
|
+
if (m.toolCalls)
|
|
37
|
+
for (const tc of m.toolCalls)
|
|
38
|
+
total += estimate(JSON.stringify(tc.arguments));
|
|
39
|
+
if (m.toolResults)
|
|
40
|
+
for (const tr of m.toolResults)
|
|
41
|
+
total += estimate(tr.output);
|
|
42
|
+
}
|
|
43
|
+
if (tools)
|
|
44
|
+
for (const t of tools)
|
|
45
|
+
total += estimate(JSON.stringify(t));
|
|
46
|
+
const padded = Math.ceil(total * 1.25) + 1024;
|
|
47
|
+
let nc = 8192;
|
|
48
|
+
while (nc < padded && nc < 32768)
|
|
49
|
+
nc *= 2;
|
|
50
|
+
return Math.min(nc, 32768);
|
|
51
|
+
}
|
|
13
52
|
convertMessages(messages, systemPrompt) {
|
|
14
53
|
const converted = [];
|
|
15
54
|
if (systemPrompt) {
|
|
@@ -69,6 +108,7 @@ export class OllamaProvider {
|
|
|
69
108
|
model: m,
|
|
70
109
|
messages: msgs,
|
|
71
110
|
stream: true,
|
|
111
|
+
options: { num_ctx: this.computeNumCtx(messages, systemPrompt, tools) },
|
|
72
112
|
};
|
|
73
113
|
const ollamaTools = this.convertTools(tools);
|
|
74
114
|
if (ollamaTools)
|
|
@@ -219,6 +259,7 @@ export class OllamaProvider {
|
|
|
219
259
|
model: m,
|
|
220
260
|
messages: msgs,
|
|
221
261
|
stream: false,
|
|
262
|
+
options: { num_ctx: this.computeNumCtx(messages, systemPrompt, tools) },
|
|
222
263
|
};
|
|
223
264
|
const ollamaTools = this.convertTools(tools);
|
|
224
265
|
if (ollamaTools)
|
package/dist/query/tools.js
CHANGED
|
@@ -42,11 +42,12 @@ export async function executeSingleTool(toolCall, tools, context, permissionMode
|
|
|
42
42
|
// Permission check
|
|
43
43
|
const perm = checkPermission(permissionMode, tool.riskLevel, tool.isReadOnly(parsed.data), tool.name, parsed.data);
|
|
44
44
|
if (!perm.allowed) {
|
|
45
|
-
if (perm.reason === "needs-approval"
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
//
|
|
49
|
-
//
|
|
45
|
+
if (perm.reason === "needs-approval") {
|
|
46
|
+
// Hook: permissionRequest — fires whenever checkPermission says
|
|
47
|
+
// "needs-approval", in both interactive and headless modes. Configured
|
|
48
|
+
// hooks get first say; if they return "ask" or have no decision, we
|
|
49
|
+
// fall through to the interactive prompt when one is available, or
|
|
50
|
+
// fail-closed deny in headless mode (issue #62).
|
|
50
51
|
const hookOutcome = await emitHookWithOutcome("permissionRequest", {
|
|
51
52
|
toolName: tool.name,
|
|
52
53
|
toolArgs: JSON.stringify(toolCall.arguments).slice(0, 1000),
|
|
@@ -54,22 +55,48 @@ export async function executeSingleTool(toolCall, tools, context, permissionMode
|
|
|
54
55
|
permissionMode,
|
|
55
56
|
permissionAction: "ask",
|
|
56
57
|
});
|
|
58
|
+
const denyAndEmit = (source, reason, output) => {
|
|
59
|
+
emitHook("permissionDenied", {
|
|
60
|
+
toolName: tool.name,
|
|
61
|
+
toolArgs: JSON.stringify(toolCall.arguments).slice(0, 1000),
|
|
62
|
+
permissionMode,
|
|
63
|
+
denySource: source,
|
|
64
|
+
denyReason: reason,
|
|
65
|
+
});
|
|
66
|
+
return { output, isError: true };
|
|
67
|
+
};
|
|
57
68
|
if (hookOutcome.permissionDecision === "allow") {
|
|
58
|
-
// Hook granted permission —
|
|
69
|
+
// Hook granted permission — proceed to execution.
|
|
59
70
|
}
|
|
60
71
|
else if (hookOutcome.permissionDecision === "deny" || !hookOutcome.allowed) {
|
|
61
72
|
const reason = hookOutcome.reason ? `: ${hookOutcome.reason}` : "";
|
|
62
|
-
return
|
|
73
|
+
return denyAndEmit("hook", hookOutcome.reason ?? "hook denied", `Permission denied by hook${reason}`);
|
|
63
74
|
}
|
|
64
|
-
else {
|
|
65
|
-
// "ask" or no decision →
|
|
75
|
+
else if (askUser) {
|
|
76
|
+
// "ask" or no decision → interactive prompt when available
|
|
77
|
+
const { formatToolArgs } = await import("../utils/tool-summary.js");
|
|
78
|
+
const description = formatToolArgs(tool.name, toolCall.arguments);
|
|
66
79
|
const allowed = await askUser(tool.name, description, tool.riskLevel);
|
|
67
80
|
if (!allowed) {
|
|
68
|
-
return
|
|
81
|
+
return denyAndEmit("user", "user declined", "Permission denied by user.");
|
|
69
82
|
}
|
|
70
83
|
}
|
|
84
|
+
else {
|
|
85
|
+
// Headless mode with no hook decision and no interactive prompt:
|
|
86
|
+
// fail-closed deny. SDK consumers should configure a permissionRequest
|
|
87
|
+
// hook (or use canUseTool) to make per-call decisions.
|
|
88
|
+
return denyAndEmit("headless", "no hook decision and no interactive prompt available", "Permission denied: needs-approval (no interactive prompt available; configure a permissionRequest hook to gate this tool)");
|
|
89
|
+
}
|
|
71
90
|
}
|
|
72
91
|
else {
|
|
92
|
+
// Auto-mode policy block (deny / acceptEdits / etc) — symmetric event.
|
|
93
|
+
emitHook("permissionDenied", {
|
|
94
|
+
toolName: tool.name,
|
|
95
|
+
toolArgs: JSON.stringify(toolCall.arguments).slice(0, 1000),
|
|
96
|
+
permissionMode,
|
|
97
|
+
denySource: "policy",
|
|
98
|
+
denyReason: perm.reason,
|
|
99
|
+
});
|
|
73
100
|
return { output: `Permission denied: ${perm.reason}`, isError: true };
|
|
74
101
|
}
|
|
75
102
|
}
|
|
@@ -188,6 +215,7 @@ export async function* executeToolCalls(toolCalls, tools, context, permissionMod
|
|
|
188
215
|
const onOutputChunk = (callId, chunk) => {
|
|
189
216
|
outputChunks.push({ type: "tool_output_delta", callId, chunk });
|
|
190
217
|
};
|
|
218
|
+
const allToolNames = toolCalls.map((tc) => tc.toolName);
|
|
191
219
|
for (const batch of batches) {
|
|
192
220
|
if (batch.concurrent) {
|
|
193
221
|
const results = await Promise.all(batch.calls.map((tc) => executeSingleTool(tc, tools, { ...context, callId: tc.id, onOutputChunk }, permissionMode, askUser)));
|
|
@@ -210,5 +238,17 @@ export async function* executeToolCalls(toolCalls, tools, context, permissionMod
|
|
|
210
238
|
}
|
|
211
239
|
}
|
|
212
240
|
}
|
|
241
|
+
// Hook: postToolBatch — fires once after the model's full set of tool
|
|
242
|
+
// calls for this turn have all resolved (across however many serial /
|
|
243
|
+
// concurrent batches partitionToolCalls produced), before the next model
|
|
244
|
+
// call. Per-tool postToolUse / postToolUseFailure still fire as before;
|
|
245
|
+
// this is the batch-level boundary for hooks that want to act once per
|
|
246
|
+
// turn instead of once per tool.
|
|
247
|
+
if (toolCalls.length > 0) {
|
|
248
|
+
emitHook("postToolBatch", {
|
|
249
|
+
batchSize: String(toolCalls.length),
|
|
250
|
+
batchTools: allToolNames.slice(0, 50).join(","),
|
|
251
|
+
});
|
|
252
|
+
}
|
|
213
253
|
}
|
|
214
254
|
//# sourceMappingURL=tools.js.map
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs/promises";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import { z } from "zod";
|
|
4
|
+
import { emitHook } from "../../harness/hooks.js";
|
|
4
5
|
const inputSchema = z.object({
|
|
5
6
|
subject: z.string(),
|
|
6
7
|
description: z.string(),
|
|
@@ -42,6 +43,10 @@ export const TaskCreateTool = {
|
|
|
42
43
|
};
|
|
43
44
|
tasks.push(newTask);
|
|
44
45
|
await fs.writeFile(filePath, JSON.stringify(tasks, null, 2), "utf-8");
|
|
46
|
+
emitHook("taskCreated", {
|
|
47
|
+
taskId: String(newTask.id),
|
|
48
|
+
taskSubject: newTask.subject.slice(0, 200),
|
|
49
|
+
});
|
|
45
50
|
return { output: `Task #${newTask.id} created: ${newTask.subject}`, isError: false };
|
|
46
51
|
}
|
|
47
52
|
catch (err) {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs/promises";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import { z } from "zod";
|
|
4
|
+
import { emitHook } from "../../harness/hooks.js";
|
|
4
5
|
const inputSchema = z.object({
|
|
5
6
|
taskId: z.number(),
|
|
6
7
|
status: z.enum(["pending", "in_progress", "completed", "cancelled", "deleted"]).optional(),
|
|
@@ -32,6 +33,7 @@ export const TaskUpdateTool = {
|
|
|
32
33
|
if (!task) {
|
|
33
34
|
return { output: `Error: Task #${input.taskId} not found.`, isError: true };
|
|
34
35
|
}
|
|
36
|
+
const previousStatus = task.status;
|
|
35
37
|
// Handle deletion
|
|
36
38
|
if (input.status === "deleted") {
|
|
37
39
|
const idx = tasks.indexOf(task);
|
|
@@ -69,6 +71,15 @@ export const TaskUpdateTool = {
|
|
|
69
71
|
task.blockedBy = [...new Set([...(task.blockedBy ?? []), ...input.addBlockedBy])];
|
|
70
72
|
}
|
|
71
73
|
await fs.writeFile(filePath, JSON.stringify(tasks, null, 2), "utf-8");
|
|
74
|
+
// Hook: taskCompleted — fires only on the pending|in_progress → completed
|
|
75
|
+
// transition. Re-saving an already-completed task is a no-op for the hook.
|
|
76
|
+
if (input.status === "completed" && previousStatus !== "completed") {
|
|
77
|
+
emitHook("taskCompleted", {
|
|
78
|
+
taskId: String(task.id),
|
|
79
|
+
taskSubject: task.subject.slice(0, 200),
|
|
80
|
+
taskPreviousStatus: previousStatus,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
72
83
|
return { output: `Task #${task.id} updated. Status: ${task.status}`, isError: false };
|
|
73
84
|
}
|
|
74
85
|
catch (err) {
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse the `--max-budget-usd` CLI argument into a positive USD amount.
|
|
3
|
+
*
|
|
4
|
+
* Accepts plain decimals (`5`, `0.50`, `2.5`) and an optional leading `$`.
|
|
5
|
+
* Negative or zero values are rejected — a budget of zero would block the
|
|
6
|
+
* very first call before any cost has accumulated.
|
|
7
|
+
*
|
|
8
|
+
* Returns `{ ok: true, value }` on success or `{ ok: false, message }` on
|
|
9
|
+
* invalid input. The CLI wrapper translates failures into a stderr message
|
|
10
|
+
* and exit code 2.
|
|
11
|
+
*/
|
|
12
|
+
export type ParseBudgetResult = {
|
|
13
|
+
ok: true;
|
|
14
|
+
value: number;
|
|
15
|
+
} | {
|
|
16
|
+
ok: false;
|
|
17
|
+
message: string;
|
|
18
|
+
};
|
|
19
|
+
export declare function parseMaxBudgetUsd(raw: string): ParseBudgetResult;
|
|
20
|
+
//# sourceMappingURL=parse-budget.d.ts.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Parse the `--max-budget-usd` CLI argument into a positive USD amount.
 *
 * Surrounding whitespace is ignored and one optional leading `$` is
 * accepted (`5`, `0.50`, ` $2.5 `). The input is trimmed BEFORE the `$` is
 * stripped so that a value with leading whitespace such as `" $5"` is
 * handled the same as `"$5"`. Empty, non-numeric, non-finite, zero and
 * negative values are rejected.
 *
 * @param {string} raw - The option value as received from the CLI.
 * @returns {{ok: true, value: number} | {ok: false, message: string}}
 *   The parsed amount, or an error message quoting the raw input.
 */
export function parseMaxBudgetUsd(raw) {
    const invalid = {
        ok: false,
        message: `--max-budget-usd must be a positive USD amount, got '${raw}'`,
    };
    const cleaned = String(raw ?? "").trim().replace(/^\$/, "").trim();
    if (cleaned === "") {
        return invalid;
    }
    const amount = Number(cleaned);
    if (!Number.isFinite(amount) || amount <= 0) {
        return invalid;
    }
    return { ok: true, value: amount };
}
|
|
12
|
+
//# sourceMappingURL=parse-budget.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zhijiewang/openharness",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.20.0",
|
|
4
4
|
"description": "Open-source terminal coding agent. Works with any LLM.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -22,17 +22,23 @@
|
|
|
22
22
|
"README.md",
|
|
23
23
|
"LICENSE"
|
|
24
24
|
],
|
|
25
|
+
"workspaces": [
|
|
26
|
+
"packages/sdk"
|
|
27
|
+
],
|
|
25
28
|
"scripts": {
|
|
26
29
|
"dev": "tsx src/main.tsx",
|
|
27
30
|
"build": "tsc",
|
|
31
|
+
"build:sdk": "npm --workspace @zhijiewang/openharness-sdk run build",
|
|
28
32
|
"prepare": "husky",
|
|
29
33
|
"prepublishOnly": "npm run build",
|
|
30
|
-
"test": "node scripts/test.mjs",
|
|
34
|
+
"test": "node scripts/test.mjs && npm --workspace @zhijiewang/openharness-sdk run test",
|
|
35
|
+
"test:cli": "node scripts/test.mjs",
|
|
36
|
+
"test:sdk": "npm --workspace @zhijiewang/openharness-sdk run test",
|
|
31
37
|
"test:coverage": "node scripts/coverage.mjs",
|
|
32
|
-
"typecheck": "tsc --noEmit",
|
|
33
|
-
"lint": "biome check src/",
|
|
34
|
-
"lint:fix": "biome check --write src/",
|
|
35
|
-
"format": "biome format --write src/",
|
|
38
|
+
"typecheck": "tsc --noEmit && npm --workspace @zhijiewang/openharness-sdk run typecheck",
|
|
39
|
+
"lint": "biome check src/ packages/sdk/src/",
|
|
40
|
+
"lint:fix": "biome check --write src/ packages/sdk/src/",
|
|
41
|
+
"format": "biome format --write src/ packages/sdk/src/",
|
|
36
42
|
"start": "node dist/main.js"
|
|
37
43
|
},
|
|
38
44
|
"dependencies": {
|