npm - @zhijiewang/openharness - Versions diffs - 2.18.0 → 2.19.0 - Mend

@zhijiewang/openharness 2.18.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +45 -12
package/README.zh-CN.md +45 -12
package/dist/main.js +75 -13
package/dist/providers/ollama.d.ts +13 -0
package/dist/providers/ollama.js +41 -0
package/dist/query/tools.js +20 -8
package/dist/utils/parse-budget.d.ts +20 -0
package/dist/utils/parse-budget.js +12 -0
package/package.json +12 -6

package/README.md CHANGED Viewed

@@ -32,8 +32,8 @@ AI coding agent in your terminal. Works with any LLM -- free local models or clo
 - [Quick Start](#quick-start)
 - [Why OpenHarness?](#why-openharness)
 - [Terminal UI](#terminal-ui)
-- [Tools (37)](#tools-37)
-- [Slash Commands (33)](#slash-commands-33)
+- [Tools (43)](#tools-43)
+- [Slash Commands](#slash-commands)
 - [Permission Modes](#permission-modes)
 - [Hooks](#hooks)
 - [Checkpoints & Rewind](#checkpoints--rewind)
@@ -61,6 +61,8 @@ That's it. OpenHarness auto-detects Ollama and starts chatting. No API key neede
 **Python SDK:** there's also an official Python SDK for driving `oh` from Python programs (notebooks, batch scripts, ML pipelines). Install with `pip install openharness-sdk` after the npm install (the PyPI distribution is `openharness-sdk` because the unqualified name is taken), then `from openharness import query`. See [`python/README.md`](python/README.md).
+**TypeScript SDK:** drive `oh` from Node.js (VS Code extensions, Electron apps, build scripts) with `@zhijiewang/openharness-sdk` — `npm install @zhijiewang/openharness-sdk`, then `import { query, OpenHarnessClient, tool } from "@zhijiewang/openharness-sdk"`. Mirrors the Python SDK surface (streaming events, stateful sessions, custom tools, permission callback, session resume). See [`packages/sdk/README.md`](packages/sdk/README.md).
 ```bash
 oh init                               # interactive setup wizard (provider + cybergotchi)
 oh                                    # auto-detect local model
@@ -142,12 +144,13 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
 Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XXXX), `{ctx}` (context usage bar). Empty sections are automatically collapsed.
-## Tools (37)
+## Tools (43)
 | Tool | Risk | Description |
 |------|------|-------------|
 | **Core** | | |
 | Bash | high | Execute shell commands with live streaming output (AST safety analysis) |
+| PowerShell | high | Execute PowerShell commands (Windows-native scripting) |
 | Read | low | Read files with line ranges, PDF support |
 | ImageRead | low | Read images/PDFs for multimodal analysis |
 | Write | medium | Create or overwrite files |
@@ -167,6 +170,7 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
 | TaskGet | low | Get task details |
 | TaskStop | low | Stop a running task |
 | TaskOutput | low | Get task output |
+| TodoWrite | low | Manage session task checklist (Claude Code-compatible) |
 | **Agents** | | |
 | Agent | medium | Spawn a sub-agent (with role specialization) |
 | ParallelAgent | medium | Dispatch multiple agents with DAG dependencies |
@@ -176,9 +180,12 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
 | CronCreate | medium | Schedule recurring tasks |
 | CronDelete | medium | Remove scheduled tasks |
 | CronList | low | List all scheduled tasks |
+| ScheduleWakeup | low | Self-pace the next /loop iteration (cache-aware) |
 | **Planning** | | |
 | EnterPlanMode | low | Enter structured planning mode |
 | ExitPlanMode | low | Exit planning mode |
+| **Pipelines** | | |
+| Pipeline | medium | Run a sequence of tasks with output passed between steps |
 | **Code Intelligence** | | |
 | Diagnostics | low | LSP-based code diagnostics |
 | NotebookEdit | medium | Edit Jupyter notebooks |
@@ -186,6 +193,7 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
 | Memory | low | Save/list/search persistent memories |
 | Skill | low | Invoke a skill from .oh/skills/ |
 | ToolSearch | low | Find tools by description |
+| SessionSearch | low | Search prior sessions for relevant context |
 | **MCP** | | |
 | ListMcpResources | low | List resources from connected MCP servers |
 | ReadMcpResource | low | Read a specific MCP resource by URI |
@@ -194,12 +202,13 @@ Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XX
 | ExitWorktree | medium | Remove a git worktree |
 | **Process** | | |
 | KillProcess | high | Stop processes by PID or name |
+| Monitor | medium | Run a background command and stream each output line back to the agent |
 Low-risk read-only tools auto-approve. Medium and high risk tools require confirmation in `ask` mode. Use `--trust` or `--auto` to skip prompts.
-## Slash Commands (33)
+## Slash Commands
-Type these during a chat session. Aliases: `/q` exit, `/h` help, `/c` commit, `/m` model, `/s` status.
+Over 80 commands are registered. The most-used ones are grouped below; see `/help` in-session for the full list. Aliases: `/q` exit, `/h` help, `/c` commit, `/m` model, `/s` status.
 **Session:**
 | Command | Description |
@@ -289,11 +298,29 @@ hooks:
     command: "scripts/cleanup.sh"
 ```
-**Event types:**
-- `sessionStart` — fires once when the session begins
-- `preToolUse` — fires before each tool call; **exit code 1 blocks the tool** and returns an error to the model
-- `postToolUse` — fires after each tool call completes
-- `sessionEnd` — fires when the session ends
+**Event types** (17 total):
+| Event | When it fires | Can block? |
+|-------|---------------|------------|
+| `sessionStart` | Session begins | — |
+| `sessionEnd` | Session ends | — |
+| `turnStart` | Top-level agent turn begins (after user prompt accepted) | — |
+| `turnStop` | Top-level agent turn ends (mirrors Claude Code's `Stop`) | — |
+| `userPromptSubmit` | Before user prompt reaches the LLM | yes — `decision: deny` |
+| `preToolUse` | Before each tool call | yes — exit code 1 / `decision: deny` |
+| `postToolUse` | After successful tool execution | — |
+| `postToolUseFailure` | After tool throws or returns `isError: true` | — |
+| `permissionRequest` | When a tool needs approval (between `preToolUse` and the prompt) | yes — `decision: allow\|deny\|ask` |
+| `fileChanged` | After a tool modifies a file | — |
+| `cwdChanged` | After working directory changes | — |
+| `subagentStart` | A sub-agent is spawned | — |
+| `subagentStop` | A sub-agent completes | — |
+| `preCompact` | Before conversation compaction | — |
+| `postCompact` | After conversation compaction | — |
+| `configChange` | `.oh/config.yaml` is modified during the session | — |
+| `notification` | A notification is dispatched | — |
+Live introspection: run `/hooks` in-session to see exactly which hooks are loaded, grouped by event.
 **Environment variables** available to hook scripts:
@@ -303,10 +330,16 @@ hooks:
 | `OH_TOOL_NAME` | Name of the tool being called (tool events only) |
 | `OH_TOOL_ARGS` | JSON-encoded tool arguments (tool events only) |
 | `OH_TOOL_OUTPUT` | JSON-encoded tool output (`postToolUse` only) |
+| `OH_TOOL_INPUT_JSON` | Full JSON tool input (tool events only) |
+| `OH_SESSION_ID` / `OH_MODEL` / `OH_PROVIDER` / `OH_PERMISSION_MODE` | Current session context |
+| `OH_COST` / `OH_TOKENS` | Running cost and token totals |
+| `OH_FILE_PATH` | Path that changed (`fileChanged` only) |
+| `OH_NEW_CWD` | New working directory (`cwdChanged` only) |
+| `OH_TURN_NUMBER` / `OH_TURN_REASON` | Turn boundary context (`turnStart` / `turnStop`) |
-Use `match` to restrict a hook to a specific tool name (e.g., `match: Bash` only triggers for the Bash tool).
+Use `match` to restrict a hook to a specific tool name (e.g., `match: Bash` only triggers for the Bash tool). Substring, glob (`Cron*`), and `/regex/flags` patterns are all supported.
-See [docs/hooks.md](docs/hooks.md) for the full event reference including the new `userPromptSubmit`, `permissionRequest`, and `postToolUseFailure` events.
+Set `jsonIO: true` on a `command` hook to opt into structured JSON I/O — the harness sends `{event, ...context}` on stdin and reads `{decision, reason, hookSpecificOutput}` from stdout. HTTP hooks accept the same response shape. See [docs/hooks.md](docs/hooks.md) for the full reference.
 ## Cybergotchi

package/README.zh-CN.md CHANGED Viewed

@@ -32,8 +32,8 @@
 - [快速开始](#快速开始)
 - [为什么选择 OpenHarness？](#为什么选择-openharness)
 - [终端界面](#终端界面)
-- [工具（37 个）](#工具37-个)
-- [斜杠命令（33 个）](#斜杠命令33-个)
+- [工具（43 个）](#工具43-个)
+- [斜杠命令](#斜杠命令)
 - [权限模式](#权限模式)
 - [钩子](#钩子)
 - [检查点与回滚](#检查点与回滚)
@@ -61,6 +61,8 @@ oh
 **Python SDK：** 我们还提供了官方的 Python SDK，可以在 Python 程序中驱动 `oh`（笔记本、批处理脚本、ML 流水线）。在 npm 安装之后，使用 `pip install openharness-sdk` 安装（PyPI 分发名为 `openharness-sdk`，因为未加后缀的名称已被占用），然后 `from openharness import query`。详见 [`python/README.md`](python/README.md)。
+**TypeScript SDK：** 同样有官方的 TypeScript SDK，可以在 Node.js（VS Code 插件、Electron 应用、构建脚本等）中驱动 `oh`：使用 `@zhijiewang/openharness-sdk` —— 通过 `npm install @zhijiewang/openharness-sdk` 安装，然后 `import { query, OpenHarnessClient, tool } from "@zhijiewang/openharness-sdk"`。功能与 Python SDK 对等（流式事件、有状态会话、自定义工具、权限回调、会话恢复）。详见 [`packages/sdk/README.md`](packages/sdk/README.md)。
 ```bash
 oh init                               # 交互式安装向导（模型提供商 + 电子宠物）
 oh                                    # 自动检测本地模型
@@ -142,12 +144,13 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
 可用变量：`{model}`、`{tokens}`（输入↑ 输出↓）、`{cost}`（$X.XXXX）、`{ctx}`（上下文占用条）。空片段会自动折叠。
-## 工具（37 个）
+## 工具（43 个）
 | 工具 | 风险 | 描述 |
 |------|------|-------------|
 | **核心** | | |
 | Bash | 高 | 执行 shell 命令并实时流式输出（AST 安全分析） |
+| PowerShell | 高 | 执行 PowerShell 命令（Windows 原生脚本） |
 | Read | 低 | 按行范围读取文件，支持 PDF |
 | ImageRead | 低 | 读取图片/PDF 以进行多模态分析 |
 | Write | 中 | 创建或覆盖文件 |
@@ -167,6 +170,7 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
 | TaskGet | 低 | 获取任务详情 |
 | TaskStop | 低 | 停止正在运行的任务 |
 | TaskOutput | 低 | 获取任务输出 |
+| TodoWrite | 低 | 管理会话级 todo 列表（兼容 Claude Code） |
 | **代理** | | |
 | Agent | 中 | 派生一个子代理（可指定角色） |
 | ParallelAgent | 中 | 派发多个代理并支持 DAG 依赖 |
@@ -176,9 +180,12 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
 | CronCreate | 中 | 创建定时任务 |
 | CronDelete | 中 | 删除定时任务 |
 | CronList | 低 | 列出所有定时任务 |
+| ScheduleWakeup | 低 | 在 /loop 中自适应安排下一次触发（缓存感知） |
 | **规划** | | |
 | EnterPlanMode | 低 | 进入结构化规划模式 |
 | ExitPlanMode | 低 | 退出规划模式 |
+| **流水线** | | |
+| Pipeline | 中 | 顺序执行一连串子任务，把每一步的输出作为下一步的输入 |
 | **代码智能** | | |
 | Diagnostics | 低 | 基于 LSP 的代码诊断 |
 | NotebookEdit | 中 | 编辑 Jupyter notebook |
@@ -186,6 +193,7 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
 | Memory | 低 | 保存/列出/搜索持久化记忆 |
 | Skill | 低 | 调用 .oh/skills/ 下的技能 |
 | ToolSearch | 低 | 按描述查找工具 |
+| SessionSearch | 低 | 在历史会话中搜索相关上下文 |
 | **MCP** | | |
 | ListMcpResources | 低 | 列出已连接 MCP 服务器上的资源 |
 | ReadMcpResource | 低 | 按 URI 读取指定的 MCP 资源 |
@@ -194,12 +202,13 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
 | ExitWorktree | 中 | 移除一个 git worktree |
 | **进程** | | |
 | KillProcess | 高 | 按 PID 或名称停止进程 |
+| Monitor | 中 | 在后台运行命令，并把每一行输出实时反馈给代理 |
 低风险只读工具会自动批准。在 `ask` 模式下，中高风险工具需要确认。使用 `--trust` 或 `--auto` 可跳过提示。
-## 斜杠命令（33 个）
+## 斜杠命令
-在对话中输入这些命令。别名：`/q` 退出、`/h` 帮助、`/c` 提交、`/m` 模型、`/s` 状态。
+OH 注册了 80+ 个斜杠命令；下表只列出最常用的一部分。在会话中运行 `/help` 可以看到完整列表。别名：`/q` 退出、`/h` 帮助、`/c` 提交、`/m` 模型、`/s` 状态。
 **会话：**
 | 命令 | 描述 |
@@ -289,11 +298,29 @@ hooks:
     command: "scripts/cleanup.sh"
 ```
-**事件类型：**
-- `sessionStart` —— 会话开始时触发一次
-- `preToolUse` —— 每次工具调用前触发；**退出码 1 会阻止该工具**并向模型返回错误
-- `postToolUse` —— 每次工具调用完成后触发
-- `sessionEnd` —— 会话结束时触发
+**事件类型**（共 17 个）：
+| 事件 | 触发时机 | 是否可阻止 |
+|-------|---------------|------------|
+| `sessionStart` | 会话开始 | — |
+| `sessionEnd` | 会话结束 | — |
+| `turnStart` | 顶层代理回合开始（用户提示词被接受后） | — |
+| `turnStop` | 顶层代理回合结束（对应 Claude Code 的 `Stop`） | — |
+| `userPromptSubmit` | 用户提示词到达 LLM 之前 | 是 —— `decision: deny` |
+| `preToolUse` | 工具调用之前 | 是 —— 退出码 1 / `decision: deny` |
+| `postToolUse` | 工具成功执行之后 | — |
+| `postToolUseFailure` | 工具抛错或返回 `isError: true` | — |
+| `permissionRequest` | 工具需要授权时（`preToolUse` 与询问之间） | 是 —— `decision: allow\|deny\|ask` |
+| `fileChanged` | 工具修改文件之后 | — |
+| `cwdChanged` | 工作目录变更之后 | — |
+| `subagentStart` | 子代理被派生 | — |
+| `subagentStop` | 子代理完成 | — |
+| `preCompact` | 对话压缩之前 | — |
+| `postCompact` | 对话压缩之后 | — |
+| `configChange` | 会话过程中 `.oh/config.yaml` 被修改 | — |
+| `notification` | 通知被派发 | — |
+实时查看：在会话中运行 `/hooks` 可以按事件分组查看当前已加载的钩子。
 **环境变量**（钩子脚本可用）：
@@ -303,10 +330,16 @@ hooks:
 | `OH_TOOL_NAME` | 正在调用的工具名（仅工具类事件） |
 | `OH_TOOL_ARGS` | JSON 编码的工具参数（仅工具类事件） |
 | `OH_TOOL_OUTPUT` | JSON 编码的工具输出（仅 `postToolUse`） |
+| `OH_TOOL_INPUT_JSON` | 完整的 JSON 工具输入（仅工具类事件） |
+| `OH_SESSION_ID` / `OH_MODEL` / `OH_PROVIDER` / `OH_PERMISSION_MODE` | 当前会话上下文 |
+| `OH_COST` / `OH_TOKENS` | 累计费用与 token 数 |
+| `OH_FILE_PATH` | 变更的文件路径（仅 `fileChanged`） |
+| `OH_NEW_CWD` | 新的工作目录（仅 `cwdChanged`） |
+| `OH_TURN_NUMBER` / `OH_TURN_REASON` | 回合边界上下文（`turnStart` / `turnStop`） |
-使用 `match` 将钩子限定到特定工具名（例如 `match: Bash` 仅对 Bash 工具触发）。
+使用 `match` 将钩子限定到特定工具名（例如 `match: Bash` 仅对 Bash 工具触发）。支持子串、glob（如 `Cron*`）和 `/regex/flags` 三种匹配方式。
-完整事件参考（包括新增的 `userPromptSubmit`、`permissionRequest`、`postToolUseFailure` 事件）见 [docs/hooks.md](docs/hooks.md)。
+将 `command` 钩子设置 `jsonIO: true` 即可启用结构化 JSON I/O —— 框架在 stdin 上发送 `{event, ...context}`，并从 stdout 读取 `{decision, reason, hookSpecificOutput}`。HTTP 钩子接受同样的响应格式。完整参考见 [docs/hooks.md](docs/hooks.md)。
 ## 电子宠物 Cybergotchi

package/dist/main.js CHANGED Viewed

@@ -27,6 +27,7 @@ import { connectedMcpServers, disconnectMcpClients, getMcpInstructions, loadMcpT
 import { loadOutputStyle } from "./outputStyles/index.js";
 import { getAllTools } from "./tools.js";
 import { validateAgainstJsonSchema } from "./utils/json-schema.js";
+import { parseMaxBudgetUsd } from "./utils/parse-budget.js";
 const _require = createRequire(import.meta.url);
 const VERSION = _require("../package.json").version;
 const BANNER = `        ___
@@ -74,6 +75,20 @@ You have access to tools for reading, writing, and searching files, running shel
 - When referencing code, include file_path:line_number.
 - Do not restate what the user said. Do not add trailing summaries unless asked.
 - Keep responses short and direct. If you can say it in one sentence, don't use three.`;
+/**
+ * Parse the `--max-budget-usd` CLI argument into a positive USD amount, or
+ * exit 2 with an error message. The pure parser lives in
+ * `src/utils/parse-budget.ts` so it can be unit-tested without spawning the
+ * CLI; this thin wrapper handles the exit-on-failure side effect.
+ *
+ * @param raw - Raw option value, passed straight through to `parseMaxBudgetUsd`.
+ * @returns `result.value` from the parser when `result.ok` is true.
+ *
+ * When `result.ok` is false, `Error: <result.message>` is written to stderr
+ * and the process is terminated via `process.exit(2)`.
+ */
+function parseMaxBudgetUsdOrExit(raw) {
+    const result = parseMaxBudgetUsd(raw);
+    if (!result.ok) {
+        process.stderr.write(`Error: ${result.message}\n`);
+        process.exit(2);
+    }
+    return result.value;
+}
 function buildSystemPrompt(model) {
     const cfg = readOhConfig();
     // Output-style preface (first — sets personality for everything that follows).
@@ -136,6 +151,7 @@ program
     .option("--disallowed-tools <tools>", "Comma-separated list of disallowed tools")
     .option("--resume <id>", "Resume a saved session (replays its message history before this prompt)")
     .option("--setting-sources <sources>", "Comma-separated list of setting sources to merge (e.g. 'user,project,local'). Mirrors Claude Code's setting_sources.")
+    .option("--max-budget-usd <amount>", "Hard cap on session cost in USD. The agent halts with reason 'budget_exceeded' once totalCost reaches this amount. Mirrors Claude Code's --max-budget-usd.")
     .action(async (promptArg, opts) => {
     // Read from stdin if prompt is "-" or omitted and stdin is not a TTY
     let prompt;
@@ -201,6 +217,7 @@ program
         permissionMode,
         maxTurns: parseInt(opts.maxTurns, 10),
         model,
+        ...(opts.maxBudgetUsd !== undefined ? { maxCost: parseMaxBudgetUsdOrExit(opts.maxBudgetUsd) } : {}),
     };
     const outputFormat = opts.json ? "json" : (opts.outputFormat ?? "text");
     let fullOutput = "";
@@ -210,26 +227,36 @@ program
     // history into the conversation before the new prompt. If the session can't
     // be loaded (missing file, malformed JSON), fail early with a clear error
     // rather than silently starting fresh.
+    //
+    // When --resume is NOT passed, mint a fresh session record so SDK callers
+    // can capture its id from the session_start event and pass it back as
+    // --resume <id> on a later run. Without this, every fresh `oh run` was
+    // a programmatic dead-end for resumption (issue #60).
+    const { createSession, loadSession, saveSession } = await import("./harness/session.js");
     let priorMessages;
     let sessionId;
+    let sessionRecord;
     if (opts.resume) {
-        const { loadSession } = await import("./harness/session.js");
         try {
-            const src = loadSession(opts.resume);
-            priorMessages = src.messages;
-            sessionId = src.id;
+            sessionRecord = loadSession(opts.resume);
+            priorMessages = sessionRecord.messages;
+            sessionId = sessionRecord.id;
         }
         catch {
             process.stderr.write(`Error: could not load session '${opts.resume}'\n`);
             process.exit(1);
         }
     }
+    else {
+        sessionRecord = createSession(provider.name, model);
+        sessionId = sessionRecord.id;
+        saveSession(sessionRecord);
+    }
     if (outputFormat === "stream-json") {
         // Emit a session_start event so SDK callers can capture the id for
-        // later resume (fires once, before turnStart).
-        if (sessionId) {
-            console.log(JSON.stringify({ type: "session_start", sessionId }));
-        }
+        // later resume (fires once, before turnStart). Always emitted now —
+        // fresh runs mint a sessionId above.
+        console.log(JSON.stringify({ type: "session_start", sessionId }));
         setHookDecisionObserver((n) => {
             console.log(JSON.stringify({
                 type: "hook_decision",
@@ -320,6 +347,22 @@ program
     else if (outputFormat === "text") {
         process.stdout.write("\n");
     }
+    // Persist this run's contribution so a later --resume <sessionId> finds
+    // the user/assistant pair. Tool details are intentionally elided —
+    // they're per-tool ephemerals; the assistant's final text is what
+    // matters for context resumption. Mirrors the REPL's save-on-exit pattern
+    // (src/components/REPL.tsx:120) but at one-shot scope.
+    try {
+        const { createUserMessage, createAssistantMessage } = await import("./types/message.js");
+        const newMessages = [...(priorMessages ?? []), createUserMessage(prompt)];
+        if (fullOutput)
+            newMessages.push(createAssistantMessage(fullOutput));
+        sessionRecord.messages = newMessages;
+        saveSession(sessionRecord);
+    }
+    catch {
+        /* persistence is best-effort — never fail the user's run on a save error */
+    }
 });
 // ── `oh session`: long-lived stateful session for the Python SDK ──
 program
@@ -335,6 +378,7 @@ program
     .option("--system-prompt <prompt>", "Override the system prompt")
     .option("--resume <id>", "Resume a saved session (seeds the conversation with its prior message history)")
     .option("--setting-sources <sources>", "Comma-separated list of setting sources to merge (mirrors Claude Code's setting_sources).")
+    .option("--max-budget-usd <amount>", "Hard cap on session cost in USD. Each prompt's cost accumulates; the agent halts with reason 'budget_exceeded' once totalCost reaches this amount.")
     .action(async (opts) => {
     const settingSources = parseSettingSources(opts.settingSources);
     const savedConfig = readOhConfig(undefined, settingSources);
@@ -368,23 +412,32 @@ program
         permissionMode,
         maxTurns: parseInt(opts.maxTurns, 10),
         model,
+        ...(opts.maxBudgetUsd !== undefined ? { maxCost: parseMaxBudgetUsdOrExit(opts.maxBudgetUsd) } : {}),
     };
     // Conversation history, shared across all prompts for this process.
-    // Seeded from a prior session when --resume <id> is passed.
+    // Seeded from a prior session when --resume <id> is passed; otherwise a
+    // fresh session is minted so the SDK can capture the id from the `ready`
+    // event for later resume (issue #60).
     const conversation = [];
+    const { createSession, loadSession, saveSession } = await import("./harness/session.js");
     let sessionId;
+    let sessionRecord;
     if (opts.resume) {
-        const { loadSession } = await import("./harness/session.js");
         try {
-            const src = loadSession(opts.resume);
-            conversation.push(...src.messages);
-            sessionId = src.id;
+            sessionRecord = loadSession(opts.resume);
+            conversation.push(...sessionRecord.messages);
+            sessionId = sessionRecord.id;
         }
         catch {
             console.log(JSON.stringify({ type: "error", message: `could not load session '${opts.resume}'` }));
             return;
         }
     }
+    else {
+        sessionRecord = createSession(provider.name, model);
+        sessionId = sessionRecord.id;
+        saveSession(sessionRecord);
+    }
     let turnCounter = 0;
     // Will be set to the current prompt id before each turn so hook_decision
     // events can be demultiplexed by the client.
@@ -494,6 +547,15 @@ program
         for (const tr of toolResults) {
             conversation.push(createToolResultMessage({ callId: tr.callId, output: tr.output, isError: tr.isError }));
         }
+        // Persist after every completed turn so a later --resume picks up the
+        // history. Best-effort — a save failure shouldn't break the live session.
+        try {
+            sessionRecord.messages = conversation.slice();
+            saveSession(sessionRecord);
+        }
+        catch {
+            /* save errors don't propagate to the client */
+        }
     }
 });
 // ── Default command: just run `openharness` to start chatting ──

package/dist/providers/ollama.d.ts CHANGED Viewed

@@ -9,6 +9,19 @@ export declare class OllamaProvider implements Provider {
     private baseUrl;
     private defaultModel;
     constructor(config: ProviderConfig);
+    /**
+     * Estimate the prompt size and pick a `num_ctx` for Ollama. Without this
+     * Ollama defaults to a 2048-token context window — anything bigger gets
+     * silently truncated server-side. OH's typical system prompt + tool list
+     * already pushes ~4 K, so multi-turn chats lose prior turns and the model
+     * appears to "forget" what was just said. See issue #61.
+     *
+     * Strategy: rough char/4 token estimate, padded by 25% plus 1 K (1024)
+     * of headroom for the response, then doubled from 8192 until it covers
+     * the padded estimate. Capped at 32 K (32768) to keep KV cache bounded;
+     * users with bigger models can override via `OLLAMA_NUM_CTX`. A positive,
+     * finite override is floored and returned as-is, skipping both the
+     * estimate and the cap; any other override value is ignored.
+     */
+    private computeNumCtx;
     private convertMessages;
     private convertTools;
     stream(messages: Message[], systemPrompt: string, tools?: APIToolDef[], model?: string): AsyncGenerator<StreamEvent, void>;

package/dist/providers/ollama.js CHANGED Viewed

@@ -10,6 +10,45 @@ export class OllamaProvider {
         this.baseUrl = (config.baseUrl ?? "http://localhost:11434").replace(/\/$/, "");
         this.defaultModel = config.defaultModel ?? "llama3.1";
     }
+    /**
+     * Estimate the prompt size and pick a `num_ctx` for Ollama. Without this
+     * Ollama defaults to a 2048-token context window — anything bigger gets
+     * silently truncated server-side. OH's typical system prompt + tool list
+     * already pushes ~4 K, so multi-turn chats lose prior turns and the model
+     * appears to "forget" what was just said. See issue #61.
+     *
+     * Strategy: rough char/4 token estimate over the system prompt, message
+     * contents, tool-call arguments, tool-result outputs and tool
+     * definitions; padded by 25% plus 1 K (1024) of headroom for the
+     * response; then doubled from 8192 until it covers the padded estimate.
+     * Capped at 32 K (32768) to keep KV cache bounded; users with bigger
+     * models can override via `OLLAMA_NUM_CTX`. A positive, finite override
+     * is floored and returned as-is, skipping both the estimate and the cap.
+     *
+     * @param messages - Conversation messages; `content`, each
+     *   `toolCalls[].arguments` and each `toolResults[].output` are counted.
+     * @param systemPrompt - System prompt text; contributes 0 when falsy.
+     * @param tools - Optional tool definitions, each sized via `JSON.stringify`.
+     * @returns 8192, 16384 or 32768 from the estimate, or the floored override.
+     */
+    computeNumCtx(messages, systemPrompt, tools) {
+        const override = process.env.OLLAMA_NUM_CTX;
+        if (override) { // unset or empty → fall through to the estimate
+            const parsed = Number(override);
+            if (Number.isFinite(parsed) && parsed > 0) // non-numeric, zero or negative overrides are ignored
+                return Math.floor(parsed);
+        }
+        const estimate = (s) => Math.ceil(s.length / 4); // ~4 characters per token
+        let total = systemPrompt ? estimate(systemPrompt) : 0;
+        for (const m of messages) {
+            total += estimate(m.content); // NOTE(review): assumes content is always a string — confirm against the Message type
+            if (m.toolCalls)
+                for (const tc of m.toolCalls)
+                    total += estimate(JSON.stringify(tc.arguments)); // NOTE(review): JSON.stringify(undefined) is undefined — presumably arguments is always set; verify
+            if (m.toolResults)
+                for (const tr of m.toolResults)
+                    total += estimate(tr.output);
+        }
+        if (tools)
+            for (const t of tools)
+                total += estimate(JSON.stringify(t));
+        const padded = Math.ceil(total * 1.25) + 1024; // 25% margin on the estimate, plus 1024 tokens of headroom
+        let nc = 8192; // smallest window ever requested
+        while (nc < padded && nc < 32768)
+            nc *= 2;
+        return Math.min(nc, 32768); // loop already stops at 32768; the clamp restates the cap
+    }
     convertMessages(messages, systemPrompt) {
         const converted = [];
         if (systemPrompt) {
@@ -69,6 +108,7 @@ export class OllamaProvider {
             model: m,
             messages: msgs,
             stream: true,
+            options: { num_ctx: this.computeNumCtx(messages, systemPrompt, tools) },
         };
         const ollamaTools = this.convertTools(tools);
         if (ollamaTools)
@@ -219,6 +259,7 @@ export class OllamaProvider {
             model: m,
             messages: msgs,
             stream: false,
+            options: { num_ctx: this.computeNumCtx(messages, systemPrompt, tools) },
         };
         const ollamaTools = this.convertTools(tools);
         if (ollamaTools)

package/dist/query/tools.js CHANGED Viewed

@@ -42,11 +42,12 @@ export async function executeSingleTool(toolCall, tools, context, permissionMode
     // Permission check
     const perm = checkPermission(permissionMode, tool.riskLevel, tool.isReadOnly(parsed.data), tool.name, parsed.data);
     if (!perm.allowed) {
-        if (perm.reason === "needs-approval" && askUser) {
-            const { formatToolArgs } = await import("../utils/tool-summary.js");
-            const description = formatToolArgs(tool.name, toolCall.arguments);
-            // Hook: permissionRequest — fires between preToolUse and the interactive askUser prompt.
-            // Only fires when checkPermission says "needs-approval" AND askUser is provided.
+        if (perm.reason === "needs-approval") {
+            // Hook: permissionRequest — fires whenever checkPermission says
+            // "needs-approval", in both interactive and headless modes. Configured
+            // hooks get first say; if they return "ask" or have no decision, we
+            // fall through to the interactive prompt when one is available, or
+            // fail-closed deny in headless mode (issue #62).
             const hookOutcome = await emitHookWithOutcome("permissionRequest", {
                 toolName: tool.name,
                 toolArgs: JSON.stringify(toolCall.arguments).slice(0, 1000),
@@ -55,19 +56,30 @@ export async function executeSingleTool(toolCall, tools, context, permissionMode
                 permissionAction: "ask",
             });
             if (hookOutcome.permissionDecision === "allow") {
-                // Hook granted permission — skip interactive prompt and proceed to execution.
+                // Hook granted permission — proceed to execution.
             }
             else if (hookOutcome.permissionDecision === "deny" || !hookOutcome.allowed) {
                 const reason = hookOutcome.reason ? `: ${hookOutcome.reason}` : "";
                 return { output: `Permission denied by hook${reason}`, isError: true };
             }
-            else {
-                // "ask" or no decision → fall through to interactive prompt
+            else if (askUser) {
+                // "ask" or no decision → interactive prompt when available
+                const { formatToolArgs } = await import("../utils/tool-summary.js");
+                const description = formatToolArgs(tool.name, toolCall.arguments);
                 const allowed = await askUser(tool.name, description, tool.riskLevel);
                 if (!allowed) {
                     return { output: "Permission denied by user.", isError: true };
                 }
             }
+            else {
+                // Headless mode with no hook decision and no interactive prompt:
+                // fail-closed deny. SDK consumers should configure a permissionRequest
+                // hook (or use canUseTool) to make per-call decisions.
+                return {
+                    output: "Permission denied: needs-approval (no interactive prompt available; configure a permissionRequest hook to gate this tool)",
+                    isError: true,
+                };
+            }
         }
         else {
             return { output: `Permission denied: ${perm.reason}`, isError: true };

package/dist/utils/parse-budget.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Parse the `--max-budget-usd` CLI argument into a positive USD amount.
+ *
+ * Accepts plain decimals (`5`, `0.50`, `2.5`) and an optional leading `$`.
+ * Negative or zero values are rejected — a budget of zero would block the
+ * very first call before any cost has accumulated.
+ *
+ * Returns `{ ok: true, value }` on success or `{ ok: false, message }` on
+ * invalid input. The CLI wrapper translates failures into a stderr message
+ * and exit code 2.
+ *
+ * NOTE(review): the compiled implementation converts with `Number(...)`, so
+ * other spellings that `Number` understands (e.g. `1e3`, `0x10`) are
+ * accepted as well, while non-finite results are rejected. Confirm whether
+ * that is intended given the "plain decimals" wording above.
+ */
+export type ParseBudgetResult = {
+    ok: true;
+    value: number;
+} | {
+    ok: false;
+    message: string;
+};
+export declare function parseMaxBudgetUsd(raw: string): ParseBudgetResult;
+//# sourceMappingURL=parse-budget.d.ts.map

package/dist/utils/parse-budget.js ADDED Viewed

@@ -0,0 +1,12 @@
+export function parseMaxBudgetUsd(raw) { // returns { ok: true, value } or { ok: false, message }; never throws for string input
+    const cleaned = raw.replace(/^\$/, "").trim(); // strips one leading "$", then trims. NOTE(review): whitespace before the "$" (e.g. " $5") leaves the "$" in place, so the value is rejected below
+    if (cleaned === "") { // empty, whitespace-only, or a bare "$" — checked explicitly because Number("") is 0
+        return { ok: false, message: `--max-budget-usd must be a positive USD amount, got '${raw}'` };
+    }
+    const n = Number(cleaned); // Number() also understands exponent and hex spellings (e.g. "1e3", "0x10")
+    if (!Number.isFinite(n) || n <= 0) { // rejects NaN, ±Infinity, zero and negatives
+        return { ok: false, message: `--max-budget-usd must be a positive USD amount, got '${raw}'` };
+    }
+    return { ok: true, value: n };
+}
+//# sourceMappingURL=parse-budget.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zhijiewang/openharness",
-  "version": "2.18.0",
+  "version": "2.19.0",
   "description": "Open-source terminal coding agent. Works with any LLM.",
   "type": "module",
   "bin": {
@@ -22,17 +22,23 @@
     "README.md",
     "LICENSE"
   ],
+  "workspaces": [
+    "packages/sdk"
+  ],
   "scripts": {
     "dev": "tsx src/main.tsx",
     "build": "tsc",
+    "build:sdk": "npm --workspace @zhijiewang/openharness-sdk run build",
     "prepare": "husky",
     "prepublishOnly": "npm run build",
-    "test": "node scripts/test.mjs",
+    "test": "node scripts/test.mjs && npm --workspace @zhijiewang/openharness-sdk run test",
+    "test:cli": "node scripts/test.mjs",
+    "test:sdk": "npm --workspace @zhijiewang/openharness-sdk run test",
     "test:coverage": "node scripts/coverage.mjs",
-    "typecheck": "tsc --noEmit",
-    "lint": "biome check src/",
-    "lint:fix": "biome check --write src/",
-    "format": "biome format --write src/",
+    "typecheck": "tsc --noEmit && npm --workspace @zhijiewang/openharness-sdk run typecheck",
+    "lint": "biome check src/ packages/sdk/src/",
+    "lint:fix": "biome check --write src/ packages/sdk/src/",
+    "format": "biome format --write src/ packages/sdk/src/",
     "start": "node dist/main.js"
   },
   "dependencies": {