@zhijiewang/openharness 2.19.0 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,52 @@
1
+ import { readFileSync } from "node:fs";
1
2
  import { readOhConfig } from "../harness/config.js";
3
+ import { debug } from "../utils/debug.js";
2
4
  import { McpClient } from "./client.js";
3
5
  import { DeferredMcpTool } from "./DeferredMcpTool.js";
4
6
  import { McpTool } from "./McpTool.js";
7
/**
 * Parse a `--mcp-config <path>` file into a list of server configs.
 *
 * Accepted top-level shapes:
 * - `{ "mcpServers": [...] }` — object wrapping an array of server configs
 * - `[ ... ]` — bare array of server configs
 * - `{ "name": ..., ... }` — single-server object
 *
 * Validation is shape-only: every entry must be a non-null object whose
 * `name` is a string. Nothing else about an entry is inspected here.
 *
 * @param {string} path - Location of the JSON config file.
 * @returns {object[]} The server entries exactly as they appear in the file.
 * @throws {Error} When the file cannot be read (the `readFileSync` error
 *   propagates unchanged), when it is not valid JSON (the parser error is
 *   attached as `cause`), when the top-level shape is unrecognised, or when
 *   an entry lacks a string `name`.
 */
export function parseMcpConfigFile(path) {
    // Editors on Windows frequently save JSON with a UTF-8 byte-order mark.
    // `readFileSync(..., "utf8")` keeps it and `JSON.parse` rejects it, so
    // strip it before parsing.
    const raw = readFileSync(path, "utf8").replace(/^\uFEFF/, "");
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        throw new Error(`--mcp-config '${path}' is not valid JSON: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    }
    let servers;
    if (Array.isArray(parsed)) {
        servers = parsed;
    }
    else if (parsed && typeof parsed === "object" && "mcpServers" in parsed) {
        const list = parsed.mcpServers;
        if (!Array.isArray(list)) {
            throw new Error(`--mcp-config '${path}': mcpServers must be an array`);
        }
        servers = list;
    }
    else if (parsed && typeof parsed === "object" && "name" in parsed) {
        // A lone server object is treated as a one-element list.
        servers = [parsed];
    }
    else {
        throw new Error(`--mcp-config '${path}': expected an mcpServers array, a bare array, or a single server object`);
    }
    for (const s of servers) {
        if (!s || typeof s !== "object" || typeof s.name !== "string") {
            throw new Error(`--mcp-config '${path}': every server entry must be an object with a 'name' string`);
        }
    }
    return servers;
}
5
50
  const connectedClients = [];
6
51
  let exitHandlerInstalled = false;
7
52
  function installExitHandler() {
@@ -28,11 +73,20 @@ function installExitHandler() {
28
73
  }
29
74
  /** Threshold: servers with more tools than this use deferred loading */
30
75
  const DEFERRED_THRESHOLD = 10;
31
- /** Load MCP tools from .oh/config.yaml mcpServers list. Returns empty array if none configured. */
32
- export async function loadMcpTools() {
76
+ /** Load MCP tools from .oh/config.yaml mcpServers list (and/or `--mcp-config` overrides). Returns empty array if none configured. */
77
+ export async function loadMcpTools(opts = {}) {
33
78
  installExitHandler();
34
79
  const cfg = readOhConfig();
35
- const servers = cfg?.mcpServers ?? [];
80
+ const fromConfig = opts.strict ? [] : (cfg?.mcpServers ?? []);
81
+ const fromExtra = opts.extraServers ?? [];
82
+ // Dedup by name — extras win on conflict so --mcp-config can override a
83
+ // project-config entry without --strict.
84
+ const byName = new Map();
85
+ for (const s of fromConfig)
86
+ byName.set(s.name, s);
87
+ for (const s of fromExtra)
88
+ byName.set(s.name, s);
89
+ const servers = Array.from(byName.values());
36
90
  if (servers.length === 0)
37
91
  return [];
38
92
  const tools = [];
@@ -45,10 +99,12 @@ export async function loadMcpTools() {
45
99
  for (const result of results) {
46
100
  if (result.status === "rejected") {
47
101
  console.warn(`[mcp] Failed to connect: ${result.reason instanceof Error ? result.reason.message : String(result.reason)}`);
102
+ debug("mcp", "connect failed", result.reason);
48
103
  continue;
49
104
  }
50
105
  const { client, defs, server } = result.value;
51
106
  connectedClients.push(client);
107
+ debug("mcp", "connected", { server: server.name, tools: defs.length, deferred: defs.length > DEFERRED_THRESHOLD });
52
108
  if (defs.length > DEFERRED_THRESHOLD) {
53
109
  for (const def of defs) {
54
110
  tools.push(new DeferredMcpTool(client, def.name, def.description ?? "", server.riskLevel));
@@ -78,6 +134,33 @@ export function disconnectMcpClients() {
78
134
  export function connectedMcpServers() {
79
135
  return connectedClients.map((c) => c.name);
80
136
  }
137
/**
 * Enumerate prompts on every already-connected MCP server and wrap each one
 * in a handle that can render it on demand.
 *
 * Call AFTER `loadMcpTools()` so `connectedClients` is populated. Servers are
 * queried concurrently; the returned handles keep the order of
 * `connectedClients` (and, within a server, the order `listPrompts` returned).
 * A server whose `listPrompts()` rejects is skipped and the failure is traced
 * under the `mcp` debug category instead of being dropped silently.
 * (`client.listPrompts` is expected to already return an empty list for
 * servers without the `prompts/list` capability — not verifiable from here.)
 *
 * @returns {Promise<Array<{qualifiedName: string, description: string, arguments?: unknown, render: (args?: object) => unknown}>>}
 *   One handle per prompt; `qualifiedName` is `<server>:<prompt>`.
 */
export async function loadMcpPrompts() {
    // Snapshot the client list so indices stay aligned with the settled results.
    const clients = [...connectedClients];
    // The async wrapper turns a synchronous throw from listPrompts into a rejection.
    const settled = await Promise.allSettled(clients.map(async (client) => client.listPrompts()));
    const handles = [];
    settled.forEach((result, index) => {
        const client = clients[index];
        if (result.status === "rejected") {
            debug("mcp", "listPrompts failed", { server: client.name }, result.reason);
            return;
        }
        for (const p of result.value ?? []) {
            handles.push({
                qualifiedName: `${client.name}:${p.name}`,
                description: p.description ?? `MCP prompt from ${client.name}`,
                // Only carry `arguments` through when the server declared some.
                ...(p.arguments ? { arguments: p.arguments } : {}),
                render: (args = {}) => client.getPrompt(p.name, args),
            });
        }
    });
    return handles;
}
81
164
  const MAX_MCP_INSTRUCTION_LENGTH = 2000;
82
165
  /** Get MCP server instructions to inject into system prompt (sandboxed with origin markers) */
83
166
  export function getMcpInstructions() {
@@ -55,12 +55,22 @@ export async function executeSingleTool(toolCall, tools, context, permissionMode
55
55
  permissionMode,
56
56
  permissionAction: "ask",
57
57
  });
58
+ const denyAndEmit = (source, reason, output) => {
59
+ emitHook("permissionDenied", {
60
+ toolName: tool.name,
61
+ toolArgs: JSON.stringify(toolCall.arguments).slice(0, 1000),
62
+ permissionMode,
63
+ denySource: source,
64
+ denyReason: reason,
65
+ });
66
+ return { output, isError: true };
67
+ };
58
68
  if (hookOutcome.permissionDecision === "allow") {
59
69
  // Hook granted permission — proceed to execution.
60
70
  }
61
71
  else if (hookOutcome.permissionDecision === "deny" || !hookOutcome.allowed) {
62
72
  const reason = hookOutcome.reason ? `: ${hookOutcome.reason}` : "";
63
- return { output: `Permission denied by hook${reason}`, isError: true };
73
+ return denyAndEmit("hook", hookOutcome.reason ?? "hook denied", `Permission denied by hook${reason}`);
64
74
  }
65
75
  else if (askUser) {
66
76
  // "ask" or no decision → interactive prompt when available
@@ -68,20 +78,25 @@ export async function executeSingleTool(toolCall, tools, context, permissionMode
68
78
  const description = formatToolArgs(tool.name, toolCall.arguments);
69
79
  const allowed = await askUser(tool.name, description, tool.riskLevel);
70
80
  if (!allowed) {
71
- return { output: "Permission denied by user.", isError: true };
81
+ return denyAndEmit("user", "user declined", "Permission denied by user.");
72
82
  }
73
83
  }
74
84
  else {
75
85
  // Headless mode with no hook decision and no interactive prompt:
76
86
  // fail-closed deny. SDK consumers should configure a permissionRequest
77
87
  // hook (or use canUseTool) to make per-call decisions.
78
- return {
79
- output: "Permission denied: needs-approval (no interactive prompt available; configure a permissionRequest hook to gate this tool)",
80
- isError: true,
81
- };
88
+ return denyAndEmit("headless", "no hook decision and no interactive prompt available", "Permission denied: needs-approval (no interactive prompt available; configure a permissionRequest hook to gate this tool)");
82
89
  }
83
90
  }
84
91
  else {
92
+ // Auto-mode policy block (deny / acceptEdits / etc) — symmetric event.
93
+ emitHook("permissionDenied", {
94
+ toolName: tool.name,
95
+ toolArgs: JSON.stringify(toolCall.arguments).slice(0, 1000),
96
+ permissionMode,
97
+ denySource: "policy",
98
+ denyReason: perm.reason,
99
+ });
85
100
  return { output: `Permission denied: ${perm.reason}`, isError: true };
86
101
  }
87
102
  }
@@ -200,6 +215,7 @@ export async function* executeToolCalls(toolCalls, tools, context, permissionMod
200
215
  const onOutputChunk = (callId, chunk) => {
201
216
  outputChunks.push({ type: "tool_output_delta", callId, chunk });
202
217
  };
218
+ const allToolNames = toolCalls.map((tc) => tc.toolName);
203
219
  for (const batch of batches) {
204
220
  if (batch.concurrent) {
205
221
  const results = await Promise.all(batch.calls.map((tc) => executeSingleTool(tc, tools, { ...context, callId: tc.id, onOutputChunk }, permissionMode, askUser)));
@@ -222,5 +238,17 @@ export async function* executeToolCalls(toolCalls, tools, context, permissionMod
222
238
  }
223
239
  }
224
240
  }
241
+ // Hook: postToolBatch — fires once after the model's full set of tool
242
+ // calls for this turn have all resolved (across however many serial /
243
+ // concurrent batches partitionToolCalls produced), before the next model
244
+ // call. Per-tool postToolUse / postToolUseFailure still fire as before;
245
+ // this is the batch-level boundary for hooks that want to act once per
246
+ // turn instead of once per tool.
247
+ if (toolCalls.length > 0) {
248
+ emitHook("postToolBatch", {
249
+ batchSize: String(toolCalls.length),
250
+ batchTools: allToolNames.slice(0, 50).join(","),
251
+ });
252
+ }
225
253
  }
226
254
  //# sourceMappingURL=tools.js.map
@@ -1,5 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import { createWorktree, isGitRepo } from "../../git/index.js";
3
+ import { emitHook } from "../../harness/hooks.js";
3
4
  const inputSchema = z.object({
4
5
  branch: z.string().optional().describe("Branch name for the worktree (auto-generated if omitted)"),
5
6
  });
@@ -22,6 +23,9 @@ export const EnterWorktreeTool = {
22
23
  if (!path) {
23
24
  return { output: "Failed to create worktree.", isError: true };
24
25
  }
26
+ // Symmetric to taskCreated — fire only on the success path so audit hooks
27
+ // can react to the new worktree (e.g. set up a per-worktree scratch dir).
28
+ emitHook("worktreeCreate", { worktreePath: path, worktreeParent: context.workingDir });
25
29
  return { output: `Worktree created at: ${path}\nUse ExitWorktree to clean up when done.`, isError: false };
26
30
  },
27
31
  prompt() {
@@ -1,5 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import { hasWorktreeChanges, removeWorktree } from "../../git/index.js";
3
+ import { emitHook } from "../../harness/hooks.js";
3
4
  const inputSchema = z.object({
4
5
  path: z.string().describe("Path to the worktree to remove"),
5
6
  force: z.boolean().optional().describe("Force removal even with uncommitted changes"),
@@ -24,6 +25,12 @@ export const ExitWorktreeTool = {
24
25
  }
25
26
  try {
26
27
  removeWorktree(input.path);
28
+ // Fire after removeWorktree resolves so the hook only sees confirmed
29
+ // removals — symmetric to worktreeCreate firing on success.
30
+ emitHook("worktreeRemove", {
31
+ worktreePath: input.path,
32
+ worktreeForced: input.force ? "true" : "false",
33
+ });
27
34
  return { output: `Worktree removed: ${input.path}`, isError: false };
28
35
  }
29
36
  catch (err) {
@@ -1,6 +1,7 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
3
  import { z } from "zod";
4
+ import { emitHook } from "../../harness/hooks.js";
4
5
  const inputSchema = z.object({
5
6
  subject: z.string(),
6
7
  description: z.string(),
@@ -42,6 +43,10 @@ export const TaskCreateTool = {
42
43
  };
43
44
  tasks.push(newTask);
44
45
  await fs.writeFile(filePath, JSON.stringify(tasks, null, 2), "utf-8");
46
+ emitHook("taskCreated", {
47
+ taskId: String(newTask.id),
48
+ taskSubject: newTask.subject.slice(0, 200),
49
+ });
45
50
  return { output: `Task #${newTask.id} created: ${newTask.subject}`, isError: false };
46
51
  }
47
52
  catch (err) {
@@ -1,6 +1,7 @@
1
1
  import * as fs from "node:fs/promises";
2
2
  import * as path from "node:path";
3
3
  import { z } from "zod";
4
+ import { emitHook } from "../../harness/hooks.js";
4
5
  const inputSchema = z.object({
5
6
  taskId: z.number(),
6
7
  status: z.enum(["pending", "in_progress", "completed", "cancelled", "deleted"]).optional(),
@@ -32,6 +33,7 @@ export const TaskUpdateTool = {
32
33
  if (!task) {
33
34
  return { output: `Error: Task #${input.taskId} not found.`, isError: true };
34
35
  }
36
+ const previousStatus = task.status;
35
37
  // Handle deletion
36
38
  if (input.status === "deleted") {
37
39
  const idx = tasks.indexOf(task);
@@ -69,6 +71,15 @@ export const TaskUpdateTool = {
69
71
  task.blockedBy = [...new Set([...(task.blockedBy ?? []), ...input.addBlockedBy])];
70
72
  }
71
73
  await fs.writeFile(filePath, JSON.stringify(tasks, null, 2), "utf-8");
74
+ // Hook: taskCompleted — fires only on the pending|in_progress → completed
75
+ // transition. Re-saving an already-completed task is a no-op for the hook.
76
+ if (input.status === "completed" && previousStatus !== "completed") {
77
+ emitHook("taskCompleted", {
78
+ taskId: String(task.id),
79
+ taskSubject: task.subject.slice(0, 200),
80
+ taskPreviousStatus: previousStatus,
81
+ });
82
+ }
72
83
  return { output: `Task #${task.id} updated. Status: ${task.status}`, isError: false };
73
84
  }
74
85
  catch (err) {
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Categorized debug logger — gates verbose internal traces behind a runtime
3
+ * switch so they're silent by default but easy to flip on for support / CI.
4
+ *
5
+ * Activation precedence (highest first):
6
+ * 1. `configureDebug({ categories })` from a CLI flag (`--debug [cats]`)
7
+ * 2. `OH_DEBUG` env var
8
+ *
9
+ * Sink precedence:
10
+ * 1. `configureDebug({ file })` from `--debug-file <path>`
11
+ * 2. `OH_DEBUG_FILE` env var
12
+ * 3. `process.stderr` (default)
13
+ *
14
+ * Categories are arbitrary strings — call sites pick them. The CLI accepts a
15
+ * comma-separated list (`--debug mcp,hooks`) or `--debug` alone for "all".
16
+ *
17
+ * Wire pattern:
18
+ * import { configureDebug, debug } from "./utils/debug.js";
19
+ * configureDebug({ categories: opts.debug, file: opts.debugFile });
20
+ * debug("mcp", "connected", server.name);
21
+ */
22
+ /**
23
+ * Parse the raw flag value into a Set of enabled categories.
24
+ *
25
+ * Accepted values:
26
+ * - `undefined` / empty / `false` → no debug
27
+ * - `true` / `"*"` / `"all"` / `"1"` → all categories
28
+ * - `"mcp,hooks,provider"` → comma-separated explicit list
29
+ *
30
+ * Whitespace is trimmed and empty entries dropped, so `"mcp, ,hooks"` is
31
+ * equivalent to `"mcp,hooks"`. Pure function — exposed for testability.
32
+ */
33
+ export declare function parseDebugCategories(raw: string | boolean | undefined): Set<string>;
34
+ export interface ConfigureDebugOptions {
35
+ /** CLI flag value: `--debug` → true, `--debug mcp` → "mcp", absent → undefined. */
36
+ categories?: string | boolean | undefined;
37
+ /** CLI flag value: `--debug-file <path>` — appended to, never truncated. */
38
+ file?: string;
39
+ /** Test injection — overrides the file/stderr sink. Not used at runtime. */
40
+ sink?: NodeJS.WritableStream;
41
+ }
42
+ /**
43
+ * Apply debug configuration. Safe to call multiple times — later calls fully
44
+ * replace earlier state. When `categories` is undefined, falls back to
45
+ * `OH_DEBUG`; when `file` is undefined, falls back to `OH_DEBUG_FILE`.
46
+ *
47
+ * File output uses `appendFileSync` rather than a `WriteStream` so each
48
+ * `debug()` line lands on disk before the function returns. That trades a
49
+ * little throughput for ordering guarantees that matter when debugging
50
+ * crashes — a streamed sink could lose its tail buffer on `process.exit`.
51
+ */
52
+ export declare function configureDebug(opts?: ConfigureDebugOptions): void;
53
+ /** Whether the given category is currently emitting. Cheap — a Set lookup. */
54
+ export declare function isDebugEnabled(category: string): boolean;
55
+ /**
56
+ * Emit a debug line for the given category. Cheap no-op when the category is
57
+ * disabled — argument formatting is skipped entirely. Each line is prefixed
58
+ * with `[debug:<cat>] +<elapsed_ms>ms` so categories interleave readably.
59
+ */
60
+ export declare function debug(category: string, ...args: unknown[]): void;
61
+ /** @internal Test-only: reset module-level state between cases. */
62
+ export declare function _resetDebugForTest(): void;
63
+ //# sourceMappingURL=debug.d.ts.map
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Categorized debug logger — gates verbose internal traces behind a runtime
3
+ * switch so they're silent by default but easy to flip on for support / CI.
4
+ *
5
+ * Activation precedence (highest first):
6
+ * 1. `configureDebug({ categories })` from a CLI flag (`--debug [cats]`)
7
+ * 2. `OH_DEBUG` env var
8
+ *
9
+ * Sink precedence:
10
+ * 1. `configureDebug({ file })` from `--debug-file <path>`
11
+ * 2. `OH_DEBUG_FILE` env var
12
+ * 3. `process.stderr` (default)
13
+ *
14
+ * Categories are arbitrary strings — call sites pick them. The CLI accepts a
15
+ * comma-separated list (`--debug mcp,hooks`) or `--debug` alone for "all".
16
+ *
17
+ * Wire pattern:
18
+ * import { configureDebug, debug } from "./utils/debug.js";
19
+ * configureDebug({ categories: opts.debug, file: opts.debugFile });
20
+ * debug("mcp", "connected", server.name);
21
+ */
22
+ import { appendFileSync } from "node:fs";
23
+ const ALL = "*";
24
+ let enabledCategories = new Set();
25
+ let debugFilePath;
26
+ let sinkOverride;
27
+ let started = Date.now();
28
/**
 * Parse the raw flag / env value into a Set of enabled categories.
 *
 * Accepted values (string forms are trimmed and matched case-insensitively):
 * - `undefined` / `null` / `false` / `""` / `"false"` / `"0"` → no debug
 * - `true` / `"true"` / `"1"` / `"*"` / `"all"` → all categories
 * - `"mcp,hooks,provider"` → comma-separated explicit list
 *
 * Within a list, whitespace is trimmed and empty entries dropped, so
 * `"mcp, ,hooks"` is equivalent to `"mcp,hooks"`. A list that contains `*`
 * or `all` anywhere collapses to "all categories". Pure function — exposed
 * for testability.
 *
 * @param {string | boolean | undefined | null} raw - CLI flag or env value.
 * @returns {Set<string>} Enabled categories; the wildcard is stored as `ALL`.
 */
export function parseDebugCategories(raw) {
    if (raw == null || raw === false)
        return new Set();
    if (raw === true)
        return new Set([ALL]);
    // Normalise once so surrounding whitespace (e.g. `OH_DEBUG=" all "`) does
    // not turn a wildcard into a category literally named "all".
    const lower = String(raw).trim().toLowerCase();
    // Mirror the truthy spellings: an explicit "off" must not become a category.
    if (lower === "" || lower === "false" || lower === "0")
        return new Set();
    if (lower === "*" || lower === "all" || lower === "true" || lower === "1")
        return new Set([ALL]);
    const entries = String(raw)
        .split(",")
        .map((s) => s.trim())
        .filter(Boolean);
    const hasWildcard = entries.some((entry) => entry === ALL || entry.toLowerCase() === "all");
    return hasWildcard ? new Set([ALL]) : new Set(entries);
}
52
/**
 * Install a fresh debug configuration, fully replacing whatever an earlier
 * call set up (so calling it repeatedly is safe).
 *
 * Fallbacks: an undefined `categories` defers to the `OH_DEBUG` env var, and
 * an undefined `file` defers to `OH_DEBUG_FILE`. Supplying `sink` takes over
 * output entirely, so no file path is retained in that case. The elapsed-time
 * origin used in debug line prefixes is reset on every call.
 *
 * File output is written elsewhere with `appendFileSync` rather than a
 * `WriteStream`, so each line is on disk before `debug()` returns — slower,
 * but the tail is not lost if the process exits abruptly.
 *
 * @param {{categories?: string | boolean, file?: string, sink?: {write(line: string): unknown}}} [opts]
 * @returns {void}
 */
export function configureDebug(opts = {}) {
    const { categories, file, sink } = opts;
    const rawCats = categories === undefined ? process.env.OH_DEBUG : categories;
    enabledCategories = parseDebugCategories(rawCats);
    sinkOverride = sink;
    if (sink) {
        debugFilePath = undefined;
    }
    else {
        debugFilePath = file ?? process.env.OH_DEBUG_FILE;
    }
    started = Date.now();
}
69
/**
 * Report whether debug output is currently on for `category`.
 *
 * True when the wildcard (`ALL`) is enabled or the category itself was
 * listed. Costs at most two Set lookups.
 *
 * @param {string} category - Category name chosen by the call site.
 * @returns {boolean}
 */
export function isDebugEnabled(category) {
    if (enabledCategories.has(ALL)) {
        return true;
    }
    return enabledCategories.has(category);
}
73
/**
 * Emit a debug line for the given category.
 *
 * Returns immediately, before any argument formatting, when the category is
 * disabled. Each line is prefixed with `[debug:<cat>] +<elapsed_ms>ms`, where
 * the elapsed time is measured from the module-level `started` timestamp.
 *
 * Argument formatting:
 * - strings are written verbatim;
 * - `Error`s are written as their stack (or message when no stack exists);
 * - everything else goes through `JSON.stringify`, falling back to
 *   `String(value)` when that throws (cycles, BigInt) or yields no text
 *   (`undefined`, functions, symbols) so no argument silently disappears.
 *
 * Sink selection: an injected sink wins, then the configured file path, then
 * `process.stderr`.
 *
 * @param {string} category - Category this line belongs to.
 * @param {...unknown} args - Values to format and join with single spaces.
 * @returns {void}
 */
export function debug(category, ...args) {
    if (!isDebugEnabled(category))
        return;
    const elapsed = Date.now() - started;
    const formatted = args
        .map((a) => {
        if (typeof a === "string")
            return a;
        if (a instanceof Error)
            return a.stack ?? a.message;
        try {
            // JSON.stringify returns undefined (not a string) for undefined,
            // functions and symbols; join() would render that as "".
            return JSON.stringify(a) ?? String(a);
        }
        catch {
            return String(a);
        }
    })
        .join(" ");
    const line = `[debug:${category}] +${elapsed}ms ${formatted}\n`;
    if (sinkOverride) {
        sinkOverride.write(line);
    }
    else if (debugFilePath) {
        try {
            appendFileSync(debugFilePath, line);
        }
        catch (err) {
            // Fall back to stderr so a broken --debug-file doesn't swallow output,
            // and stop retrying the file for every subsequent line.
            process.stderr.write(`[debug] could not append to '${debugFilePath}': ${err instanceof Error ? err.message : String(err)}\n`);
            process.stderr.write(line);
            debugFilePath = undefined;
        }
    }
    else {
        process.stderr.write(line);
    }
}
115
/**
 * @internal Test-only helper: put the module-level debug state back to its
 * initial values (nothing enabled, no file path, no injected sink) and
 * restart the elapsed-time origin, so test cases do not leak into each other.
 *
 * @returns {void}
 */
export function _resetDebugForTest() {
    started = Date.now();
    sinkOverride = undefined;
    debugFilePath = undefined;
    enabledCategories = new Set();
}
122
+ //# sourceMappingURL=debug.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zhijiewang/openharness",
3
- "version": "2.19.0",
3
+ "version": "2.21.0",
4
4
  "description": "Open-source terminal coding agent. Works with any LLM.",
5
5
  "type": "module",
6
6
  "bin": {