@botbotgo/agent-harness 0.0.134 → 0.0.136

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +110 -49
  2. package/README.zh.md +102 -49
  3. package/dist/config/agents/direct.yaml +70 -71
  4. package/dist/config/agents/orchestra.yaml +90 -91
  5. package/dist/contracts/workspace.d.ts +12 -2
  6. package/dist/extensions.js +13 -1
  7. package/dist/init-project.js +19 -21
  8. package/dist/package-version.d.ts +1 -1
  9. package/dist/package-version.js +1 -1
  10. package/dist/resource/mcp-tool-support.d.ts +4 -0
  11. package/dist/resource/mcp-tool-support.js +112 -35
  12. package/dist/resource/resource-impl.js +199 -7
  13. package/dist/runtime/adapter/runtime-shell.d.ts +3 -1
  14. package/dist/runtime/adapter/runtime-shell.js +2 -1
  15. package/dist/runtime/adapter/tool/tool-arguments.js +1 -0
  16. package/dist/runtime/adapter/tool/tool-hitl.js +3 -0
  17. package/dist/runtime/agent-runtime-adapter.d.ts +6 -0
  18. package/dist/runtime/agent-runtime-adapter.js +32 -2
  19. package/dist/runtime/harness.js +2 -0
  20. package/dist/tool-modules.d.ts +5 -0
  21. package/dist/tool-modules.js +10 -0
  22. package/dist/workspace/agent-binding-compiler.d.ts +2 -2
  23. package/dist/workspace/agent-binding-compiler.js +78 -6
  24. package/dist/workspace/compile.js +150 -6
  25. package/dist/workspace/object-loader.js +148 -53
  26. package/dist/workspace/resource-compilers.js +6 -0
  27. package/dist/workspace/support/source-collectors.js +1 -1
  28. package/dist/workspace/support/workspace-ref-utils.d.ts +1 -0
  29. package/dist/workspace/support/workspace-ref-utils.js +9 -0
  30. package/dist/workspace/tool-hydration.js +87 -13
  31. package/dist/workspace/yaml-object-reader.js +40 -13
  32. package/package.json +1 -1
@@ -1,7 +1,7 @@
1
1
  # agent-harness feature: schema version for this declarative config object.
2
2
  apiVersion: agent-harness/v1alpha1
3
3
  # agent-harness feature: object type discriminator.
4
- # Prefer the generic `Agent` form and select the concrete execution backend under `spec.execution`.
4
+ # Prefer the generic `Agent` form and place execution fields directly under `spec`.
5
5
  kind: Agent
6
6
  metadata:
7
7
  # agent-harness feature: stable object id used for refs and runtime naming.
@@ -19,99 +19,98 @@ spec:
19
19
  # =====================
20
20
  # Runtime Agent Features
21
21
  # =====================
22
- execution:
23
- # Current backend adapter for this host profile.
24
- backend: deepagent
25
- # Upstream execution feature: model ref for the underlying LLM used by this execution host.
26
- modelRef: model/default
27
- memory:
28
- # Upstream execution feature: bootstrap memory sources supplied to the selected backend at construction time.
29
- # These paths resolve relative to the workspace root unless they are already absolute.
30
- # Treat this as agent-owned startup context, not as a dynamic long-term memory sink:
31
- # - keep `systemPrompt` for stable role, boundaries, and hard behavioral rules
32
- # - use `memory:` for stable project knowledge, operating conventions, and shared or agent-specific context files
33
- # - use `/memories/*` via the backend/store below for durable knowledge learned from prior runs
34
- # - use the harness checkpointer for resumable graph state for an in-flight run
35
- # Updating these files changes future agent constructions, but they are still bootstrap inputs rather than
36
- # self-updating runtime memory.
37
- - path: config/agent-context.md
38
- # Upstream execution feature: top-level host starts with no extra direct tool refs beyond discovered workspace tools.
39
- tools: []
40
- # Upstream execution feature: top-level host starts with no explicit skill package refs in the default workspace.
41
- skills: []
42
- # Upstream execution feature: subagent topology is empty in the repository default and can be filled in YAML.
43
- subagents: []
44
- # Upstream execution feature: host-level MCP servers are opt-in and empty by default.
45
- mcpServers: []
46
- # Runtime execution feature: checkpointer config passed into the selected backend adapter.
47
- # This persists resumable graph state for this agent.
48
- # Available `kind` options in this harness: `FileCheckpointer`, `MemorySaver`, `SqliteSaver`.
49
- # `path` is only used by `FileCheckpointer` and `SqliteSaver`; omit it for `MemorySaver`.
50
- checkpointer:
51
- ref: checkpointer/sqlite
52
- # Upstream execution feature: store config passed into the selected backend adapter.
53
- # In the default deepagent adapter this is the LangGraph store used by `StoreBackend` routes.
54
- # Built-in kinds in this harness today: `FileStore`, `InMemoryStore`.
55
- # Other store kinds should flow through a custom runtime resolver instead of being claimed as built in.
56
- store:
57
- ref: store/default
22
+ # Current backend adapter for this host profile.
23
+ backend: deepagent
24
+ # Upstream execution feature: model ref for the underlying LLM used by this execution host.
25
+ modelRef: model/default
26
+ memory:
27
+ # Upstream execution feature: bootstrap memory sources supplied to the selected backend at construction time.
28
+ # These paths resolve relative to the workspace root unless they are already absolute.
29
+ # Treat this as agent-owned startup context, not as a dynamic long-term memory sink:
30
+ # - keep `systemPrompt` for stable role, boundaries, and hard behavioral rules
31
+ # - use `memory:` for stable project knowledge, operating conventions, and shared or agent-specific context files
32
+ # - use `/memories/*` via the backend/store below for durable knowledge learned from prior runs
33
+ # - use the harness checkpointer for resumable graph state for an in-flight run
34
+ # Updating these files changes future agent constructions, but they are still bootstrap inputs rather than
35
+ # self-updating runtime memory.
36
+ - path: config/agent-context.md
37
+ # Upstream execution feature: top-level host starts with no extra direct tool refs beyond discovered workspace tools.
38
+ tools: []
39
+ # Upstream execution feature: top-level host starts with no explicit skill package refs in the default workspace.
40
+ skills: []
41
+ # Upstream execution feature: subagent topology is empty in the repository default and can be filled in YAML.
42
+ subagents: []
43
+ # Upstream execution feature: host-level MCP servers are opt-in and empty by default.
44
+ mcpServers: []
45
+ # Runtime execution feature: checkpointer config passed into the selected backend adapter.
46
+ # This persists resumable graph state for this agent.
47
+ # Available `kind` options in this harness: `FileCheckpointer`, `MemorySaver`, `SqliteSaver`.
48
+ # `path` is only used by `FileCheckpointer` and `SqliteSaver`; omit it for `MemorySaver`.
49
+ checkpointer:
50
+ ref: checkpointer/sqlite
51
+ # Upstream execution feature: store config passed into the selected backend adapter.
52
+ # In the default deepagent adapter this is the LangGraph store used by `StoreBackend` routes.
53
+ # Built-in kinds in this harness today: `FileStore`, `InMemoryStore`.
54
+ # Other store kinds should flow through a custom runtime resolver instead of being claimed as built in.
55
+ store:
56
+ ref: store/default
57
+ # Upstream execution feature: backend config passed into the selected backend adapter.
58
+ # Prefer a reusable backend preset via `ref` so backend topology stays declarative and reusable in YAML.
59
+ # The default preset keeps DeepAgent execution semantics upstream-owned:
60
+ # - workspace execution uses a lightweight VFS sandbox
61
+ # - long-term memory under `/memories/*` uses `StoreBackend`
62
+ # - `CompositeBackend` composes those backend instances together
63
+ # The harness injects the resolved store/checkpointer instances, but the backend topology itself stays upstream-shaped.
64
+ # Upstream execution feature: no extra declarative HITL rules by default.
65
+ interruptOn: {}
66
+ # Upstream execution feature: no extra declarative middleware beyond upstream deepagents defaults by default.
67
+ # Common upstream middleware kinds that this harness can compile directly from YAML:
68
+ # - `patchToolCalls`
69
+ # - `summarization`
70
+ # - `dynamicSystemPrompt`
71
+ # - `humanInTheLoop`
72
+ # - `todoList`
73
+ # - `pii`, `piiRedaction`
74
+ #
75
+ # DeepAgents already includes its own filesystem, planning, subagent, and memory semantics.
76
+ # Keep this list empty unless you are intentionally adding extra upstream middleware on top.
77
+ middleware: []
78
+ config:
58
79
  # Upstream execution feature: backend config passed into the selected backend adapter.
59
- # Prefer a reusable backend preset via `ref` so backend topology stays declarative and reusable in YAML.
60
- # The default preset keeps DeepAgent execution semantics upstream-owned:
61
- # - workspace execution uses a lightweight VFS sandbox
62
- # - long-term memory under `/memories/*` uses `StoreBackend`
63
- # - `CompositeBackend` composes those backend instances together
64
- # The harness injects the resolved store/checkpointer instances, but the backend topology itself stays upstream-shaped.
65
- # Upstream execution feature: no extra declarative HITL rules by default.
66
- interruptOn: {}
67
- # Upstream execution feature: no extra declarative middleware beyond upstream deepagents defaults by default.
68
- # Common upstream middleware kinds that this harness can compile directly from YAML:
69
- # - `patchToolCalls`
70
- # - `summarization`
71
- # - `dynamicSystemPrompt`
72
- # - `humanInTheLoop`
73
- # - `todoList`
74
- # - `pii`, `piiRedaction`
75
- #
76
- # DeepAgents already includes its own filesystem, planning, subagent, and memory semantics.
77
- # Keep this list empty unless you are intentionally adding extra upstream middleware on top.
78
- middleware: []
79
- config:
80
- # Upstream execution feature: backend config passed into the selected backend adapter.
81
- # Keep this nested under `config` because `execution.backend` already selects the adapter mode.
82
- backend:
83
- ref: backend/default
84
- # Upstream execution feature: system prompt for the orchestration host.
85
- # This becomes the top-level instruction block for the selected execution backend and should hold the
86
- # agent's durable role, priorities, and behavioral guardrails rather than bulky project facts.
87
- systemPrompt: |-
88
- You are the orchestra agent.
80
+ # Keep this nested under `config` because `backend` already selects the adapter mode.
81
+ backend:
82
+ ref: backend/default
83
+ # Upstream execution feature: system prompt for the orchestration host.
84
+ # This becomes the top-level instruction block for the selected execution backend and should hold the
85
+ # agent's durable role, priorities, and behavioral guardrails rather than bulky project facts.
86
+ systemPrompt: |-
87
+ You are the orchestra agent.
89
88
 
90
- You are the default execution host.
91
- Try to finish the request yourself before delegating.
92
- Use your own tools first when they are sufficient.
93
- Use your own skills first when they are sufficient.
94
- Delegate only when a subagent is a clearly better fit or when your own tools and skills are not enough.
95
- If neither you nor any suitable subagent can do the work, say so plainly.
89
+ You are the default execution host.
90
+ Try to finish the request yourself before delegating.
91
+ Use your own tools first when they are sufficient.
92
+ Use your own skills first when they are sufficient.
93
+ Delegate only when a subagent is a clearly better fit or when your own tools and skills are not enough.
94
+ If neither you nor any suitable subagent can do the work, say so plainly.
96
95
 
97
- Do not delegate by reflex.
98
- Do not delegate just because a task has multiple steps.
99
- Do not delegate when a direct answer or a short local tool pass is enough.
100
- Keep the critical path local when immediate progress depends on it; otherwise delegate bounded sidecar work to
101
- the most appropriate subagent.
96
+ Do not delegate by reflex.
97
+ Do not delegate just because a task has multiple steps.
98
+ Do not delegate when a direct answer or a short local tool pass is enough.
99
+ Keep the critical path local when immediate progress depends on it; otherwise delegate bounded sidecar work to
100
+ the most appropriate subagent.
102
101
 
103
- Use your own tools for lightweight discovery, inventory, and context gathering.
104
- Prefer the structured checkout, indexing, retrieval, and inventory tools that are already attached to you over
105
- ad hoc shell work when those tools are sufficient.
106
- Use the attached subagent descriptions as the source of truth for what each subagent is for.
107
- Do not delegate to a subagent whose description does not clearly match the task.
108
- Integrate subagent results into one coherent answer and do not claim checks or evidence you did not obtain.
102
+ Use your own tools for lightweight discovery, inventory, and context gathering.
103
+ Prefer the structured checkout, indexing, retrieval, and inventory tools that are already attached to you over
104
+ ad hoc shell work when those tools are sufficient.
105
+ Use the attached subagent descriptions as the source of truth for what each subagent is for.
106
+ Do not delegate to a subagent whose description does not clearly match the task.
107
+ Integrate subagent results into one coherent answer and do not claim checks or evidence you did not obtain.
109
108
 
110
- When the user asks about available tools, skills, or agents, use the attached inventory tools instead of
111
- inferring from memory.
109
+ When the user asks about available tools, skills, or agents, use the attached inventory tools instead of
110
+ inferring from memory.
112
111
 
113
- Write to `/memories/*` only when the information is durable, reusable across future runs or threads, and likely
114
- to matter again: user preferences, project conventions, confirmed decisions, reusable summaries, and stable
115
- ownership facts are good candidates.
116
- Do not store transient reasoning, temporary plans, scratch work, one-off search results, or intermediate
117
- outputs that can be cheaply recomputed.
112
+ Write to `/memories/*` only when the information is durable, reusable across future runs or threads, and likely
113
+ to matter again: user preferences, project conventions, confirmed decisions, reusable summaries, and stable
114
+ ownership facts are good candidates.
115
+ Do not store transient reasoning, temporary plans, scratch work, one-off search results, or intermediate
116
+ outputs that can be cheaply recomputed.
@@ -8,6 +8,8 @@ export type ParsedAgentObject = {
8
8
  modelRef: string;
9
9
  runRoot?: string;
10
10
  toolRefs: string[];
11
+ toolBindings?: ParsedAgentToolBinding[];
12
+ inlineTools?: ParsedToolObject[];
11
13
  mcpServers?: Array<Record<string, unknown>>;
12
14
  skillPathRefs: string[];
13
15
  memorySources: string[];
@@ -17,6 +19,10 @@ export type ParsedAgentObject = {
17
19
  deepAgentConfig?: Record<string, unknown>;
18
20
  sourcePath: string;
19
21
  };
22
+ export type ParsedAgentToolBinding = {
23
+ ref: string;
24
+ overrides?: Record<string, unknown>;
25
+ };
20
26
  export type WorkspaceObject = {
21
27
  id: string;
22
28
  kind: string;
@@ -72,7 +78,9 @@ export type ParsedToolObject = {
72
78
  description: string;
73
79
  implementationName?: string;
74
80
  config?: Record<string, unknown>;
81
+ subprocess?: boolean;
75
82
  inputSchemaRef?: string;
83
+ embeddingModelRef?: string;
76
84
  backendOperation?: string;
77
85
  mcpRef?: string;
78
86
  bundleRefs: string[];
@@ -116,7 +124,9 @@ export type CompiledTool = {
116
124
  name: string;
117
125
  description: string;
118
126
  config?: Record<string, unknown>;
127
+ subprocess?: boolean;
119
128
  inputSchemaRef?: string;
129
+ embeddingModelRef?: string;
120
130
  backendOperation?: string;
121
131
  mcpRef?: string;
122
132
  bundleRefs: string[];
@@ -239,7 +249,7 @@ export type CompiledAgentBinding = {
239
249
  };
240
250
  export type WorkspaceBundle = {
241
251
  workspaceRoot: string;
242
- resourceSources: string[];
252
+ resources: string[];
243
253
  refs: Map<string, WorkspaceObject | ParsedAgentObject>;
244
254
  models: Map<string, ParsedModelObject>;
245
255
  embeddings: Map<string, ParsedEmbeddingModelObject>;
@@ -257,5 +267,5 @@ export type WorkspaceLoadOptions = {
257
267
  * Later values always override earlier values.
258
268
  */
259
269
  overlayRoots?: string[];
260
- resourceSources?: string[];
270
+ resources?: string[];
261
271
  };
@@ -15,7 +15,7 @@ function resolveToolRefId(ref) {
15
15
  if (ref.startsWith("builtin/")) {
16
16
  return ref;
17
17
  }
18
- return ref.split("/").slice(1).join("/");
18
+ return ref.includes("/") ? ref.split("/").slice(1).join("/") : ref;
19
19
  }
20
20
  export function resolveToolTargets(tools, ref) {
21
21
  const resolved = resolveToolRefId(ref);
@@ -28,6 +28,10 @@ export function resolveToolTargets(tools, ref) {
28
28
  if (exact) {
29
29
  return [exact];
30
30
  }
31
+ const byName = Array.from(tools.values()).filter((tool) => tool.name === resolved);
32
+ if (byName.length > 0) {
33
+ return byName;
34
+ }
31
35
  if (!resolved.startsWith("builtin/")) {
32
36
  return [];
33
37
  }
@@ -118,7 +122,9 @@ registerToolKind({
118
122
  name: tool.name,
119
123
  description: tool.description,
120
124
  config: tool.config,
125
+ subprocess: tool.subprocess,
121
126
  inputSchemaRef: tool.inputSchemaRef,
127
+ embeddingModelRef: tool.embeddingModelRef,
122
128
  bundleRefs: [],
123
129
  hitl: tool.hitl
124
130
  ? {
@@ -150,7 +156,9 @@ registerToolKind({
150
156
  name: tool.name,
151
157
  description: tool.description,
152
158
  config: tool.config,
159
+ subprocess: tool.subprocess,
153
160
  inputSchemaRef: tool.inputSchemaRef,
161
+ embeddingModelRef: tool.embeddingModelRef,
154
162
  backendOperation: tool.backendOperation,
155
163
  bundleRefs: [],
156
164
  hitl: tool.hitl
@@ -183,7 +191,9 @@ registerToolKind({
183
191
  name: tool.name,
184
192
  description: tool.description,
185
193
  config: tool.config,
194
+ subprocess: tool.subprocess,
186
195
  inputSchemaRef: tool.inputSchemaRef,
196
+ embeddingModelRef: tool.embeddingModelRef,
187
197
  mcpRef: tool.mcpRef,
188
198
  bundleRefs: [],
189
199
  hitl: tool.hitl
@@ -222,7 +232,9 @@ registerToolKind({
222
232
  name: tool.name,
223
233
  description: tool.description,
224
234
  config: tool.config,
235
+ subprocess: tool.subprocess,
225
236
  inputSchemaRef: tool.inputSchemaRef,
237
+ embeddingModelRef: tool.embeddingModelRef,
226
238
  bundleRefs: [],
227
239
  hitl: tool.hitl
228
240
  ? {
@@ -190,8 +190,8 @@ spec:
190
190
  `;
191
191
  }
192
192
  function renderResearchAgentYaml(options) {
193
- const toolsBlock = options.withWebSearch ? " tools:\n - ref: tool/web-search\n" : " tools: []\n";
194
- const subagentsBlock = options.template === "deep-research" ? " subagents:\n - ref: agent/research-analyst\n" : " subagents: []\n";
193
+ const toolsBlock = options.withWebSearch ? " tools:\n - web-search\n" : " tools: []\n";
194
+ const subagentsBlock = options.template === "deep-research" ? " subagents:\n - ref: agent/research-analyst\n" : " subagents: []\n";
195
195
  const prompt = options.withWebSearch
196
196
  ? "Break complex research requests into a clear plan, use web search when current information matters, and return a concise synthesis with sources and explicit uncertainty."
197
197
  : "Break complex research requests into a clear plan and return a concise synthesis with explicit assumptions and uncertainty.";
@@ -207,19 +207,18 @@ spec:
207
207
  runtime:
208
208
  runtimeMemory:
209
209
  ref: runtime-memory/default
210
- execution:
211
- backend: deepagent
212
- modelRef: model/default
213
- ${toolsBlock} skills:
214
- - path: ./
215
- ${subagentsBlock} config:
216
- backend:
217
- ref: backend/default
218
- systemPrompt: ${prompt}${delegationLine}
210
+ backend: deepagent
211
+ modelRef: model/default
212
+ ${toolsBlock} skills:
213
+ - deep-research
214
+ ${subagentsBlock} config:
215
+ backend:
216
+ ref: backend/default
217
+ systemPrompt: ${prompt}${delegationLine}
219
218
  `;
220
219
  }
221
220
  function renderResearchAnalystYaml(options) {
222
- const toolsBlock = options.withWebSearch ? " tools:\n - ref: tool/web-search\n" : " tools: []\n";
221
+ const toolsBlock = options.withWebSearch ? " tools:\n - web-search\n" : " tools: []\n";
223
222
  const prompt = options.withWebSearch
224
223
  ? "Gather current sources, compare claims carefully, extract the most decision-relevant facts, and return clean notes the host agent can synthesize."
225
224
  : "Break down the problem, compare alternatives carefully, extract the most decision-relevant facts, and return clean notes the host agent can synthesize.";
@@ -232,15 +231,14 @@ spec:
232
231
  runtime:
233
232
  runtimeMemory:
234
233
  ref: runtime-memory/default
235
- execution:
236
- backend: deepagent
237
- modelRef: model/default
238
- ${toolsBlock} skills:
239
- - path: ./
240
- config:
241
- backend:
242
- ref: backend/default
243
- systemPrompt: ${prompt}
234
+ backend: deepagent
235
+ modelRef: model/default
236
+ ${toolsBlock} skills:
237
+ - deep-research
238
+ config:
239
+ backend:
240
+ ref: backend/default
241
+ systemPrompt: ${prompt}
244
242
  `;
245
243
  }
246
244
  function renderResourcePackageJson(projectSlug) {
@@ -1 +1 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.133";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.135";
@@ -1 +1 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.133";
1
+ export const AGENT_HARNESS_VERSION = "0.0.135";
@@ -17,5 +17,9 @@ export type McpToolDescriptor = {
17
17
  };
18
18
  export declare function readMcpServerConfig(workspace: WorkspaceBundle, tool: WorkspaceBundle["tools"] extends Map<any, infer T> ? T : never): McpServerConfig | null;
19
19
  export declare function getOrCreateMcpClient(config: McpServerConfig): Promise<Client>;
20
+ export declare function closeMcpClientsForWorkspace(workspace: WorkspaceBundle): Promise<void>;
21
+ export declare function __resetMcpClientCacheForTests(): void;
22
+ export declare function __setMcpClientCacheEntryForTests(config: McpServerConfig, clientPromise: Promise<Client>): void;
23
+ export declare function __setMcpClientLoaderForTests(loader: (config: McpServerConfig) => Promise<Client>): void;
20
24
  export declare function listRemoteMcpTools(config: McpServerConfig): Promise<McpToolDescriptor[]>;
21
25
  export declare function createMcpToolResolver(workspace: WorkspaceBundle): NonNullable<RuntimeAdapterOptions["toolResolver"]>;
@@ -6,6 +6,7 @@ import { WebSocketClientTransport } from "@modelcontextprotocol/sdk/client/webso
6
6
  import { AGENT_HARNESS_VERSION } from "../package-version.js";
7
7
  import { createRuntimeEnv } from "../runtime/support/runtime-env.js";
8
8
  const mcpClientCache = new Map();
9
+ let mcpClientLoader = createConnectedMcpClient;
9
10
  function readStringRecord(value) {
10
11
  if (typeof value !== "object" || !value) {
11
12
  return undefined;
@@ -73,46 +74,124 @@ function createMcpCacheKey(config) {
73
74
  headers: config.headers ?? {},
74
75
  });
75
76
  }
77
+ async function createConnectedMcpClient(config) {
78
+ const client = new Client({
79
+ name: "agent-harness",
80
+ version: AGENT_HARNESS_VERSION,
81
+ });
82
+ const headers = {
83
+ ...(config.headers ?? {}),
84
+ ...(config.token ? { Authorization: `Bearer ${config.token}` } : {}),
85
+ };
86
+ const transport = config.transport === "http"
87
+ ? new StreamableHTTPClientTransport(new URL(config.url ?? ""), {
88
+ requestInit: Object.keys(headers).length > 0 ? { headers } : undefined,
89
+ })
90
+ : config.transport === "sse"
91
+ ? new SSEClientTransport(new URL(config.url ?? ""), {
92
+ requestInit: Object.keys(headers).length > 0 ? { headers } : undefined,
93
+ })
94
+ : config.transport === "websocket"
95
+ ? new WebSocketClientTransport(new URL(config.url ?? ""))
96
+ : new StdioClientTransport({
97
+ command: config.command ?? "",
98
+ args: config.args,
99
+ env: createRuntimeEnv(config.env),
100
+ cwd: config.cwd,
101
+ });
102
+ await client.connect(transport);
103
+ return client;
104
+ }
105
+ function isRecoverableMcpError(error) {
106
+ if (typeof error !== "object" || error === null) {
107
+ return false;
108
+ }
109
+ const message = typeof error.message === "string"
110
+ ? (error.message).toLowerCase()
111
+ : "";
112
+ const code = typeof error.code === "string"
113
+ ? (error.code).toLowerCase()
114
+ : "";
115
+ return [
116
+ "connection closed",
117
+ "transport closed",
118
+ "socket closed",
119
+ "stream closed",
120
+ "network socket disconnected",
121
+ ].some((pattern) => message.includes(pattern))
122
+ || ["econnreset", "epipe", "ehostunreach", "ecancelled"].includes(code);
123
+ }
124
+ async function closeCachedMcpClient(cacheKey) {
125
+ const cached = mcpClientCache.get(cacheKey);
126
+ mcpClientCache.delete(cacheKey);
127
+ if (!cached) {
128
+ return;
129
+ }
130
+ try {
131
+ const client = await cached;
132
+ await client.close();
133
+ }
134
+ catch {
135
+ // Ignore teardown failures for clients that never connected successfully.
136
+ }
137
+ }
138
+ async function invalidateMcpClient(config) {
139
+ await closeCachedMcpClient(createMcpCacheKey(config));
140
+ }
141
+ async function withRecoveredMcpClient(config, operation) {
142
+ const client = await getOrCreateMcpClient(config);
143
+ try {
144
+ return await operation(client);
145
+ }
146
+ catch (error) {
147
+ if (!isRecoverableMcpError(error)) {
148
+ throw error;
149
+ }
150
+ await invalidateMcpClient(config);
151
+ return operation(await getOrCreateMcpClient(config));
152
+ }
153
+ }
76
154
  export async function getOrCreateMcpClient(config) {
77
155
  const cacheKey = createMcpCacheKey(config);
78
156
  const cached = mcpClientCache.get(cacheKey);
79
157
  if (cached) {
80
158
  return cached;
81
159
  }
82
- const loading = (async () => {
83
- const client = new Client({
84
- name: "agent-harness",
85
- version: AGENT_HARNESS_VERSION,
86
- });
87
- const headers = {
88
- ...(config.headers ?? {}),
89
- ...(config.token ? { Authorization: `Bearer ${config.token}` } : {}),
90
- };
91
- const transport = config.transport === "http"
92
- ? new StreamableHTTPClientTransport(new URL(config.url ?? ""), {
93
- requestInit: Object.keys(headers).length > 0 ? { headers } : undefined,
94
- })
95
- : config.transport === "sse"
96
- ? new SSEClientTransport(new URL(config.url ?? ""), {
97
- requestInit: Object.keys(headers).length > 0 ? { headers } : undefined,
98
- })
99
- : config.transport === "websocket"
100
- ? new WebSocketClientTransport(new URL(config.url ?? ""))
101
- : new StdioClientTransport({
102
- command: config.command ?? "",
103
- args: config.args,
104
- env: createRuntimeEnv(config.env),
105
- cwd: config.cwd,
106
- });
107
- await client.connect(transport);
108
- return client;
109
- })();
160
+ const loading = mcpClientLoader(config).catch((error) => {
161
+ if (mcpClientCache.get(cacheKey) === loading) {
162
+ mcpClientCache.delete(cacheKey);
163
+ }
164
+ throw error;
165
+ });
110
166
  mcpClientCache.set(cacheKey, loading);
111
167
  return loading;
112
168
  }
169
+ export async function closeMcpClientsForWorkspace(workspace) {
170
+ const cacheKeys = new Set();
171
+ for (const tool of workspace.tools.values()) {
172
+ if (tool.type !== "mcp") {
173
+ continue;
174
+ }
175
+ const config = readMcpServerConfig(workspace, tool);
176
+ if (!config) {
177
+ continue;
178
+ }
179
+ cacheKeys.add(createMcpCacheKey(config));
180
+ }
181
+ await Promise.all(Array.from(cacheKeys, (cacheKey) => closeCachedMcpClient(cacheKey)));
182
+ }
183
+ export function __resetMcpClientCacheForTests() {
184
+ mcpClientCache.clear();
185
+ mcpClientLoader = createConnectedMcpClient;
186
+ }
187
+ export function __setMcpClientCacheEntryForTests(config, clientPromise) {
188
+ mcpClientCache.set(createMcpCacheKey(config), clientPromise);
189
+ }
190
+ export function __setMcpClientLoaderForTests(loader) {
191
+ mcpClientLoader = loader;
192
+ }
113
193
  async function getRemoteMcpToolDescriptor(config, remoteToolName) {
114
- const client = await getOrCreateMcpClient(config);
115
- const result = await client.listTools();
194
+ const result = await withRecoveredMcpClient(config, (client) => client.listTools());
116
195
  const tool = result.tools.find((item) => typeof item.name === "string" && item.name === remoteToolName);
117
196
  if (!tool || typeof tool.name !== "string") {
118
197
  return null;
@@ -124,8 +203,7 @@ async function getRemoteMcpToolDescriptor(config, remoteToolName) {
124
203
  };
125
204
  }
126
205
  export async function listRemoteMcpTools(config) {
127
- const client = await getOrCreateMcpClient(config);
128
- const result = await client.listTools();
206
+ const result = await withRecoveredMcpClient(config, (client) => client.listTools());
129
207
  return result.tools
130
208
  .filter((tool) => typeof tool.name === "string")
131
209
  .map((tool) => ({
@@ -155,11 +233,10 @@ export function createMcpToolResolver(workspace) {
155
233
  description: tool.description,
156
234
  inputSchemaPromise: descriptorPromise.then((descriptor) => descriptor?.inputSchema),
157
235
  async invoke(input) {
158
- const client = await getOrCreateMcpClient(serverConfig);
159
- const result = await client.callTool({
236
+ const result = await withRecoveredMcpClient(serverConfig, (client) => client.callTool({
160
237
  name: remoteToolName,
161
238
  arguments: typeof input === "object" && input !== null ? input : {},
162
- });
239
+ }));
163
240
  const textParts = Array.isArray(result.content)
164
241
  ? result.content
165
242
  .filter((item) => typeof item === "object" && item !== null && "type" in item)