@botbotgo/agent-harness 0.0.45 → 0.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,7 +17,7 @@ What it provides:
17
17
  - YAML-defined runtime assembly for hosts, models, routing, recovery, concurrency, MCP, and maintenance policy
18
18
  - backend-adapted execution with a generic runtime contract and current LangChain v1 / DeepAgents adapters
19
19
  - local `resources/tools/` and `resources/skills/` loading
20
- - persisted runs, threads, approvals, events, and resumable checkpoints
20
+ - persisted runs, threads, approvals, events, queued tasks, and resumable checkpoints
21
21
 
22
22
  ## Quick Start
23
23
 
@@ -35,6 +35,8 @@ your-workspace/
35
35
  agent-context.md
36
36
  workspace.yaml
37
37
  models.yaml
38
+ embedding-models.yaml
39
+ vector-stores.yaml
38
40
  stores.yaml
39
41
  tools.yaml
40
42
  mcp.yaml
@@ -77,7 +79,7 @@ try {
77
79
  - Persisted threads, runs, approvals, and lifecycle events
78
80
  - Recovery policy and resumable checkpoints
79
81
  - Background checkpoint maintenance
80
- - Runtime-level concurrency control
82
+ - Runtime-level concurrency control and queued-run persistence
81
83
 
82
84
  ## How To Use
83
85
 
@@ -150,6 +152,16 @@ const result = await run(runtime, {
150
152
 
151
153
  `subscribe(...)` is a read-only observer surface over stored lifecycle events.
152
154
 
155
+ The event stream includes, among others:
156
+
157
+ - `run.created`
158
+ - `run.queued`
159
+ - `run.dequeued`
160
+ - `run.state.changed`
161
+ - `approval.requested`
162
+ - `approval.resolved`
163
+ - `output.delta`
164
+
153
165
  ### Inspect Threads And Approvals
154
166
 
155
167
  ```ts
@@ -225,29 +237,105 @@ Use Kubernetes-style YAML:
225
237
 
226
238
  Use distinct names for named objects such as models, stores, checkpointers, tools, and MCP servers.
227
239
 
228
- ### `config/workspace.yaml`
240
+ ### Client-Configurable YAML Reference
241
+
242
+ This section is the client-facing explanation of what can be configured in YAML today and what each field changes at runtime.
243
+
244
+ There are three layers of client configuration:
245
+
246
+ - runtime-level policy in `config/workspace.yaml`
247
+ - reusable object catalogs in `config/*.yaml`
248
+ - agent assembly in `config/agents/*.yaml`
229
249
 
230
- Use this file for runtime-level policy:
250
+ ### `config/workspace.yaml`
231
251
 
232
- - `runRoot`
233
- - `routing.rules`
234
- - `routing.defaultAgentId`
235
- - `routing.systemPrompt`
236
- - `routing.modelRouting`
237
- - `concurrency.maxConcurrentRuns`
238
- - `recovery.enabled`
239
- - `recovery.resumeOnStartup`
240
- - `recovery.maxRecoveryAttempts`
241
- - `maintenance.checkpoints.*`
252
+ Use this file for runtime-level policy shared by the whole workspace.
253
+
254
+ Primary fields:
255
+
256
+ - `runRoot`: root directory where the runtime stores thread indexes, runs, approvals, artifacts, queued requests, and default local persistence
257
+ - `routing.defaultAgentId`: default host selected when no explicit routing rule matches
258
+ - `routing.rules`: ordered YAML routing rules evaluated before backend routing
259
+ - `routing.systemPrompt`: optional model-classifier prompt used only when model routing is enabled
260
+ - `routing.modelRouting`: opt in to model-driven host classification fallback
261
+ - `concurrency.maxConcurrentRuns`: maximum number of active runs; extra runs enter the persistent queue
262
+ - `recovery.enabled`: enables runtime-managed startup recovery
263
+ - `recovery.resumeOnStartup`: compatibility alias for resuming interrupted approval-driven runs on startup
264
+ - `recovery.resumeResumingRunsOnStartup`: explicit control for resuming interrupted approval-driven runs on startup
265
+ - `recovery.maxRecoveryAttempts`: upper bound for startup recovery retries
266
+ - `maintenance.checkpoints.enabled`: turns on background checkpoint cleanup
267
+ - `maintenance.checkpoints.schedule.intervalSeconds`: maintenance loop interval
268
+ - `maintenance.checkpoints.schedule.runOnStartup`: run checkpoint cleanup during startup
269
+ - `maintenance.checkpoints.policies.maxAgeSeconds`: age-based checkpoint cleanup
270
+ - `maintenance.checkpoints.policies.maxBytes`: size-based checkpoint cleanup
271
+ - `maintenance.checkpoints.sqlite.sweepBatchSize`: batch size for SQLite cleanup scans
272
+ - `maintenance.checkpoints.sqlite.vacuum`: vacuum SQLite after deletions
242
273
 
243
274
  If `runRoot` is omitted, the runtime defaults to `<workspace-root>/run-data`.
244
275
 
276
+ Example:
277
+
278
+ ```yaml
279
+ apiVersion: agent-harness/v1alpha1
280
+ kind: Runtime
281
+ metadata:
282
+ name: default
283
+ spec:
284
+ runRoot: ./.agent
285
+ concurrency:
286
+ maxConcurrentRuns: 3
287
+ routing:
288
+ defaultAgentId: orchestra
289
+ modelRouting: false
290
+ rules:
291
+ - agentId: orchestra
292
+ contains: ["latest", "recent", "today", "news"]
293
+ - agentId: orchestra
294
+ regex:
295
+ - "\\b(create|build|implement|fix|debug|review|inspect)\\b"
296
+ maintenance:
297
+ checkpoints:
298
+ enabled: true
299
+ schedule:
300
+ intervalSeconds: 3600
301
+ runOnStartup: true
302
+ policies:
303
+ maxAgeSeconds: 604800
304
+ sqlite:
305
+ sweepBatchSize: 200
306
+ vacuum: false
307
+ recovery:
308
+ enabled: true
309
+ resumeResumingRunsOnStartup: true
310
+ maxRecoveryAttempts: 3
311
+ ```
312
+
313
+ Notes:
314
+
315
+ - `routing.rules` only choose the starting host agent; they do not replace backend planning semantics
316
+ - queued runs are persisted under `runRoot` and continue after process restart
317
+ - `running` runs are replayed on startup only when every bound tool is marked `retryable: true`
318
+
245
319
  ### `config/agent-context.md`
246
320
 
247
321
  Use this file for shared startup context loaded into agents at construction time.
248
322
 
249
323
  Put stable project context here. Do not use it as mutable long-term memory.
250
324
 
325
+ Good uses:
326
+
327
+ - product positioning
328
+ - codebase conventions
329
+ - stable domain vocabulary
330
+ - organization-specific rules
331
+
332
+ Bad uses:
333
+
334
+ - transient scratch notes
335
+ - per-run execution state
336
+ - approval packets
337
+ - long-term memory that should live in the store
338
+
251
339
  ### `config/models.yaml`
252
340
 
253
341
  Use one file for multiple named models:
@@ -259,6 +347,7 @@ spec:
259
347
  - name: default
260
348
  provider: openai
261
349
  model: gpt-4.1
350
+ temperature: 0.2
262
351
  - name: planner
263
352
  provider: openai
264
353
  model: gpt-4.1-mini
@@ -266,6 +355,70 @@ spec:
266
355
 
267
356
  These load as `model/default` and `model/planner`.
268
357
 
358
+ Client-configurable model fields:
359
+
360
+ - `name`: catalog name referenced by `model/<name>`
361
+ - `provider`: provider family such as `openai`, `openai-compatible`, `ollama`, `anthropic`, or `google`
362
+ - `model`: provider model id
363
+ - top-level provider init fields such as `temperature`, `baseUrl`, API-specific settings, and client options
364
+ - `clientRef`: optional external client reference
365
+ - `fallbacks`: optional fallback model refs
366
+ - `metadata`: optional model metadata
367
+
368
+ ### `config/embedding-models.yaml`
369
+
370
+ Use this file for named embedding model presets used by retrieval-oriented tools.
371
+
372
+ ```yaml
373
+ apiVersion: agent-harness/v1alpha1
374
+ kind: EmbeddingModels
375
+ spec:
376
+ - name: default
377
+ provider: ollama
378
+ model: nomic-embed-text
379
+ baseUrl: http://localhost:11434
380
+ ```
381
+
382
+ Client-configurable embedding fields:
383
+
384
+ - `name`
385
+ - `provider`
386
+ - `model`
387
+ - top-level provider init fields such as `baseUrl`
388
+ - `clientRef`
389
+ - `metadata`
390
+
391
+ These load as `embedding-model/default`.
392
+
393
+ ### `config/vector-stores.yaml`
394
+
395
+ Use this file for named vector store presets referenced by retrieval tools.
396
+
397
+ ```yaml
398
+ apiVersion: agent-harness/v1alpha1
399
+ kind: VectorStores
400
+ spec:
401
+ - name: default
402
+ storeKind: LibSQLVectorStore
403
+ url: file:.agent/vector-store.db
404
+ table: rag_chunks
405
+ column: embedding
406
+ embeddingModelRef: embedding-model/default
407
+ ```
408
+
409
+ Client-configurable vector store fields:
410
+
411
+ - `name`
412
+ - `storeKind`
413
+ - `url`
414
+ - `authToken`
415
+ - `table`
416
+ - `column`
417
+ - `embeddingModelRef`
418
+ - `metadata`
419
+
420
+ These load as `vector-store/default`.
421
+
269
422
  ### `config/stores.yaml`
270
423
 
271
424
  Use one file for named persistence presets:
@@ -285,6 +438,73 @@ spec:
285
438
 
286
439
  These load as `store/default` and `checkpointer/default`.
287
440
 
441
+ Client-configurable store fields:
442
+
443
+ - `kind: Store` for backend stores
444
+ - `kind: Checkpointer` for resumable execution state
445
+ - `name` for refs
446
+ - `storeKind` such as `FileStore`, `InMemoryStore`, `RedisStore`, `PostgresStore`
447
+ - `checkpointerKind` such as `MemorySaver`, `FileCheckpointer`, `SqliteSaver`
448
+ - storage-specific fields such as `path`, connection strings, auth, and provider options
449
+
450
+ ### `config/tools.yaml`
451
+
452
+ Use this file for reusable tool presets and tool bundles.
453
+
454
+ Minimal collection form:
455
+
456
+ ```yaml
457
+ apiVersion: agent-harness/v1alpha1
458
+ kind: Tools
459
+ spec:
460
+ - kind: Tool
461
+ name: fetch_docs
462
+ type: function
463
+ description: Fetch a documentation page.
464
+ ```
465
+
466
+ Client-configurable tool fields:
467
+
468
+ - `name`
469
+ - `type`: `function`, `backend`, `mcp`, or `bundle`
470
+ - `description`
471
+ - `implementationName` for local JS tool modules
472
+ - `inputSchema.ref`
473
+ - `backend.operation`
474
+ - `mcp.ref` or `mcp.tool`
475
+ - `refs` for bundle composition
476
+ - `hitl.enabled` and `hitl.allow` for approval-gated tools
477
+ - `retryable: true` for tools that are safe to replay during startup recovery
478
+ - `config` for tool-specific options
479
+
480
+ Use `retryable` carefully. Mark a tool retryable only when repeated execution is safe or intentionally idempotent.
481
+
482
+ ### `config/mcp.yaml`
483
+
484
+ Use this file for reusable MCP server definitions and MCP-backed tool presets.
485
+
486
+ ```yaml
487
+ apiVersion: agent-harness/v1alpha1
488
+ kind: McpServers
489
+ spec:
490
+ - name: docs
491
+ transport: http
492
+ url: https://example.com/mcp
493
+ - name: local-browser
494
+ transport: stdio
495
+ command: node
496
+ args: ["./mcp-browser-server.mjs"]
497
+ ```
498
+
499
+ Client-configurable MCP fields:
500
+
501
+ - `name`
502
+ - `transport`: `stdio`, `http`, `sse`, or `websocket`
503
+ - `command`, `args`, `env`, `cwd` for stdio servers
504
+ - `url`, `token`, `headers` for network servers
505
+
506
+ These load as `mcp/<name>`.
507
+
288
508
  ### `config/agents/*.yaml`
289
509
 
290
510
  Prefer the generic agent form and declare the current execution backend explicitly:
@@ -303,20 +523,75 @@ spec:
303
523
 
304
524
  `kind: DeepAgent` and `kind: LangChainAgent` remain supported as compatibility forms, but `kind: Agent` is the recommended product-facing entry point.
305
525
 
306
- Common fields include:
307
-
308
- - `modelRef`
309
- - `execution.backend`
310
- - `systemPrompt`
311
- - `tools`
312
- - `skills`
313
- - `memory`
314
- - `checkpointer`
315
- - `store`
316
- - `backend`
317
- - `middleware`
318
- - `subagents`
319
- - `mcpServers`
526
+ Common client-configurable agent fields:
527
+
528
+ - `metadata.name`
529
+ - `metadata.description`
530
+ - `spec.execution.backend`
531
+ - `spec.modelRef`
532
+ - `spec.systemPrompt`
533
+ - `spec.tools`
534
+ - `spec.skills`
535
+ - `spec.memory`
536
+ - `spec.checkpointer`
537
+ - `spec.store`
538
+ - `spec.backend`
539
+ - `spec.middleware`
540
+ - `spec.subagents`
541
+ - `spec.mcpServers`
542
+ - `spec.responseFormat`
543
+ - `spec.contextSchema`
544
+
545
+ Typical patterns:
546
+
547
+ - use `direct` as a lightweight host for simple one-turn requests
548
+ - use `orchestra` as the main execution host for tools, multi-step work, and delegation
549
+ - keep routing policy in `config/workspace.yaml`, not buried in prompts
550
+
551
+ Example direct agent:
552
+
553
+ ```yaml
554
+ apiVersion: agent-harness/v1alpha1
555
+ kind: Agent
556
+ metadata:
557
+ name: direct
558
+ spec:
559
+ execution:
560
+ backend: langchain-v1
561
+ modelRef: model/default
562
+ checkpointer:
563
+ ref: checkpointer/default
564
+ systemPrompt: |-
565
+ You are the direct agent.
566
+ Answer simple requests directly.
567
+ ```
568
+
569
+ Example orchestra agent:
570
+
571
+ ```yaml
572
+ apiVersion: agent-harness/v1alpha1
573
+ kind: Agent
574
+ metadata:
575
+ name: orchestra
576
+ spec:
577
+ execution:
578
+ backend: deepagent
579
+ modelRef: model/default
580
+ memory:
581
+ - path: config/agent-context.md
582
+ store:
583
+ ref: store/default
584
+ checkpointer:
585
+ ref: checkpointer/default
586
+ backend:
587
+ kind: CompositeBackend
588
+ state:
589
+ kind: VfsSandbox
590
+ timeout: 600
591
+ routes:
592
+ /memories/:
593
+ kind: StoreBackend
594
+ ```
320
595
 
321
596
  ### `resources/`
322
597
 
@@ -329,6 +604,24 @@ Tool modules are discovered from `resources/tools/*.js`, `resources/tools/*.mjs`
329
604
 
330
605
  The preferred tool module format is exporting `tool({...})`.
331
606
 
607
+ Example:
608
+
609
+ ```js
610
+ import { z } from "zod";
611
+ import { tool } from "@botbotgo/agent-harness/tools";
612
+
613
+ export const local_lookup = tool({
614
+ description: "Lookup a ticker from a local tool module.",
615
+ retryable: true,
616
+ schema: {
617
+ ticker: z.string().min(1),
618
+ },
619
+ async invoke(input) {
620
+ return input.ticker.toUpperCase();
621
+ },
622
+ });
623
+ ```
624
+
332
625
  Keep runtime extension source under `resources/`. Keep tests outside the published source tree, for example under repository `test/`.
333
626
 
334
627
  ## Design Notes
@@ -337,11 +630,12 @@ Keep runtime extension source under `resources/`. Keep tests outside the publish
337
630
  - agent-level execution behavior stays upstream
338
631
  - application-level orchestration and lifecycle management stays in the harness
339
632
  - checkpoint resume is treated as a system-managed runtime behavior, not a primary public abstraction
633
+ - a generic public runtime contract does not imply backend-agnostic implementation internals; it means client-facing semantics stay stable even when adapters change
340
634
 
341
635
  ## API Summary
342
636
 
343
637
  - `createAgentHarness(...)`
344
- - `run(...)`
638
+ - `run(runtime, {...})`
345
639
  - `subscribe(...)`
346
640
  - `listThreads(...)`
347
641
  - `getThread(...)`
@@ -22,6 +22,11 @@ spec:
22
22
  # Value options: relative workspace path like `./.agent`, or an absolute filesystem path.
23
23
  runRoot: ./.agent
24
24
 
25
+ # agent-harness feature: runtime-level task queue and maximum number of concurrent runs.
26
+ # Additional runs wait in the harness queue until a slot becomes available.
27
+ concurrency:
28
+ maxConcurrentRuns: 3
29
+
25
30
  # agent-harness feature: optional host-router prompt override used when the runtime chooses between
26
31
  # top-level host agents such as a main execution host and an optional low-latency side host.
27
32
  # Use placeholders so the same prompt can survive host renames:
@@ -90,19 +95,17 @@ spec:
90
95
  # - oldest-first deletion by time policy and/or size policy
91
96
  # - background scheduling inside the harness lifecycle
92
97
  #
93
- # Example:
94
- # maintenance:
95
- # checkpoints:
96
- # enabled: true
97
- # schedule:
98
- # intervalSeconds: 3600
99
- # runOnStartup: true
100
- # policies:
101
- # maxAgeSeconds: 604800
102
- # maxBytes: 1073741824
103
- # sqlite:
104
- # sweepBatchSize: 200
105
- # vacuum: false
98
+ maintenance:
99
+ checkpoints:
100
+ enabled: true
101
+ schedule:
102
+ intervalSeconds: 3600
103
+ runOnStartup: true
104
+ policies:
105
+ maxAgeSeconds: 604800
106
+ sqlite:
107
+ sweepBatchSize: 200
108
+ vacuum: false
106
109
 
107
110
  # agent-harness feature: runtime-managed recovery policy for interrupted runs.
108
111
  # This keeps checkpoint resume as an internal lifecycle concern instead of a primary user-facing API concept.
@@ -112,8 +115,7 @@ spec:
112
115
  # - persisted approval-decision intent for cross-restart resume continuation
113
116
  # - bounded retry attempts to avoid infinite restart loops
114
117
  #
115
- # Example:
116
- # recovery:
117
- # enabled: true
118
- # resumeResumingRunsOnStartup: true
119
- # maxRecoveryAttempts: 3
118
+ recovery:
119
+ enabled: true
120
+ resumeResumingRunsOnStartup: true
121
+ maxRecoveryAttempts: 3
@@ -4,7 +4,7 @@ export type RuntimeCapabilities = {
4
4
  delegation?: boolean;
5
5
  memory?: boolean;
6
6
  };
7
- export type RunState = "running" | "waiting_for_approval" | "resuming" | "completed" | "failed";
7
+ export type RunState = "queued" | "running" | "waiting_for_approval" | "resuming" | "completed" | "failed";
8
8
  export type ParsedAgentObject = {
9
9
  id: string;
10
10
  executionMode: ExecutionMode;
@@ -85,6 +85,7 @@ export type ParsedToolObject = {
85
85
  enabled: boolean;
86
86
  allow?: Array<"approve" | "edit" | "reject">;
87
87
  };
88
+ retryable?: boolean;
88
89
  sourcePath: string;
89
90
  };
90
91
  export type LangChainAgentParams = {
@@ -172,6 +173,7 @@ export type CompiledTool = {
172
173
  enabled: boolean;
173
174
  allow: Array<"approve" | "edit" | "reject">;
174
175
  };
176
+ retryable?: boolean;
175
177
  runtimeValue: {
176
178
  name: string;
177
179
  description: string;
@@ -221,7 +223,7 @@ export type ThreadSummary = {
221
223
  status: RunState;
222
224
  };
223
225
  export type SessionRecord = ThreadSummary;
224
- export type KnownHarnessEventType = "run.created" | "run.state.changed" | "run.resumed" | "approval.requested" | "approval.resolved" | "artifact.created" | "output.delta" | "reasoning.delta" | "runtime.synthetic_fallback";
226
+ export type KnownHarnessEventType = "run.created" | "run.queued" | "run.dequeued" | "run.state.changed" | "run.resumed" | "approval.requested" | "approval.resolved" | "artifact.created" | "output.delta" | "reasoning.delta" | "runtime.synthetic_fallback";
225
227
  export type HarnessEventType = KnownHarnessEventType | (string & {});
226
228
  export type HarnessEvent = {
227
229
  eventId: string;
@@ -126,6 +126,7 @@ registerToolKind({
126
126
  allow: tool.hitl.allow ?? ["approve", "edit", "reject"],
127
127
  }
128
128
  : undefined,
129
+ retryable: tool.retryable,
129
130
  runtimeValue: { name: tool.name, description: tool.description, type: "function" },
130
131
  },
131
132
  ];
@@ -158,6 +159,7 @@ registerToolKind({
158
159
  allow: tool.hitl.allow ?? ["approve", "edit", "reject"],
159
160
  }
160
161
  : undefined,
162
+ retryable: tool.retryable,
161
163
  runtimeValue: { name: tool.name, description: tool.description, type: "backend" },
162
164
  },
163
165
  ];
@@ -190,6 +192,7 @@ registerToolKind({
190
192
  allow: tool.hitl.allow ?? ["approve", "edit", "reject"],
191
193
  }
192
194
  : undefined,
195
+ retryable: tool.retryable,
193
196
  runtimeValue: { name: tool.name, description: tool.description, type: "mcp" },
194
197
  },
195
198
  ];
@@ -1 +1 @@
1
- export declare const AGENT_HARNESS_VERSION = "0.0.44";
1
+ export declare const AGENT_HARNESS_VERSION = "0.0.45";
@@ -1 +1 @@
1
- export const AGENT_HARNESS_VERSION = "0.0.44";
1
+ export const AGENT_HARNESS_VERSION = "0.0.45";
@@ -1,4 +1,4 @@
1
- import type { ArtifactListing, ArtifactRecord, DelegationRecord, HarnessEvent, InternalApprovalRecord, RunState, ThreadSummary, ThreadRunRecord, TranscriptMessage } from "../contracts/types.js";
1
+ import type { ArtifactListing, ArtifactRecord, DelegationRecord, HarnessEvent, InternalApprovalRecord, InvocationEnvelope, MessageContent, RunState, ThreadSummary, ThreadRunRecord, TranscriptMessage } from "../contracts/types.js";
2
2
  type ThreadMeta = {
3
3
  threadId: string;
4
4
  workspaceId: string;
@@ -39,6 +39,11 @@ type RecoveryIntent = {
39
39
  resumePayload: unknown;
40
40
  attempts: number;
41
41
  };
42
+ type PersistedRunRequest = {
43
+ input: MessageContent;
44
+ invocation?: InvocationEnvelope;
45
+ savedAt: string;
46
+ };
42
47
  export declare class FilePersistence {
43
48
  private readonly runRoot;
44
49
  constructor(runRoot: string);
@@ -73,6 +78,9 @@ export declare class FilePersistence {
73
78
  getRunApprovals(threadId: string, runId: string): Promise<InternalApprovalRecord[]>;
74
79
  getRunMeta(threadId: string, runId: string): Promise<RunMeta>;
75
80
  getRunLifecycle(threadId: string, runId: string): Promise<Lifecycle>;
81
+ saveRunRequest(threadId: string, runId: string, request: PersistedRunRequest): Promise<void>;
82
+ getRunRequest(threadId: string, runId: string): Promise<PersistedRunRequest | null>;
83
+ clearRunRequest(threadId: string, runId: string): Promise<void>;
76
84
  listDelegations(): Promise<DelegationRecord[]>;
77
85
  createApproval(record: InternalApprovalRecord): Promise<void>;
78
86
  resolveApproval(threadId: string, runId: string, approvalId: string, status: InternalApprovalRecord["status"]): Promise<InternalApprovalRecord>;
@@ -266,6 +266,22 @@ export class FilePersistence {
266
266
  async getRunLifecycle(threadId, runId) {
267
267
  return readJson(path.join(this.runDir(threadId, runId), "lifecycle.json"));
268
268
  }
269
+ async saveRunRequest(threadId, runId, request) {
270
+ await writeJson(path.join(this.runDir(threadId, runId), "request.json"), request);
271
+ }
272
+ async getRunRequest(threadId, runId) {
273
+ const requestPath = path.join(this.runDir(threadId, runId), "request.json");
274
+ if (!(await fileExists(requestPath))) {
275
+ return null;
276
+ }
277
+ return readJson(requestPath);
278
+ }
279
+ async clearRunRequest(threadId, runId) {
280
+ const requestPath = path.join(this.runDir(threadId, runId), "request.json");
281
+ if (await fileExists(requestPath)) {
282
+ await rm(requestPath, { force: true });
283
+ }
284
+ }
269
285
  async listDelegations() {
270
286
  const delegationsDir = path.join(this.runRoot, "indexes", "delegations");
271
287
  if (!(await fileExists(delegationsDir))) {
@@ -6,6 +6,7 @@ export type ResourceToolInfo = {
6
6
  backendOperation: string;
7
7
  name: string;
8
8
  description: string;
9
+ retryable?: boolean;
9
10
  hitl?: {
10
11
  enabled: boolean;
11
12
  allow: Array<"approve" | "edit" | "reject">;
@@ -40,6 +40,7 @@ export declare class AgentHarnessRuntime {
40
40
  private getBinding;
41
41
  private listAgentTools;
42
42
  private resolveAgentTools;
43
+ private supportsRunningReplay;
43
44
  listThreads(filter?: {
44
45
  agentId?: string;
45
46
  }): Promise<ThreadSummary[]>;
@@ -62,6 +63,8 @@ export declare class AgentHarnessRuntime {
62
63
  private loadRunInput;
63
64
  private appendAssistantMessage;
64
65
  private invokeWithHistory;
66
+ private buildPersistedRunRequest;
67
+ private executeQueuedRun;
65
68
  private checkpointRefForState;
66
69
  private finalizeContinuedRun;
67
70
  private emitOutputDeltaAndCreateItem;
@@ -213,6 +213,10 @@ export class AgentHarnessRuntime {
213
213
  resolvedTool: resolvedTools[index],
214
214
  }));
215
215
  }
216
+ supportsRunningReplay(binding) {
217
+ const tools = getBindingPrimaryTools(binding);
218
+ return tools.every((tool) => tool.retryable === true);
219
+ }
216
220
  async listThreads(filter) {
217
221
  const threadSummaries = await this.persistence.listSessions();
218
222
  if (!filter?.agentId) {
@@ -385,6 +389,72 @@ export class AgentHarnessRuntime {
385
389
  const priorHistory = await this.loadPriorHistory(threadId, runId);
386
390
  return this.runtimeAdapter.invoke(binding, input, threadId, runId, resumePayload, priorHistory, options);
387
391
  }
392
+ buildPersistedRunRequest(input, invocation) {
393
+ const envelope = invocation.invocation ?? {
394
+ ...(invocation.context ? { context: invocation.context } : {}),
395
+ ...(invocation.state ? { inputs: invocation.state } : {}),
396
+ ...(invocation.files ? { attachments: invocation.files } : {}),
397
+ };
398
+ return {
399
+ input: normalizeMessageContent(input),
400
+ invocation: envelope && Object.keys(envelope).length > 0
401
+ ? {
402
+ ...(envelope.context ? { context: envelope.context } : {}),
403
+ ...(envelope.inputs ? { inputs: envelope.inputs } : {}),
404
+ ...(envelope.attachments ? { attachments: envelope.attachments } : {}),
405
+ ...(envelope.capabilities ? { capabilities: envelope.capabilities } : {}),
406
+ }
407
+ : undefined,
408
+ savedAt: new Date().toISOString(),
409
+ };
410
+ }
411
+ async executeQueuedRun(binding, input, threadId, runId, agentId, options = {}) {
412
+ const previousState = options.previousState ?? "running";
413
+ if (previousState === "queued") {
414
+ await this.emit(threadId, runId, 101, "run.dequeued", {
415
+ queuePosition: 0,
416
+ activeRunCount: this.activeRunSlots,
417
+ maxConcurrentRuns: this.concurrencyConfig.maxConcurrentRuns,
418
+ recoveredOnStartup: true,
419
+ });
420
+ await this.setRunStateAndEmit(threadId, runId, 102, "running", {
421
+ previousState: "queued",
422
+ });
423
+ }
424
+ try {
425
+ const actual = await this.invokeWithHistory(binding, input, threadId, runId, undefined, {
426
+ context: options.context,
427
+ state: options.state,
428
+ files: options.files,
429
+ });
430
+ const finalized = await this.finalizeContinuedRun(threadId, runId, input, actual, {
431
+ previousState: previousState === "queued" ? "running" : previousState,
432
+ stateSequence: options.stateSequence ?? 103,
433
+ approvalSequence: options.approvalSequence ?? 104,
434
+ });
435
+ return {
436
+ ...finalized,
437
+ agentId,
438
+ };
439
+ }
440
+ catch (error) {
441
+ await this.emitSyntheticFallback(threadId, runId, agentId, error, 103);
442
+ await this.setRunStateAndEmit(threadId, runId, 104, "failed", {
443
+ previousState: previousState === "queued" ? "running" : previousState,
444
+ error: error instanceof Error ? error.message : String(error),
445
+ });
446
+ return {
447
+ threadId,
448
+ runId,
449
+ agentId,
450
+ state: "failed",
451
+ output: renderRuntimeFailure(error),
452
+ };
453
+ }
454
+ finally {
455
+ await this.persistence.clearRunRequest(threadId, runId);
456
+ }
457
+ }
388
458
  checkpointRefForState(threadId, runId, state) {
389
459
  return state === "waiting_for_approval" ? `checkpoints/${threadId}/${runId}/cp-1` : null;
390
460
  }
@@ -492,17 +562,56 @@ export class AgentHarnessRuntime {
492
562
  }
493
563
  await listener(value);
494
564
  }
495
- async acquireRunSlot() {
565
+ async acquireRunSlot(threadId, runId, activeState = "running") {
496
566
  const maxConcurrentRuns = this.concurrencyConfig.maxConcurrentRuns;
497
567
  if (!maxConcurrentRuns) {
498
568
  return () => undefined;
499
569
  }
500
- if (this.activeRunSlots >= maxConcurrentRuns) {
501
- await new Promise((resolve) => {
502
- this.pendingRunSlots.push(resolve);
570
+ if (this.activeRunSlots < maxConcurrentRuns) {
571
+ this.activeRunSlots += 1;
572
+ let released = false;
573
+ return () => {
574
+ if (released) {
575
+ return;
576
+ }
577
+ released = true;
578
+ this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
579
+ const next = this.pendingRunSlots.shift();
580
+ void next?.();
581
+ };
582
+ }
583
+ if (threadId && runId) {
584
+ const queuePosition = this.pendingRunSlots.length + 1;
585
+ await this.setRunStateAndEmit(threadId, runId, 2, "queued", {
586
+ previousState: activeState,
587
+ });
588
+ await this.emit(threadId, runId, 3, "run.queued", {
589
+ queuePosition,
590
+ activeRunCount: this.activeRunSlots,
591
+ maxConcurrentRuns,
503
592
  });
504
593
  }
505
- this.activeRunSlots += 1;
594
+ await new Promise((resolve, reject) => {
595
+ this.pendingRunSlots.push(async () => {
596
+ try {
597
+ this.activeRunSlots += 1;
598
+ if (threadId && runId) {
599
+ await this.emit(threadId, runId, 4, "run.dequeued", {
600
+ queuePosition: 0,
601
+ activeRunCount: this.activeRunSlots,
602
+ maxConcurrentRuns,
603
+ });
604
+ await this.setRunStateAndEmit(threadId, runId, 5, activeState, {
605
+ previousState: "queued",
606
+ });
607
+ }
608
+ resolve();
609
+ }
610
+ catch (error) {
611
+ reject(error);
612
+ }
613
+ });
614
+ });
506
615
  let released = false;
507
616
  return () => {
508
617
  if (released) {
@@ -511,7 +620,7 @@ export class AgentHarnessRuntime {
511
620
  released = true;
512
621
  this.activeRunSlots = Math.max(0, this.activeRunSlots - 1);
513
622
  const next = this.pendingRunSlots.shift();
514
- next?.();
623
+ void next?.();
515
624
  };
516
625
  }
517
626
  async dispatchRunListeners(stream, listeners) {
@@ -587,88 +696,68 @@ export class AgentHarnessRuntime {
587
696
  if (options.listeners) {
588
697
  return this.dispatchRunListeners(this.streamEvents(options), options.listeners);
589
698
  }
590
- const releaseRunSlot = await this.acquireRunSlot();
699
+ const invocation = this.normalizeInvocationEnvelope(options);
700
+ const selectedAgentId = await this.resolveSelectedAgentId(options.input, options.agentId, options.threadId);
701
+ const binding = this.workspace.bindings.get(selectedAgentId);
702
+ if (!binding) {
703
+ throw new Error(`Unknown agent ${selectedAgentId}`);
704
+ }
705
+ const policyDecision = this.policyEngine.evaluate(binding);
706
+ if (!policyDecision.allowed) {
707
+ throw new Error(`Policy evaluation blocked agent ${selectedAgentId}: ${policyDecision.reasons.join(", ")}`);
708
+ }
709
+ const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
710
+ await this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation));
711
+ await this.emitRunCreated(threadId, runId, {
712
+ agentId: binding.agent.id,
713
+ requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
714
+ selectedAgentId,
715
+ executionMode: getBindingAdapterKind(binding),
716
+ });
717
+ const releaseRunSlot = await this.acquireRunSlot(threadId, runId);
591
718
  try {
592
- const invocation = this.normalizeInvocationEnvelope(options);
593
- const selectedAgentId = await this.resolveSelectedAgentId(options.input, options.agentId, options.threadId);
594
- const binding = this.workspace.bindings.get(selectedAgentId);
595
- if (!binding) {
596
- throw new Error(`Unknown agent ${selectedAgentId}`);
597
- }
598
- const policyDecision = this.policyEngine.evaluate(binding);
599
- if (!policyDecision.allowed) {
600
- throw new Error(`Policy evaluation blocked agent ${selectedAgentId}: ${policyDecision.reasons.join(", ")}`);
601
- }
602
- const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
603
- await this.emitRunCreated(threadId, runId, {
604
- agentId: binding.agent.id,
605
- requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
606
- selectedAgentId,
607
- executionMode: getBindingAdapterKind(binding),
719
+ return await this.executeQueuedRun(binding, options.input, threadId, runId, selectedAgentId, {
720
+ context: invocation.context,
721
+ state: invocation.state,
722
+ files: invocation.files,
723
+ previousState: "running",
724
+ stateSequence: 6,
725
+ approvalSequence: 7,
608
726
  });
609
- try {
610
- const actual = await this.invokeWithHistory(binding, options.input, threadId, runId, undefined, {
611
- context: invocation.context,
612
- state: invocation.state,
613
- files: invocation.files,
614
- });
615
- const finalized = await this.finalizeContinuedRun(threadId, runId, options.input, actual, {
616
- previousState: null,
617
- stateSequence: 3,
618
- approvalSequence: 4,
619
- });
620
- return {
621
- ...finalized,
622
- agentId: selectedAgentId,
623
- };
624
- }
625
- catch (error) {
626
- await this.emitSyntheticFallback(threadId, runId, selectedAgentId, error);
627
- await this.setRunStateAndEmit(threadId, runId, 4, "failed", {
628
- previousState: null,
629
- error: error instanceof Error ? error.message : String(error),
630
- });
631
- return {
632
- threadId,
633
- runId,
634
- agentId: selectedAgentId,
635
- state: "failed",
636
- output: renderRuntimeFailure(error),
637
- };
638
- }
639
727
  }
640
728
  finally {
641
729
  releaseRunSlot();
642
730
  }
643
731
  }
644
732
  async *streamEvents(options) {
645
- const releaseRunSlot = await this.acquireRunSlot();
646
- try {
647
- const invocation = this.normalizeInvocationEnvelope(options);
648
- const selectedAgentId = await this.resolveSelectedAgentId(options.input, options.agentId, options.threadId);
649
- const binding = this.workspace.bindings.get(selectedAgentId);
650
- if (!binding) {
651
- const result = await this.run(options);
652
- for (const line of result.output.split("\n")) {
653
- yield {
654
- type: "content",
655
- threadId: result.threadId,
656
- runId: result.runId,
657
- agentId: result.agentId ?? selectedAgentId,
658
- content: `${line}\n`,
659
- };
660
- }
661
- return;
733
+ const invocation = this.normalizeInvocationEnvelope(options);
734
+ const selectedAgentId = await this.resolveSelectedAgentId(options.input, options.agentId, options.threadId);
735
+ const binding = this.workspace.bindings.get(selectedAgentId);
736
+ if (!binding) {
737
+ const result = await this.run(options);
738
+ for (const line of result.output.split("\n")) {
739
+ yield {
740
+ type: "content",
741
+ threadId: result.threadId,
742
+ runId: result.runId,
743
+ agentId: result.agentId ?? selectedAgentId,
744
+ content: `${line}\n`,
745
+ };
662
746
  }
663
- let emitted = false;
664
- const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
665
- yield { type: "event", event: await this.emitRunCreated(threadId, runId, {
666
- agentId: selectedAgentId,
667
- requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
668
- selectedAgentId,
669
- input: options.input,
670
- state: "running",
671
- }) };
747
+ return;
748
+ }
749
+ let emitted = false;
750
+ const { threadId, runId } = await this.ensureThreadStarted(selectedAgentId, binding, options.input, options.threadId);
751
+ await this.persistence.saveRunRequest(threadId, runId, this.buildPersistedRunRequest(options.input, invocation));
752
+ yield { type: "event", event: await this.emitRunCreated(threadId, runId, {
753
+ agentId: selectedAgentId,
754
+ requestedAgentId: options.agentId ?? AUTO_AGENT_ID,
755
+ selectedAgentId,
756
+ input: options.input,
757
+ state: "running",
758
+ }) };
759
+ const releaseRunSlot = await this.acquireRunSlot(threadId, runId);
760
+ try {
672
761
  try {
673
762
  const priorHistory = await this.loadPriorHistory(threadId, runId);
674
763
  let assistantOutput = "";
@@ -686,11 +775,11 @@ export class AgentHarnessRuntime {
686
775
  : chunk;
687
776
  if (normalizedChunk.kind === "interrupt") {
688
777
  const checkpointRef = `checkpoints/${threadId}/${runId}/cp-1`;
689
- const waitingEvent = await this.setRunStateAndEmit(threadId, runId, 4, "waiting_for_approval", {
690
- previousState: null,
778
+ const waitingEvent = await this.setRunStateAndEmit(threadId, runId, 6, "waiting_for_approval", {
779
+ previousState: "running",
691
780
  checkpointRef,
692
781
  });
693
- const approvalRequest = await this.requestApprovalAndEmit(threadId, runId, options.input, normalizedChunk.content, checkpointRef, 5);
782
+ const approvalRequest = await this.requestApprovalAndEmit(threadId, runId, options.input, normalizedChunk.content, checkpointRef, 7);
694
783
  yield {
695
784
  type: "event",
696
785
  event: waitingEvent,
@@ -783,22 +872,22 @@ export class AgentHarnessRuntime {
783
872
  finalMessageText: assistantOutput,
784
873
  },
785
874
  };
786
- yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 4, "completed", {
787
- previousState: null,
875
+ yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 6, "completed", {
876
+ previousState: "running",
788
877
  }) };
789
878
  return;
790
879
  }
791
880
  catch (error) {
792
881
  if (emitted) {
793
- yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 4, "failed", {
794
- previousState: null,
882
+ yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 6, "failed", {
883
+ previousState: "running",
795
884
  error: error instanceof Error ? error.message : String(error),
796
885
  }) };
797
886
  return;
798
887
  }
799
888
  if (error instanceof RuntimeOperationTimeoutError && error.stage === "invoke") {
800
- yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 4, "failed", {
801
- previousState: null,
889
+ yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 6, "failed", {
890
+ previousState: "running",
802
891
  error: error.message,
803
892
  }) };
804
893
  yield {
@@ -836,15 +925,15 @@ export class AgentHarnessRuntime {
836
925
  agentId: selectedAgentId,
837
926
  },
838
927
  };
839
- yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 4, actual.state, {
840
- previousState: null,
928
+ yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 6, actual.state, {
929
+ previousState: "running",
841
930
  }) };
842
931
  return;
843
932
  }
844
933
  catch (invokeError) {
845
934
  await this.emitSyntheticFallback(threadId, runId, selectedAgentId, invokeError);
846
- yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 4, "failed", {
847
- previousState: null,
935
+ yield { type: "event", event: await this.setRunStateAndEmit(threadId, runId, 6, "failed", {
936
+ previousState: "running",
848
937
  error: invokeError instanceof Error ? invokeError.message : String(invokeError),
849
938
  }) };
850
939
  yield {
@@ -870,29 +959,30 @@ export class AgentHarnessRuntime {
870
959
  }
871
960
  }
872
961
  finally {
962
+ await this.persistence.clearRunRequest(threadId, runId);
873
963
  releaseRunSlot();
874
964
  }
875
965
  }
876
966
  async resume(options) {
877
- const releaseRunSlot = await this.acquireRunSlot();
967
+ const approvalById = options.approvalId ? await this.persistence.getApproval(options.approvalId) : null;
968
+ const thread = options.threadId
969
+ ? await this.getSession(options.threadId)
970
+ : approvalById
971
+ ? await this.getSession(approvalById.threadId)
972
+ : null;
973
+ if (!thread) {
974
+ throw new Error("resume requires either threadId or approvalId");
975
+ }
976
+ const approval = approvalById ?? await this.resolveApprovalRecord(options, thread);
977
+ const threadId = approval.threadId;
978
+ const runId = approval.runId;
979
+ const binding = this.workspace.bindings.get(thread.agentId);
980
+ if (!binding) {
981
+ throw new Error(`Unknown agent ${thread.agentId}`);
982
+ }
983
+ await this.persistence.setRunState(threadId, runId, "resuming", `checkpoints/${threadId}/${runId}/cp-1`);
984
+ const releaseRunSlot = await this.acquireRunSlot(threadId, runId, "resuming");
878
985
  try {
879
- const approvalById = options.approvalId ? await this.persistence.getApproval(options.approvalId) : null;
880
- const thread = options.threadId
881
- ? await this.getSession(options.threadId)
882
- : approvalById
883
- ? await this.getSession(approvalById.threadId)
884
- : null;
885
- if (!thread) {
886
- throw new Error("resume requires either threadId or approvalId");
887
- }
888
- const approval = approvalById ?? await this.resolveApprovalRecord(options, thread);
889
- const threadId = approval.threadId;
890
- const runId = approval.runId;
891
- const binding = this.workspace.bindings.get(thread.agentId);
892
- if (!binding) {
893
- throw new Error(`Unknown agent ${thread.agentId}`);
894
- }
895
- await this.persistence.setRunState(threadId, runId, "resuming", `checkpoints/${threadId}/${runId}/cp-1`);
896
986
  await this.persistence.saveRecoveryIntent(threadId, runId, {
897
987
  kind: "approval-decision",
898
988
  savedAt: new Date().toISOString(),
@@ -978,12 +1068,72 @@ export class AgentHarnessRuntime {
978
1068
  await this.close();
979
1069
  }
980
1070
  async recoverStartupRuns() {
981
- if (!this.recoveryConfig.enabled || !this.recoveryConfig.resumeResumingRunsOnStartup) {
1071
+ if (!this.recoveryConfig.enabled) {
982
1072
  return;
983
1073
  }
984
1074
  const threads = await this.persistence.listSessions();
985
1075
  for (const thread of threads) {
986
- if (thread.status !== "resuming") {
1076
+ if (thread.status === "queued") {
1077
+ const runMeta = await this.persistence.getRunMeta(thread.threadId, thread.latestRunId);
1078
+ const binding = this.workspace.bindings.get(runMeta.agentId);
1079
+ if (!binding) {
1080
+ continue;
1081
+ }
1082
+ const request = await this.persistence.getRunRequest(thread.threadId, thread.latestRunId);
1083
+ if (!request) {
1084
+ await this.setRunStateAndEmit(thread.threadId, thread.latestRunId, 100, "failed", {
1085
+ previousState: "queued",
1086
+ error: "missing persisted run request for queued run recovery",
1087
+ });
1088
+ continue;
1089
+ }
1090
+ const releaseRunSlot = await this.acquireRunSlot();
1091
+ try {
1092
+ await this.executeQueuedRun(binding, request.input, thread.threadId, thread.latestRunId, runMeta.agentId, {
1093
+ context: request.invocation?.context,
1094
+ state: request.invocation?.inputs,
1095
+ files: request.invocation?.attachments,
1096
+ previousState: "queued",
1097
+ stateSequence: 103,
1098
+ approvalSequence: 104,
1099
+ });
1100
+ }
1101
+ finally {
1102
+ releaseRunSlot();
1103
+ }
1104
+ continue;
1105
+ }
1106
+ if (thread.status === "running") {
1107
+ const runMeta = await this.persistence.getRunMeta(thread.threadId, thread.latestRunId);
1108
+ const binding = this.workspace.bindings.get(runMeta.agentId);
1109
+ if (!binding || !this.supportsRunningReplay(binding)) {
1110
+ continue;
1111
+ }
1112
+ const request = await this.persistence.getRunRequest(thread.threadId, thread.latestRunId);
1113
+ if (!request) {
1114
+ continue;
1115
+ }
1116
+ const releaseRunSlot = await this.acquireRunSlot();
1117
+ try {
1118
+ await this.emit(thread.threadId, thread.latestRunId, 100, "run.resumed", {
1119
+ resumeKind: "startup-running-recovery",
1120
+ state: "running",
1121
+ });
1122
+ await this.executeQueuedRun(binding, request.input, thread.threadId, thread.latestRunId, runMeta.agentId, {
1123
+ context: request.invocation?.context,
1124
+ state: request.invocation?.inputs,
1125
+ files: request.invocation?.attachments,
1126
+ previousState: "running",
1127
+ stateSequence: 103,
1128
+ approvalSequence: 104,
1129
+ });
1130
+ }
1131
+ finally {
1132
+ releaseRunSlot();
1133
+ }
1134
+ continue;
1135
+ }
1136
+ if (thread.status !== "resuming" || !this.recoveryConfig.resumeResumingRunsOnStartup) {
987
1137
  continue;
988
1138
  }
989
1139
  const binding = this.workspace.bindings.get(thread.agentId);
@@ -44,6 +44,8 @@ function renderOpenApprovalsMarkdown(approvals) {
44
44
  }
45
45
  const THREAD_MEMORY_EVENT_TYPES = new Set([
46
46
  "run.state.changed",
47
+ "run.queued",
48
+ "run.dequeued",
47
49
  "approval.resolved",
48
50
  "approval.requested",
49
51
  ]);
@@ -9,6 +9,7 @@ export type LoadedToolModule = {
9
9
  invoke: (input: unknown, context?: Record<string, unknown>) => Promise<unknown> | unknown;
10
10
  schema: SchemaLike;
11
11
  description: string;
12
+ retryable?: boolean;
12
13
  };
13
14
  export declare function isSupportedToolModulePath(filePath: string): boolean;
14
15
  export declare function discoverAnnotatedFunctionNames(sourceText: string): string[];
@@ -76,6 +76,7 @@ function loadToolObjectDefinition(imported, exportName) {
76
76
  invoke: definition.invoke,
77
77
  schema: normalizeToolSchema(definition.schema),
78
78
  description: definition.description.trim(),
79
+ retryable: definition.retryable === true,
79
80
  };
80
81
  }
81
82
  export function isSupportedToolModulePath(filePath) {
@@ -113,6 +114,11 @@ export function discoverToolModuleDefinitions(sourceText, imported) {
113
114
  invoke: invoke,
114
115
  schema,
115
116
  description: readToolDescription(imported, implementationName, schema),
117
+ retryable: typeof imported[`${implementationName}Retryable`] === "boolean"
118
+ ? imported[`${implementationName}Retryable`] === true
119
+ : typeof imported.retryable === "boolean"
120
+ ? imported.retryable === true
121
+ : undefined,
116
122
  });
117
123
  }
118
124
  return discovered;
@@ -139,5 +145,10 @@ export function loadToolModuleDefinition(imported, implementationName) {
139
145
  invoke: invoke,
140
146
  schema,
141
147
  description: readToolDescription(imported, implementationName, schema),
148
+ retryable: typeof imported[`${implementationName}Retryable`] === "boolean"
149
+ ? imported[`${implementationName}Retryable`] === true
150
+ : typeof imported.retryable === "boolean"
151
+ ? imported.retryable === true
152
+ : undefined,
142
153
  };
143
154
  }
package/dist/tools.d.ts CHANGED
@@ -5,6 +5,7 @@ export type ToolDefinitionObject = {
5
5
  name?: string;
6
6
  description: string;
7
7
  schema: SchemaInput;
8
+ retryable?: boolean;
8
9
  invoke: (input: unknown, context?: Record<string, unknown>) => Promise<unknown> | unknown;
9
10
  [TOOL_DEFINITION_MARKER]: true;
10
11
  };
@@ -15,6 +16,7 @@ export declare function tool(definition: {
15
16
  name?: string;
16
17
  description: string;
17
18
  schema: SchemaInput;
19
+ retryable?: boolean;
18
20
  invoke: (input: unknown, context?: Record<string, unknown>) => Promise<unknown> | unknown;
19
21
  }): ToolDefinitionObject;
20
22
  export {};
@@ -566,6 +566,7 @@ export async function readToolModuleItems(root) {
566
566
  name: definition.implementationName,
567
567
  description: definition.description,
568
568
  implementationName: definition.implementationName,
569
+ ...(definition.retryable !== undefined ? { retryable: definition.retryable } : {}),
569
570
  },
570
571
  sourcePath: filePath,
571
572
  });
@@ -254,6 +254,7 @@ export function parseToolObject(object) {
254
254
  : undefined,
255
255
  bundleRefs,
256
256
  hitl: parseHitlPolicy(value.hitl),
257
+ retryable: value.retryable === true,
257
258
  sourcePath: object.sourcePath,
258
259
  };
259
260
  }
@@ -22,7 +22,7 @@ export type RecoveryConfig = {
22
22
  maxRecoveryAttempts: number;
23
23
  };
24
24
  export type ConcurrencyConfig = {
25
- maxConcurrentRuns?: number;
25
+ maxConcurrentRuns: number;
26
26
  };
27
27
  export declare function getWorkspaceObject(refs: Map<string, WorkspaceObject | ParsedAgentObject>, ref: string | undefined): WorkspaceObject | undefined;
28
28
  export declare function getRuntimeDefaults(refs: Map<string, WorkspaceObject | ParsedAgentObject>): Record<string, unknown> | undefined;
@@ -62,7 +62,7 @@ export function getConcurrencyConfig(refs) {
62
62
  Number.isFinite(concurrency.maxConcurrentRuns) &&
63
63
  concurrency.maxConcurrentRuns > 0
64
64
  ? Math.floor(concurrency.maxConcurrentRuns)
65
- : undefined;
65
+ : 3;
66
66
  return { maxConcurrentRuns };
67
67
  }
68
68
  export function getRoutingSystemPrompt(refs) {
@@ -105,6 +105,7 @@ export async function hydrateResourceAndExternalTools(tools, toolSourceRefs, wor
105
105
  backendOperation: existing?.backendOperation ?? resourceTool.backendOperation,
106
106
  bundleRefs: existing?.bundleRefs ?? [],
107
107
  hitl: existing?.hitl ?? resourceTool.hitl,
108
+ retryable: existing?.retryable ?? resourceTool.retryable,
108
109
  sourcePath: existing?.sourcePath ?? resourceTool.toolPath,
109
110
  });
110
111
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botbotgo/agent-harness",
3
- "version": "0.0.45",
3
+ "version": "0.0.46",
4
4
  "description": "Workspace runtime for multi-agent applications",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",