@mcpmesh/sdk 1.3.4 → 2.0.0-beta.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (262)
  1. package/dist/__tests__/a2a/a2a-bearer.spec.d.ts +2 -0
  2. package/dist/__tests__/a2a/a2a-bearer.spec.d.ts.map +1 -0
  3. package/dist/__tests__/a2a/a2a-bearer.spec.js +58 -0
  4. package/dist/__tests__/a2a/a2a-bearer.spec.js.map +1 -0
  5. package/dist/__tests__/a2a/a2a-client.spec.d.ts +2 -0
  6. package/dist/__tests__/a2a/a2a-client.spec.d.ts.map +1 -0
  7. package/dist/__tests__/a2a/a2a-client.spec.js +334 -0
  8. package/dist/__tests__/a2a/a2a-client.spec.js.map +1 -0
  9. package/dist/__tests__/a2a/a2a-job.spec.d.ts +2 -0
  10. package/dist/__tests__/a2a/a2a-job.spec.d.ts.map +1 -0
  11. package/dist/__tests__/a2a/a2a-job.spec.js +255 -0
  12. package/dist/__tests__/a2a/a2a-job.spec.js.map +1 -0
  13. package/dist/__tests__/a2a/a2a-stream.spec.d.ts +2 -0
  14. package/dist/__tests__/a2a/a2a-stream.spec.d.ts.map +1 -0
  15. package/dist/__tests__/a2a/a2a-stream.spec.js +278 -0
  16. package/dist/__tests__/a2a/a2a-stream.spec.js.map +1 -0
  17. package/dist/__tests__/a2a/agent-a2a-config.spec.d.ts +2 -0
  18. package/dist/__tests__/a2a/agent-a2a-config.spec.d.ts.map +1 -0
  19. package/dist/__tests__/a2a/agent-a2a-config.spec.js +262 -0
  20. package/dist/__tests__/a2a/agent-a2a-config.spec.js.map +1 -0
  21. package/dist/__tests__/a2a/producer/auth-filter.spec.d.ts +2 -0
  22. package/dist/__tests__/a2a/producer/auth-filter.spec.d.ts.map +1 -0
  23. package/dist/__tests__/a2a/producer/auth-filter.spec.js +127 -0
  24. package/dist/__tests__/a2a/producer/auth-filter.spec.js.map +1 -0
  25. package/dist/__tests__/a2a/producer/card-builder.spec.d.ts +2 -0
  26. package/dist/__tests__/a2a/producer/card-builder.spec.d.ts.map +1 -0
  27. package/dist/__tests__/a2a/producer/card-builder.spec.js +113 -0
  28. package/dist/__tests__/a2a/producer/card-builder.spec.js.map +1 -0
  29. package/dist/__tests__/a2a/producer/dispatcher.spec.d.ts +2 -0
  30. package/dist/__tests__/a2a/producer/dispatcher.spec.d.ts.map +1 -0
  31. package/dist/__tests__/a2a/producer/dispatcher.spec.js +850 -0
  32. package/dist/__tests__/a2a/producer/dispatcher.spec.js.map +1 -0
  33. package/dist/__tests__/a2a/producer/mount-surface-push.spec.d.ts +2 -0
  34. package/dist/__tests__/a2a/producer/mount-surface-push.spec.d.ts.map +1 -0
  35. package/dist/__tests__/a2a/producer/mount-surface-push.spec.js +164 -0
  36. package/dist/__tests__/a2a/producer/mount-surface-push.spec.js.map +1 -0
  37. package/dist/__tests__/a2a/producer/mount.spec.d.ts +2 -0
  38. package/dist/__tests__/a2a/producer/mount.spec.d.ts.map +1 -0
  39. package/dist/__tests__/a2a/producer/mount.spec.js +433 -0
  40. package/dist/__tests__/a2a/producer/mount.spec.js.map +1 -0
  41. package/dist/__tests__/a2a/producer/public-url-cache.spec.d.ts +2 -0
  42. package/dist/__tests__/a2a/producer/public-url-cache.spec.d.ts.map +1 -0
  43. package/dist/__tests__/a2a/producer/public-url-cache.spec.js +116 -0
  44. package/dist/__tests__/a2a/producer/public-url-cache.spec.js.map +1 -0
  45. package/dist/__tests__/a2a/producer/sse-emitter.spec.d.ts +2 -0
  46. package/dist/__tests__/a2a/producer/sse-emitter.spec.d.ts.map +1 -0
  47. package/dist/__tests__/a2a/producer/sse-emitter.spec.js +754 -0
  48. package/dist/__tests__/a2a/producer/sse-emitter.spec.js.map +1 -0
  49. package/dist/__tests__/a2a/producer/state-translator.spec.d.ts +2 -0
  50. package/dist/__tests__/a2a/producer/state-translator.spec.d.ts.map +1 -0
  51. package/dist/__tests__/a2a/producer/state-translator.spec.js +124 -0
  52. package/dist/__tests__/a2a/producer/state-translator.spec.js.map +1 -0
  53. package/dist/__tests__/a2a/producer/task-store.spec.d.ts +2 -0
  54. package/dist/__tests__/a2a/producer/task-store.spec.d.ts.map +1 -0
  55. package/dist/__tests__/a2a/producer/task-store.spec.js +180 -0
  56. package/dist/__tests__/a2a/producer/task-store.spec.js.map +1 -0
  57. package/dist/__tests__/agent-add-tool.spec.d.ts +2 -0
  58. package/dist/__tests__/agent-add-tool.spec.d.ts.map +1 -0
  59. package/dist/__tests__/agent-add-tool.spec.js +483 -0
  60. package/dist/__tests__/agent-add-tool.spec.js.map +1 -0
  61. package/dist/__tests__/api-runtime-race.spec.d.ts +2 -0
  62. package/dist/__tests__/api-runtime-race.spec.d.ts.map +1 -0
  63. package/dist/__tests__/api-runtime-race.spec.js +193 -0
  64. package/dist/__tests__/api-runtime-race.spec.js.map +1 -0
  65. package/dist/__tests__/claim-dispatcher.spec.d.ts +2 -0
  66. package/dist/__tests__/claim-dispatcher.spec.d.ts.map +1 -0
  67. package/dist/__tests__/claim-dispatcher.spec.js +408 -0
  68. package/dist/__tests__/claim-dispatcher.spec.js.map +1 -0
  69. package/dist/__tests__/inbound-job-dispatch.spec.d.ts +2 -0
  70. package/dist/__tests__/inbound-job-dispatch.spec.d.ts.map +1 -0
  71. package/dist/__tests__/inbound-job-dispatch.spec.js +185 -0
  72. package/dist/__tests__/inbound-job-dispatch.spec.js.map +1 -0
  73. package/dist/__tests__/job-controller-progress.spec.d.ts +2 -0
  74. package/dist/__tests__/job-controller-progress.spec.d.ts.map +1 -0
  75. package/dist/__tests__/job-controller-progress.spec.js +85 -0
  76. package/dist/__tests__/job-controller-progress.spec.js.map +1 -0
  77. package/dist/__tests__/jobs-cancel-route.spec.d.ts +2 -0
  78. package/dist/__tests__/jobs-cancel-route.spec.d.ts.map +1 -0
  79. package/dist/__tests__/jobs-cancel-route.spec.js +88 -0
  80. package/dist/__tests__/jobs-cancel-route.spec.js.map +1 -0
  81. package/dist/__tests__/llm-agent-stream.test.d.ts +14 -0
  82. package/dist/__tests__/llm-agent-stream.test.d.ts.map +1 -0
  83. package/dist/__tests__/llm-agent-stream.test.js +341 -0
  84. package/dist/__tests__/llm-agent-stream.test.js.map +1 -0
  85. package/dist/__tests__/llm-provider.test.js +22 -1
  86. package/dist/__tests__/llm-provider.test.js.map +1 -1
  87. package/dist/__tests__/media-resolver.test.js +40 -0
  88. package/dist/__tests__/media-resolver.test.js.map +1 -1
  89. package/dist/__tests__/mesh-job-submitter.spec.d.ts +2 -0
  90. package/dist/__tests__/mesh-job-submitter.spec.d.ts.map +1 -0
  91. package/dist/__tests__/mesh-job-submitter.spec.js +110 -0
  92. package/dist/__tests__/mesh-job-submitter.spec.js.map +1 -0
  93. package/dist/__tests__/proxy-stream.test.d.ts +9 -0
  94. package/dist/__tests__/proxy-stream.test.d.ts.map +1 -0
  95. package/dist/__tests__/proxy-stream.test.js +347 -0
  96. package/dist/__tests__/proxy-stream.test.js.map +1 -0
  97. package/dist/__tests__/resolver-meshjob.spec.d.ts +26 -0
  98. package/dist/__tests__/resolver-meshjob.spec.d.ts.map +1 -0
  99. package/dist/__tests__/resolver-meshjob.spec.js +201 -0
  100. package/dist/__tests__/resolver-meshjob.spec.js.map +1 -0
  101. package/dist/__tests__/schema-verdict-policy.test.d.ts +6 -0
  102. package/dist/__tests__/schema-verdict-policy.test.d.ts.map +1 -0
  103. package/dist/__tests__/schema-verdict-policy.test.js +126 -0
  104. package/dist/__tests__/schema-verdict-policy.test.js.map +1 -0
  105. package/dist/__tests__/sse-stream.test.d.ts +12 -0
  106. package/dist/__tests__/sse-stream.test.d.ts.map +1 -0
  107. package/dist/__tests__/sse-stream.test.js +170 -0
  108. package/dist/__tests__/sse-stream.test.js.map +1 -0
  109. package/dist/a2a/a2a-bearer.d.ts +27 -0
  110. package/dist/a2a/a2a-bearer.d.ts.map +1 -0
  111. package/dist/a2a/a2a-bearer.js +63 -0
  112. package/dist/a2a/a2a-bearer.js.map +1 -0
  113. package/dist/a2a/a2a-client.d.ts +114 -0
  114. package/dist/a2a/a2a-client.d.ts.map +1 -0
  115. package/dist/a2a/a2a-client.js +405 -0
  116. package/dist/a2a/a2a-client.js.map +1 -0
  117. package/dist/a2a/a2a-event.d.ts +25 -0
  118. package/dist/a2a/a2a-event.d.ts.map +1 -0
  119. package/dist/a2a/a2a-event.js +9 -0
  120. package/dist/a2a/a2a-event.js.map +1 -0
  121. package/dist/a2a/a2a-job.d.ts +58 -0
  122. package/dist/a2a/a2a-job.d.ts.map +1 -0
  123. package/dist/a2a/a2a-job.js +264 -0
  124. package/dist/a2a/a2a-job.js.map +1 -0
  125. package/dist/a2a/a2a-stream.d.ts +39 -0
  126. package/dist/a2a/a2a-stream.d.ts.map +1 -0
  127. package/dist/a2a/a2a-stream.js +290 -0
  128. package/dist/a2a/a2a-stream.js.map +1 -0
  129. package/dist/a2a/errors.d.ts +29 -0
  130. package/dist/a2a/errors.d.ts.map +1 -0
  131. package/dist/a2a/errors.js +48 -0
  132. package/dist/a2a/errors.js.map +1 -0
  133. package/dist/a2a/index.d.ts +12 -0
  134. package/dist/a2a/index.d.ts.map +1 -0
  135. package/dist/a2a/index.js +11 -0
  136. package/dist/a2a/index.js.map +1 -0
  137. package/dist/a2a/producer/auth-filter.d.ts +34 -0
  138. package/dist/a2a/producer/auth-filter.d.ts.map +1 -0
  139. package/dist/a2a/producer/auth-filter.js +39 -0
  140. package/dist/a2a/producer/auth-filter.js.map +1 -0
  141. package/dist/a2a/producer/card-builder.d.ts +59 -0
  142. package/dist/a2a/producer/card-builder.d.ts.map +1 -0
  143. package/dist/a2a/producer/card-builder.js +59 -0
  144. package/dist/a2a/producer/card-builder.js.map +1 -0
  145. package/dist/a2a/producer/dispatcher.d.ts +276 -0
  146. package/dist/a2a/producer/dispatcher.d.ts.map +1 -0
  147. package/dist/a2a/producer/dispatcher.js +896 -0
  148. package/dist/a2a/producer/dispatcher.js.map +1 -0
  149. package/dist/a2a/producer/index.d.ts +26 -0
  150. package/dist/a2a/producer/index.d.ts.map +1 -0
  151. package/dist/a2a/producer/index.js +23 -0
  152. package/dist/a2a/producer/index.js.map +1 -0
  153. package/dist/a2a/producer/mount.d.ts +75 -0
  154. package/dist/a2a/producer/mount.d.ts.map +1 -0
  155. package/dist/a2a/producer/mount.js +422 -0
  156. package/dist/a2a/producer/mount.js.map +1 -0
  157. package/dist/a2a/producer/public-url-cache.d.ts +73 -0
  158. package/dist/a2a/producer/public-url-cache.d.ts.map +1 -0
  159. package/dist/a2a/producer/public-url-cache.js +0 -0
  160. package/dist/a2a/producer/public-url-cache.js.map +1 -0
  161. package/dist/a2a/producer/registry.d.ts +138 -0
  162. package/dist/a2a/producer/registry.d.ts.map +1 -0
  163. package/dist/a2a/producer/registry.js +117 -0
  164. package/dist/a2a/producer/registry.js.map +1 -0
  165. package/dist/a2a/producer/sse-emitter.d.ts +85 -0
  166. package/dist/a2a/producer/sse-emitter.d.ts.map +1 -0
  167. package/dist/a2a/producer/sse-emitter.js +405 -0
  168. package/dist/a2a/producer/sse-emitter.js.map +1 -0
  169. package/dist/a2a/producer/state-translator.d.ts +63 -0
  170. package/dist/a2a/producer/state-translator.d.ts.map +1 -0
  171. package/dist/a2a/producer/state-translator.js +108 -0
  172. package/dist/a2a/producer/state-translator.js.map +1 -0
  173. package/dist/a2a/producer/task-store.d.ts +128 -0
  174. package/dist/a2a/producer/task-store.d.ts.map +1 -0
  175. package/dist/a2a/producer/task-store.js +128 -0
  176. package/dist/a2a/producer/task-store.js.map +1 -0
  177. package/dist/agent.d.ts +99 -0
  178. package/dist/agent.d.ts.map +1 -1
  179. package/dist/agent.js +754 -19
  180. package/dist/agent.js.map +1 -1
  181. package/dist/api-runtime.d.ts +25 -0
  182. package/dist/api-runtime.d.ts.map +1 -1
  183. package/dist/api-runtime.js +75 -2
  184. package/dist/api-runtime.js.map +1 -1
  185. package/dist/claim-dispatcher.d.ts +126 -0
  186. package/dist/claim-dispatcher.d.ts.map +1 -0
  187. package/dist/claim-dispatcher.js +478 -0
  188. package/dist/claim-dispatcher.js.map +1 -0
  189. package/dist/express.d.ts.map +1 -1
  190. package/dist/express.js +33 -6
  191. package/dist/express.js.map +1 -1
  192. package/dist/inbound-job-dispatch.d.ts +105 -0
  193. package/dist/inbound-job-dispatch.d.ts.map +1 -0
  194. package/dist/inbound-job-dispatch.js +335 -0
  195. package/dist/inbound-job-dispatch.js.map +1 -0
  196. package/dist/index.d.ts +40 -4
  197. package/dist/index.d.ts.map +1 -1
  198. package/dist/index.js +40 -3
  199. package/dist/index.js.map +1 -1
  200. package/dist/job-context.d.ts +107 -0
  201. package/dist/job-context.d.ts.map +1 -0
  202. package/dist/job-context.js +95 -0
  203. package/dist/job-context.js.map +1 -0
  204. package/dist/jobs-cancel-route.d.ts +36 -0
  205. package/dist/jobs-cancel-route.d.ts.map +1 -0
  206. package/dist/jobs-cancel-route.js +60 -0
  207. package/dist/jobs-cancel-route.js.map +1 -0
  208. package/dist/jobs-helper-tools.d.ts +48 -0
  209. package/dist/jobs-helper-tools.d.ts.map +1 -0
  210. package/dist/jobs-helper-tools.js +133 -0
  211. package/dist/jobs-helper-tools.js.map +1 -0
  212. package/dist/llm-agent.d.ts +62 -53
  213. package/dist/llm-agent.d.ts.map +1 -1
  214. package/dist/llm-agent.js +211 -292
  215. package/dist/llm-agent.js.map +1 -1
  216. package/dist/llm-provider.d.ts +11 -4
  217. package/dist/llm-provider.d.ts.map +1 -1
  218. package/dist/llm-provider.js +57 -4
  219. package/dist/llm-provider.js.map +1 -1
  220. package/dist/llm.d.ts +4 -1
  221. package/dist/llm.d.ts.map +1 -1
  222. package/dist/llm.js +7 -17
  223. package/dist/llm.js.map +1 -1
  224. package/dist/media/resolver.d.ts.map +1 -1
  225. package/dist/media/resolver.js +3 -2
  226. package/dist/media/resolver.js.map +1 -1
  227. package/dist/mesh-job-submitter.d.ts +83 -0
  228. package/dist/mesh-job-submitter.d.ts.map +1 -0
  229. package/dist/mesh-job-submitter.js +143 -0
  230. package/dist/mesh-job-submitter.js.map +1 -0
  231. package/dist/provider-handlers/gemini-handler.js +5 -0
  232. package/dist/provider-handlers/gemini-handler.js.map +1 -1
  233. package/dist/proxy.d.ts +40 -0
  234. package/dist/proxy.d.ts.map +1 -1
  235. package/dist/proxy.js +375 -2
  236. package/dist/proxy.js.map +1 -1
  237. package/dist/resolver-meshjob.d.ts +170 -0
  238. package/dist/resolver-meshjob.d.ts.map +1 -0
  239. package/dist/resolver-meshjob.js +159 -0
  240. package/dist/resolver-meshjob.js.map +1 -0
  241. package/dist/route.d.ts +4 -0
  242. package/dist/route.d.ts.map +1 -1
  243. package/dist/route.js.map +1 -1
  244. package/dist/schema-normalize.d.ts +62 -0
  245. package/dist/schema-normalize.d.ts.map +1 -0
  246. package/dist/schema-normalize.js +128 -0
  247. package/dist/schema-normalize.js.map +1 -0
  248. package/dist/sse-stream.d.ts +44 -0
  249. package/dist/sse-stream.d.ts.map +1 -0
  250. package/dist/sse-stream.js +173 -0
  251. package/dist/sse-stream.js.map +1 -0
  252. package/dist/tool-worker-entry.d.ts +21 -0
  253. package/dist/tool-worker-entry.d.ts.map +1 -0
  254. package/dist/tool-worker-entry.js +162 -0
  255. package/dist/tool-worker-entry.js.map +1 -0
  256. package/dist/tool-worker-pool.d.ts +49 -0
  257. package/dist/tool-worker-pool.d.ts.map +1 -0
  258. package/dist/tool-worker-pool.js +272 -0
  259. package/dist/tool-worker-pool.js.map +1 -0
  260. package/dist/types.d.ts +351 -9
  261. package/dist/types.d.ts.map +1 -1
  262. package/package.json +5 -3
package/dist/agent.js CHANGED
@@ -9,16 +9,65 @@
9
9
  * - Graceful shutdown
10
10
  */
11
11
  import { zodToJsonSchema } from "zod-to-json-schema";
12
+ import { isMainThread } from "node:worker_threads";
12
13
  import { startAgent, } from "@mcpmesh/core";
13
14
  import { resolveConfig, generateAgentIdSuffix, findAvailablePort } from "./config.js";
14
15
  import { enrichSchemaWithMediaTypes } from "./media-param.js";
15
- import { createProxy, normalizeDependency, runWithTraceContext, runWithPropagatedHeaders } from "./proxy.js";
16
+ import { createProxy, normalizeDependency, runWithTraceContext, runWithPropagatedHeaders, PROXY_DISPATCH_META } from "./proxy.js";
17
+ import { readJobHeaders, runWithJobContext, makeJobController, spliceJobController, } from "./inbound-job-dispatch.js";
18
+ import { MeshJobSubmitter } from "./mesh-job-submitter.js";
19
+ import { ClaimDispatcher } from "./claim-dispatcher.js";
20
+ import { registerJobHelperTools } from "./jobs-helper-tools.js";
21
+ import { registerCancelRoute } from "./jobs-cancel-route.js";
22
+ import { clusterStrictEnabled, normalizeSchemaWithPolicy, } from "./schema-normalize.js";
16
23
  import { initTracing, generateTraceId, generateSpanId, publishTraceSpan, matchesPropagateHeader, } from "./tracing.js";
17
24
  import { buildLlmAgentSpecs, handleLlmToolsUpdated, handleLlmProviderAvailable, handleLlmProviderUnavailable, LlmToolRegistry, } from "./llm.js";
18
25
  import { llmProvider, getLlmProviderMeta } from "./llm-provider.js";
19
26
  import { findAndSetBasePath } from "./template.js";
20
27
  import { getTlsOptions, getTlsConfigCached, prepareTls, cleanupTls } from "./tls-config.js";
21
28
  import { closeHttpPool } from "./http-pool.js";
29
+ import { dispatch as poolDispatch, closePool } from "./tool-worker-pool.js";
30
+ import { A2AClient, A2ABearer, } from "./a2a/index.js";
31
+ /**
32
+ * Globally-set symbol that user agent code can check to detect whether it
33
+ * is running inside a mesh tool-isolation worker. The mesh runtime sets
34
+ * this on globalThis BEFORE importing the user module in worker mode.
35
+ *
36
+ * Use this to guard module-top-level side effects that should run only in
37
+ * the main process — e.g. HTTP servers you start manually, OpenTelemetry
38
+ * SDK init, prometheus registries, file watchers, etc.:
39
+ *
40
+ * if (!globalThis[Symbol.for("@mcpmesh/sdk/in-worker")]) {
41
+ * await myCustomServer.listen(8081);
42
+ * myMetrics.start();
43
+ * }
44
+ *
45
+ * mesh's own setup (FastMCP server start, Express health endpoints,
46
+ * registry heartbeat) is automatically guarded; users only need this
47
+ * symbol if they have their own top-level side effects.
48
+ */
49
+ export const IN_WORKER_SYMBOL = Symbol.for("@mcpmesh/sdk/in-worker");
50
+ // Worker-mode detection: when this module is loaded inside a worker_threads
51
+ // Worker, we skip all main-thread init (HTTP server, registry heartbeat, etc.)
52
+ // and only collect tool functions into _workerToolMap for the worker entry to
53
+ // invoke. The symbol is set by tool-worker-entry.ts before any user import.
54
+ const WORKER_MODE_SYMBOL = Symbol.for("@mcpmesh/sdk/worker-mode");
55
+ const _isWorkerMode = !isMainThread &&
56
+ globalThis[WORKER_MODE_SYMBOL] === true;
57
+ // Module-level worker tool registry: populated by addTool() in worker mode,
58
+ // read by the worker entry via the __getWorkerToolMap() export. Module-level
59
+ // (not class-level) because the worker entry imports the SDK and needs a
60
+ // stable handle independent of which MeshAgent instance the user constructs.
61
+ const _workerToolMap = new Map();
62
+ /**
63
+ * Internal: returns the worker-side tool map.
64
+ *
65
+ * Used exclusively by tool-worker-entry.ts after dynamic-importing the user
66
+ * module. Not part of the public API.
67
+ */
68
+ export function __getWorkerToolMap() {
69
+ return _workerToolMap;
70
+ }
22
71
  // Internal: pending agent for auto-start
23
72
  let pendingAgent = null;
24
73
  let autoStartScheduled = false;
@@ -70,7 +119,68 @@ export class MeshAgent {
70
119
  * different tags/settings without overwriting each other.
71
120
  */
72
121
  resolvedDeps = new Map();
122
+ // True when this MeshAgent is constructed inside a worker_threads Worker.
123
+ // In worker mode addTool() only stashes execute fns and skips all FastMCP /
124
+ // registry / Rust core wiring (no Express port conflict, no double-register).
125
+ _workerMode = false;
126
+ /**
127
+ * Phase 1 MeshJob substrate: per-tool ClaimHandler for `task: true`
128
+ * tools. Indexed by capability so the ClaimDispatcher can look up
129
+ * the local handler without re-traversing the tools map. Populated
130
+ * by addTool() at registration time; consumed by _autoStart() to
131
+ * spawn one dispatcher per task tool.
132
+ *
133
+ * Issue #894: also carries the per-tool retryOn whitelist so the
134
+ * dispatcher can pass it into `runWithJobContext` for the
135
+ * release-lease-on-retry-eligible-throw path.
136
+ */
137
+ _taskHandlers = new Map();
138
+ /**
139
+ * Active claim dispatchers (one per task=true capability). Started
140
+ * during _autoStart(); stopped during shutdown(). Empty for agents
141
+ * that own no task=true tools.
142
+ */
143
+ _claimDispatchers = [];
144
+ /**
145
+ * Issue #917: cache of `A2AClient` instances keyed by their
146
+ * `(url, skillId, auth, timeoutMs)` tuple so multiple consumer
147
+ * tools targeting the same backend share one outbound connection
148
+ * pool. Closed via `close()` on agent shutdown.
149
+ */
150
+ _a2aClients = new Map();
151
+ /**
152
+ * Issue #917: stable opaque IDs for `A2ABearer` instances used in
153
+ * the A2AClient cache key. Bearer fields are private so we cannot
154
+ * fingerprint by content (would also be a security risk — two
155
+ * tools with distinct literal tokens must NEVER share a cache
156
+ * entry). Identity-based keying is the safe default. `WeakMap`
157
+ * lets bearers be GC'd when the registering tool is removed.
158
+ */
159
+ _bearerIds = new WeakMap();
160
+ _nextBearerId = 0;
73
161
  constructor(server, config) {
162
+ if (_isWorkerMode) {
163
+ // Worker thread: skip ALL init. Only addTool() runs (in worker-mode
164
+ // branch) to populate the module-level _workerToolMap. The worker
165
+ // entry imports the SDK + user module purely to discover tools — it
166
+ // never calls server.start(), startAgent(), or scheduleAutoStart().
167
+ this._workerMode = true;
168
+ // Initialize required fields to satisfy "definitely assigned" without
169
+ // triggering any side effects. None of these are read in worker mode.
170
+ this.server = server;
171
+ this.config = {
172
+ name: config.name,
173
+ version: "0.0.0",
174
+ description: "",
175
+ httpPort: 0,
176
+ httpHost: "127.0.0.1",
177
+ namespace: "default",
178
+ registryUrl: "",
179
+ heartbeatInterval: 0,
180
+ };
181
+ this.agentId = "";
182
+ return;
183
+ }
74
184
  this.server = server;
75
185
  // Resolve config with env var precedence: ENV > config > defaults
76
186
  this.config = resolveConfig(config);
@@ -90,13 +200,224 @@ export class MeshAgent {
90
200
  addTool(def) {
91
201
  const toolName = def.name;
92
202
  const execute = def.execute;
203
+ // Phase 1 MeshJob substrate: validate `task: true` requires an
204
+ // async function. Long-running tools need a Promise-based control
205
+ // flow so the dispatch wrapper (Phase B) can await
206
+ // `MeshJob.updateProgress()` / cancellation / outbound polling.
207
+ // Fail loudly at `addTool` so the developer sees the misuse before
208
+ // the agent even tries to register with the registry.
209
+ //
210
+ // Heuristic: an async function's constructor.name is "AsyncFunction".
211
+ // We only flag the obvious sync case (an arrow/function literal)
212
+ // — any function returning a Promise will pass this check, which
213
+ // is the right relaxation for users who wrap their handler in a
214
+ // Promise factory.
215
+ if (def.task === true) {
216
+ const ctorName = execute
217
+ ?.constructor?.name;
218
+ if (ctorName !== "AsyncFunction") {
219
+ // We can't reliably detect Promise-returning sync functions
220
+ // without invoking them, but we CAN reject the unambiguous
221
+ // "function() { ... }" case where the developer probably
222
+ // forgot the `async` keyword.
223
+ if (ctorName === "Function") {
224
+ throw new Error(`addTool({ task: true }) requires an async execute function; ` +
225
+ `tool '${toolName}' has a sync execute. Mark it 'async' or ` +
226
+ `remove task: true.`);
227
+ }
228
+ // Other constructor names (GeneratorFunction, etc.) are
229
+ // unusual; let them through silently (no warning is logged) rather
230
+ // than blocking — the dispatch wrapper will surface any actual
231
+ // misuse at first invocation.
232
+ }
233
+ }
234
+ // Phase 1 MeshJob substrate (consumer-side validation): if the
235
+ // tool declares meshJobDepIndex, that index MUST be a non-negative
236
+ // integer pointing to a valid dependency. Catch misuse at
237
+ // registration so the developer doesn't see a confusing TypeError
238
+ // at runtime when the wrapper tries to swap the dep proxy for a
239
+ // submitter. Mirrors the meshJobParamIndex validation below —
240
+ // NaN / fractional / negative values must fail-fast here too.
241
+ if (def.meshJobDepIndex !== undefined) {
242
+ const depCount = (def.dependencies ?? []).length;
243
+ const v = def.meshJobDepIndex;
244
+ const isInt = Number.isInteger(v) && v >= 0;
245
+ if (!isInt) {
246
+ throw new Error(`addTool({ meshJobDepIndex: ${v} }) for tool '${toolName}': ` +
247
+ `meshJobDepIndex must be a non-negative integer (index into ` +
248
+ `dependencies[]), got: ${v}`);
249
+ }
250
+ if (v >= depCount) {
251
+ throw new Error(`addTool({ meshJobDepIndex: ${v} }) for tool ` +
252
+ `'${toolName}' is out of range — the tool declares ${depCount} ` +
253
+ `dependencies. meshJobDepIndex must be a valid index into ` +
254
+ `dependencies[].`);
255
+ }
256
+ }
257
+ // Phase 1 MeshJob substrate (producer-side validation): if the
258
+ // tool declares meshJobParamIndex, that position MUST be a sane
259
+ // integer >= 1. Position 0 is reserved for the args payload, so
260
+ // the controller can only land at sig pos 1+. Without this
261
+ // guard, values 0 / negative / NaN / non-integer silently skip
262
+ // controller injection — the user's handler then sees `null`
263
+ // where it expected a JobController and throws a confusing
264
+ // `TypeError: Cannot read properties of null` at first await.
265
+ //
266
+ // Upper bound is a sanity check: > 10 almost certainly means a
267
+ // typo (no real producer signature has that many params).
268
+ if (def.meshJobParamIndex !== undefined) {
269
+ const v = def.meshJobParamIndex;
270
+ const ok = Number.isInteger(v) && v >= 1 && v <= 10;
271
+ if (!ok) {
272
+ throw new Error(`addTool({ meshJobParamIndex: ${v} }) for tool '${toolName}': ` +
273
+ `meshJobParamIndex must be an integer >= 1 (position of MeshJob ` +
274
+ `param after the args payload), got: ${v}`);
275
+ }
276
+ }
277
+ // Issue #894: validate retryOn at registration so misuse fails loud
278
+ // before the agent talks to the registry. Mirror Python's
279
+ // `mesh.decorators` validation in spirit:
280
+ // - retryOn requires task: true (without the job dispatch wrapper
281
+ // there's no controller to release a lease on, so the kwarg is
282
+ // meaningless);
283
+ // - entries must be Error constructor classes (typeof === "function").
284
+ // We don't filter control-flow exceptions like Python's
285
+ // KeyboardInterrupt / asyncio.CancelledError — JavaScript has no
286
+ // direct equivalent, and AbortError-style cancellation is a legitimate
287
+ // retry trigger for some users. They get to choose.
288
+ if (def.retryOn !== undefined) {
289
+ if (def.task !== true) {
290
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn is only ` +
291
+ `valid with task: true; remove retryOn or set task: true.`);
292
+ }
293
+ if (!Array.isArray(def.retryOn)) {
294
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn must be ` +
295
+ `an array of Error constructor classes (e.g., [TypeError, MyError]).`);
296
+ }
297
+ for (const entry of def.retryOn) {
298
+ // Must be a function that has a prototype (i.e. an actual class
299
+ // or a `function` declaration — not an arrow function), AND must
300
+ // either be Error itself or a subclass. Arrow functions have
301
+ // `prototype === undefined`, so `entry.prototype instanceof Error`
302
+ // is `false` for them — they're rejected by the second check.
303
+ // Without this, `err instanceof <arrow>` at dispatch time would
304
+ // throw `TypeError: Right-hand side of instanceof is not callable`.
305
+ if (typeof entry !== "function") {
306
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn entries ` +
307
+ `must be Error constructor classes (functions); got: ${String(entry)}`);
308
+ }
309
+ if (entry !== Error && !(entry.prototype instanceof Error)) {
310
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn entries ` +
311
+ `must extend Error (or be Error itself); got: ${String(entry)}`);
312
+ }
313
+ }
314
+ }
315
+ // Issue #917: validate a2aConfig at registration time so misuse fails
316
+ // loud BEFORE the agent talks to the registry. Match the Python
317
+ // `mesh.a2a_consumer` and Java `@A2AConsumer` startup-time checks.
318
+ let a2aClient = null;
319
+ if (def.a2aConfig !== undefined) {
320
+ const cfg = def.a2aConfig;
321
+ if (!cfg.url || cfg.url.trim() === "") {
322
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': url must be ` +
323
+ `a non-empty string.`);
324
+ }
325
+ if (cfg.timeoutMs !== undefined) {
326
+ if (!Number.isFinite(cfg.timeoutMs) || cfg.timeoutMs <= 0) {
327
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': timeoutMs ` +
328
+ `must be a finite positive number (got ${cfg.timeoutMs}).`);
329
+ }
330
+ }
331
+ if (cfg.pollIntervalMs !== undefined) {
332
+ if (!Number.isFinite(cfg.pollIntervalMs) || cfg.pollIntervalMs <= 0) {
333
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': ` +
334
+ `pollIntervalMs must be a finite positive number ` +
335
+ `(got ${cfg.pollIntervalMs}).`);
336
+ }
337
+ }
338
+ if (cfg.pollIntervalMaxMs !== undefined) {
339
+ if (!Number.isFinite(cfg.pollIntervalMaxMs) ||
340
+ cfg.pollIntervalMaxMs <= 0) {
341
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': ` +
342
+ `pollIntervalMaxMs must be a finite positive number ` +
343
+ `(got ${cfg.pollIntervalMaxMs}).`);
344
+ }
345
+ }
346
+ if (cfg.pollIntervalMs !== undefined &&
347
+ cfg.pollIntervalMaxMs !== undefined &&
348
+ cfg.pollIntervalMaxMs < cfg.pollIntervalMs) {
349
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': ` +
350
+ `pollIntervalMaxMs (${cfg.pollIntervalMaxMs}) must be >= ` +
351
+ `pollIntervalMs (${cfg.pollIntervalMs}).`);
352
+ }
353
+ if (!this._workerMode) {
354
+ const skillId = cfg.skillId ?? def.capability ?? toolName;
355
+ a2aClient = this._getOrBuildA2AClient({
356
+ url: cfg.url,
357
+ skillId,
358
+ auth: this._buildBearerFromConfig(cfg.auth),
359
+ timeoutMs: cfg.timeoutMs,
360
+ pollIntervalMs: cfg.pollIntervalMs,
361
+ pollIntervalMaxMs: cfg.pollIntervalMaxMs,
362
+ });
363
+ }
364
+ }
365
+ // Worker mode: register the raw execute fn in the worker tool map and
366
+ // skip FastMCP registration, dependency wiring, and metadata storage.
367
+ // The worker entry will look up tools by name when handling dispatched
368
+ // calls from the main thread.
369
+ if (this._workerMode) {
370
+ _workerToolMap.set(toolName, execute);
371
+ return this;
372
+ }
93
373
  // Normalize dependencies
94
374
  const normalizedDeps = (def.dependencies ?? []).map(normalizeDependency);
95
375
  const depEndpoints = normalizedDeps.map((d) => d.capability);
376
+ // Capture for closures — these reads must be live at invocation
377
+ // time (e.g. registryUrl/agentId aren't set yet at addTool time).
378
+ const isTaskTool = def.task === true;
379
+ const meshJobDepIndex = def.meshJobDepIndex;
380
+ const meshJobParamIndex = def.meshJobParamIndex;
381
+ // Issue #894: per-tool retryOn whitelist threaded into both
382
+ // dispatch paths (inbound HTTP wrapper below + ClaimHandler
383
+ // registered in this.taskHandlers). Captured here so the closure
384
+ // sees a stable reference even if def is mutated post-registration.
385
+ const retryOn = def.retryOn;
386
+ // Phase 1 MeshJob substrate: when a job-bound tool exists AND the
387
+ // user explicitly opted into worker isolation via env, log a single
388
+ // warning at registration time. The wrapper force-disables
389
+ // isolation for job-bound tools because controllers + the
390
+ // AsyncLocalStorage / Rust task-local job context don't cross the
391
+ // worker_threads boundary cleanly. Without this log the
392
+ // force-disable was silent — users who set MCP_MESH_TOOL_ISOLATION
393
+ // expected it to apply to every tool.
394
+ const isJobBoundForLog = isTaskTool || meshJobDepIndex !== undefined;
395
+ const isolationEnvSet = typeof process.env.MCP_MESH_TOOL_ISOLATION === "string" &&
396
+ process.env.MCP_MESH_TOOL_ISOLATION.toLowerCase() !== "false";
397
+ if (isJobBoundForLog && isolationEnvSet) {
398
+ console.warn(`[mesh-tool] '${toolName}' has ` +
399
+ (isTaskTool ? "task: true" : `meshJobDepIndex: ${meshJobDepIndex}`) +
400
+ `; worker isolation is disabled for job-bound tools ` +
401
+ `(controllers/AsyncLocalStorage don't cross worker boundaries). ` +
402
+ `Set 'task: true' explicitly if you intend a producer.`);
403
+ }
96
404
  // Create wrapper that injects dependencies positionally and handles tracing
97
405
  const wrappedExecute = async (args) => {
98
406
  // Build positional deps array using composite keys (toolName:dep_index)
99
- const depsArray = normalizedDeps.map((_, depIndex) => this.resolvedDeps.get(`${toolName}:dep_${depIndex}`) ?? null);
407
+ // Phase 1 MeshJob substrate (consumer-side): if meshJobDepIndex is
408
+ // set, swap the McpMeshTool proxy at that slot for a
409
+ // MeshJobSubmitter targeting that dep's capability. We bind the
410
+ // submitter to the live registryUrl/agentId so it can submit
411
+ // jobs without needing access to the agent instance.
412
+ const depsArray = normalizedDeps.map((dep, depIndex) => {
413
+ if (depIndex === meshJobDepIndex) {
414
+ // Build the submitter lazily per call so we always pick
415
+ // up the current registryUrl (test harnesses sometimes
416
+ // mutate it between calls).
417
+ return new MeshJobSubmitter(dep.capability, this.agentId, this.config.registryUrl);
418
+ }
419
+ return this.resolvedDeps.get(`${toolName}:dep_${depIndex}`) ?? null;
420
+ });
100
421
  const injectedCount = depsArray.filter((d) => d !== null).length;
101
422
  // Extract trace context from arguments (injected by upstream proxy)
102
423
  // This is the fallback mechanism since fastmcp doesn't expose HTTP headers
@@ -137,18 +458,129 @@ export class MeshAgent {
137
458
  let success = true;
138
459
  let error = null;
139
460
  let resultType = "string";
461
+ // Tool isolation: dispatch user execute() onto a worker thread so
462
+ // blocking/long-running calls don't stall the main loop (which serves
463
+ // /health, /ready, FastMCP HTTP, and registry heartbeats). Mirrors the
464
+ // Python implementation in _mcp_mesh/shared/tool_executor.py.
465
+ // Default ON; set MCP_MESH_TOOL_ISOLATION=false to revert to inline
466
+ // execution on the main loop (legacy behavior).
467
+ //
468
+ // Phase 1 MeshJob substrate: force-disable isolation for tools
469
+ // that bind to a JobController or MeshJobSubmitter. The
470
+ // controller/submitter wrap napi-rs handles plus
471
+ // AsyncLocalStorage state that cannot be cleanly serialised
472
+ // across the worker_threads boundary. Running inline on the
473
+ // main loop is the right trade — task=true tools are
474
+ // long-running by definition and benefit less from isolation
475
+ // (their wall-clock time is dominated by the user's `await`s,
476
+ // not CPU bursts that block the event loop).
477
+ // Issue #917: A2A consumer tools force-disable isolation along
478
+ // with job-bound tools. The framework-injected `A2AClient` wraps
479
+ // an undici dispatcher handle that cannot be cleanly serialised
480
+ // across the worker_threads boundary; running inline keeps the
481
+ // cached client + connection pool intact across calls.
482
+ const isA2aBound = a2aClient !== null;
483
+ const isJobBound = isTaskTool || meshJobDepIndex !== undefined;
484
+ const isolationEnabled = !isJobBound &&
485
+ !isA2aBound &&
486
+ (process.env.MCP_MESH_TOOL_ISOLATION ?? "true").toLowerCase() !== "false";
140
487
  try {
141
- // Run tool execution within trace context using AsyncLocalStorage
142
- // This ensures trace context is properly propagated to all async operations
143
- // and isolated between concurrent requests
144
- const result = await runWithTraceContext(traceContext, async () => {
145
- return await runWithPropagatedHeaders(propagatedHeaders, async () => {
146
- return await execute(cleanArgs, ...depsArray);
488
+ let result;
489
+ if (isolationEnabled) {
490
+ // Build serializable depsConfig from depsArray. The worker rebuilds
491
+ // its own proxies (with worker-local undici Agent) via createProxy
492
+ // Python parity, avoids cross-thread proxy state sharing.
493
+ // Read from the non-enumerable Symbol stash so we don't rely on
494
+ // public properties (which we keep non-enumerable to avoid leaking
495
+ // endpoint/customHeaders via JSON.stringify).
496
+ const depsConfig = depsArray.map((d, depIndex) => {
497
+ if (d === null)
498
+ return null;
499
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
500
+ const meta = d[PROXY_DISPATCH_META];
501
+ if (!meta) {
502
+ console.warn(`[mesh] tool '${toolName}' dependency at index ${depIndex} is missing PROXY_DISPATCH_META — ` +
503
+ `this proxy was not created via createProxy() and will arrive as null in the worker. ` +
504
+ `If you are constructing proxies manually, use createProxy() from @mcpmesh/sdk.`);
505
+ return null;
506
+ }
507
+ return {
508
+ endpoint: meta.endpoint,
509
+ capability: meta.capability,
510
+ functionName: meta.functionName,
511
+ kwargs: (meta.kwargs ?? {}),
512
+ };
147
513
  });
148
- });
149
- // Auto-serialize non-string results (like Python SDK does)
150
- // This allows users to return natural types (numbers, objects, arrays)
151
- // without manually calling JSON.stringify() or String()
514
+ result = await poolDispatch({
515
+ toolName,
516
+ cleanArgs,
517
+ depsConfig,
518
+ traceContext,
519
+ propagatedHeaders,
520
+ });
521
+ }
522
+ else {
523
+ // Legacy inline execution on the main thread. Preserved as a clean
524
+ // fallback for users who explicitly opt out of isolation, AND used
525
+ // unconditionally for job-bound tools (see isJobBound above).
526
+ //
527
+ // Phase 1 MeshJob substrate: when this tool is task=true and the
528
+ // inbound headers carry X-Mesh-Job-Id, build a JobController,
529
+ // splice it into the call args at meshJobParamIndex, and run the
530
+ // user function inside both the JS-side ALS (CURRENT_JOB) and the
531
+ // Rust-side task-local (withJobAsync) so cancel-registry binding
532
+ // + outbound header injection work transparently.
533
+ result = await runWithTraceContext(traceContext, async () => {
534
+ return await runWithPropagatedHeaders(propagatedHeaders, async () => {
535
+ if (isTaskTool) {
536
+ const [jobId, deadlineSecs] = readJobHeaders(propagatedHeaders);
537
+ let controller = null;
538
+ if (jobId && this.config.registryUrl && this.agentId) {
539
+ try {
540
+ controller = makeJobController(jobId, this.agentId, this.config.registryUrl);
541
+ }
542
+ catch (err) {
543
+ // Don't silently fall back to a regular tool call —
544
+ // a `task: true` tool that needs a controller will
545
+ // misbehave (return a dict instead of completing the
546
+ // row, leaving the registry's job stuck in `working`
547
+ // until lease expiry). Surface the failure so the
548
+ // outer FastMCP handler reports it AND the inbound
549
+ // wrapper's catch (or its caller) can fail-fast.
550
+ console.error(`[mesh-jobs] makeJobController failed for tool ` +
551
+ `'${toolName}' job=${jobId} agent=${this.agentId} ` +
552
+ `registry=${this.config.registryUrl}:`, err);
553
+ throw err;
554
+ }
555
+ }
556
+ // Build the call args, splicing the controller (or null) at
557
+ // meshJobParamIndex if specified. Position 0 is `args`; deps
558
+ // begin at position 1. The MeshJob slot is orthogonal —
559
+ // when meshJobParamIndex skips a position, deps shift past
560
+ // it (caller's signature must reflect that).
561
+ const callArgs = spliceJobController(cleanArgs, depsArray, controller, meshJobParamIndex);
562
+ // Issue #917: append the framework-cached A2AClient as
563
+ // the trailing positional arg when this tool declares
564
+ // a2aConfig. Mirrors the producer-side JobController
565
+ // splice — A2AClient never participates in the
566
+ // ordered-deps math, it always lands last.
567
+ if (a2aClient !== null) {
568
+ callArgs.push(a2aClient);
569
+ }
570
+ return await runWithJobContext(jobId, deadlineSecs, controller, () => Promise.resolve(execute(...callArgs)), retryOn);
571
+ }
572
+ if (a2aClient !== null) {
573
+ return await execute(cleanArgs, ...depsArray, a2aClient);
574
+ }
575
+ return await execute(cleanArgs, ...depsArray);
576
+ });
577
+ });
578
+ }
579
+ // Auto-serialize non-string results (like Python SDK does).
580
+ // NOTE: structuredContent removed in #917 — FastMCP TS rejects it via
581
+ // strict zod schema (ContentResultZodSchema.strict()) even though
582
+ // the field is part of the MCP spec. Re-enable when FastMCP TS
583
+ // upstream accepts the field. Tracked in #925.
152
584
  if (typeof result === "string") {
153
585
  return result;
154
586
  }
@@ -156,6 +588,10 @@ export class MeshAgent {
156
588
  return "";
157
589
  }
158
590
  else {
591
+ // Return JSON-stringified text only — every consumer parses
592
+ // content[0].text back into an object anyway. FastMCP TS will
593
+ // auto-build {content: [{type: "text", text: <string>}]} from
594
+ // this bare string return, satisfying its strict schema.
159
595
  return JSON.stringify(result);
160
596
  }
161
597
  }
@@ -205,17 +641,66 @@ export class MeshAgent {
205
641
  parameters: parametersWithPassthrough,
206
642
  execute: wrappedExecute,
207
643
  });
644
+ // Phase 1 MeshJob substrate: register a ClaimHandler for this
645
+ // tool so the per-capability ClaimDispatcher (spawned in
646
+ // _autoStart) can dispatch claimed jobs to the same execute fn
647
+ // — without going through FastMCP's HTTP transport. The handler
648
+ // builds the same callArgs shape the inbound wrapper does, but
649
+ // gets the controller passed in directly (no header parsing
650
+ // needed) and bypasses FastMCP's tool-call serialisation.
651
+ if (isTaskTool) {
652
+ const capability = def.capability ?? toolName;
653
+ const handler = async (payload, controller) => {
654
+ const liveDeps = normalizedDeps.map((dep, depIndex) => {
655
+ if (depIndex === meshJobDepIndex) {
656
+ return new MeshJobSubmitter(dep.capability, this.agentId, this.config.registryUrl);
657
+ }
658
+ return this.resolvedDeps.get(`${toolName}:dep_${depIndex}`) ?? null;
659
+ });
660
+ const callArgs = spliceJobController(payload, liveDeps, controller, meshJobParamIndex);
661
+ // Issue #917: A2A consumer tools dispatched via the claim
662
+ // path get the same trailing A2AClient argument as the
663
+ // inbound HTTP path.
664
+ if (a2aClient !== null) {
665
+ callArgs.push(a2aClient);
666
+ }
667
+ return await execute(...callArgs);
668
+ };
669
+ this._taskHandlers.set(capability, { handler, retryOn });
670
+ }
208
671
  // Store mesh metadata with JSON Schema for LLM tool resolution
209
672
  const inputSchema = this.convertZodToJsonSchema(def.parameters);
210
673
  enrichSchemaWithMediaTypes(inputSchema);
674
+ // Issue #547: extract output schema if user supplied one. Zod cannot
675
+ // infer return types from the handler signature, so this is opt-in.
676
+ let outputSchemaRaw;
677
+ if (def.outputSchema) {
678
+ outputSchemaRaw = this.convertZodToJsonSchema(def.outputSchema);
679
+ }
211
680
  this.tools.set(toolName, {
212
681
  capability: def.capability ?? toolName,
213
682
  version: def.version ?? "1.0.0",
214
683
  tags: def.tags ?? [],
215
684
  description: def.description ?? "",
216
685
  inputSchema: JSON.stringify(inputSchema),
686
+ outputSchemaRaw,
687
+ // Issue #547 Phase 4: per-tool override (default true = current behavior).
688
+ outputSchemaStrict: def.outputSchemaStrict !== false,
217
689
  dependencies: normalizedDeps,
218
690
  dependencyKwargs: def.dependencyKwargs,
691
+ // Phase 1 MeshJob substrate: stamp producer's long-running flag
692
+ // so the heartbeat pipeline ships it to the registry. Consumers
693
+ // read this to decide between job semantics and a regular
694
+ // tools/call.
695
+ task: def.task === true,
696
+ meshJobParamIndex: def.meshJobParamIndex,
697
+ meshJobDepIndex: def.meshJobDepIndex,
698
+ // Issue #917: A2A consumer marker so heartbeat-build appends the
699
+ // surrounding agent name to the tag list before shipping to the
700
+ // registry. Captured here at addTool time so a downstream rename
701
+ // of `this.config.name` doesn't desync the registered tag.
702
+ a2aConsumer: def.a2aConfig !== undefined,
703
+ a2aAgentName: def.a2aConfig !== undefined ? this.config.name : undefined,
219
704
  });
220
705
  return this;
221
706
  }
@@ -238,6 +723,12 @@ export class MeshAgent {
238
723
  * ```
239
724
  */
240
725
  addLlmProvider(config) {
726
+ if (this._workerMode) {
727
+ // LLM provider tools are registered with FastMCP directly (not via wrappedExecute),
728
+ // so they don't go through the dispatch path. In worker mode there's no FastMCP
729
+ // server running — just no-op and let the main thread handle LLM calls inline.
730
+ return this;
731
+ }
241
732
  // Create the LLM provider tool definition
242
733
  const toolDef = llmProvider(config);
243
734
  // Add to FastMCP server
@@ -266,11 +757,78 @@ export class MeshAgent {
266
757
  }
267
758
  return this;
268
759
  }
760
+ /**
761
+ * Issue #917: build an `A2ABearer` (or undefined) from the
762
+ * user-friendly auth config supported on `MeshA2AConfig.auth`. The
763
+ * config can be either an `{ token, tokenEnv }` shorthand object OR
764
+ * a pre-built `A2ABearer` instance the user constructed manually.
765
+ *
766
+ * Tightened to `instanceof A2ABearer` so a stray `{ token,
767
+ * authorizationHeader: () => ... }` object cannot duck-type its way
768
+ * past A2ABearer's validation (which catches blank tokens and
769
+ * mutually-exclusive `token`/`tokenEnv`).
770
+ */
771
+ _buildBearerFromConfig(auth) {
772
+ if (auth === undefined)
773
+ return undefined;
774
+ if (auth instanceof A2ABearer)
775
+ return auth;
776
+ return new A2ABearer(auth);
777
+ }
778
+ /**
779
+ * Issue #917: cache `A2AClient` instances by their config tuple so
780
+ * multiple consumer tools targeting the same backend share one
781
+ * outbound connection pool. Auth instances participate in the cache
782
+ * key by reference (same `A2ABearer` ref → same client); two
783
+ * separately-constructed bearers — even ones holding identical
784
+ * tokens — get separate clients. Identity-based keying is the safe
785
+ * default: A2ABearer's private fields make content-fingerprinting
786
+ * impossible from outside, and a content-derived key risks leaking
787
+ * tool-A's bearer onto tool-B's outbound traffic.
788
+ */
789
+ _bearerCacheKey(bearer) {
790
+ if (!bearer)
791
+ return "none";
792
+ // A2AClientConfig.auth permits a raw A2ABearerConfig too, but the
793
+ // call site below always normalises via `_buildBearerFromConfig`
794
+ // first, so in practice we only ever see real A2ABearer instances.
795
+ // Defensively pass-through the config-shape case as a content-free
796
+ // fallback key — never collide with the bearer-id namespace.
797
+ if (!(bearer instanceof A2ABearer))
798
+ return "raw-config";
799
+ let id = this._bearerIds.get(bearer);
800
+ if (id === undefined) {
801
+ id = `bearer-${this._nextBearerId++}`;
802
+ this._bearerIds.set(bearer, id);
803
+ }
804
+ return id;
805
+ }
806
+ _getOrBuildA2AClient(config) {
807
+ const key = [
808
+ config.url,
809
+ config.skillId,
810
+ this._bearerCacheKey(config.auth),
811
+ config.timeoutMs ?? "default",
812
+ config.pollIntervalMs ?? "default",
813
+ config.pollIntervalMaxMs ?? "default",
814
+ ].join("|");
815
+ const existing = this._a2aClients.get(key);
816
+ if (existing)
817
+ return existing;
818
+ const client = new A2AClient(config);
819
+ this._a2aClients.set(key, client);
820
+ return client;
821
+ }
269
822
  /**
270
823
  * Convert Zod schema to JSON Schema.
271
824
  */
272
825
  convertZodToJsonSchema(schema) {
273
- return zodToJsonSchema(schema, { $refStrategy: "none" });
826
+ // $refStrategy: "root" preserves $ref + definitions for recursive Zod
827
+ // schemas (e.g. z.lazy(...)). With "none", zod-to-json-schema can't expand
828
+ // the cycle and falls back to {} (empty), which erases the recursion from
829
+ // the canonical hash. Non-recursive shapes are unchanged because they have
830
+ // no references to inline.
831
+ return zodToJsonSchema(schema, { $refStrategy: "root" });
274
832
  }
275
833
  /**
276
834
  * Internal: Start the agent (called by auto-start mechanism).
@@ -367,11 +925,90 @@ export class MeshAgent {
367
925
  }
368
926
  // 2. Register LLM tools from LlmToolRegistry
369
927
  this.registerLlmTools();
928
+ // 2.5 Phase 1 MeshJob substrate: register the three framework
929
+ // helper tools (`__mesh_job_status`/`_result`/`_cancel`) on
930
+ // every TS agent regardless of whether it owns task=true tools.
931
+ // Mirrors Python's JobsHelperToolsStep. Skipped when there's no
932
+ // registry URL — the helpers can't function without it.
933
+ this.registerJobsHelperTools();
934
+ // 2.6 Phase 1 MeshJob substrate: mount POST /jobs/:job_id/cancel
935
+ // on FastMCP's underlying Hono app so the registry's cancel
936
+ // forwarder can fire the in-process cancel token. Best-effort —
937
+ // failures here are logged, not fatal. When this agent owns
938
+ // task: true tools and the route fails to register, escalate to
939
+ // a second console.error so the operator can't miss the
940
+ // cancel-mid-flight regression in logs.
941
+ if (this.config.registryUrl) {
942
+ const cancelRouteOk = registerCancelRoute(this.server);
943
+ if (!cancelRouteOk && this._taskHandlers.size > 0) {
944
+ console.error(`[mesh-jobs] agent ${this.agentId} owns ${this._taskHandlers.size} ` +
945
+ `task: true tool(s) but the cancel route failed to register. ` +
946
+ `Cancel requests for in-flight jobs will fall through to ` +
947
+ `lease expiry — see the prior [mesh-jobs] error for the cause.`);
948
+ }
949
+ }
370
950
  // 3. Start heartbeat to registry via Rust core
371
951
  await this.startHeartbeat();
952
+ // 3.5 Phase 1 MeshJob substrate: spawn one ClaimDispatcher per
953
+ // task=true tool so the agent can poll the registry's
954
+ // /jobs/claim and dispatch claimed work locally. Started after
955
+ // heartbeat so the registry already knows this replica when the
956
+ // first claim arrives (eliminates the "claim before
957
+ // registration" race).
958
+ this.startClaimDispatchers();
372
959
  // 4. Install signal handlers for graceful shutdown
373
960
  this.installSignalHandlers();
374
961
  }
962
+ /**
963
+ * Phase 1 MeshJob substrate: register the three framework helper
964
+ * tools on the FastMCP server AND in the agent's tool catalog so
965
+ * the heartbeat ships them to the registry as visible capabilities.
966
+ */
967
+ registerJobsHelperTools() {
968
+ if (!this.config.registryUrl) {
969
+ return;
970
+ }
971
+ let helpers;
972
+ try {
973
+ helpers = registerJobHelperTools(this.server, this.config.registryUrl);
974
+ }
975
+ catch (err) {
976
+ console.warn("[mesh-jobs] failed to register job helper tools:", err);
977
+ return;
978
+ }
979
+ for (const [name, meta] of helpers.entries()) {
980
+ // Don't overwrite a user-defined tool with the same name.
981
+ if (this.tools.has(name))
982
+ continue;
983
+ this.tools.set(name, {
984
+ capability: meta.capability,
985
+ version: meta.version,
986
+ tags: meta.tags,
987
+ description: meta.description,
988
+ inputSchema: meta.inputSchema,
989
+ outputSchemaStrict: true,
990
+ dependencies: [],
991
+ dependencyKwargs: undefined,
992
+ task: meta.task,
993
+ });
994
+ }
995
+ }
996
+ /**
997
+ * Phase 1 MeshJob substrate: spawn ClaimDispatchers for every
998
+ * task=true tool registered. Skipped if no registry URL or no task
999
+ * handlers are present.
1000
+ */
1001
+ startClaimDispatchers() {
1002
+ if (!this.config.registryUrl)
1003
+ return;
1004
+ if (this._taskHandlers.size === 0)
1005
+ return;
1006
+ for (const [capability, entry] of this._taskHandlers.entries()) {
1007
+ const dispatcher = new ClaimDispatcher(capability, this.agentId, this.config.registryUrl, entry.handler, entry.retryOn);
1008
+ dispatcher.start();
1009
+ this._claimDispatchers.push(dispatcher);
1010
+ }
1011
+ }
375
1012
  /**
376
1013
  * Register LLM tools from LlmToolRegistry.
377
1014
  * This adds tool metadata for LLM tools created via mesh.llm().
@@ -437,6 +1074,9 @@ export class MeshAgent {
437
1074
  async startHeartbeat() {
438
1075
  // Get LLM tool registry for llmFilter/llmProvider
439
1076
  const llmRegistry = LlmToolRegistry.getInstance();
1077
+ // Issue #547 Phase 4: read cluster strict knob once; per-tool override
1078
+ // is read inside the loop below.
1079
+ const clusterStrict = clusterStrictEnabled();
440
1080
  // Build the agent spec for Rust core
441
1081
  const tools = Array.from(this.tools.entries()).map(([name, meta]) => {
442
1082
  // Check if this tool has LLM config
@@ -457,21 +1097,89 @@ export class MeshAgent {
457
1097
  tags: llmConfig.provider.tags ?? [],
458
1098
  });
459
1099
  }
1100
+ // Issue #547 / Phase 4: normalize via Rust core and apply verdict policy.
1101
+ // Throws on (effective) BLOCK to refuse agent startup; demoted BLOCKs
1102
+ // and WARNs are logged loudly and shipped in schemaWarnings.
1103
+ const toolStrict = meta.outputSchemaStrict !== false;
1104
+ let inputSchemaCanonical;
1105
+ let inputSchemaHash;
1106
+ let outputSchemaCanonical;
1107
+ let outputSchemaHash;
1108
+ const combinedWarnings = [];
1109
+ if (meta.inputSchema) {
1110
+ let inputRaw;
1111
+ try {
1112
+ inputRaw = JSON.parse(meta.inputSchema);
1113
+ }
1114
+ catch {
1115
+ // shouldn't happen, but fall through without normalizing
1116
+ }
1117
+ if (inputRaw) {
1118
+ const r = normalizeSchemaWithPolicy(inputRaw, `tool '${name}' input`, clusterStrict, toolStrict);
1119
+ inputSchemaCanonical = r.canonicalJson ?? undefined;
1120
+ inputSchemaHash = r.hash ?? undefined;
1121
+ combinedWarnings.push(...r.warnings);
1122
+ }
1123
+ }
1124
+ let outputSchemaJson;
1125
+ if (meta.outputSchemaRaw) {
1126
+ outputSchemaJson = JSON.stringify(meta.outputSchemaRaw);
1127
+ const r = normalizeSchemaWithPolicy(meta.outputSchemaRaw, `tool '${name}' output`, clusterStrict, toolStrict);
1128
+ outputSchemaCanonical = r.canonicalJson ?? undefined;
1129
+ outputSchemaHash = r.hash ?? undefined;
1130
+ combinedWarnings.push(...r.warnings);
1131
+ }
1132
+ // Issue #917: when this tool was registered with a2aConfig,
1133
+ // append the consumer agent's name to the tag list (defensive
1134
+ // copy — never mutate meta.tags). Skips when the agent has
1135
+ // no name (consumer-only / nameless agent) or when the tag
1136
+ // already appears, mirrors Java's
1137
+ // MeshToolRegistry.injectConsumerNameTags semantics.
1138
+ let effectiveTags = meta.tags;
1139
+ if (meta.a2aConsumer) {
1140
+ const agentName = meta.a2aAgentName;
1141
+ if (agentName &&
1142
+ agentName.trim() !== "" &&
1143
+ !meta.tags.includes(agentName)) {
1144
+ effectiveTags = [...meta.tags, agentName];
1145
+ }
1146
+ }
460
1147
  return {
461
1148
  functionName: name,
462
1149
  capability: meta.capability,
463
1150
  version: meta.version,
464
- tags: meta.tags,
1151
+ tags: effectiveTags,
465
1152
  description: meta.description,
466
1153
  // Pass dependencies to Rust core for registry resolution
467
1154
  // Note: tags may contain nested arrays for OR alternatives (TagSpec[])
468
1155
  // Serialize to JSON for Rust binding - preserves nested structure
469
- dependencies: meta.dependencies.map((dep) => ({
470
- capability: dep.capability,
471
- tags: JSON.stringify(dep.tags ?? []),
472
- version: dep.version,
473
- })),
1156
+ dependencies: meta.dependencies.map((dep) => {
1157
+ // Issue #547: normalize per-dep expectedSchemaRaw. There's no
1158
+ // per-tool override on the consumer side (override is producer-
1159
+ // side); we still apply cluster strict so WARN→BLOCK works.
1160
+ let expectedCanonical;
1161
+ let expectedHash;
1162
+ if (dep.expectedSchemaRaw) {
1163
+ const r = normalizeSchemaWithPolicy(dep.expectedSchemaRaw, `dependency on '${dep.capability}'`, clusterStrict, true);
1164
+ expectedCanonical = r.canonicalJson ?? undefined;
1165
+ expectedHash = r.hash ?? undefined;
1166
+ }
1167
+ return {
1168
+ capability: dep.capability,
1169
+ tags: JSON.stringify(dep.tags ?? []),
1170
+ version: dep.version,
1171
+ expectedSchemaCanonical: expectedCanonical,
1172
+ expectedSchemaHash: expectedHash,
1173
+ matchMode: dep.matchMode,
1174
+ };
1175
+ }),
474
1176
  inputSchema: meta.inputSchema,
1177
+ outputSchema: outputSchemaJson,
1178
+ inputSchemaCanonical,
1179
+ inputSchemaHash,
1180
+ outputSchemaCanonical,
1181
+ outputSchemaHash,
1182
+ schemaWarnings: combinedWarnings.length > 0 ? combinedWarnings : undefined,
475
1183
  // LLM filter/provider as JSON strings (matches Python format)
476
1184
  llmFilter,
477
1185
  llmProvider,
@@ -709,12 +1417,39 @@ export class MeshAgent {
709
1417
  * Shutdown the agent gracefully.
710
1418
  */
711
1419
  async shutdown() {
1420
+ // Phase 1 MeshJob substrate: stop claim dispatchers first so
1421
+ // they don't pull a fresh job mid-shutdown.
1422
+ for (const d of this._claimDispatchers) {
1423
+ try {
1424
+ await d.stop();
1425
+ }
1426
+ catch (err) {
1427
+ console.warn(`[mesh-jobs] error stopping claim dispatcher:`, err);
1428
+ }
1429
+ }
1430
+ this._claimDispatchers = [];
1431
+ // Issue #917: mark all cached A2AClients closed so any in-flight
1432
+ // user code raises cleanly instead of reusing a torn-down instance.
1433
+ // Close in parallel so one slow client doesn't block the others —
1434
+ // the undici Agent pool is shared via closeHttpPool() below.
1435
+ const closePromises = Array.from(this._a2aClients.values()).map((client) => client.close().catch((err) => {
1436
+ console.warn("[mesh-a2a] Error closing A2AClient:", err);
1437
+ return null;
1438
+ }));
1439
+ await Promise.allSettled(closePromises);
1440
+ this._a2aClients.clear();
712
1441
  try {
713
1442
  await closeHttpPool();
714
1443
  }
715
1444
  catch (err) {
716
1445
  console.warn("Error closing HTTP pool:", err);
717
1446
  }
1447
+ try {
1448
+ await closePool();
1449
+ }
1450
+ catch (err) {
1451
+ console.warn("Error closing tool worker pool:", err);
1452
+ }
718
1453
  if (this.httpsProxy) {
719
1454
  this.httpsProxy.close();
720
1455
  this.httpsProxy = undefined;