@mcpmesh/sdk 1.4.1 → 2.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. package/dist/__tests__/a2a/a2a-bearer.spec.d.ts +2 -0
  2. package/dist/__tests__/a2a/a2a-bearer.spec.d.ts.map +1 -0
  3. package/dist/__tests__/a2a/a2a-bearer.spec.js +58 -0
  4. package/dist/__tests__/a2a/a2a-bearer.spec.js.map +1 -0
  5. package/dist/__tests__/a2a/a2a-client.spec.d.ts +2 -0
  6. package/dist/__tests__/a2a/a2a-client.spec.d.ts.map +1 -0
  7. package/dist/__tests__/a2a/a2a-client.spec.js +334 -0
  8. package/dist/__tests__/a2a/a2a-client.spec.js.map +1 -0
  9. package/dist/__tests__/a2a/a2a-job.spec.d.ts +2 -0
  10. package/dist/__tests__/a2a/a2a-job.spec.d.ts.map +1 -0
  11. package/dist/__tests__/a2a/a2a-job.spec.js +255 -0
  12. package/dist/__tests__/a2a/a2a-job.spec.js.map +1 -0
  13. package/dist/__tests__/a2a/a2a-stream.spec.d.ts +2 -0
  14. package/dist/__tests__/a2a/a2a-stream.spec.d.ts.map +1 -0
  15. package/dist/__tests__/a2a/a2a-stream.spec.js +278 -0
  16. package/dist/__tests__/a2a/a2a-stream.spec.js.map +1 -0
  17. package/dist/__tests__/a2a/agent-a2a-config.spec.d.ts +2 -0
  18. package/dist/__tests__/a2a/agent-a2a-config.spec.d.ts.map +1 -0
  19. package/dist/__tests__/a2a/agent-a2a-config.spec.js +262 -0
  20. package/dist/__tests__/a2a/agent-a2a-config.spec.js.map +1 -0
  21. package/dist/__tests__/a2a/producer/auth-filter.spec.d.ts +2 -0
  22. package/dist/__tests__/a2a/producer/auth-filter.spec.d.ts.map +1 -0
  23. package/dist/__tests__/a2a/producer/auth-filter.spec.js +127 -0
  24. package/dist/__tests__/a2a/producer/auth-filter.spec.js.map +1 -0
  25. package/dist/__tests__/a2a/producer/card-builder.spec.d.ts +2 -0
  26. package/dist/__tests__/a2a/producer/card-builder.spec.d.ts.map +1 -0
  27. package/dist/__tests__/a2a/producer/card-builder.spec.js +113 -0
  28. package/dist/__tests__/a2a/producer/card-builder.spec.js.map +1 -0
  29. package/dist/__tests__/a2a/producer/dispatcher.spec.d.ts +2 -0
  30. package/dist/__tests__/a2a/producer/dispatcher.spec.d.ts.map +1 -0
  31. package/dist/__tests__/a2a/producer/dispatcher.spec.js +850 -0
  32. package/dist/__tests__/a2a/producer/dispatcher.spec.js.map +1 -0
  33. package/dist/__tests__/a2a/producer/mount-surface-push.spec.d.ts +2 -0
  34. package/dist/__tests__/a2a/producer/mount-surface-push.spec.d.ts.map +1 -0
  35. package/dist/__tests__/a2a/producer/mount-surface-push.spec.js +164 -0
  36. package/dist/__tests__/a2a/producer/mount-surface-push.spec.js.map +1 -0
  37. package/dist/__tests__/a2a/producer/mount.spec.d.ts +2 -0
  38. package/dist/__tests__/a2a/producer/mount.spec.d.ts.map +1 -0
  39. package/dist/__tests__/a2a/producer/mount.spec.js +433 -0
  40. package/dist/__tests__/a2a/producer/mount.spec.js.map +1 -0
  41. package/dist/__tests__/a2a/producer/public-url-cache.spec.d.ts +2 -0
  42. package/dist/__tests__/a2a/producer/public-url-cache.spec.d.ts.map +1 -0
  43. package/dist/__tests__/a2a/producer/public-url-cache.spec.js +116 -0
  44. package/dist/__tests__/a2a/producer/public-url-cache.spec.js.map +1 -0
  45. package/dist/__tests__/a2a/producer/sse-emitter.spec.d.ts +2 -0
  46. package/dist/__tests__/a2a/producer/sse-emitter.spec.d.ts.map +1 -0
  47. package/dist/__tests__/a2a/producer/sse-emitter.spec.js +754 -0
  48. package/dist/__tests__/a2a/producer/sse-emitter.spec.js.map +1 -0
  49. package/dist/__tests__/a2a/producer/state-translator.spec.d.ts +2 -0
  50. package/dist/__tests__/a2a/producer/state-translator.spec.d.ts.map +1 -0
  51. package/dist/__tests__/a2a/producer/state-translator.spec.js +124 -0
  52. package/dist/__tests__/a2a/producer/state-translator.spec.js.map +1 -0
  53. package/dist/__tests__/a2a/producer/task-store.spec.d.ts +2 -0
  54. package/dist/__tests__/a2a/producer/task-store.spec.d.ts.map +1 -0
  55. package/dist/__tests__/a2a/producer/task-store.spec.js +180 -0
  56. package/dist/__tests__/a2a/producer/task-store.spec.js.map +1 -0
  57. package/dist/__tests__/agent-add-tool.spec.d.ts +2 -0
  58. package/dist/__tests__/agent-add-tool.spec.d.ts.map +1 -0
  59. package/dist/__tests__/agent-add-tool.spec.js +483 -0
  60. package/dist/__tests__/agent-add-tool.spec.js.map +1 -0
  61. package/dist/__tests__/api-runtime-race.spec.d.ts +2 -0
  62. package/dist/__tests__/api-runtime-race.spec.d.ts.map +1 -0
  63. package/dist/__tests__/api-runtime-race.spec.js +193 -0
  64. package/dist/__tests__/api-runtime-race.spec.js.map +1 -0
  65. package/dist/__tests__/claim-dispatcher.spec.d.ts +2 -0
  66. package/dist/__tests__/claim-dispatcher.spec.d.ts.map +1 -0
  67. package/dist/__tests__/claim-dispatcher.spec.js +408 -0
  68. package/dist/__tests__/claim-dispatcher.spec.js.map +1 -0
  69. package/dist/__tests__/inbound-job-dispatch.spec.d.ts +2 -0
  70. package/dist/__tests__/inbound-job-dispatch.spec.d.ts.map +1 -0
  71. package/dist/__tests__/inbound-job-dispatch.spec.js +185 -0
  72. package/dist/__tests__/inbound-job-dispatch.spec.js.map +1 -0
  73. package/dist/__tests__/job-controller-progress.spec.d.ts +2 -0
  74. package/dist/__tests__/job-controller-progress.spec.d.ts.map +1 -0
  75. package/dist/__tests__/job-controller-progress.spec.js +85 -0
  76. package/dist/__tests__/job-controller-progress.spec.js.map +1 -0
  77. package/dist/__tests__/jobs-cancel-route.spec.d.ts +2 -0
  78. package/dist/__tests__/jobs-cancel-route.spec.d.ts.map +1 -0
  79. package/dist/__tests__/jobs-cancel-route.spec.js +88 -0
  80. package/dist/__tests__/jobs-cancel-route.spec.js.map +1 -0
  81. package/dist/__tests__/llm-agent-stream.test.d.ts +14 -0
  82. package/dist/__tests__/llm-agent-stream.test.d.ts.map +1 -0
  83. package/dist/__tests__/llm-agent-stream.test.js +341 -0
  84. package/dist/__tests__/llm-agent-stream.test.js.map +1 -0
  85. package/dist/__tests__/mesh-job-submitter.spec.d.ts +2 -0
  86. package/dist/__tests__/mesh-job-submitter.spec.d.ts.map +1 -0
  87. package/dist/__tests__/mesh-job-submitter.spec.js +110 -0
  88. package/dist/__tests__/mesh-job-submitter.spec.js.map +1 -0
  89. package/dist/__tests__/proxy-stream.test.d.ts +9 -0
  90. package/dist/__tests__/proxy-stream.test.d.ts.map +1 -0
  91. package/dist/__tests__/proxy-stream.test.js +347 -0
  92. package/dist/__tests__/proxy-stream.test.js.map +1 -0
  93. package/dist/__tests__/resolver-meshjob.spec.d.ts +26 -0
  94. package/dist/__tests__/resolver-meshjob.spec.d.ts.map +1 -0
  95. package/dist/__tests__/resolver-meshjob.spec.js +201 -0
  96. package/dist/__tests__/resolver-meshjob.spec.js.map +1 -0
  97. package/dist/__tests__/schema-verdict-policy.test.d.ts +6 -0
  98. package/dist/__tests__/schema-verdict-policy.test.d.ts.map +1 -0
  99. package/dist/__tests__/schema-verdict-policy.test.js +126 -0
  100. package/dist/__tests__/schema-verdict-policy.test.js.map +1 -0
  101. package/dist/__tests__/sse-stream.test.d.ts +12 -0
  102. package/dist/__tests__/sse-stream.test.d.ts.map +1 -0
  103. package/dist/__tests__/sse-stream.test.js +170 -0
  104. package/dist/__tests__/sse-stream.test.js.map +1 -0
  105. package/dist/a2a/a2a-bearer.d.ts +27 -0
  106. package/dist/a2a/a2a-bearer.d.ts.map +1 -0
  107. package/dist/a2a/a2a-bearer.js +63 -0
  108. package/dist/a2a/a2a-bearer.js.map +1 -0
  109. package/dist/a2a/a2a-client.d.ts +114 -0
  110. package/dist/a2a/a2a-client.d.ts.map +1 -0
  111. package/dist/a2a/a2a-client.js +405 -0
  112. package/dist/a2a/a2a-client.js.map +1 -0
  113. package/dist/a2a/a2a-event.d.ts +25 -0
  114. package/dist/a2a/a2a-event.d.ts.map +1 -0
  115. package/dist/a2a/a2a-event.js +9 -0
  116. package/dist/a2a/a2a-event.js.map +1 -0
  117. package/dist/a2a/a2a-job.d.ts +58 -0
  118. package/dist/a2a/a2a-job.d.ts.map +1 -0
  119. package/dist/a2a/a2a-job.js +264 -0
  120. package/dist/a2a/a2a-job.js.map +1 -0
  121. package/dist/a2a/a2a-stream.d.ts +39 -0
  122. package/dist/a2a/a2a-stream.d.ts.map +1 -0
  123. package/dist/a2a/a2a-stream.js +290 -0
  124. package/dist/a2a/a2a-stream.js.map +1 -0
  125. package/dist/a2a/errors.d.ts +29 -0
  126. package/dist/a2a/errors.d.ts.map +1 -0
  127. package/dist/a2a/errors.js +48 -0
  128. package/dist/a2a/errors.js.map +1 -0
  129. package/dist/a2a/index.d.ts +12 -0
  130. package/dist/a2a/index.d.ts.map +1 -0
  131. package/dist/a2a/index.js +11 -0
  132. package/dist/a2a/index.js.map +1 -0
  133. package/dist/a2a/producer/auth-filter.d.ts +34 -0
  134. package/dist/a2a/producer/auth-filter.d.ts.map +1 -0
  135. package/dist/a2a/producer/auth-filter.js +39 -0
  136. package/dist/a2a/producer/auth-filter.js.map +1 -0
  137. package/dist/a2a/producer/card-builder.d.ts +59 -0
  138. package/dist/a2a/producer/card-builder.d.ts.map +1 -0
  139. package/dist/a2a/producer/card-builder.js +59 -0
  140. package/dist/a2a/producer/card-builder.js.map +1 -0
  141. package/dist/a2a/producer/dispatcher.d.ts +276 -0
  142. package/dist/a2a/producer/dispatcher.d.ts.map +1 -0
  143. package/dist/a2a/producer/dispatcher.js +896 -0
  144. package/dist/a2a/producer/dispatcher.js.map +1 -0
  145. package/dist/a2a/producer/index.d.ts +26 -0
  146. package/dist/a2a/producer/index.d.ts.map +1 -0
  147. package/dist/a2a/producer/index.js +23 -0
  148. package/dist/a2a/producer/index.js.map +1 -0
  149. package/dist/a2a/producer/mount.d.ts +75 -0
  150. package/dist/a2a/producer/mount.d.ts.map +1 -0
  151. package/dist/a2a/producer/mount.js +422 -0
  152. package/dist/a2a/producer/mount.js.map +1 -0
  153. package/dist/a2a/producer/public-url-cache.d.ts +73 -0
  154. package/dist/a2a/producer/public-url-cache.d.ts.map +1 -0
  155. package/dist/a2a/producer/public-url-cache.js +0 -0
  156. package/dist/a2a/producer/public-url-cache.js.map +1 -0
  157. package/dist/a2a/producer/registry.d.ts +138 -0
  158. package/dist/a2a/producer/registry.d.ts.map +1 -0
  159. package/dist/a2a/producer/registry.js +117 -0
  160. package/dist/a2a/producer/registry.js.map +1 -0
  161. package/dist/a2a/producer/sse-emitter.d.ts +85 -0
  162. package/dist/a2a/producer/sse-emitter.d.ts.map +1 -0
  163. package/dist/a2a/producer/sse-emitter.js +405 -0
  164. package/dist/a2a/producer/sse-emitter.js.map +1 -0
  165. package/dist/a2a/producer/state-translator.d.ts +63 -0
  166. package/dist/a2a/producer/state-translator.d.ts.map +1 -0
  167. package/dist/a2a/producer/state-translator.js +108 -0
  168. package/dist/a2a/producer/state-translator.js.map +1 -0
  169. package/dist/a2a/producer/task-store.d.ts +128 -0
  170. package/dist/a2a/producer/task-store.d.ts.map +1 -0
  171. package/dist/a2a/producer/task-store.js +128 -0
  172. package/dist/a2a/producer/task-store.js.map +1 -0
  173. package/dist/agent.d.ts +72 -0
  174. package/dist/agent.d.ts.map +1 -1
  175. package/dist/agent.js +618 -13
  176. package/dist/agent.js.map +1 -1
  177. package/dist/api-runtime.d.ts +25 -0
  178. package/dist/api-runtime.d.ts.map +1 -1
  179. package/dist/api-runtime.js +75 -2
  180. package/dist/api-runtime.js.map +1 -1
  181. package/dist/claim-dispatcher.d.ts +126 -0
  182. package/dist/claim-dispatcher.d.ts.map +1 -0
  183. package/dist/claim-dispatcher.js +478 -0
  184. package/dist/claim-dispatcher.js.map +1 -0
  185. package/dist/express.d.ts.map +1 -1
  186. package/dist/express.js +33 -6
  187. package/dist/express.js.map +1 -1
  188. package/dist/inbound-job-dispatch.d.ts +105 -0
  189. package/dist/inbound-job-dispatch.d.ts.map +1 -0
  190. package/dist/inbound-job-dispatch.js +335 -0
  191. package/dist/inbound-job-dispatch.js.map +1 -0
  192. package/dist/index.d.ts +37 -4
  193. package/dist/index.d.ts.map +1 -1
  194. package/dist/index.js +29 -3
  195. package/dist/index.js.map +1 -1
  196. package/dist/job-context.d.ts +107 -0
  197. package/dist/job-context.d.ts.map +1 -0
  198. package/dist/job-context.js +95 -0
  199. package/dist/job-context.js.map +1 -0
  200. package/dist/jobs-cancel-route.d.ts +36 -0
  201. package/dist/jobs-cancel-route.d.ts.map +1 -0
  202. package/dist/jobs-cancel-route.js +60 -0
  203. package/dist/jobs-cancel-route.js.map +1 -0
  204. package/dist/jobs-helper-tools.d.ts +48 -0
  205. package/dist/jobs-helper-tools.d.ts.map +1 -0
  206. package/dist/jobs-helper-tools.js +133 -0
  207. package/dist/jobs-helper-tools.js.map +1 -0
  208. package/dist/llm-agent.d.ts +62 -53
  209. package/dist/llm-agent.d.ts.map +1 -1
  210. package/dist/llm-agent.js +211 -292
  211. package/dist/llm-agent.js.map +1 -1
  212. package/dist/llm-provider.d.ts +4 -4
  213. package/dist/llm.d.ts +4 -1
  214. package/dist/llm.d.ts.map +1 -1
  215. package/dist/llm.js +7 -17
  216. package/dist/llm.js.map +1 -1
  217. package/dist/mesh-job-submitter.d.ts +83 -0
  218. package/dist/mesh-job-submitter.d.ts.map +1 -0
  219. package/dist/mesh-job-submitter.js +143 -0
  220. package/dist/mesh-job-submitter.js.map +1 -0
  221. package/dist/proxy.d.ts +30 -0
  222. package/dist/proxy.d.ts.map +1 -1
  223. package/dist/proxy.js +351 -1
  224. package/dist/proxy.js.map +1 -1
  225. package/dist/resolver-meshjob.d.ts +170 -0
  226. package/dist/resolver-meshjob.d.ts.map +1 -0
  227. package/dist/resolver-meshjob.js +159 -0
  228. package/dist/resolver-meshjob.js.map +1 -0
  229. package/dist/route.d.ts +4 -0
  230. package/dist/route.d.ts.map +1 -1
  231. package/dist/route.js.map +1 -1
  232. package/dist/schema-normalize.d.ts +62 -0
  233. package/dist/schema-normalize.d.ts.map +1 -0
  234. package/dist/schema-normalize.js +128 -0
  235. package/dist/schema-normalize.js.map +1 -0
  236. package/dist/sse-stream.d.ts +44 -0
  237. package/dist/sse-stream.d.ts.map +1 -0
  238. package/dist/sse-stream.js +173 -0
  239. package/dist/sse-stream.js.map +1 -0
  240. package/dist/types.d.ts +351 -9
  241. package/dist/types.d.ts.map +1 -1
  242. package/package.json +4 -3
package/dist/agent.js CHANGED
@@ -14,6 +14,12 @@ import { startAgent, } from "@mcpmesh/core";
14
14
  import { resolveConfig, generateAgentIdSuffix, findAvailablePort } from "./config.js";
15
15
  import { enrichSchemaWithMediaTypes } from "./media-param.js";
16
16
  import { createProxy, normalizeDependency, runWithTraceContext, runWithPropagatedHeaders, PROXY_DISPATCH_META } from "./proxy.js";
17
+ import { readJobHeaders, runWithJobContext, makeJobController, spliceJobController, } from "./inbound-job-dispatch.js";
18
+ import { MeshJobSubmitter } from "./mesh-job-submitter.js";
19
+ import { ClaimDispatcher } from "./claim-dispatcher.js";
20
+ import { registerJobHelperTools } from "./jobs-helper-tools.js";
21
+ import { registerCancelRoute } from "./jobs-cancel-route.js";
22
+ import { clusterStrictEnabled, normalizeSchemaWithPolicy, } from "./schema-normalize.js";
17
23
  import { initTracing, generateTraceId, generateSpanId, publishTraceSpan, matchesPropagateHeader, } from "./tracing.js";
18
24
  import { buildLlmAgentSpecs, handleLlmToolsUpdated, handleLlmProviderAvailable, handleLlmProviderUnavailable, LlmToolRegistry, } from "./llm.js";
19
25
  import { llmProvider, getLlmProviderMeta } from "./llm-provider.js";
@@ -21,6 +27,7 @@ import { findAndSetBasePath } from "./template.js";
21
27
  import { getTlsOptions, getTlsConfigCached, prepareTls, cleanupTls } from "./tls-config.js";
22
28
  import { closeHttpPool } from "./http-pool.js";
23
29
  import { dispatch as poolDispatch, closePool } from "./tool-worker-pool.js";
30
+ import { A2AClient, A2ABearer, } from "./a2a/index.js";
24
31
  /**
25
32
  * Globally-set symbol that user agent code can check to detect whether it
26
33
  * is running inside a mesh tool-isolation worker. The mesh runtime sets
@@ -116,6 +123,41 @@ export class MeshAgent {
116
123
  // In worker mode addTool() only stashes execute fns and skips all FastMCP /
117
124
  // registry / Rust core wiring (no Express port conflict, no double-register).
118
125
  _workerMode = false;
126
+ /**
127
+ * Phase 1 MeshJob substrate: per-tool ClaimHandler for `task: true`
128
+ * tools. Indexed by capability so the ClaimDispatcher can look up
129
+ * the local handler without re-traversing the tools map. Populated
130
+ * by addTool() at registration time; consumed by _autoStart() to
131
+ * spawn one dispatcher per task tool.
132
+ *
133
+ * Issue #894: also carries the per-tool retryOn whitelist so the
134
+ * dispatcher can pass it into `runWithJobContext` for the
135
+ * release-lease-on-retry-eligible-throw path.
136
+ */
137
+ _taskHandlers = new Map();
138
+ /**
139
+ * Active claim dispatchers (one per task=true capability). Started
140
+ * during _autoStart(); stopped during shutdown(). Empty for agents
141
+ * that own no task=true tools.
142
+ */
143
+ _claimDispatchers = [];
144
+ /**
145
+ * Issue #917: cache of `A2AClient` instances keyed by their
146
+ * `(url, skillId, auth, timeoutMs)` tuple so multiple consumer
147
+ * tools targeting the same backend share one outbound connection
148
+ * pool. Closed via `close()` on agent shutdown.
149
+ */
150
+ _a2aClients = new Map();
151
+ /**
152
+ * Issue #917: stable opaque IDs for `A2ABearer` instances used in
153
+ * the A2AClient cache key. Bearer fields are private so we cannot
154
+ * fingerprint by content (would also be a security risk — two
155
+ * tools with distinct literal tokens must NEVER share a cache
156
+ * entry). Identity-based keying is the safe default. `WeakMap`
157
+ * lets bearers be GC'd when the registering tool is removed.
158
+ */
159
+ _bearerIds = new WeakMap();
160
+ _nextBearerId = 0;
119
161
  constructor(server, config) {
120
162
  if (_isWorkerMode) {
121
163
  // Worker thread: skip ALL init. Only addTool() runs (in worker-mode
@@ -158,6 +200,168 @@ export class MeshAgent {
158
200
  addTool(def) {
159
201
  const toolName = def.name;
160
202
  const execute = def.execute;
203
+ // Phase 1 MeshJob substrate: validate `task: true` requires an
204
+ // async function. Long-running tools need a Promise-based control
205
+ // flow so the dispatch wrapper (Phase B) can await
206
+ // `MeshJob.updateProgress()` / cancellation / outbound polling.
207
+ // Fail loudly at `addTool` so the developer sees the misuse before
208
+ // the agent even tries to register with the registry.
209
+ //
210
+ // Heuristic: an async function's constructor.name is "AsyncFunction".
211
+ // We only flag the obvious sync case (a plain arrow/function literal,
212
+ // whose constructor.name is "Function"). Note that this also rejects
213
+ // a sync function that merely returns a Promise; such handlers must
214
+ // be declared `async` to be accepted.
215
+ if (def.task === true) {
216
+ const ctorName = execute
217
+ ?.constructor?.name;
218
+ if (ctorName !== "AsyncFunction") {
219
+ // We can't reliably detect Promise-returning sync functions
220
+ // without invoking them, but we CAN reject the unambiguous
221
+ // "function() { ... }" case where the developer probably
222
+ // forgot the `async` keyword.
223
+ if (ctorName === "Function") {
224
+ throw new Error(`addTool({ task: true }) requires an async execute function; ` +
225
+ `tool '${toolName}' has a sync execute. Mark it 'async' or ` +
226
+ `remove task: true.`);
227
+ }
228
+ // Other constructor names (GeneratorFunction, etc.) are
229
+ // unusual; let them through (no warning is emitted here) rather
230
+ // than blocking — the dispatch wrapper will surface any actual
231
+ // misuse at first invocation.
232
+ }
233
+ }
234
+ // Phase 1 MeshJob substrate (consumer-side validation): if the
235
+ // tool declares meshJobDepIndex, that index MUST be a non-negative
236
+ // integer pointing to a valid dependency. Catch misuse at
237
+ // registration so the developer doesn't see a confusing TypeError
238
+ // at runtime when the wrapper tries to swap the dep proxy for a
239
+ // submitter. Mirrors the meshJobParamIndex validation below —
240
+ // NaN / fractional / negative values must fail-fast here too.
241
+ if (def.meshJobDepIndex !== undefined) {
242
+ const depCount = (def.dependencies ?? []).length;
243
+ const v = def.meshJobDepIndex;
244
+ const isInt = Number.isInteger(v) && v >= 0;
245
+ if (!isInt) {
246
+ throw new Error(`addTool({ meshJobDepIndex: ${v} }) for tool '${toolName}': ` +
247
+ `meshJobDepIndex must be a non-negative integer (index into ` +
248
+ `dependencies[]), got: ${v}`);
249
+ }
250
+ if (v >= depCount) {
251
+ throw new Error(`addTool({ meshJobDepIndex: ${v} }) for tool ` +
252
+ `'${toolName}' is out of range — the tool declares ${depCount} ` +
253
+ `dependencies. meshJobDepIndex must be a valid index into ` +
254
+ `dependencies[].`);
255
+ }
256
+ }
257
+ // Phase 1 MeshJob substrate (producer-side validation): if the
258
+ // tool declares meshJobParamIndex, that position MUST be a sane
259
+ // integer >= 1. Position 0 is reserved for the args payload, so
260
+ // the controller can only land at sig pos 1+. Without this
261
+ // guard, values 0 / negative / NaN / non-integer silently skip
262
+ // controller injection — the user's handler then sees `null`
263
+ // where it expected a JobController and throws a confusing
264
+ // `TypeError: Cannot read properties of null` at first await.
265
+ //
266
+ // Upper bound is a sanity check: > 10 almost certainly means a
267
+ // typo (no real producer signature has that many params).
268
+ if (def.meshJobParamIndex !== undefined) {
269
+ const v = def.meshJobParamIndex;
270
+ const ok = Number.isInteger(v) && v >= 1 && v <= 10;
271
+ if (!ok) {
272
+ throw new Error(`addTool({ meshJobParamIndex: ${v} }) for tool '${toolName}': ` +
273
+ `meshJobParamIndex must be an integer >= 1 (position of MeshJob ` +
274
+ `param after the args payload), got: ${v}`);
275
+ }
276
+ }
277
+ // Issue #894: validate retryOn at registration so misuse fails loud
278
+ // before the agent talks to the registry. Mirror Python's
279
+ // `mesh.decorators` validation in spirit:
280
+ // - retryOn requires task: true (without the job dispatch wrapper
281
+ // there's no controller to release a lease on, so the kwarg is
282
+ // meaningless);
283
+ // - entries must be Error constructor classes (typeof === "function").
284
+ // We don't filter control-flow exceptions like Python's
285
+ // KeyboardInterrupt / asyncio.CancelledError — JavaScript has no
286
+ // direct equivalent, and AbortError-style cancellation is a legitimate
287
+ // retry trigger for some users. They get to choose.
288
+ if (def.retryOn !== undefined) {
289
+ if (def.task !== true) {
290
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn is only ` +
291
+ `valid with task: true; remove retryOn or set task: true.`);
292
+ }
293
+ if (!Array.isArray(def.retryOn)) {
294
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn must be ` +
295
+ `an array of Error constructor classes (e.g., [TypeError, MyError]).`);
296
+ }
297
+ for (const entry of def.retryOn) {
298
+ // Must be a function that has a prototype (i.e. an actual class
299
+ // or a `function` declaration — not an arrow function), AND must
300
+ // either be Error itself or a subclass. Arrow functions have
301
+ // `prototype === undefined`, so `entry.prototype instanceof Error`
302
+ // is `false` for them — they're rejected by the second check.
303
+ // Without this, `err instanceof <arrow>` at dispatch time would
304
+ // throw `TypeError: Right-hand side of instanceof is not callable`.
305
+ if (typeof entry !== "function") {
306
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn entries ` +
307
+ `must be Error constructor classes (functions); got: ${String(entry)}`);
308
+ }
309
+ if (entry !== Error && !(entry.prototype instanceof Error)) {
310
+ throw new Error(`addTool({ retryOn }) for tool '${toolName}': retryOn entries ` +
311
+ `must extend Error (or be Error itself); got: ${String(entry)}`);
312
+ }
313
+ }
314
+ }
315
+ // Issue #917: validate a2aConfig at registration time so misuse fails
316
+ // loud BEFORE the agent talks to the registry. Match the Python
317
+ // `mesh.a2a_consumer` and Java `@A2AConsumer` startup-time checks.
318
+ let a2aClient = null;
319
+ if (def.a2aConfig !== undefined) {
320
+ const cfg = def.a2aConfig;
321
+ if (!cfg.url || cfg.url.trim() === "") {
322
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': url must be ` +
323
+ `a non-empty string.`);
324
+ }
325
+ if (cfg.timeoutMs !== undefined) {
326
+ if (!Number.isFinite(cfg.timeoutMs) || cfg.timeoutMs <= 0) {
327
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': timeoutMs ` +
328
+ `must be a finite positive number (got ${cfg.timeoutMs}).`);
329
+ }
330
+ }
331
+ if (cfg.pollIntervalMs !== undefined) {
332
+ if (!Number.isFinite(cfg.pollIntervalMs) || cfg.pollIntervalMs <= 0) {
333
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': ` +
334
+ `pollIntervalMs must be a finite positive number ` +
335
+ `(got ${cfg.pollIntervalMs}).`);
336
+ }
337
+ }
338
+ if (cfg.pollIntervalMaxMs !== undefined) {
339
+ if (!Number.isFinite(cfg.pollIntervalMaxMs) ||
340
+ cfg.pollIntervalMaxMs <= 0) {
341
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': ` +
342
+ `pollIntervalMaxMs must be a finite positive number ` +
343
+ `(got ${cfg.pollIntervalMaxMs}).`);
344
+ }
345
+ }
346
+ if (cfg.pollIntervalMs !== undefined &&
347
+ cfg.pollIntervalMaxMs !== undefined &&
348
+ cfg.pollIntervalMaxMs < cfg.pollIntervalMs) {
349
+ throw new Error(`addTool({ a2aConfig }) for tool '${toolName}': ` +
350
+ `pollIntervalMaxMs (${cfg.pollIntervalMaxMs}) must be >= ` +
351
+ `pollIntervalMs (${cfg.pollIntervalMs}).`);
352
+ }
353
+ if (!this._workerMode) {
354
+ const skillId = cfg.skillId ?? def.capability ?? toolName;
355
+ a2aClient = this._getOrBuildA2AClient({
356
+ url: cfg.url,
357
+ skillId,
358
+ auth: this._buildBearerFromConfig(cfg.auth),
359
+ timeoutMs: cfg.timeoutMs,
360
+ pollIntervalMs: cfg.pollIntervalMs,
361
+ pollIntervalMaxMs: cfg.pollIntervalMaxMs,
362
+ });
363
+ }
364
+ }
161
365
  // Worker mode: register the raw execute fn in the worker tool map and
162
366
  // skip FastMCP registration, dependency wiring, and metadata storage.
163
367
  // The worker entry will look up tools by name when handling dispatched
@@ -169,10 +373,51 @@ export class MeshAgent {
169
373
  // Normalize dependencies
170
374
  const normalizedDeps = (def.dependencies ?? []).map(normalizeDependency);
171
375
  const depEndpoints = normalizedDeps.map((d) => d.capability);
376
+ // Capture for closures — these reads must be live at invocation
377
+ // time (e.g. registryUrl/agentId aren't set yet at addTool time).
378
+ const isTaskTool = def.task === true;
379
+ const meshJobDepIndex = def.meshJobDepIndex;
380
+ const meshJobParamIndex = def.meshJobParamIndex;
381
+ // Issue #894: per-tool retryOn whitelist threaded into both
382
+ // dispatch paths (inbound HTTP wrapper below + ClaimHandler
383
+ // registered in this.taskHandlers). Captured here so the closure
384
+ // sees a stable reference even if def is mutated post-registration.
385
+ const retryOn = def.retryOn;
386
+ // Phase 1 MeshJob substrate: when a job-bound tool exists AND the
387
+ // user explicitly opted into worker isolation via env, log a single
388
+ // warning at registration time. The wrapper force-disables
389
+ // isolation for job-bound tools because controllers + the
390
+ // AsyncLocalStorage / Rust task-local job context don't cross the
391
+ // worker_threads boundary cleanly. Without this log the
392
+ // force-disable was silent — users who set MCP_MESH_TOOL_ISOLATION
393
+ // expected it to apply to every tool.
394
+ const isJobBoundForLog = isTaskTool || meshJobDepIndex !== undefined;
395
+ const isolationEnvSet = typeof process.env.MCP_MESH_TOOL_ISOLATION === "string" &&
396
+ process.env.MCP_MESH_TOOL_ISOLATION.toLowerCase() !== "false";
397
+ if (isJobBoundForLog && isolationEnvSet) {
398
+ console.warn(`[mesh-tool] '${toolName}' has ` +
399
+ (isTaskTool ? "task: true" : `meshJobDepIndex: ${meshJobDepIndex}`) +
400
+ `; worker isolation is disabled for job-bound tools ` +
401
+ `(controllers/AsyncLocalStorage don't cross worker boundaries). ` +
402
+ `Set 'task: true' explicitly if you intend a producer.`);
403
+ }
172
404
  // Create wrapper that injects dependencies positionally and handles tracing
173
405
  const wrappedExecute = async (args) => {
174
406
  // Build positional deps array using composite keys (toolName:dep_index)
175
- const depsArray = normalizedDeps.map((_, depIndex) => this.resolvedDeps.get(`${toolName}:dep_${depIndex}`) ?? null);
407
+ // Phase 1 MeshJob substrate (consumer-side): if meshJobDepIndex is
408
+ // set, swap the McpMeshTool proxy at that slot for a
409
+ // MeshJobSubmitter targeting that dep's capability. We bind the
410
+ // submitter to the live registryUrl/agentId so it can submit
411
+ // jobs without needing access to the agent instance.
412
+ const depsArray = normalizedDeps.map((dep, depIndex) => {
413
+ if (depIndex === meshJobDepIndex) {
414
+ // Build the submitter lazily per call so we always pick
415
+ // up the current registryUrl (test harnesses sometimes
416
+ // mutate it between calls).
417
+ return new MeshJobSubmitter(dep.capability, this.agentId, this.config.registryUrl);
418
+ }
419
+ return this.resolvedDeps.get(`${toolName}:dep_${depIndex}`) ?? null;
420
+ });
176
421
  const injectedCount = depsArray.filter((d) => d !== null).length;
177
422
  // Extract trace context from arguments (injected by upstream proxy)
178
423
  // This is the fallback mechanism since fastmcp doesn't expose HTTP headers
@@ -219,7 +464,26 @@ export class MeshAgent {
219
464
  // Python implementation in _mcp_mesh/shared/tool_executor.py.
220
465
  // Default ON; set MCP_MESH_TOOL_ISOLATION=false to revert to inline
221
466
  // execution on the main loop (legacy behavior).
222
- const isolationEnabled = (process.env.MCP_MESH_TOOL_ISOLATION ?? "true").toLowerCase() !== "false";
467
+ //
468
+ // Phase 1 MeshJob substrate: force-disable isolation for tools
469
+ // that bind to a JobController or MeshJobSubmitter. The
470
+ // controller/submitter wrap napi-rs handles plus
471
+ // AsyncLocalStorage state that cannot be cleanly serialised
472
+ // across the worker_threads boundary. Running inline on the
473
+ // main loop is the right trade — task=true tools are
474
+ // long-running by definition and benefit less from isolation
475
+ // (their wall-clock time is dominated by the user's `await`s,
476
+ // not CPU bursts that block the event loop).
477
+ // Issue #917: A2A consumer tools force-disable isolation along
478
+ // with job-bound tools. The framework-injected `A2AClient` wraps
479
+ // an undici dispatcher handle that cannot be cleanly serialised
480
+ // across the worker_threads boundary; running inline keeps the
481
+ // cached client + connection pool intact across calls.
482
+ const isA2aBound = a2aClient !== null;
483
+ const isJobBound = isTaskTool || meshJobDepIndex !== undefined;
484
+ const isolationEnabled = !isJobBound &&
485
+ !isA2aBound &&
486
+ (process.env.MCP_MESH_TOOL_ISOLATION ?? "true").toLowerCase() !== "false";
223
487
  try {
224
488
  let result;
225
489
  if (isolationEnabled) {
@@ -257,16 +521,66 @@ export class MeshAgent {
257
521
  }
258
522
  else {
259
523
  // Legacy inline execution on the main thread. Preserved as a clean
260
- // fallback for users who explicitly opt out of isolation.
524
+ // fallback for users who explicitly opt out of isolation, AND used
525
+ // unconditionally for job-bound tools (see isJobBound above).
526
+ //
527
+ // Phase 1 MeshJob substrate: when this tool is task=true and the
528
+ // inbound headers carry X-Mesh-Job-Id, build a JobController,
529
+ // splice it into the call args at meshJobParamIndex, and run the
530
+ // user function inside both the JS-side ALS (CURRENT_JOB) and the
531
+ // Rust-side task-local (withJobAsync) so cancel-registry binding
532
+ // + outbound header injection work transparently.
261
533
  result = await runWithTraceContext(traceContext, async () => {
262
534
  return await runWithPropagatedHeaders(propagatedHeaders, async () => {
535
+ if (isTaskTool) {
536
+ const [jobId, deadlineSecs] = readJobHeaders(propagatedHeaders);
537
+ let controller = null;
538
+ if (jobId && this.config.registryUrl && this.agentId) {
539
+ try {
540
+ controller = makeJobController(jobId, this.agentId, this.config.registryUrl);
541
+ }
542
+ catch (err) {
543
+ // Don't silently fall back to a regular tool call —
544
+ // a `task: true` tool that needs a controller will
545
+ // misbehave (return a dict instead of completing the
546
+ // row, leaving the registry's job stuck in `working`
547
+ // until lease expiry). Surface the failure so the
548
+ // outer FastMCP handler reports it AND the inbound
549
+ // wrapper's catch (or its caller) can fail-fast.
550
+ console.error(`[mesh-jobs] makeJobController failed for tool ` +
551
+ `'${toolName}' job=${jobId} agent=${this.agentId} ` +
552
+ `registry=${this.config.registryUrl}:`, err);
553
+ throw err;
554
+ }
555
+ }
556
+ // Build the call args, splicing the controller (or null) at
557
+ // meshJobParamIndex if specified. Position 0 is `args`; deps
558
+ // begin at position 1. The MeshJob slot is orthogonal —
559
+ // when meshJobParamIndex skips a position, deps shift past
560
+ // it (caller's signature must reflect that).
561
+ const callArgs = spliceJobController(cleanArgs, depsArray, controller, meshJobParamIndex);
562
+ // Issue #917: append the framework-cached A2AClient as
563
+ // the trailing positional arg when this tool declares
564
+ // a2aConfig. Mirrors the producer-side JobController
565
+ // splice — A2AClient never participates in the
566
+ // ordered-deps math, it always lands last.
567
+ if (a2aClient !== null) {
568
+ callArgs.push(a2aClient);
569
+ }
570
+ return await runWithJobContext(jobId, deadlineSecs, controller, () => Promise.resolve(execute(...callArgs)), retryOn);
571
+ }
572
+ if (a2aClient !== null) {
573
+ return await execute(cleanArgs, ...depsArray, a2aClient);
574
+ }
263
575
  return await execute(cleanArgs, ...depsArray);
264
576
  });
265
577
  });
266
578
  }
267
- // Auto-serialize non-string results (like Python SDK does)
268
- // This allows users to return natural types (numbers, objects, arrays)
269
- // without manually calling JSON.stringify() or String()
579
+ // Auto-serialize non-string results (like Python SDK does).
580
+ // NOTE: structuredContent removed in #917 FastMCP TS rejects it via
581
+ // strict zod schema (ContentResultZodSchema.strict()) even though
582
+ // the field is part of the MCP spec. Re-enable when FastMCP TS
583
+ // upstream accepts the field. Tracked in #925.
270
584
  if (typeof result === "string") {
271
585
  return result;
272
586
  }
@@ -274,6 +588,10 @@ export class MeshAgent {
274
588
  return "";
275
589
  }
276
590
  else {
591
+ // Return JSON-stringified text only — every consumer parses
592
+ // content[0].text back into an object anyway. FastMCP TS will
593
+ // auto-build {content: [{type: "text", text: <string>}]} from
594
+ // this bare string return, satisfying its strict schema.
277
595
  return JSON.stringify(result);
278
596
  }
279
597
  }
@@ -323,17 +641,66 @@ export class MeshAgent {
323
641
  parameters: parametersWithPassthrough,
324
642
  execute: wrappedExecute,
325
643
  });
644
+ // Phase 1 MeshJob substrate: register a ClaimHandler for this
645
+ // tool so the per-capability ClaimDispatcher (spawned in
646
+ // _autoStart) can dispatch claimed jobs to the same execute fn
647
+ // — without going through FastMCP's HTTP transport. The handler
648
+ // builds the same callArgs shape the inbound wrapper does, but
649
+ // gets the controller passed in directly (no header parsing
650
+ // needed) and bypasses FastMCP's tool-call serialisation.
651
+ if (isTaskTool) {
652
+ const capability = def.capability ?? toolName;
653
+ const handler = async (payload, controller) => {
654
+ const liveDeps = normalizedDeps.map((dep, depIndex) => {
655
+ if (depIndex === meshJobDepIndex) {
656
+ return new MeshJobSubmitter(dep.capability, this.agentId, this.config.registryUrl);
657
+ }
658
+ return this.resolvedDeps.get(`${toolName}:dep_${depIndex}`) ?? null;
659
+ });
660
+ const callArgs = spliceJobController(payload, liveDeps, controller, meshJobParamIndex);
661
+ // Issue #917: A2A consumer tools dispatched via the claim
662
+ // path get the same trailing A2AClient argument as the
663
+ // inbound HTTP path.
664
+ if (a2aClient !== null) {
665
+ callArgs.push(a2aClient);
666
+ }
667
+ return await execute(...callArgs);
668
+ };
669
+ this._taskHandlers.set(capability, { handler, retryOn });
670
+ }
326
671
  // Store mesh metadata with JSON Schema for LLM tool resolution
327
672
  const inputSchema = this.convertZodToJsonSchema(def.parameters);
328
673
  enrichSchemaWithMediaTypes(inputSchema);
674
+ // Issue #547: extract output schema if user supplied one. Zod cannot
675
+ // infer return types from the handler signature, so this is opt-in.
676
+ let outputSchemaRaw;
677
+ if (def.outputSchema) {
678
+ outputSchemaRaw = this.convertZodToJsonSchema(def.outputSchema);
679
+ }
329
680
  this.tools.set(toolName, {
330
681
  capability: def.capability ?? toolName,
331
682
  version: def.version ?? "1.0.0",
332
683
  tags: def.tags ?? [],
333
684
  description: def.description ?? "",
334
685
  inputSchema: JSON.stringify(inputSchema),
686
+ outputSchemaRaw,
687
+ // Issue #547 Phase 4: per-tool override (default true = current behavior).
688
+ outputSchemaStrict: def.outputSchemaStrict !== false,
335
689
  dependencies: normalizedDeps,
336
690
  dependencyKwargs: def.dependencyKwargs,
691
+ // Phase 1 MeshJob substrate: stamp producer's long-running flag
692
+ // so the heartbeat pipeline ships it to the registry. Consumers
693
+ // read this to decide between job semantics and a regular
694
+ // tools/call.
695
+ task: def.task === true,
696
+ meshJobParamIndex: def.meshJobParamIndex,
697
+ meshJobDepIndex: def.meshJobDepIndex,
698
+ // Issue #917: A2A consumer marker so heartbeat-build appends the
699
+ // surrounding agent name to the tag list before shipping to the
700
+ // registry. Captured here at addTool time so a downstream rename
701
+ // of `this.config.name` doesn't desync the registered tag.
702
+ a2aConsumer: def.a2aConfig !== undefined,
703
+ a2aAgentName: def.a2aConfig !== undefined ? this.config.name : undefined,
337
704
  });
338
705
  return this;
339
706
  }
@@ -390,11 +757,78 @@ export class MeshAgent {
390
757
  }
391
758
  return this;
392
759
  }
760
+ /**
761
+ * Issue #917: build an `A2ABearer` (or undefined) from the
762
+ * user-friendly auth config supported on `MeshA2AConfig.auth`. The
763
+ * config can be either an `{ token, tokenEnv }` shorthand object OR
764
+ * a pre-built `A2ABearer` instance the user constructed manually.
765
+ *
766
+ * Tightened to `instanceof A2ABearer` so a stray `{ token,
767
+ * authorizationHeader: () => ... }` object cannot duck-type its way
768
+ * past A2ABearer's validation (which catches blank tokens and
769
+ * mutually-exclusive `token`/`tokenEnv`).
770
+ */
771
+ _buildBearerFromConfig(auth) {
772
+ if (auth === undefined)
773
+ return undefined;
774
+ if (auth instanceof A2ABearer)
775
+ return auth;
776
+ return new A2ABearer(auth);
777
+ }
778
+ /**
779
+ * Issue #917: cache `A2AClient` instances by their config tuple so
780
+ * multiple consumer tools targeting the same backend share one
781
+ * outbound connection pool. Auth instances participate in the cache
782
+ * key by reference (same `A2ABearer` ref → same client); two
783
+ * separately-constructed bearers — even ones holding identical
784
+ * tokens — get separate clients. Identity-based keying is the safe
785
+ * default: A2ABearer's private fields make content-fingerprinting
786
+ * impossible from outside, and a content-derived key risks leaking
787
+ * tool-A's bearer onto tool-B's outbound traffic.
788
+ */
789
+ _bearerCacheKey(bearer) {
790
+ if (!bearer)
791
+ return "none";
792
+ // A2AClientConfig.auth permits a raw A2ABearerConfig too, but the
793
+ // call site below always normalises via `_buildBearerFromConfig`
794
+ // first, so in practice we only ever see real A2ABearer instances.
795
+ // Defensively pass-through the config-shape case as a content-free
796
+ // fallback key — never collide with the bearer-id namespace.
797
+ if (!(bearer instanceof A2ABearer))
798
+ return "raw-config";
799
+ let id = this._bearerIds.get(bearer);
800
+ if (id === undefined) {
801
+ id = `bearer-${this._nextBearerId++}`;
802
+ this._bearerIds.set(bearer, id);
803
+ }
804
+ return id;
805
+ }
806
+ _getOrBuildA2AClient(config) {
807
+ const key = [
808
+ config.url,
809
+ config.skillId,
810
+ this._bearerCacheKey(config.auth),
811
+ config.timeoutMs ?? "default",
812
+ config.pollIntervalMs ?? "default",
813
+ config.pollIntervalMaxMs ?? "default",
814
+ ].join("|");
815
+ const existing = this._a2aClients.get(key);
816
+ if (existing)
817
+ return existing;
818
+ const client = new A2AClient(config);
819
+ this._a2aClients.set(key, client);
820
+ return client;
821
+ }
393
822
  /**
394
823
  * Convert Zod schema to JSON Schema.
395
824
  */
396
825
  convertZodToJsonSchema(schema) {
397
- return zodToJsonSchema(schema, { $refStrategy: "none" });
826
+ // $refStrategy: "root" preserves $ref + definitions for recursive Zod
827
+ // schemas (e.g. z.lazy(...)). With "none", zod-to-json-schema can't expand
828
+ // the cycle and falls back to {} (empty), which erases the recursion from
829
+ // the canonical hash. Non-recursive shapes are unchanged because they have
830
+ // no references to inline.
831
+ return zodToJsonSchema(schema, { $refStrategy: "root" });
398
832
  }
399
833
  /**
400
834
  * Internal: Start the agent (called by auto-start mechanism).
@@ -491,11 +925,90 @@ export class MeshAgent {
491
925
  }
492
926
  // 2. Register LLM tools from LlmToolRegistry
493
927
  this.registerLlmTools();
928
+ // 2.5 Phase 1 MeshJob substrate: register the three framework
929
+ // helper tools (`__mesh_job_status`/`_result`/`_cancel`) on
930
+ // every TS agent regardless of whether it owns task=true tools.
931
+ // Mirrors Python's JobsHelperToolsStep. Skipped when there's no
932
+ // registry URL — the helpers can't function without it.
933
+ this.registerJobsHelperTools();
934
+ // 2.6 Phase 1 MeshJob substrate: mount POST /jobs/:job_id/cancel
935
+ // on FastMCP's underlying Hono app so the registry's cancel
936
+ // forwarder can fire the in-process cancel token. Best-effort —
937
+ // failures here are logged, not fatal. When this agent owns
938
+ // task: true tools and the route fails to register, escalate to
939
+ // a second console.error so the operator can't miss the
940
+ // cancel-mid-flight regression in logs.
941
+ if (this.config.registryUrl) {
942
+ const cancelRouteOk = registerCancelRoute(this.server);
943
+ if (!cancelRouteOk && this._taskHandlers.size > 0) {
944
+ console.error(`[mesh-jobs] agent ${this.agentId} owns ${this._taskHandlers.size} ` +
945
+ `task: true tool(s) but the cancel route failed to register. ` +
946
+ `Cancel requests for in-flight jobs will fall through to ` +
947
+ `lease expiry — see the prior [mesh-jobs] error for the cause.`);
948
+ }
949
+ }
494
950
  // 3. Start heartbeat to registry via Rust core
495
951
  await this.startHeartbeat();
952
+ // 3.5 Phase 1 MeshJob substrate: spawn one ClaimDispatcher per
953
+ // task=true tool so the agent can poll the registry's
954
+ // /jobs/claim and dispatch claimed work locally. Started after
955
+ // heartbeat so the registry already knows this replica when the
956
+ // first claim arrives (eliminates the "claim before
957
+ // registration" race).
958
+ this.startClaimDispatchers();
496
959
  // 4. Install signal handlers for graceful shutdown
497
960
  this.installSignalHandlers();
498
961
  }
962
+ /**
963
+ * Phase 1 MeshJob substrate: register the three framework helper
964
+ * tools on the FastMCP server AND in the agent's tool catalog so
965
+ * the heartbeat ships them to the registry as visible capabilities.
966
+ */
967
+ registerJobsHelperTools() {
968
+ if (!this.config.registryUrl) {
969
+ return;
970
+ }
971
+ let helpers;
972
+ try {
973
+ helpers = registerJobHelperTools(this.server, this.config.registryUrl);
974
+ }
975
+ catch (err) {
976
+ console.warn("[mesh-jobs] failed to register job helper tools:", err);
977
+ return;
978
+ }
979
+ for (const [name, meta] of helpers.entries()) {
980
+ // Don't overwrite a user-defined tool with the same name.
981
+ if (this.tools.has(name))
982
+ continue;
983
+ this.tools.set(name, {
984
+ capability: meta.capability,
985
+ version: meta.version,
986
+ tags: meta.tags,
987
+ description: meta.description,
988
+ inputSchema: meta.inputSchema,
989
+ outputSchemaStrict: true,
990
+ dependencies: [],
991
+ dependencyKwargs: undefined,
992
+ task: meta.task,
993
+ });
994
+ }
995
+ }
996
+ /**
997
+ * Phase 1 MeshJob substrate: spawn ClaimDispatchers for every
998
+ * task=true tool registered. Skipped if no registry URL or no task
999
+ * handlers are present.
1000
+ */
1001
+ startClaimDispatchers() {
1002
+ if (!this.config.registryUrl)
1003
+ return;
1004
+ if (this._taskHandlers.size === 0)
1005
+ return;
1006
+ for (const [capability, entry] of this._taskHandlers.entries()) {
1007
+ const dispatcher = new ClaimDispatcher(capability, this.agentId, this.config.registryUrl, entry.handler, entry.retryOn);
1008
+ dispatcher.start();
1009
+ this._claimDispatchers.push(dispatcher);
1010
+ }
1011
+ }
499
1012
  /**
500
1013
  * Register LLM tools from LlmToolRegistry.
501
1014
  * This adds tool metadata for LLM tools created via mesh.llm().
@@ -561,6 +1074,9 @@ export class MeshAgent {
561
1074
  async startHeartbeat() {
562
1075
  // Get LLM tool registry for llmFilter/llmProvider
563
1076
  const llmRegistry = LlmToolRegistry.getInstance();
1077
+ // Issue #547 Phase 4: read cluster strict knob once; per-tool override
1078
+ // is read inside the loop below.
1079
+ const clusterStrict = clusterStrictEnabled();
564
1080
  // Build the agent spec for Rust core
565
1081
  const tools = Array.from(this.tools.entries()).map(([name, meta]) => {
566
1082
  // Check if this tool has LLM config
@@ -581,21 +1097,89 @@ export class MeshAgent {
581
1097
  tags: llmConfig.provider.tags ?? [],
582
1098
  });
583
1099
  }
1100
+ // Issue #547 / Phase 4: normalize via Rust core and apply verdict policy.
1101
+ // Throws on (effective) BLOCK to refuse agent startup; demoted BLOCKs
1102
+ // and WARNs are logged loudly and shipped in schemaWarnings.
1103
+ const toolStrict = meta.outputSchemaStrict !== false;
1104
+ let inputSchemaCanonical;
1105
+ let inputSchemaHash;
1106
+ let outputSchemaCanonical;
1107
+ let outputSchemaHash;
1108
+ const combinedWarnings = [];
1109
+ if (meta.inputSchema) {
1110
+ let inputRaw;
1111
+ try {
1112
+ inputRaw = JSON.parse(meta.inputSchema);
1113
+ }
1114
+ catch {
1115
+ // shouldn't happen, but fall through without normalizing
1116
+ }
1117
+ if (inputRaw) {
1118
+ const r = normalizeSchemaWithPolicy(inputRaw, `tool '${name}' input`, clusterStrict, toolStrict);
1119
+ inputSchemaCanonical = r.canonicalJson ?? undefined;
1120
+ inputSchemaHash = r.hash ?? undefined;
1121
+ combinedWarnings.push(...r.warnings);
1122
+ }
1123
+ }
1124
+ let outputSchemaJson;
1125
+ if (meta.outputSchemaRaw) {
1126
+ outputSchemaJson = JSON.stringify(meta.outputSchemaRaw);
1127
+ const r = normalizeSchemaWithPolicy(meta.outputSchemaRaw, `tool '${name}' output`, clusterStrict, toolStrict);
1128
+ outputSchemaCanonical = r.canonicalJson ?? undefined;
1129
+ outputSchemaHash = r.hash ?? undefined;
1130
+ combinedWarnings.push(...r.warnings);
1131
+ }
1132
+ // Issue #917: when this tool was registered with a2aConfig,
1133
+ // append the consumer agent's name to the tag list (defensive
1134
+ // copy — never mutate meta.tags). Skips when the agent has
1135
+ // no name (consumer-only / nameless agent) or when the tag
1136
+ // already appears, mirrors Java's
1137
+ // MeshToolRegistry.injectConsumerNameTags semantics.
1138
+ let effectiveTags = meta.tags;
1139
+ if (meta.a2aConsumer) {
1140
+ const agentName = meta.a2aAgentName;
1141
+ if (agentName &&
1142
+ agentName.trim() !== "" &&
1143
+ !meta.tags.includes(agentName)) {
1144
+ effectiveTags = [...meta.tags, agentName];
1145
+ }
1146
+ }
584
1147
  return {
585
1148
  functionName: name,
586
1149
  capability: meta.capability,
587
1150
  version: meta.version,
588
- tags: meta.tags,
1151
+ tags: effectiveTags,
589
1152
  description: meta.description,
590
1153
  // Pass dependencies to Rust core for registry resolution
591
1154
  // Note: tags may contain nested arrays for OR alternatives (TagSpec[])
592
1155
  // Serialize to JSON for Rust binding - preserves nested structure
593
- dependencies: meta.dependencies.map((dep) => ({
594
- capability: dep.capability,
595
- tags: JSON.stringify(dep.tags ?? []),
596
- version: dep.version,
597
- })),
1156
+ dependencies: meta.dependencies.map((dep) => {
1157
+ // Issue #547: normalize per-dep expectedSchemaRaw. There's no
1158
+ // per-tool override on the consumer side (override is producer-
1159
+ // side); we still apply cluster strict so WARN→BLOCK works.
1160
+ let expectedCanonical;
1161
+ let expectedHash;
1162
+ if (dep.expectedSchemaRaw) {
1163
+ const r = normalizeSchemaWithPolicy(dep.expectedSchemaRaw, `dependency on '${dep.capability}'`, clusterStrict, true);
1164
+ expectedCanonical = r.canonicalJson ?? undefined;
1165
+ expectedHash = r.hash ?? undefined;
1166
+ }
1167
+ return {
1168
+ capability: dep.capability,
1169
+ tags: JSON.stringify(dep.tags ?? []),
1170
+ version: dep.version,
1171
+ expectedSchemaCanonical: expectedCanonical,
1172
+ expectedSchemaHash: expectedHash,
1173
+ matchMode: dep.matchMode,
1174
+ };
1175
+ }),
598
1176
  inputSchema: meta.inputSchema,
1177
+ outputSchema: outputSchemaJson,
1178
+ inputSchemaCanonical,
1179
+ inputSchemaHash,
1180
+ outputSchemaCanonical,
1181
+ outputSchemaHash,
1182
+ schemaWarnings: combinedWarnings.length > 0 ? combinedWarnings : undefined,
599
1183
  // LLM filter/provider as JSON strings (matches Python format)
600
1184
  llmFilter,
601
1185
  llmProvider,
@@ -833,6 +1417,27 @@ export class MeshAgent {
833
1417
  * Shutdown the agent gracefully.
834
1418
  */
835
1419
  async shutdown() {
1420
+ // Phase 1 MeshJob substrate: stop claim dispatchers first so
1421
+ // they don't pull a fresh job mid-shutdown.
1422
+ for (const d of this._claimDispatchers) {
1423
+ try {
1424
+ await d.stop();
1425
+ }
1426
+ catch (err) {
1427
+ console.warn(`[mesh-jobs] error stopping claim dispatcher:`, err);
1428
+ }
1429
+ }
1430
+ this._claimDispatchers = [];
1431
+ // Issue #917: mark all cached A2AClients closed so any in-flight
1432
+ // user code raises cleanly instead of reusing a torn-down instance.
1433
+ // Close in parallel so one slow client doesn't block the others —
1434
+ // the undici Agent pool is shared via closeHttpPool() below.
1435
+ const closePromises = Array.from(this._a2aClients.values()).map((client) => client.close().catch((err) => {
1436
+ console.warn("[mesh-a2a] Error closing A2AClient:", err);
1437
+ return null;
1438
+ }));
1439
+ await Promise.allSettled(closePromises);
1440
+ this._a2aClients.clear();
836
1441
  try {
837
1442
  await closeHttpPool();
838
1443
  }