@mantyx/sdk 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as MantyxA2AOptions, M as MantyxClient, p as MantyxClientOptions, q as MantyxMcpOptions, r as MantyxPluginToolRef, s as MantyxToolRef, t as McpToolRef, u as ModelCatalog, v as ModelInfo, O as OutputSchema, R as ReasoningLevel, w as ResultEvent, x as RunEvent, y as RunEventBase, z as RunResult, B as RunSpec, S as ServerToolResultEvent, F as SessionInfo, G as SessionSpec, H as ThinkingDeltaEvent, T as ToolRef, Z as ZodLikeObject, I as defineLocalA2A, J as defineLocalMcp, K as defineLocalTool, N as isLocalA2ATool, P as isLocalMcpServer, Q as isLocalTool, U as mantyxA2A, V as mantyxMcp, W as mantyxPluginTool, X as mantyxTool, Y as parseRunOutput } from './client-B3NEFlIU.cjs';
1
+ export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as LoopDetectedEvent, p as LoopDetection, q as MantyxA2AOptions, M as MantyxClient, r as MantyxClientOptions, s as MantyxMcpOptions, t as MantyxPluginToolRef, u as MantyxToolRef, v as McpToolRef, w as ModelCatalog, x as ModelInfo, O as OutputSchema, R as ReasoningLevel, y as ResultEvent, z as RunEvent, B as RunEventBase, F as RunResult, G as RunSpec, S as ServerToolResultEvent, H as SessionInfo, I as SessionSpec, J as ThinkingDeltaEvent, K as ToolBudget, N as ToolBudgetExceededEvent, P as ToolBudgets, T as ToolRef, Z as ZodLikeObject, Q as defineLocalA2A, U as defineLocalMcp, V as defineLocalTool, W as isLocalA2ATool, X as isLocalMcpServer, Y as isLocalTool, _ as mantyxA2A, $ as mantyxMcp, a0 as mantyxPluginTool, a1 as mantyxTool, a2 as parseRunOutput } from './client-BB6cjfsz.cjs';
2
2
  import { z } from 'zod';
3
3
 
4
4
  /**
@@ -26,10 +26,55 @@ declare class MantyxToolError extends MantyxError {
26
26
  readonly toolName: string;
27
27
  constructor(toolName: string, message: string);
28
28
  }
29
+ /**
30
+ * Optional triage attributes the runner attaches to terminal `error`
31
+ * events. Mirrors the wire fields described in
32
+ * `docs/agent-runs-protocol.md` §7 ("error event payload fields") so SDK
33
+ * callers can render structured UI status notes ("model truncated — JSON
34
+ * likely incomplete") and drive retry policy without re-parsing the
35
+ * human-readable `message`.
36
+ */
37
+ interface MantyxRunErrorInit {
38
+ /**
39
+ * Canonical category of failure. One of `"rate_limit"`, `"overloaded"`,
40
+ * `"server"`, `"context_window"`, `"truncation"`, `"invalid_request"`,
41
+ * `"auth"`, `"timeout"`, `"local_timeout"`, `"upstream_deadline"`,
42
+ * `"unknown"`. New categories may land additively — callers should
43
+ * default-branch to `"unknown"` for unrecognized values.
44
+ */
45
+ errorClass?: string;
46
+ /**
47
+ * Canonical lowercase stop reason normalized across providers
48
+ * (`"max_tokens"`, `"refusal"`, `"malformed_function_call"`, …). When
49
+ * present, mirrors the value carried on the last `assistant_message`
50
+ * event preceding the failure.
51
+ */
52
+ finishReason?: string | null;
53
+ /**
54
+ * **Best-effort raw bytes** the model emitted before the failure. For
55
+ * `outputSchema` runs this is likely **incomplete JSON** that will
56
+ * fail `JSON.parse` — treat it as diagnostic data, never as a
57
+ * schema-conformant reply.
58
+ */
59
+ partialText?: string;
60
+ /**
61
+ * Coarse retry hint inherited from the pipeline's error classifier.
62
+ * Informational; the SDK still owns the actual retry decision.
63
+ */
64
+ retryable?: boolean;
65
+ }
29
66
  declare class MantyxRunError extends MantyxError {
30
67
  readonly runId: string;
31
68
  readonly subtype: string;
32
- constructor(runId: string, subtype: string, message: string);
69
+ /** See {@link MantyxRunErrorInit.errorClass}. */
70
+ readonly errorClass: string | undefined;
71
+ /** See {@link MantyxRunErrorInit.finishReason}. */
72
+ readonly finishReason: string | null | undefined;
73
+ /** See {@link MantyxRunErrorInit.partialText}. */
74
+ readonly partialText: string | undefined;
75
+ /** See {@link MantyxRunErrorInit.retryable}. */
76
+ readonly retryable: boolean | undefined;
77
+ constructor(runId: string, subtype: string, message: string, init?: MantyxRunErrorInit);
33
78
  }
34
79
  /**
35
80
  * Thrown by {@link parseRunOutput} when the run's terminal text was supposed
@@ -97,6 +142,6 @@ declare function readSseStream(body: ReadableStream<Uint8Array> | null, opts?: S
97
142
  /**
98
143
  * Release version — synced from repo root VERSION (`npm run sync-version`).
99
144
  */
100
- declare const SDK_VERSION = "0.8.0";
145
+ declare const SDK_VERSION = "0.9.1";
101
146
 
102
- export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };
147
+ export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, type MantyxRunErrorInit, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as MantyxA2AOptions, M as MantyxClient, p as MantyxClientOptions, q as MantyxMcpOptions, r as MantyxPluginToolRef, s as MantyxToolRef, t as McpToolRef, u as ModelCatalog, v as ModelInfo, O as OutputSchema, R as ReasoningLevel, w as ResultEvent, x as RunEvent, y as RunEventBase, z as RunResult, B as RunSpec, S as ServerToolResultEvent, F as SessionInfo, G as SessionSpec, H as ThinkingDeltaEvent, T as ToolRef, Z as ZodLikeObject, I as defineLocalA2A, J as defineLocalMcp, K as defineLocalTool, N as isLocalA2ATool, P as isLocalMcpServer, Q as isLocalTool, U as mantyxA2A, V as mantyxMcp, W as mantyxPluginTool, X as mantyxTool, Y as parseRunOutput } from './client-B3NEFlIU.js';
1
+ export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as LoopDetectedEvent, p as LoopDetection, q as MantyxA2AOptions, M as MantyxClient, r as MantyxClientOptions, s as MantyxMcpOptions, t as MantyxPluginToolRef, u as MantyxToolRef, v as McpToolRef, w as ModelCatalog, x as ModelInfo, O as OutputSchema, R as ReasoningLevel, y as ResultEvent, z as RunEvent, B as RunEventBase, F as RunResult, G as RunSpec, S as ServerToolResultEvent, H as SessionInfo, I as SessionSpec, J as ThinkingDeltaEvent, K as ToolBudget, N as ToolBudgetExceededEvent, P as ToolBudgets, T as ToolRef, Z as ZodLikeObject, Q as defineLocalA2A, U as defineLocalMcp, V as defineLocalTool, W as isLocalA2ATool, X as isLocalMcpServer, Y as isLocalTool, _ as mantyxA2A, $ as mantyxMcp, a0 as mantyxPluginTool, a1 as mantyxTool, a2 as parseRunOutput } from './client-BB6cjfsz.js';
2
2
  import { z } from 'zod';
3
3
 
4
4
  /**
@@ -26,10 +26,55 @@ declare class MantyxToolError extends MantyxError {
26
26
  readonly toolName: string;
27
27
  constructor(toolName: string, message: string);
28
28
  }
29
+ /**
30
+ * Optional triage attributes the runner attaches to terminal `error`
31
+ * events. Mirrors the wire fields described in
32
+ * `docs/agent-runs-protocol.md` §7 ("error event payload fields") so SDK
33
+ * callers can render structured UI status notes ("model truncated — JSON
34
+ * likely incomplete") and drive retry policy without re-parsing the
35
+ * human-readable `message`.
36
+ */
37
+ interface MantyxRunErrorInit {
38
+ /**
39
+ * Canonical category of failure. One of `"rate_limit"`, `"overloaded"`,
40
+ * `"server"`, `"context_window"`, `"truncation"`, `"invalid_request"`,
41
+ * `"auth"`, `"timeout"`, `"local_timeout"`, `"upstream_deadline"`,
42
+ * `"unknown"`. New categories may land additively — callers should
43
+ * default-branch to `"unknown"` for unrecognized values.
44
+ */
45
+ errorClass?: string;
46
+ /**
47
+ * Canonical lowercase stop reason normalized across providers
48
+ * (`"max_tokens"`, `"refusal"`, `"malformed_function_call"`, …). When
49
+ * present, mirrors the value carried on the last `assistant_message`
50
+ * event preceding the failure.
51
+ */
52
+ finishReason?: string | null;
53
+ /**
54
+ * **Best-effort raw bytes** the model emitted before the failure. For
55
+ * `outputSchema` runs this is likely **incomplete JSON** that will
56
+ * fail `JSON.parse` — treat it as diagnostic data, never as a
57
+ * schema-conformant reply.
58
+ */
59
+ partialText?: string;
60
+ /**
61
+ * Coarse retry hint inherited from the pipeline's error classifier.
62
+ * Informational; the SDK still owns the actual retry decision.
63
+ */
64
+ retryable?: boolean;
65
+ }
29
66
  declare class MantyxRunError extends MantyxError {
30
67
  readonly runId: string;
31
68
  readonly subtype: string;
32
- constructor(runId: string, subtype: string, message: string);
69
+ /** See {@link MantyxRunErrorInit.errorClass}. */
70
+ readonly errorClass: string | undefined;
71
+ /** See {@link MantyxRunErrorInit.finishReason}. */
72
+ readonly finishReason: string | null | undefined;
73
+ /** See {@link MantyxRunErrorInit.partialText}. */
74
+ readonly partialText: string | undefined;
75
+ /** See {@link MantyxRunErrorInit.retryable}. */
76
+ readonly retryable: boolean | undefined;
77
+ constructor(runId: string, subtype: string, message: string, init?: MantyxRunErrorInit);
33
78
  }
34
79
  /**
35
80
  * Thrown by {@link parseRunOutput} when the run's terminal text was supposed
@@ -97,6 +142,6 @@ declare function readSseStream(body: ReadableStream<Uint8Array> | null, opts?: S
97
142
  /**
98
143
  * Release version — synced from repo root VERSION (`npm run sync-version`).
99
144
  */
100
- declare const SDK_VERSION = "0.8.0";
145
+ declare const SDK_VERSION = "0.9.1";
101
146
 
102
- export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };
147
+ export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, type MantyxRunErrorInit, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };
package/dist/index.js CHANGED
@@ -22,10 +22,10 @@ import {
22
22
  readSseStream,
23
23
  toToolParametersWire,
24
24
  zodToJsonSchema
25
- } from "./chunk-T5SXWC6E.js";
25
+ } from "./chunk-AE7ZSLBH.js";
26
26
 
27
27
  // src/version.ts
28
- var SDK_VERSION = "0.8.0";
28
+ var SDK_VERSION = "0.9.1";
29
29
  export {
30
30
  AgentSession,
31
31
  DEFAULT_BASE_URL,
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/version.ts"],"sourcesContent":["/**\n * Release version — synced from repo root VERSION (`npm run sync-version`).\n */\nexport const SDK_VERSION = \"0.8.0\";\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;AAGO,IAAM,cAAc;","names":[]}
1
+ {"version":3,"sources":["../src/version.ts"],"sourcesContent":["/**\n * Release version — synced from repo root VERSION (`npm run sync-version`).\n */\nexport const SDK_VERSION = \"0.9.1\";\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;AAGO,IAAM,cAAc;","names":[]}
@@ -231,7 +231,16 @@ The agent spec is the body shape used by `POST /agent-runs` and `POST
231
231
  "required": ["city", "temperature_c"]
232
232
  }
233
233
  },
234
- "metadata": { // optional, see §4.6
234
+ "loopDetection": { // optional, see §4.6
235
+ "consecutiveThreshold": 3,
236
+ "hardCutoffThreshold": 6
237
+ },
238
+ "toolBudgets": { // optional, see §4.7
239
+ "recall": { "maxCalls": 4 },
240
+ "hive_consult_ontology": { "maxCalls": 4 },
241
+ "scary_tool": { "maxCalls": 0 }
242
+ },
243
+ "metadata": { // optional, see §4.8
235
244
  "customer": "acme",
236
245
  "env": "prod"
237
246
  }
@@ -581,7 +590,138 @@ control of error handling on malformed-but-rare provider outputs.
581
590
  `AgentSpec` it built for the run. When the field is omitted, runs return
582
591
  unconstrained plain text as before.
583
592
 
584
- ### 4.6 `metadata` (developer-supplied KV for filtering)
593
+ ### 4.6 `loopDetection` (steering nudge + hard cutoff)
594
+
595
+ `loopDetection` is the wire-protocol projection of the SDK's
596
+ `RunAgentOptions.loopDetection`. The pipeline tracks a canonical
597
+ order-invariant `(toolName, args)` signature for every assistant turn that
598
+ makes one or more tool calls; when the same signature repeats consecutively,
599
+ the guard fires.
600
+
601
+ - **`consecutiveThreshold` rounds in a row** (default `3`) — the pipeline
602
+ skips the duplicate batch with a synthetic "you've made this exact call
603
+ before" tool result and prepends a user-style **steering nudge**
604
+ ("either deliver a final answer or change strategy"). The model gets the
605
+ nudge before its next turn and either finalises or pivots.
606
+ - **`hardCutoffThreshold` rounds in a row** (default `6`) — the pipeline
607
+ forces a tools-disabled finalise turn (`maxToolTurnsExceeded: "finalize"`
608
+ semantics) so the run lands cleanly instead of churning forever.
609
+
610
+ ```jsonc
611
+ "loopDetection": {
612
+ "consecutiveThreshold": 3, // optional, default 3 — fires the steering nudge
613
+ "hardCutoffThreshold": 6 // optional, default 6 — forces finalisation
614
+ }
615
+ ```
616
+
617
+ The wire shape also accepts the literal `false`:
618
+
619
+ ```jsonc
620
+ "loopDetection": false // explicitly disable the guard for this run
621
+ ```
622
+
623
+ | Field | Type | Required | Notes |
624
+ | ---------------------- | --------------- | -------- | ----- |
625
+ | `consecutiveThreshold` | integer ≥ 2 | no | Defaults to **3** when the field is omitted. Must be `>= 2` (one identical batch is just a single tool call, not a loop). |
626
+ | `hardCutoffThreshold` | integer ≥ 3 | no | Defaults to **6** when the field is omitted. Must be `> consecutiveThreshold`; otherwise the soft nudge would never get a chance to land. |
627
+ | (top-level `false`) | literal `false` | no | Disables the guard entirely for this run. The pipeline still enforces `budgets.maxToolTurns`. |
628
+
629
+ Validation (server-side, `400 invalid_request` on violation):
630
+
631
+ | Constraint | Limit |
632
+ | -------------------------------------------------- | ----- |
633
+ | `consecutiveThreshold` / `hardCutoffThreshold` upper bound | `100` |
634
+ | `hardCutoffThreshold` strictly greater than `consecutiveThreshold` | enforced |
635
+
636
+ **Defaults.** When `loopDetection` is omitted entirely, MANTYX applies the
637
+ runtime defaults from `runtime/default-run-guards.ts`:
638
+ `{ consecutiveThreshold: 3, hardCutoffThreshold: 6 }`. This is the same
639
+ configuration used by every in-process runner (chat, schedule, inbound) so
640
+ SDK-driven runs and platform-driven runs behave identically.
641
+
642
+ **Inheritance for sessions.**
643
+
644
+ - `POST /agent-sessions { loopDetection }` — sets the session-default,
645
+ applied to every subsequent message run.
646
+ - `POST /agent-sessions/:id/messages { loopDetection }` — optional
647
+ per-message override; applies to that one run only and does not mutate
648
+ the session's stored value.
649
+
650
+ **Observability.** Each intervention emits an SSE `loop_detected` event
651
+ (see §7) so SDK clients can render `looping — nudged` / `looping — gave up`
652
+ status notes. The actual mechanism (skip + nudge or forced finalise) is
653
+ fully handled server-side; the SDK only needs to surface the event.
654
+
655
+ ### 4.7 `toolBudgets` (per-tool call caps)
656
+
657
+ `toolBudgets` caps how many times a specific tool may execute over the
658
+ **lifetime of the run** (across every LLM turn). Calls under the cap run
659
+ normally; calls past the cap are **intercepted before execution** and
660
+ returned to the model as a synthetic "budget exceeded — pivot or finalize"
661
+ tool result.
662
+
663
+ ```jsonc
664
+ "toolBudgets": {
665
+ "recall": { "maxCalls": 4 },
666
+ "hive_consult_ontology": { "maxCalls": 4 },
667
+ "traverse": { "maxCalls": 3 },
668
+ "scary_tool": { "maxCalls": 0 } // disables the tool for this run
669
+ }
670
+ ```
671
+
672
+ | Field | Type | Required | Notes |
673
+ | ---------- | ----------- | -------- | ----- |
674
+ | `<key>` | string | yes | Logical tool name as the model sees it (the same name on `ResolvedTool.name`; the SDK + pipeline handle sanitisation). 1–120 characters. |
675
+ | `maxCalls` | integer ≥ 0 | yes | Hard cap on executed calls per run. `0` disables the tool entirely (every attempt, including the first, returns the synthetic body). Budgets are **per-tool, not pooled**: `hive_search_deals: { maxCalls: 5 }` and `hive_search_meetings: { maxCalls: 5 }` give the agent five of each, not five between them. |
676
+
677
+ Validation (server-side, `400 invalid_request` on violation):
678
+
679
+ | Constraint | Limit |
680
+ | --------------------- | ----- |
681
+ | Max entries | `32` |
682
+ | `<key>` length | `1..120` chars |
683
+ | `maxCalls` upper bound | `1000` (functionally unlimited; the SDK's `maxToolTurns: 100` fires first) |
684
+
685
+ **Defaults.** When `toolBudgets` is omitted, MANTYX layers the runtime
686
+ defaults from `runtime/default-run-guards.ts` on top of the spec. The
687
+ default research-tool surface is:
688
+
689
+ | Tool | Default `maxCalls` |
690
+ | ------------------------------------------------------------------------------------------------ | ------------------ |
691
+ | `recall` (workspace memory hybrid search) | `4` |
692
+ | `traverse` (memory graph BFS) | `3` |
693
+ | `hive_consult_ontology` (per-hive ontology read; same name across all three hives) | `4` |
694
+ | `hive_search_deals` / `_meetings` / `_companies` / `_people` (Sales Hive general search) | `5` |
695
+ | `hive_search_tickets` / `_conversations` / `_accounts` (Customer Hive general search) | `5` |
696
+ | `hive_search_releases` / `_issues` (Product Hive general search) | `5` |
697
+
698
+ Pass `"toolBudgets": {}` to start from a clean slate (no defaults applied
699
+ on top — useful for runs that intentionally want unbounded research). When
700
+ both the caller and the runtime defaults specify a budget for the same
701
+ tool, **the caller's value wins**.
702
+
703
+ **Inheritance for sessions.**
704
+
705
+ - `POST /agent-sessions { toolBudgets }` — sets the session-default,
706
+ applied to every subsequent message run.
707
+ - `POST /agent-sessions/:id/messages { toolBudgets }` — optional
708
+ per-message override; applies to that one run only and does not mutate
709
+ the session's stored value.
710
+
711
+ **Observability.** Each interception emits an SSE `tool_budget_exceeded`
712
+ event (see §7) so SDK clients can render `memory budget exhausted` /
713
+ `research cap reached` status notes. The synthetic tool-result is emitted
714
+ on the normal `tool_result` channel just like any other server-resolved
715
+ result, so the run timeline stays linear.
716
+
717
+ **Tools NOT capped by default.** `hive_list_*` and `hive_get_*` are
718
+ intentionally not in the default budget map — agents legitimately call
719
+ them once per entity-of-interest, which can easily exceed any small cap
720
+ during normal multi-entity reads. The loop-detection guard catches the
721
+ pathological "same `(name, args)` batch over and over" case for that
722
+ family without needing per-tool caps.
723
+
724
+ ### 4.8 `metadata` (developer-supplied KV for filtering)
585
725
 
586
726
  `metadata` is a flat string→string KV that is **persisted alongside the run /
587
727
  session** and surfaced in the MANTYX dashboard. Use it to tag runs with your
@@ -703,8 +843,21 @@ data: <utf-8 JSON>
703
843
  // Gemini `includeThoughts`, OpenAI `reasoning_content` on reasoning models).
704
844
  { "seq": 2, "type": "thinking_delta", "data": { "text": "First, I should…" } }
705
845
 
706
- // completed assistant message (text + any tool calls about to execute)
707
- { "seq": 3, "type": "assistant_message", "data": { "text": "...", "toolCalls": [...] } }
846
+ // completed assistant message (text + optional tool calls about to execute).
847
+ // `turn` is the 0-based tool-turn index this message closes.
848
+ // `finishReason` is the canonical lowercase stop reason normalized across
849
+ // providers (`"end_turn"`, `"tool_use"`, `"max_tokens"`, `"refusal"`,
850
+ // `"malformed_function_call"`, …); `null` / omitted when the provider did
851
+ // not report one. `toolCalls` is omitted when the model called no tools.
852
+ { "seq": 3, "type": "assistant_message",
853
+ "data": {
854
+ "text": "...",
855
+ "turn": 0,
856
+ "finishReason": "tool_use",
857
+ "toolCalls": [
858
+ { "id": "call_abc", "name": "search", "input": { /* JSON-Schema-matching args */ } }
859
+ ]
860
+ } }
708
861
 
709
862
  // server-side tool call/result (informational; SDK does not act on these)
710
863
  { "seq": 4, "type": "tool_call", "data": { "toolUseId": "...", "name": "...", "input": {...} } }
@@ -721,18 +874,80 @@ data: <utf-8 JSON>
721
874
  // echo of the SDK's POSTed tool-result, persisted for replay
722
875
  { "seq": 7, "type": "local_tool_result_in", "data": { "toolUseId": "tu_x", "output": "127.0.0.1 ..." } }
723
876
 
724
- // terminal event
877
+ // loop-detection guard fired (see §4.6). Soft nudge: hardCutoff=false. Hard cutoff: hardCutoff=true.
878
+ // `tools` is the (toolName, …) batch the model just repeated; the synthetic skip + nudge are
879
+ // emitted on the normal tool_result + assistant_delta channels — this event is observability only.
880
+ { "seq": 7, "type": "loop_detected", "data": { "consecutiveCount": 3, "hardCutoff": false, "tools": ["recall"] } }
881
+
882
+ // per-tool budget exceeded (see §4.7). The pipeline already surfaced the synthetic
883
+ // "budget exceeded — pivot or finalize" body on the normal tool_result channel; this event
884
+ // is observability so SDK clients can render "memory budget exhausted" status notes.
885
+ { "seq": 7, "type": "tool_budget_exceeded", "data": { "tool": "recall", "maxCalls": 4, "callIndex": 5 } }
886
+
887
+ // terminal event — exactly one of `result`, `error`, or `cancelled` lands per run.
725
888
  { "seq": 8, "type": "result", "data": { "subtype": "success", "text": "Final reply" } }
726
889
  { "seq": 8, "type": "result", "data": { "subtype": "error_local_tool_timeout", "error": "..." } }
890
+ { "seq": 8, "type": "error", "data": {
891
+ "error": "Model output was truncated (stop_reason=max_tokens). …",
892
+ "code": "truncation",
893
+ "errorClass": "truncation",
894
+ "finishReason": "max_tokens",
895
+ "partialText": "{\n \"answer\":… (truncated JSON) …",
896
+ "retryable": false
897
+ } }
727
898
  { "seq": 8, "type": "cancelled", "data": {} }
728
899
  ```
729
900
 
730
- A run terminates with exactly one of `result` or `cancelled`. The connection
731
- is closed by the server immediately after sending the terminal event. Clients
732
- should not assume any particular ordering between the human-readable `event:`
733
- field and the parsed `type` inside `data` — they are always equal, but
734
- implementations should rely on `data.type` because some HTTP middleware
735
- strips the `event:` line.
901
+ A run terminates with exactly one of `result`, `error`, or `cancelled`. The
902
+ connection is closed by the server immediately after sending the terminal
903
+ event. Clients should not assume any particular ordering between the
904
+ human-readable `event:` field and the parsed `type` inside `data` — they
905
+ are always equal, but implementations should rely on `data.type` because
906
+ some HTTP middleware strips the `event:` line.
907
+
908
+ **`error` event payload fields.** The runner enriches the `error` event
909
+ with structured triage attributes when the failure carried a salvage
910
+ path (typically truncation, upstream deadline, or max-budget-with-text):
911
+
912
+ | Field | Type | Required | Notes |
913
+ | -------------- | -------- | -------- | ----- |
914
+ | `error` | string | yes | Human-readable message (also persisted on the run row's `error` column). |
915
+ | `code` | string | yes | Legacy alias for `errorClass`. Equals `errorClass` when that field is present; otherwise a small lowercase token (`"error"`, `"invalid_spec"`, `"worker_error"`, …) the SDK can switch on. |
916
+ | `errorClass` | string | no | Canonical category. One of `"rate_limit"`, `"overloaded"`, `"server"`, `"context_window"` (input too big), `"truncation"` (output budget exhausted), `"invalid_request"`, `"auth"`, `"timeout"`, `"local_timeout"`, `"upstream_deadline"`, `"unknown"`. New categories may land additively. |
917
+ | `finishReason` | string \| null | no | Canonical lowercase stop reason normalized across providers (`"max_tokens"`, `"refusal"`, `"malformed_function_call"`, …). When present, mirrors the value on the last `assistant_message`. |
918
+ | `partialText` | string | no | **Best-effort raw bytes** the model emitted before the failure. For `outputSchema` runs this is likely **incomplete JSON** that will fail `JSON.parse` — see §4.5 / `docs/wire-protocol.md` §7. Also persisted on the run row's `finalText` column so the Calls UI can render it alongside a truncation banner. |
919
+ | `retryable` | boolean | no | Coarse retry hint inherited from the pipeline's error classifier. Informational; the SDK still owns the actual retry decision. |
920
+
921
+ **Truncation contract.** When the model is mid-output and Gemini /
922
+ Anthropic / OpenAI hit the output budget, MANTYX does **not** discard
923
+ the bytes that already streamed. Instead:
924
+
925
+ 1. The last `assistant_message` for the turn carries the partial text
926
+ plus `finishReason: "max_tokens"`.
927
+ 2. The terminal SSE event is an `error` (not `result`) with
928
+ `errorClass: "truncation"` and `data.partialText` set to the same
929
+ bytes.
930
+ 3. The run row exposed by `GET /agent-runs/:runId` has
931
+ `{ status: "failed", finalText: "<partial text>",
932
+ error: "Model output was truncated …", failureReason: { errorClass:
933
+ "truncation", finishReason: "max_tokens" } }`.
934
+
935
+ `partialText` is a **best-effort raw byte sequence** — for `outputSchema`
936
+ runs it will almost always fail `JSON.parse` because the JSON object was
937
+ not closed. SDKs should treat it as diagnostic data, never as a
938
+ schema-conformant reply. Surfacing it (as a "truncated reply — JSON
939
+ likely incomplete" status note) is the recommended pattern; silently
940
+ falling back to it as the answer is not.
941
+
942
+ **Run snapshot fields.** `GET /agent-runs/:runId` returns the run row
943
+ with these triage-relevant columns:
944
+
945
+ | Field | Notes |
946
+ | --------------- | ----- |
947
+ | `status` | `"queued" \| "running" \| "succeeded" \| "failed" \| "cancelled"`. |
948
+ | `finalText` | Final assistant text on success; same string as terminal `data.partialText` when `failureReason.errorClass === "truncation"`. Otherwise `null`. |
949
+ | `error` | Human-readable error message (matches terminal `error.data.error`). `null` on success / cancellation. |
950
+ | `failureReason` | JSON object `{ errorClass, finishReason }` on `status === "failed"` runs that carried a salvage payload. Future-proof for additional triage fields. `null` otherwise. |
736
951
 
737
952
  ## 8. Local tool result
738
953
 
@@ -788,6 +1003,32 @@ Common codes:
788
1003
  | `run_terminal` | 409 | Tool-result after run finished |
789
1004
  | `rate_limited` | 429 | Per-API-key sliding window |
790
1005
 
1006
+ **Run-level error categories.** When a run terminates via the SSE `error`
1007
+ event (§7), the payload carries an `errorClass` triage category in
1008
+ addition to the human-readable `error` message. SDKs typically expose
1009
+ this as a typed field on their run-error type (TS `MantyxRunError.errorClass`,
1010
+ Python `MantyxRunError.error_class`, Go `RunError.ErrorClass`). The
1011
+ canonical set:
1012
+
1013
+ | `errorClass` | Typical cause | Has `partialText`? |
1014
+ | ------------------- | ------------- | ------------------ |
1015
+ | `rate_limit` | Provider rate-limited the request (HTTP 429-equivalent). | No |
1016
+ | `overloaded` | Provider returned a transient "overloaded" / 5xx. | No |
1017
+ | `server` | Generic upstream provider error. | No |
1018
+ | `context_window` | Input exceeded the model's context window. | No |
1019
+ | `truncation` | Output budget exhausted mid-reply (`finishReason: "max_tokens"`). | **Yes** |
1020
+ | `invalid_request` | Provider rejected the spec / params. | No |
1021
+ | `auth` | BYOK credentials invalid for this run. | No |
1022
+ | `timeout` | Generic upstream timeout (provider-side). | No |
1023
+ | `local_timeout` | SDK didn't POST a `tool-result` within `localToolTimeoutMs`. | No |
1024
+ | `upstream_deadline` | MANTYX worker deadline exceeded waiting on the provider. | Sometimes |
1025
+ | `unknown` | Anything else — fallback so SDKs always have a category. | No |
1026
+
1027
+ The category set is **additive over the wire**: new categories may
1028
+ appear without bumping the protocol version, so SDKs should default to
1029
+ `unknown` (or simply pass the raw string through to callers) for
1030
+ unrecognized values rather than crashing.
1031
+
791
1032
  ## 11. Suggested client architecture
792
1033
 
793
1034
  A reference SDK should:
@@ -831,7 +1072,25 @@ A reference SDK should:
831
1072
  - Treat `thinking_delta` events as opt-in callback fodder; many UIs hide
832
1073
  them by default. Their presence depends on `reasoningLevel > 0` and
833
1074
  on the active model exposing thought parts.
834
- - On terminal `result`, resolve the call. On `error` subtype, throw.
1075
+ - Accept `loopDetection` and `toolBudgets` from the caller and pass
1076
+ them through unchanged (see §4.6 / §4.7). Both fields are *additive*:
1077
+ omitting them keeps MANTYX's runtime defaults; passing
1078
+ `loopDetection: false` opts out; passing `toolBudgets: {}` clears the
1079
+ defaults; passing entries layers caller overrides on top of the
1080
+ defaults.
1081
+ - Treat `loop_detected` and `tool_budget_exceeded` SSE events as
1082
+ observability-only — the server already substituted the synthetic
1083
+ tool-results / steering nudges, so the SDK's job is just to surface
1084
+ the event to the caller (status banner, log line, telemetry). Do
1085
+ **not** abort the run on these events; the run continues through
1086
+ `result` / `error` / `cancelled` as usual.
1087
+ - On terminal `result` with `subtype === "success"`, resolve the call
1088
+ with the final `text`. On a non-success `result` subtype or a terminal `error` event, raise a typed
1089
+ run-error that carries the new triage attributes (`errorClass`,
1090
+ `finishReason`, `partialText`, `retryable`) so callers can render
1091
+ "truncated reply — JSON likely incomplete" banners and short-circuit
1092
+ retry policies. Treat `partialText` as **diagnostic** data — never
1093
+ auto-fall-back to it as the final answer.
835
1094
  4. Re-emit assistant deltas/events as a stream/iterator for callers who care
836
1095
  about live output.
837
1096
  5. Treat the protocol as the contract. Implementation details such as Valkey
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mantyx/sdk",
3
- "version": "0.8.0",
3
+ "version": "0.9.1",
4
4
  "description": "MANTYX as a hosted agent runtime: define ephemeral agents, mix server-side MANTYX tools with locally-executed tools, run them remotely.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",