npm - @mantyx/sdk - Versions diffs - 0.8.0 → 0.9.1 - Mend

@mantyx/sdk 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/CHANGELOG.md +15 -1
package/dist/a2a-server.cjs +13 -1
package/dist/a2a-server.cjs.map +1 -1
package/dist/a2a-server.d.cts +1 -1
package/dist/a2a-server.d.ts +1 -1
package/dist/a2a-server.js +1 -1
package/dist/{chunk-T5SXWC6E.js → chunk-AE7ZSLBH.js} +120 -3
package/dist/chunk-AE7ZSLBH.js.map +1 -0
package/dist/{client-B3NEFlIU.d.cts → client-BB6cjfsz.d.cts} +194 -2
package/dist/{client-B3NEFlIU.d.ts → client-BB6cjfsz.d.ts} +194 -2
package/dist/index.cjs +120 -3
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +49 -4
package/dist/index.d.ts +49 -4
package/dist/index.js +2 -2
package/dist/index.js.map +1 -1
package/docs/agent-runs-protocol.md +271 -12
package/package.json +1 -1
package/dist/chunk-T5SXWC6E.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as MantyxA2AOptions, M as MantyxClient, p as MantyxClientOptions, q as MantyxMcpOptions, r as MantyxPluginToolRef, s as MantyxToolRef, t as McpToolRef, u as ModelCatalog, v as ModelInfo, O as OutputSchema, R as ReasoningLevel, w as ResultEvent, x as RunEvent, y as RunEventBase, z as RunResult, B as RunSpec, S as ServerToolResultEvent, F as SessionInfo, G as SessionSpec, H as ThinkingDeltaEvent, T as ToolRef, Z as ZodLikeObject, I as defineLocalA2A, J as defineLocalMcp, K as defineLocalTool, N as isLocalA2ATool, P as isLocalMcpServer, Q as isLocalTool, U as mantyxA2A, V as mantyxMcp, W as mantyxPluginTool, X as mantyxTool, Y as parseRunOutput } from './client-B3NEFlIU.cjs';
+export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as LoopDetectedEvent, p as LoopDetection, q as MantyxA2AOptions, M as MantyxClient, r as MantyxClientOptions, s as MantyxMcpOptions, t as MantyxPluginToolRef, u as MantyxToolRef, v as McpToolRef, w as ModelCatalog, x as ModelInfo, O as OutputSchema, R as ReasoningLevel, y as ResultEvent, z as RunEvent, B as RunEventBase, F as RunResult, G as RunSpec, S as ServerToolResultEvent, H as SessionInfo, I as SessionSpec, J as ThinkingDeltaEvent, K as ToolBudget, N as ToolBudgetExceededEvent, P as ToolBudgets, T as ToolRef, Z as ZodLikeObject, Q as defineLocalA2A, U as defineLocalMcp, V as defineLocalTool, W as isLocalA2ATool, X as isLocalMcpServer, Y as isLocalTool, _ as mantyxA2A, $ as mantyxMcp, a0 as mantyxPluginTool, a1 as mantyxTool, a2 as parseRunOutput } from './client-BB6cjfsz.cjs';
 import { z } from 'zod';
 /**
@@ -26,10 +26,55 @@ declare class MantyxToolError extends MantyxError {
     readonly toolName: string;
     constructor(toolName: string, message: string);
 }
+/**
+ * Optional triage attributes the runner attaches to terminal `error`
+ * events. Mirrors the wire fields described in
+ * `docs/agent-runs-protocol.md` §7 ("error event payload fields") so SDK
+ * callers can render structured UI status notes ("model truncated — JSON
+ * likely incomplete") and drive retry policy without re-parsing the
+ * human-readable `message`.
+ */
+interface MantyxRunErrorInit {
+    /**
+     * Canonical category of failure. One of `"rate_limit"`, `"overloaded"`,
+     * `"server"`, `"context_window"`, `"truncation"`, `"invalid_request"`,
+     * `"auth"`, `"timeout"`, `"local_timeout"`, `"upstream_deadline"`,
+     * `"unknown"`. New categories may land additively — callers should
+     * default-branch to `"unknown"` for unrecognized values.
+     */
+    errorClass?: string;
+    /**
+     * Canonical lowercase stop reason normalized across providers
+     * (`"max_tokens"`, `"refusal"`, `"malformed_function_call"`, …). When
+     * present, mirrors the value carried on the last `assistant_message`
+     * event preceding the failure.
+     */
+    finishReason?: string | null;
+    /**
+     * **Best-effort raw bytes** the model emitted before the failure. For
+     * `outputSchema` runs this is likely **incomplete JSON** that will
+     * fail `JSON.parse` — treat it as diagnostic data, never as a
+     * schema-conformant reply.
+     */
+    partialText?: string;
+    /**
+     * Coarse retry hint inherited from the pipeline's error classifier.
+     * Informational; the SDK still owns the actual retry decision.
+     */
+    retryable?: boolean;
+}
 declare class MantyxRunError extends MantyxError {
     readonly runId: string;
     readonly subtype: string;
-    constructor(runId: string, subtype: string, message: string);
+    /** See {@link MantyxRunErrorInit.errorClass}. */
+    readonly errorClass: string | undefined;
+    /** See {@link MantyxRunErrorInit.finishReason}. */
+    readonly finishReason: string | null | undefined;
+    /** See {@link MantyxRunErrorInit.partialText}. */
+    readonly partialText: string | undefined;
+    /** See {@link MantyxRunErrorInit.retryable}. */
+    readonly retryable: boolean | undefined;
+    constructor(runId: string, subtype: string, message: string, init?: MantyxRunErrorInit);
 }
 /**
  * Thrown by {@link parseRunOutput} when the run's terminal text was supposed
@@ -97,6 +142,6 @@ declare function readSseStream(body: ReadableStream<Uint8Array> | null, opts?: S
 /**
  * Release version — synced from repo root VERSION (`npm run sync-version`).
  */
-declare const SDK_VERSION = "0.8.0";
+declare const SDK_VERSION = "0.9.1";
-export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };
+export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, type MantyxRunErrorInit, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as MantyxA2AOptions, M as MantyxClient, p as MantyxClientOptions, q as MantyxMcpOptions, r as MantyxPluginToolRef, s as MantyxToolRef, t as McpToolRef, u as ModelCatalog, v as ModelInfo, O as OutputSchema, R as ReasoningLevel, w as ResultEvent, x as RunEvent, y as RunEventBase, z as RunResult, B as RunSpec, S as ServerToolResultEvent, F as SessionInfo, G as SessionSpec, H as ThinkingDeltaEvent, T as ToolRef, Z as ZodLikeObject, I as defineLocalA2A, J as defineLocalMcp, K as defineLocalTool, N as isLocalA2ATool, P as isLocalMcpServer, Q as isLocalTool, U as mantyxA2A, V as mantyxMcp, W as mantyxPluginTool, X as mantyxTool, Y as parseRunOutput } from './client-B3NEFlIU.js';
+export { A as A2AToolRef, a as AgentSession, b as AgentSpecBase, c as AssistantDeltaEvent, d as AssistantMessageEvent, C as CancelledEvent, D as DEFAULT_BASE_URL, e as DefineLocalA2AOptions, f as DefineLocalMcpOptions, g as DefineLocalToolOptions, E as ErrorEvent, L as LocalA2ATool, h as LocalHandlers, i as LocalMcpHttpTransport, j as LocalMcpServer, k as LocalMcpStdioTransport, l as LocalTool, m as LocalToolCallEvent, n as LocalToolResultInEvent, o as LoopDetectedEvent, p as LoopDetection, q as MantyxA2AOptions, M as MantyxClient, r as MantyxClientOptions, s as MantyxMcpOptions, t as MantyxPluginToolRef, u as MantyxToolRef, v as McpToolRef, w as ModelCatalog, x as ModelInfo, O as OutputSchema, R as ReasoningLevel, y as ResultEvent, z as RunEvent, B as RunEventBase, F as RunResult, G as RunSpec, S as ServerToolResultEvent, H as SessionInfo, I as SessionSpec, J as ThinkingDeltaEvent, K as ToolBudget, N as ToolBudgetExceededEvent, P as ToolBudgets, T as ToolRef, Z as ZodLikeObject, Q as defineLocalA2A, U as defineLocalMcp, V as defineLocalTool, W as isLocalA2ATool, X as isLocalMcpServer, Y as isLocalTool, _ as mantyxA2A, $ as mantyxMcp, a0 as mantyxPluginTool, a1 as mantyxTool, a2 as parseRunOutput } from './client-BB6cjfsz.js';
 import { z } from 'zod';
 /**
@@ -26,10 +26,55 @@ declare class MantyxToolError extends MantyxError {
     readonly toolName: string;
     constructor(toolName: string, message: string);
 }
+/**
+ * Optional triage attributes the runner attaches to terminal `error`
+ * events. Mirrors the wire fields described in
+ * `docs/agent-runs-protocol.md` §7 ("error event payload fields") so SDK
+ * callers can render structured UI status notes ("model truncated — JSON
+ * likely incomplete") and drive retry policy without re-parsing the
+ * human-readable `message`.
+ */
+interface MantyxRunErrorInit {
+    /**
+     * Canonical category of failure. One of `"rate_limit"`, `"overloaded"`,
+     * `"server"`, `"context_window"`, `"truncation"`, `"invalid_request"`,
+     * `"auth"`, `"timeout"`, `"local_timeout"`, `"upstream_deadline"`,
+     * `"unknown"`. New categories may land additively — callers should
+     * default-branch to `"unknown"` for unrecognized values.
+     */
+    errorClass?: string;
+    /**
+     * Canonical lowercase stop reason normalized across providers
+     * (`"max_tokens"`, `"refusal"`, `"malformed_function_call"`, …). When
+     * present, mirrors the value carried on the last `assistant_message`
+     * event preceding the failure.
+     */
+    finishReason?: string | null;
+    /**
+     * **Best-effort raw bytes** the model emitted before the failure. For
+     * `outputSchema` runs this is likely **incomplete JSON** that will
+     * fail `JSON.parse` — treat it as diagnostic data, never as a
+     * schema-conformant reply.
+     */
+    partialText?: string;
+    /**
+     * Coarse retry hint inherited from the pipeline's error classifier.
+     * Informational; the SDK still owns the actual retry decision.
+     */
+    retryable?: boolean;
+}
 declare class MantyxRunError extends MantyxError {
     readonly runId: string;
     readonly subtype: string;
-    constructor(runId: string, subtype: string, message: string);
+    /** See {@link MantyxRunErrorInit.errorClass}. */
+    readonly errorClass: string | undefined;
+    /** See {@link MantyxRunErrorInit.finishReason}. */
+    readonly finishReason: string | null | undefined;
+    /** See {@link MantyxRunErrorInit.partialText}. */
+    readonly partialText: string | undefined;
+    /** See {@link MantyxRunErrorInit.retryable}. */
+    readonly retryable: boolean | undefined;
+    constructor(runId: string, subtype: string, message: string, init?: MantyxRunErrorInit);
 }
 /**
  * Thrown by {@link parseRunOutput} when the run's terminal text was supposed
@@ -97,6 +142,6 @@ declare function readSseStream(body: ReadableStream<Uint8Array> | null, opts?: S
 /**
  * Release version — synced from repo root VERSION (`npm run sync-version`).
  */
-declare const SDK_VERSION = "0.8.0";
+declare const SDK_VERSION = "0.9.1";
-export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };
+export { MantyxAuthError, MantyxError, MantyxNetworkError, MantyxParseError, MantyxRunError, type MantyxRunErrorInit, MantyxToolError, SDK_VERSION, type SseEvent, type SseStreamOptions, readSseStream, toToolParametersWire, zodToJsonSchema };

package/dist/index.js CHANGED Viewed

@@ -22,10 +22,10 @@ import {
   readSseStream,
   toToolParametersWire,
   zodToJsonSchema
-} from "./chunk-T5SXWC6E.js";
+} from "./chunk-AE7ZSLBH.js";
 // src/version.ts
-var SDK_VERSION = "0.8.0";
+var SDK_VERSION = "0.9.1";
 export {
   AgentSession,
   DEFAULT_BASE_URL,

package/dist/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/version.ts"],"sourcesContent":["/*\n Release version — synced from repo root VERSION (`npm run sync-version`).\n */\nexport const SDK_VERSION = \"0.8.0\";\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;AAGO,IAAM,cAAc;","names":[]}
1	+ {"version":3,"sources":["../src/version.ts"],"sourcesContent":["/*\n Release version — synced from repo root VERSION (`npm run sync-version`).\n */\nexport const SDK_VERSION = \"0.9.1\";\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;AAGO,IAAM,cAAc;","names":[]}

package/docs/agent-runs-protocol.md CHANGED Viewed

@@ -231,7 +231,16 @@ The agent spec is the body shape used by `POST /agent-runs` and `POST
       "required": ["city", "temperature_c"]
     }
   },
-  "metadata": {                         // optional, see §4.6
+  "loopDetection": {                    // optional, see §4.6
+    "consecutiveThreshold": 3,
+    "hardCutoffThreshold": 6
+  },
+  "toolBudgets": {                      // optional, see §4.7
+    "recall":                { "maxCalls": 4 },
+    "hive_consult_ontology": { "maxCalls": 4 },
+    "scary_tool":            { "maxCalls": 0 }
+  },
+  "metadata": {                         // optional, see §4.8
     "customer": "acme",
     "env": "prod"
   }
@@ -581,7 +590,138 @@ control of error handling on malformed-but-rare provider outputs.
 `AgentSpec` it built for the run. When the field is omitted, runs return
 unconstrained plain text as before.
-### 4.6 `metadata` (developer-supplied KV for filtering)
+### 4.6 `loopDetection` (steering nudge + hard cutoff)
+`loopDetection` is the wire-protocol projection of the SDK's
+`RunAgentOptions.loopDetection`. The pipeline tracks a canonical
+order-invariant `(toolName, args)` signature for every assistant turn that
+makes one or more tool calls; when the same signature repeats consecutively,
+the guard fires.
+- **`consecutiveThreshold` rounds in a row** (default `3`) — the pipeline
+  skips the duplicate batch with a synthetic "you've made this exact call
+  before" tool result and prepends a user-style **steering nudge**
+  ("either deliver a final answer or change strategy"). The model gets the
+  nudge before its next turn and either finalises or pivots.
+- **`hardCutoffThreshold` rounds in a row** (default `6`) — the pipeline
+  forces a tools-disabled finalise turn (`maxToolTurnsExceeded: "finalize"`
+  semantics) so the run lands cleanly instead of churning forever.
+```jsonc
+"loopDetection": {
+  "consecutiveThreshold": 3,        // optional, default 3 — fires the steering nudge
+  "hardCutoffThreshold":  6         // optional, default 6 — forces finalisation
+}
+```
+The wire shape also accepts the literal `false`:
+```jsonc
+"loopDetection": false              // explicitly disable the guard for this run
+```
+| Field                  | Type            | Required | Notes |
+| ---------------------- | --------------- | -------- | ----- |
+| `consecutiveThreshold` | integer ≥ 2     | no       | Defaults to **3** when the field is omitted. Must be `>= 2` (one identical batch is just a single tool call, not a loop). |
+| `hardCutoffThreshold`  | integer ≥ 3     | no       | Defaults to **6** when the field is omitted. Must be `> consecutiveThreshold`; otherwise the soft nudge would never get a chance to land. |
+| (top-level `false`)    | literal `false` | no       | Disables the guard entirely for this run. The pipeline still enforces `budgets.maxToolTurns`. |
+Validation (server-side, `400 invalid_request` on violation):
+| Constraint                                         | Limit |
+| -------------------------------------------------- | ----- |
+| `consecutiveThreshold` / `hardCutoffThreshold` upper bound | `100` |
+| `hardCutoffThreshold` strictly greater than `consecutiveThreshold` | enforced |
+**Defaults.** When `loopDetection` is omitted entirely, MANTYX applies the
+runtime defaults from `runtime/default-run-guards.ts`:
+`{ consecutiveThreshold: 3, hardCutoffThreshold: 6 }`. This is the same
+configuration used by every in-process runner (chat, schedule, inbound) so
+SDK-driven runs and platform-driven runs behave identically.
+**Inheritance for sessions.**
+- `POST /agent-sessions { loopDetection }` — sets the session-default,
+  applied to every subsequent message run.
+- `POST /agent-sessions/:id/messages { loopDetection }` — optional
+  per-message override; applies to that one run only and does not mutate
+  the session's stored value.
+**Observability.** Each intervention emits an SSE `loop_detected` event
+(see §7) so SDK clients can render `looping — nudged` / `looping — gave up`
+status notes. The actual mechanism (skip + nudge or forced finalise) is
+fully handled server-side; the SDK only needs to surface the event.
+### 4.7 `toolBudgets` (per-tool call caps)
+`toolBudgets` caps how many times a specific tool may execute over the
+**lifetime of the run** (across every LLM turn). Calls under the cap run
+normally; calls past the cap are **intercepted before execution** and
+returned to the model as a synthetic "budget exceeded — pivot or finalize"
+tool result.
+```jsonc
+"toolBudgets": {
+  "recall":                { "maxCalls": 4 },
+  "hive_consult_ontology": { "maxCalls": 4 },
+  "traverse":              { "maxCalls": 3 },
+  "scary_tool":            { "maxCalls": 0 }   // disables the tool for this run
+}
+```
+| Field      | Type        | Required | Notes |
+| ---------- | ----------- | -------- | ----- |
+| `<key>`    | string      | yes      | Logical tool name as the model sees it (the same name on `ResolvedTool.name`; the SDK + pipeline handle sanitisation). 1–120 characters. |
+| `maxCalls` | integer ≥ 0 | yes      | Hard cap on executed calls per run. `0` disables the tool entirely (every attempt returns the synthetic body on the first try). Budgets are **per-tool, not pooled**: `hive_search_deals: { maxCalls: 5 }` and `hive_search_meetings: { maxCalls: 5 }` give the agent five of each, not five between them. |
+Validation (server-side, `400 invalid_request` on violation):
+| Constraint            | Limit |
+| --------------------- | ----- |
+| Max entries           | `32` |
+| `<key>` length        | `1..120` chars |
+| `maxCalls` upper bound | `1000` (functionally unlimited; the SDK's `maxToolTurns: 100` fires first) |
+**Defaults.** When `toolBudgets` is omitted, MANTYX layers the runtime
+defaults from `runtime/default-run-guards.ts` on top of the spec. The
+default research-tool surface is:
+| Tool                                                                                             | Default `maxCalls` |
+| ------------------------------------------------------------------------------------------------ | ------------------ |
+| `recall` (workspace memory hybrid search)                                                        | `4` |
+| `traverse` (memory graph BFS)                                                                    | `3` |
+| `hive_consult_ontology` (per-hive ontology read; same name across all three hives)               | `4` |
+| `hive_search_deals` / `_meetings` / `_companies` / `_people` (Sales Hive general search)         | `5` |
+| `hive_search_tickets` / `_conversations` / `_accounts` (Customer Hive general search)            | `5` |
+| `hive_search_releases` / `_issues` (Product Hive general search)                                 | `5` |
+Pass `"toolBudgets": {}` to start from a clean slate (no defaults applied
+on top — useful for runs that intentionally want unbounded research). When
+both the caller and the runtime defaults specify a budget for the same
+tool, **the caller's value wins**.
+**Inheritance for sessions.**
+- `POST /agent-sessions { toolBudgets }` — sets the session-default,
+  applied to every subsequent message run.
+- `POST /agent-sessions/:id/messages { toolBudgets }` — optional
+  per-message override; applies to that one run only and does not mutate
+  the session's stored value.
+**Observability.** Each interception emits an SSE `tool_budget_exceeded`
+event (see §7) so SDK clients can render `memory budget exhausted` /
+`research cap reached` status notes. The synthetic tool-result is emitted
+on the normal `tool_result` channel just like any other server-resolved
+result, so the run timeline stays linear.
+**Tools NOT capped by default.** `hive_list_*` and `hive_get_*` are
+intentionally not in the default budget map — agents legitimately call
+them once per entity-of-interest, which can easily exceed any small cap
+during normal multi-entity reads. The loop-detection guard catches the
+pathological "same `(name, args)` batch over and over" case for that
+family without needing per-tool caps.
+### 4.8 `metadata` (developer-supplied KV for filtering)
 `metadata` is a flat string→string KV that is **persisted alongside the run /
 session** and surfaced in the MANTYX dashboard. Use it to tag runs with your
@@ -703,8 +843,21 @@ data: <utf-8 JSON>
 // Gemini `includeThoughts`, OpenAI `reasoning_content` on reasoning models).
 { "seq": 2, "type": "thinking_delta", "data": { "text": "First, I should…" } }
-// completed assistant message (text + any tool calls about to execute)
-{ "seq": 3, "type": "assistant_message", "data": { "text": "...", "toolCalls": [...] } }
+// completed assistant message (text + optional tool calls about to execute).
+// `turn` is the 0-based tool-turn index this message closes.
+// `finishReason` is the canonical lowercase stop reason normalized across
+// providers (`"end_turn"`, `"tool_use"`, `"max_tokens"`, `"refusal"`,
+// `"malformed_function_call"`, …); `null` / omitted when the provider did
+// not report one. `toolCalls` is omitted when the model called no tools.
+{ "seq": 3, "type": "assistant_message",
+  "data": {
+    "text": "...",
+    "turn": 0,
+    "finishReason": "tool_use",
+    "toolCalls": [
+      { "id": "call_abc", "name": "search", "input": { /* JSON-Schema-matching args */ } }
+    ]
+  } }
 // server-side tool call/result (informational; SDK does not act on these)
 { "seq": 4, "type": "tool_call",   "data": { "toolUseId": "...", "name": "...", "input": {...} } }
@@ -721,18 +874,80 @@ data: <utf-8 JSON>
 // echo of the SDK's POSTed tool-result, persisted for replay
 { "seq": 7, "type": "local_tool_result_in", "data": { "toolUseId": "tu_x", "output": "127.0.0.1 ..." } }
-// terminal event
+// loop-detection guard fired (see §4.6). Soft nudge: hardCutoff=false. Hard cutoff: hardCutoff=true.
+// `tools` is the (toolName, …) batch the model just repeated; the synthetic skip + nudge are
+// emitted on the normal tool_result + assistant_delta channels — this event is observability only.
+{ "seq": 7, "type": "loop_detected", "data": { "consecutiveCount": 3, "hardCutoff": false, "tools": ["recall"] } }
+// per-tool budget exceeded (see §4.7). The pipeline already surfaced the synthetic
+// "budget exceeded — pivot or finalize" body on the normal tool_result channel; this event
+// is observability so SDK clients can render "memory budget exhausted" status notes.
+{ "seq": 7, "type": "tool_budget_exceeded", "data": { "tool": "recall", "maxCalls": 4, "callIndex": 5 } }
+// terminal event — exactly one of `result`, `error`, or `cancelled` lands per run.
 { "seq": 8, "type": "result",    "data": { "subtype": "success", "text": "Final reply" } }
 { "seq": 8, "type": "result",    "data": { "subtype": "error_local_tool_timeout", "error": "..." } }
+{ "seq": 8, "type": "error",     "data": {
+    "error":        "Model output was truncated (stop_reason=max_tokens). …",
+    "code":         "truncation",
+    "errorClass":   "truncation",
+    "finishReason": "max_tokens",
+    "partialText":  "{\n  \"answer\":… (truncated JSON) …",
+    "retryable":    false
+} }
 { "seq": 8, "type": "cancelled", "data": {} }
 ```
-A run terminates with exactly one of `result` or `cancelled`. The connection
-is closed by the server immediately after sending the terminal event. Clients
-should not assume any particular ordering between the human-readable `event:`
-field and the parsed `type` inside `data` — they are always equal, but
-implementations should rely on `data.type` because some HTTP middleware
-strips the `event:` line.
+A run terminates with exactly one of `result`, `error`, or `cancelled`. The
+connection is closed by the server immediately after sending the terminal
+event. Clients should not assume any particular ordering between the
+human-readable `event:` field and the parsed `type` inside `data` — they
+are always equal, but implementations should rely on `data.type` because
+some HTTP middleware strips the `event:` line.
+**`error` event payload fields.** The runner enriches the `error` event
+with structured triage attributes when the failure carried a salvage
+path (typically truncation, upstream deadline, or max-budget-with-text):
+| Field          | Type     | Required | Notes |
+| -------------- | -------- | -------- | ----- |
+| `error`        | string   | yes      | Human-readable message (also persisted on the run row's `error` column). |
+| `code`         | string   | yes      | Legacy alias for `errorClass`. Equals `errorClass` when present; otherwise a small lowercase token (`"error"`, `"invalid_spec"`, `"worker_error"`, …) the SDK can switch on. |
+| `errorClass`   | string   | no       | Canonical category. One of `"rate_limit"`, `"overloaded"`, `"server"`, `"context_window"` (input too big), `"truncation"` (output budget exhausted), `"invalid_request"`, `"auth"`, `"timeout"`, `"local_timeout"`, `"upstream_deadline"`, `"unknown"`. New categories may land additively. |
+| `finishReason` | string \| null | no | Canonical lowercase stop reason normalized across providers (`"max_tokens"`, `"refusal"`, `"malformed_function_call"`, …). When present, mirrors the value on the last `assistant_message`. |
+| `partialText`  | string   | no       | **Best-effort raw bytes** the model emitted before the failure. For `outputSchema` runs this is likely **incomplete JSON** that will fail `JSON.parse` — see §4.5 / `docs/wire-protocol.md` §7. Also persisted on the run row's `finalText` column so the Calls UI can render it alongside a truncation banner. |
+| `retryable`    | boolean  | no       | Coarse retry hint inherited from the pipeline's error classifier. Informational; the SDK still owns the actual retry decision. |
+**Truncation contract.** When the provider (Gemini / Anthropic / OpenAI)
+hits the output budget while the model is mid-output, MANTYX does **not**
+discard the bytes that already streamed. Instead:
+1. The last `assistant_message` for the turn carries the partial text
+   plus `finishReason: "max_tokens"`.
+2. The terminal SSE event is an `error` (not `result`) with
+   `errorClass: "truncation"` and `data.partialText` set to the same
+   bytes.
+3. The run row exposed by `GET /agent-runs/:runId` has
+   `{ status: "failed", finalText: "<partial text>",
+   error: "Model output was truncated …", failureReason: { errorClass:
+   "truncation", finishReason: "max_tokens" } }`.
+`partialText` is a **best-effort raw byte sequence** — for `outputSchema`
+runs it will almost always fail `JSON.parse` because the JSON object was
+not closed. SDKs should treat it as diagnostic data, never as a
+schema-conformant reply. Surfacing it (as a "truncated reply — JSON
+likely incomplete" status note) is the recommended pattern; silently
+falling back to it as the answer is not.
+**Run snapshot fields.** `GET /agent-runs/:runId` returns the run row
+with these triage-relevant columns:
+| Field           | Notes |
+| --------------- | ----- |
+| `status`        | `"queued" \| "running" \| "succeeded" \| "failed" \| "cancelled"`. |
+| `finalText`     | Final assistant text on success; same string as terminal `data.partialText` when `failureReason.errorClass === "truncation"`. Otherwise `null`. |
+| `error`         | Human-readable error message (matches terminal `error.data.error`). `null` on success / cancellation. |
+| `failureReason` | JSON object `{ errorClass, finishReason }` on `status === "failed"` runs that carried a salvage payload. Future-proof for additional triage fields. `null` otherwise. |
 ## 8. Local tool result
@@ -788,6 +1003,32 @@ Common codes:
 | `run_terminal`         | 409  | Tool-result after run finished |
 | `rate_limited`         | 429  | Per-API-key sliding window |
+**Run-level error categories.** When a run terminates via the SSE `error`
+event (§7), the payload carries an `errorClass` triage category in
+addition to the human-readable `error` message. SDKs typically expose
+this as a typed field on their run-error type (TS `MantyxRunError.errorClass`,
+Python `MantyxRunError.error_class`, Go `RunError.ErrorClass`). The
+canonical set:
+| `errorClass`        | Typical cause | Has `partialText`? |
+| ------------------- | ------------- | ------------------ |
+| `rate_limit`        | Provider rate-limited the request (HTTP 429-equivalent). | No |
+| `overloaded`        | Provider returned a transient "overloaded" / 5xx. | No |
+| `server`            | Generic upstream provider error. | No |
+| `context_window`    | Input exceeded the model's context window. | No |
+| `truncation`        | Output budget exhausted mid-reply (`finishReason: "max_tokens"`). | **Yes** |
+| `invalid_request`   | Provider rejected the spec / params. | No |
+| `auth`              | BYOK credentials invalid for this run. | No |
+| `timeout`           | Generic upstream timeout (provider-side). | No |
+| `local_timeout`     | SDK didn't POST a `tool-result` within `localToolTimeoutMs`. | No |
+| `upstream_deadline` | MANTYX worker deadline exceeded waiting on the provider. | Sometimes |
+| `unknown`           | Anything else — fallback so SDKs always have a category. | No |
+The category set is **additive over the wire**: new categories may
+appear without bumping the protocol version, so SDKs should default to
+`unknown` (or simply pass the raw string through to callers) for
+unrecognized values rather than crashing.
 ## 11. Suggested client architecture
 A reference SDK should:
@@ -831,7 +1072,25 @@ A reference SDK should:
    - Treat `thinking_delta` events as opt-in callback fodder; many UIs hide
      them by default. Their presence depends on `reasoningLevel > 0` and
      on the active model exposing thought parts.
-   - On terminal `result`, resolve the call. On `error` subtype, throw.
+   - Accept `loopDetection` and `toolBudgets` from the caller and pass
+     them through unchanged (see §4.6 / §4.7). Both fields are *additive*:
+     omitting them keeps MANTYX's runtime defaults; passing
+     `loopDetection: false` opts out; passing `toolBudgets: {}` clears the
+     defaults; passing entries layers caller overrides on top of the
+     defaults.
+   - Treat `loop_detected` and `tool_budget_exceeded` SSE events as
+     observability-only — the server already substituted the synthetic
+     tool-results / steering nudges, so the SDK's job is just to surface
+     the event to the caller (status banner, log line, telemetry). Do
+     **not** abort the run on these events; the run continues through
+     `result` / `error` / `cancelled` as usual.
+   - On terminal `result` with `subtype === "success"`, resolve the call
+     with the final `text`; on a `result` with an error subtype (e.g.
+     `error_local_tool_timeout`), throw. On a terminal `error` event,
+     raise a typed run-error that carries the new triage attributes
+     (`errorClass`, `finishReason`, `partialText`, `retryable`) so callers
+     can render "truncated reply — JSON likely incomplete" banners and
+     short-circuit retry policies. Treat `partialText` as **diagnostic**
+     data — never auto-fall-back to it as the final answer.
 4. Re-emit assistant deltas/events as a stream/iterator for callers who care
    about live output.
 5. Treat the protocol as the contract. Implementation details such as Valkey

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mantyx/sdk",
-  "version": "0.8.0",
+  "version": "0.9.1",
   "description": "MANTYX as a hosted agent runtime: define ephemeral agents, mix server-side MANTYX tools with locally-executed tools, run them remotely.",
   "type": "module",
   "main": "./dist/index.cjs",