deepseek-harness 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
+ Metadata-Version: 2.4
+ Name: deepseek-harness
+ Version: 0.2.0
+ Summary: Protocol-aware client for DeepSeek V4-Pro / V4-Flash. Survives the 16 documented quirks; ships the cache discount.
+ Author: Henry Zhang
+ License: MIT
+ Project-URL: Homepage, https://github.com/HenryZ838978/deepseek-harness
+ Project-URL: Reports, https://github.com/HenryZ838978/deepseek-harness/tree/main/reports
+ Project-URL: Spec, https://github.com/HenryZ838978/deepseek-harness/tree/main/spec
+ Keywords: deepseek,llm,openai,agent,harness,mcp
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Topic :: Software Development :: Libraries
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: openai>=1.50.0
+ Requires-Dist: httpx>=0.27.0
+ Requires-Dist: tiktoken>=0.7.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
+ Requires-Dist: ruff>=0.5.0; extra == "dev"
+ Provides-Extra: dotenv
+ Requires-Dist: python-dotenv>=1.0.0; extra == "dotenv"
+
+ # `deepseek-harness`
+
+ Protocol-aware Python client for **DeepSeek V4-Pro / V4-Flash**.
+ Survives the [16 documented quirks](https://github.com/HenryZ838978/deepseek-harness/blob/main/reports/REPORT_2026-05-09.md); ships the 50× cache discount.
+
+ ```bash
+ pip install deepseek-harness
+ ```
+
+ ```python
+ from deepseek_harness import DeepSeekHarness
+
+ c = DeepSeekHarness(disable_thinking_by_default=True)
+ out = c.chat(
+     model="deepseek-v4-pro",
+     messages=[{"role": "user", "content": "Hello"}],
+     max_tokens=4096,
+ )
+ print(out["message"]["content"])
+ print(f"cost: ${out['usage']['estimated_cost_usd']:.6f} · cache hit: {out['usage']['cache_hit_rate']:.0%}")
+ ```
+
+ The harness wraps `openai.OpenAI` and enforces 10 contract rules by default. See the [main repository](https://github.com/HenryZ838978/deepseek-harness) for the full spec, probe corpus, and three other distribution forms (`dsh` CLI, `@deepseek-harness/mcp` server, Anthropic `SKILL.md`).
+
+ License: MIT.
@@ -0,0 +1,25 @@
+ # `deepseek-harness`
+
+ Protocol-aware Python client for **DeepSeek V4-Pro / V4-Flash**.
+ Survives the [16 documented quirks](https://github.com/HenryZ838978/deepseek-harness/blob/main/reports/REPORT_2026-05-09.md); ships the 50× cache discount.
+
+ ```bash
+ pip install deepseek-harness
+ ```
+
+ ```python
+ from deepseek_harness import DeepSeekHarness
+
+ c = DeepSeekHarness(disable_thinking_by_default=True)
+ out = c.chat(
+     model="deepseek-v4-pro",
+     messages=[{"role": "user", "content": "Hello"}],
+     max_tokens=4096,
+ )
+ print(out["message"]["content"])
+ print(f"cost: ${out['usage']['estimated_cost_usd']:.6f} · cache hit: {out['usage']['cache_hit_rate']:.0%}")
+ ```
+
+ The harness wraps `openai.OpenAI` and enforces 10 contract rules by default. See the [main repository](https://github.com/HenryZ838978/deepseek-harness) for the full spec, probe corpus, and three other distribution forms (`dsh` CLI, `@deepseek-harness/mcp` server, Anthropic `SKILL.md`).
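+
+ Streaming goes through `stream_chat()`, which yields structured events rather than
+ raw SSE chunks. A minimal sketch (event names as documented in the client module;
+ the payload handling is illustrative):
+
+ ```python
+ for ev in c.stream_chat(
+     model="deepseek-v4-pro",
+     messages=[{"role": "user", "content": "Hello"}],
+ ):
+     if ev["type"] == "content_delta":
+         print(ev["data"], end="", flush=True)
+     elif ev["type"] == "done":
+         print("\n", ev["usage"])
+ ```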
+
+ License: MIT.
@@ -0,0 +1,41 @@
+ """deepseek-harness · core — protocol-aware client for DeepSeek V4-Pro / V4-Flash.
+
+ Validated by 16 probes documented in `reports/REPORT_2026-05-09.md`.
+
+ Public API::
+
+     from deepseek_harness import DeepSeekHarness, normalize_usage, estimate_cache_hit
+ """
+
+ from .client import DeepSeekHarness
+ from .cache import estimate_cache_hit, normalize_usage
+ from .reasoning import ReasoningLifecycle
+ from .tool_calls import salvage_tool_calls_from_content
+ from .exceptions import (
+     HarnessError,
+     ReasoningContentMissingError,
+     ToolCallLeakageError,
+     StrictModeCorruptionError,
+     StreamShapeError,
+ )
+
+ # Backwards-compatible aliases (transitional, kept for one minor release).
+ DeepSeekClient = DeepSeekHarness
+ DeepSeekKitError = HarnessError
+
+ __all__ = [
+     "DeepSeekHarness",
+     "DeepSeekClient",
+     "ReasoningLifecycle",
+     "salvage_tool_calls_from_content",
+     "estimate_cache_hit",
+     "normalize_usage",
+     "HarnessError",
+     "DeepSeekKitError",
+     "ReasoningContentMissingError",
+     "ToolCallLeakageError",
+     "StrictModeCorruptionError",
+     "StreamShapeError",
+ ]
+
+ __version__ = "0.2.0"
@@ -0,0 +1,198 @@
+ """Cache-hit field bridging + a local prefix-cache estimator.
+
+ Two concrete pains:
+
+ 1. Field naming mismatch (pi-mono#3880):
+    - DeepSeek puts the cache-hit token count in `usage.prompt_cache_hit_tokens`
+    - OpenAI puts it in `usage.prompt_tokens_details.cached_tokens`
+    - Vanilla OpenAI parsers see a 0% cache hit even when DeepSeek is happily
+      charging you the cached price. `normalize_usage()` below back-fills both fields.
+
+ 2. The DeepSeek cache only triggers on a **byte-for-byte prefix match starting from
+    token 0**, with a practical minimum prefix of ~1024 tokens. `estimate_cache_hit()`
+    is a local pre-flight estimator: feed it the messages you are about to send plus
+    the prefix you saw "stick" in the previous request, and it tells you the longest
+    common prefix in tokens.
+
+ References:
+     deepseek-ai/DeepSeek-V3#1261 (V3.2→V4 cache hit rate regression 92%→35%)
+     pi-mono#3880 (field mismatch fix)
+     DeepSeek docs: https://api-docs.deepseek.com/guides/kv_cache
+ """
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ try:
+     import tiktoken
+ except ImportError:  # pragma: no cover
+     tiktoken = None
+
+
+ # DeepSeek V4-Flash quoted prices, USD per million tokens.
+ PRICE_PER_M_INPUT_MISS = 0.14
+ PRICE_PER_M_INPUT_HIT = 0.0028
+ PRICE_PER_M_OUTPUT = 0.28
+
+
+ def normalize_usage(usage: dict | Any) -> dict:
+     """Return a dict that has BOTH field shapes filled in.
+
+     Accepts the raw `usage` dict from a DeepSeek response (or an OpenAI usage object).
+     Output always includes:
+     - prompt_cache_hit_tokens (int)
+     - prompt_cache_miss_tokens (int)
+     - prompt_tokens_details.cached_tokens (int)  # OpenAI shape
+     - completion_tokens, prompt_tokens, total_tokens (passthrough)
+     - estimated_cost_usd (float, V4-Flash pricing)
+     """
+     if usage is None:
+         return {}
+     u = _to_dict(usage)
+
+     prompt_total = int(u.get("prompt_tokens") or 0)
+     completion = int(u.get("completion_tokens") or 0)
+
+     # DeepSeek native fields
+     hit = u.get("prompt_cache_hit_tokens")
+     miss = u.get("prompt_cache_miss_tokens")
+
+     # OpenAI shape
+     details = u.get("prompt_tokens_details") or {}
+     if isinstance(details, dict):
+         cached_oa = details.get("cached_tokens")
+     else:
+         cached_oa = getattr(details, "cached_tokens", None)
+
+     if hit is None and cached_oa is not None:
+         hit = int(cached_oa)
+         miss = max(prompt_total - hit, 0)
+     elif hit is not None and cached_oa is None:
+         cached_oa = int(hit)
+     elif hit is None and cached_oa is None:
+         hit, miss, cached_oa = 0, prompt_total, 0
+     if miss is None:
+         # DeepSeek sent prompt_cache_hit_tokens but omitted the miss counter
+         # (or both shapes were present): derive the miss from the total.
+         miss = max(prompt_total - int(hit), 0)
+
+     cost = (
+         (miss / 1_000_000) * PRICE_PER_M_INPUT_MISS
+         + (hit / 1_000_000) * PRICE_PER_M_INPUT_HIT
+         + (completion / 1_000_000) * PRICE_PER_M_OUTPUT
+     )
+
+     return {
+         "prompt_tokens": prompt_total,
+         "completion_tokens": completion,
+         "total_tokens": int(u.get("total_tokens") or (prompt_total + completion)),
+         "prompt_cache_hit_tokens": int(hit),
+         "prompt_cache_miss_tokens": int(miss),
+         "prompt_tokens_details": {"cached_tokens": int(cached_oa)},
+         "estimated_cost_usd": round(cost, 8),
+         "cache_hit_rate": round(hit / prompt_total, 4) if prompt_total else 0.0,
+     }
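+
+
+ # A worked example (hypothetical numbers, doctest-style, not executed here):
+ # a usage payload carrying only the DeepSeek-native hit field is normalised so
+ # OpenAI-shaped parsers see the hit too, and the V4-Flash cost arithmetic above
+ # becomes visible:
+ #
+ #     >>> u = normalize_usage({"prompt_tokens": 2048, "completion_tokens": 100,
+ #     ...                      "prompt_cache_hit_tokens": 1792})
+ #     >>> u["prompt_tokens_details"]["cached_tokens"]
+ #     1792
+ #     >>> u["prompt_cache_miss_tokens"]
+ #     256
+ #     >>> u["estimated_cost_usd"]  # 256*$0.14/M + 1792*$0.0028/M + 100*$0.28/M
+ #     6.886e-05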
+
+
+ def _to_dict(obj: Any) -> dict:
+     if isinstance(obj, dict):
+         return obj
+     if hasattr(obj, "model_dump"):
+         return obj.model_dump()
+     if hasattr(obj, "to_dict"):
+         return obj.to_dict()
+     if hasattr(obj, "__dict__"):
+         return {k: v for k, v in obj.__dict__.items() if not k.startswith("_")}
+     return {}
+
+
+ # ---------------------------------------------------------------------------
+ # Pre-flight estimator
+ # ---------------------------------------------------------------------------
+
+
+ def _encode(text: str) -> list[int]:
+     if tiktoken is None:
+         # Crude raw-byte fallback so the kit still imports without tiktoken.
+         return list(text.encode("utf-8"))
+     enc = tiktoken.get_encoding("cl100k_base")
+     return enc.encode(text)
+
+
+ def estimate_cache_hit(
+     new_messages: list[dict],
+     previous_prefix_messages: list[dict] | None = None,
+     *,
+     minimum_prefix_tokens: int = 1024,
+     cache_block_size: int = 256,
+ ) -> dict:
+     """Estimate how much of `new_messages` will be a cache hit on DeepSeek.
+
+     DeepSeek's cache rule (validated by probe_5 on 2026-05-09):
+     - prefix-from-0 match
+     - bucketed by ~256-token blocks (cached tokens are always multiples of 256)
+     - minimum prefix length to BEGIN caching ≈ 1024 tokens
+     - tail edits preserve the head cache; mid-prefix edits invalidate from the edit
+       point onwards (NOT the entire prefix)
+
+     We serialise both message lists deterministically (role + content + tool_calls
+     + reasoning_content), tokenise with cl100k_base (close enough; the DeepSeek
+     tokenizer averages ~3.6 chars/token on English ASCII vs cl100k's ~4), and find
+     the longest common token prefix, rounded DOWN to the nearest cache_block_size
+     boundary. The estimator does NOT replace the server's truth — it is a
+     pre-flight sanity check, e.g. "is my client about to invalidate the
+     99-cent prefix by re-ordering tool messages?".
+     """
+     new_text = _serialize_messages(new_messages)
+     prev_text = _serialize_messages(previous_prefix_messages or [])
+
+     new_tokens = _encode(new_text)
+     prev_tokens = _encode(prev_text)
+
+     common = 0
+     for a, b in zip(new_tokens, prev_tokens):
+         if a != b:
+             break
+         common += 1
+
+     if common < minimum_prefix_tokens:
+         eligible = 0
+     else:
+         # The server rounds cached tokens DOWN to the nearest cache_block_size (256).
+         eligible = (common // cache_block_size) * cache_block_size
+
+     return {
+         "common_prefix_tokens": common,
+         "eligible_cached_tokens": eligible,
+         "new_total_tokens": len(new_tokens),
+         "estimated_hit_rate": round(eligible / len(new_tokens), 4) if new_tokens else 0.0,
+         "minimum_prefix_threshold": minimum_prefix_tokens,
+         "cache_block_size": cache_block_size,
+         "explanation": (
+             f"common < {minimum_prefix_tokens} → server will NOT cache this request"
+             if common < minimum_prefix_tokens
+             else f"ok — {eligible} tokens (rounded to {cache_block_size}-block) will be discounted at $0.0028/M"
+         ),
+     }
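+
+
+ # A pre-flight sketch (hypothetical messages; exact counts depend on the
+ # tokenizer): re-sending an unchanged long history should report a block-aligned
+ # eligible count, while anything below the ~1024-token threshold reports zero.
+ #
+ #     >>> history = [{"role": "system", "content": "You are a helpful agent. " * 400}]
+ #     >>> est = estimate_cache_hit(history + [{"role": "user", "content": "next step"}],
+ #     ...                          previous_prefix_messages=history)
+ #     >>> est["eligible_cached_tokens"] % est["cache_block_size"]
+ #     0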
+
+
+ def _serialize_messages(messages: list[dict]) -> str:
+     """Deterministic flattening used by both the prefix estimator and cache-debug helpers.
+
+     The layout mirrors OpenAI chat-completions JSON ordering: role → content → tool_calls
+     → tool_call_id → reasoning_content. ANY field reorder by your agent code will
+     bust the prefix.
+     """
+     parts: list[str] = []
+     for msg in messages:
+         parts.append(f"<role>{msg.get('role', '')}</role>")
+         content = msg.get("content")
+         if isinstance(content, list):
+             for c in content:
+                 parts.append(f"<part>{c}</part>")
+         elif content:
+             parts.append(f"<content>{content}</content>")
+         for tc in msg.get("tool_calls") or []:
+             parts.append(
+                 f"<tc>{tc.get('id', '')}"
+                 f"|{tc.get('function', {}).get('name', '')}"
+                 f"|{tc.get('function', {}).get('arguments', '')}</tc>"
+             )
+         if msg.get("tool_call_id"):
+             parts.append(f"<tcid>{msg['tool_call_id']}</tcid>")
+         if msg.get("reasoning_content"):
+             parts.append(f"<rc>{msg['reasoning_content']}</rc>")
+     return "\n".join(parts)
@@ -0,0 +1,259 @@
+ """Drop-in OpenAI-compatible client for DeepSeek V4-Pro / V4-Flash with all known protocol salvages enabled.
+
+ Replace::
+
+     from openai import OpenAI
+     client = OpenAI(api_key=..., base_url="https://api.deepseek.com")
+
+ with::
+
+     from deepseek_harness import DeepSeekHarness
+     client = DeepSeekHarness(api_key=..., base_url="https://api.deepseek.com")
+
+ The surface is `client.chat.completions.create(...)` — same as OpenAI — but every
+ response goes through:
+
+ - `from_deepseek_response` (preserves reasoning_content on the message dict)
+ - `salvage_tool_calls_from_content` (rescues the ~11% of tool calls that leak into content)
+ - `normalize_usage` (back-fills both cache-hit field shapes + a cost estimate)
+
+ Streaming uses `stream_chat()`, which absorbs reasoning chunks via `ReasoningLifecycle`
+ and tolerates the empty-final-chunk shape (cline #1594).
+ """
+
+ from __future__ import annotations
+
+ import os
+ from typing import Any, Iterator
+
+ from openai import OpenAI
+
+ from .cache import normalize_usage
+ from .exceptions import StreamShapeError, ToolCallLeakageError
+ from .normalize import from_deepseek_response, strip_kit_warnings, to_deepseek_history
+ from .reasoning import ReasoningLifecycle
+ from .tool_calls import salvage_tool_calls_from_content
+
+
+ class DeepSeekHarness:
+     """Thin wrapper around `openai.OpenAI` with DeepSeek V4-specific safety guards.
+
+     Implements the 10 contract rules documented in `spec/` and validated by the
+     16 probes in `reports/`. See `__init__.py` for the public re-export.
+     """
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         base_url: str | None = None,
+         *,
+         salvage_tool_calls: bool = True,
+         normalize_cache_fields: bool = True,
+         warn_on_missing_reasoning: bool = True,
+         disable_thinking_by_default: bool = False,
+         raw_dump_path: str | None = None,
+     ) -> None:
+         """Construct a DeepSeek-aware OpenAI-compatible client.
+
+         Args:
+             disable_thinking_by_default: If True, every request gets
+                 `extra_body={"thinking": {"type": "disabled"}}` UNLESS the caller
+                 explicitly passes their own `extra_body`. Recommended for cost-
+                 sensitive deployments because `deepseek-v4-pro` defaults to
+                 thinking-enabled (probe_0 finding, 2026-05-09: ~30 reasoning
+                 tokens are billed even on trivial prompts).
+         """
+         self._oai = OpenAI(
+             api_key=api_key or os.getenv("DEEPSEEK_API_KEY"),
+             base_url=base_url or os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com"),
+         )
+         self._salvage = salvage_tool_calls
+         self._normalize_cache = normalize_cache_fields
+         self._warn = warn_on_missing_reasoning
+         self._disable_thinking = disable_thinking_by_default
+         self._raw_dump = raw_dump_path
+
+     def _maybe_inject_thinking_off(self, kwargs: dict) -> dict:
+         if self._disable_thinking and "extra_body" not in kwargs:
+             kwargs = dict(kwargs)
+             kwargs["extra_body"] = {"thinking": {"type": "disabled"}}
+         return kwargs
+
+     # ------------------------------------------------------------------
+     # Non-streaming
+     # ------------------------------------------------------------------
+     def chat(
+         self,
+         *,
+         model: str,
+         messages: list[dict],
+         tools: list[dict] | None = None,
+         tool_choice: str | dict | None = None,
+         **kwargs: Any,
+     ) -> dict:
+         """Returns a dict with keys:
+             message: OpenAI-shaped assistant message (incl. reasoning_content if any)
+             usage: normalised usage (both cache-field shapes filled in)
+             finish_reason: passthrough
+             salvage: None if no tool-call salvage happened, else {pattern, original_content}
+             raw: the raw SDK response (for debugging)
+         """
+         normalized_in = strip_kit_warnings(to_deepseek_history(messages))
+         kwargs = self._maybe_inject_thinking_off(kwargs)
+         # Only forward tools/tool_choice when set: an explicit `tools=None` is
+         # serialised as a JSON null, which some OpenAI-compatible backends reject.
+         if tools is not None:
+             kwargs["tools"] = tools
+         if tool_choice is not None:
+             kwargs["tool_choice"] = tool_choice
+         resp = self._oai.chat.completions.create(
+             model=model,
+             messages=normalized_in,
+             **kwargs,
+         )
+
+         msg = from_deepseek_response(resp)
+         finish_reason = msg.pop("_dsk_kit_finish_reason", None)
+         salvage = None
+
+         if self._salvage and not msg.get("tool_calls"):
+             tool_calls, residual, reason = salvage_tool_calls_from_content(
+                 msg.get("content"), finish_reason
+             )
+             if tool_calls is not None:
+                 salvage = {
+                     "pattern": reason,
+                     "original_content": msg.get("content"),
+                 }
+                 msg["tool_calls"] = tool_calls
+                 msg["content"] = residual
+
+         usage = normalize_usage(getattr(resp, "usage", None)) if self._normalize_cache else None
+
+         return {
+             "message": msg,
+             "usage": usage,
+             "finish_reason": finish_reason,
+             "salvage": salvage,
+             "raw": resp,
+         }
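+
+     # Hypothetical usage of the salvage report: when V4 leaks a tool call into
+     # `content` with finish_reason="stop", `chat()` promotes it to `tool_calls`
+     # and records which pattern fired (`harness`, `msgs`, `tools`, and `logger`
+     # are illustrative caller-side names):
+     #
+     #     out = harness.chat(model="deepseek-v4-flash", messages=msgs, tools=tools)
+     #     if out["salvage"] is not None:
+     #         logger.warning("tool call salvaged via %s", out["salvage"]["pattern"])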
+
+     # ------------------------------------------------------------------
+     # Streaming
+     # ------------------------------------------------------------------
+     def stream_chat(
+         self,
+         *,
+         model: str,
+         messages: list[dict],
+         tools: list[dict] | None = None,
+         tool_choice: str | dict | None = None,
+         **kwargs: Any,
+     ) -> Iterator[dict]:
+         """Yields **structured events** (not raw chunks) so callers don't have to re-implement
+         the cline #1594 / hermes #15353 mitigations.
+
+         Event types:
+             {"type": "content_delta", "data": str}
+             {"type": "reasoning_delta", "data": str}
+             {"type": "tool_call_delta", "data": {"index": int, "id": str|None, "name": str|None, "arguments": str}}
+             {"type": "done", "message": {...}, "usage": {...}, "finish_reason": str, "salvage": dict|None}
+         """
+         normalized_in = strip_kit_warnings(to_deepseek_history(messages))
+         kwargs = self._maybe_inject_thinking_off(kwargs)
+         # Same guard as chat(): never send an explicit JSON null for tools.
+         if tools is not None:
+             kwargs["tools"] = tools
+         if tool_choice is not None:
+             kwargs["tool_choice"] = tool_choice
+         stream = self._oai.chat.completions.create(
+             model=model,
+             messages=normalized_in,
+             stream=True,
+             stream_options={"include_usage": True},
+             **kwargs,
+         )
+
+         rl = ReasoningLifecycle()
+         content_buf: list[str] = []
+         tool_call_acc: dict[int, dict] = {}
+         finish_reason: str | None = None
+         usage_raw: Any | None = None
+
+         for chunk in stream:
+             choices = getattr(chunk, "choices", None) or []
+             # DeepSeek emits a final chunk with empty choices but populated usage —
+             # cline #1594 was the upstream bug from indexing into [0] blindly.
+             if not choices:
+                 if getattr(chunk, "usage", None) is not None:
+                     usage_raw = chunk.usage
+                 continue
+
+             choice = choices[0]
+             delta = getattr(choice, "delta", None)
+             if delta is None:
+                 # Malformed chunk — neither delta nor finish_reason; skip rather than throw.
+                 continue
+
+             # reasoning_content goes through the lifecycle helper (also yielded for visibility).
+             rc = getattr(delta, "reasoning_content", None)
+             if rc:
+                 rl.absorb_chunk(chunk)
+                 yield {"type": "reasoning_delta", "data": rc}
+
+             content_piece = getattr(delta, "content", None)
+             if content_piece:
+                 content_buf.append(content_piece)
+                 yield {"type": "content_delta", "data": content_piece}
+
+             for tc in getattr(delta, "tool_calls", None) or []:
+                 idx = getattr(tc, "index", 0)
+                 slot = tool_call_acc.setdefault(
+                     idx, {"id": None, "name": None, "arguments": ""}
+                 )
+                 if getattr(tc, "id", None):
+                     slot["id"] = tc.id
+                 fn = getattr(tc, "function", None)
+                 if fn is not None:
+                     if getattr(fn, "name", None):
+                         slot["name"] = fn.name
+                     if getattr(fn, "arguments", None):
+                         slot["arguments"] += fn.arguments
+                 yield {
+                     "type": "tool_call_delta",
+                     "data": {
+                         "index": idx,
+                         "id": slot["id"],
+                         "name": slot["name"],
+                         "arguments": slot["arguments"],
+                     },
+                 }
+
+             if getattr(choice, "finish_reason", None):
+                 finish_reason = choice.finish_reason
+
+         content_str = "".join(content_buf) or None
+         message: dict[str, Any] = {"role": "assistant", "content": content_str}
+         if tool_call_acc:
+             message["tool_calls"] = [
+                 {
+                     "id": v["id"] or f"call_{i}",
+                     "type": "function",
+                     "function": {"name": v["name"] or "", "arguments": v["arguments"] or "{}"},
+                 }
+                 for i, v in sorted(tool_call_acc.items())
+             ]
+         message = rl.finalize_assistant_message(message)
+
+         salvage = None
+         if self._salvage and not message.get("tool_calls"):
+             tool_calls, residual, reason = salvage_tool_calls_from_content(
+                 message.get("content"), finish_reason
+             )
+             if tool_calls is not None:
+                 salvage = {"pattern": reason, "original_content": message.get("content")}
+                 message["tool_calls"] = tool_calls
+                 message["content"] = residual
+
+         usage = normalize_usage(usage_raw) if (self._normalize_cache and usage_raw) else None
+
+         yield {
+             "type": "done",
+             "message": message,
+             "usage": usage,
+             "finish_reason": finish_reason,
+             "salvage": salvage,
+         }
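+
+
+ # Consumption sketch (event names as documented in `stream_chat`; `msgs`, `tools`,
+ # and `args_buf` are illustrative). Note that "arguments" in a tool_call_delta is
+ # cumulative, so assignment rather than concatenation is correct:
+ #
+ #     h = DeepSeekHarness()
+ #     args_buf: dict[int, str] = {}
+ #     for ev in h.stream_chat(model="deepseek-v4-pro", messages=msgs, tools=tools):
+ #         if ev["type"] == "tool_call_delta":
+ #             args_buf[ev["data"]["index"]] = ev["data"]["arguments"]
+ #         elif ev["type"] == "done":
+ #             final = ev["message"]   # fully assembled assistant message
+ #             spent = ev["usage"]     # normalised usage, or None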
@@ -0,0 +1,36 @@
+ """Typed errors so callers can branch on the specific quirk that fired."""
+
+
+ class HarnessError(Exception):
+     """Base for every protocol quirk we detect."""
+
+
+ class ReasoningContentMissingError(HarnessError):
+     """Server returned 400 because the previous turn's reasoning_content was not echoed back.
+
+     Reference: microsoft/agent-framework#5538, NousResearch/hermes-agent#15353.
+     Probe: reports/probes/probe_2_reasoning_lifecycle.py (3/3 reproduction on V4-Pro).
+     """
+
+
+ class ToolCallLeakageError(HarnessError):
+     """finish_reason='stop' but content carried a DSML / JSON tool-call payload.
+
+     Reference: deepseek-ai/DeepSeek-V3#1244, NousResearch/hermes-agent#15453.
+     Probe: reports/probes/probe_3_tool_call_leakage.py (0/50 on the V4 official endpoint).
+     """
+
+
+ class StrictModeCorruptionError(HarnessError):
+     """The beta/strict path produced unparseable JSON (missing quote on the first key).
+
+     Reference: deepseek-ai/DeepSeek-V3#1069 (closed as not-planned).
+     Probe: reports/probes/probe_4_strict_mode_corruption.py (0/32 on the V4 series).
+     """
+
+
+ class StreamShapeError(HarnessError):
+     """The final SSE chunk lacked `choices`, or `delta` was undefined where indexed.
+
+     Reference: cline/cline#1594.
+     """