coderouter_cli-1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. coderouter/__init__.py +17 -0
  2. coderouter/__main__.py +6 -0
  3. coderouter/adapters/__init__.py +23 -0
  4. coderouter/adapters/anthropic_native.py +502 -0
  5. coderouter/adapters/base.py +220 -0
  6. coderouter/adapters/openai_compat.py +395 -0
  7. coderouter/adapters/registry.py +17 -0
  8. coderouter/cli.py +345 -0
  9. coderouter/cli_stats.py +751 -0
  10. coderouter/config/__init__.py +10 -0
  11. coderouter/config/capability_registry.py +339 -0
  12. coderouter/config/env_file.py +295 -0
  13. coderouter/config/loader.py +73 -0
  14. coderouter/config/schemas.py +515 -0
  15. coderouter/data/__init__.py +7 -0
  16. coderouter/data/model-capabilities.yaml +86 -0
  17. coderouter/doctor.py +1596 -0
  18. coderouter/env_security.py +434 -0
  19. coderouter/errors.py +29 -0
  20. coderouter/ingress/__init__.py +5 -0
  21. coderouter/ingress/anthropic_routes.py +205 -0
  22. coderouter/ingress/app.py +144 -0
  23. coderouter/ingress/dashboard_routes.py +493 -0
  24. coderouter/ingress/metrics_routes.py +92 -0
  25. coderouter/ingress/openai_routes.py +153 -0
  26. coderouter/logging.py +315 -0
  27. coderouter/metrics/__init__.py +39 -0
  28. coderouter/metrics/collector.py +471 -0
  29. coderouter/metrics/prometheus.py +221 -0
  30. coderouter/output_filters.py +407 -0
  31. coderouter/routing/__init__.py +13 -0
  32. coderouter/routing/auto_router.py +244 -0
  33. coderouter/routing/capability.py +285 -0
  34. coderouter/routing/fallback.py +611 -0
  35. coderouter/translation/__init__.py +57 -0
  36. coderouter/translation/anthropic.py +204 -0
  37. coderouter/translation/convert.py +1291 -0
  38. coderouter/translation/tool_repair.py +236 -0
  39. coderouter_cli-1.7.0.dist-info/METADATA +509 -0
  40. coderouter_cli-1.7.0.dist-info/RECORD +43 -0
  41. coderouter_cli-1.7.0.dist-info/WHEEL +4 -0
  42. coderouter_cli-1.7.0.dist-info/entry_points.txt +2 -0
  43. coderouter_cli-1.7.0.dist-info/licenses/LICENSE +21 -0
coderouter/translation/convert.py
@@ -0,0 +1,1291 @@
+ """Anthropic Messages ⇄ internal ChatRequest/ChatResponse translation.
+
+ The internal format is OpenAI-shaped (see coderouter/adapters/base.py), so the
+ translation is effectively Anthropic ⇄ OpenAI Chat Completions.
+
+ Forward entry points:
+
+     to_chat_request()                  Anthropic request → internal ChatRequest
+     to_anthropic_response()            internal ChatResponse → Anthropic response
+     stream_chat_to_anthropic_events()  async iterator → Anthropic SSE events
+
+ Reverse entry points (v0.4-A, used when an OpenAI-shaped ingress routes to a
+ kind:anthropic provider):
+
+     to_anthropic_request()             internal ChatRequest → Anthropic request
+     to_chat_response()                 Anthropic response → internal ChatResponse
+     stream_anthropic_to_chat_chunks()  Anthropic SSE events → internal chunks
+
+ v0.2 scope: text + image + tool_use + tool_result.
+ v0.3+: thinking blocks, cache_control, documents, citations — currently passed
+ through as opaque dicts (extra="allow" on the models).
+ """
+
+ from __future__ import annotations
+
+ import json
+ import time
+ import uuid
+ from collections.abc import AsyncIterator
+ from typing import Any, Literal
+
+ from coderouter.adapters.base import (
+     AdapterError,
+     ChatRequest,
+     ChatResponse,
+     Message,
+     StreamChunk,
+ )
+ from coderouter.translation.anthropic import (
+     AnthropicMessage,
+     AnthropicRequest,
+     AnthropicResponse,
+     AnthropicStreamEvent,
+     AnthropicTool,
+     AnthropicUsage,
+ )
+ from coderouter.translation.tool_repair import repair_tool_calls_in_text
+
+ # ============================================================
+ # Anthropic → internal (OpenAI-shaped)
+ # ============================================================
+
+
+ def _system_as_text(system: str | list[dict[str, Any]] | None) -> str | None:
+     """Anthropic's `system` can be a string or a list of content blocks.
+
+     OpenAI accepts only a string for the system role, so join text blocks.
+     Unknown block types are replaced with a placeholder naming their type
+     in the joined text so we don't silently drop user intent.
+     """
+     if system is None:
+         return None
+     if isinstance(system, str):
+         return system
+     parts: list[str] = []
+     for block in system:
+         btype = block.get("type")
+         if btype == "text":
+             parts.append(str(block.get("text", "")))
+         else:
+             # Preserve the presence of non-text blocks (e.g. cache_control
+             # markers) so their absence doesn't silently degrade the prompt.
+             parts.append(f"[non-text block: {btype}]")
+     return "\n".join(p for p in parts if p)
+
+
+ def _tool_use_to_openai_tool_call(block: dict[str, Any]) -> dict[str, Any]:
+     """Anthropic tool_use block → OpenAI tool_calls entry."""
+     return {
+         "id": block.get("id", ""),
+         "type": "function",
+         "function": {
+             "name": block.get("name", ""),
+             # OpenAI expects a JSON-encoded string for arguments.
+             "arguments": json.dumps(block.get("input", {}), ensure_ascii=False),
+         },
+     }
+
+
+ def _tool_result_content_to_str(
+     content: str | list[dict[str, Any]] | None,
+ ) -> str:
+     """Normalize Anthropic tool_result content to a flat string.
+
+     OpenAI's `role: "tool"` message accepts string content only.
+     """
+     if content is None:
+         return ""
+     if isinstance(content, str):
+         return content
+     parts: list[str] = []
+     for block in content:
+         if block.get("type") == "text":
+             parts.append(str(block.get("text", "")))
+         else:
+             # Rare — images as tool results. Encode as a placeholder.
+             parts.append(f"[non-text tool_result block: {block.get('type')}]")
+     return "\n".join(parts)
+
+
+ def _convert_anthropic_message(
+     msg_dict: dict[str, Any],
+ ) -> list[dict[str, Any]]:
+     """Convert one Anthropic message to one-or-more OpenAI messages.
+
+     - Short-form string content → single {role, content} message.
+     - List of content blocks → may split into multiple messages when the
+       user side embeds tool_result blocks (OpenAI encodes those as role=tool).
+     - Assistant text + tool_use blocks merge into a single assistant message
+       with both `content` and `tool_calls` set.
+     """
+     role = msg_dict["role"]
+     content = msg_dict["content"]
+
+     if isinstance(content, str):
+         return [{"role": role, "content": content}]
+
+     # content is a list of blocks
+     text_parts: list[str] = []
+     image_parts: list[dict[str, Any]] = []  # OpenAI vision content parts
+     tool_calls: list[dict[str, Any]] = []  # for assistant
+     tool_result_messages: list[dict[str, Any]] = []  # for user
+
+     for block in content:
+         btype = block.get("type")
+         if btype == "text":
+             text_parts.append(str(block.get("text", "")))
+         elif btype == "image":
+             # Anthropic image block → OpenAI `image_url` content part.
+             src = block.get("source", {})
+             src_type = src.get("type")
+             if src_type == "base64":
+                 url = f"data:{src.get('media_type', 'image/png')};base64,{src.get('data', '')}"
+             elif src_type == "url":
+                 url = src.get("url", "")
+             else:
+                 url = ""
+             image_parts.append({"type": "image_url", "image_url": {"url": url}})
+         elif btype == "tool_use":
+             tool_calls.append(_tool_use_to_openai_tool_call(block))
+         elif btype == "tool_result":
+             tool_result_messages.append(
+                 {
+                     "role": "tool",
+                     "tool_call_id": block.get("tool_use_id", ""),
+                     "content": _tool_result_content_to_str(block.get("content")),
+                 }
+             )
+         # Unknown block types (thinking, document, …) are skipped in v0.2.
+
+     out: list[dict[str, Any]] = []
+
+     # tool_result blocks emit their own role=tool messages FIRST (they're the
+     # answer to a previous assistant tool_use, so they precede any new user
+     # text that might accompany them).
+     out.extend(tool_result_messages)
+
+     joined_text = "\n".join(t for t in text_parts if t)
+
+     if role == "assistant":
+         assistant_msg: dict[str, Any] = {"role": "assistant"}
+         # OpenAI allows content: null when only tool_calls are present.
+         assistant_msg["content"] = joined_text if joined_text else None
+         if tool_calls:
+             assistant_msg["tool_calls"] = tool_calls
+         # Only emit the assistant message if something meaningful remains.
+         if assistant_msg["content"] is not None or tool_calls:
+             out.append(assistant_msg)
+     else:  # user
+         if image_parts:
+             # Multimodal: OpenAI wants a content list with text + image parts.
+             mm_content: list[dict[str, Any]] = []
+             if joined_text:
+                 mm_content.append({"type": "text", "text": joined_text})
+             mm_content.extend(image_parts)
+             if mm_content:
+                 out.append({"role": "user", "content": mm_content})
+         elif joined_text:
+             out.append({"role": "user", "content": joined_text})
+         # If it was purely tool_result blocks, tool_result_messages already
+         # captured that — no extra user message needed.
+
+     return out
+
+
+ def _convert_anthropic_tools(
+     tools: list[Any] | None,
+ ) -> list[dict[str, Any]] | None:
+     """Translate Anthropic ``tools`` array to OpenAI ``tools`` shape.
+
+     Accepts either :class:`AnthropicTool` models or plain dicts and
+     emits ``[{type: "function", function: {name, description, parameters}}]``
+     — the schema OpenAI-compat providers expect.
+     """
+     if not tools:
+         return None
+     out: list[dict[str, Any]] = []
+     for tool in tools:
+         # Support both AnthropicTool models and plain dicts.
+         t = tool.model_dump() if hasattr(tool, "model_dump") else dict(tool)
+         out.append(
+             {
+                 "type": "function",
+                 "function": {
+                     "name": t.get("name", ""),
+                     "description": t.get("description", ""),
+                     "parameters": t.get("input_schema", {}) or {},
+                 },
+             }
+         )
+     return out
+
+
+ def _convert_anthropic_tool_choice(
+     tc: dict[str, Any] | None,
+ ) -> Any | None:
+     """Anthropic tool_choice → OpenAI tool_choice.
+
+     Anthropic:
+         {"type": "auto"}
+         {"type": "any"}                  # force any tool
+         {"type": "tool", "name": "foo"}
+         {"type": "none"}                 # v0.3+
+     OpenAI:
+         "auto" | "none" | "required" | {"type": "function", "function": {"name"}}
+     """
+     if tc is None:
+         return None
+     t = tc.get("type")
+     if t == "auto":
+         return "auto"
+     if t == "any":
+         return "required"
+     if t == "none":
+         return "none"
+     if t == "tool":
+         return {"type": "function", "function": {"name": tc.get("name", "")}}
+     return None
+
+
+ def to_chat_request(req: AnthropicRequest) -> ChatRequest:
+     """Anthropic Messages request → internal ChatRequest (OpenAI-shaped)."""
+     messages: list[dict[str, Any]] = []
+
+     sys_text = _system_as_text(req.system)
+     if sys_text:
+         messages.append({"role": "system", "content": sys_text})
+
+     for msg in req.messages:
+         messages.extend(_convert_anthropic_message(msg.model_dump(exclude_none=True)))
+
+     # Convert to Message models so downstream adapters see a consistent type.
+     msg_models = [Message.model_validate(m) for m in messages]
+
+     chat_req = ChatRequest(
+         messages=msg_models,
+         stream=req.stream,
+         temperature=req.temperature,
+         max_tokens=req.max_tokens,
+         top_p=req.top_p,
+         # Anthropic's stop_sequences → OpenAI's stop
+         stop=req.stop_sequences,
+         tools=_convert_anthropic_tools(req.tools),
+         tool_choice=_convert_anthropic_tool_choice(req.tool_choice),
+     )
+     # Propagate CodeRouter routing hint.
+     chat_req.profile = req.profile
+     return chat_req
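
An end-to-end sketch of the forward request translation (not part of the
packaged file; it assumes the pydantic models default unset optional fields,
which their usage above implies):

    req = AnthropicRequest.model_validate(
        {
            "model": "claude-sonnet",
            "max_tokens": 256,
            "system": "You are terse.",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"},
                        {"type": "text", "text": "Now summarize."},
                    ],
                }
            ],
        }
    )
    chat_req = to_chat_request(req)
    # system first, then the tool_result as role=tool, then the user text,
    # matching _convert_anthropic_message's ordering rule.
    assert [m.role for m in chat_req.messages] == ["system", "tool", "user"]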
+
+
+ # ============================================================
+ # Internal → Anthropic (non-stream response)
+ # ============================================================
+
+
+ _FINISH_REASON_MAP: dict[
+     str, Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
+ ] = {
+     "stop": "end_turn",
+     "length": "max_tokens",
+     "tool_calls": "tool_use",
+     "function_call": "tool_use",  # legacy OpenAI
+     "content_filter": "end_turn",
+ }
+
+
+ def _tool_call_to_tool_use_block(tool_call: dict[str, Any]) -> dict[str, Any]:
+     """OpenAI tool_calls entry → Anthropic tool_use content block."""
+     fn = tool_call.get("function", {}) or {}
+     args_raw = fn.get("arguments", "") or ""
+     if isinstance(args_raw, dict):
+         args_parsed: dict[str, Any] = args_raw
+     else:
+         try:
+             args_parsed = json.loads(args_raw) if args_raw else {}
+         except json.JSONDecodeError:
+             # v0.2: keep the raw string in a `_raw` field so v1.0 can repair.
+             args_parsed = {"_raw": args_raw}
+     return {
+         "type": "tool_use",
+         "id": tool_call.get("id", f"toolu_{uuid.uuid4().hex[:16]}"),
+         "name": fn.get("name", ""),
+         "input": args_parsed,
+     }
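
The `_raw` fallback in isolation (not part of the packaged file; behavior is
fully determined by the function above):

    block = _tool_call_to_tool_use_block(
        {"id": "call_1", "function": {"name": "ls", "arguments": "{not json"}}
    )
    assert block["input"] == {"_raw": "{not json"}  # preserved for later repair

    block = _tool_call_to_tool_use_block(
        {"id": "call_2", "function": {"name": "ls", "arguments": '{"path": "."}'}}
    )
    assert block["input"] == {"path": "."}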
+
+
+ def to_anthropic_response(
+     resp: ChatResponse,
+     *,
+     allowed_tool_names: list[str] | None = None,
+ ) -> AnthropicResponse:
+     """Internal ChatResponse (OpenAI-shaped) → Anthropic response.
+
+     `allowed_tool_names`, when provided, enables v0.3 tool-call repair:
+     if the upstream model did not populate `tool_calls` but wrote a tool
+     invocation into the text body (a failure mode of qwen2.5-coder and
+     similar), the JSON is extracted and surfaced as a structured
+     `tool_use` content block. Without the allow-list, repair falls back
+     to accepting any tool-shaped JSON (higher false-positive risk).
+     """
+     choices = resp.choices or []
+     message: dict[str, Any] = {}
+     finish_reason: str | None = None
+     if choices:
+         message = choices[0].get("message", {}) or {}
+         finish_reason = choices[0].get("finish_reason")
+
+     tool_calls = list(message.get("tool_calls") or [])
+     text = message.get("content")
+
+     # v0.3 tool-call repair: only attempt if the model didn't already emit
+     # structured tool_calls (otherwise the text is just narration).
+     if not tool_calls and isinstance(text, str) and text:
+         cleaned, extracted = repair_tool_calls_in_text(text, allowed_tool_names)
+         if extracted:
+             text = cleaned
+             tool_calls = extracted
+             # Re-map finish_reason so Anthropic reports stop_reason=tool_use.
+             if finish_reason in (None, "stop"):
+                 finish_reason = "tool_calls"
+
+     content_blocks: list[dict[str, Any]] = []
+
+     if isinstance(text, str) and text:
+         content_blocks.append({"type": "text", "text": text})
+     elif isinstance(text, list):
+         # Rare: multimodal assistant response. Flatten text parts.
+         for part in text:
+             if part.get("type") == "text":
+                 content_blocks.append({"type": "text", "text": part.get("text", "")})
+
+     for tc in tool_calls:
+         content_blocks.append(_tool_call_to_tool_use_block(tc))
+
+     # Empty response guard: Anthropic requires at least one content block.
+     if not content_blocks:
+         content_blocks.append({"type": "text", "text": ""})
+
+     usage_in = resp.usage or {}
+     usage = AnthropicUsage(
+         input_tokens=int(usage_in.get("prompt_tokens", 0) or 0),
+         output_tokens=int(usage_in.get("completion_tokens", 0) or 0),
+     )
+
+     return AnthropicResponse(
+         id=f"msg_{resp.id}"
+         if resp.id and not resp.id.startswith("msg_")
+         else (resp.id or f"msg_{uuid.uuid4().hex[:24]}"),
+         model=resp.model,
+         content=content_blocks,
+         stop_reason=_FINISH_REASON_MAP.get(finish_reason or "stop", "end_turn"),
+         usage=usage,
+         coderouter_provider=resp.coderouter_provider,
+     )
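
A sketch of the non-stream conversion (not part of the packaged file;
ChatResponse field names mirror their construction in to_chat_response
below, and the provider value is illustrative):

    resp = ChatResponse(
        id="abc123",
        object="chat.completion",
        created=0,
        model="qwen2.5-coder",
        choices=[
            {
                "index": 0,
                "message": {"role": "assistant", "content": "hi"},
                "finish_reason": "stop",
            }
        ],
        usage={"prompt_tokens": 12, "completion_tokens": 1},
        coderouter_provider="local-ollama",
    )
    anth = to_anthropic_response(resp)
    assert anth.id == "msg_abc123"         # bare ids gain the msg_ prefix
    assert anth.stop_reason == "end_turn"  # "stop" maps via _FINISH_REASON_MAP
    assert anth.usage.input_tokens == 12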
+
+
+ # ============================================================
+ # Stream translation (OpenAI chunks → Anthropic SSE events)
+ # ============================================================
+
+
+ class _StreamState:
+     """Bookkeeping for the stateful stream translator.
+
+     Anthropic's wire protocol requires open/close markers per content block,
+     and block indices are contiguous (0, 1, 2, …). Text chunks and tool_call
+     chunks from OpenAI must be re-segmented into these blocks.
+     """
+
+     def __init__(self) -> None:
+         self.started: bool = False
+         self.finished: bool = False
+         self.current_block_index: int = -1
+         self.current_block_type: str | None = None  # "text" | "tool_use"
+         # openai tool_call index (from delta.tool_calls[i].index) →
+         # anthropic content block index we allocated for it
+         self.tool_call_block_map: dict[int, int] = {}
+         self.message_id: str = f"msg_{uuid.uuid4().hex[:24]}"
+         self.model: str = "unknown"
+         # Usage accounting (v0.3-C). The translator's job is to make sure
+         # that message_delta.usage carries SOMETHING meaningful even when
+         # the upstream provider doesn't emit a usage chunk (Ollama without
+         # stream_options.include_usage, older OpenAI-compat servers, etc.).
+         # Policy:
+         #   - If we receive chunk.usage.completion_tokens from upstream,
+         #     it is authoritative and we use it verbatim.
+         #   - Otherwise we fall back to a char-based estimate accumulated
+         #     from the actual bytes we emitted (text_delta + input_json).
+         # prompt_tokens is pure passthrough from upstream — without it we
+         # report 0 rather than guess (the ingress doesn't see the prompt).
+         self.upstream_output_tokens: int | None = None
+         self.upstream_input_tokens: int | None = None
+         self.emitted_chars: int = 0
+
+
+ def _event(type_: str, data: dict[str, Any]) -> AnthropicStreamEvent:
+     return AnthropicStreamEvent(type=type_, data={"type": type_, **data})
+
+
+ def _start_event(model: str, message_id: str) -> AnthropicStreamEvent:
+     return _event(
+         "message_start",
+         {
+             "message": {
+                 "id": message_id,
+                 "type": "message",
+                 "role": "assistant",
+                 "content": [],
+                 "model": model,
+                 "stop_reason": None,
+                 "stop_sequence": None,
+                 "usage": {"input_tokens": 0, "output_tokens": 0},
+             }
+         },
+     )
+
+
+ def _close_current_block(state: _StreamState) -> list[AnthropicStreamEvent]:
+     if state.current_block_index < 0:
+         return []
+     evt = _event(
+         "content_block_stop",
+         {"index": state.current_block_index},
+     )
+     state.current_block_type = None
+     return [evt]
+
+
+ def _open_text_block(state: _StreamState) -> list[AnthropicStreamEvent]:
+     state.current_block_index += 1
+     state.current_block_type = "text"
+     return [
+         _event(
+             "content_block_start",
+             {
+                 "index": state.current_block_index,
+                 "content_block": {"type": "text", "text": ""},
+             },
+         )
+     ]
+
+
+ def _open_tool_use_block(
+     state: _StreamState,
+     openai_tc_index: int,
+     tool_id: str,
+     tool_name: str,
+ ) -> list[AnthropicStreamEvent]:
+     state.current_block_index += 1
+     state.current_block_type = "tool_use"
+     state.tool_call_block_map[openai_tc_index] = state.current_block_index
+     return [
+         _event(
+             "content_block_start",
+             {
+                 "index": state.current_block_index,
+                 "content_block": {
+                     "type": "tool_use",
+                     "id": tool_id or f"toolu_{uuid.uuid4().hex[:16]}",
+                     "name": tool_name,
+                     "input": {},
+                 },
+             },
+         )
+     ]
+
+
+ def _handle_delta(state: _StreamState, delta: dict[str, Any]) -> list[AnthropicStreamEvent]:
+     """Translate one OpenAI delta dict into zero-or-more Anthropic events."""
+     out: list[AnthropicStreamEvent] = []
+
+     # Text content
+     text = delta.get("content")
+     if isinstance(text, str) and text:
+         if state.current_block_type != "text":
+             out.extend(_close_current_block(state))
+             out.extend(_open_text_block(state))
+         out.append(
+             _event(
+                 "content_block_delta",
+                 {
+                     "index": state.current_block_index,
+                     "delta": {"type": "text_delta", "text": text},
+                 },
+             )
+         )
+         state.emitted_chars += len(text)
+
+     # Tool calls
+     for tc in delta.get("tool_calls") or []:
+         tc_index = tc.get("index", 0)
+         fn = tc.get("function", {}) or {}
+         args_fragment = fn.get("arguments", "") or ""
+
+         if tc_index not in state.tool_call_block_map:
+             # First time we see this tool_call — close any prior block and open a new tool_use block.
+             out.extend(_close_current_block(state))
+             out.extend(
+                 _open_tool_use_block(
+                     state,
+                     openai_tc_index=tc_index,
+                     tool_id=tc.get("id", ""),
+                     tool_name=fn.get("name", ""),
+                 )
+             )
+             # Function name itself is generated output even though it rides on
+             # content_block_start, not on a delta. Include it in the estimate
+             # so we don't under-count tool-heavy responses.
+             state.emitted_chars += len(fn.get("name", "") or "")
+         block_idx = state.tool_call_block_map[tc_index]
+         if args_fragment:
+             out.append(
+                 _event(
+                     "content_block_delta",
+                     {
+                         "index": block_idx,
+                         "delta": {
+                             "type": "input_json_delta",
+                             "partial_json": args_fragment,
+                         },
+                     },
+                 )
+             )
+             state.emitted_chars += len(args_fragment)
+
+     return out
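
How one mixed stream re-segments into Anthropic blocks (not part of the
packaged file; the event types follow directly from the branches above):

    state = _StreamState()
    events = _handle_delta(state, {"content": "Let me check."})
    assert [e.type for e in events] == ["content_block_start", "content_block_delta"]

    events = _handle_delta(
        state,
        {"tool_calls": [{"index": 0, "id": "call_1",
                         "function": {"name": "grep", "arguments": '{"q":'}}]},
    )
    # The open text block closes, a tool_use block opens, and the argument
    # fragment rides an input_json_delta.
    assert [e.type for e in events] == [
        "content_block_stop",
        "content_block_start",
        "content_block_delta",
    ]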
+
+
+ def _estimate_output_tokens(state: _StreamState) -> int:
+     """Fallback output-token estimate when upstream didn't report usage.
+
+     Uses the well-known ~4 chars/token heuristic (accurate enough for
+     cost-tracking clients; not a billing source of truth). Always returns
+     at least 1 if anything was emitted, so a tiny non-empty response
+     doesn't get reported as 0 tokens.
+     """
+     if state.emitted_chars <= 0:
+         return 0
+     return max(1, (state.emitted_chars + 3) // 4)
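
The ceiling-divide in concrete numbers: 1 emitted char gives (1 + 3) // 4 = 1
token, 10 chars give 13 // 4 = 3, and 4,000 chars give roughly 1,000, in line
with the ~4 chars/token heuristic.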
+
+
+ def _finalize_usage(state: _StreamState) -> dict[str, int]:
+     """Build the usage dict for the terminal message_delta event.
+
+     Always includes output_tokens. Also includes input_tokens when the
+     upstream provided prompt_tokens (otherwise we don't fabricate it —
+     the translator has no access to the prompt).
+     """
+     if state.upstream_output_tokens is not None:
+         out_tokens = state.upstream_output_tokens
+     else:
+         out_tokens = _estimate_output_tokens(state)
+
+     usage: dict[str, int] = {"output_tokens": out_tokens}
+     if state.upstream_input_tokens is not None:
+         usage["input_tokens"] = state.upstream_input_tokens
+     return usage
+
+
+ async def stream_chat_to_anthropic_events(
+     chunks: AsyncIterator[StreamChunk],
+ ) -> AsyncIterator[AnthropicStreamEvent]:
+     """Translate an internal StreamChunk async iterator into Anthropic events.
+
+     Wire protocol emitted:
+         message_start
+         [content_block_start, (content_block_delta)*, content_block_stop]+
+         message_delta (with stop_reason)
+         message_stop
+
+     The translator is stateful across chunks; call it once per request and
+     never reuse the returned iterator.
+     """
+     state = _StreamState()
+     stop_reason_openai: str | None = None
+
+     async for chunk in chunks:
+         if not state.started:
+             state.started = True
+             state.message_id = chunk.id if chunk.id else state.message_id
+             if chunk.model:
+                 state.model = chunk.model
+             yield _start_event(state.model, state.message_id)
+
+         for choice in chunk.choices or []:
+             delta = choice.get("delta", {}) or {}
+             for evt in _handle_delta(state, delta):
+                 yield evt
+             if choice.get("finish_reason"):
+                 stop_reason_openai = choice["finish_reason"]
+
+         # Some providers put usage on the last chunk (OpenAI with
+         # stream_options.include_usage=true, and anything that honors that
+         # flag). When it's there, trust it — otherwise we fall back to the
+         # char-based estimate computed inside _handle_delta.
+         usage = getattr(chunk, "usage", None)
+         if isinstance(usage, dict):
+             ct = usage.get("completion_tokens")
+             if isinstance(ct, int) and ct >= 0:
+                 state.upstream_output_tokens = ct
+             pt = usage.get("prompt_tokens")
+             if isinstance(pt, int) and pt >= 0:
+                 state.upstream_input_tokens = pt
+
+     # Terminator sequence
+     for evt in _close_current_block(state):
+         yield evt
+
+     stop_reason = _FINISH_REASON_MAP.get(stop_reason_openai or "stop", "end_turn")
+     yield _event(
+         "message_delta",
+         {
+             "delta": {"stop_reason": stop_reason, "stop_sequence": None},
+             "usage": _finalize_usage(state),
+         },
+     )
+     yield _event("message_stop", {})
+
+
+ # ============================================================
+ # Synthesize Anthropic stream events from a non-stream response
+ # ============================================================
+ #
+ # v0.3-D: for tool-using turns we cannot repair mid-stream (the partial
+ # JSON hasn't been closed yet), so the ingress downgrades the request to
+ # non-streaming internally, runs repair on the completed response, and
+ # replays it as a spec-compliant Anthropic SSE event sequence via the
+ # function below.
+ #
+ # From the client's point of view the stream is just slower to start —
+ # all content arrives in a single burst — but every event is wire-legal
+ # and tool_use blocks are structurally correct (not emitted as text that
+ # the client has to post-parse).
+
+
+ async def synthesize_anthropic_stream_from_response(
+     resp: AnthropicResponse,
+ ) -> AsyncIterator[AnthropicStreamEvent]:
+     """Replay a finalized AnthropicResponse as a sequence of stream events.
+
+     Emits, in order:
+         message_start
+         for each content block:
+             content_block_start
+             content_block_delta (text_delta OR input_json_delta)
+             content_block_stop
+         message_delta (carries stop_reason + usage)
+         message_stop
+
+     For tool_use blocks the input dict is serialized and delivered as a
+     single input_json_delta — Anthropic's wire spec permits the entire
+     JSON to ride on one partial_json fragment.
+     """
+     yield _event(
+         "message_start",
+         {
+             "message": {
+                 "id": resp.id,
+                 "type": "message",
+                 "role": "assistant",
+                 "content": [],
+                 "model": resp.model,
+                 "stop_reason": None,
+                 "stop_sequence": None,
+                 "usage": {
+                     "input_tokens": resp.usage.input_tokens,
+                     "output_tokens": 0,
+                 },
+             }
+         },
+     )
+
+     for idx, block in enumerate(resp.content):
+         btype = block.get("type")
+         if btype == "text":
+             yield _event(
+                 "content_block_start",
+                 {
+                     "index": idx,
+                     "content_block": {"type": "text", "text": ""},
+                 },
+             )
+             text = block.get("text", "") or ""
+             if text:
+                 yield _event(
+                     "content_block_delta",
+                     {
+                         "index": idx,
+                         "delta": {"type": "text_delta", "text": text},
+                     },
+                 )
+             yield _event("content_block_stop", {"index": idx})
+         elif btype == "tool_use":
+             yield _event(
+                 "content_block_start",
+                 {
+                     "index": idx,
+                     "content_block": {
+                         "type": "tool_use",
+                         "id": block.get("id", ""),
+                         "name": block.get("name", ""),
+                         "input": {},
+                     },
+                 },
+             )
+             input_json = json.dumps(block.get("input", {}), ensure_ascii=False)
+             yield _event(
+                 "content_block_delta",
+                 {
+                     "index": idx,
+                     "delta": {
+                         "type": "input_json_delta",
+                         "partial_json": input_json,
+                     },
+                 },
+             )
+             yield _event("content_block_stop", {"index": idx})
+         # Unknown block types are skipped silently (v0.3 scope).
+
+     yield _event(
+         "message_delta",
+         {
+             "delta": {
+                 "stop_reason": resp.stop_reason or "end_turn",
+                 "stop_sequence": None,
+             },
+             "usage": {"output_tokens": resp.usage.output_tokens},
+         },
+     )
+     yield _event("message_stop", {})
+
+
+ # ============================================================
+ # Internal → Anthropic (request direction, v0.4-A)
+ # ============================================================
+ #
+ # Symmetric to to_chat_request / to_anthropic_response /
+ # stream_chat_to_anthropic_events. These are used when an OpenAI-shaped
+ # ingress (/v1/chat/completions) routes to a kind:anthropic provider —
+ # AnthropicAdapter internally converts ChatRequest → AnthropicRequest,
+ # calls the native Messages API, and converts AnthropicResponse /
+ # AnthropicStreamEvent back to OpenAI shape.
+
+ # Anthropic requires max_tokens on every request. OpenAI ChatRequest
+ # leaves it optional, so we need a sensible default. 4096 covers typical
+ # chat / coding turns; users who need more should set max_tokens on the
+ # client request.
+ _DEFAULT_ANTHROPIC_MAX_TOKENS = 4096
+
+
+ _REVERSE_FINISH_REASON_MAP = {
+     "end_turn": "stop",
+     "max_tokens": "length",
+     "tool_use": "tool_calls",
+     "stop_sequence": "stop",
+ }
+
+
+ def _openai_image_url_to_anthropic_source(url: str) -> dict[str, Any]:
+     """Convert an OpenAI image_url.url value to Anthropic image source shape.
+
+     Handles both remote URLs (https://...) and inline data URIs
+     (data:image/png;base64,<b64>). Data URIs without the ;base64 marker
+     are treated as URL sources so the upstream can reject them rather
+     than CodeRouter silently corrupting them.
+     """
+     if url.startswith("data:"):
+         comma = url.find(",")
+         if comma > 0:
+             header = url[len("data:") : comma]
+             data = url[comma + 1 :]
+             parts = [p.strip() for p in header.split(";")]
+             media_type = parts[0] or "image/png"
+             is_base64 = any(p == "base64" for p in parts[1:])
+             if is_base64:
+                 return {
+                     "type": "base64",
+                     "media_type": media_type,
+                     "data": data,
+                 }
+     return {"type": "url", "url": url}
+
+
+ def _openai_user_content_to_anthropic(
+     content: str | list[dict[str, Any]] | None,
+ ) -> str | list[dict[str, Any]]:
+     """OpenAI user content → Anthropic user content.
+
+     Returns either a plain string (simple text case) or a list of
+     Anthropic content blocks (multimodal or mixed). Empty results
+     collapse to an empty string so the caller can skip the turn.
+     """
+     if content is None:
+         return ""
+     if isinstance(content, str):
+         return content
+     blocks: list[dict[str, Any]] = []
+     for part in content:
+         ptype = part.get("type")
+         if ptype == "text":
+             blocks.append({"type": "text", "text": part.get("text", "")})
+         elif ptype == "image_url":
+             iu = part.get("image_url", {}) or {}
+             url = iu.get("url", "")
+             if url:
+                 blocks.append(
+                     {
+                         "type": "image",
+                         "source": _openai_image_url_to_anthropic_source(url),
+                     }
+                 )
+         # Unknown part types: skip silently (v0.4-A scope).
+     if not blocks:
+         return ""
+     return blocks
+
+
+ def _openai_assistant_to_anthropic(msg: Message) -> dict[str, Any]:
+     """OpenAI assistant message → Anthropic assistant message.
+
+     Assistant turns may carry text content AND tool_calls. Anthropic
+     represents both as content blocks in a single turn, so we flatten
+     here. Malformed tool_call arguments (non-JSON) are preserved as
+     {"_raw": <string>} to mirror the forward translator's behavior.
+     """
+     blocks: list[dict[str, Any]] = []
+     content = msg.content
+     if isinstance(content, str) and content:
+         blocks.append({"type": "text", "text": content})
+     elif isinstance(content, list):
+         for part in content:
+             if part.get("type") == "text":
+                 blocks.append({"type": "text", "text": part.get("text", "")})
+             # Assistant turns with images are rare — skip to keep scope tight.
+     for tc in msg.tool_calls or []:
+         fn = tc.get("function", {}) or {}
+         raw_args = fn.get("arguments", "") or ""
+         if isinstance(raw_args, dict):
+             parsed_args: dict[str, Any] = raw_args
+         else:
+             try:
+                 parsed_args = json.loads(raw_args) if raw_args else {}
+             except json.JSONDecodeError:
+                 parsed_args = {"_raw": raw_args}
+         blocks.append(
+             {
+                 "type": "tool_use",
+                 "id": tc.get("id", f"toolu_{uuid.uuid4().hex[:16]}"),
+                 "name": fn.get("name", ""),
+                 "input": parsed_args,
+             }
+         )
+     if not blocks:
+         # Anthropic rejects turns with zero content blocks; emit an empty
+         # text placeholder so the turn is still syntactically valid.
+         blocks.append({"type": "text", "text": ""})
+     return {"role": "assistant", "content": blocks}
+
+
+ def _openai_tool_message_to_block(msg: Message) -> dict[str, Any]:
+     """OpenAI role=tool message → Anthropic tool_result block.
+
+     OpenAI tool results are normally a flat string; we also accept a list
+     of text parts (multimodal tool output) and flatten those here.
+     """
+     content = msg.content
+     if isinstance(content, list):
+         text_parts: list[str] = []
+         for part in content:
+             if part.get("type") == "text":
+                 text_parts.append(str(part.get("text", "")))
+         content_str = "\n".join(text_parts)
+     elif isinstance(content, str):
+         content_str = content
+     else:
+         content_str = ""
+     return {
+         "type": "tool_result",
+         "tool_use_id": msg.tool_call_id or "",
+         "content": content_str,
+     }
+
+
+ def _openai_tools_to_anthropic(
+     tools: list[dict[str, Any]] | None,
+ ) -> list[dict[str, Any]] | None:
+     """OpenAI tools array → Anthropic tools array.
+
+     OpenAI:    {type: "function", function: {name, description, parameters}}
+     Anthropic: {name, description, input_schema}
+
+     Non-function tool types are skipped (Anthropic has no analog yet).
+     """
+     if not tools:
+         return None
+     out: list[dict[str, Any]] = []
+     for tool in tools:
+         if tool.get("type") != "function":
+             continue
+         fn = tool.get("function", {}) or {}
+         out.append(
+             {
+                 "name": fn.get("name", ""),
+                 "description": fn.get("description", ""),
+                 "input_schema": fn.get("parameters") or {"type": "object", "properties": {}},
+             }
+         )
+     return out or None
+
+
+ def _openai_tool_choice_to_anthropic(tc: Any | None) -> dict[str, Any] | None:
+     """OpenAI tool_choice → Anthropic tool_choice."""
+     if tc is None:
+         return None
+     if isinstance(tc, str):
+         if tc == "auto":
+             return {"type": "auto"}
+         if tc == "none":
+             return {"type": "none"}
+         if tc == "required":
+             return {"type": "any"}
+         return None
+     if isinstance(tc, dict) and tc.get("type") == "function":
+         fn = tc.get("function", {}) or {}
+         return {"type": "tool", "name": fn.get("name", "")}
+     return None
+
+
+ def to_anthropic_request(chat_req: ChatRequest) -> AnthropicRequest:
+     """Internal ChatRequest → AnthropicRequest (reverse of to_chat_request).
+
+     Key transformations:
+       - role=system messages → top-level ``system`` field (joined with
+         newlines when multiple — OpenAI allows repeats, Anthropic takes
+         one string or block list).
+       - Consecutive role=tool messages → merged into a single user turn
+         with multiple ``tool_result`` blocks (Anthropic's canonical shape).
+       - tool_calls on assistant → ``tool_use`` content blocks.
+       - image_url parts → ``image`` blocks with base64 or url source.
+       - max_tokens is Anthropic-required; defaults to 4096 when omitted.
+
+     The returned request's ``model`` is a placeholder — the AnthropicAdapter
+     always overrides it with ``provider.config.model`` on the wire (same
+     routing rule as the OpenAI-compat adapter).
+     """
+     system_texts: list[str] = []
+     messages_out: list[dict[str, Any]] = []
+     pending_tool_results: list[dict[str, Any]] = []
+
+     def _flush_tool_results() -> None:
+         if pending_tool_results:
+             messages_out.append({"role": "user", "content": list(pending_tool_results)})
+             pending_tool_results.clear()
+
+     for msg in chat_req.messages:
+         role = msg.role
+         if role == "system":
+             _flush_tool_results()
+             content = msg.content
+             if isinstance(content, str):
+                 if content:
+                     system_texts.append(content)
+             elif isinstance(content, list):
+                 parts: list[str] = []
+                 for p in content:
+                     if p.get("type") == "text":
+                         parts.append(str(p.get("text", "")))
+                 joined = "\n".join(parts)
+                 if joined:
+                     system_texts.append(joined)
+             continue
+         if role == "tool":
+             pending_tool_results.append(_openai_tool_message_to_block(msg))
+             continue
+         # Any other role flushes pending tool_results first.
+         _flush_tool_results()
+         if role == "user":
+             translated = _openai_user_content_to_anthropic(msg.content)
+             # Skip empty user turns rather than send a block Anthropic will reject.
+             if translated == "" or translated == []:
+                 continue
+             messages_out.append({"role": "user", "content": translated})
+         elif role == "assistant":
+             messages_out.append(_openai_assistant_to_anthropic(msg))
+     _flush_tool_results()
+
+     system_joined = "\n".join(s for s in system_texts if s) or None
+
+     tools_list = _openai_tools_to_anthropic(chat_req.tools)
+     anth_tools = [AnthropicTool.model_validate(t) for t in tools_list] if tools_list else None
+
+     req = AnthropicRequest(
+         # Placeholder — AnthropicAdapter._payload always overrides with
+         # provider.config.model. We keep the client-supplied value for
+         # diagnostic fidelity only.
+         model=chat_req.model or "placeholder",
+         max_tokens=chat_req.max_tokens or _DEFAULT_ANTHROPIC_MAX_TOKENS,
+         messages=[AnthropicMessage.model_validate(m) for m in messages_out],
+         system=system_joined,
+         tools=anth_tools,
+         tool_choice=_openai_tool_choice_to_anthropic(chat_req.tool_choice),
+         temperature=chat_req.temperature,
+         top_p=chat_req.top_p,
+         stop_sequences=chat_req.stop,
+         stream=chat_req.stream,
+     )
+     # Propagate CodeRouter routing hint.
+     req.profile = chat_req.profile
+     return req
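
The tool-result merge in action (not part of the packaged file; Message
fields mirror their use above, and pydantic defaults are assumed for the
unset ChatRequest fields):

    chat_req = ChatRequest(
        messages=[
            Message.model_validate({"role": "system", "content": "Be terse."}),
            Message.model_validate(
                {"role": "assistant", "content": None,
                 "tool_calls": [
                     {"id": "call_1", "type": "function",
                      "function": {"name": "ls", "arguments": "{}"}},
                     {"id": "call_2", "type": "function",
                      "function": {"name": "pwd", "arguments": "{}"}},
                 ]}
            ),
            Message.model_validate(
                {"role": "tool", "tool_call_id": "call_1", "content": "a.py"}),
            Message.model_validate(
                {"role": "tool", "tool_call_id": "call_2", "content": "/src"}),
        ],
    )
    req = to_anthropic_request(chat_req)
    assert req.system == "Be terse."
    assert req.max_tokens == 4096  # _DEFAULT_ANTHROPIC_MAX_TOKENS
    # The two consecutive role=tool messages collapse into ONE user turn
    # carrying two tool_result blocks, Anthropic's canonical shape.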
+
+
+ # ============================================================
+ # Anthropic → internal (response direction, v0.4-A)
+ # ============================================================
+
+
+ def to_chat_response(resp: AnthropicResponse) -> ChatResponse:
+     """AnthropicResponse → internal ChatResponse (reverse of to_anthropic_response).
+
+     Anthropic `content` may contain multiple text blocks (e.g. around tool_use
+     interjections) — we concatenate them into one OpenAI `content` string
+     since OpenAI's single-message shape doesn't model interleaving. tool_use
+     blocks lift into the top-level `tool_calls` array.
+     """
+     text_parts: list[str] = []
+     tool_calls: list[dict[str, Any]] = []
+
+     for block in resp.content:
+         btype = block.get("type")
+         if btype == "text":
+             text_parts.append(str(block.get("text", "")))
+         elif btype == "tool_use":
+             tool_calls.append(
+                 {
+                     "id": block.get("id", ""),
+                     "type": "function",
+                     "function": {
+                         "name": block.get("name", ""),
+                         "arguments": json.dumps(block.get("input", {}), ensure_ascii=False),
+                     },
+                 }
+             )
+         # Unknown block types (thinking, etc.) are skipped silently.
+
+     joined_text = "".join(text_parts)
+
+     message: dict[str, Any] = {"role": "assistant"}
+     # OpenAI spec: content may be null when tool_calls is populated.
+     message["content"] = joined_text if joined_text else None
+     if tool_calls:
+         message["tool_calls"] = tool_calls
+
+     finish_reason = _REVERSE_FINISH_REASON_MAP.get(resp.stop_reason or "end_turn", "stop")
+
+     usage_in = resp.usage.input_tokens
+     usage_out = resp.usage.output_tokens
+     usage: dict[str, Any] = {
+         "prompt_tokens": usage_in,
+         "completion_tokens": usage_out,
+         "total_tokens": usage_in + usage_out,
+     }
+
+     return ChatResponse(
+         id=resp.id,
+         object="chat.completion",
+         created=int(time.time()),
+         model=resp.model,
+         choices=[
+             {
+                 "index": 0,
+                 "message": message,
+                 "finish_reason": finish_reason,
+             }
+         ],
+         usage=usage,
+         coderouter_provider=resp.coderouter_provider,
+     )
+
+
+ # ============================================================
+ # Stream translation (Anthropic events → OpenAI chunks, v0.4-A)
+ # ============================================================
+
+
+ class _ReverseStreamState:
+     """Bookkeeping for Anthropic events → OpenAI StreamChunk translation.
+
+     Mirrors _StreamState but in the opposite direction. We map Anthropic's
+     per-block index space to OpenAI's flat tool_calls[].index space: only
+     tool_use blocks get a mapping; text blocks are transparent (OpenAI's
+     delta.content is non-indexed).
+     """
+
+     def __init__(self) -> None:
+         self.started: bool = False
+         self.message_id: str = f"chatcmpl-{uuid.uuid4().hex[:16]}"
+         self.model: str = "unknown"
+         self.created: int = 0
+         self.block_idx_to_tool_idx: dict[int, int] = {}
+         self.next_tool_idx: int = 0
+         self.stop_reason_anthropic: str | None = None
+         self.usage_in: int = 0
+         self.usage_out: int = 0
+
+
+ def _make_chunk(
+     state: _ReverseStreamState,
+     delta: dict[str, Any],
+     *,
+     finish_reason: str | None = None,
+ ) -> StreamChunk:
+     return StreamChunk(
+         id=state.message_id,
+         created=state.created,
+         model=state.model,
+         choices=[
+             {
+                 "index": 0,
+                 "delta": delta,
+                 "finish_reason": finish_reason,
+             }
+         ],
+     )
+
+
+ async def stream_anthropic_to_chat_chunks(
+     events: AsyncIterator[AnthropicStreamEvent],
+     *,
+     provider_name: str = "anthropic",
+ ) -> AsyncIterator[StreamChunk]:
+     """Translate Anthropic SSE events → OpenAI StreamChunk async iterator.
+
+     Emits, in order:
+       1. First chunk with ``delta.role = "assistant"`` (OpenAI convention).
+       2. For each text_delta: chunk with ``delta.content``.
+       3. For each tool_use block_start: chunk with
+          ``delta.tool_calls[].function.name`` (and empty arguments).
+       4. For each input_json_delta: chunk with
+          ``delta.tool_calls[].function.arguments``.
+       5. Final chunk with empty delta and ``finish_reason`` set.
+       6. Usage chunk (``choices: []``) mirroring OpenAI's
+          ``stream_options.include_usage=true`` pattern.
+
+     Anthropic ``event: error`` is converted to ``AdapterError(retryable=False)``
+     so the engine's mid-stream guard can re-raise it as ``MidStreamError``.
+
+     ``provider_name`` is attached to any raised AdapterError; the
+     FallbackEngine overrides this with the actual adapter name on
+     mid-stream conversion, but having a sensible default keeps tests
+     and direct callers sane.
+     """
+     state = _ReverseStreamState()
+
+     async for ev in events:
+         etype = ev.type
+         data = ev.data or {}
+
+         if etype == "message_start":
+             msg = data.get("message", {}) or {}
+             if msg.get("id"):
+                 state.message_id = msg["id"]
+             if msg.get("model"):
+                 state.model = msg["model"]
+             state.created = int(time.time())
+             usage = msg.get("usage", {}) or {}
+             state.usage_in = int(usage.get("input_tokens", 0) or 0)
+             state.started = True
+             # Initial role=assistant chunk (OpenAI clients expect this).
+             yield _make_chunk(state, {"role": "assistant", "content": ""})
+             continue
+
+         if etype == "content_block_start":
+             idx = int(data.get("index", 0) or 0)
+             block = data.get("content_block", {}) or {}
+             if block.get("type") == "tool_use":
+                 tool_idx = state.next_tool_idx
+                 state.next_tool_idx += 1
+                 state.block_idx_to_tool_idx[idx] = tool_idx
+                 yield _make_chunk(
+                     state,
+                     {
+                         "tool_calls": [
+                             {
+                                 "index": tool_idx,
+                                 "id": block.get("id", ""),
+                                 "type": "function",
+                                 "function": {
+                                     "name": block.get("name", ""),
+                                     "arguments": "",
+                                 },
+                             }
+                         ]
+                     },
+                 )
+             # text blocks: no emit — the content_block_delta will carry text.
+             continue
+
+         if etype == "content_block_delta":
+             idx = int(data.get("index", 0) or 0)
+             delta = data.get("delta", {}) or {}
+             dtype = delta.get("type")
+             if dtype == "text_delta":
+                 text = delta.get("text", "")
+                 if text:
+                     yield _make_chunk(state, {"content": text})
+             elif dtype == "input_json_delta":
+                 partial = delta.get("partial_json", "")
+                 if partial and idx in state.block_idx_to_tool_idx:
+                     tool_idx = state.block_idx_to_tool_idx[idx]
+                     yield _make_chunk(
+                         state,
+                         {
+                             "tool_calls": [
+                                 {
+                                     "index": tool_idx,
+                                     "function": {"arguments": partial},
+                                 }
+                             ]
+                         },
+                     )
+             continue
+
+         if etype == "content_block_stop":
+             # OpenAI has no per-block stop — no emit.
+             continue
+
+         if etype == "message_delta":
+             delta = data.get("delta", {}) or {}
+             sr = delta.get("stop_reason")
+             if sr:
+                 state.stop_reason_anthropic = sr
+             usage = data.get("usage", {}) or {}
+             if isinstance(usage.get("output_tokens"), int):
+                 state.usage_out = int(usage["output_tokens"])
+             if isinstance(usage.get("input_tokens"), int):
+                 state.usage_in = int(usage["input_tokens"])
+             continue
+
+         if etype == "message_stop":
+             finish = _REVERSE_FINISH_REASON_MAP.get(
+                 state.stop_reason_anthropic or "end_turn", "stop"
+             )
+             yield _make_chunk(state, {}, finish_reason=finish)
+             # Final usage chunk (no choices) — parallels OpenAI's
+             # stream_options.include_usage=true trailing chunk.
+             yield StreamChunk(
+                 id=state.message_id,
+                 created=state.created,
+                 model=state.model,
+                 choices=[],
+                 usage={
+                     "prompt_tokens": state.usage_in,
+                     "completion_tokens": state.usage_out,
+                     "total_tokens": state.usage_in + state.usage_out,
+                 },
+             )
+             return
+
+         if etype == "error":
+             err = data.get("error", {}) or {}
+             msg_text = err.get("message") or err.get("type") or "anthropic error event"
+             raise AdapterError(
+                 f"upstream Anthropic error event: {msg_text}",
+                 provider=provider_name,
+                 retryable=False,
+             )
+
+         # Unknown event types are skipped silently (forward-compat).
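
Consuming the reverse translator (not part of the packaged file; the events
are built with this module's own _event helper, so the shapes match what the
forward direction emits):

    import asyncio

    async def _demo() -> None:
        async def events():
            yield _event("message_start",
                         {"message": {"id": "msg_1", "model": "m",
                                      "usage": {"input_tokens": 3}}})
            yield _event("content_block_start",
                         {"index": 0, "content_block": {"type": "text", "text": ""}})
            yield _event("content_block_delta",
                         {"index": 0, "delta": {"type": "text_delta", "text": "hi"}})
            yield _event("content_block_stop", {"index": 0})
            yield _event("message_delta",
                         {"delta": {"stop_reason": "end_turn"},
                          "usage": {"output_tokens": 1}})
            yield _event("message_stop", {})

        chunks = [c async for c in stream_anthropic_to_chat_chunks(events())]
        assert chunks[0].choices[0]["delta"] == {"role": "assistant", "content": ""}
        assert chunks[1].choices[0]["delta"] == {"content": "hi"}
        assert chunks[2].choices[0]["finish_reason"] == "stop"
        assert chunks[3].usage["completion_tokens"] == 1

    asyncio.run(_demo())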