agentblaster 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. agentblaster/__init__.py +3 -0
  2. agentblaster/adapters.py +1435 -0
  3. agentblaster/agent_profiles.py +420 -0
  4. agentblaster/audit.py +27 -0
  5. agentblaster/benchmark_kit.py +356 -0
  6. agentblaster/bundle.py +692 -0
  7. agentblaster/campaign.py +1031 -0
  8. agentblaster/campaign_preflight.py +647 -0
  9. agentblaster/capabilities.py +270 -0
  10. agentblaster/claim_readiness.py +3948 -0
  11. agentblaster/cleanup.py +226 -0
  12. agentblaster/cli.py +4202 -0
  13. agentblaster/compare.py +423 -0
  14. agentblaster/config.py +62 -0
  15. agentblaster/constants.py +8 -0
  16. agentblaster/contract_check.py +919 -0
  17. agentblaster/costs.py +74 -0
  18. agentblaster/dashboard.py +5974 -0
  19. agentblaster/engine_advisory.py +1045 -0
  20. agentblaster/engine_onboarding.py +224 -0
  21. agentblaster/engine_targets.py +545 -0
  22. agentblaster/environment.py +284 -0
  23. agentblaster/errors.py +21 -0
  24. agentblaster/evidence.py +188 -0
  25. agentblaster/evidence_index.py +1865 -0
  26. agentblaster/experiment.py +200 -0
  27. agentblaster/exports.py +158 -0
  28. agentblaster/failures.py +70 -0
  29. agentblaster/fixtures.py +775 -0
  30. agentblaster/harness.py +1254 -0
  31. agentblaster/implementation_status.py +719 -0
  32. agentblaster/integrity.py +161 -0
  33. agentblaster/launch_recipes.py +295 -0
  34. agentblaster/lcp.py +107 -0
  35. agentblaster/matrix.py +101 -0
  36. agentblaster/matrix_gate.py +565 -0
  37. agentblaster/matrix_pressure.py +187 -0
  38. agentblaster/matrix_saturation.py +601 -0
  39. agentblaster/mcp.py +187 -0
  40. agentblaster/metric_coverage.py +552 -0
  41. agentblaster/mock_provider.py +485 -0
  42. agentblaster/model_catalog.py +153 -0
  43. agentblaster/models.py +531 -0
  44. agentblaster/observability.py +110 -0
  45. agentblaster/planning.py +199 -0
  46. agentblaster/policy.py +635 -0
  47. agentblaster/presets.py +219 -0
  48. agentblaster/prompt_footprint.py +245 -0
  49. agentblaster/protocol_repair.py +431 -0
  50. agentblaster/provider_audit.py +210 -0
  51. agentblaster/publication_brief.py +893 -0
  52. agentblaster/quality.py +1142 -0
  53. agentblaster/rate_limits.py +74 -0
  54. agentblaster/readiness.py +241 -0
  55. agentblaster/redaction.py +58 -0
  56. agentblaster/redaction_scan.py +247 -0
  57. agentblaster/release.py +440 -0
  58. agentblaster/release_qualification.py +2248 -0
  59. agentblaster/remote_onboarding.py +308 -0
  60. agentblaster/reports.py +2245 -0
  61. agentblaster/runner.py +1677 -0
  62. agentblaster/schema_registry.py +1151 -0
  63. agentblaster/secrets.py +274 -0
  64. agentblaster/security_posture.py +492 -0
  65. agentblaster/skills.py +67 -0
  66. agentblaster/stress_matrix.py +113 -0
  67. agentblaster/suite_audit.py +259 -0
  68. agentblaster/suite_calibration.py +171 -0
  69. agentblaster/suites.py +805 -0
  70. agentblaster/telemetry.py +947 -0
  71. agentblaster/telemetry_audit.py +300 -0
  72. agentblaster/toolsim.py +193 -0
  73. agentblaster/workflow_readiness.py +570 -0
  74. agentblaster/workflow_surfaces.py +292 -0
  75. agentblaster-0.1.0.dist-info/METADATA +250 -0
  76. agentblaster-0.1.0.dist-info/RECORD +78 -0
  77. agentblaster-0.1.0.dist-info/WHEEL +4 -0
  78. agentblaster-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1435 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Mapping
5
+ from time import perf_counter
6
+ from typing import Any
7
+
8
+ import httpx
9
+
10
+ from agentblaster.constants import SMOKE_SENTINEL, SMOKE_SENTINEL_MAX_TOKENS, SMOKE_SENTINEL_PROMPT, SMOKE_SENTINEL_SYSTEM_PROMPT
11
+ from agentblaster.errors import AdapterError
12
+ from agentblaster.models import AdapterResponse, ApiContract, BenchmarkCase, ProbeResult, ProviderConfig, ToolCallRecord
13
+ from agentblaster.redaction import redact_value
14
+ from agentblaster.secrets import SecretResolver
15
+
16
+
17
# Lower-cased response header names that `_safe_http_metadata` is allowed to
# copy into recorded results; every other response header is left out.
SAFE_RESPONSE_HEADERS = {
    # Request correlation identifiers.
    "request-id",
    "x-request-id",
    "openai-request-id",
    "anthropic-request-id",
    # Request-based rate-limit counters.
    "x-ratelimit-limit-requests",
    "x-ratelimit-remaining-requests",
    "x-ratelimit-reset-requests",
    # Token-based rate-limit counters.
    "x-ratelimit-limit-tokens",
    "x-ratelimit-remaining-tokens",
    "x-ratelimit-reset-tokens",
    # Back-off hint.
    "retry-after",
}
30
+
31
+
32
class ProviderAdapter:
    """Base class for provider protocol adapters.

    Subclasses implement `probe`, `chat_completion` and `_auth_headers`.
    The adapter can be used as a context manager; on exit (or on `close()`)
    it closes the HTTP client only if it created that client itself.
    """

    adapter_name = "provider"
    adapter_version = "agentblaster-adapter-v1"

    def __init__(
        self,
        provider: ProviderConfig,
        *,
        secrets: SecretResolver | None = None,
        client: httpx.Client | None = None,
        timeout: float = 10.0,
    ) -> None:
        """Store the provider configuration and set up the HTTP client.

        Args:
            provider: Provider configuration the adapter talks to.
            secrets: Resolver for the provider's API key reference; a default
                `SecretResolver` is created when omitted.
            client: Pre-built HTTP client to reuse. When omitted, a client is
                created with `timeout` and the provider's TLS settings.
            timeout: Timeout passed to the internally created client only.
        """
        self.provider = provider
        self.secrets = secrets or SecretResolver()
        # A caller-supplied client stays under the caller's control; only a
        # client created here is closed by `close()`.
        self._owns_client = client is None
        if client is None:
            client = httpx.Client(timeout=timeout, verify=httpx_verify_config(provider))
        self.client = client

    def close(self) -> None:
        """Close the HTTP client if this adapter created it."""
        if getattr(self, "_owns_client", False):
            self.client.close()

    def __enter__(self) -> ProviderAdapter:
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        self.close()

    def probe(self) -> ProbeResult:
        """Probe the provider; implemented by subclasses."""
        raise NotImplementedError

    def smoke_chat(self, model: str) -> AdapterResponse:
        """Send the smoke-sentinel case to *model* through `chat_completion`."""
        case = BenchmarkCase(
            id="protocol-smoke-chat",
            title="Protocol smoke chat",
            system_prompt=SMOKE_SENTINEL_SYSTEM_PROMPT,
            prompt=SMOKE_SENTINEL_PROMPT,
            expected_substring=SMOKE_SENTINEL,
            max_tokens=SMOKE_SENTINEL_MAX_TOKENS,
        )
        return self.chat_completion(model, case)

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Run *case* against *model*; implemented by subclasses."""
        raise NotImplementedError

    def _headers(self) -> dict[str, str]:
        """Return the provider's static headers plus auth headers, if a key resolves."""
        headers = dict(self.provider.headers)
        api_key = self.secrets.resolve(self.provider.api_key_ref)
        if api_key:
            headers.update(self._auth_headers(api_key))
        return headers

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the contract-specific auth headers; implemented by subclasses."""
        raise NotImplementedError
74
+
75
+
76
def httpx_verify_config(provider: ProviderConfig) -> bool | str:
    """Translate the provider's TLS settings into an httpx `verify` value.

    Returns `False` when TLS verification is disabled, the CA bundle path as a
    string when one is configured, and `True` otherwise.
    """
    if provider.tls_verify:
        bundle = provider.ca_bundle
        return True if bundle is None else str(bundle)
    return False
82
+
83
+
84
class OpenAICompatibleAdapter(ProviderAdapter):
    """Adapter for OpenAI-compatible `/models` and `/chat/completions` endpoints."""

    adapter_name = "openai-chat-completions"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the bearer-token `Authorization` header for *api_key*."""
        return {"Authorization": f"Bearer {api_key}"}

    def probe(self) -> ProbeResult:
        """List models via `GET {base_url}/models`.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/models"
        try:
            response = self.client.get(url, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI probe failed for {self.provider.name}: {exc}") from exc

        models: list[str] = []
        raw = response_json_or_metadata(response)
        data = raw.get("data", [])
        if isinstance(data, list):
            models = [str(item.get("id")) for item in data if isinstance(item, Mapping) and item.get("id")]

        return ProbeResult(
            provider=self.provider.name,
            contract=self.provider.contract,
            ok=response.is_success,
            status_code=response.status_code,
            message="ok" if response.is_success else _redacted_response_text(response),
            models=models,
            raw=raw,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Run *case* against `POST {base_url}/chat/completions`.

        Streaming cases are delegated to `_chat_completion_stream`.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/chat/completions"
        payload = {
            "model": model,
            "messages": _openai_messages_from_case(case),
            "temperature": case.temperature,
            "max_tokens": case.max_tokens,
        }
        if case.streaming:
            payload["stream"] = True
        if case.response_format:
            payload["response_format"] = case.response_format
        if case.tools:
            payload["tools"] = case.tools
        if case.tool_choice:
            payload["tool_choice"] = case.tool_choice
        started = perf_counter()
        try:
            if case.streaming:
                return self._chat_completion_stream(url, payload, case, started)
            response = self.client.post(url, headers=self._headers(), json=payload, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI smoke request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000

        raw = response_json_or_metadata(response)

        text = ""
        choices = raw.get("choices", [])
        # Check the container type before indexing: a malformed body such as
        # {"choices": {...}} would otherwise raise on `choices[0]`.
        if isinstance(choices, list) and choices and isinstance(choices[0], Mapping):
            message = choices[0].get("message", {})
            if isinstance(message, Mapping):
                text = str(message.get("content") or "")
        tool_calls = extract_openai_tool_calls(raw)

        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI,
            status_code=response.status_code,
            latency_ms=latency_ms,
            raw=raw,
            text=text,
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
        )

    def _chat_completion_stream(
        self,
        url: str,
        payload: dict[str, Any],
        case: BenchmarkCase,
        started: float,
    ) -> AdapterResponse:
        """Consume a server-sent-event chat completion stream.

        Each `data:` line is parsed as JSON and recorded; unparseable lines are
        recorded as `{"malformed": ...}`. The loop stops at `[DONE]` or once
        `_stream_cancellation_elapsed_ms` reports a cancellation latency.

        Raises:
            AdapterError: if the HTTP request or stream fails.
        """
        text_parts: list[str] = []
        raw_events: list[dict[str, Any]] = []
        status_code = 0
        ttft_ms = None
        canceled = False
        cancellation_latency_ms = None
        tool_call_fragments: dict[int, dict[str, Any]] = {}
        try:
            with self.client.stream(
                "POST",
                url,
                headers=self._headers(),
                json=payload,
                timeout=case.timeout_seconds,
            ) as response:
                status_code = response.status_code
                for line in response.iter_lines():
                    if not line or not line.startswith("data:"):
                        continue
                    data = line.removeprefix("data:").strip()
                    if data == "[DONE]":
                        break
                    try:
                        event = json.loads(data)
                    except json.JSONDecodeError:
                        raw_events.append({"malformed": data})
                        continue
                    raw_events.append(event)
                    # Time-to-first-token is taken at the first event that carries output.
                    if ttft_ms is None and _openai_stream_event_has_output(event):
                        ttft_ms = (perf_counter() - started) * 1000
                    _accumulate_openai_stream_event(event, text_parts, tool_call_fragments)
                    cancellation_elapsed = _stream_cancellation_elapsed_ms(case, started)
                    if cancellation_elapsed is not None:
                        canceled = True
                        cancellation_latency_ms = cancellation_elapsed
                        break
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI streaming request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000
        raw = {
            "stream": True,
            "events": raw_events,
            "agentblaster_http": _safe_http_metadata(response),
            "agentblaster_cancelled": canceled,
            "cancel_after_ms": case.cancel_after_ms,
            "cancellation_latency_ms": cancellation_latency_ms,
        }
        tool_calls = _stream_tool_calls(tool_call_fragments)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI,
            status_code=status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
            streaming=True,
            ttft_ms=round(ttft_ms, 3) if ttft_ms is not None else None,
            canceled=canceled,
            cancellation_latency_ms=cancellation_latency_ms,
        )
228
+
229
+
230
class OpenAIResponsesAdapter(OpenAICompatibleAdapter):
    """Adapter for OpenAI Responses-compatible `/responses` endpoints."""

    adapter_name = "openai-responses"

    @staticmethod
    def _responses_text_format(response_format: Any) -> Any:
        """Convert a chat-completions `response_format` to a Responses `text.format`.

        Chat completions nest structured-output settings under a `json_schema`
        key, whereas the Responses API expects those fields next to `type`.
        Formats without a nested `json_schema` mapping are returned unchanged.
        """
        if isinstance(response_format, Mapping) and response_format.get("type") == "json_schema":
            nested = response_format.get("json_schema")
            if isinstance(nested, Mapping):
                outer = {key: value for key, value in response_format.items() if key != "json_schema"}
                # Outer keys (notably `type`) win over same-named nested keys.
                return {**nested, **outer}
        return response_format

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Run *case* against `POST {base_url}/responses`.

        Tools, tool choice and response format from the case are converted to
        their Responses-API shapes. Streaming cases are delegated to
        `_responses_stream`.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/responses"
        payload: dict[str, Any] = {
            "model": model,
            "input": _openai_responses_input_from_case(case),
            "temperature": case.temperature,
            "max_output_tokens": case.max_tokens,
        }
        if case.system_prompt:
            payload["instructions"] = case.system_prompt
        if case.previous_response_id:
            payload["previous_response_id"] = case.previous_response_id
        if case.max_tool_calls:
            payload["max_tool_calls"] = case.max_tool_calls
        if case.streaming:
            payload["stream"] = True
        if case.response_format:
            payload["text"] = {"format": self._responses_text_format(case.response_format)}
        if case.tools:
            payload["tools"] = [_openai_chat_tool_to_responses_tool(tool) for tool in case.tools]
        if case.tool_choice:
            payload["tool_choice"] = _openai_chat_tool_choice_to_responses_tool_choice(case.tool_choice)

        started = perf_counter()
        try:
            if case.streaming:
                return self._responses_stream(url, payload, case, started)
            response = self.client.post(url, headers=self._headers(), json=payload, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI Responses request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000

        raw = response_json_or_metadata(response)
        tool_calls = extract_openai_responses_tool_calls(raw)

        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI_RESPONSES,
            status_code=response.status_code,
            latency_ms=latency_ms,
            raw=raw,
            text=extract_openai_responses_text(raw),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
        )

    def _responses_stream(
        self,
        url: str,
        payload: dict[str, Any],
        case: BenchmarkCase,
        started: float,
    ) -> AdapterResponse:
        """Consume a server-sent-event Responses stream.

        Each `data:` line is parsed as JSON and recorded; unparseable lines are
        recorded as `{"malformed": ...}`. The loop stops at `[DONE]` or once
        `_stream_cancellation_elapsed_ms` reports a cancellation latency.

        Raises:
            AdapterError: if the HTTP request or stream fails.
        """
        text_parts: list[str] = []
        raw_events: list[dict[str, Any]] = []
        tool_call_fragments: dict[str, dict[str, Any]] = {}
        usage: dict[str, Any] = {}
        status = None
        status_code = 0
        ttft_ms = None
        canceled = False
        cancellation_latency_ms = None
        try:
            with self.client.stream(
                "POST",
                url,
                headers=self._headers(),
                json=payload,
                timeout=case.timeout_seconds,
            ) as response:
                status_code = response.status_code
                for line in response.iter_lines():
                    if not line or not line.startswith("data:"):
                        continue
                    data = line.removeprefix("data:").strip()
                    if data == "[DONE]":
                        break
                    try:
                        event = json.loads(data)
                    except json.JSONDecodeError:
                        raw_events.append({"malformed": data})
                        continue
                    raw_events.append(event)
                    # Time-to-first-token is taken at the first event that carries output.
                    if ttft_ms is None and _openai_responses_stream_event_has_output(event):
                        ttft_ms = (perf_counter() - started) * 1000
                    _accumulate_openai_responses_stream_event(event, text_parts, tool_call_fragments, usage)
                    # Keep the most recent status reported by any event.
                    response_status = _openai_responses_stream_status(event)
                    if response_status:
                        status = response_status
                    cancellation_elapsed = _stream_cancellation_elapsed_ms(case, started)
                    if cancellation_elapsed is not None:
                        canceled = True
                        cancellation_latency_ms = cancellation_elapsed
                        break
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI Responses streaming request failed for {self.provider.name}: {exc}") from exc

        latency_ms = (perf_counter() - started) * 1000
        raw = {
            "stream": True,
            "events": raw_events,
            "usage": usage,
            "status": status,
            "agentblaster_http": _safe_http_metadata(response),
            "agentblaster_cancelled": canceled,
            "cancel_after_ms": case.cancel_after_ms,
            "cancellation_latency_ms": cancellation_latency_ms,
        }
        tool_calls = _openai_responses_stream_tool_calls(tool_call_fragments)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI_RESPONSES,
            status_code=status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
            streaming=True,
            ttft_ms=round(ttft_ms, 3) if ttft_ms is not None else None,
            canceled=canceled,
            cancellation_latency_ms=cancellation_latency_ms,
        )
358
+
359
+
360
class AnthropicCompatibleAdapter(ProviderAdapter):
    """Adapter for Anthropic-compatible `/models` and `/messages` endpoints."""

    adapter_name = "anthropic-messages"

    # Version sent when the provider configuration does not pin one.
    _DEFAULT_API_VERSION = "2023-06-01"

    def _headers(self) -> dict[str, str]:
        """Return request headers, always including an `anthropic-version`.

        The base implementation only adds the version through `_auth_headers`,
        i.e. when an API key resolves; keyless providers would otherwise send
        no version header at all.
        """
        headers = super()._headers()
        # Header names are case-insensitive, so respect any existing spelling.
        if not any(str(name).lower() == "anthropic-version" for name in headers):
            headers["anthropic-version"] = self._DEFAULT_API_VERSION
        return headers

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the `x-api-key` header plus the configured or default API version."""
        return {
            "x-api-key": api_key,
            "anthropic-version": self.provider.headers.get("anthropic-version", self._DEFAULT_API_VERSION),
        }

    def probe(self) -> ProbeResult:
        """List models via `GET {base_url}/models`.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/models"
        try:
            response = self.client.get(url, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"Anthropic probe failed for {self.provider.name}: {exc}") from exc

        models: list[str] = []
        raw = response_json_or_metadata(response)
        data = raw.get("data", [])
        if isinstance(data, list):
            models = [str(item.get("id")) for item in data if isinstance(item, Mapping) and item.get("id")]

        return ProbeResult(
            provider=self.provider.name,
            contract=ApiContract.ANTHROPIC,
            ok=response.is_success,
            status_code=response.status_code,
            message="ok" if response.is_success else _redacted_response_text(response),
            models=models,
            raw=raw,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Run *case* against `POST {base_url}/messages`.

        Streaming cases are delegated to `_chat_completion_stream`.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/messages"
        messages, system_prompt = _anthropic_messages_and_system_from_case(case)
        payload = {
            "model": model,
            "max_tokens": case.max_tokens,
            "temperature": case.temperature,
            "messages": messages,
        }
        if case.streaming:
            payload["stream"] = True
        if system_prompt:
            payload["system"] = system_prompt
        if case.tools:
            payload["tools"] = _anthropic_tools_from_case(case)
        if case.tool_choice:
            payload["tool_choice"] = _openai_tool_choice_to_anthropic(case.tool_choice)
        started = perf_counter()
        try:
            if case.streaming:
                return self._chat_completion_stream(url, payload, case, started)
            response = self.client.post(url, headers=self._headers(), json=payload, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"Anthropic smoke request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000

        raw = response_json_or_metadata(response)

        # Only `text` content blocks contribute to the response text.
        text_parts: list[str] = []
        content = raw.get("content", [])
        if isinstance(content, list):
            for block in content:
                if isinstance(block, Mapping) and block.get("type") == "text":
                    text_parts.append(str(block.get("text") or ""))
        tool_calls = extract_anthropic_tool_calls(raw)

        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.ANTHROPIC,
            status_code=response.status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
        )

    def _chat_completion_stream(
        self,
        url: str,
        payload: dict[str, Any],
        case: BenchmarkCase,
        started: float,
    ) -> AdapterResponse:
        """Consume a server-sent-event Messages stream.

        Each `data:` line is parsed as JSON and recorded; unparseable lines are
        recorded as `{"malformed": ...}`. The loop runs until the stream ends or
        `_stream_cancellation_elapsed_ms` reports a cancellation latency.

        Raises:
            AdapterError: if the HTTP request or stream fails.
        """
        text_parts: list[str] = []
        raw_events: list[dict[str, Any]] = []
        tool_call_fragments: dict[int, dict[str, Any]] = {}
        usage: dict[str, Any] = {}
        stop_reason = None
        status_code = 0
        ttft_ms = None
        canceled = False
        cancellation_latency_ms = None
        try:
            with self.client.stream(
                "POST",
                url,
                headers=self._headers(),
                json=payload,
                timeout=case.timeout_seconds,
            ) as response:
                status_code = response.status_code
                for line in response.iter_lines():
                    if not line or not line.startswith("data:"):
                        continue
                    data = line.removeprefix("data:").strip()
                    try:
                        event = json.loads(data)
                    except json.JSONDecodeError:
                        raw_events.append({"malformed": data})
                        continue
                    raw_events.append(event)
                    # Time-to-first-token is taken at the first event that carries output.
                    if ttft_ms is None and _anthropic_stream_event_has_output(event):
                        ttft_ms = (perf_counter() - started) * 1000
                    _accumulate_anthropic_stream_event(event, text_parts, tool_call_fragments, usage)
                    # Keep the most recent stop reason reported by any event.
                    event_stop_reason = _anthropic_stream_stop_reason(event)
                    if event_stop_reason:
                        stop_reason = event_stop_reason
                    cancellation_elapsed = _stream_cancellation_elapsed_ms(case, started)
                    if cancellation_elapsed is not None:
                        canceled = True
                        cancellation_latency_ms = cancellation_elapsed
                        break
        except httpx.HTTPError as exc:
            raise AdapterError(f"Anthropic streaming request failed for {self.provider.name}: {exc}") from exc

        latency_ms = (perf_counter() - started) * 1000
        raw = {
            "stream": True,
            "events": raw_events,
            "usage": usage,
            "stop_reason": stop_reason,
            "agentblaster_http": _safe_http_metadata(response),
            "agentblaster_cancelled": canceled,
            "cancel_after_ms": case.cancel_after_ms,
            "cancellation_latency_ms": cancellation_latency_ms,
        }
        tool_calls = _anthropic_stream_tool_calls(tool_call_fragments)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.ANTHROPIC,
            status_code=status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
            streaming=True,
            ttft_ms=round(ttft_ms, 3) if ttft_ms is not None else None,
            canceled=canceled,
            cancellation_latency_ms=cancellation_latency_ms,
        )
514
+
515
+
516
class OllamaNativeAdapter(ProviderAdapter):
    """Provider adapter that speaks Ollama's native `/api/*` endpoints."""

    adapter_name = "ollama-native"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Build the bearer-token `Authorization` header."""
        bearer = f"Bearer {api_key}"
        return {"Authorization": bearer}

    def probe(self) -> ProbeResult:
        """Query `GET {base_url}/api/tags` and collect the advertised model names.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        endpoint = str(self.provider.base_url).rstrip("/") + "/api/tags"
        try:
            reply = self.client.get(endpoint, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"Ollama native probe failed for {self.provider.name}: {exc}") from exc

        model_names: list[str] = []
        body = response_json_or_metadata(reply)
        entries = body.get("models", [])
        if isinstance(entries, list):
            for entry in entries:
                if not isinstance(entry, Mapping):
                    continue
                # Prefer `name`, falling back to `model`; skip entries with neither.
                label = entry.get("name") or entry.get("model")
                if label:
                    model_names.append(str(label))

        succeeded = reply.is_success
        return ProbeResult(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            ok=succeeded,
            status_code=reply.status_code,
            message="ok" if succeeded else _redacted_response_text(reply),
            models=model_names,
            raw=body,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Send *case* to `POST {base_url}/api/chat` as a non-streaming request.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        endpoint = str(self.provider.base_url).rstrip("/") + "/api/chat"
        request_body: dict[str, Any] = {
            "model": model,
            "messages": _openai_messages_from_case(case),
            "stream": False,
            "options": {
                "temperature": case.temperature,
                "num_predict": case.max_tokens,
            },
        }
        if case.tools:
            request_body["tools"] = case.tools

        began = perf_counter()
        try:
            reply = self.client.post(
                endpoint,
                headers=self._headers(),
                json=request_body,
                timeout=case.timeout_seconds,
            )
        except httpx.HTTPError as exc:
            raise AdapterError(f"Ollama native chat request failed for {self.provider.name}: {exc}") from exc
        elapsed_ms = (perf_counter() - began) * 1000

        body = response_json_or_metadata(reply)
        message = body.get("message", {})
        content = str(message.get("content") or "") if isinstance(message, Mapping) else ""

        calls = extract_ollama_tool_calls(body)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            status_code=reply.status_code,
            latency_ms=elapsed_ms,
            raw=body,
            text=content,
            tool_names=_tool_names(calls),
            tool_calls=calls,
        )
589
+
590
+
591
class LMStudioNativeAdapter(ProviderAdapter):
    """Provider adapter that speaks LM Studio's native `/api/v1/*` REST endpoints."""

    adapter_name = "lm-studio-native"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Build the bearer-token `Authorization` header."""
        bearer = f"Bearer {api_key}"
        return {"Authorization": bearer}

    def probe(self) -> ProbeResult:
        """Query `GET <api v1 base>/models` and collect the advertised model ids.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        endpoint = self._api_v1_base_url() + "/models"
        try:
            reply = self.client.get(endpoint, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"LM Studio native probe failed for {self.provider.name}: {exc}") from exc

        model_ids: list[str] = []
        body = response_json_or_metadata(reply)
        # The listing is read from `models`, falling back to `data` when absent.
        entries = body.get("models", body.get("data", []))
        if isinstance(entries, list):
            for entry in entries:
                if not isinstance(entry, Mapping):
                    continue
                label = entry.get("key") or entry.get("id") or entry.get("model")
                if label:
                    model_ids.append(str(label))

        succeeded = reply.is_success
        return ProbeResult(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            ok=succeeded,
            status_code=reply.status_code,
            message="ok" if succeeded else _redacted_response_text(reply),
            models=model_ids,
            raw=body,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Send the case prompt to `POST <api v1 base>/chat` as a non-streaming request.

        Raises:
            AdapterError: if the HTTP request itself fails.
        """
        endpoint = self._api_v1_base_url() + "/chat"
        request_body: dict[str, Any] = {
            "model": model,
            "input": case.prompt,
            "stream": False,
            "store": False,
            "temperature": case.temperature,
            "max_output_tokens": case.max_tokens,
        }
        if case.system_prompt:
            request_body["system_prompt"] = case.system_prompt

        began = perf_counter()
        try:
            reply = self.client.post(
                endpoint,
                headers=self._headers(),
                json=request_body,
                timeout=case.timeout_seconds,
            )
        except httpx.HTTPError as exc:
            raise AdapterError(f"LM Studio native chat request failed for {self.provider.name}: {exc}") from exc
        elapsed_ms = (perf_counter() - began) * 1000

        body = response_json_or_metadata(reply)

        calls = extract_lmstudio_tool_calls(body)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            status_code=reply.status_code,
            latency_ms=elapsed_ms,
            raw=body,
            text=extract_lmstudio_text(body),
            tool_names=_tool_names(calls),
            tool_calls=calls,
        )

    def _api_v1_base_url(self) -> str:
        """Return the base URL ending in `/api/v1`, appending the suffix if missing."""
        suffix = "/api/v1"
        root = str(self.provider.base_url).rstrip("/")
        return root if root.endswith(suffix) else root + suffix
665
+
666
+
667
def adapter_for(
    provider: ProviderConfig,
    *,
    secrets: SecretResolver | None = None,
    client: httpx.Client | None = None,
    timeout: float = 10.0,
) -> ProviderAdapter:
    """Instantiate the adapter matching the provider's API contract.

    Native providers are further selected by `provider.native_adapter`
    (`"ollama"` or `"lm-studio"`).

    Raises:
        AdapterError: if no adapter matches the provider.
    """
    contract = provider.contract
    adapter_cls: type[ProviderAdapter] | None = None
    if contract is ApiContract.OPENAI:
        adapter_cls = OpenAICompatibleAdapter
    elif contract is ApiContract.OPENAI_RESPONSES:
        adapter_cls = OpenAIResponsesAdapter
    elif contract is ApiContract.ANTHROPIC:
        adapter_cls = AnthropicCompatibleAdapter
    elif contract is ApiContract.NATIVE:
        if provider.native_adapter == "ollama":
            adapter_cls = OllamaNativeAdapter
        elif provider.native_adapter == "lm-studio":
            adapter_cls = LMStudioNativeAdapter

    if adapter_cls is None:
        raise AdapterError(f"no generic adapter exists for native provider: {provider.name}")
    return adapter_cls(provider, secrets=secrets, client=client, timeout=timeout)
685
+
686
+
687
def response_json_or_metadata(response: httpx.Response) -> dict[str, Any]:
    """Return the response body as a dict, always tagged with safe HTTP metadata.

    JSON media types (`application/json` or any `+json` suffix) are parsed:
    mapping bodies are copied, other JSON values are wrapped under
    `agentblaster_json`, and unparseable bodies yield an
    `agentblaster_parse_error` marker. Non-JSON bodies yield an
    `agentblaster_non_json_response` marker plus a redacted preview when the
    body is non-empty. `agentblaster_http` is set in every case.
    """
    content_type = response.headers.get("content-type", "")
    media_type = content_type.split(";", 1)[0].strip().lower()
    raw: dict[str, Any]
    if media_type == "application/json" or media_type.endswith("+json"):
        try:
            parsed = response.json()
        except ValueError:
            # ValueError covers json.JSONDecodeError and also the
            # UnicodeDecodeError raised for bodies with undecodable bytes.
            raw = {"agentblaster_parse_error": "invalid_json_response"}
        else:
            raw = dict(parsed) if isinstance(parsed, Mapping) else {"agentblaster_json": parsed}
    else:
        raw = {"agentblaster_non_json_response": True}
        preview = _redacted_body_preview(response)
        if preview:
            raw["agentblaster_body_preview"] = preview
    raw["agentblaster_http"] = _safe_http_metadata(response)
    return raw
705
+
706
+
707
def _safe_http_metadata(response: httpx.Response) -> dict[str, Any]:
    """Summarize the response as status code, content type and allow-listed headers.

    Header names are lower-cased and kept only when present in
    `SAFE_RESPONSE_HEADERS`; the kept headers are passed through `redact_value`.
    """
    allowed: dict[str, Any] = {}
    for name, value in response.headers.items():
        lowered = name.lower()
        if lowered in SAFE_RESPONSE_HEADERS:
            allowed[lowered] = value
    redacted_headers = dict(redact_value(allowed))
    return {
        "status_code": response.status_code,
        "content_type": response.headers.get("content-type"),
        "headers": redacted_headers,
    }
718
+
719
+
720
def _redacted_body_preview(response: httpx.Response, *, limit: int = 240) -> str:
    """Return the first *limit* characters of the body after redaction.

    Returns the literal `"<binary response>"` when the body cannot be decoded.
    """
    try:
        body = response.text
    except UnicodeDecodeError:
        return "<binary response>"
    snippet = body[:limit]
    return str(redact_value(snippet))
726
+
727
+
728
def _redacted_response_text(response: httpx.Response, *, limit: int = 240) -> str:
    """Return a redacted body excerpt; delegates to `_redacted_body_preview`."""
    excerpt = _redacted_body_preview(response, limit=limit)
    return excerpt
730
+
731
+
732
+ def _openai_messages_from_case(case: BenchmarkCase, *, include_system_prompt: bool = True) -> list[dict[str, Any]]:
733
+ messages: list[dict[str, Any]] = []
734
+ if include_system_prompt and case.system_prompt:
735
+ messages.append({"role": "system", "content": case.system_prompt})
736
+ if case.messages:
737
+ messages.extend(_trace_message_to_openai(message) for message in case.messages)
738
+ return messages
739
+
740
+ messages.append({"role": "user", "content": case.prompt})
741
+ return messages
742
+
743
+
744
+ def _trace_message_to_openai(message: Any) -> dict[str, Any]:
745
+ data: dict[str, Any] = {"role": message.role, "content": message.content}
746
+ if message.name:
747
+ data["name"] = message.name
748
+ if message.tool_call_id:
749
+ data["tool_call_id"] = message.tool_call_id
750
+ if message.tool_calls:
751
+ data["tool_calls"] = message.tool_calls
752
+ return data
753
+
754
+
755
+ def _openai_responses_input_from_case(case: BenchmarkCase) -> str | list[dict[str, Any]]:
756
+ if case.messages:
757
+ return _openai_messages_from_case(case, include_system_prompt=False)
758
+ return case.prompt
759
+
760
+
761
def _anthropic_messages_and_system_from_case(case: BenchmarkCase) -> tuple[list[dict[str, Any]], Any | None]:
    """Split *case* into Anthropic `messages` and a `system` value.

    Without recorded messages the prompt becomes a single user turn. Otherwise
    system messages are folded into the system text, tool messages become user
    turns holding a `tool_result` block, assistant messages are converted via
    `_anthropic_assistant_content`, and any other role is sent as a user turn.
    """
    if not case.messages:
        single_turn = [{"role": "user", "content": case.prompt}]
        return single_turn, _anthropic_system_value(case.system_prompt, case.cache_control)

    system_chunks: list[str] = [case.system_prompt] if case.system_prompt else []
    turns: list[dict[str, Any]] = []
    for entry in case.messages:
        role = entry.role
        if role == "system":
            system_chunks.append(_trace_content_text(entry.content))
        elif role == "tool":
            result_block = {
                "type": "tool_result",
                # Fall back to the tool name, then a fixed placeholder id.
                "tool_use_id": entry.tool_call_id or entry.name or "toolu_agentblaster",
                "content": _trace_content_text(entry.content),
            }
            turns.append({"role": "user", "content": [result_block]})
        elif role == "assistant":
            turns.append({"role": "assistant", "content": _anthropic_assistant_content(entry)})
        else:
            turns.append({"role": "user", "content": entry.content})

    joined = "\n\n".join(chunk for chunk in system_chunks if chunk)
    return turns, _anthropic_system_value(joined or None, case.cache_control)
794
+
795
+
796
+ def _anthropic_system_value(text: str | None, cache_control: dict[str, Any] | None) -> str | list[dict[str, Any]] | None:
797
+ if not text:
798
+ return None
799
+ if not cache_control:
800
+ return text
801
+ return [{"type": "text", "text": text, "cache_control": dict(cache_control)}]
802
+
803
+
804
+ def _anthropic_assistant_content(message: Any) -> str | list[dict[str, Any]]:
805
+ if not message.tool_calls:
806
+ return message.content
807
+
808
+ blocks: list[dict[str, Any]] = []
809
+ text = _trace_content_text(message.content)
810
+ if text:
811
+ blocks.append({"type": "text", "text": text})
812
+ for tool_call in message.tool_calls:
813
+ block = _openai_tool_call_to_anthropic_content_block(tool_call)
814
+ if block is not None:
815
+ blocks.append(block)
816
+ return blocks
817
+
818
+
819
+ def _openai_tool_call_to_anthropic_content_block(tool_call: Mapping[str, Any]) -> dict[str, Any] | None:
820
+ function = tool_call.get("function", {})
821
+ if not isinstance(function, Mapping) or not function.get("name"):
822
+ return None
823
+ return {
824
+ "type": "tool_use",
825
+ "id": str(tool_call.get("id") or f"toolu_{function['name']}"),
826
+ "name": str(function["name"]),
827
+ "input": _parse_tool_arguments(function.get("arguments")),
828
+ }
829
+
830
+
831
+ def _trace_content_text(content: str | list[dict[str, Any]]) -> str:
832
+ if isinstance(content, str):
833
+ return content
834
+ return json.dumps(content, sort_keys=True, separators=(",", ":"))
835
+
836
+
837
def extract_openai_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found by ``extract_openai_tool_calls``."""
    return [call.name for call in extract_openai_tool_calls(raw)]
839
+
840
+
841
+ def _openai_stream_event_has_output(event: Mapping[str, Any]) -> bool:
842
+ choices = event.get("choices", [])
843
+ if not isinstance(choices, list):
844
+ return False
845
+ for choice in choices:
846
+ if not isinstance(choice, Mapping):
847
+ continue
848
+ delta = choice.get("delta", {})
849
+ if not isinstance(delta, Mapping):
850
+ continue
851
+ if delta.get("content"):
852
+ return True
853
+ if delta.get("tool_calls"):
854
+ return True
855
+ return False
856
+
857
+
858
+ def _accumulate_openai_stream_event(
859
+ event: Mapping[str, Any],
860
+ text_parts: list[str],
861
+ tool_call_fragments: dict[int, dict[str, Any]],
862
+ ) -> None:
863
+ choices = event.get("choices", [])
864
+ if not isinstance(choices, list):
865
+ return
866
+ for choice in choices:
867
+ if not isinstance(choice, Mapping):
868
+ continue
869
+ delta = choice.get("delta", {})
870
+ if not isinstance(delta, Mapping):
871
+ continue
872
+ if delta.get("content"):
873
+ text_parts.append(str(delta["content"]))
874
+ tool_calls = delta.get("tool_calls", [])
875
+ if not isinstance(tool_calls, list):
876
+ continue
877
+ for tool_call in tool_calls:
878
+ if not isinstance(tool_call, Mapping):
879
+ continue
880
+ index = int(tool_call.get("index") or 0)
881
+ fragment = tool_call_fragments.setdefault(index, {"name": "", "arguments": ""})
882
+ function = tool_call.get("function", {})
883
+ if isinstance(function, Mapping):
884
+ if function.get("name"):
885
+ fragment["name"] += str(function["name"])
886
+ if function.get("arguments"):
887
+ fragment["arguments"] += str(function["arguments"])
888
+
889
+
890
+ def _stream_tool_calls(tool_call_fragments: dict[int, dict[str, Any]]) -> list[ToolCallRecord]:
891
+ calls: list[ToolCallRecord] = []
892
+ for index in sorted(tool_call_fragments):
893
+ fragment = tool_call_fragments[index]
894
+ if not fragment.get("name"):
895
+ continue
896
+ calls.append(
897
+ ToolCallRecord(
898
+ name=str(fragment["name"]),
899
+ arguments=_parse_tool_arguments(fragment.get("arguments")),
900
+ )
901
+ )
902
+ return calls
903
+
904
+
905
+ def _openai_responses_stream_event_has_output(event: Mapping[str, Any]) -> bool:
906
+ event_type = event.get("type")
907
+ if event_type in {"response.output_text.delta", "response.refusal.delta"}:
908
+ return bool(event.get("delta"))
909
+ if event_type == "response.function_call_arguments.delta":
910
+ return bool(event.get("delta"))
911
+ if event_type in {"response.output_item.added", "response.output_item.done"}:
912
+ item = event.get("item", {})
913
+ return isinstance(item, Mapping) and item.get("type") in {"function_call", "custom_tool_call"}
914
+ return False
915
+
916
+
917
+ def _accumulate_openai_responses_stream_event(
918
+ event: Mapping[str, Any],
919
+ text_parts: list[str],
920
+ tool_call_fragments: dict[str, dict[str, Any]],
921
+ usage: dict[str, Any],
922
+ ) -> None:
923
+ event_type = event.get("type")
924
+ if event_type == "response.output_text.delta":
925
+ delta = event.get("delta")
926
+ if delta:
927
+ text_parts.append(str(delta))
928
+ return
929
+ if event_type == "response.refusal.delta":
930
+ delta = event.get("delta")
931
+ if delta:
932
+ text_parts.append(str(delta))
933
+ return
934
+ if event_type in {"response.output_item.added", "response.output_item.done"}:
935
+ _accumulate_openai_responses_output_item(event, tool_call_fragments)
936
+ _update_openai_responses_usage_from_event(event, usage)
937
+ return
938
+ if event_type == "response.function_call_arguments.delta":
939
+ fragment = _openai_responses_tool_fragment(event, tool_call_fragments)
940
+ fragment["arguments"] += str(event.get("delta") or "")
941
+ return
942
+ if event_type == "response.function_call_arguments.done":
943
+ fragment = _openai_responses_tool_fragment(event, tool_call_fragments)
944
+ if event.get("name"):
945
+ fragment["name"] = str(event["name"])
946
+ if event.get("arguments") is not None:
947
+ fragment["arguments"] = str(event.get("arguments") or "")
948
+ return
949
+ _update_openai_responses_usage_from_event(event, usage)
950
+
951
+
952
+ def _accumulate_openai_responses_output_item(
953
+ event: Mapping[str, Any],
954
+ tool_call_fragments: dict[str, dict[str, Any]],
955
+ ) -> None:
956
+ item = event.get("item", {})
957
+ if not isinstance(item, Mapping):
958
+ return
959
+ if item.get("type") not in {"function_call", "custom_tool_call"}:
960
+ return
961
+ fragment = _openai_responses_tool_fragment(event, tool_call_fragments)
962
+ if item.get("name"):
963
+ fragment["name"] = str(item["name"])
964
+ if item.get("arguments") is not None:
965
+ fragment["arguments"] = str(item.get("arguments") or "")
966
+ if item.get("input") is not None:
967
+ fragment["arguments"] = json.dumps(item["input"], sort_keys=True) if isinstance(item["input"], Mapping) else str(item["input"])
968
+
969
+
970
+ def _openai_responses_tool_fragment(
971
+ event: Mapping[str, Any],
972
+ tool_call_fragments: dict[str, dict[str, Any]],
973
+ ) -> dict[str, Any]:
974
+ key = str(event.get("item_id") or event.get("output_index") or len(tool_call_fragments))
975
+ return tool_call_fragments.setdefault(key, {"name": "", "arguments": ""})
976
+
977
+
978
+ def _update_openai_responses_usage_from_event(event: Mapping[str, Any], usage: dict[str, Any]) -> None:
979
+ response = event.get("response")
980
+ if not isinstance(response, Mapping):
981
+ return
982
+ event_usage = response.get("usage")
983
+ if isinstance(event_usage, Mapping):
984
+ usage.update(dict(event_usage))
985
+
986
+
987
+ def _openai_responses_stream_status(event: Mapping[str, Any]) -> str | None:
988
+ response = event.get("response")
989
+ if isinstance(response, Mapping) and response.get("status"):
990
+ return str(response["status"])
991
+ event_type = event.get("type")
992
+ if event_type == "response.completed":
993
+ return "completed"
994
+ if event_type == "response.failed":
995
+ return "failed"
996
+ if event_type == "response.incomplete":
997
+ return "incomplete"
998
+ return None
999
+
1000
+
1001
+ def _openai_responses_stream_tool_calls(tool_call_fragments: dict[str, dict[str, Any]]) -> list[ToolCallRecord]:
1002
+ calls: list[ToolCallRecord] = []
1003
+ for key in sorted(tool_call_fragments):
1004
+ fragment = tool_call_fragments[key]
1005
+ if not fragment.get("name"):
1006
+ continue
1007
+ calls.append(
1008
+ ToolCallRecord(
1009
+ name=str(fragment["name"]),
1010
+ arguments=_parse_tool_arguments(fragment.get("arguments")),
1011
+ )
1012
+ )
1013
+ return calls
1014
+
1015
+
1016
def extract_openai_responses_text(raw: Mapping[str, Any]) -> str:
    """Extract the response text from a Responses-style payload.

    A truthy top-level ``output_text`` is returned directly. Otherwise the
    text of every ``message`` item in ``output`` is concatenated: string
    content as-is, list content by joining the ``text`` of parts typed
    ``output_text`` or ``text``. Returns ``""`` when nothing usable is found.
    """
    shortcut = raw.get("output_text")
    if shortcut:
        return str(shortcut)

    output = raw.get("output", [])
    if not isinstance(output, list):
        return ""

    pieces: list[str] = []
    for item in output:
        if not isinstance(item, Mapping) or item.get("type") != "message":
            continue
        content = item.get("content", [])
        if isinstance(content, str):
            pieces.append(content)
        elif isinstance(content, list):
            pieces.extend(
                str(part.get("text") or "")
                for part in content
                if isinstance(part, Mapping) and part.get("type") in {"output_text", "text"}
            )
    return "".join(pieces)
1039
+
1040
+
1041
def extract_openai_responses_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from the ``output`` list of a Responses-style payload.

    Only named items of type ``function_call`` or ``custom_tool_call`` are
    kept. Arguments come from ``arguments``, falling back to ``input``, and
    are decoded by ``_parse_tool_arguments``.
    """
    output = raw.get("output", [])
    if not isinstance(output, list):
        return []
    return [
        ToolCallRecord(
            name=str(item["name"]),
            arguments=_parse_tool_arguments(item.get("arguments") or item.get("input")),
        )
        for item in output
        if isinstance(item, Mapping)
        and item.get("type") in {"function_call", "custom_tool_call"}
        and item.get("name")
    ]
1057
+
1058
+
1059
def extract_openai_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from every choice of a chat-completions payload.

    Walks ``choices[*].message.tool_calls[*].function`` and keeps each
    function that has a name. Any level that is not of the expected
    list/mapping type is skipped. Arguments are decoded by
    ``_parse_tool_arguments``.
    """
    found: list[ToolCallRecord] = []
    choices = raw.get("choices", [])
    if not isinstance(choices, list):
        return found
    for choice in choices:
        message = choice.get("message", {}) if isinstance(choice, Mapping) else None
        tool_calls = message.get("tool_calls", []) if isinstance(message, Mapping) else None
        if not isinstance(tool_calls, list):
            continue
        for entry in tool_calls:
            function = entry.get("function", {}) if isinstance(entry, Mapping) else None
            if isinstance(function, Mapping) and function.get("name"):
                found.append(
                    ToolCallRecord(
                        name=str(function["name"]),
                        arguments=_parse_tool_arguments(function.get("arguments")),
                    )
                )
    return found
1086
+
1087
+
1088
+ def _legacy_extract_openai_tool_names(raw: Mapping[str, Any]) -> list[str]:
1089
+ names: list[str] = []
1090
+ choices = raw.get("choices", [])
1091
+ if not isinstance(choices, list):
1092
+ return names
1093
+ for choice in choices:
1094
+ if not isinstance(choice, Mapping):
1095
+ continue
1096
+ message = choice.get("message", {})
1097
+ if not isinstance(message, Mapping):
1098
+ continue
1099
+ tool_calls = message.get("tool_calls", [])
1100
+ if not isinstance(tool_calls, list):
1101
+ continue
1102
+ for tool_call in tool_calls:
1103
+ if not isinstance(tool_call, Mapping):
1104
+ continue
1105
+ function = tool_call.get("function", {})
1106
+ if isinstance(function, Mapping) and function.get("name"):
1107
+ names.append(str(function["name"]))
1108
+ return names
1109
+
1110
+
1111
def extract_anthropic_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found by ``extract_anthropic_tool_calls``."""
    return [call.name for call in extract_anthropic_tool_calls(raw)]
1113
+
1114
+
1115
def extract_anthropic_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect ``tool_use`` blocks from an Anthropic-style message payload.

    Each named ``tool_use`` block in ``content`` becomes one record. A
    non-mapping ``input`` is replaced by an empty argument dict.
    """
    content = raw.get("content", [])
    if not isinstance(content, list):
        return []
    records: list[ToolCallRecord] = []
    for block in content:
        if not isinstance(block, Mapping):
            continue
        if block.get("type") != "tool_use" or not block.get("name"):
            continue
        payload = block.get("input")
        arguments = dict(payload) if isinstance(payload, Mapping) else {}
        records.append(ToolCallRecord(name=str(block["name"]), arguments=arguments))
    return records
1125
+
1126
+
1127
+ def _anthropic_stream_event_has_output(event: Mapping[str, Any]) -> bool:
1128
+ event_type = event.get("type")
1129
+ if event_type == "content_block_start":
1130
+ content_block = event.get("content_block", {})
1131
+ if not isinstance(content_block, Mapping):
1132
+ return False
1133
+ if content_block.get("type") == "text" and content_block.get("text"):
1134
+ return True
1135
+ return content_block.get("type") in {"tool_use", "server_tool_use"} and bool(content_block.get("name"))
1136
+ if event_type == "content_block_delta":
1137
+ delta = event.get("delta", {})
1138
+ if not isinstance(delta, Mapping):
1139
+ return False
1140
+ if delta.get("type") == "text_delta" and delta.get("text"):
1141
+ return True
1142
+ return delta.get("type") == "input_json_delta" and bool(delta.get("partial_json"))
1143
+ return False
1144
+
1145
+
1146
+ def _accumulate_anthropic_stream_event(
1147
+ event: Mapping[str, Any],
1148
+ text_parts: list[str],
1149
+ tool_call_fragments: dict[int, dict[str, Any]],
1150
+ usage: dict[str, Any],
1151
+ ) -> None:
1152
+ event_type = event.get("type")
1153
+ if event_type == "message_start":
1154
+ message = event.get("message", {})
1155
+ if isinstance(message, Mapping):
1156
+ _update_anthropic_stream_usage(usage, message.get("usage"))
1157
+ return
1158
+ if event_type == "message_delta":
1159
+ _update_anthropic_stream_usage(usage, event.get("usage"))
1160
+ return
1161
+ if event_type == "content_block_start":
1162
+ _accumulate_anthropic_content_block_start(event, text_parts, tool_call_fragments)
1163
+ return
1164
+ if event_type == "content_block_delta":
1165
+ _accumulate_anthropic_content_block_delta(event, text_parts, tool_call_fragments)
1166
+
1167
+
1168
+ def _update_anthropic_stream_usage(usage: dict[str, Any], value: Any) -> None:
1169
+ if isinstance(value, Mapping):
1170
+ usage.update(dict(value))
1171
+
1172
+
1173
+ def _accumulate_anthropic_content_block_start(
1174
+ event: Mapping[str, Any],
1175
+ text_parts: list[str],
1176
+ tool_call_fragments: dict[int, dict[str, Any]],
1177
+ ) -> None:
1178
+ content_block = event.get("content_block", {})
1179
+ if not isinstance(content_block, Mapping):
1180
+ return
1181
+ block_type = content_block.get("type")
1182
+ if block_type == "text":
1183
+ text = content_block.get("text")
1184
+ if text:
1185
+ text_parts.append(str(text))
1186
+ return
1187
+ if block_type not in {"tool_use", "server_tool_use"}:
1188
+ return
1189
+
1190
+ index = _event_index(event)
1191
+ fragment = tool_call_fragments.setdefault(index, {"name": "", "arguments": "", "arguments_object": {}})
1192
+ if content_block.get("name"):
1193
+ fragment["name"] = str(content_block["name"])
1194
+ input_value = content_block.get("input")
1195
+ if isinstance(input_value, Mapping) and input_value:
1196
+ fragment["arguments_object"] = dict(input_value)
1197
+
1198
+
1199
+ def _accumulate_anthropic_content_block_delta(
1200
+ event: Mapping[str, Any],
1201
+ text_parts: list[str],
1202
+ tool_call_fragments: dict[int, dict[str, Any]],
1203
+ ) -> None:
1204
+ delta = event.get("delta", {})
1205
+ if not isinstance(delta, Mapping):
1206
+ return
1207
+ delta_type = delta.get("type")
1208
+ if delta_type == "text_delta":
1209
+ text = delta.get("text")
1210
+ if text:
1211
+ text_parts.append(str(text))
1212
+ return
1213
+ if delta_type != "input_json_delta":
1214
+ return
1215
+
1216
+ index = _event_index(event)
1217
+ fragment = tool_call_fragments.setdefault(index, {"name": "", "arguments": "", "arguments_object": {}})
1218
+ if "partial_json" in delta:
1219
+ fragment["arguments"] += str(delta.get("partial_json") or "")
1220
+
1221
+
1222
+ def _anthropic_stream_stop_reason(event: Mapping[str, Any]) -> str | None:
1223
+ if event.get("type") != "message_delta":
1224
+ return None
1225
+ delta = event.get("delta", {})
1226
+ if isinstance(delta, Mapping) and delta.get("stop_reason"):
1227
+ return str(delta["stop_reason"])
1228
+ return None
1229
+
1230
+
1231
+ def _anthropic_stream_tool_calls(tool_call_fragments: dict[int, dict[str, Any]]) -> list[ToolCallRecord]:
1232
+ calls: list[ToolCallRecord] = []
1233
+ for index in sorted(tool_call_fragments):
1234
+ fragment = tool_call_fragments[index]
1235
+ if not fragment.get("name"):
1236
+ continue
1237
+ argument_fragments = fragment.get("arguments")
1238
+ if argument_fragments:
1239
+ arguments = _parse_tool_arguments(argument_fragments)
1240
+ else:
1241
+ arguments_object = fragment.get("arguments_object")
1242
+ arguments = dict(arguments_object) if isinstance(arguments_object, Mapping) else {}
1243
+ calls.append(ToolCallRecord(name=str(fragment["name"]), arguments=arguments))
1244
+ return calls
1245
+
1246
+
1247
+ def _event_index(event: Mapping[str, Any]) -> int:
1248
+ try:
1249
+ return int(event.get("index") or 0)
1250
+ except (TypeError, ValueError):
1251
+ return 0
1252
+
1253
+
1254
+ def _legacy_extract_anthropic_tool_names(raw: Mapping[str, Any]) -> list[str]:
1255
+ names: list[str] = []
1256
+ content = raw.get("content", [])
1257
+ if not isinstance(content, list):
1258
+ return names
1259
+ for block in content:
1260
+ if isinstance(block, Mapping) and block.get("type") == "tool_use" and block.get("name"):
1261
+ names.append(str(block["name"]))
1262
+ return names
1263
+
1264
+
1265
def extract_ollama_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found by ``extract_ollama_tool_calls``."""
    return [call.name for call in extract_ollama_tool_calls(raw)]
1267
+
1268
+
1269
def extract_ollama_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from ``raw["message"]["tool_calls"]``.

    Each entry with a named ``function`` mapping becomes one record, with
    arguments decoded by ``_parse_tool_arguments``. Malformed levels are
    skipped.
    """
    message = raw.get("message", {})
    tool_calls = message.get("tool_calls", []) if isinstance(message, Mapping) else None
    if not isinstance(tool_calls, list):
        return []
    records: list[ToolCallRecord] = []
    for entry in tool_calls:
        function = entry.get("function", {}) if isinstance(entry, Mapping) else None
        if isinstance(function, Mapping) and function.get("name"):
            records.append(
                ToolCallRecord(
                    name=str(function["name"]),
                    arguments=_parse_tool_arguments(function.get("arguments")),
                )
            )
    return records
1289
+
1290
+
1291
+ def _legacy_extract_ollama_tool_names(raw: Mapping[str, Any]) -> list[str]:
1292
+ names: list[str] = []
1293
+ message = raw.get("message", {})
1294
+ if not isinstance(message, Mapping):
1295
+ return names
1296
+ tool_calls = message.get("tool_calls", [])
1297
+ if not isinstance(tool_calls, list):
1298
+ return names
1299
+ for tool_call in tool_calls:
1300
+ if not isinstance(tool_call, Mapping):
1301
+ continue
1302
+ function = tool_call.get("function", {})
1303
+ if isinstance(function, Mapping) and function.get("name"):
1304
+ names.append(str(function["name"]))
1305
+ return names
1306
+
1307
+
1308
def extract_lmstudio_text(raw: Mapping[str, Any]) -> str:
    """Extract response text from the payload shapes this adapter accepts.

    Tried in order:

    1. ``output`` list: the ``content`` of every ``message`` item, joined;
    2. ``choices[0].message.content``;
    3. top-level ``message.content``.

    Returns ``""`` when none of them applies.
    """
    output = raw.get("output", [])
    if isinstance(output, list):
        message_texts: list[str] = []
        for item in output:
            if isinstance(item, Mapping) and item.get("type") == "message":
                message_texts.append(str(item.get("content") or ""))
        if message_texts:
            return "".join(message_texts)

    choices = raw.get("choices", [])
    first_choice = choices[0] if choices and isinstance(choices, list) else None
    candidates: list[Any] = []
    if isinstance(first_choice, Mapping):
        candidates.append(first_choice.get("message", {}))
    candidates.append(raw.get("message", {}))

    # The first candidate that is a mapping decides the result.
    for candidate in candidates:
        if isinstance(candidate, Mapping):
            return str(candidate.get("content") or "")
    return ""
1330
+
1331
+
1332
def extract_lmstudio_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found by ``extract_lmstudio_tool_calls``."""
    return [call.name for call in extract_lmstudio_tool_calls(raw)]
1334
+
1335
+
1336
def extract_lmstudio_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from the ``output`` list of the payload.

    * ``tool_call`` items with a ``tool`` name become ordinary records.
    * ``invalid_tool_call`` items whose ``metadata`` names a tool become
      records flagged ``valid=False``.

    In both cases a non-mapping ``arguments`` value becomes an empty dict.
    """
    output = raw.get("output", [])
    if not isinstance(output, list):
        return []
    records: list[ToolCallRecord] = []
    for item in output:
        if not isinstance(item, Mapping):
            continue
        kind = item.get("type")
        if kind == "tool_call" and item.get("tool"):
            payload = item.get("arguments")
            arguments = dict(payload) if isinstance(payload, Mapping) else {}
            records.append(ToolCallRecord(name=str(item["tool"]), arguments=arguments))
        elif kind == "invalid_tool_call":
            metadata = item.get("metadata", {})
            if not isinstance(metadata, Mapping) or not metadata.get("tool_name"):
                continue
            payload = metadata.get("arguments")
            arguments = dict(payload) if isinstance(payload, Mapping) else {}
            records.append(ToolCallRecord(name=str(metadata["tool_name"]), arguments=arguments, valid=False))
    return records
1353
+
1354
+
1355
+ def _tool_names(calls: list[ToolCallRecord]) -> list[str]:
1356
+ return [call.name for call in calls]
1357
+
1358
+
1359
+ def _stream_cancellation_elapsed_ms(case: BenchmarkCase, started: float) -> float | None:
1360
+ if case.cancel_after_ms is None:
1361
+ return None
1362
+ elapsed_ms = (perf_counter() - started) * 1000
1363
+ if elapsed_ms < case.cancel_after_ms:
1364
+ return None
1365
+ return round(elapsed_ms, 3)
1366
+
1367
+
1368
+ def _parse_tool_arguments(value: Any) -> dict[str, Any]:
1369
+ if isinstance(value, Mapping):
1370
+ return dict(value)
1371
+ if isinstance(value, str) and value.strip():
1372
+ try:
1373
+ parsed = json.loads(value)
1374
+ except json.JSONDecodeError:
1375
+ return {}
1376
+ if isinstance(parsed, Mapping):
1377
+ return dict(parsed)
1378
+ return {}
1379
+
1380
+
1381
+ def _openai_tool_to_anthropic(tool: Mapping[str, Any]) -> dict[str, Any]:
1382
+ if tool.get("type") == "function" and isinstance(tool.get("function"), Mapping):
1383
+ function = tool["function"]
1384
+ converted = {
1385
+ "name": function.get("name"),
1386
+ "description": function.get("description", ""),
1387
+ "input_schema": function.get("parameters", {"type": "object", "properties": {}}),
1388
+ }
1389
+ cache_control = tool.get("cache_control") or function.get("cache_control")
1390
+ if isinstance(cache_control, Mapping):
1391
+ converted["cache_control"] = dict(cache_control)
1392
+ return converted
1393
+ return dict(tool)
1394
+
1395
+
1396
+ def _anthropic_tools_from_case(case: BenchmarkCase) -> list[dict[str, Any]]:
1397
+ tools = [_openai_tool_to_anthropic(tool) for tool in case.tools]
1398
+ if tools and case.cache_control and "cache_control" not in tools[-1]:
1399
+ tools[-1] = {**tools[-1], "cache_control": dict(case.cache_control)}
1400
+ return tools
1401
+
1402
+
1403
+ def _openai_chat_tool_to_responses_tool(tool: Mapping[str, Any]) -> dict[str, Any]:
1404
+ if tool.get("type") == "function" and isinstance(tool.get("function"), Mapping):
1405
+ function = tool["function"]
1406
+ converted = {
1407
+ "type": "function",
1408
+ "name": function.get("name"),
1409
+ "description": function.get("description", ""),
1410
+ "parameters": function.get("parameters", {"type": "object", "properties": {}}),
1411
+ }
1412
+ if "strict" in function:
1413
+ converted["strict"] = function["strict"]
1414
+ return converted
1415
+ return dict(tool)
1416
+
1417
+
1418
+ def _openai_chat_tool_choice_to_responses_tool_choice(tool_choice: str | Mapping[str, Any]) -> str | dict[str, Any]:
1419
+ if isinstance(tool_choice, str):
1420
+ return tool_choice
1421
+ if tool_choice.get("type") == "function" and isinstance(tool_choice.get("function"), Mapping):
1422
+ function = tool_choice["function"]
1423
+ if function.get("name"):
1424
+ return {"type": "function", "name": function["name"]}
1425
+ return dict(tool_choice)
1426
+
1427
+
1428
+ def _openai_tool_choice_to_anthropic(tool_choice: str | Mapping[str, Any]) -> str | dict[str, Any]:
1429
+ if isinstance(tool_choice, str):
1430
+ if tool_choice in {"auto", "any", "none"}:
1431
+ return tool_choice
1432
+ return {"type": "tool", "name": tool_choice}
1433
+ if tool_choice.get("type") == "function" and isinstance(tool_choice.get("function"), Mapping):
1434
+ return {"type": "tool", "name": str(tool_choice["function"].get("name"))}
1435
+ return dict(tool_choice)