agentblaster 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentblaster/__init__.py +3 -0
- agentblaster/adapters.py +1435 -0
- agentblaster/agent_profiles.py +420 -0
- agentblaster/audit.py +27 -0
- agentblaster/benchmark_kit.py +356 -0
- agentblaster/bundle.py +692 -0
- agentblaster/campaign.py +1031 -0
- agentblaster/campaign_preflight.py +647 -0
- agentblaster/capabilities.py +270 -0
- agentblaster/claim_readiness.py +3948 -0
- agentblaster/cleanup.py +226 -0
- agentblaster/cli.py +4202 -0
- agentblaster/compare.py +423 -0
- agentblaster/config.py +62 -0
- agentblaster/constants.py +8 -0
- agentblaster/contract_check.py +919 -0
- agentblaster/costs.py +74 -0
- agentblaster/dashboard.py +5974 -0
- agentblaster/engine_advisory.py +1045 -0
- agentblaster/engine_onboarding.py +224 -0
- agentblaster/engine_targets.py +545 -0
- agentblaster/environment.py +284 -0
- agentblaster/errors.py +21 -0
- agentblaster/evidence.py +188 -0
- agentblaster/evidence_index.py +1865 -0
- agentblaster/experiment.py +200 -0
- agentblaster/exports.py +158 -0
- agentblaster/failures.py +70 -0
- agentblaster/fixtures.py +775 -0
- agentblaster/harness.py +1254 -0
- agentblaster/implementation_status.py +719 -0
- agentblaster/integrity.py +161 -0
- agentblaster/launch_recipes.py +295 -0
- agentblaster/lcp.py +107 -0
- agentblaster/matrix.py +101 -0
- agentblaster/matrix_gate.py +565 -0
- agentblaster/matrix_pressure.py +187 -0
- agentblaster/matrix_saturation.py +601 -0
- agentblaster/mcp.py +187 -0
- agentblaster/metric_coverage.py +552 -0
- agentblaster/mock_provider.py +485 -0
- agentblaster/model_catalog.py +153 -0
- agentblaster/models.py +531 -0
- agentblaster/observability.py +110 -0
- agentblaster/planning.py +199 -0
- agentblaster/policy.py +635 -0
- agentblaster/presets.py +219 -0
- agentblaster/prompt_footprint.py +245 -0
- agentblaster/protocol_repair.py +431 -0
- agentblaster/provider_audit.py +210 -0
- agentblaster/publication_brief.py +893 -0
- agentblaster/quality.py +1142 -0
- agentblaster/rate_limits.py +74 -0
- agentblaster/readiness.py +241 -0
- agentblaster/redaction.py +58 -0
- agentblaster/redaction_scan.py +247 -0
- agentblaster/release.py +440 -0
- agentblaster/release_qualification.py +2248 -0
- agentblaster/remote_onboarding.py +308 -0
- agentblaster/reports.py +2245 -0
- agentblaster/runner.py +1677 -0
- agentblaster/schema_registry.py +1151 -0
- agentblaster/secrets.py +274 -0
- agentblaster/security_posture.py +492 -0
- agentblaster/skills.py +67 -0
- agentblaster/stress_matrix.py +113 -0
- agentblaster/suite_audit.py +259 -0
- agentblaster/suite_calibration.py +171 -0
- agentblaster/suites.py +805 -0
- agentblaster/telemetry.py +947 -0
- agentblaster/telemetry_audit.py +300 -0
- agentblaster/toolsim.py +193 -0
- agentblaster/workflow_readiness.py +570 -0
- agentblaster/workflow_surfaces.py +292 -0
- agentblaster-0.1.0.dist-info/METADATA +250 -0
- agentblaster-0.1.0.dist-info/RECORD +78 -0
- agentblaster-0.1.0.dist-info/WHEEL +4 -0
- agentblaster-0.1.0.dist-info/entry_points.txt +2 -0
agentblaster/adapters.py
ADDED
|
@@ -0,0 +1,1435 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from time import perf_counter
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from agentblaster.constants import SMOKE_SENTINEL, SMOKE_SENTINEL_MAX_TOKENS, SMOKE_SENTINEL_PROMPT, SMOKE_SENTINEL_SYSTEM_PROMPT
|
|
11
|
+
from agentblaster.errors import AdapterError
|
|
12
|
+
from agentblaster.models import AdapterResponse, ApiContract, BenchmarkCase, ProbeResult, ProviderConfig, ToolCallRecord
|
|
13
|
+
from agentblaster.redaction import redact_value
|
|
14
|
+
from agentblaster.secrets import SecretResolver
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Allowlist of lower-case response header names that may be copied into stored
# HTTP metadata; headers not listed here are dropped by _safe_http_metadata.
SAFE_RESPONSE_HEADERS = {
    # Request correlation identifiers.
    "anthropic-request-id",
    "openai-request-id",
    "request-id",
    "x-request-id",
    # Back-off hint.
    "retry-after",
    # Rate-limit budget counted in requests.
    "x-ratelimit-limit-requests",
    "x-ratelimit-remaining-requests",
    "x-ratelimit-reset-requests",
    # Rate-limit budget counted in tokens.
    "x-ratelimit-limit-tokens",
    "x-ratelimit-remaining-tokens",
    "x-ratelimit-reset-tokens",
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ProviderAdapter:
    """Base class for provider protocol adapters.

    Stores the provider configuration, a secret resolver and an HTTP client.
    Subclasses implement :meth:`probe`, :meth:`chat_completion` and
    :meth:`_auth_headers`.

    When no ``client`` is supplied the adapter creates its own ``httpx.Client``
    and owns it: call :meth:`close` (or use the adapter as a context manager)
    to release it. A client supplied by the caller is never closed here.
    """

    adapter_name = "provider"
    adapter_version = "agentblaster-adapter-v1"

    def __init__(
        self,
        provider: ProviderConfig,
        *,
        secrets: SecretResolver | None = None,
        client: httpx.Client | None = None,
        timeout: float = 10.0,
    ) -> None:
        """Bind the adapter to *provider*.

        Args:
            provider: Provider configuration (base URL, headers, TLS options).
            secrets: Resolver for ``provider.api_key_ref``; a default
                ``SecretResolver`` is created when omitted.
            client: HTTP client to use; when omitted one is created with
                *timeout* and the provider's TLS verification settings.
            timeout: Timeout passed to the internally created client only.
        """
        self.provider = provider
        self.secrets = secrets or SecretResolver()
        # Remember whether the client was created here so close() never shuts
        # down a client the caller still intends to use.
        self._owns_client = not client
        self.client = client or httpx.Client(timeout=timeout, verify=httpx_verify_config(provider))

    def close(self) -> None:
        """Close the HTTP client if this adapter created it; otherwise do nothing."""
        if getattr(self, "_owns_client", False):
            self.client.close()

    def __enter__(self) -> ProviderAdapter:
        """Return the adapter itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Release the owned HTTP client; exceptions are not suppressed."""
        self.close()

    def probe(self) -> ProbeResult:
        """Check provider reachability; must be implemented by subclasses."""
        raise NotImplementedError

    def smoke_chat(self, model: str) -> AdapterResponse:
        """Send the built-in sentinel prompt to *model* via :meth:`chat_completion`."""
        case = BenchmarkCase(
            id="protocol-smoke-chat",
            title="Protocol smoke chat",
            system_prompt=SMOKE_SENTINEL_SYSTEM_PROMPT,
            prompt=SMOKE_SENTINEL_PROMPT,
            expected_substring=SMOKE_SENTINEL,
            max_tokens=SMOKE_SENTINEL_MAX_TOKENS,
        )
        return self.chat_completion(model, case)

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """Run *case* against *model*; must be implemented by subclasses."""
        raise NotImplementedError

    def _headers(self) -> dict[str, str]:
        """Return a copy of the provider headers plus auth headers when a key resolves."""
        headers = dict(self.provider.headers)
        api_key = self.secrets.resolve(self.provider.api_key_ref)
        if api_key:
            # Auth headers win over same-named configured headers.
            headers.update(self._auth_headers(api_key))
        return headers

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Build the contract-specific auth headers; must be implemented by subclasses."""
        raise NotImplementedError
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def httpx_verify_config(provider: ProviderConfig) -> bool | str:
    """Translate the provider's TLS settings into an httpx ``verify`` value.

    Returns ``False`` when ``provider.tls_verify`` is falsy, the CA bundle
    converted to ``str`` when ``provider.ca_bundle`` is set, and ``True``
    otherwise.
    """
    if provider.tls_verify:
        custom_bundle = provider.ca_bundle
        return True if custom_bundle is None else str(custom_bundle)
    return False
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class OpenAICompatibleAdapter(ProviderAdapter):
    """Adapter for OpenAI-compatible ``/models`` and ``/chat/completions`` endpoints."""

    adapter_name = "openai-chat-completions"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the bearer-token ``Authorization`` header for *api_key*."""
        return {"Authorization": f"Bearer {api_key}"}

    def probe(self) -> ProbeResult:
        """GET ``<base_url>/models`` and report the status and listed model ids.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/models"
        try:
            response = self.client.get(url, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI probe failed for {self.provider.name}: {exc}") from exc

        models: list[str] = []
        raw = response_json_or_metadata(response)
        data = raw.get("data", [])
        if isinstance(data, list):
            models = [str(item.get("id")) for item in data if isinstance(item, Mapping) and item.get("id")]

        return ProbeResult(
            provider=self.provider.name,
            contract=self.provider.contract,
            ok=response.is_success,
            status_code=response.status_code,
            message="ok" if response.is_success else _redacted_response_text(response),
            models=models,
            raw=raw,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """POST *case* to ``<base_url>/chat/completions`` for *model*.

        Streaming cases are delegated to :meth:`_chat_completion_stream`.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/chat/completions"
        payload: dict[str, Any] = {
            "model": model,
            "messages": _openai_messages_from_case(case),
            "temperature": case.temperature,
            "max_tokens": case.max_tokens,
        }
        # Optional request fields are only sent when the case sets them.
        if case.streaming:
            payload["stream"] = True
        if case.response_format:
            payload["response_format"] = case.response_format
        if case.tools:
            payload["tools"] = case.tools
        if case.tool_choice:
            payload["tool_choice"] = case.tool_choice
        started = perf_counter()
        try:
            if case.streaming:
                return self._chat_completion_stream(url, payload, case, started)
            response = self.client.post(url, headers=self._headers(), json=payload, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI smoke request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000

        raw = response_json_or_metadata(response)

        text = ""
        choices = raw.get("choices", [])
        # Index into ``choices`` only when it really is a non-empty list; a
        # malformed payload (e.g. an object or a number) must yield empty text
        # rather than a KeyError/TypeError.
        if isinstance(choices, list) and choices and isinstance(choices[0], Mapping):
            message = choices[0].get("message", {})
            if isinstance(message, Mapping):
                text = str(message.get("content") or "")
        tool_calls = extract_openai_tool_calls(raw)

        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI,
            status_code=response.status_code,
            latency_ms=latency_ms,
            raw=raw,
            text=text,
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
        )

    def _chat_completion_stream(
        self,
        url: str,
        payload: dict[str, Any],
        case: BenchmarkCase,
        started: float,
    ) -> AdapterResponse:
        """Consume the ``data:`` event stream for *payload* and assemble the response.

        Args:
            url: Chat-completions endpoint URL.
            payload: Request body (already contains ``"stream": True``).
            case: Benchmark case supplying the timeout and cancellation settings.
            started: ``perf_counter()`` value taken before the request was sent;
                all timings are measured relative to it.

        Raises:
            AdapterError: If the HTTP request or stream fails.
        """
        text_parts: list[str] = []
        raw_events: list[dict[str, Any]] = []
        status_code = 0
        ttft_ms = None
        canceled = False
        cancellation_latency_ms = None
        tool_call_fragments: dict[int, dict[str, Any]] = {}
        try:
            with self.client.stream(
                "POST",
                url,
                headers=self._headers(),
                json=payload,
                timeout=case.timeout_seconds,
            ) as response:
                status_code = response.status_code
                for line in response.iter_lines():
                    # Only ``data:`` lines carry events; everything else is skipped.
                    if not line or not line.startswith("data:"):
                        continue
                    data = line.removeprefix("data:").strip()
                    if data == "[DONE]":
                        break
                    try:
                        event = json.loads(data)
                    except json.JSONDecodeError:
                        # Keep undecodable payloads in the record instead of failing the run.
                        raw_events.append({"malformed": data})
                        continue
                    raw_events.append(event)
                    # Time to first token: first event the helper reports as carrying output.
                    if ttft_ms is None and _openai_stream_event_has_output(event):
                        ttft_ms = (perf_counter() - started) * 1000
                    _accumulate_openai_stream_event(event, text_parts, tool_call_fragments)
                    # A non-None result from the helper means the stream should be abandoned now.
                    cancellation_elapsed = _stream_cancellation_elapsed_ms(case, started)
                    if cancellation_elapsed is not None:
                        canceled = True
                        cancellation_latency_ms = cancellation_elapsed
                        break
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI streaming request failed for {self.provider.name}: {exc}") from exc
        # Total latency is taken after the stream context has been exited.
        latency_ms = (perf_counter() - started) * 1000
        raw = {
            "stream": True,
            "events": raw_events,
            "agentblaster_http": _safe_http_metadata(response),
            "agentblaster_cancelled": canceled,
            "cancel_after_ms": case.cancel_after_ms,
            "cancellation_latency_ms": cancellation_latency_ms,
        }
        tool_calls = _stream_tool_calls(tool_call_fragments)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI,
            status_code=status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
            streaming=True,
            ttft_ms=round(ttft_ms, 3) if ttft_ms is not None else None,
            canceled=canceled,
            cancellation_latency_ms=cancellation_latency_ms,
        )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class OpenAIResponsesAdapter(OpenAICompatibleAdapter):
    """Adapter for OpenAI Responses-compatible `/responses` endpoints.

    Auth headers and ``probe`` are inherited from ``OpenAICompatibleAdapter``;
    only the completion call is overridden.
    """

    adapter_name = "openai-responses"

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """POST *case* to ``<base_url>/responses`` for *model*.

        Streaming cases are delegated to :meth:`_responses_stream`.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/responses"
        payload: dict[str, Any] = {
            "model": model,
            "input": _openai_responses_input_from_case(case),
            "temperature": case.temperature,
            "max_output_tokens": case.max_tokens,
        }
        # Optional request fields are only sent when the case sets them.
        if case.system_prompt:
            payload["instructions"] = case.system_prompt
        if case.previous_response_id:
            payload["previous_response_id"] = case.previous_response_id
        if case.max_tool_calls:
            payload["max_tool_calls"] = case.max_tool_calls
        if case.streaming:
            payload["stream"] = True
        if case.response_format:
            # The case's response format is nested under ``text.format`` in this payload.
            payload["text"] = {"format": case.response_format}
        if case.tools:
            # Case tools/tool_choice are passed through the chat-to-responses converters.
            payload["tools"] = [_openai_chat_tool_to_responses_tool(tool) for tool in case.tools]
        if case.tool_choice:
            payload["tool_choice"] = _openai_chat_tool_choice_to_responses_tool_choice(case.tool_choice)

        started = perf_counter()
        try:
            if case.streaming:
                return self._responses_stream(url, payload, case, started)
            response = self.client.post(url, headers=self._headers(), json=payload, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI Responses request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000

        raw = response_json_or_metadata(response)
        tool_calls = extract_openai_responses_tool_calls(raw)

        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI_RESPONSES,
            status_code=response.status_code,
            latency_ms=latency_ms,
            raw=raw,
            text=extract_openai_responses_text(raw),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
        )

    def _responses_stream(
        self,
        url: str,
        payload: dict[str, Any],
        case: BenchmarkCase,
        started: float,
    ) -> AdapterResponse:
        """Consume the ``data:`` event stream for *payload* and assemble the response.

        Args:
            url: Responses endpoint URL.
            payload: Request body (already contains ``"stream": True``).
            case: Benchmark case supplying the timeout and cancellation settings.
            started: ``perf_counter()`` value taken before the request was sent;
                all timings are measured relative to it.

        Raises:
            AdapterError: If the HTTP request or stream fails.
        """
        text_parts: list[str] = []
        raw_events: list[dict[str, Any]] = []
        tool_call_fragments: dict[str, dict[str, Any]] = {}
        usage: dict[str, Any] = {}
        status = None
        status_code = 0
        ttft_ms = None
        canceled = False
        cancellation_latency_ms = None
        try:
            with self.client.stream(
                "POST",
                url,
                headers=self._headers(),
                json=payload,
                timeout=case.timeout_seconds,
            ) as response:
                status_code = response.status_code
                for line in response.iter_lines():
                    # Only ``data:`` lines carry events; everything else is skipped.
                    if not line or not line.startswith("data:"):
                        continue
                    data = line.removeprefix("data:").strip()
                    if data == "[DONE]":
                        break
                    try:
                        event = json.loads(data)
                    except json.JSONDecodeError:
                        # Keep undecodable payloads in the record instead of failing the run.
                        raw_events.append({"malformed": data})
                        continue
                    raw_events.append(event)
                    # Time to first token: first event the helper reports as carrying output.
                    if ttft_ms is None and _openai_responses_stream_event_has_output(event):
                        ttft_ms = (perf_counter() - started) * 1000
                    _accumulate_openai_responses_stream_event(event, text_parts, tool_call_fragments, usage)
                    # Keep the most recent truthy status reported by any event.
                    response_status = _openai_responses_stream_status(event)
                    if response_status:
                        status = response_status
                    # A non-None result from the helper means the stream should be abandoned now.
                    cancellation_elapsed = _stream_cancellation_elapsed_ms(case, started)
                    if cancellation_elapsed is not None:
                        canceled = True
                        cancellation_latency_ms = cancellation_elapsed
                        break
        except httpx.HTTPError as exc:
            raise AdapterError(f"OpenAI Responses streaming request failed for {self.provider.name}: {exc}") from exc

        # Total latency is taken after the stream context has been exited.
        latency_ms = (perf_counter() - started) * 1000
        raw = {
            "stream": True,
            "events": raw_events,
            "usage": usage,
            "status": status,
            "agentblaster_http": _safe_http_metadata(response),
            "agentblaster_cancelled": canceled,
            "cancel_after_ms": case.cancel_after_ms,
            "cancellation_latency_ms": cancellation_latency_ms,
        }
        tool_calls = _openai_responses_stream_tool_calls(tool_call_fragments)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.OPENAI_RESPONSES,
            status_code=status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
            streaming=True,
            ttft_ms=round(ttft_ms, 3) if ttft_ms is not None else None,
            canceled=canceled,
            cancellation_latency_ms=cancellation_latency_ms,
        )
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
class AnthropicCompatibleAdapter(ProviderAdapter):
    """Adapter for Anthropic-compatible ``/models`` and ``/messages`` endpoints."""

    adapter_name = "anthropic-messages"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the ``x-api-key`` and ``anthropic-version`` headers.

        The version comes from the provider's configured headers when present,
        otherwise it defaults to ``2023-06-01``.
        """
        # NOTE(review): these headers are only applied when an API key resolves
        # (see ProviderAdapter._headers), so a key-less provider sends
        # ``anthropic-version`` only if it is configured explicitly - confirm intended.
        return {
            "x-api-key": api_key,
            "anthropic-version": self.provider.headers.get("anthropic-version", "2023-06-01"),
        }

    def probe(self) -> ProbeResult:
        """GET ``<base_url>/models`` and report the status and listed model ids.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/models"
        try:
            response = self.client.get(url, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"Anthropic probe failed for {self.provider.name}: {exc}") from exc

        models: list[str] = []
        raw = response_json_or_metadata(response)
        data = raw.get("data", [])
        if isinstance(data, list):
            models = [str(item.get("id")) for item in data if isinstance(item, Mapping) and item.get("id")]

        return ProbeResult(
            provider=self.provider.name,
            contract=ApiContract.ANTHROPIC,
            ok=response.is_success,
            status_code=response.status_code,
            message="ok" if response.is_success else _redacted_response_text(response),
            models=models,
            raw=raw,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """POST *case* to ``<base_url>/messages`` for *model*.

        Streaming cases are delegated to :meth:`_chat_completion_stream`.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        url = str(self.provider.base_url).rstrip("/") + "/messages"
        messages, system_prompt = _anthropic_messages_and_system_from_case(case)
        payload = {
            "model": model,
            "max_tokens": case.max_tokens,
            "temperature": case.temperature,
            "messages": messages,
        }
        # Optional request fields are only sent when set.
        if case.streaming:
            payload["stream"] = True
        if system_prompt:
            payload["system"] = system_prompt
        if case.tools:
            payload["tools"] = _anthropic_tools_from_case(case)
        if case.tool_choice:
            payload["tool_choice"] = _openai_tool_choice_to_anthropic(case.tool_choice)
        started = perf_counter()
        try:
            if case.streaming:
                return self._chat_completion_stream(url, payload, case, started)
            response = self.client.post(url, headers=self._headers(), json=payload, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"Anthropic smoke request failed for {self.provider.name}: {exc}") from exc
        latency_ms = (perf_counter() - started) * 1000

        raw = response_json_or_metadata(response)

        # Concatenate the text of every ``type == "text"`` content block.
        text_parts: list[str] = []
        content = raw.get("content", [])
        if isinstance(content, list):
            for block in content:
                if isinstance(block, Mapping) and block.get("type") == "text":
                    text_parts.append(str(block.get("text") or ""))
        tool_calls = extract_anthropic_tool_calls(raw)

        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.ANTHROPIC,
            status_code=response.status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
        )

    def _chat_completion_stream(
        self,
        url: str,
        payload: dict[str, Any],
        case: BenchmarkCase,
        started: float,
    ) -> AdapterResponse:
        """Consume the ``data:`` event stream for *payload* and assemble the response.

        Unlike the OpenAI stream handlers in this module, no ``[DONE]`` sentinel
        is checked; the loop ends when the stream ends or cancellation triggers.

        Args:
            url: Messages endpoint URL.
            payload: Request body (already contains ``"stream": True``).
            case: Benchmark case supplying the timeout and cancellation settings.
            started: ``perf_counter()`` value taken before the request was sent;
                all timings are measured relative to it.

        Raises:
            AdapterError: If the HTTP request or stream fails.
        """
        text_parts: list[str] = []
        raw_events: list[dict[str, Any]] = []
        tool_call_fragments: dict[int, dict[str, Any]] = {}
        usage: dict[str, Any] = {}
        stop_reason = None
        status_code = 0
        ttft_ms = None
        canceled = False
        cancellation_latency_ms = None
        try:
            with self.client.stream(
                "POST",
                url,
                headers=self._headers(),
                json=payload,
                timeout=case.timeout_seconds,
            ) as response:
                status_code = response.status_code
                for line in response.iter_lines():
                    # Only ``data:`` lines carry events; everything else is skipped.
                    if not line or not line.startswith("data:"):
                        continue
                    data = line.removeprefix("data:").strip()
                    try:
                        event = json.loads(data)
                    except json.JSONDecodeError:
                        # Keep undecodable payloads in the record instead of failing the run.
                        raw_events.append({"malformed": data})
                        continue
                    raw_events.append(event)
                    # Time to first token: first event the helper reports as carrying output.
                    if ttft_ms is None and _anthropic_stream_event_has_output(event):
                        ttft_ms = (perf_counter() - started) * 1000
                    _accumulate_anthropic_stream_event(event, text_parts, tool_call_fragments, usage)
                    # Keep the most recent truthy stop reason reported by any event.
                    event_stop_reason = _anthropic_stream_stop_reason(event)
                    if event_stop_reason:
                        stop_reason = event_stop_reason
                    # A non-None result from the helper means the stream should be abandoned now.
                    cancellation_elapsed = _stream_cancellation_elapsed_ms(case, started)
                    if cancellation_elapsed is not None:
                        canceled = True
                        cancellation_latency_ms = cancellation_elapsed
                        break
        except httpx.HTTPError as exc:
            raise AdapterError(f"Anthropic streaming request failed for {self.provider.name}: {exc}") from exc

        # Total latency is taken after the stream context has been exited.
        latency_ms = (perf_counter() - started) * 1000
        raw = {
            "stream": True,
            "events": raw_events,
            "usage": usage,
            "stop_reason": stop_reason,
            "agentblaster_http": _safe_http_metadata(response),
            "agentblaster_cancelled": canceled,
            "cancel_after_ms": case.cancel_after_ms,
            "cancellation_latency_ms": cancellation_latency_ms,
        }
        tool_calls = _anthropic_stream_tool_calls(tool_call_fragments)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.ANTHROPIC,
            status_code=status_code,
            latency_ms=latency_ms,
            raw=raw,
            text="".join(text_parts),
            tool_names=_tool_names(tool_calls),
            tool_calls=tool_calls,
            streaming=True,
            ttft_ms=round(ttft_ms, 3) if ttft_ms is not None else None,
            canceled=canceled,
            cancellation_latency_ms=cancellation_latency_ms,
        )
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
class OllamaNativeAdapter(ProviderAdapter):
    """Adapter for Ollama's native `/api/*` contract."""

    adapter_name = "ollama-native"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the bearer-token ``Authorization`` header for *api_key*."""
        bearer = f"Bearer {api_key}"
        return {"Authorization": bearer}

    def probe(self) -> ProbeResult:
        """GET ``<base_url>/api/tags`` and report the status and listed model names.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        endpoint = str(self.provider.base_url).rstrip("/") + "/api/tags"
        try:
            response = self.client.get(endpoint, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"Ollama native probe failed for {self.provider.name}: {exc}") from exc

        raw = response_json_or_metadata(response)
        listed = raw.get("models", [])
        names: list[str] = []
        if isinstance(listed, list):
            for entry in listed:
                if not isinstance(entry, Mapping):
                    continue
                # Prefer ``name`` and fall back to ``model``; skip entries with neither.
                label = entry.get("name") or entry.get("model")
                if label:
                    names.append(str(label))

        if response.is_success:
            summary = "ok"
        else:
            summary = _redacted_response_text(response)
        return ProbeResult(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            ok=response.is_success,
            status_code=response.status_code,
            message=summary,
            models=names,
            raw=raw,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """POST *case* to ``<base_url>/api/chat`` for *model* as a non-streaming request.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        endpoint = str(self.provider.base_url).rstrip("/") + "/api/chat"
        sampling = {
            "temperature": case.temperature,
            "num_predict": case.max_tokens,
        }
        body: dict[str, Any] = {
            "model": model,
            "messages": _openai_messages_from_case(case),
            "stream": False,
            "options": sampling,
        }
        if case.tools:
            body["tools"] = case.tools

        begin = perf_counter()
        try:
            response = self.client.post(endpoint, headers=self._headers(), json=body, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"Ollama native chat request failed for {self.provider.name}: {exc}") from exc
        elapsed_ms = (perf_counter() - begin) * 1000

        raw = response_json_or_metadata(response)

        reply = raw.get("message", {})
        content = str(reply.get("content") or "") if isinstance(reply, Mapping) else ""

        calls = extract_ollama_tool_calls(raw)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            status_code=response.status_code,
            latency_ms=elapsed_ms,
            raw=raw,
            text=content,
            tool_names=_tool_names(calls),
            tool_calls=calls,
        )
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
class LMStudioNativeAdapter(ProviderAdapter):
    """Adapter for LM Studio's native `/api/v1/*` REST contract."""

    adapter_name = "lm-studio-native"

    def _auth_headers(self, api_key: str) -> dict[str, str]:
        """Return the bearer-token ``Authorization`` header for *api_key*."""
        bearer = f"Bearer {api_key}"
        return {"Authorization": bearer}

    def probe(self) -> ProbeResult:
        """GET ``<api-v1-base>/models`` and report the status and listed model identifiers.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        endpoint = self._api_v1_base_url() + "/models"
        try:
            response = self.client.get(endpoint, headers=self._headers())
        except httpx.HTTPError as exc:
            raise AdapterError(f"LM Studio native probe failed for {self.provider.name}: {exc}") from exc

        raw = response_json_or_metadata(response)
        # The listing is read from ``models`` and falls back to ``data``.
        listed = raw.get("models", raw.get("data", []))
        names: list[str] = []
        if isinstance(listed, list):
            for entry in listed:
                if not isinstance(entry, Mapping):
                    continue
                # First truthy of ``key``, ``id``, ``model`` identifies the entry.
                label = entry.get("key") or entry.get("id") or entry.get("model")
                if label:
                    names.append(str(label))

        if response.is_success:
            summary = "ok"
        else:
            summary = _redacted_response_text(response)
        return ProbeResult(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            ok=response.is_success,
            status_code=response.status_code,
            message=summary,
            models=names,
            raw=raw,
        )

    def chat_completion(self, model: str, case: BenchmarkCase) -> AdapterResponse:
        """POST the case prompt to ``<api-v1-base>/chat`` for *model* as a non-streaming request.

        Raises:
            AdapterError: If the HTTP request itself fails.
        """
        endpoint = self._api_v1_base_url() + "/chat"
        body: dict[str, Any] = {
            "model": model,
            "input": case.prompt,
            "stream": False,
            "store": False,
            "temperature": case.temperature,
            "max_output_tokens": case.max_tokens,
        }
        if case.system_prompt:
            body["system_prompt"] = case.system_prompt

        begin = perf_counter()
        try:
            response = self.client.post(endpoint, headers=self._headers(), json=body, timeout=case.timeout_seconds)
        except httpx.HTTPError as exc:
            raise AdapterError(f"LM Studio native chat request failed for {self.provider.name}: {exc}") from exc
        elapsed_ms = (perf_counter() - begin) * 1000

        raw = response_json_or_metadata(response)

        calls = extract_lmstudio_tool_calls(raw)
        return AdapterResponse(
            provider=self.provider.name,
            contract=ApiContract.NATIVE,
            status_code=response.status_code,
            latency_ms=elapsed_ms,
            raw=raw,
            text=extract_lmstudio_text(raw),
            tool_names=_tool_names(calls),
            tool_calls=calls,
        )

    def _api_v1_base_url(self) -> str:
        """Return the provider base URL without a trailing slash, ending in ``/api/v1``."""
        root = str(self.provider.base_url).rstrip("/")
        return root if root.endswith("/api/v1") else root + "/api/v1"
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def adapter_for(
    provider: ProviderConfig,
    *,
    secrets: SecretResolver | None = None,
    client: httpx.Client | None = None,
    timeout: float = 10.0,
) -> ProviderAdapter:
    """Build the adapter that matches *provider*'s API contract.

    Native providers are further selected by ``provider.native_adapter``
    (``"ollama"`` or ``"lm-studio"``).

    Raises:
        AdapterError: If no adapter matches the provider's contract.
    """
    contract = provider.contract
    adapter_cls = None
    # Identity comparison is kept on purpose: the contract must be the enum member itself.
    if contract is ApiContract.OPENAI:
        adapter_cls = OpenAICompatibleAdapter
    elif contract is ApiContract.OPENAI_RESPONSES:
        adapter_cls = OpenAIResponsesAdapter
    elif contract is ApiContract.ANTHROPIC:
        adapter_cls = AnthropicCompatibleAdapter
    elif contract is ApiContract.NATIVE:
        native_kind = provider.native_adapter
        if native_kind == "ollama":
            adapter_cls = OllamaNativeAdapter
        elif native_kind == "lm-studio":
            adapter_cls = LMStudioNativeAdapter

    if adapter_cls is None:
        raise AdapterError(f"no generic adapter exists for native provider: {provider.name}")
    return adapter_cls(provider, secrets=secrets, client=client, timeout=timeout)
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def response_json_or_metadata(response: httpx.Response) -> dict[str, Any]:
    """Return the response body as a dict, annotated with safe HTTP metadata.

    The media type is taken from the ``content-type`` header (parameters such
    as ``charset`` are ignored). For ``application/json`` or any ``+json``
    media type the body is parsed:

    * a JSON object becomes the returned dict,
    * any other JSON value is stored under ``"agentblaster_json"``,
    * an unparseable body yields ``{"agentblaster_parse_error":
      "invalid_json_response"}``.

    For every other media type the dict carries
    ``"agentblaster_non_json_response": True`` and, when non-empty, a redacted
    ``"agentblaster_body_preview"``. In all cases ``"agentblaster_http"`` is
    set to the safe HTTP metadata of the response.
    """
    content_type = response.headers.get("content-type", "")
    media_type = content_type.split(";", 1)[0].strip().lower()
    raw: dict[str, Any]
    if media_type == "application/json" or media_type.endswith("+json"):
        try:
            parsed = response.json()
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError; a body that is labelled JSON but contains
            # undecodable bytes must be reported as a parse error instead of
            # escaping as an unhandled exception.
            raw = {"agentblaster_parse_error": "invalid_json_response"}
        else:
            raw = dict(parsed) if isinstance(parsed, Mapping) else {"agentblaster_json": parsed}
    else:
        raw = {"agentblaster_non_json_response": True}
        preview = _redacted_body_preview(response)
        if preview:
            raw["agentblaster_body_preview"] = preview
    # Written last so the metadata always wins over a same-named body key.
    raw["agentblaster_http"] = _safe_http_metadata(response)
    return raw
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _safe_http_metadata(response: httpx.Response) -> dict[str, Any]:
    """Summarise the status, content type and allow-listed headers of *response*.

    Only headers whose lower-cased name is in ``SAFE_RESPONSE_HEADERS`` are
    kept (keyed by that lower-cased name), and the kept headers are passed
    through ``redact_value`` before being returned.
    """
    allowed: dict[str, Any] = {}
    for header_name, header_value in response.headers.items():
        lowered = header_name.lower()
        if lowered in SAFE_RESPONSE_HEADERS:
            allowed[lowered] = header_value

    metadata: dict[str, Any] = {
        "status_code": response.status_code,
        "content_type": response.headers.get("content-type"),
    }
    metadata["headers"] = dict(redact_value(allowed))
    return metadata
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _redacted_body_preview(response: httpx.Response, *, limit: int = 240) -> str:
    """Return the first *limit* characters of the body after redaction.

    If reading the body text raises ``UnicodeDecodeError`` the placeholder
    ``"<binary response>"`` is returned instead.
    """
    try:
        body = response.text
    except UnicodeDecodeError:
        return "<binary response>"
    snippet = body[:limit]
    return str(redact_value(snippet))
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _redacted_response_text(response: httpx.Response, *, limit: int = 240) -> str:
    """Alias of ``_redacted_body_preview`` with the same *limit* semantics."""
    preview = _redacted_body_preview(response, limit=limit)
    return preview
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def _openai_messages_from_case(case: BenchmarkCase, *, include_system_prompt: bool = True) -> list[dict[str, Any]]:
|
|
733
|
+
messages: list[dict[str, Any]] = []
|
|
734
|
+
if include_system_prompt and case.system_prompt:
|
|
735
|
+
messages.append({"role": "system", "content": case.system_prompt})
|
|
736
|
+
if case.messages:
|
|
737
|
+
messages.extend(_trace_message_to_openai(message) for message in case.messages)
|
|
738
|
+
return messages
|
|
739
|
+
|
|
740
|
+
messages.append({"role": "user", "content": case.prompt})
|
|
741
|
+
return messages
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def _trace_message_to_openai(message: Any) -> dict[str, Any]:
|
|
745
|
+
data: dict[str, Any] = {"role": message.role, "content": message.content}
|
|
746
|
+
if message.name:
|
|
747
|
+
data["name"] = message.name
|
|
748
|
+
if message.tool_call_id:
|
|
749
|
+
data["tool_call_id"] = message.tool_call_id
|
|
750
|
+
if message.tool_calls:
|
|
751
|
+
data["tool_calls"] = message.tool_calls
|
|
752
|
+
return data
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def _openai_responses_input_from_case(case: BenchmarkCase) -> str | list[dict[str, Any]]:
|
|
756
|
+
if case.messages:
|
|
757
|
+
return _openai_messages_from_case(case, include_system_prompt=False)
|
|
758
|
+
return case.prompt
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def _anthropic_messages_and_system_from_case(case: BenchmarkCase) -> tuple[list[dict[str, Any]], Any | None]:
    """Split *case* into Anthropic ``messages`` and a ``system`` value.

    Without a message trace the result is one user message holding
    ``case.prompt`` plus the system value built from ``case.system_prompt``.

    With a trace, each entry is mapped by role:

    * ``system``    -> its text is appended to the system prompt,
    * ``tool``      -> a user message wrapping one ``tool_result`` block,
    * ``assistant`` -> an assistant message (tool calls become blocks),
    * anything else -> a user message carrying the content unchanged.

    Non-empty system parts are joined with blank lines.
    """
    if not case.messages:
        single_turn = [{"role": "user", "content": case.prompt}]
        return single_turn, _anthropic_system_value(case.system_prompt, case.cache_control)

    system_chunks: list[str] = [case.system_prompt] if case.system_prompt else []
    converted: list[dict[str, Any]] = []
    for entry in case.messages:
        role = entry.role
        if role == "system":
            system_chunks.append(_trace_content_text(entry.content))
        elif role == "tool":
            tool_result = {
                "type": "tool_result",
                # Fall back to the tool name, then a fixed placeholder id.
                "tool_use_id": entry.tool_call_id or entry.name or "toolu_agentblaster",
                "content": _trace_content_text(entry.content),
            }
            converted.append({"role": "user", "content": [tool_result]})
        elif role == "assistant":
            converted.append({"role": "assistant", "content": _anthropic_assistant_content(entry)})
        else:
            converted.append({"role": "user", "content": entry.content})

    joined_system = "\n\n".join(chunk for chunk in system_chunks if chunk)
    return converted, _anthropic_system_value(joined_system or None, case.cache_control)
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
def _anthropic_system_value(text: str | None, cache_control: dict[str, Any] | None) -> str | list[dict[str, Any]] | None:
|
|
797
|
+
if not text:
|
|
798
|
+
return None
|
|
799
|
+
if not cache_control:
|
|
800
|
+
return text
|
|
801
|
+
return [{"type": "text", "text": text, "cache_control": dict(cache_control)}]
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def _anthropic_assistant_content(message: Any) -> str | list[dict[str, Any]]:
|
|
805
|
+
if not message.tool_calls:
|
|
806
|
+
return message.content
|
|
807
|
+
|
|
808
|
+
blocks: list[dict[str, Any]] = []
|
|
809
|
+
text = _trace_content_text(message.content)
|
|
810
|
+
if text:
|
|
811
|
+
blocks.append({"type": "text", "text": text})
|
|
812
|
+
for tool_call in message.tool_calls:
|
|
813
|
+
block = _openai_tool_call_to_anthropic_content_block(tool_call)
|
|
814
|
+
if block is not None:
|
|
815
|
+
blocks.append(block)
|
|
816
|
+
return blocks
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def _openai_tool_call_to_anthropic_content_block(tool_call: Mapping[str, Any]) -> dict[str, Any] | None:
|
|
820
|
+
function = tool_call.get("function", {})
|
|
821
|
+
if not isinstance(function, Mapping) or not function.get("name"):
|
|
822
|
+
return None
|
|
823
|
+
return {
|
|
824
|
+
"type": "tool_use",
|
|
825
|
+
"id": str(tool_call.get("id") or f"toolu_{function['name']}"),
|
|
826
|
+
"name": str(function["name"]),
|
|
827
|
+
"input": _parse_tool_arguments(function.get("arguments")),
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def _trace_content_text(content: str | list[dict[str, Any]]) -> str:
|
|
832
|
+
if isinstance(content, str):
|
|
833
|
+
return content
|
|
834
|
+
return json.dumps(content, sort_keys=True, separators=(",", ":"))
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
def extract_openai_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found in an OpenAI chat response."""
    tool_calls = extract_openai_tool_calls(raw)
    return _tool_names(tool_calls)
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def _openai_stream_event_has_output(event: Mapping[str, Any]) -> bool:
|
|
842
|
+
choices = event.get("choices", [])
|
|
843
|
+
if not isinstance(choices, list):
|
|
844
|
+
return False
|
|
845
|
+
for choice in choices:
|
|
846
|
+
if not isinstance(choice, Mapping):
|
|
847
|
+
continue
|
|
848
|
+
delta = choice.get("delta", {})
|
|
849
|
+
if not isinstance(delta, Mapping):
|
|
850
|
+
continue
|
|
851
|
+
if delta.get("content"):
|
|
852
|
+
return True
|
|
853
|
+
if delta.get("tool_calls"):
|
|
854
|
+
return True
|
|
855
|
+
return False
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def _accumulate_openai_stream_event(
|
|
859
|
+
event: Mapping[str, Any],
|
|
860
|
+
text_parts: list[str],
|
|
861
|
+
tool_call_fragments: dict[int, dict[str, Any]],
|
|
862
|
+
) -> None:
|
|
863
|
+
choices = event.get("choices", [])
|
|
864
|
+
if not isinstance(choices, list):
|
|
865
|
+
return
|
|
866
|
+
for choice in choices:
|
|
867
|
+
if not isinstance(choice, Mapping):
|
|
868
|
+
continue
|
|
869
|
+
delta = choice.get("delta", {})
|
|
870
|
+
if not isinstance(delta, Mapping):
|
|
871
|
+
continue
|
|
872
|
+
if delta.get("content"):
|
|
873
|
+
text_parts.append(str(delta["content"]))
|
|
874
|
+
tool_calls = delta.get("tool_calls", [])
|
|
875
|
+
if not isinstance(tool_calls, list):
|
|
876
|
+
continue
|
|
877
|
+
for tool_call in tool_calls:
|
|
878
|
+
if not isinstance(tool_call, Mapping):
|
|
879
|
+
continue
|
|
880
|
+
index = int(tool_call.get("index") or 0)
|
|
881
|
+
fragment = tool_call_fragments.setdefault(index, {"name": "", "arguments": ""})
|
|
882
|
+
function = tool_call.get("function", {})
|
|
883
|
+
if isinstance(function, Mapping):
|
|
884
|
+
if function.get("name"):
|
|
885
|
+
fragment["name"] += str(function["name"])
|
|
886
|
+
if function.get("arguments"):
|
|
887
|
+
fragment["arguments"] += str(function["arguments"])
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
def _stream_tool_calls(tool_call_fragments: dict[int, dict[str, Any]]) -> list[ToolCallRecord]:
|
|
891
|
+
calls: list[ToolCallRecord] = []
|
|
892
|
+
for index in sorted(tool_call_fragments):
|
|
893
|
+
fragment = tool_call_fragments[index]
|
|
894
|
+
if not fragment.get("name"):
|
|
895
|
+
continue
|
|
896
|
+
calls.append(
|
|
897
|
+
ToolCallRecord(
|
|
898
|
+
name=str(fragment["name"]),
|
|
899
|
+
arguments=_parse_tool_arguments(fragment.get("arguments")),
|
|
900
|
+
)
|
|
901
|
+
)
|
|
902
|
+
return calls
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
def _openai_responses_stream_event_has_output(event: Mapping[str, Any]) -> bool:
|
|
906
|
+
event_type = event.get("type")
|
|
907
|
+
if event_type in {"response.output_text.delta", "response.refusal.delta"}:
|
|
908
|
+
return bool(event.get("delta"))
|
|
909
|
+
if event_type == "response.function_call_arguments.delta":
|
|
910
|
+
return bool(event.get("delta"))
|
|
911
|
+
if event_type in {"response.output_item.added", "response.output_item.done"}:
|
|
912
|
+
item = event.get("item", {})
|
|
913
|
+
return isinstance(item, Mapping) and item.get("type") in {"function_call", "custom_tool_call"}
|
|
914
|
+
return False
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
def _accumulate_openai_responses_stream_event(
|
|
918
|
+
event: Mapping[str, Any],
|
|
919
|
+
text_parts: list[str],
|
|
920
|
+
tool_call_fragments: dict[str, dict[str, Any]],
|
|
921
|
+
usage: dict[str, Any],
|
|
922
|
+
) -> None:
|
|
923
|
+
event_type = event.get("type")
|
|
924
|
+
if event_type == "response.output_text.delta":
|
|
925
|
+
delta = event.get("delta")
|
|
926
|
+
if delta:
|
|
927
|
+
text_parts.append(str(delta))
|
|
928
|
+
return
|
|
929
|
+
if event_type == "response.refusal.delta":
|
|
930
|
+
delta = event.get("delta")
|
|
931
|
+
if delta:
|
|
932
|
+
text_parts.append(str(delta))
|
|
933
|
+
return
|
|
934
|
+
if event_type in {"response.output_item.added", "response.output_item.done"}:
|
|
935
|
+
_accumulate_openai_responses_output_item(event, tool_call_fragments)
|
|
936
|
+
_update_openai_responses_usage_from_event(event, usage)
|
|
937
|
+
return
|
|
938
|
+
if event_type == "response.function_call_arguments.delta":
|
|
939
|
+
fragment = _openai_responses_tool_fragment(event, tool_call_fragments)
|
|
940
|
+
fragment["arguments"] += str(event.get("delta") or "")
|
|
941
|
+
return
|
|
942
|
+
if event_type == "response.function_call_arguments.done":
|
|
943
|
+
fragment = _openai_responses_tool_fragment(event, tool_call_fragments)
|
|
944
|
+
if event.get("name"):
|
|
945
|
+
fragment["name"] = str(event["name"])
|
|
946
|
+
if event.get("arguments") is not None:
|
|
947
|
+
fragment["arguments"] = str(event.get("arguments") or "")
|
|
948
|
+
return
|
|
949
|
+
_update_openai_responses_usage_from_event(event, usage)
|
|
950
|
+
|
|
951
|
+
|
|
952
|
+
def _accumulate_openai_responses_output_item(
|
|
953
|
+
event: Mapping[str, Any],
|
|
954
|
+
tool_call_fragments: dict[str, dict[str, Any]],
|
|
955
|
+
) -> None:
|
|
956
|
+
item = event.get("item", {})
|
|
957
|
+
if not isinstance(item, Mapping):
|
|
958
|
+
return
|
|
959
|
+
if item.get("type") not in {"function_call", "custom_tool_call"}:
|
|
960
|
+
return
|
|
961
|
+
fragment = _openai_responses_tool_fragment(event, tool_call_fragments)
|
|
962
|
+
if item.get("name"):
|
|
963
|
+
fragment["name"] = str(item["name"])
|
|
964
|
+
if item.get("arguments") is not None:
|
|
965
|
+
fragment["arguments"] = str(item.get("arguments") or "")
|
|
966
|
+
if item.get("input") is not None:
|
|
967
|
+
fragment["arguments"] = json.dumps(item["input"], sort_keys=True) if isinstance(item["input"], Mapping) else str(item["input"])
|
|
968
|
+
|
|
969
|
+
|
|
970
|
+
def _openai_responses_tool_fragment(
|
|
971
|
+
event: Mapping[str, Any],
|
|
972
|
+
tool_call_fragments: dict[str, dict[str, Any]],
|
|
973
|
+
) -> dict[str, Any]:
|
|
974
|
+
key = str(event.get("item_id") or event.get("output_index") or len(tool_call_fragments))
|
|
975
|
+
return tool_call_fragments.setdefault(key, {"name": "", "arguments": ""})
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def _update_openai_responses_usage_from_event(event: Mapping[str, Any], usage: dict[str, Any]) -> None:
|
|
979
|
+
response = event.get("response")
|
|
980
|
+
if not isinstance(response, Mapping):
|
|
981
|
+
return
|
|
982
|
+
event_usage = response.get("usage")
|
|
983
|
+
if isinstance(event_usage, Mapping):
|
|
984
|
+
usage.update(dict(event_usage))
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def _openai_responses_stream_status(event: Mapping[str, Any]) -> str | None:
|
|
988
|
+
response = event.get("response")
|
|
989
|
+
if isinstance(response, Mapping) and response.get("status"):
|
|
990
|
+
return str(response["status"])
|
|
991
|
+
event_type = event.get("type")
|
|
992
|
+
if event_type == "response.completed":
|
|
993
|
+
return "completed"
|
|
994
|
+
if event_type == "response.failed":
|
|
995
|
+
return "failed"
|
|
996
|
+
if event_type == "response.incomplete":
|
|
997
|
+
return "incomplete"
|
|
998
|
+
return None
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
def _openai_responses_stream_tool_calls(tool_call_fragments: dict[str, dict[str, Any]]) -> list[ToolCallRecord]:
|
|
1002
|
+
calls: list[ToolCallRecord] = []
|
|
1003
|
+
for key in sorted(tool_call_fragments):
|
|
1004
|
+
fragment = tool_call_fragments[key]
|
|
1005
|
+
if not fragment.get("name"):
|
|
1006
|
+
continue
|
|
1007
|
+
calls.append(
|
|
1008
|
+
ToolCallRecord(
|
|
1009
|
+
name=str(fragment["name"]),
|
|
1010
|
+
arguments=_parse_tool_arguments(fragment.get("arguments")),
|
|
1011
|
+
)
|
|
1012
|
+
)
|
|
1013
|
+
return calls
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
def extract_openai_responses_text(raw: Mapping[str, Any]) -> str:
    """Extract the assistant text from a Responses API payload.

    A truthy top-level ``output_text`` is returned directly. Otherwise the
    text is concatenated from ``message`` items in ``output``: string
    content is used as-is, and list content contributes the ``text`` of
    every ``output_text`` / ``text`` part. Returns ``""`` when ``output`` is
    not a list.
    """
    shortcut = raw.get("output_text")
    if shortcut:
        return str(shortcut)

    output = raw.get("output", [])
    if not isinstance(output, list):
        return ""

    collected: list[str] = []
    for item in output:
        if not (isinstance(item, Mapping) and item.get("type") == "message"):
            continue
        content = item.get("content", [])
        if isinstance(content, str):
            collected.append(content)
        elif isinstance(content, list):
            collected.extend(
                str(part.get("text") or "")
                for part in content
                if isinstance(part, Mapping) and part.get("type") in {"output_text", "text"}
            )
    return "".join(collected)
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
def extract_openai_responses_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from the ``output`` list of a Responses payload.

    Named ``function_call`` and ``custom_tool_call`` items are converted.
    Their arguments come from ``arguments`` or, failing that, ``input``.
    """
    output = raw.get("output", [])
    if not isinstance(output, list):
        return []
    return [
        ToolCallRecord(
            name=str(item["name"]),
            arguments=_parse_tool_arguments(item.get("arguments") or item.get("input")),
        )
        for item in output
        if isinstance(item, Mapping)
        and item.get("type") in {"function_call", "custom_tool_call"}
        and item.get("name")
    ]
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
def extract_openai_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from every choice of a chat-completions payload.

    Walks ``choices[*].message.tool_calls[*].function``; entries with an
    unexpected shape or without a function name are skipped.
    """
    records: list[ToolCallRecord] = []
    choices = raw.get("choices", [])
    for choice in choices if isinstance(choices, list) else []:
        message = choice.get("message", {}) if isinstance(choice, Mapping) else None
        if not isinstance(message, Mapping):
            continue
        tool_calls = message.get("tool_calls", [])
        if not isinstance(tool_calls, list):
            continue
        for tool_call in tool_calls:
            function = tool_call.get("function", {}) if isinstance(tool_call, Mapping) else None
            if isinstance(function, Mapping) and function.get("name"):
                records.append(
                    ToolCallRecord(
                        name=str(function["name"]),
                        arguments=_parse_tool_arguments(function.get("arguments")),
                    )
                )
    return records
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
def _legacy_extract_openai_tool_names(raw: Mapping[str, Any]) -> list[str]:
|
|
1089
|
+
names: list[str] = []
|
|
1090
|
+
choices = raw.get("choices", [])
|
|
1091
|
+
if not isinstance(choices, list):
|
|
1092
|
+
return names
|
|
1093
|
+
for choice in choices:
|
|
1094
|
+
if not isinstance(choice, Mapping):
|
|
1095
|
+
continue
|
|
1096
|
+
message = choice.get("message", {})
|
|
1097
|
+
if not isinstance(message, Mapping):
|
|
1098
|
+
continue
|
|
1099
|
+
tool_calls = message.get("tool_calls", [])
|
|
1100
|
+
if not isinstance(tool_calls, list):
|
|
1101
|
+
continue
|
|
1102
|
+
for tool_call in tool_calls:
|
|
1103
|
+
if not isinstance(tool_call, Mapping):
|
|
1104
|
+
continue
|
|
1105
|
+
function = tool_call.get("function", {})
|
|
1106
|
+
if isinstance(function, Mapping) and function.get("name"):
|
|
1107
|
+
names.append(str(function["name"]))
|
|
1108
|
+
return names
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
def extract_anthropic_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found in an Anthropic response."""
    tool_calls = extract_anthropic_tool_calls(raw)
    return _tool_names(tool_calls)
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
def extract_anthropic_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect named ``tool_use`` blocks from an Anthropic message payload.

    Each block's ``input`` is copied as the call arguments when it is a
    mapping; otherwise the arguments are empty.
    """
    content = raw.get("content", [])
    if not isinstance(content, list):
        return []

    records: list[ToolCallRecord] = []
    for block in content:
        if not isinstance(block, Mapping):
            continue
        if block.get("type") != "tool_use" or not block.get("name"):
            continue
        tool_input = block.get("input")
        arguments = dict(tool_input) if isinstance(tool_input, Mapping) else {}
        records.append(ToolCallRecord(name=str(block["name"]), arguments=arguments))
    return records
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
def _anthropic_stream_event_has_output(event: Mapping[str, Any]) -> bool:
|
|
1128
|
+
event_type = event.get("type")
|
|
1129
|
+
if event_type == "content_block_start":
|
|
1130
|
+
content_block = event.get("content_block", {})
|
|
1131
|
+
if not isinstance(content_block, Mapping):
|
|
1132
|
+
return False
|
|
1133
|
+
if content_block.get("type") == "text" and content_block.get("text"):
|
|
1134
|
+
return True
|
|
1135
|
+
return content_block.get("type") in {"tool_use", "server_tool_use"} and bool(content_block.get("name"))
|
|
1136
|
+
if event_type == "content_block_delta":
|
|
1137
|
+
delta = event.get("delta", {})
|
|
1138
|
+
if not isinstance(delta, Mapping):
|
|
1139
|
+
return False
|
|
1140
|
+
if delta.get("type") == "text_delta" and delta.get("text"):
|
|
1141
|
+
return True
|
|
1142
|
+
return delta.get("type") == "input_json_delta" and bool(delta.get("partial_json"))
|
|
1143
|
+
return False
|
|
1144
|
+
|
|
1145
|
+
|
|
1146
|
+
def _accumulate_anthropic_stream_event(
|
|
1147
|
+
event: Mapping[str, Any],
|
|
1148
|
+
text_parts: list[str],
|
|
1149
|
+
tool_call_fragments: dict[int, dict[str, Any]],
|
|
1150
|
+
usage: dict[str, Any],
|
|
1151
|
+
) -> None:
|
|
1152
|
+
event_type = event.get("type")
|
|
1153
|
+
if event_type == "message_start":
|
|
1154
|
+
message = event.get("message", {})
|
|
1155
|
+
if isinstance(message, Mapping):
|
|
1156
|
+
_update_anthropic_stream_usage(usage, message.get("usage"))
|
|
1157
|
+
return
|
|
1158
|
+
if event_type == "message_delta":
|
|
1159
|
+
_update_anthropic_stream_usage(usage, event.get("usage"))
|
|
1160
|
+
return
|
|
1161
|
+
if event_type == "content_block_start":
|
|
1162
|
+
_accumulate_anthropic_content_block_start(event, text_parts, tool_call_fragments)
|
|
1163
|
+
return
|
|
1164
|
+
if event_type == "content_block_delta":
|
|
1165
|
+
_accumulate_anthropic_content_block_delta(event, text_parts, tool_call_fragments)
|
|
1166
|
+
|
|
1167
|
+
|
|
1168
|
+
def _update_anthropic_stream_usage(usage: dict[str, Any], value: Any) -> None:
|
|
1169
|
+
if isinstance(value, Mapping):
|
|
1170
|
+
usage.update(dict(value))
|
|
1171
|
+
|
|
1172
|
+
|
|
1173
|
+
def _accumulate_anthropic_content_block_start(
|
|
1174
|
+
event: Mapping[str, Any],
|
|
1175
|
+
text_parts: list[str],
|
|
1176
|
+
tool_call_fragments: dict[int, dict[str, Any]],
|
|
1177
|
+
) -> None:
|
|
1178
|
+
content_block = event.get("content_block", {})
|
|
1179
|
+
if not isinstance(content_block, Mapping):
|
|
1180
|
+
return
|
|
1181
|
+
block_type = content_block.get("type")
|
|
1182
|
+
if block_type == "text":
|
|
1183
|
+
text = content_block.get("text")
|
|
1184
|
+
if text:
|
|
1185
|
+
text_parts.append(str(text))
|
|
1186
|
+
return
|
|
1187
|
+
if block_type not in {"tool_use", "server_tool_use"}:
|
|
1188
|
+
return
|
|
1189
|
+
|
|
1190
|
+
index = _event_index(event)
|
|
1191
|
+
fragment = tool_call_fragments.setdefault(index, {"name": "", "arguments": "", "arguments_object": {}})
|
|
1192
|
+
if content_block.get("name"):
|
|
1193
|
+
fragment["name"] = str(content_block["name"])
|
|
1194
|
+
input_value = content_block.get("input")
|
|
1195
|
+
if isinstance(input_value, Mapping) and input_value:
|
|
1196
|
+
fragment["arguments_object"] = dict(input_value)
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
def _accumulate_anthropic_content_block_delta(
|
|
1200
|
+
event: Mapping[str, Any],
|
|
1201
|
+
text_parts: list[str],
|
|
1202
|
+
tool_call_fragments: dict[int, dict[str, Any]],
|
|
1203
|
+
) -> None:
|
|
1204
|
+
delta = event.get("delta", {})
|
|
1205
|
+
if not isinstance(delta, Mapping):
|
|
1206
|
+
return
|
|
1207
|
+
delta_type = delta.get("type")
|
|
1208
|
+
if delta_type == "text_delta":
|
|
1209
|
+
text = delta.get("text")
|
|
1210
|
+
if text:
|
|
1211
|
+
text_parts.append(str(text))
|
|
1212
|
+
return
|
|
1213
|
+
if delta_type != "input_json_delta":
|
|
1214
|
+
return
|
|
1215
|
+
|
|
1216
|
+
index = _event_index(event)
|
|
1217
|
+
fragment = tool_call_fragments.setdefault(index, {"name": "", "arguments": "", "arguments_object": {}})
|
|
1218
|
+
if "partial_json" in delta:
|
|
1219
|
+
fragment["arguments"] += str(delta.get("partial_json") or "")
|
|
1220
|
+
|
|
1221
|
+
|
|
1222
|
+
def _anthropic_stream_stop_reason(event: Mapping[str, Any]) -> str | None:
|
|
1223
|
+
if event.get("type") != "message_delta":
|
|
1224
|
+
return None
|
|
1225
|
+
delta = event.get("delta", {})
|
|
1226
|
+
if isinstance(delta, Mapping) and delta.get("stop_reason"):
|
|
1227
|
+
return str(delta["stop_reason"])
|
|
1228
|
+
return None
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
def _anthropic_stream_tool_calls(tool_call_fragments: dict[int, dict[str, Any]]) -> list[ToolCallRecord]:
|
|
1232
|
+
calls: list[ToolCallRecord] = []
|
|
1233
|
+
for index in sorted(tool_call_fragments):
|
|
1234
|
+
fragment = tool_call_fragments[index]
|
|
1235
|
+
if not fragment.get("name"):
|
|
1236
|
+
continue
|
|
1237
|
+
argument_fragments = fragment.get("arguments")
|
|
1238
|
+
if argument_fragments:
|
|
1239
|
+
arguments = _parse_tool_arguments(argument_fragments)
|
|
1240
|
+
else:
|
|
1241
|
+
arguments_object = fragment.get("arguments_object")
|
|
1242
|
+
arguments = dict(arguments_object) if isinstance(arguments_object, Mapping) else {}
|
|
1243
|
+
calls.append(ToolCallRecord(name=str(fragment["name"]), arguments=arguments))
|
|
1244
|
+
return calls
|
|
1245
|
+
|
|
1246
|
+
|
|
1247
|
+
def _event_index(event: Mapping[str, Any]) -> int:
|
|
1248
|
+
try:
|
|
1249
|
+
return int(event.get("index") or 0)
|
|
1250
|
+
except (TypeError, ValueError):
|
|
1251
|
+
return 0
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def _legacy_extract_anthropic_tool_names(raw: Mapping[str, Any]) -> list[str]:
|
|
1255
|
+
names: list[str] = []
|
|
1256
|
+
content = raw.get("content", [])
|
|
1257
|
+
if not isinstance(content, list):
|
|
1258
|
+
return names
|
|
1259
|
+
for block in content:
|
|
1260
|
+
if isinstance(block, Mapping) and block.get("type") == "tool_use" and block.get("name"):
|
|
1261
|
+
names.append(str(block["name"]))
|
|
1262
|
+
return names
|
|
1263
|
+
|
|
1264
|
+
|
|
1265
|
+
def extract_ollama_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found in an Ollama response."""
    tool_calls = extract_ollama_tool_calls(raw)
    return _tool_names(tool_calls)
|
|
1267
|
+
|
|
1268
|
+
|
|
1269
|
+
def extract_ollama_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from ``raw["message"]["tool_calls"]``.

    Entries with an unexpected shape or without a function name are
    skipped; arguments are parsed into a dict.
    """
    message = raw.get("message", {})
    tool_calls = message.get("tool_calls", []) if isinstance(message, Mapping) else None
    if not isinstance(tool_calls, list):
        return []

    records: list[ToolCallRecord] = []
    for tool_call in tool_calls:
        function = tool_call.get("function", {}) if isinstance(tool_call, Mapping) else None
        if isinstance(function, Mapping) and function.get("name"):
            records.append(
                ToolCallRecord(
                    name=str(function["name"]),
                    arguments=_parse_tool_arguments(function.get("arguments")),
                )
            )
    return records
|
|
1289
|
+
|
|
1290
|
+
|
|
1291
|
+
def _legacy_extract_ollama_tool_names(raw: Mapping[str, Any]) -> list[str]:
|
|
1292
|
+
names: list[str] = []
|
|
1293
|
+
message = raw.get("message", {})
|
|
1294
|
+
if not isinstance(message, Mapping):
|
|
1295
|
+
return names
|
|
1296
|
+
tool_calls = message.get("tool_calls", [])
|
|
1297
|
+
if not isinstance(tool_calls, list):
|
|
1298
|
+
return names
|
|
1299
|
+
for tool_call in tool_calls:
|
|
1300
|
+
if not isinstance(tool_call, Mapping):
|
|
1301
|
+
continue
|
|
1302
|
+
function = tool_call.get("function", {})
|
|
1303
|
+
if isinstance(function, Mapping) and function.get("name"):
|
|
1304
|
+
names.append(str(function["name"]))
|
|
1305
|
+
return names
|
|
1306
|
+
|
|
1307
|
+
|
|
1308
|
+
def extract_lmstudio_text(raw: Mapping[str, Any]) -> str:
    """Extract the assistant text from an LM Studio payload.

    Three payload shapes are tried in order:

    1. ``output`` list: concatenated ``content`` of its ``message`` items;
    2. ``choices[0].message.content``;
    3. top-level ``message.content``.

    Returns ``""`` when none of them applies.
    """
    output = raw.get("output", [])
    if isinstance(output, list):
        pieces: list[str] = []
        for item in output:
            if isinstance(item, Mapping) and item.get("type") == "message":
                pieces.append(str(item.get("content") or ""))
        if pieces:
            return "".join(pieces)

    choices = raw.get("choices", [])
    if choices and isinstance(choices, list) and isinstance(choices[0], Mapping):
        choice_message = choices[0].get("message", {})
        if isinstance(choice_message, Mapping):
            return str(choice_message.get("content") or "")

    top_level_message = raw.get("message", {})
    if not isinstance(top_level_message, Mapping):
        return ""
    return str(top_level_message.get("content") or "")
|
|
1330
|
+
|
|
1331
|
+
|
|
1332
|
+
def extract_lmstudio_tool_names(raw: Mapping[str, Any]) -> list[str]:
    """Return the names of the tool calls found in an LM Studio response."""
    tool_calls = extract_lmstudio_tool_calls(raw)
    return _tool_names(tool_calls)
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
def extract_lmstudio_tool_calls(raw: Mapping[str, Any]) -> list[ToolCallRecord]:
    """Collect tool calls from the ``output`` list of an LM Studio payload.

    * ``tool_call`` items with a ``tool`` name become regular records;
    * ``invalid_tool_call`` items whose ``metadata`` names a tool become
      records flagged ``valid=False``.

    Arguments are copied only when they are a mapping; otherwise they are
    empty.
    """
    output = raw.get("output", [])
    if not isinstance(output, list):
        return []

    records: list[ToolCallRecord] = []
    for item in output:
        if not isinstance(item, Mapping):
            continue
        item_kind = item.get("type")
        if item_kind == "tool_call" and item.get("tool"):
            call_arguments = item.get("arguments")
            if not isinstance(call_arguments, Mapping):
                call_arguments = {}
            records.append(ToolCallRecord(name=str(item["tool"]), arguments=dict(call_arguments)))
        elif item_kind == "invalid_tool_call":
            metadata = item.get("metadata", {})
            if not (isinstance(metadata, Mapping) and metadata.get("tool_name")):
                continue
            call_arguments = metadata.get("arguments")
            if not isinstance(call_arguments, Mapping):
                call_arguments = {}
            records.append(
                ToolCallRecord(name=str(metadata["tool_name"]), arguments=dict(call_arguments), valid=False)
            )
    return records
|
|
1353
|
+
|
|
1354
|
+
|
|
1355
|
+
def _tool_names(calls: list[ToolCallRecord]) -> list[str]:
|
|
1356
|
+
return [call.name for call in calls]
|
|
1357
|
+
|
|
1358
|
+
|
|
1359
|
+
def _stream_cancellation_elapsed_ms(case: BenchmarkCase, started: float) -> float | None:
|
|
1360
|
+
if case.cancel_after_ms is None:
|
|
1361
|
+
return None
|
|
1362
|
+
elapsed_ms = (perf_counter() - started) * 1000
|
|
1363
|
+
if elapsed_ms < case.cancel_after_ms:
|
|
1364
|
+
return None
|
|
1365
|
+
return round(elapsed_ms, 3)
|
|
1366
|
+
|
|
1367
|
+
|
|
1368
|
+
def _parse_tool_arguments(value: Any) -> dict[str, Any]:
|
|
1369
|
+
if isinstance(value, Mapping):
|
|
1370
|
+
return dict(value)
|
|
1371
|
+
if isinstance(value, str) and value.strip():
|
|
1372
|
+
try:
|
|
1373
|
+
parsed = json.loads(value)
|
|
1374
|
+
except json.JSONDecodeError:
|
|
1375
|
+
return {}
|
|
1376
|
+
if isinstance(parsed, Mapping):
|
|
1377
|
+
return dict(parsed)
|
|
1378
|
+
return {}
|
|
1379
|
+
|
|
1380
|
+
|
|
1381
|
+
def _openai_tool_to_anthropic(tool: Mapping[str, Any]) -> dict[str, Any]:
|
|
1382
|
+
if tool.get("type") == "function" and isinstance(tool.get("function"), Mapping):
|
|
1383
|
+
function = tool["function"]
|
|
1384
|
+
converted = {
|
|
1385
|
+
"name": function.get("name"),
|
|
1386
|
+
"description": function.get("description", ""),
|
|
1387
|
+
"input_schema": function.get("parameters", {"type": "object", "properties": {}}),
|
|
1388
|
+
}
|
|
1389
|
+
cache_control = tool.get("cache_control") or function.get("cache_control")
|
|
1390
|
+
if isinstance(cache_control, Mapping):
|
|
1391
|
+
converted["cache_control"] = dict(cache_control)
|
|
1392
|
+
return converted
|
|
1393
|
+
return dict(tool)
|
|
1394
|
+
|
|
1395
|
+
|
|
1396
|
+
def _anthropic_tools_from_case(case: BenchmarkCase) -> list[dict[str, Any]]:
|
|
1397
|
+
tools = [_openai_tool_to_anthropic(tool) for tool in case.tools]
|
|
1398
|
+
if tools and case.cache_control and "cache_control" not in tools[-1]:
|
|
1399
|
+
tools[-1] = {**tools[-1], "cache_control": dict(case.cache_control)}
|
|
1400
|
+
return tools
|
|
1401
|
+
|
|
1402
|
+
|
|
1403
|
+
def _openai_chat_tool_to_responses_tool(tool: Mapping[str, Any]) -> dict[str, Any]:
|
|
1404
|
+
if tool.get("type") == "function" and isinstance(tool.get("function"), Mapping):
|
|
1405
|
+
function = tool["function"]
|
|
1406
|
+
converted = {
|
|
1407
|
+
"type": "function",
|
|
1408
|
+
"name": function.get("name"),
|
|
1409
|
+
"description": function.get("description", ""),
|
|
1410
|
+
"parameters": function.get("parameters", {"type": "object", "properties": {}}),
|
|
1411
|
+
}
|
|
1412
|
+
if "strict" in function:
|
|
1413
|
+
converted["strict"] = function["strict"]
|
|
1414
|
+
return converted
|
|
1415
|
+
return dict(tool)
|
|
1416
|
+
|
|
1417
|
+
|
|
1418
|
+
def _openai_chat_tool_choice_to_responses_tool_choice(tool_choice: str | Mapping[str, Any]) -> str | dict[str, Any]:
|
|
1419
|
+
if isinstance(tool_choice, str):
|
|
1420
|
+
return tool_choice
|
|
1421
|
+
if tool_choice.get("type") == "function" and isinstance(tool_choice.get("function"), Mapping):
|
|
1422
|
+
function = tool_choice["function"]
|
|
1423
|
+
if function.get("name"):
|
|
1424
|
+
return {"type": "function", "name": function["name"]}
|
|
1425
|
+
return dict(tool_choice)
|
|
1426
|
+
|
|
1427
|
+
|
|
1428
|
+
def _openai_tool_choice_to_anthropic(tool_choice: str | Mapping[str, Any]) -> str | dict[str, Any]:
|
|
1429
|
+
if isinstance(tool_choice, str):
|
|
1430
|
+
if tool_choice in {"auto", "any", "none"}:
|
|
1431
|
+
return tool_choice
|
|
1432
|
+
return {"type": "tool", "name": tool_choice}
|
|
1433
|
+
if tool_choice.get("type") == "function" and isinstance(tool_choice.get("function"), Mapping):
|
|
1434
|
+
return {"type": "tool", "name": str(tool_choice["function"].get("name"))}
|
|
1435
|
+
return dict(tool_choice)
|