docent-python 0.1.56a0__tar.gz → 0.1.57a0__tar.gz
This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/.gitignore +3 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/PKG-INFO +1 -1
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_svc.py +7 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/model_registry.py +11 -1
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/anthropic.py +103 -103
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/common.py +26 -1
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/google.py +144 -139
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/openai.py +79 -79
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/openrouter.py +6 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/provider_registry.py +2 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/agent_run.py +1 -0
- docent_python-0.1.57a0/docent/data_models/feedback.py +410 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/runner.py +2 -2
- docent_python-0.1.57a0/docent/mcp/server.py +392 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/sdk/client.py +109 -52
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/sdk/llm_context.py +3 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/sdk/reading.py +3 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/pyproject.toml +1 -1
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/uv.lock +1 -1
- docent_python-0.1.56a0/docent/data_models/feedback.py +0 -458
- docent_python-0.1.56a0/docent/mcp/server.py +0 -202
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/LICENSE.md +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/README.md +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/llm_output.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/reading.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/py.typed +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/trace.py +0 -0
- {docent_python-0.1.56a0 → docent_python-0.1.57a0}/docent/trace_temp.py +0 -0
docent/_llm_util/llm_svc.py

```diff
@@ -89,6 +89,7 @@ async def _parallelize_calls(
     top_logprobs: int | None,
     timeout: float,
     semaphore: Semaphore,
+    max_retries: int,
     # use_tqdm: bool,
     cache: LLMCache | None = None,
     response_format: ResponseFormat | None = None,
```
```diff
@@ -106,6 +107,7 @@ async def _parallelize_calls(
        top_logprobs=top_logprobs,
        timeout=timeout,
        response_format=response_format,
+       max_retries=max_retries,
    )

    responses: list[LLMOutput | None] = [None for _ in inputs]
```
```diff
@@ -357,10 +359,14 @@ class BaseLLMService:
        completion_callback: AsyncLLMOutputStreamingCallback | None = None,
        use_cache: bool = False,
        response_format: ResponseFormat | None = None,
+       max_retries: int = 1,
        _api_key_overrides: dict[str, str] = dict(),
    ) -> list[LLMOutput]:
        """Request completions from a configured LLM provider."""

+       if max_retries < 0:
+           raise ValueError("max_retries must be non-negative")
+
        # We don't support logprobs for Anthropic yet
        if logprobs:
            for model_option in model_options:
```
```diff
@@ -429,6 +435,7 @@ class BaseLLMService:
            top_logprobs=top_logprobs,
            timeout=timeout,
            semaphore=self._semaphore,
+           max_retries=max_retries,
            cache=cache,
            response_format=response_format,
        )
```
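Taken together, the llm_svc.py hunks thread a caller-chosen `max_retries` from `BaseLLMService`'s completion entry point down into `_parallelize_calls`, rejecting negative values once up front. A minimal sketch of the guard's contract; the helper name below is hypothetical, but the check and the default of 1 are taken verbatim from the hunks:

```python
def _validate_max_retries(max_retries: int) -> None:
    # Mirrors the guard added to BaseLLMService in 0.1.57a0.
    if max_retries < 0:
        raise ValueError("max_retries must be non-negative")

_validate_max_retries(1)  # the default: one retry on top of the initial attempt
_validate_max_retries(0)  # valid: retries disabled, a single attempt
try:
    _validate_max_retries(-1)
except ValueError as e:
    print(e)  # "max_retries must be non-negative"
```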
docent/_llm_util/model_registry.py

```diff
@@ -62,6 +62,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
        "claude-sonnet-4-5",
        ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
    ),
+   (
+       "claude-sonnet-4-6",
+       ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
+   ),
+   (
+       "claude-opus-4-6",
+       ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
+   ),
    (
        "claude-haiku-4-5",
        ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
```
```diff
@@ -140,7 +148,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [

 @lru_cache(maxsize=None)
 def get_model_info(model_name: str) -> Optional[ModelInfo]:
-    for registry_model_name, info in _REGISTRY:
+    for registry_model_name, info in sorted(
+        _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
+    ):
         if registry_model_name in model_name:
             return info
     return None
```
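`get_model_info` matches registry keys by substring, so an unsorted scan can return a shorter key that happens to be contained in the model name before a longer, more specific one; sorting longest-first makes the most specific entry win. A self-contained sketch of that behavior, with hypothetical overlapping keys and placeholder info values:

```python
registry = [
    ("claude-sonnet-4", "generic-info"),     # hypothetical shorter key
    ("claude-sonnet-4-6", "specific-info"),  # hypothetical longer key
]

def lookup(model_name: str):
    # Longest key first, as in the patched get_model_info.
    for key, info in sorted(registry, key=lambda entry: len(entry[0]), reverse=True):
        if key in model_name:
            return info
    return None

# Without the sort, "claude-sonnet-4" could match first and win.
assert lookup("anthropic/claude-sonnet-4-6") == "specific-info"
```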
docent/_llm_util/providers/anthropic.py

```diff
@@ -1,7 +1,5 @@
 from typing import Any, Literal, cast

-import backoff
-
 # all errors: https://docs.anthropic.com/en/api/errors
 from anthropic import (
     AsyncAnthropic,
```
```diff
@@ -60,6 +58,7 @@ from docent._llm_util.providers.common import (
     ReasoningEffort,
     async_timeout_ctx,
     reasoning_budget,
+    retry_async,
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import (
```
```diff
@@ -218,19 +217,12 @@ def _convert_anthropic_error(e: Exception):
     return None


-@backoff.on_exception(
-    backoff.expo,
-    exception=(Exception),
-    giveup=lambda e: not _is_retryable_error(e),
-    max_tries=5,
-    factor=3.0,
-    on_backoff=_print_backoff_message,
-)
 async def get_anthropic_chat_completion_streaming_async(
     client: AsyncAnthropic,
     streaming_callback: AsyncSingleLLMOutputStreamingCallback | None,
     messages: list[ChatMessage],
     model_name: str,
+    *,
     tools: list[ToolInfo] | None = None,
     tool_choice: Literal["auto", "required"] | None = None,
     max_new_tokens: int = 32,
```
```diff
@@ -240,6 +232,7 @@ async def get_anthropic_chat_completion_streaming_async(
     top_logprobs: int | None = None,
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
+    max_retries: int = 1,
 ):
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
```
```diff
@@ -248,58 +241,63 @@ async def get_anthropic_chat_completion_streaming_async(

     system, input_messages = parse_chat_messages(messages)

-
-
-
-
-
-
-
-
-
-    if reasoning_effort:
-        create_kwargs["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
+                    "stream": True,
                 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                stream = cast(
+                    AsyncStream[RawMessageStreamEvent],
+                    await client.messages.create(**create_kwargs),
+                )

-
-
-
-
-
+                llm_output_partial = None
+                async for chunk in stream:
+                    llm_output_partial = update_llm_output(llm_output_partial, chunk)
+                    if streaming_callback:
+                        await streaming_callback(finalize_llm_output_partial(llm_output_partial))

-
-
-            return finalize_llm_output_partial(llm_output_partial)
-        else:
-            # Streaming did not produce anything
+            if llm_output_partial:
+                return finalize_llm_output_partial(llm_output_partial)
             return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
-
-
-
-        else:
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise

+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+

 FINISH_REASON_MAP: dict[str, FinishReasonType] = {
     "end_turn": "stop",
```
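Both Anthropic entry points are refactored the same way in this release: the module-level `@backoff.on_exception` decorator (hard-coded to `max_tries=5`) is removed, the request body moves into a local `_call` coroutine, and `retry_async` (added to common.py later in this diff) applies the backoff policy with the caller's `max_retries`. A minimal sketch of the shape of that refactor; `_issue_request` is an illustrative stand-in for `client.messages.create`:

```python
from typing import Any

from docent._llm_util.providers.common import retry_async

async def _issue_request(prompt: str) -> str:
    # Illustrative stand-in for the real provider request.
    return f"echo: {prompt}"

async def get_completion(prompt: str, max_retries: int = 1) -> Any:
    async def _call() -> Any:
        # Build kwargs, issue the request, and translate provider errors here.
        return await _issue_request(prompt)

    # The retry budget is now a per-call argument, not an import-time constant.
    return await retry_async(
        _call,
        max_retries=max_retries,
        is_retryable_error=lambda e: True,  # illustrative predicate
        factor=3.0,
    )
```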
```diff
@@ -414,18 +412,11 @@ def update_llm_output(
     )


-@backoff.on_exception(
-    backoff.expo,
-    exception=(Exception),
-    giveup=lambda e: not _is_retryable_error(e),
-    max_tries=5,
-    factor=3.0,
-    on_backoff=_print_backoff_message,
-)
 async def get_anthropic_chat_completion_async(
     client: AsyncAnthropic,
     messages: list[ChatMessage],
     model_name: str,
+    *,
     tools: list[ToolInfo] | None = None,
     tool_choice: Literal["auto", "required"] | None = None,
     max_new_tokens: int = 32,
```
```diff
@@ -435,6 +426,7 @@ async def get_anthropic_chat_completion_async(
     top_logprobs: int | None = None,
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
+    max_retries: int = 1,
 ) -> LLMOutput:
     """
     Note from kevin 1/29/2025:
```
```diff
@@ -452,49 +444,57 @@ async def get_anthropic_chat_completion_async(

     system, input_messages = parse_chat_messages(messages)

-
-
-
-
-
-
-
-
-    if reasoning_effort:
-        create_kwargs["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
                 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                raw_output = cast(Message, await client.messages.create(**create_kwargs))
+
+                output = parse_anthropic_completion(raw_output, model_name)
+                if output.first and output.first.finish_reason == "length" and output.first.no_text:
+                    raise CompletionTooLongException(
+                        "Completion empty due to truncation. Consider increasing max_new_tokens."
+                    )

-
-
-
-
-        else:
+            return output
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise

+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+

 def get_anthropic_client_async(api_key: str | None = None) -> AsyncAnthropic:
     return AsyncAnthropic(api_key=api_key) if api_key else AsyncAnthropic()
```
docent/_llm_util/providers/common.py

```diff
@@ -1,7 +1,10 @@
 import asyncio
 import json
 from contextlib import asynccontextmanager
-from typing import Any, AsyncIterator, Literal, cast
+from typing import Any, AsyncIterator, Awaitable, Callable, Literal, cast
+
+import backoff
+from backoff.types import Details

 ReasoningEffort = Literal["minimal", "low", "medium", "high"]

```
```diff
@@ -43,3 +46,25 @@ def coerce_tool_args(args: Any) -> dict[str, Any]:
         return {"__parse_error_raw_args": args}
     # Fallback: unknown structure
     return {"__parse_error_raw_args": str(args)}
+
+
+async def retry_async(
+    func: Callable[[], Awaitable[Any]],
+    *,
+    max_retries: int,
+    is_retryable_error: Callable[[BaseException], bool],
+    factor: float,
+    on_backoff: Callable[[Details], None] | None = None,
+) -> Any:
+    if max_retries < 0:
+        raise ValueError("max_retries must be non-negative")
+
+    decorated = backoff.on_exception(
+        backoff.expo,
+        exception=(Exception,),
+        giveup=lambda e: not is_retryable_error(e),
+        max_tries=max_retries + 1,
+        factor=factor,
+        on_backoff=on_backoff,
+    )(func)
+    return await decorated()
```