docent-python 0.1.55a0__tar.gz → 0.1.57a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/.gitignore +3 -0
  2. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/PKG-INFO +2 -1
  3. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/__init__.py +2 -0
  4. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_svc.py +7 -0
  5. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/model_registry.py +11 -1
  6. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/anthropic.py +106 -105
  7. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/common.py +32 -3
  8. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/google.py +147 -141
  9. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/openai.py +83 -79
  10. docent_python-0.1.57a0/docent/_llm_util/providers/openrouter.py +160 -0
  11. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/preference_types.py +3 -1
  12. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/provider_registry.py +5 -2
  13. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/agent_run.py +16 -0
  14. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/citation.py +7 -0
  15. docent_python-0.1.57a0/docent/data_models/feedback.py +410 -0
  16. docent_python-0.1.57a0/docent/data_models/reading.py +331 -0
  17. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/runner.py +2 -2
  18. docent_python-0.1.57a0/docent/mcp/server.py +392 -0
  19. docent_python-0.1.57a0/docent/sdk/__init__.py +16 -0
  20. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/client.py +884 -52
  21. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/llm_context.py +234 -16
  22. docent_python-0.1.57a0/docent/sdk/reading.py +286 -0
  23. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/pyproject.toml +2 -1
  24. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/uv.lock +6 -4
  25. docent_python-0.1.55a0/docent/_llm_util/providers/openrouter.py +0 -428
  26. docent_python-0.1.55a0/docent/data_models/feedback.py +0 -458
  27. docent_python-0.1.55a0/docent/mcp/server.py +0 -138
  28. docent_python-0.1.55a0/docent/sdk/__init__.py +0 -5
  29. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/LICENSE.md +0 -0
  30. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/README.md +0 -0
  31. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/__init__.py +0 -0
  32. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/__init__.py +0 -0
  33. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  34. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  35. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_cache.py +0 -0
  36. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/__init__.py +0 -0
  37. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_log_util/__init__.py +0 -0
  38. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_log_util/logger.py +0 -0
  39. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/__init__.py +0 -0
  40. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/_tiktoken_util.py +0 -0
  41. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/__init__.py +0 -0
  42. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/content.py +0 -0
  43. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/message.py +0 -0
  44. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/response_format.py +0 -0
  45. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/tool.py +0 -0
  46. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/formatted_objects.py +0 -0
  47. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/judge.py +0 -0
  48. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/metadata_util.py +0 -0
  49. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/regex.py +0 -0
  50. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/transcript.py +0 -0
  51. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/util.py +0 -0
  52. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/__init__.py +0 -0
  53. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/analysis.py +0 -0
  54. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/impl.py +0 -0
  55. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/stats.py +0 -0
  56. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/types.py +0 -0
  57. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/forgiving_json.py +0 -0
  58. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.json +0 -0
  59. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.py +0 -0
  60. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/parse_output.py +0 -0
  61. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/template_formatter.py +0 -0
  62. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/voting.py +0 -0
  63. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/loaders/load_inspect.py +0 -0
  64. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/mcp/__init__.py +0 -0
  65. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/mcp/__main__.py +0 -0
  66. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/py.typed +0 -0
  67. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/__init__.py +0 -0
  68. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/load.py +0 -0
  69. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/log.eval +0 -0
  70. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/tb_airline.json +0 -0
  71. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/agent_run_writer.py +0 -0
  72. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/llm_request.py +0 -0
  73. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/trace.py +0 -0
  74. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/trace_temp.py +0 -0
@@ -145,6 +145,9 @@ ENV/
 env.bak/
 venv.bak/
 
+# Docent environment files
+docent.env*
+
 # Spyder project settings
 .spyderproject
 .spyproject
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.55a0
+Version: 0.1.57a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -30,6 +30,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1
 Requires-Dist: orjson>=3.11.6
 Requires-Dist: pandas>=2.3.3
 Requires-Dist: pydantic>=2.11.7
+Requires-Dist: pyjwt>=2.12.0
 Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: tiktoken>=0.7.0
@@ -4,6 +4,7 @@ __all__ = [
     "load_config_file",
     "AgentRunRef",
     "TranscriptRef",
+    "ReadingResultRef",
    "ResultRef",
     "Prompt",
 ]
@@ -13,6 +14,7 @@ from docent.sdk.client import Docent, load_config_file
 from docent.sdk.llm_context import (
     AgentRunRef,
     Prompt,
+    ReadingResultRef,
     ResultRef,
     TranscriptRef,
 )
@@ -89,6 +89,7 @@ async def _parallelize_calls(
     top_logprobs: int | None,
     timeout: float,
     semaphore: Semaphore,
+    max_retries: int,
     # use_tqdm: bool,
     cache: LLMCache | None = None,
     response_format: ResponseFormat | None = None,
@@ -106,6 +107,7 @@ async def _parallelize_calls(
         top_logprobs=top_logprobs,
         timeout=timeout,
         response_format=response_format,
+        max_retries=max_retries,
     )
 
     responses: list[LLMOutput | None] = [None for _ in inputs]
@@ -357,10 +359,14 @@ class BaseLLMService:
         completion_callback: AsyncLLMOutputStreamingCallback | None = None,
         use_cache: bool = False,
         response_format: ResponseFormat | None = None,
+        max_retries: int = 1,
         _api_key_overrides: dict[str, str] = dict(),
     ) -> list[LLMOutput]:
         """Request completions from a configured LLM provider."""
 
+        if max_retries < 0:
+            raise ValueError("max_retries must be non-negative")
+
         # We don't support logprobs for Anthropic yet
         if logprobs:
             for model_option in model_options:
@@ -429,6 +435,7 @@ class BaseLLMService:
             top_logprobs=top_logprobs,
             timeout=timeout,
             semaphore=self._semaphore,
+            max_retries=max_retries,
             cache=cache,
             response_format=response_format,
         )
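
The hunks above thread the new max_retries argument from the public BaseLLMService entry point through _parallelize_calls into every per-input provider call, validating it once at the service boundary. A minimal sketch of that flow, with invented _provider_call/_fan_out/main names standing in for code this diff does not show:

import asyncio

async def _provider_call(prompt: str, *, max_retries: int) -> str:
    # Stand-in for one provider request; the real code forwards max_retries
    # into the provider-specific completion function.
    return f"{prompt}: retry budget {max_retries}"

async def _fan_out(prompts: list[str], *, max_retries: int) -> list[str]:
    # Mirrors _parallelize_calls: every parallel task shares the same budget.
    return list(
        await asyncio.gather(*(_provider_call(p, max_retries=max_retries) for p in prompts))
    )

async def main() -> None:
    max_retries = 1
    if max_retries < 0:  # the service-level guard added in this diff
        raise ValueError("max_retries must be non-negative")
    print(await _fan_out(["input-0", "input-1"], max_retries=max_retries))

asyncio.run(main())

Validating once up front means a bad value fails fast instead of surfacing inside N parallel tasks.
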
@@ -62,6 +62,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4-5",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
+    (
+        "claude-sonnet-4-6",
+        ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
+    ),
+    (
+        "claude-opus-4-6",
+        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
+    ),
     (
         "claude-haiku-4-5",
         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
@@ -140,7 +148,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
 
 @lru_cache(maxsize=None)
 def get_model_info(model_name: str) -> Optional[ModelInfo]:
-    for registry_model_name, info in _REGISTRY:
+    for registry_model_name, info in sorted(
+        _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
+    ):
         if registry_model_name in model_name:
             return info
     return None
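
get_model_info matches registry keys by substring, so iterating longest-key-first prevents a short key from shadowing a longer, more specific one. A standalone sketch of the idea (the shorter "claude-sonnet-4" key is hypothetical, included only to show the shadowing hazard):

_REGISTRY: list[tuple[str, int]] = [
    ("claude-sonnet-4", 200_000),     # hypothetical shorter key, for illustration
    ("claude-sonnet-4-6", 1_000_000),
]

def get_context_window(model_name: str) -> int | None:
    # Longest key first, so the most specific registry entry wins.
    for key, window in sorted(_REGISTRY, key=lambda entry: len(entry[0]), reverse=True):
        if key in model_name:
            return window
    return None

# Without the sort, list order would decide, and "claude-sonnet-4" could
# shadow the more specific entry when matching a dated snapshot name.
assert get_context_window("claude-sonnet-4-6-20260101") == 1_000_000
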
@@ -1,7 +1,5 @@
 from typing import Any, Literal, cast
 
-import backoff
-
 # all errors: https://docs.anthropic.com/en/api/errors
 from anthropic import (
     AsyncAnthropic,
@@ -57,8 +55,10 @@ from docent._llm_util.data_models.llm_output import (
     finalize_llm_output_partial,
 )
 from docent._llm_util.providers.common import (
+    ReasoningEffort,
     async_timeout_ctx,
     reasoning_budget,
+    retry_async,
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import (
@@ -217,28 +217,22 @@ def _convert_anthropic_error(e: Exception):
     return None
 
 
-@backoff.on_exception(
-    backoff.expo,
-    exception=(Exception),
-    giveup=lambda e: not _is_retryable_error(e),
-    max_tries=5,
-    factor=3.0,
-    on_backoff=_print_backoff_message,
-)
 async def get_anthropic_chat_completion_streaming_async(
     client: AsyncAnthropic,
     streaming_callback: AsyncSingleLLMOutputStreamingCallback | None,
     messages: list[ChatMessage],
     model_name: str,
+    *,
     tools: list[ToolInfo] | None = None,
     tool_choice: Literal["auto", "required"] | None = None,
     max_new_tokens: int = 32,
     temperature: float = 1.0,
-    reasoning_effort: Literal["low", "medium", "high"] | None = None,
+    reasoning_effort: ReasoningEffort | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
+    max_retries: int = 1,
 ):
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
@@ -247,58 +241,63 @@ async def get_anthropic_chat_completion_streaming_async(
 
     system, input_messages = parse_chat_messages(messages)
 
-    try:
-        async with async_timeout_ctx(timeout):
-            create_kwargs: dict[str, Any] = {
-                "model": model_name,
-                "messages": input_messages,
-                "max_tokens": max_new_tokens,
-                "temperature": temperature,
-                "stream": True,
-            }
-            if reasoning_effort:
-                create_kwargs["thinking"] = {
-                    "type": "enabled",
-                    "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
+                    "stream": True,
                 }
-            if tools:
-                create_kwargs["tools"] = parse_tools(tools)
-            if tool_choice_param := _parse_tool_choice(tool_choice):
-                create_kwargs["tool_choice"] = tool_choice_param
-            if system is not None:
-                create_kwargs["system"] = system
-            if response_format is not None:
-                output_format = _build_output_format(response_format)
-                extra_headers = dict(create_kwargs.get("extra_headers", {}))
-                extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
-                create_kwargs["extra_headers"] = extra_headers
-                extra_body = dict(create_kwargs.get("extra_body", {}))
-                extra_body["output_format"] = output_format
-                create_kwargs["extra_body"] = extra_body
-
-            stream = cast(
-                AsyncStream[RawMessageStreamEvent],
-                await client.messages.create(**create_kwargs),
-            )
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                stream = cast(
+                    AsyncStream[RawMessageStreamEvent],
+                    await client.messages.create(**create_kwargs),
+                )
 
-        llm_output_partial = None
-        async for chunk in stream:
-            llm_output_partial = update_llm_output(llm_output_partial, chunk)
-            if streaming_callback:
-                await streaming_callback(finalize_llm_output_partial(llm_output_partial))
+            llm_output_partial = None
+            async for chunk in stream:
+                llm_output_partial = update_llm_output(llm_output_partial, chunk)
+                if streaming_callback:
+                    await streaming_callback(finalize_llm_output_partial(llm_output_partial))
 
-        # Fully parse the partial output
-        if llm_output_partial:
-            return finalize_llm_output_partial(llm_output_partial)
-        else:
-            # Streaming did not produce anything
+            if llm_output_partial:
+                return finalize_llm_output_partial(llm_output_partial)
             return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
-    except (RateLimitError, BadRequestError) as e:
-        if e2 := _convert_anthropic_error(e):
-            raise e2 from e
-        else:
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise
 
+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+
 
 FINISH_REASON_MAP: dict[str, FinishReasonType] = {
     "end_turn": "stop",
413
412
  )
414
413
 
415
414
 
416
- @backoff.on_exception(
417
- backoff.expo,
418
- exception=(Exception),
419
- giveup=lambda e: not _is_retryable_error(e),
420
- max_tries=5,
421
- factor=3.0,
422
- on_backoff=_print_backoff_message,
423
- )
424
415
  async def get_anthropic_chat_completion_async(
425
416
  client: AsyncAnthropic,
426
417
  messages: list[ChatMessage],
427
418
  model_name: str,
419
+ *,
428
420
  tools: list[ToolInfo] | None = None,
429
421
  tool_choice: Literal["auto", "required"] | None = None,
430
422
  max_new_tokens: int = 32,
431
423
  temperature: float = 1.0,
432
- reasoning_effort: Literal["low", "medium", "high"] | None = None,
424
+ reasoning_effort: ReasoningEffort | None = None,
433
425
  logprobs: bool = False,
434
426
  top_logprobs: int | None = None,
435
427
  timeout: float = 5.0,
436
428
  response_format: ResponseFormat | None = None,
429
+ max_retries: int = 1,
437
430
  ) -> LLMOutput:
438
431
  """
439
432
  Note from kevin 1/29/2025:
@@ -451,49 +444,57 @@ async def get_anthropic_chat_completion_async(
 
     system, input_messages = parse_chat_messages(messages)
 
-    try:
-        async with async_timeout_ctx(timeout):
-            create_kwargs: dict[str, Any] = {
-                "model": model_name,
-                "messages": input_messages,
-                "max_tokens": max_new_tokens,
-                "temperature": temperature,
-            }
-            if reasoning_effort:
-                create_kwargs["thinking"] = {
-                    "type": "enabled",
-                    "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
                 }
-            if tools:
-                create_kwargs["tools"] = parse_tools(tools)
-            if tool_choice_param := _parse_tool_choice(tool_choice):
-                create_kwargs["tool_choice"] = tool_choice_param
-            if system is not None:
-                create_kwargs["system"] = system
-            if response_format is not None:
-                output_format = _build_output_format(response_format)
-                extra_headers = dict(create_kwargs.get("extra_headers", {}))
-                extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
-                create_kwargs["extra_headers"] = extra_headers
-                extra_body = dict(create_kwargs.get("extra_body", {}))
-                extra_body["output_format"] = output_format
-                create_kwargs["extra_body"] = extra_body
-
-            raw_output = cast(Message, await client.messages.create(**create_kwargs))
-
-            output = parse_anthropic_completion(raw_output, model_name)
-            if output.first and output.first.finish_reason == "length" and output.first.no_text:
-                raise CompletionTooLongException(
-                    "Completion empty due to truncation. Consider increasing max_new_tokens."
-                )
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                raw_output = cast(Message, await client.messages.create(**create_kwargs))
+
+                output = parse_anthropic_completion(raw_output, model_name)
+                if output.first and output.first.finish_reason == "length" and output.first.no_text:
+                    raise CompletionTooLongException(
+                        "Completion empty due to truncation. Consider increasing max_new_tokens."
+                    )
 
-        return output
-    except (RateLimitError, BadRequestError) as e:
-        if e2 := _convert_anthropic_error(e):
-            raise e2 from e
-        else:
+            return output
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise
 
+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+
 
 def get_anthropic_client_async(api_key: str | None = None) -> AsyncAnthropic:
     return AsyncAnthropic(api_key=api_key) if api_key else AsyncAnthropic()
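
Both Anthropic entry points are now keyword-only after model_name (the new "*,"), take the shared ReasoningEffort alias instead of an inline Literal, and accept max_retries; retrying moved from a @backoff.on_exception decorator to a retry_async wrapper around an inner _call closure. A hedged usage sketch of the refactored non-streaming call (the empty message list and model name are illustrative only):

import asyncio

from docent._llm_util.providers.anthropic import (
    get_anthropic_chat_completion_async,
    get_anthropic_client_async,
)

async def main() -> None:
    client = get_anthropic_client_async()  # no api_key arg: falls back to ambient credentials
    messages = []  # in real use, a list[ChatMessage] from docent.data_models.chat
    output = await get_anthropic_chat_completion_async(
        client,
        messages,
        "claude-sonnet-4-6",     # illustrative name from the registry hunk above
        max_new_tokens=1024,
        reasoning_effort="low",  # ReasoningEffort; "minimal" is also accepted now
        max_retries=3,           # exponential backoff (factor=3.0) on retryable errors
    )
    print(output.first)

asyncio.run(main())

Because of the "*" marker, passing these options positionally now raises a TypeError; every option after model_name must be named.
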
@@ -1,7 +1,12 @@
 import asyncio
 import json
 from contextlib import asynccontextmanager
-from typing import Any, AsyncIterator, Literal, cast
+from typing import Any, AsyncIterator, Awaitable, Callable, Literal, cast
+
+import backoff
+from backoff.types import Details
+
+ReasoningEffort = Literal["minimal", "low", "medium", "high"]
 
 
 @asynccontextmanager
@@ -14,13 +19,15 @@ async def async_timeout_ctx(timeout: float | None) -> AsyncIterator[None]:
         yield
 
 
-def reasoning_budget(max_new_tokens: int, effort: Literal["low", "medium", "high"]) -> int:
+def reasoning_budget(max_new_tokens: int, effort: ReasoningEffort) -> int:
     if effort == "high":
         ratio = 0.75
     elif effort == "medium":
         ratio = 0.5
-    else:
+    elif effort == "low":
         ratio = 0.25
+    else:
+        ratio = 0.1
     return int(max_new_tokens * ratio)
 
 
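The budget mapping now covers a fourth "minimal" tier at a 0.1 ratio. A worked example of the resulting token budgets, restated as an equivalent table-driven function (the dict form is a reformulation for clarity, not the code in the diff):

from typing import Literal

ReasoningEffort = Literal["minimal", "low", "medium", "high"]

def reasoning_budget(max_new_tokens: int, effort: ReasoningEffort) -> int:
    # Equivalent table form of the if/elif chain above.
    ratio = {"high": 0.75, "medium": 0.5, "low": 0.25, "minimal": 0.1}[effort]
    return int(max_new_tokens * ratio)

assert reasoning_budget(32_000, "high") == 24_000
assert reasoning_budget(32_000, "medium") == 16_000
assert reasoning_budget(32_000, "low") == 8_000
assert reasoning_budget(32_000, "minimal") == 3_200
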
@@ -39,3 +46,25 @@ def coerce_tool_args(args: Any) -> dict[str, Any]:
         return {"__parse_error_raw_args": args}
     # Fallback: unknown structure
     return {"__parse_error_raw_args": str(args)}
+
+
+async def retry_async(
+    func: Callable[[], Awaitable[Any]],
+    *,
+    max_retries: int,
+    is_retryable_error: Callable[[BaseException], bool],
+    factor: float,
+    on_backoff: Callable[[Details], None] | None = None,
+) -> Any:
+    if max_retries < 0:
+        raise ValueError("max_retries must be non-negative")
+
+    decorated = backoff.on_exception(
+        backoff.expo,
+        exception=(Exception,),
+        giveup=lambda e: not is_retryable_error(e),
+        max_tries=max_retries + 1,
+        factor=factor,
+        on_backoff=on_backoff,
+    )(func)
+    return await decorated()
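
retry_async turns the old decorator configuration into a per-call helper. Note max_tries=max_retries + 1: max_retries counts retries, not total attempts, so max_retries=0 means a single un-retried call, and non-retryable errors propagate immediately through the giveup predicate. A hedged usage sketch with an invented transient error type (this example really sleeps between attempts, since backoff.expo with factor=3.0 waits ~3s, then ~6s):

import asyncio

from docent._llm_util.providers.common import retry_async

class _TransientError(Exception):
    pass

attempts = 0

async def flaky() -> str:
    # Fails twice with a retryable error, then succeeds.
    global attempts
    attempts += 1
    if attempts < 3:
        raise _TransientError("transient failure")
    return "ok"

async def main() -> None:
    # max_retries=2 permits up to 3 total attempts (max_tries = max_retries + 1).
    result = await retry_async(
        flaky,
        max_retries=2,
        is_retryable_error=lambda e: isinstance(e, _TransientError),
        factor=3.0,
    )
    assert result == "ok" and attempts == 3

asyncio.run(main())
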