docent-python 0.1.55a0__tar.gz → 0.1.57a0__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/.gitignore +3 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/PKG-INFO +2 -1
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/__init__.py +2 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_svc.py +7 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/model_registry.py +11 -1
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/anthropic.py +106 -105
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/common.py +32 -3
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/google.py +147 -141
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/openai.py +83 -79
- docent_python-0.1.57a0/docent/_llm_util/providers/openrouter.py +160 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/preference_types.py +3 -1
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/provider_registry.py +5 -2
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/agent_run.py +16 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/citation.py +7 -0
- docent_python-0.1.57a0/docent/data_models/feedback.py +410 -0
- docent_python-0.1.57a0/docent/data_models/reading.py +331 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/runner.py +2 -2
- docent_python-0.1.57a0/docent/mcp/server.py +392 -0
- docent_python-0.1.57a0/docent/sdk/__init__.py +16 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/client.py +884 -52
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/llm_context.py +234 -16
- docent_python-0.1.57a0/docent/sdk/reading.py +286 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/pyproject.toml +2 -1
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/uv.lock +6 -4
- docent_python-0.1.55a0/docent/_llm_util/providers/openrouter.py +0 -428
- docent_python-0.1.55a0/docent/data_models/feedback.py +0 -458
- docent_python-0.1.55a0/docent/mcp/server.py +0 -138
- docent_python-0.1.55a0/docent/sdk/__init__.py +0 -5
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/LICENSE.md +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/README.md +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/llm_output.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/py.typed +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/trace.py +0 -0
- {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/trace_temp.py +0 -0
--- docent_python-0.1.55a0/PKG-INFO
+++ docent_python-0.1.57a0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.55a0
+Version: 0.1.57a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -30,6 +30,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1
 Requires-Dist: orjson>=3.11.6
 Requires-Dist: pandas>=2.3.3
 Requires-Dist: pydantic>=2.11.7
+Requires-Dist: pyjwt>=2.12.0
 Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: tiktoken>=0.7.0
--- docent_python-0.1.55a0/docent/__init__.py
+++ docent_python-0.1.57a0/docent/__init__.py
@@ -4,6 +4,7 @@ __all__ = [
     "load_config_file",
     "AgentRunRef",
     "TranscriptRef",
+    "ReadingResultRef",
     "ResultRef",
     "Prompt",
 ]
@@ -13,6 +14,7 @@ from docent.sdk.client import Docent, load_config_file
 from docent.sdk.llm_context import (
     AgentRunRef,
     Prompt,
+    ReadingResultRef,
     ResultRef,
     TranscriptRef,
 )
--- docent_python-0.1.55a0/docent/_llm_util/llm_svc.py
+++ docent_python-0.1.57a0/docent/_llm_util/llm_svc.py
@@ -89,6 +89,7 @@ async def _parallelize_calls(
     top_logprobs: int | None,
     timeout: float,
     semaphore: Semaphore,
+    max_retries: int,
     # use_tqdm: bool,
     cache: LLMCache | None = None,
     response_format: ResponseFormat | None = None,
@@ -106,6 +107,7 @@ async def _parallelize_calls(
            top_logprobs=top_logprobs,
            timeout=timeout,
            response_format=response_format,
+           max_retries=max_retries,
        )

    responses: list[LLMOutput | None] = [None for _ in inputs]
@@ -357,10 +359,14 @@ class BaseLLMService:
         completion_callback: AsyncLLMOutputStreamingCallback | None = None,
         use_cache: bool = False,
         response_format: ResponseFormat | None = None,
+        max_retries: int = 1,
         _api_key_overrides: dict[str, str] = dict(),
     ) -> list[LLMOutput]:
         """Request completions from a configured LLM provider."""

+        if max_retries < 0:
+            raise ValueError("max_retries must be non-negative")
+
         # We don't support logprobs for Anthropic yet
         if logprobs:
             for model_option in model_options:
@@ -429,6 +435,7 @@ class BaseLLMService:
            top_logprobs=top_logprobs,
            timeout=timeout,
            semaphore=self._semaphore,
+           max_retries=max_retries,
            cache=cache,
            response_format=response_format,
        )
--- docent_python-0.1.55a0/docent/_llm_util/model_registry.py
+++ docent_python-0.1.57a0/docent/_llm_util/model_registry.py
@@ -62,6 +62,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4-5",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
+    (
+        "claude-sonnet-4-6",
+        ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
+    ),
+    (
+        "claude-opus-4-6",
+        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
+    ),
     (
         "claude-haiku-4-5",
         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
@@ -140,7 +148,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [

 @lru_cache(maxsize=None)
 def get_model_info(model_name: str) -> Optional[ModelInfo]:
-    for registry_model_name, info in _REGISTRY:
+    for registry_model_name, info in sorted(
+        _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
+    ):
         if registry_model_name in model_name:
             return info
     return None
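`get_model_info` matches by substring (`registry_model_name in model_name`), so scan order decides which entry wins when registry names overlap; sorting longest-first makes the most specific name take precedence. A minimal, self-contained sketch of the failure mode, using hypothetical overlapping entries:

```python
# Hypothetical overlapping entries; the real _REGISTRY maps names to ModelInfo.
registry = [("gpt-4", "generic"), ("gpt-4o-mini", "specific")]

def lookup(model_name, entries):
    for registry_model_name, info in entries:
        if registry_model_name in model_name:  # substring match, first hit wins
            return info

print(lookup("gpt-4o-mini-2024", registry))
# -> "generic": the shorter name matches first

print(lookup("gpt-4o-mini-2024",
             sorted(registry, key=lambda e: len(e[0]), reverse=True)))
# -> "specific": longest names are tried first, as in 0.1.57a0
```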
--- docent_python-0.1.55a0/docent/_llm_util/providers/anthropic.py
+++ docent_python-0.1.57a0/docent/_llm_util/providers/anthropic.py
@@ -1,7 +1,5 @@
 from typing import Any, Literal, cast

-import backoff
-
 # all errors: https://docs.anthropic.com/en/api/errors
 from anthropic import (
     AsyncAnthropic,
@@ -57,8 +55,10 @@ from docent._llm_util.data_models.llm_output import (
     finalize_llm_output_partial,
 )
 from docent._llm_util.providers.common import (
+    ReasoningEffort,
     async_timeout_ctx,
     reasoning_budget,
+    retry_async,
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import (
@@ -217,28 +217,22 @@ def _convert_anthropic_error(e: Exception):
     return None


-@backoff.on_exception(
-    backoff.expo,
-    exception=(Exception),
-    giveup=lambda e: not _is_retryable_error(e),
-    max_tries=5,
-    factor=3.0,
-    on_backoff=_print_backoff_message,
-)
 async def get_anthropic_chat_completion_streaming_async(
     client: AsyncAnthropic,
     streaming_callback: AsyncSingleLLMOutputStreamingCallback | None,
     messages: list[ChatMessage],
     model_name: str,
+    *,
     tools: list[ToolInfo] | None = None,
     tool_choice: Literal["auto", "required"] | None = None,
     max_new_tokens: int = 32,
     temperature: float = 1.0,
-    reasoning_effort:
+    reasoning_effort: ReasoningEffort | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
+    max_retries: int = 1,
 ):
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
@@ -247,58 +241,63 @@ async def get_anthropic_chat_completion_streaming_async(

     system, input_messages = parse_chat_messages(messages)

-[9 removed lines not preserved in the source rendering]
-    if reasoning_effort:
-        create_kwargs["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
+                    "stream": True,
                 }
-[19 removed lines not preserved in the source rendering]
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                stream = cast(
+                    AsyncStream[RawMessageStreamEvent],
+                    await client.messages.create(**create_kwargs),
+                )

-[5 removed lines not preserved in the source rendering]
+                llm_output_partial = None
+                async for chunk in stream:
+                    llm_output_partial = update_llm_output(llm_output_partial, chunk)
+                    if streaming_callback:
+                        await streaming_callback(finalize_llm_output_partial(llm_output_partial))

-[2 removed lines not preserved in the source rendering]
-            return finalize_llm_output_partial(llm_output_partial)
-        else:
-            # Streaming did not produce anything
+            if llm_output_partial:
+                return finalize_llm_output_partial(llm_output_partial)
             return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
-[3 removed lines not preserved in the source rendering]
-        else:
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise

+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+

 FINISH_REASON_MAP: dict[str, FinishReasonType] = {
     "end_turn": "stop",
@@ -413,27 +412,21 @@ def update_llm_output(
     )


-@backoff.on_exception(
-    backoff.expo,
-    exception=(Exception),
-    giveup=lambda e: not _is_retryable_error(e),
-    max_tries=5,
-    factor=3.0,
-    on_backoff=_print_backoff_message,
-)
 async def get_anthropic_chat_completion_async(
     client: AsyncAnthropic,
     messages: list[ChatMessage],
     model_name: str,
+    *,
     tools: list[ToolInfo] | None = None,
     tool_choice: Literal["auto", "required"] | None = None,
     max_new_tokens: int = 32,
     temperature: float = 1.0,
-    reasoning_effort:
+    reasoning_effort: ReasoningEffort | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
+    max_retries: int = 1,
 ) -> LLMOutput:
     """
     Note from kevin 1/29/2025:
@@ -451,49 +444,57 @@ async def get_anthropic_chat_completion_async(

     system, input_messages = parse_chat_messages(messages)

-[8 removed lines not preserved in the source rendering]
-    if reasoning_effort:
-        create_kwargs["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
                 }
-[22 removed lines not preserved in the source rendering]
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                raw_output = cast(Message, await client.messages.create(**create_kwargs))
+
+                output = parse_anthropic_completion(raw_output, model_name)
+                if output.first and output.first.finish_reason == "length" and output.first.no_text:
+                    raise CompletionTooLongException(
+                        "Completion empty due to truncation. Consider increasing max_new_tokens."
+                    )

-[4 removed lines not preserved in the source rendering]
-    else:
+            return output
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise

+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+

 def get_anthropic_client_async(api_key: str | None = None) -> AsyncAnthropic:
     return AsyncAnthropic(api_key=api_key) if api_key else AsyncAnthropic()
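With this refactor, everything after `model_name` is keyword-only, `reasoning_effort` uses the shared `ReasoningEffort` literal, and retries run through `retry_async` instead of a module-level `backoff` decorator. A hedged call sketch; the `messages` list is assumed to be built elsewhere:

```python
# Sketch only: assumes messages: list[ChatMessage] already exists
# and that this runs inside an async function.
client = get_anthropic_client_async()
output = await get_anthropic_chat_completion_async(
    client,
    messages,
    "claude-sonnet-4-5",
    max_new_tokens=1024,
    reasoning_effort="low",  # ReasoningEffort literal from providers.common
    timeout=30.0,
    max_retries=2,           # up to 3 attempts on retryable errors
)
```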
--- docent_python-0.1.55a0/docent/_llm_util/providers/common.py
+++ docent_python-0.1.57a0/docent/_llm_util/providers/common.py
@@ -1,7 +1,12 @@
 import asyncio
 import json
 from contextlib import asynccontextmanager
-from typing import Any, AsyncIterator, Literal, cast
+from typing import Any, AsyncIterator, Awaitable, Callable, Literal, cast
+
+import backoff
+from backoff.types import Details
+
+ReasoningEffort = Literal["minimal", "low", "medium", "high"]


 @asynccontextmanager
@@ -14,13 +19,15 @@ async def async_timeout_ctx(timeout: float | None) -> AsyncIterator[None]:
         yield


-def reasoning_budget(max_new_tokens: int, effort:
+def reasoning_budget(max_new_tokens: int, effort: ReasoningEffort) -> int:
     if effort == "high":
         ratio = 0.75
     elif effort == "medium":
         ratio = 0.5
-    else:
+    elif effort == "low":
         ratio = 0.25
+    else:
+        ratio = 0.1
     return int(max_new_tokens * ratio)

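`reasoning_budget` maps each effort level to a fixed fraction of `max_new_tokens` (0.75, 0.5, 0.25, and 0.1 for the new "minimal" tier), truncated to an int. For example, given the function above:

```python
for effort in ("high", "medium", "low", "minimal"):
    print(effort, reasoning_budget(8192, effort))
# high 6144, medium 4096, low 2048, minimal 819
```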
@@ -39,3 +46,25 @@ def coerce_tool_args(args: Any) -> dict[str, Any]:
         return {"__parse_error_raw_args": args}
     # Fallback: unknown structure
     return {"__parse_error_raw_args": str(args)}
+
+
+async def retry_async(
+    func: Callable[[], Awaitable[Any]],
+    *,
+    max_retries: int,
+    is_retryable_error: Callable[[BaseException], bool],
+    factor: float,
+    on_backoff: Callable[[Details], None] | None = None,
+) -> Any:
+    if max_retries < 0:
+        raise ValueError("max_retries must be non-negative")
+
+    decorated = backoff.on_exception(
+        backoff.expo,
+        exception=(Exception,),
+        giveup=lambda e: not is_retryable_error(e),
+        max_tries=max_retries + 1,
+        factor=factor,
+        on_backoff=on_backoff,
+    )(func)
+    return await decorated()
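`retry_async` replaces the old module-level `@backoff.on_exception` decorators: `max_retries` counts retries after the first attempt (hence `max_tries=max_retries + 1`), so `max_retries=0` means a single attempt with no retry. A self-contained usage sketch, assuming `retry_async` is imported from `docent._llm_util.providers.common`; the flaky coroutine and error type below are hypothetical:

```python
import asyncio
import random

class TransientError(Exception):
    """Hypothetical stand-in for a retryable provider error."""

async def flaky_call() -> str:
    if random.random() < 0.7:
        raise TransientError("temporary failure")
    return "ok"

async def main() -> None:
    result = await retry_async(
        flaky_call,
        max_retries=3,  # up to 4 total attempts (max_tries = 3 + 1)
        is_retryable_error=lambda e: isinstance(e, TransientError),
        factor=3.0,     # exponential backoff factor passed to backoff.expo
    )
    print(result)

asyncio.run(main())
```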