docent-python 0.1.44a0__tar.gz → 0.1.46a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/PKG-INFO +1 -1
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/data_models/llm_output.py +1 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/llm_cache.py +16 -3
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/llm_svc.py +7 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/anthropic.py +53 -37
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/google.py +11 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/openai.py +49 -10
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/openrouter.py +46 -2
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/provider_registry.py +5 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/chat/__init__.py +2 -0
- docent_python-0.1.46a0/docent/data_models/chat/response_format.py +47 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/__init__.py +4 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/impl.py +165 -118
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/runner.py +9 -1
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/types.py +122 -66
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/util/meta_schema.json +5 -0
- docent_python-0.1.46a0/docent/judges/util/template_formatter.py +166 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/mcp/server.py +5 -5
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/sdk/client.py +16 -2
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/sdk/llm_context.py +1 -1
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/pyproject.toml +2 -2
- docent_python-0.1.46a0/uv.lock +3277 -0
- docent_python-0.1.44a0/uv.lock +0 -2541
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/.gitignore +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/LICENSE.md +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/README.md +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/model_registry.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/agent_run.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/py.typed +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/trace.py +0 -0
- {docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/trace_temp.py +0 -0
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/data_models/llm_output.py
RENAMED

@@ -62,6 +62,7 @@ class LLMCompletion(BaseModel):
         tool_calls: List of tool calls made during the completion.
         finish_reason: Reason why the completion finished.
         top_logprobs: Probability distribution for top token choices.
+        reasoning_tokens: Extended thinking tokens (for reasoning models).
     """

     text: str | None = None
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/llm_cache.py
RENAMED

@@ -9,6 +9,7 @@ from typing import Literal
 from docent._llm_util.data_models.llm_output import LLMOutput
 from docent._log_util import get_logger
 from docent.data_models.chat import ChatMessage, ToolInfo
+from docent.data_models.chat.response_format import ResponseFormat

 logger = get_logger(__name__)

@@ -59,6 +60,7 @@ class LLMCache:
         temperature: float = 1.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        response_format: ResponseFormat | None = None,
     ) -> str:
         """Create a deterministic hash key from messages and model."""
         # Convert messages to a stable string representation
@@ -71,10 +73,15 @@ class LLMCache:
             json.dumps([tool.model_dump() for tool in tools], sort_keys=True) if tools else None
         )

-        #
-
-
+        # Convert response_format to a stable string representation if present
+        response_format_str = (
+            json.dumps(response_format.model_dump(by_alias=True), sort_keys=True)
+            if response_format
+            else None
         )
+
+        # Combine all parameters into a single string
+        key_str = f"{message_str}:{model_name}:{tools_str}:{tool_choice}:{reasoning_effort}:{temperature}:{response_format_str}"
         if logprobs:
             key_str += f":{top_logprobs}"
         return hashlib.sha256(key_str.encode()).hexdigest()
@@ -90,6 +97,7 @@ class LLMCache:
         temperature: float = 1.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        response_format: ResponseFormat | None = None,
     ) -> LLMOutput | None:
         """Get cached completion for a conversation if it exists."""

@@ -102,6 +110,7 @@ class LLMCache:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            response_format=response_format,
         )

         with self._get_connection() as conn:
@@ -125,6 +134,7 @@ class LLMCache:
         temperature: float = 1.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        response_format: ResponseFormat | None = None,
     ) -> None:
         """Cache a completion for a conversation."""

@@ -137,6 +147,7 @@ class LLMCache:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            response_format=response_format,
         )

         with self._get_connection() as conn:
@@ -158,6 +169,7 @@ class LLMCache:
         temperature: float = 1.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        response_format: ResponseFormat | None = None,
     ) -> None:
         """Cache a completion for a conversation."""

@@ -172,6 +184,7 @@ class LLMCache:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            response_format=response_format,
         )
         keys.append(key)

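The key construction above hashes a deterministic serialization of every request parameter, so the new `response_format` argument is serialized stably too (hence `sort_keys=True` and `by_alias=True`). A minimal standalone sketch of the idea, with the other parameters elided:

```python
import hashlib
import json


def cache_key_sketch(message_str: str, model_name: str, response_format_dump: dict | None) -> str:
    # json.dumps(..., sort_keys=True) gives a stable string, so two
    # semantically identical response formats produce the same key.
    response_format_str = (
        json.dumps(response_format_dump, sort_keys=True) if response_format_dump else None
    )
    key_str = f"{message_str}:{model_name}:{response_format_str}"
    return hashlib.sha256(key_str.encode()).hexdigest()
```

`cache_key_sketch` is illustrative only; the real method also folds in tools, tool_choice, reasoning_effort, temperature, and (when logprobs are requested) top_logprobs, as shown in the diff.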
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/llm_svc.py
RENAMED

@@ -37,6 +37,7 @@ from docent._llm_util.providers.provider_registry import (
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import ChatMessage, ToolInfo, parse_chat_message
+from docent.data_models.chat.response_format import ResponseFormat

 logger = get_logger(__name__)

@@ -90,6 +91,7 @@ async def _parallelize_calls(
     semaphore: Semaphore,
     # use_tqdm: bool,
     cache: LLMCache | None = None,
+    response_format: ResponseFormat | None = None,
 ):
     base_func = partial(
         single_output_getter,
@@ -103,6 +105,7 @@ async def _parallelize_calls(
         logprobs=logprobs,
         top_logprobs=top_logprobs,
         timeout=timeout,
+        response_format=response_format,
     )

     responses: list[LLMOutput | None] = [None for _ in inputs]
@@ -143,6 +146,7 @@ async def _parallelize_calls(
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            response_format=response_format,
         )
         if cache is not None
         else None
@@ -271,6 +275,7 @@ async def _parallelize_calls(
                 temperature=temperature,
                 logprobs=logprobs,
                 top_logprobs=top_logprobs,
+                response_format=response_format,
             )
             return len(indices)
         else:
@@ -351,6 +356,7 @@ class BaseLLMService:
         validation_callback: AsyncLLMOutputStreamingCallback | None = None,
         completion_callback: AsyncLLMOutputStreamingCallback | None = None,
         use_cache: bool = False,
+        response_format: ResponseFormat | None = None,
         _api_key_overrides: dict[str, str] = dict(),
     ) -> list[LLMOutput]:
         """Request completions from a configured LLM provider."""
@@ -424,6 +430,7 @@ class BaseLLMService:
             timeout=timeout,
             semaphore=self._semaphore,
             cache=cache,
+            response_format=response_format,
         )
         assert len(outputs) == len(inputs), "Number of outputs must match number of messages"

{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/anthropic.py
RENAMED

@@ -5,6 +5,7 @@ import backoff
 # all errors: https://docs.anthropic.com/en/api/errors
 from anthropic import (
     AsyncAnthropic,
+    AsyncStream,
     AuthenticationError,
     BadRequestError,
     NotFoundError,
@@ -12,7 +13,6 @@ from anthropic import (
     RateLimitError,
     UnprocessableEntityError,
 )
-from anthropic._types import NOT_GIVEN
 from anthropic.types import (
     InputJSONDelta,
     Message,
@@ -70,6 +70,7 @@ from docent.data_models.chat import (
     ToolInfo,
     ToolMessage,
 )
+from docent.data_models.chat.response_format import ResponseFormat

 logger = get_logger(__name__)

@@ -217,34 +218,43 @@ async def get_anthropic_chat_completion_streaming_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
+    response_format: ResponseFormat | None = None,
 ):
+    if response_format is not None:
+        raise NotImplementedError(
+            "Structured outputs (response_format) are not implemented for Anthropic yet."
+        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Anthropic yet."
         )

     system, input_messages = parse_chat_messages(messages)
-    input_tools = parse_tools(tools) if tools else NOT_GIVEN

     try:
         async with async_timeout_ctx(timeout):
-                model
-                messages
-                …
+            create_kwargs: dict[str, Any] = {
+                "model": model_name,
+                "messages": input_messages,
+                "max_tokens": max_new_tokens,
+                "temperature": temperature,
+                "stream": True,
+            }
+            if reasoning_effort:
+                create_kwargs["thinking"] = {
+                    "type": "enabled",
+                    "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                }
+            if tools:
+                create_kwargs["tools"] = parse_tools(tools)
+            if tool_choice_param := _parse_tool_choice(tool_choice):
+                create_kwargs["tool_choice"] = tool_choice_param
+            if system is not None:
+                create_kwargs["system"] = system
+
+            stream = cast(
+                AsyncStream[RawMessageStreamEvent],
+                await client.messages.create(**create_kwargs),
             )

         llm_output_partial = None
@@ -399,6 +409,7 @@ async def get_anthropic_chat_completion_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
+    response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
     """
     Note from kevin 1/29/2025:
@@ -409,33 +420,38 @@ async def get_anthropic_chat_completion_async(
     We should actually implement this at some point, but it does not work.
     """

+    if response_format is not None:
+        raise NotImplementedError(
+            "Structured outputs (response_format) are not implemented for Anthropic yet."
+        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Anthropic yet."
         )

     system, input_messages = parse_chat_messages(messages)
-    input_tools = parse_tools(tools) if tools else NOT_GIVEN

     try:
         async with async_timeout_ctx(timeout):
-                model
-                messages
-                …
+            create_kwargs: dict[str, Any] = {
+                "model": model_name,
+                "messages": input_messages,
+                "max_tokens": max_new_tokens,
+                "temperature": temperature,
+            }
+            if reasoning_effort:
+                create_kwargs["thinking"] = {
+                    "type": "enabled",
+                    "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                }
+            if tools:
+                create_kwargs["tools"] = parse_tools(tools)
+            if tool_choice_param := _parse_tool_choice(tool_choice):
+                create_kwargs["tool_choice"] = tool_choice_param
+            if system is not None:
+                create_kwargs["system"] = system
+
+            raw_output = cast(Message, await client.messages.create(**create_kwargs))

         output = parse_anthropic_completion(raw_output, model_name)
         if output.first and output.first.finish_reason == "length" and output.first.no_text:
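Both Anthropic call sites now build a single `create_kwargs` dict and attach optional keys (`thinking`, `tools`, `tool_choice`, `system`) only when set, which keeps the `**create_kwargs` call free of sentinel values like `NOT_GIVEN`. A hedged sketch of the dict's shape when extended thinking is enabled (the model name, messages, and budget value are illustrative, not defaults from the package):

```python
# Illustrative only: real values come from the function arguments, and
# "thinking" is attached only when reasoning_effort is set.
create_kwargs = {
    "model": "claude-sonnet-4-20250514",  # example model name
    "messages": [{"role": "user", "content": "Summarize this transcript."}],
    "max_tokens": 2048,
    "temperature": 1.0,
    "thinking": {"type": "enabled", "budget_tokens": 1024},
}
```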
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/google.py
RENAMED

@@ -28,6 +28,7 @@ from docent._llm_util.providers.common import (
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import ChatMessage, Content, ContentText, ToolCall, ToolInfo
+from docent.data_models.chat.response_format import ResponseFormat


 def get_google_client_async(api_key: str | None = None) -> AsyncGoogle:
@@ -82,7 +83,12 @@ async def get_google_chat_completion_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
+    response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
+    if response_format is not None:
+        raise NotImplementedError(
+            "Structured outputs (response_format) are not implemented for Google yet."
+        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Google yet."
@@ -145,7 +151,12 @@ async def get_google_chat_completion_streaming_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
+    response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
+    if response_format is not None:
+        raise NotImplementedError(
+            "Structured outputs (response_format) are not implemented for Google yet."
+        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Google yet."
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/openai.py
RENAMED

@@ -40,6 +40,10 @@ from openai.types.chat.chat_completion_message_tool_call_param import (
     Function as OpenAIFunctionParam,
 )
 from openai.types.shared_params.function_definition import FunctionDefinition
+from openai.types.shared_params.response_format_json_schema import (
+    JSONSchema,
+    ResponseFormatJSONSchema,
+)

 from docent._llm_util.data_models.exceptions import (
     CompletionTooLongException,
@@ -70,6 +74,7 @@ from docent.data_models.chat import (
     ToolInfo,
     ToolMessage,
 )
+from docent.data_models.chat.response_format import ResponseFormat

 logger = get_logger(__name__)
 DEFAULT_TIKTOKEN_ENCODING = "cl100k_base"
@@ -194,6 +199,42 @@ def parse_tools(tools: list[ToolInfo]) -> list[ChatCompletionToolParam]:
     return result


+def _build_response_format(
+    response_format: ResponseFormat | None,
+) -> ResponseFormatJSONSchema | None:
+    """Build OpenAI response_format param from unified ResponseFormat.
+
+    Converts the unified ResponseFormat specification to OpenAI's
+    expected response_format structure for structured outputs.
+
+    Args:
+        response_format: The unified response format specification, or None.
+
+    Returns:
+        OpenAI response_format param if provided, None otherwise.
+
+    Raises:
+        ValueError: If response_format.type is not 'json_schema'.
+    """
+    if response_format is None:
+        return None
+
+    if response_format.type != "json_schema":
+        raise ValueError(
+            f"Unsupported response format type: {response_format.type}. "
+            "Only 'json_schema' is currently supported."
+        )
+
+    return ResponseFormatJSONSchema(
+        type="json_schema",
+        json_schema=JSONSchema(
+            name=response_format.name,
+            strict=response_format.strict,
+            schema=response_format.schema_,
+        ),
+    )
+
+
 @backoff.on_exception(
     backoff.expo,
     exception=(Exception,),
@@ -215,16 +256,14 @@ async def get_openai_chat_completion_streaming_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 30.0,
+    response_format: ResponseFormat | None = None,
 ):
-    input_messages = parse_chat_messages(messages)
-    input_tools = parse_tools(tools) if tools else omit
-
     try:
         async with async_timeout_ctx(timeout):
             stream = await client.chat.completions.create(
                 model=model_name,
-                messages=input_messages,
-                tools=input_tools,
+                messages=parse_chat_messages(messages),
+                tools=parse_tools(tools) if tools else omit,
                 tool_choice=tool_choice or omit,
                 max_completion_tokens=max_new_tokens,
                 temperature=temperature,
@@ -233,6 +272,7 @@ async def get_openai_chat_completion_streaming_async(
                 top_logprobs=top_logprobs,
                 stream_options={"include_usage": True},
                 stream=True,
+                response_format=_build_response_format(response_format) or omit,
             )

         llm_output_partial = None
@@ -406,22 +446,21 @@ async def get_openai_chat_completion_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
+    response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
-    input_messages = parse_chat_messages(messages)
-    input_tools = parse_tools(tools) if tools else omit
-
     try:
         async with async_timeout_ctx(timeout):  # type: ignore
             raw_output = await client.chat.completions.create(
                 model=model_name,
-                messages=input_messages,
-                tools=input_tools,
+                messages=parse_chat_messages(messages),
+                tools=parse_tools(tools) if tools else omit,
                 tool_choice=tool_choice or omit,
                 max_completion_tokens=max_new_tokens,
                 temperature=temperature,
                 reasoning_effort=reasoning_effort or omit,
                 logprobs=logprobs,
                 top_logprobs=top_logprobs,
+                response_format=_build_response_format(response_format) or omit,
             )

             # If the completion is empty and was truncated (likely due to too much reasoning), raise an exception
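To make the conversion concrete, here is a hedged example of what `_build_response_format` produces for a small schema (the schema and its name are made up for illustration):

```python
from docent.data_models.chat.response_format import ResponseFormat

rf = ResponseFormat(
    name="scored_answer",  # illustrative schema name
    schema={
        "type": "object",
        "properties": {"score": {"type": "number"}},
        "required": ["score"],
    },
)

# _build_response_format(rf) yields the equivalent of:
# {
#     "type": "json_schema",
#     "json_schema": {"name": "scored_answer", "strict": True, "schema": {...}},
# }
# Both call sites pass it as `response_format=... or omit`, so a None
# result falls back to omitting the parameter entirely.
```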
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/openrouter.py
RENAMED

@@ -31,6 +31,7 @@ from docent.data_models.chat import (
     ToolInfo,
     ToolMessage,
 )
+from docent.data_models.chat.response_format import ResponseFormat

 logger = get_logger(__name__)

@@ -59,6 +60,7 @@ class OpenRouterClient:
         max_tokens: int = 32,
         temperature: float = 1.0,
         timeout: float = 30.0,
+        response_format: dict[str, Any] | None = None,
     ) -> dict[str, Any]:
         """Make an async chat completion request."""
         url = f"{self.base_url}/chat/completions"
@@ -74,6 +76,8 @@ class OpenRouterClient:
             payload["tools"] = tools
         if tool_choice:
             payload["tool_choice"] = tool_choice
+        if response_format:
+            payload["response_format"] = response_format

         async with aiohttp.ClientSession() as session:
             async with session.post(
@@ -203,6 +207,37 @@ def parse_tools(tools: list[ToolInfo]) -> list[dict[str, Any]]:
     return result


+def _build_response_format(response_format: ResponseFormat | None) -> dict[str, Any] | None:
+    """Convert ResponseFormat to OpenRouter's response_format parameter.
+
+    Args:
+        response_format: The unified response format specification.
+
+    Returns:
+        OpenRouter-formatted response_format dict, or None if not provided.
+
+    Raises:
+        ValueError: If response_format.type is not a supported format type.
+    """
+    if response_format is None:
+        return None
+
+    if response_format.type != "json_schema":
+        raise ValueError(
+            f"Unsupported response format type: {response_format.type}. "
+            "Only 'json_schema' is currently supported."
+        )
+
+    return {
+        "type": "json_schema",
+        "json_schema": {
+            "name": response_format.name,
+            "strict": response_format.strict,
+            "schema": response_format.schema_,
+        },
+    }
+
+
 def _parse_openrouter_tool_call(tc: dict[str, Any]) -> ToolCall:
     """Parse tool call from OpenRouter response."""
     if tc.get("type") != "function":
@@ -232,7 +267,10 @@ def _parse_openrouter_tool_call(tc: dict[str, Any]) -> ToolCall:
     )


-def parse_openrouter_completion(response: dict[str, Any], model: str) -> LLMOutput:
+def parse_openrouter_completion(
+    response: dict[str, Any],
+    model: str,
+) -> LLMOutput:
     """Parse OpenRouter completion response."""
     choices = response.get("choices", [])
     if not choices:
@@ -252,10 +290,11 @@ def parse_openrouter_completion(response: dict[str, Any], model: str) -> LLMOutput:
     for choice in choices:
         message = choice.get("message", {})
         tool_calls_data = message.get("tool_calls")
+        content = message.get("content")

         completions.append(
             LLMCompletion(
-                text=message.get("content"),
+                text=content,
                 finish_reason=choice.get("finish_reason"),
                 tool_calls=(
                     [_parse_openrouter_tool_call(tc) for tc in tool_calls_data]
@@ -292,6 +331,7 @@ async def get_openrouter_chat_completion_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 30.0,
+    response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
     """Get completion from OpenRouter."""
     if logprobs or top_logprobs is not None:
@@ -304,6 +344,7 @@ async def get_openrouter_chat_completion_async(

     input_messages = parse_chat_messages(messages)
     input_tools = parse_tools(tools) if tools else None
+    input_response_format = _build_response_format(response_format)

     response = await client.chat_completions_create(
         model=model_name,
@@ -313,6 +354,7 @@ async def get_openrouter_chat_completion_async(
         max_tokens=max_new_tokens,
         temperature=temperature,
         timeout=timeout,
+        response_format=input_response_format,
     )

     output = parse_openrouter_completion(response, model_name)
@@ -346,6 +388,7 @@ async def get_openrouter_chat_completion_streaming_async(
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 30.0,
+    response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
     """Get streaming completion from OpenRouter (falls back to non-streaming)."""
     logger.warning("Streaming not yet implemented for OpenRouter, using non-streaming.")
@@ -362,6 +405,7 @@ async def get_openrouter_chat_completion_streaming_async(
         logprobs=logprobs,
         top_logprobs=top_logprobs,
         timeout=timeout,
+        response_format=response_format,
     )

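Since OpenRouter mirrors OpenAI's JSON-Schema shape, the helper returns a plain dict that `chat_completions_create` merges into the request body via `payload["response_format"] = response_format`. A hedged sketch of the resulting payload (model id and schema are examples):

```python
# Illustrative request body sent to {base_url}/chat/completions.
payload = {
    "model": "openai/gpt-4o-mini",  # example OpenRouter model id
    "messages": [{"role": "user", "content": "Return a score."}],
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "scored_answer",
            "strict": True,
            "schema": {"type": "object", "properties": {"score": {"type": "number"}}},
        },
    },
}
```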
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/_llm_util/providers/provider_registry.py
RENAMED

@@ -26,6 +26,7 @@ from docent._llm_util.providers.openrouter import (
     get_openrouter_chat_completion_streaming_async,
 )
 from docent.data_models.chat import ChatMessage, ToolInfo
+from docent.data_models.chat.response_format import ResponseFormat


 class SingleOutputGetter(Protocol):
@@ -49,6 +50,7 @@ class SingleOutputGetter(Protocol):
         logprobs: bool,
         top_logprobs: int | None,
         timeout: float,
+        response_format: ResponseFormat | None,
     ) -> LLMOutput:
         """Get a single completion from an LLM.

@@ -64,6 +66,7 @@ class SingleOutputGetter(Protocol):
             logprobs: Whether to return log probabilities.
             top_logprobs: Number of most likely tokens to return probabilities for.
             timeout: Maximum time to wait for a response in seconds.
+            response_format: Optional structured output format specification.

         Returns:
             LLMOutput: The model's response.
@@ -93,6 +96,7 @@ class SingleStreamingOutputGetter(Protocol):
         logprobs: bool,
         top_logprobs: int | None,
         timeout: float,
+        response_format: ResponseFormat | None,
     ) -> LLMOutput:
         """Get a streaming completion from an LLM.

@@ -109,6 +113,7 @@ class SingleStreamingOutputGetter(Protocol):
             logprobs: Whether to return log probabilities.
             top_logprobs: Number of most likely tokens to return probabilities for.
             timeout: Maximum time to wait for a response in seconds.
+            response_format: Optional structured output format specification.

         Returns:
             LLMOutput: The complete model response after streaming finishes.
{docent_python-0.1.44a0 → docent_python-0.1.46a0}/docent/data_models/chat/__init__.py
RENAMED

@@ -10,6 +10,7 @@ from docent.data_models.chat.message import (
     parse_chat_message,
     parse_docent_chat_message,
 )
+from docent.data_models.chat.response_format import ResponseFormat
 from docent.data_models.chat.tool import (
     ToolCall,
     ToolCallContent,
@@ -28,6 +29,7 @@ __all__ = [
     "Content",
     "ContentReasoning",
     "ContentText",
+    "ResponseFormat",
     "ToolCall",
     "ToolCallContent",
     "ToolInfo",
docent_python-0.1.46a0/docent/data_models/chat/response_format.py
ADDED

@@ -0,0 +1,47 @@
+"""Response format specification for structured outputs."""
+
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+class ResponseFormat(BaseModel):
+    """Unified response format specification for structured outputs.
+
+    Supports JSON Schema-based constrained decoding across LLM providers.
+    Each provider converts this to their specific format:
+    - OpenAI: response_format parameter
+    - Anthropic: output_format parameter (with beta header)
+    - OpenRouter: response_format parameter (same as OpenAI)
+
+    Attributes:
+        type: The format type. Currently only "json_schema" is supported.
+        name: A name for the schema (required by all providers).
+        schema_: The JSON Schema definition as a dict.
+        strict: Whether to enforce strict schema adherence (default True).
+
+    Example:
+        ```python
+        response_format = ResponseFormat(
+            name="analysis_result",
+            schema={
+                "type": "object",
+                "properties": {
+                    "score": {"type": "number"},
+                    "explanation": {"type": "string"},
+                },
+                "required": ["score", "explanation"],
+            },
+        )
+        ```
+    """
+
+    type: Literal["json_schema"] = "json_schema"
+    name: str
+    # Named `schema_` to avoid conflict with Pydantic's internal schema methods
+    schema_: dict[str, Any] = Field(alias="schema")
+    strict: bool = True
+
+    model_config = {"populate_by_name": True}
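One detail worth noting: because the field is declared as `schema_` with `alias="schema"` and `populate_by_name` enabled, the model accepts either spelling on input, and `model_dump(by_alias=True)` emits the provider-facing `"schema"` key. That aliased dump is also what the LLM cache hashes (see llm_cache.py above). A short sketch:

```python
from docent.data_models.chat.response_format import ResponseFormat

rf = ResponseFormat(
    name="example",
    schema={"type": "object", "properties": {"ok": {"type": "boolean"}}},
)

dumped = rf.model_dump(by_alias=True)
# The alias round-trips: providers and the cache key see "schema", not "schema_".
assert "schema" in dumped and "schema_" not in dumped
```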