docent-python 0.1.61a0__tar.gz → 0.1.63a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/.gitignore +5 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/PKG-INFO +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/__init__.py +2 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/exceptions.py +18 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/llm_output.py +1 -5
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/llm_svc.py +125 -165
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/model_registry.py +3 -3
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/anthropic.py +10 -4
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/google.py +47 -31
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/openai.py +38 -7
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/openrouter.py +3 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_log_util/logger.py +3 -2
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/__init__.py +2 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/agent_run.py +139 -165
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/citation.py +49 -11
- docent_python-0.1.63a0/docent/data_models/context_config.py +88 -0
- docent_python-0.1.63a0/docent/data_models/metadata_util.py +180 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/reading.py +120 -48
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/regex.py +2 -2
- docent_python-0.1.63a0/docent/data_models/report.py +16 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/transcript.py +75 -38
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/impl.py +41 -44
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/types.py +2 -2
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/util/parse_output.py +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/loaders/load_inspect.py +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/mcp/server.py +250 -9
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_base.py +53 -6
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_client_util.py +75 -6
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_collections.py +16 -12
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_dql.py +8 -6
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_readings.py +236 -44
- docent_python-0.1.63a0/docent/sdk/_reports.py +281 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_results.py +3 -3
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_rubrics.py +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_sharing.py +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/agent_run_writer.py +9 -4
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/client.py +2 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/integrations/inspect.py +8 -6
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/llm_context.py +208 -94
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/reading.py +19 -7
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/trace.py +46 -41
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/pyproject.toml +1 -1
- docent_python-0.1.61a0/docent/_llm_util/llm_cache.py +0 -206
- docent_python-0.1.61a0/docent/data_models/metadata_util.py +0 -32
- docent_python-0.1.61a0/docent/trace_temp.py +0 -1088
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/LICENSE.md +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/README.md +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/feedback.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/py.typed +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_agent_runs.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_feedback.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/_labels.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/integrations/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/integrations/harbor.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/integrations/nemogym.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/integrations/util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/sdk/util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.63a0}/uv.lock +0 -0
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/.gitignore
RENAMED

@@ -145,8 +145,9 @@ ENV/
 env.bak/
 venv.bak/
 
-# Docent
+# Docent
 docent.env*
+docent_analyses/
 
 # Spyder project settings
 .spyderproject
@@ -204,3 +205,6 @@ data/cache
 
 # dont commit package lock, force use of bun lock
 package-lock.json
+
+# Claude Code worktrees
+.claude/worktrees/
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/__init__.py
RENAMED

@@ -4,6 +4,7 @@ __all__ = [
     "load_config_file",
     "AgentRunRef",
     "TranscriptRef",
+    "TranscriptSliceRef",
     "ReadingResultRef",
     "ResultRef",
     "Prompt",
@@ -17,4 +18,5 @@ from docent.sdk.llm_context import (
     ReadingResultRef,
     ResultRef,
     TranscriptRef,
+    TranscriptSliceRef,
 )
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/exceptions.py
RENAMED

@@ -35,6 +35,11 @@ class ContextWindowException(LLMException):
     user_message = "Context window exceeded."
 
 
+class InvalidPromptException(LLMException):
+    error_type_id = "invalid_prompt"
+    user_message = "The model provider rejected this prompt for safety reasons."
+
+
 class NoResponseException(LLMException):
     error_type_id = "no_response"
     user_message = "The model returned an empty response. Please try again later."
@@ -45,6 +50,17 @@ class DocentUsageLimitException(LLMException):
     user_message = "Free daily usage limit reached. Add your own API key in settings or contact us for increased limits."
 
 
+class ProviderAuthenticationException(LLMException):
+    error_type_id = "provider_authentication"
+
+    def __init__(self, message: str = ""):
+        super().__init__(message)
+        self.user_message = (
+            "The model provider API key could not be authenticated. "
+            "If you added your own key, update it in Settings > Model providers."
+        )
+
+
 class ValidationFailedException(LLMException):
     error_type_id = "validation_failed"
     user_message = "The model returned invalid output that failed validation."
@@ -64,8 +80,10 @@ LLM_ERROR_TYPES: list[type[LLMException]] = [
     CompletionTooLongException,
     RateLimitException,
     ContextWindowException,
+    InvalidPromptException,
     NoResponseException,
     DocentUsageLimitException,
+    ProviderAuthenticationException,
     ValidationFailedException,
     TimeoutException,
 ]
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/llm_output.py
RENAMED

@@ -97,7 +97,6 @@ class LLMOutput:
     completions: list[LLMCompletion]
     errors: list[LLMException] = field(default_factory=lambda: [])
     usage: UsageMetrics = field(default_factory=UsageMetrics)
-    from_cache: bool = False
     duration: float | None = None
 
     @property
@@ -142,7 +141,6 @@ class LLMOutput:
             "completions": [comp.model_dump() for comp in self.completions],
             "errors": [e.error_type_id for e in self.errors],
             "usage": self.usage.to_dict(),
-            "from_cache": self.from_cache,
             "duration": self.duration,
         }
 
@@ -156,7 +154,7 @@ class LLMOutput:
         ]
         errors_to_log = [e for e in errors if e not in error_types_to_not_log]
         if errors_to_log:
-            logger.error(
+            logger.error("Loading LLM output with errors: %s", errors)
         errors = [error_type_map.get(e, LLMException)() for e in errors]
 
         completions = data.get("completions", [])
@@ -171,7 +169,6 @@ class LLMOutput:
             completions=completions,
             errors=errors,
             usage=UsageMetrics(**usage),
-            from_cache=bool(data.get("from_cache", False)),
             duration=data.get("duration"),
         )
 
@@ -275,7 +272,6 @@ def finalize_llm_output_partial(partial: LLMOutputPartial) -> LLMOutput:
             for c in partial.completions
         ],
         usage=partial.usage,
-        from_cache=False,
     )
 
     # If the completion is empty and was truncated (likely due to too much reasoning), raise an exception
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/llm_svc.py
RENAMED

@@ -28,7 +28,6 @@ from docent._llm_util.data_models.llm_output import (
     AsyncSingleLLMOutputStreamingCallback,
     LLMOutput,
 )
-from docent._llm_util.llm_cache import LLMCache
 from docent._llm_util.providers.preference_types import ModelOption
 from docent._llm_util.providers.provider_registry import (
     PROVIDERS,
@@ -37,6 +36,7 @@ from docent._llm_util.providers.provider_registry import (
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import ChatMessage, ToolInfo, parse_chat_message
+from docent.data_models.chat.message import AssistantMessage, UserMessage
 from docent.data_models.chat.response_format import ResponseFormat
 
 logger = get_logger(__name__)
@@ -91,8 +91,8 @@ async def _parallelize_calls(
     semaphore: Semaphore,
     max_retries: int,
     # use_tqdm: bool,
-    cache: LLMCache | None = None,
     response_format: ResponseFormat | None = None,
+    retry_with_feedback: bool = False,
 ):
     base_func = partial(
         single_output_getter,
@@ -120,122 +120,129 @@ async def _parallelize_calls(
         else None
     )
 
-    # Save resolved messages to avoid multiple resolutions
-    resolved_messages: list[list[ChatMessage] | None] = [None] * len(inputs)
-
     # Not sure why the cast is necessary for the type checker
     cancelled_due_to_usage_limit: bool = cast(bool, False)
 
+    def _mark_usage_limit_responses() -> None:
+        for i, response in enumerate(responses):
+            if response is None:
+                responses[i] = LLMOutput(
+                    model=model_name,
+                    completions=[],
+                    errors=[DocentUsageLimitException()],
+                )
+            elif not response.completions and not response.errors:
+                response.errors.append(DocentUsageLimitException())
+
     async def _limited_task(i: int, cur_input: MessagesInput, tg: TaskGroup):
-        nonlocal responses, pbar,
+        nonlocal responses, pbar, cancelled_due_to_usage_limit
 
         async with semaphore:
            messages = _resolve_messages_input(cur_input)
-            resolved_messages[i] = messages
 
             retry_count = 0
             result = None
-            call_started_at
-
-
-
-
-
-
-
-
-
-
-
-
-                    response_format=response_format,
-                )
-                if cache is not None
-                else None
-            )
-            if cached_result is not None:
-                result = cached_result
-                if streaming_callback is not None:
-                    await streaming_callback(i, result)
-            else:
-                call_started_at = time.perf_counter()
-                while retry_count < MAX_VALIDATION_ATTEMPTS:
-                    try:
-                        if streaming_callback is None:
-                            result = await base_func(client=client, messages=messages)
-                        else:
-                            result = await base_func(
-                                client=client,
-                                streaming_callback=_get_single_streaming_callback(
-                                    i, streaming_callback
-                                ),
-                                messages=messages,
-                            )
-
-                        # Validate if validation callback provided and result is successful
-                        if validation_callback and not result.did_error:
-                            await validation_callback(i, result)
-
-                        break
-                    except ValidationFailedException as e:
-                        retry_count += 1
-                        logger.warning(
-                            f"Validation failed for {model_name} after {retry_count} attempts: {e}"
+            call_started_at = time.perf_counter()
+            current_messages = messages
+            while retry_count < MAX_VALIDATION_ATTEMPTS:
+                try:
+                    if streaming_callback is None:
+                        result = await base_func(client=client, messages=current_messages)
+                    else:
+                        result = await base_func(
+                            client=client,
+                            streaming_callback=_get_single_streaming_callback(
+                                i, streaming_callback
+                            ),
+                            messages=current_messages,
                         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                    # Validate if validation callback provided and result is successful
+                    if validation_callback and not result.did_error:
+                        await validation_callback(i, result)
+
+                    break
+                except ValidationFailedException as e:
+                    retry_count += 1
+                    logger.warning(
+                        "Validation failed for %s after %d attempts: %s",
+                        model_name,
+                        retry_count,
+                        e,
+                        extra={"original_output": e.failed_output},
+                    )
+                    if retry_count >= MAX_VALIDATION_ATTEMPTS:
+                        logger.error(
+                            "Validation failed for %s after %d attempts: %s",
+                            model_name,
+                            retry_count,
+                            e,
+                            extra={"original_output": e.failed_output},
                         )
-                        cancelled_due_to_usage_limit = True
-                        tg.cancel_scope.cancel()
-                        break
-                    except asyncio.TimeoutError as e:
-                        timeout_exception = TimeoutException(str(e) or "Request timed out")
-                        timeout_exception.__cause__ = e
-                        logger.error(f"Call to {model_name} timed out")
                         result = LLMOutput(
                             model=model_name,
                             completions=[],
-                            errors=[
+                            errors=[e],
                         )
                         break
-                    except Exception as e:
-                        if not isinstance(e, LLMException):
-                            logger.error(
-                                f"LLM call raised an exception that is not an LLMException: {e}. Failure traceback:\n{traceback.format_exc()}"
-                            )
-                            llm_exception = LLMException(e)
-                            llm_exception.__cause__ = e
-                        else:
-                            llm_exception = e
-
-                        error_message = f"Call to {model_name} failed even with backoff: {e.__class__.__name__}."
-
-                        if not isinstance(e, RateLimitException):
-                            error_message += f" Failure traceback:\n{traceback.format_exc()}"
-                        logger.error(error_message)
 
-
-
-
-
+                    if retry_with_feedback:
+                        # Build a new message list with the failed output and
+                        # error feedback so the model can correct itself
+                        current_messages = [
+                            *messages,
+                            AssistantMessage(content=e.failed_output or ""),
+                            UserMessage(
+                                content=f"Your previous output failed validation: {e}\n\nPlease try again with a corrected output."
+                            ),
+                        ]
+                except DocentUsageLimitException as _:
+                    result = LLMOutput(
+                        model=model_name,
+                        completions=[],
+                        errors=[],  # Usage limit exceptions will be added to all results later if cancelled_due_to_usage_limit
+                    )
+                    cancelled_due_to_usage_limit = True
+                    tg.cancel_scope.cancel()
+                    break
+                except asyncio.TimeoutError as e:
+                    timeout_exception = TimeoutException(str(e) or "Request timed out")
+                    timeout_exception.__cause__ = e
+                    logger.error("Call to %s timed out", model_name)
+                    result = LLMOutput(
+                        model=model_name,
+                        completions=[],
+                        errors=[timeout_exception],
+                    )
+                    break
+                except Exception as e:
+                    if not isinstance(e, LLMException):
+                        logger.error(
+                            "LLM call raised an exception that is not an LLMException: %s. Failure traceback:\n%s",
+                            e,
+                            traceback.format_exc(),
                         )
-
+                        llm_exception = LLMException(e)
+                        llm_exception.__cause__ = e
+                    else:
+                        llm_exception = e
+
+                    error_message = (
+                        f"Call to {model_name} failed even with backoff: {e.__class__.__name__}."
+                    )
 
-
-
+                    if not isinstance(e, RateLimitException):
+                        error_message += f" Failure traceback:\n{traceback.format_exc()}"
+                    logger.error(error_message)
+
+                    result = LLMOutput(
+                        model=model_name,
+                        completions=[],
+                        errors=[llm_exception],
+                    )
+                    break
+
+            if result is not None:
                 result.duration = time.perf_counter() - call_started_at
 
             # Always call completion callback with final result (success or error)
@@ -244,44 +251,14 @@ async def _parallelize_calls(
                    await completion_callback(i, result)
                # LLMService uses this callback to record cost, and may throw an error if we just exceeded limit
                except DocentUsageLimitException as e:
-                    result.errors
+                    if not result.completions and not result.errors:
+                        result.errors.append(e)
                    cancelled_due_to_usage_limit = True
                    tg.cancel_scope.cancel()
 
            responses[i] = result
            if pbar is not None:
                pbar.update(1)
-            if pbar is None or pbar.n == pbar.total:
-                tg.cancel_scope.cancel()
-
-    def _cache_responses():
-        nonlocal responses, cache
-
-        if cache is not None:
-            indices = [
-                i
-                for i, response in enumerate(responses)
-                if resolved_messages[i] is not None
-                and response is not None
-                and not response.did_error
-            ]
-            cache.set_batch(
-                # We already checked that each index has a resolved messages list
-                [cast(list[ChatMessage], resolved_messages[i]) for i in indices],
-                model_name,
-                # We already checked that each index corresponds to an LLMOutput object
-                [cast(LLMOutput, responses[i]) for i in indices],
-                tools=tools,
-                tool_choice=tool_choice,
-                reasoning_effort=reasoning_effort,
-                temperature=temperature,
-                logprobs=logprobs,
-                top_logprobs=top_logprobs,
-                response_format=response_format,
-            )
-            return len(indices)
-        else:
-            return 0
 
    # Get all results concurrently
    try:
@@ -290,30 +267,14 @@ async def _parallelize_calls(
            for i, cur_input in enumerate(inputs):
                tg.start_soon(_limited_task, i, cur_input, tg)
 
-    # Cache what we have so far if something got cancelled
    except anyio.get_cancelled_exc_class():
-
-
-        logger.info(
-            f"Cancelled {len(inputs) - num_cached} unfinished LLM API calls, but cached {num_cached} completed responses"
-        )
-
-    # If the task was cancelled due to usage limit, set the response to a usage limit exception
-    if cancelled_due_to_usage_limit:
-        for i, response in enumerate(responses):
-            if response is None:
-                responses[i] = LLMOutput(
-                    model=model_name,
-                    completions=[],
-                    errors=[DocentUsageLimitException()],
-                )
-            else:
-                response.errors.append(DocentUsageLimitException())
+        if not cancelled_due_to_usage_limit:
+            raise
 
-
-
-
-
+    # If we stopped the batch due to usage limits, make sure every input has a
+    # structured result instead of relying on AnyIO's cancellation propagation.
+    if cancelled_due_to_usage_limit:
+        _mark_usage_limit_responses()
 
    # At this point, all indices should have a result
    assert all(isinstance(r, LLMOutput) for r in responses), (
@@ -357,9 +318,9 @@ class BaseLLMService:
        streaming_callback: AsyncLLMOutputStreamingCallback | None = None,
        validation_callback: AsyncLLMOutputStreamingCallback | None = None,
        completion_callback: AsyncLLMOutputStreamingCallback | None = None,
-        use_cache: bool = False,
        response_format: ResponseFormat | None = None,
        max_retries: int = 1,
+        retry_with_feedback: bool = False,
        _api_key_overrides: dict[str, str] = dict(),
    ) -> list[LLMOutput]:
        """Request completions from a configured LLM provider."""
@@ -375,14 +336,6 @@ class BaseLLMService:
                f"Logprobs are not supported for Anthropic, so we can't use model {model_option.model_name}"
            )
 
-        # Instantiate cache
-        # TODO(mengk): make this more robust, possibly move to a NoSQL database or something
-        try:
-            cache = LLMCache() if use_cache else None
-        except ValueError as e:
-            logger.warning(f"Disabling LLM cache due to init error: {e}")
-            cache = None
-
        # Initialize pointer to which model we're using; used for model rotation after failures
        current_model_option_index = 0
 
@@ -395,7 +348,7 @@ class BaseLLMService:
                return None
 
            new_model_option = model_options[current_model_option_index]
-            logger.warning(
+            logger.warning("Switched to next model %s", new_model_option.model_name)
            return new_model_option
 
        while True:
@@ -413,7 +366,7 @@ class BaseLLMService:
            single_output_getter = PROVIDERS[provider]["single_output_getter"]
            single_streaming_output_getter = PROVIDERS[provider]["single_streaming_output_getter"]
 
-            # Get completions for
+            # Get completions for messages.
            outputs: list[LLMOutput] = await _parallelize_calls(
                (
                    single_output_getter
@@ -436,11 +389,18 @@ class BaseLLMService:
                timeout=timeout,
                semaphore=self._semaphore,
                max_retries=max_retries,
-                cache=cache,
                response_format=response_format,
+                retry_with_feedback=retry_with_feedback,
            )
            assert len(outputs) == len(inputs), "Number of outputs must match number of messages"
 
+            if any(
+                isinstance(e, DocentUsageLimitException)
+                for output in outputs
+                for e in output.errors
+            ):
+                break
+
            # Only count errors that should trigger model rotation (API errors, not validation/usage errors)
            num_rotation_errors = sum(
                1
@@ -452,7 +412,7 @@ class BaseLLMService:
                )
            )
            if num_rotation_errors > 0:
-                logger.warning(
+                logger.warning("%s: %s API errors", model_name, num_rotation_errors)
                if not _rotate_model_option():
                    break
            else:
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/model_registry.py
RENAMED

@@ -183,7 +183,7 @@ def get_model_info(model_name: str) -> Optional[ModelInfo]:
 def get_context_window(model_name: str) -> int:
     info = get_model_info(model_name)
     if info is None:
-        logger.warning(
+        logger.warning("No context window found for model %s", model_name)
         return 100_000
     return info.context_window
 
@@ -196,11 +196,11 @@ def get_rates_for_model_name(model_name: str) -> Optional[ModelRate]:
 def estimate_cost_cents(model_name: str, token_count: int, token_type: TokenType) -> float:
     rate = get_rates_for_model_name(model_name)
     if rate is None:
-        logger.warning(
+        logger.warning("No rate found for model %s", model_name)
         return 0.0
     usd_per_mtok = rate.get(token_type)
     if usd_per_mtok is None:
-        logger.warning(
+        logger.warning("No rate found for model %s token type %s", model_name, token_type)
         return 0.0
     cents_per_token = usd_per_mtok * 100 / 1_000_000.0
     return token_count * cents_per_token
{docent_python-0.1.61a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/anthropic.py
RENAMED

@@ -41,6 +41,7 @@ from docent._llm_util.data_models.exceptions import (
     CompletionTooLongException,
     ContextWindowException,
     NoResponseException,
+    ProviderAuthenticationException,
     RateLimitException,
 )
 from docent._llm_util.data_models.llm_output import (
@@ -78,7 +79,9 @@ ANTHROPIC_STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"
 
 def _print_backoff_message(e: Details):
     logger.warning(
-
+        "Anthropic backing off for %.2fs due to %s",
+        e["wait"],  # type: ignore
+        e["exception"].__class__.__name__,  # type: ignore
     )
 
 
@@ -86,6 +89,7 @@ def _is_retryable_error(e: BaseException) -> bool:
     if (
         isinstance(e, BadRequestError)
         or isinstance(e, ContextWindowException)
+        or isinstance(e, ProviderAuthenticationException)
         or isinstance(e, AuthenticationError)
         or isinstance(e, NotImplementedError)
         or isinstance(e, PermissionDeniedError)
@@ -209,6 +213,8 @@ def _build_output_format(response_format: ResponseFormat | None) -> dict[str, An
 
 
 def _convert_anthropic_error(e: Exception):
+    if isinstance(e, (AuthenticationError, PermissionDeniedError)):
+        return ProviderAuthenticationException(e.message)
     if isinstance(e, BadRequestError):
         if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
             return ContextWindowException()
@@ -285,7 +291,7 @@ async def get_anthropic_chat_completion_streaming_async(
         if llm_output_partial:
             return finalize_llm_output_partial(llm_output_partial)
         return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
-    except (RateLimitError, BadRequestError) as e:
+    except (RateLimitError, BadRequestError, AuthenticationError, PermissionDeniedError) as e:
         if e2 := _convert_anthropic_error(e):
             raise e2 from e
         raise
@@ -365,7 +371,7 @@ def update_llm_output(
         ):
             # This should not happen with a well-behaved API, log and skip
             logger.warning(
-
+                "Received InputJSONDelta before start event at index %s, skipping", index
             )
         else:
             cur_tool_calls[index] = ToolCallPartial(
@@ -482,7 +488,7 @@ async def get_anthropic_chat_completion_async(
         )
 
         return output
-    except (RateLimitError, BadRequestError) as e:
+    except (RateLimitError, BadRequestError, AuthenticationError, PermissionDeniedError) as e:
         if e2 := _convert_anthropic_error(e):
             raise e2 from e
         raise