docent-python 0.1.62a0__tar.gz → 0.1.63a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/PKG-INFO +1 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/__init__.py +2 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/exceptions.py +18 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/llm_output.py +1 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/llm_svc.py +6 -4
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/model_registry.py +3 -3
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/anthropic.py +10 -4
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/google.py +47 -31
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/openai.py +38 -7
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/openrouter.py +3 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/agent_run.py +5 -3
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/citation.py +44 -7
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/reading.py +39 -29
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/regex.py +2 -2
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/transcript.py +8 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/types.py +2 -2
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/loaders/load_inspect.py +1 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_base.py +9 -4
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_client_util.py +7 -6
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_collections.py +16 -12
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_dql.py +8 -6
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_readings.py +1 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_results.py +3 -3
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_rubrics.py +1 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_sharing.py +1 -1
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/agent_run_writer.py +9 -4
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/inspect.py +8 -6
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/llm_context.py +39 -16
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/trace.py +46 -41
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/pyproject.toml +1 -1
- docent_python-0.1.62a0/docent/trace_temp.py +0 -1088
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/.gitignore +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/LICENSE.md +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/README.md +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/context_config.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/feedback.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/report.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/mcp/server.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/py.typed +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_agent_runs.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_feedback.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_labels.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_reports.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/client.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/__init__.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/harbor.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/nemogym.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/util.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/reading.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/util.py +0 -0
- {docent_python-0.1.62a0 → docent_python-0.1.63a0}/uv.lock +0 -0
|
@@ -4,6 +4,7 @@ __all__ = [
|
|
|
4
4
|
"load_config_file",
|
|
5
5
|
"AgentRunRef",
|
|
6
6
|
"TranscriptRef",
|
|
7
|
+
"TranscriptSliceRef",
|
|
7
8
|
"ReadingResultRef",
|
|
8
9
|
"ResultRef",
|
|
9
10
|
"Prompt",
|
|
@@ -17,4 +18,5 @@ from docent.sdk.llm_context import (
|
|
|
17
18
|
ReadingResultRef,
|
|
18
19
|
ResultRef,
|
|
19
20
|
TranscriptRef,
|
|
21
|
+
TranscriptSliceRef,
|
|
20
22
|
)
|
{docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/exceptions.py
RENAMED
|
@@ -35,6 +35,11 @@ class ContextWindowException(LLMException):
|
|
|
35
35
|
user_message = "Context window exceeded."
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
class InvalidPromptException(LLMException):
|
|
39
|
+
error_type_id = "invalid_prompt"
|
|
40
|
+
user_message = "The model provider rejected this prompt for safety reasons."
|
|
41
|
+
|
|
42
|
+
|
|
38
43
|
class NoResponseException(LLMException):
|
|
39
44
|
error_type_id = "no_response"
|
|
40
45
|
user_message = "The model returned an empty response. Please try again later."
|
|
@@ -45,6 +50,17 @@ class DocentUsageLimitException(LLMException):
|
|
|
45
50
|
user_message = "Free daily usage limit reached. Add your own API key in settings or contact us for increased limits."
|
|
46
51
|
|
|
47
52
|
|
|
53
|
+
class ProviderAuthenticationException(LLMException):
|
|
54
|
+
error_type_id = "provider_authentication"
|
|
55
|
+
|
|
56
|
+
def __init__(self, message: str = ""):
|
|
57
|
+
super().__init__(message)
|
|
58
|
+
self.user_message = (
|
|
59
|
+
"The model provider API key could not be authenticated. "
|
|
60
|
+
"If you added your own key, update it in Settings > Model providers."
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
|
|
48
64
|
class ValidationFailedException(LLMException):
|
|
49
65
|
error_type_id = "validation_failed"
|
|
50
66
|
user_message = "The model returned invalid output that failed validation."
|
|
@@ -64,8 +80,10 @@ LLM_ERROR_TYPES: list[type[LLMException]] = [
|
|
|
64
80
|
CompletionTooLongException,
|
|
65
81
|
RateLimitException,
|
|
66
82
|
ContextWindowException,
|
|
83
|
+
InvalidPromptException,
|
|
67
84
|
NoResponseException,
|
|
68
85
|
DocentUsageLimitException,
|
|
86
|
+
ProviderAuthenticationException,
|
|
69
87
|
ValidationFailedException,
|
|
70
88
|
TimeoutException,
|
|
71
89
|
]
|
{docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/llm_output.py
RENAMED
|
@@ -154,7 +154,7 @@ class LLMOutput:
|
|
|
154
154
|
]
|
|
155
155
|
errors_to_log = [e for e in errors if e not in error_types_to_not_log]
|
|
156
156
|
if errors_to_log:
|
|
157
|
-
logger.error(
|
|
157
|
+
logger.error("Loading LLM output with errors: %s", errors)
|
|
158
158
|
errors = [error_type_map.get(e, LLMException)() for e in errors]
|
|
159
159
|
|
|
160
160
|
completions = data.get("completions", [])
|
|
@@ -208,7 +208,7 @@ async def _parallelize_calls(
|
|
|
208
208
|
except asyncio.TimeoutError as e:
|
|
209
209
|
timeout_exception = TimeoutException(str(e) or "Request timed out")
|
|
210
210
|
timeout_exception.__cause__ = e
|
|
211
|
-
logger.error(
|
|
211
|
+
logger.error("Call to %s timed out", model_name)
|
|
212
212
|
result = LLMOutput(
|
|
213
213
|
model=model_name,
|
|
214
214
|
completions=[],
|
|
@@ -218,7 +218,9 @@ async def _parallelize_calls(
|
|
|
218
218
|
except Exception as e:
|
|
219
219
|
if not isinstance(e, LLMException):
|
|
220
220
|
logger.error(
|
|
221
|
-
|
|
221
|
+
"LLM call raised an exception that is not an LLMException: %s. Failure traceback:\n%s",
|
|
222
|
+
e,
|
|
223
|
+
traceback.format_exc(),
|
|
222
224
|
)
|
|
223
225
|
llm_exception = LLMException(e)
|
|
224
226
|
llm_exception.__cause__ = e
|
|
@@ -346,7 +348,7 @@ class BaseLLMService:
|
|
|
346
348
|
return None
|
|
347
349
|
|
|
348
350
|
new_model_option = model_options[current_model_option_index]
|
|
349
|
-
logger.warning(
|
|
351
|
+
logger.warning("Switched to next model %s", new_model_option.model_name)
|
|
350
352
|
return new_model_option
|
|
351
353
|
|
|
352
354
|
while True:
|
|
@@ -410,7 +412,7 @@ class BaseLLMService:
|
|
|
410
412
|
)
|
|
411
413
|
)
|
|
412
414
|
if num_rotation_errors > 0:
|
|
413
|
-
logger.warning(
|
|
415
|
+
logger.warning("%s: %s API errors", model_name, num_rotation_errors)
|
|
414
416
|
if not _rotate_model_option():
|
|
415
417
|
break
|
|
416
418
|
else:
|
|
@@ -183,7 +183,7 @@ def get_model_info(model_name: str) -> Optional[ModelInfo]:
|
|
|
183
183
|
def get_context_window(model_name: str) -> int:
|
|
184
184
|
info = get_model_info(model_name)
|
|
185
185
|
if info is None:
|
|
186
|
-
logger.warning(
|
|
186
|
+
logger.warning("No context window found for model %s", model_name)
|
|
187
187
|
return 100_000
|
|
188
188
|
return info.context_window
|
|
189
189
|
|
|
@@ -196,11 +196,11 @@ def get_rates_for_model_name(model_name: str) -> Optional[ModelRate]:
|
|
|
196
196
|
def estimate_cost_cents(model_name: str, token_count: int, token_type: TokenType) -> float:
|
|
197
197
|
rate = get_rates_for_model_name(model_name)
|
|
198
198
|
if rate is None:
|
|
199
|
-
logger.warning(
|
|
199
|
+
logger.warning("No rate found for model %s", model_name)
|
|
200
200
|
return 0.0
|
|
201
201
|
usd_per_mtok = rate.get(token_type)
|
|
202
202
|
if usd_per_mtok is None:
|
|
203
|
-
logger.warning(
|
|
203
|
+
logger.warning("No rate found for model %s token type %s", model_name, token_type)
|
|
204
204
|
return 0.0
|
|
205
205
|
cents_per_token = usd_per_mtok * 100 / 1_000_000.0
|
|
206
206
|
return token_count * cents_per_token
|
|
@@ -41,6 +41,7 @@ from docent._llm_util.data_models.exceptions import (
|
|
|
41
41
|
CompletionTooLongException,
|
|
42
42
|
ContextWindowException,
|
|
43
43
|
NoResponseException,
|
|
44
|
+
ProviderAuthenticationException,
|
|
44
45
|
RateLimitException,
|
|
45
46
|
)
|
|
46
47
|
from docent._llm_util.data_models.llm_output import (
|
|
@@ -78,7 +79,9 @@ ANTHROPIC_STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"
|
|
|
78
79
|
|
|
79
80
|
def _print_backoff_message(e: Details):
|
|
80
81
|
logger.warning(
|
|
81
|
-
|
|
82
|
+
"Anthropic backing off for %.2fs due to %s",
|
|
83
|
+
e["wait"], # type: ignore
|
|
84
|
+
e["exception"].__class__.__name__, # type: ignore
|
|
82
85
|
)
|
|
83
86
|
|
|
84
87
|
|
|
@@ -86,6 +89,7 @@ def _is_retryable_error(e: BaseException) -> bool:
|
|
|
86
89
|
if (
|
|
87
90
|
isinstance(e, BadRequestError)
|
|
88
91
|
or isinstance(e, ContextWindowException)
|
|
92
|
+
or isinstance(e, ProviderAuthenticationException)
|
|
89
93
|
or isinstance(e, AuthenticationError)
|
|
90
94
|
or isinstance(e, NotImplementedError)
|
|
91
95
|
or isinstance(e, PermissionDeniedError)
|
|
@@ -209,6 +213,8 @@ def _build_output_format(response_format: ResponseFormat | None) -> dict[str, An
|
|
|
209
213
|
|
|
210
214
|
|
|
211
215
|
def _convert_anthropic_error(e: Exception):
|
|
216
|
+
if isinstance(e, (AuthenticationError, PermissionDeniedError)):
|
|
217
|
+
return ProviderAuthenticationException(e.message)
|
|
212
218
|
if isinstance(e, BadRequestError):
|
|
213
219
|
if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
|
|
214
220
|
return ContextWindowException()
|
|
@@ -285,7 +291,7 @@ async def get_anthropic_chat_completion_streaming_async(
|
|
|
285
291
|
if llm_output_partial:
|
|
286
292
|
return finalize_llm_output_partial(llm_output_partial)
|
|
287
293
|
return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
|
|
288
|
-
except (RateLimitError, BadRequestError) as e:
|
|
294
|
+
except (RateLimitError, BadRequestError, AuthenticationError, PermissionDeniedError) as e:
|
|
289
295
|
if e2 := _convert_anthropic_error(e):
|
|
290
296
|
raise e2 from e
|
|
291
297
|
raise
|
|
@@ -365,7 +371,7 @@ def update_llm_output(
|
|
|
365
371
|
):
|
|
366
372
|
# This should not happen with a well-behaved API, log and skip
|
|
367
373
|
logger.warning(
|
|
368
|
-
|
|
374
|
+
"Received InputJSONDelta before start event at index %s, skipping", index
|
|
369
375
|
)
|
|
370
376
|
else:
|
|
371
377
|
cur_tool_calls[index] = ToolCallPartial(
|
|
@@ -482,7 +488,7 @@ async def get_anthropic_chat_completion_async(
|
|
|
482
488
|
)
|
|
483
489
|
|
|
484
490
|
return output
|
|
485
|
-
except (RateLimitError, BadRequestError) as e:
|
|
491
|
+
except (RateLimitError, BadRequestError, AuthenticationError, PermissionDeniedError) as e:
|
|
486
492
|
if e2 := _convert_anthropic_error(e):
|
|
487
493
|
raise e2 from e
|
|
488
494
|
raise
|
|
@@ -11,6 +11,7 @@ from docent._llm_util.data_models.exceptions import (
|
|
|
11
11
|
CompletionTooLongException,
|
|
12
12
|
ContextWindowException,
|
|
13
13
|
NoResponseException,
|
|
14
|
+
ProviderAuthenticationException,
|
|
14
15
|
RateLimitException,
|
|
15
16
|
)
|
|
16
17
|
from docent._llm_util.data_models.llm_output import (
|
|
@@ -41,7 +42,9 @@ logger = get_logger(__name__)
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def _convert_google_error(e: errors.APIError):
|
|
44
|
-
if e.code in [
|
|
45
|
+
if e.code in [401, 403]:
|
|
46
|
+
return ProviderAuthenticationException(str(e))
|
|
47
|
+
elif e.code in [429, 502, 503, 504]:
|
|
45
48
|
return RateLimitException(e)
|
|
46
49
|
elif e.code == 400 and "maximum number of tokens" in str(e).lower():
|
|
47
50
|
return ContextWindowException()
|
|
@@ -50,12 +53,18 @@ def _convert_google_error(e: errors.APIError):
|
|
|
50
53
|
|
|
51
54
|
def _print_backoff_message(e: Any):
|
|
52
55
|
logger.warning(
|
|
53
|
-
|
|
56
|
+
"Google backing off for %.2fs due to %s",
|
|
57
|
+
e["wait"], # type: ignore
|
|
58
|
+
e["exception"].__class__.__name__, # type: ignore
|
|
54
59
|
)
|
|
55
60
|
|
|
56
61
|
|
|
57
62
|
def _is_retryable_error(exception: BaseException) -> bool:
|
|
58
63
|
"""Checks if the exception is a retryable error based on the criteria."""
|
|
64
|
+
if isinstance(exception, RateLimitException):
|
|
65
|
+
return True
|
|
66
|
+
if isinstance(exception, (ContextWindowException, CompletionTooLongException)):
|
|
67
|
+
return False
|
|
59
68
|
if isinstance(exception, errors.APIError):
|
|
60
69
|
return exception.code in [429, 500, 502, 503, 504]
|
|
61
70
|
if isinstance(exception, requests.exceptions.ConnectionError):
|
|
@@ -112,39 +121,46 @@ async def get_google_chat_completion_async(
|
|
|
112
121
|
model_name=model_name,
|
|
113
122
|
)
|
|
114
123
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
124
|
+
try:
|
|
125
|
+
async with async_timeout_ctx(timeout):
|
|
126
|
+
thinking_cfg = None
|
|
127
|
+
if reasoning_effort:
|
|
128
|
+
thinking_cfg = types.ThinkingConfig(
|
|
129
|
+
include_thoughts=True,
|
|
130
|
+
thinking_budget=reasoning_budget(max_new_tokens, reasoning_effort),
|
|
131
|
+
)
|
|
122
132
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
133
|
+
raw_output = await client.models.generate_content( # type: ignore
|
|
134
|
+
model=model_name,
|
|
135
|
+
contents=input_messages, # type: ignore
|
|
136
|
+
config=types.GenerateContentConfig(
|
|
137
|
+
temperature=temperature,
|
|
138
|
+
thinking_config=thinking_cfg,
|
|
139
|
+
max_output_tokens=max_new_tokens,
|
|
140
|
+
system_instruction=system,
|
|
141
|
+
tools=cast(Any, _parse_tools(tools)) if tools else None,
|
|
142
|
+
tool_config=(
|
|
143
|
+
types.ToolConfig(
|
|
144
|
+
function_calling_config=_parse_tool_choice(tool_choice)
|
|
145
|
+
)
|
|
146
|
+
if tool_choice is not None
|
|
147
|
+
else None
|
|
148
|
+
),
|
|
149
|
+
**response_format_config,
|
|
136
150
|
),
|
|
137
|
-
**response_format_config,
|
|
138
|
-
),
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
output = _parse_google_completion(raw_output, model_name)
|
|
142
|
-
if output.first and output.first.finish_reason == "length" and output.first.no_text:
|
|
143
|
-
raise CompletionTooLongException(
|
|
144
|
-
f"Completion empty due to truncation. Consider increasing max_new_tokens (currently {max_new_tokens})."
|
|
145
151
|
)
|
|
146
152
|
|
|
147
|
-
|
|
153
|
+
output = _parse_google_completion(raw_output, model_name)
|
|
154
|
+
if output.first and output.first.finish_reason == "length" and output.first.no_text:
|
|
155
|
+
raise CompletionTooLongException(
|
|
156
|
+
f"Completion empty due to truncation. Consider increasing max_new_tokens (currently {max_new_tokens})."
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
return output
|
|
160
|
+
except errors.APIError as e:
|
|
161
|
+
if e2 := _convert_google_error(e):
|
|
162
|
+
raise e2 from e
|
|
163
|
+
raise
|
|
148
164
|
|
|
149
165
|
return await retry_async(
|
|
150
166
|
_call,
|
|
@@ -9,6 +9,7 @@ from backoff.types import Details
|
|
|
9
9
|
# all errors: https://platform.openai.com/docs/guides/error-codes/api-errors#python-library-error-types
|
|
10
10
|
from openai import (
|
|
11
11
|
APIConnectionError,
|
|
12
|
+
APITimeoutError,
|
|
12
13
|
AsyncAzureOpenAI,
|
|
13
14
|
AsyncOpenAI,
|
|
14
15
|
AuthenticationError,
|
|
@@ -48,8 +49,11 @@ from openai.types.shared_params.response_format_json_schema import (
|
|
|
48
49
|
from docent._llm_util.data_models.exceptions import (
|
|
49
50
|
CompletionTooLongException,
|
|
50
51
|
ContextWindowException,
|
|
52
|
+
InvalidPromptException,
|
|
51
53
|
NoResponseException,
|
|
54
|
+
ProviderAuthenticationException,
|
|
52
55
|
RateLimitException,
|
|
56
|
+
TimeoutException,
|
|
53
57
|
)
|
|
54
58
|
from docent._llm_util.data_models.llm_output import (
|
|
55
59
|
AsyncEmbeddingStreamingCallback,
|
|
@@ -83,7 +87,9 @@ MAX_EMBEDDING_TOKENS = 8000
|
|
|
83
87
|
|
|
84
88
|
def _print_backoff_message(e: Details):
|
|
85
89
|
logger.warning(
|
|
86
|
-
|
|
90
|
+
"OpenAI backing off for %.2fs due to %s",
|
|
91
|
+
e["wait"], # type: ignore
|
|
92
|
+
e["exception"].__class__.__name__, # type: ignore
|
|
87
93
|
)
|
|
88
94
|
|
|
89
95
|
|
|
@@ -91,6 +97,8 @@ def _is_retryable_error(e: BaseException) -> bool:
|
|
|
91
97
|
if (
|
|
92
98
|
isinstance(e, BadRequestError)
|
|
93
99
|
or isinstance(e, ContextWindowException)
|
|
100
|
+
or isinstance(e, InvalidPromptException)
|
|
101
|
+
or isinstance(e, ProviderAuthenticationException)
|
|
94
102
|
or isinstance(e, AuthenticationError)
|
|
95
103
|
or isinstance(e, PermissionDeniedError)
|
|
96
104
|
or isinstance(e, NotFoundError)
|
|
@@ -281,7 +289,13 @@ async def get_openai_chat_completion_streaming_async(
|
|
|
281
289
|
if llm_output_partial:
|
|
282
290
|
return finalize_llm_output_partial(llm_output_partial)
|
|
283
291
|
return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
|
|
284
|
-
except (
|
|
292
|
+
except (
|
|
293
|
+
APITimeoutError,
|
|
294
|
+
RateLimitError,
|
|
295
|
+
BadRequestError,
|
|
296
|
+
AuthenticationError,
|
|
297
|
+
PermissionDeniedError,
|
|
298
|
+
) as e:
|
|
285
299
|
if e2 := _convert_openai_error(e):
|
|
286
300
|
raise e2 from e
|
|
287
301
|
raise
|
|
@@ -296,10 +310,19 @@ async def get_openai_chat_completion_streaming_async(
|
|
|
296
310
|
|
|
297
311
|
|
|
298
312
|
def _convert_openai_error(e: Exception):
|
|
299
|
-
if isinstance(e,
|
|
313
|
+
if isinstance(e, (AuthenticationError, PermissionDeniedError)):
|
|
314
|
+
return ProviderAuthenticationException(e.message)
|
|
315
|
+
elif isinstance(e, RateLimitError):
|
|
300
316
|
return RateLimitException(e)
|
|
301
|
-
elif isinstance(e,
|
|
317
|
+
elif isinstance(e, APITimeoutError):
|
|
318
|
+
return TimeoutException(str(e) or "Request timed out")
|
|
319
|
+
elif isinstance(e, BadRequestError) and e.code in (
|
|
320
|
+
"context_length_exceeded",
|
|
321
|
+
"string_above_max_length",
|
|
322
|
+
):
|
|
302
323
|
return ContextWindowException()
|
|
324
|
+
elif isinstance(e, BadRequestError) and e.code == "invalid_prompt":
|
|
325
|
+
return InvalidPromptException()
|
|
303
326
|
return None
|
|
304
327
|
|
|
305
328
|
|
|
@@ -473,7 +496,13 @@ async def get_openai_chat_completion_async(
|
|
|
473
496
|
)
|
|
474
497
|
|
|
475
498
|
return output
|
|
476
|
-
except (
|
|
499
|
+
except (
|
|
500
|
+
APITimeoutError,
|
|
501
|
+
RateLimitError,
|
|
502
|
+
BadRequestError,
|
|
503
|
+
AuthenticationError,
|
|
504
|
+
PermissionDeniedError,
|
|
505
|
+
) as e:
|
|
477
506
|
if e2 := _convert_openai_error(e):
|
|
478
507
|
raise e2 from e
|
|
479
508
|
raise
|
|
@@ -549,8 +578,10 @@ async def _get_openai_embeddings_async_one_batch(
|
|
|
549
578
|
dimensions=dimensions if dimensions is not None else omit,
|
|
550
579
|
)
|
|
551
580
|
return [data.embedding for data in response.data]
|
|
552
|
-
except RateLimitError as e:
|
|
553
|
-
|
|
581
|
+
except (RateLimitError, AuthenticationError, PermissionDeniedError) as e:
|
|
582
|
+
if e2 := _convert_openai_error(e):
|
|
583
|
+
raise e2 from e
|
|
584
|
+
raise
|
|
554
585
|
|
|
555
586
|
|
|
556
587
|
async def get_chunked_openai_embeddings_async(
|
|
@@ -6,7 +6,9 @@ from typing import Literal, cast
|
|
|
6
6
|
|
|
7
7
|
from openai import AsyncOpenAI, AuthenticationError, BadRequestError
|
|
8
8
|
|
|
9
|
-
from docent._llm_util.data_models.exceptions import
|
|
9
|
+
from docent._llm_util.data_models.exceptions import (
|
|
10
|
+
ContextWindowException,
|
|
11
|
+
)
|
|
10
12
|
from docent._llm_util.data_models.llm_output import (
|
|
11
13
|
AsyncSingleLLMOutputStreamingCallback,
|
|
12
14
|
LLMOutput,
|
|
@@ -252,7 +252,9 @@ class AgentRunTree(BaseModel):
|
|
|
252
252
|
# This should never happen, but check anyways for safety; fallback to global root
|
|
253
253
|
if par_id not in nodes:
|
|
254
254
|
logger.error(
|
|
255
|
-
|
|
255
|
+
"Parent %s not found for transcript %s. Assigning to global root as a fallback",
|
|
256
|
+
par_id,
|
|
257
|
+
t_id,
|
|
256
258
|
)
|
|
257
259
|
par_id = GLOBAL_ROOT_ID
|
|
258
260
|
nodes[par_id].children_ids.append(t_id)
|
|
@@ -264,13 +266,13 @@ class AgentRunTree(BaseModel):
|
|
|
264
266
|
if obj_type == NodeType.TRANSCRIPT_GROUP:
|
|
265
267
|
# This should never happen, but check anyways for safety
|
|
266
268
|
if obj_id not in tg_dict:
|
|
267
|
-
logger.error(
|
|
269
|
+
logger.error("Transcript group %s not found", obj_id)
|
|
268
270
|
return datetime.max
|
|
269
271
|
return tg_dict[obj_id].created_at or datetime.max
|
|
270
272
|
elif obj_type == NodeType.TRANSCRIPT:
|
|
271
273
|
# This should never happen, but check anyways for safety
|
|
272
274
|
if obj_id not in t_dict:
|
|
273
|
-
logger.error(
|
|
275
|
+
logger.error("Transcript %s not found", obj_id)
|
|
274
276
|
return datetime.max
|
|
275
277
|
return t_dict[obj_id].created_at or datetime.max
|
|
276
278
|
else:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from datetime import datetime
|
|
2
3
|
from typing import Annotated, Literal, Union
|
|
3
4
|
from uuid import uuid4
|
|
@@ -22,6 +23,8 @@ class CitationTarget(BaseModel):
|
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class ParsedCitation(BaseModel):
|
|
26
|
+
"""Citation parsed from text. start_idx and end_idx are UTF-16 code unit offsets for browser string slicing."""
|
|
27
|
+
|
|
25
28
|
start_idx: int
|
|
26
29
|
end_idx: int
|
|
27
30
|
item_alias: str
|
|
@@ -107,6 +110,34 @@ RANGE_BEGIN = "<RANGE>"
|
|
|
107
110
|
RANGE_END = "</RANGE>"
|
|
108
111
|
|
|
109
112
|
|
|
113
|
+
# Citation alias grammar (single source of truth).
|
|
114
|
+
# Each regex is anchored so it can be used independently of evaluation order.
|
|
115
|
+
CITATION_BLOCK_RE = re.compile(r"^T(\d+)B(\d+)$") # [T0B1]
|
|
116
|
+
CITATION_AGENT_RUN_METADATA_RE = re.compile(r"^R(\d+)M\.([^:]+)$") # [R0M.key]
|
|
117
|
+
CITATION_TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$") # [T0M.key]
|
|
118
|
+
CITATION_MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$") # [T0B1M.key]
|
|
119
|
+
CITATION_ANALYSIS_RESULT_RE = re.compile(r"^A(\d+)$") # [A0]
|
|
120
|
+
|
|
121
|
+
_CITATION_ALIAS_RES = (
|
|
122
|
+
CITATION_MESSAGE_METADATA_RE,
|
|
123
|
+
CITATION_TRANSCRIPT_METADATA_RE,
|
|
124
|
+
CITATION_AGENT_RUN_METADATA_RE,
|
|
125
|
+
CITATION_BLOCK_RE,
|
|
126
|
+
CITATION_ANALYSIS_RESULT_RE,
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def is_valid_citation_alias(item_alias: str) -> bool:
|
|
131
|
+
"""Whether `item_alias` matches one of the supported citation alias shapes."""
|
|
132
|
+
return any(rx.match(item_alias) for rx in _CITATION_ALIAS_RES)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _utf16_code_unit_len(text: str) -> int:
|
|
136
|
+
"""Return the number of UTF-16 code units in text. Used to ensure indices match browser string
|
|
137
|
+
slicing with non-BMP characters"""
|
|
138
|
+
return len(text.encode("utf-16-le")) // 2
|
|
139
|
+
|
|
140
|
+
|
|
110
141
|
def scan_brackets(text: str) -> list[tuple[int, int, str]]:
|
|
111
142
|
"""Scan text for bracketed segments, respecting RANGE markers and nested brackets.
|
|
112
143
|
|
|
@@ -160,15 +191,14 @@ def parse_single_citation(part: str) -> tuple[str, CitationTargetTextRange | Non
|
|
|
160
191
|
"""
|
|
161
192
|
Parse a single citation token inside a bracket and return its components.
|
|
162
193
|
|
|
163
|
-
Returns
|
|
164
|
-
|
|
194
|
+
Returns (item_alias, text_range) or None if the token is not a syntactically
|
|
195
|
+
valid citation alias (see `is_valid_citation_alias`).
|
|
165
196
|
Supports optional text range for all valid citation kinds.
|
|
166
197
|
"""
|
|
167
198
|
token = part.strip()
|
|
168
199
|
if not token:
|
|
169
200
|
return None
|
|
170
201
|
|
|
171
|
-
# Extract optional range part
|
|
172
202
|
item_alias = token
|
|
173
203
|
text_range: CitationTargetTextRange | None = None
|
|
174
204
|
if ":" in token:
|
|
@@ -176,6 +206,9 @@ def parse_single_citation(part: str) -> tuple[str, CitationTargetTextRange | Non
|
|
|
176
206
|
item_alias = left.strip()
|
|
177
207
|
text_range = _extract_range_pattern(right)
|
|
178
208
|
|
|
209
|
+
if not is_valid_citation_alias(item_alias):
|
|
210
|
+
return None
|
|
211
|
+
|
|
179
212
|
return item_alias, text_range
|
|
180
213
|
|
|
181
214
|
|
|
@@ -196,9 +229,8 @@ def parse_citations(text: str) -> tuple[str, list[ParsedCitation]]:
|
|
|
196
229
|
text: The text to parse citations from
|
|
197
230
|
|
|
198
231
|
Returns:
|
|
199
|
-
A tuple of (cleaned_text, citations) where cleaned_text has brackets and range markers removed
|
|
200
|
-
|
|
201
|
-
in the cleaned text
|
|
232
|
+
A tuple of (cleaned_text, citations) where cleaned_text has brackets and range markers removed.
|
|
233
|
+
Citation start_idx and end_idx are UTF-16 code unit offsets for browser string slicing.
|
|
202
234
|
"""
|
|
203
235
|
citations: list[ParsedCitation] = []
|
|
204
236
|
|
|
@@ -212,7 +244,12 @@ def parse_citations(text: str) -> tuple[str, list[ParsedCitation]]:
|
|
|
212
244
|
label, text_range = parsed
|
|
213
245
|
|
|
214
246
|
citations.append(
|
|
215
|
-
ParsedCitation(
|
|
247
|
+
ParsedCitation(
|
|
248
|
+
start_idx=_utf16_code_unit_len(text[:start]),
|
|
249
|
+
end_idx=_utf16_code_unit_len(text[:end]),
|
|
250
|
+
item_alias=label,
|
|
251
|
+
text_range=text_range,
|
|
252
|
+
)
|
|
216
253
|
)
|
|
217
254
|
|
|
218
255
|
# We're not cleaning the text right now but may do that later
|
|
@@ -323,24 +323,46 @@ class ReadingStepSubmission(BaseModel):
|
|
|
323
323
|
return _with_legacy_max_new_tokens_default(value)
|
|
324
324
|
|
|
325
325
|
@model_validator(mode="after")
|
|
326
|
-
def
|
|
327
|
-
|
|
326
|
+
def _validate(self) -> "ReadingStepSubmission":
|
|
327
|
+
# Scripted and template are mutually exclusive
|
|
328
|
+
if (self.requests is None) == (self.prompt_template_segments is None):
|
|
328
329
|
raise ValueError(
|
|
329
|
-
"ReadingStepSubmission:
|
|
330
|
-
"without source_reading_preset_id"
|
|
330
|
+
"ReadingStepSubmission: must set one of requests / prompt_template_segments"
|
|
331
331
|
)
|
|
332
|
+
|
|
333
|
+
# Validate scripted reading
|
|
332
334
|
if self.requests is not None:
|
|
333
|
-
if
|
|
335
|
+
if (
|
|
336
|
+
self.dql_query is not None
|
|
337
|
+
or self.dql_step_alias is not None
|
|
338
|
+
or self.context_configs is not None
|
|
339
|
+
):
|
|
334
340
|
raise ValueError(
|
|
335
|
-
"
|
|
341
|
+
"ReadingStepSubmission: scripted readings must not set dql_query, dql_step_alias, or context_configs"
|
|
336
342
|
)
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
343
|
+
if (
|
|
344
|
+
self.source_reading_preset_version is not None
|
|
345
|
+
or self.source_reading_preset_id is not None
|
|
346
|
+
):
|
|
347
|
+
raise ValueError(
|
|
348
|
+
"ReadingStepSubmission: scripted readings cannot be associated with a reading preset"
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
# Validate template reading
|
|
352
|
+
else:
|
|
353
|
+
if (
|
|
354
|
+
self.source_reading_preset_version is not None
|
|
355
|
+
and self.source_reading_preset_id is None
|
|
356
|
+
):
|
|
357
|
+
raise ValueError(
|
|
358
|
+
"ReadingStepSubmission: source_reading_preset_version cannot be set "
|
|
359
|
+
"without source_reading_preset_id"
|
|
360
|
+
)
|
|
361
|
+
if (self.dql_query is None) == (self.dql_step_alias is None):
|
|
362
|
+
raise ValueError(
|
|
363
|
+
"ReadingStepSubmission: template readings must set exactly one of dql_query / dql_step_alias"
|
|
364
|
+
)
|
|
365
|
+
|
|
344
366
|
return self
|
|
345
367
|
|
|
346
368
|
|
|
@@ -362,28 +384,16 @@ class PresetReadingStepSubmission(BaseModel):
|
|
|
362
384
|
cache_mode: ReadingCacheMode = "reading"
|
|
363
385
|
|
|
364
386
|
@model_validator(mode="after")
|
|
365
|
-
def
|
|
366
|
-
if self.source_reading_preset_id is None
|
|
367
|
-
raise ValueError(
|
|
368
|
-
"PresetReadingStepSubmission: set one of "
|
|
369
|
-
"source_reading_preset_id / source_reading_preset_name"
|
|
370
|
-
)
|
|
371
|
-
if (
|
|
372
|
-
self.source_reading_preset_id is not None
|
|
373
|
-
and self.source_reading_preset_name is not None
|
|
374
|
-
):
|
|
387
|
+
def _validate(self) -> "PresetReadingStepSubmission":
|
|
388
|
+
if (self.source_reading_preset_id is None) == (self.source_reading_preset_name is None):
|
|
375
389
|
raise ValueError(
|
|
376
|
-
"PresetReadingStepSubmission: set
|
|
390
|
+
"PresetReadingStepSubmission: set exactly one of "
|
|
377
391
|
"source_reading_preset_id / source_reading_preset_name"
|
|
378
392
|
)
|
|
379
|
-
if self.dql_query is
|
|
393
|
+
if (self.dql_query is None) == (self.dql_step_alias is None):
|
|
380
394
|
raise ValueError(
|
|
381
395
|
"PresetReadingStepSubmission: set exactly one of dql_query / dql_step_alias"
|
|
382
396
|
)
|
|
383
|
-
if self.dql_query is None and self.dql_step_alias is None:
|
|
384
|
-
raise ValueError(
|
|
385
|
-
"PresetReadingStepSubmission: must set one of dql_query / dql_step_alias"
|
|
386
|
-
)
|
|
387
397
|
return self
|
|
388
398
|
|
|
389
399
|
|