docent-python 0.1.62a0__tar.gz → 0.1.63a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/__init__.py +2 -0
  3. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/exceptions.py +18 -0
  4. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/llm_output.py +1 -1
  5. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/llm_svc.py +6 -4
  6. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/model_registry.py +3 -3
  7. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/anthropic.py +10 -4
  8. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/google.py +47 -31
  9. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/openai.py +38 -7
  10. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/openrouter.py +3 -1
  11. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/agent_run.py +5 -3
  12. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/citation.py +44 -7
  13. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/reading.py +39 -29
  14. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/regex.py +2 -2
  15. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/transcript.py +8 -1
  16. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/types.py +2 -2
  17. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/loaders/load_inspect.py +1 -1
  18. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_base.py +9 -4
  19. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_client_util.py +7 -6
  20. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_collections.py +16 -12
  21. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_dql.py +8 -6
  22. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_readings.py +1 -1
  23. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_results.py +3 -3
  24. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_rubrics.py +1 -1
  25. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_sharing.py +1 -1
  26. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/agent_run_writer.py +9 -4
  27. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/inspect.py +8 -6
  28. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/llm_context.py +39 -16
  29. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/trace.py +46 -41
  30. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/pyproject.toml +1 -1
  31. docent_python-0.1.62a0/docent/trace_temp.py +0 -1088
  32. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/.gitignore +0 -0
  33. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/LICENSE.md +0 -0
  34. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/README.md +0 -0
  35. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/__init__.py +0 -0
  36. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/data_models/__init__.py +0 -0
  37. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/__init__.py +0 -0
  38. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/common.py +0 -0
  39. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/preference_types.py +0 -0
  40. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  41. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_log_util/__init__.py +0 -0
  42. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/_log_util/logger.py +0 -0
  43. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/__init__.py +0 -0
  44. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/_tiktoken_util.py +0 -0
  45. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/__init__.py +0 -0
  46. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/content.py +0 -0
  47. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/message.py +0 -0
  48. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/response_format.py +0 -0
  49. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/chat/tool.py +0 -0
  50. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/context_config.py +0 -0
  51. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/feedback.py +0 -0
  52. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/formatted_objects.py +0 -0
  53. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/judge.py +0 -0
  54. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/metadata_util.py +0 -0
  55. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/report.py +0 -0
  56. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/data_models/util.py +0 -0
  57. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/__init__.py +0 -0
  58. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/analysis.py +0 -0
  59. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/impl.py +0 -0
  60. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/runner.py +0 -0
  61. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/stats.py +0 -0
  62. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/forgiving_json.py +0 -0
  63. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/meta_schema.json +0 -0
  64. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/meta_schema.py +0 -0
  65. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/parse_output.py +0 -0
  66. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/template_formatter.py +0 -0
  67. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/judges/util/voting.py +0 -0
  68. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/mcp/__init__.py +0 -0
  69. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/mcp/__main__.py +0 -0
  70. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/mcp/server.py +0 -0
  71. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/py.typed +0 -0
  72. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/__init__.py +0 -0
  73. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/load.py +0 -0
  74. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/log.eval +0 -0
  75. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/samples/tb_airline.json +0 -0
  76. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/__init__.py +0 -0
  77. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_agent_runs.py +0 -0
  78. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_feedback.py +0 -0
  79. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_labels.py +0 -0
  80. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/_reports.py +0 -0
  81. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/client.py +0 -0
  82. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/__init__.py +0 -0
  83. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/harbor.py +0 -0
  84. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/nemogym.py +0 -0
  85. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/integrations/util.py +0 -0
  86. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/llm_request.py +0 -0
  87. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/reading.py +0 -0
  88. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/docent/sdk/util.py +0 -0
  89. {docent_python-0.1.62a0 → docent_python-0.1.63a0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.62a0
3
+ Version: 0.1.63a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -4,6 +4,7 @@ __all__ = [
4
4
  "load_config_file",
5
5
  "AgentRunRef",
6
6
  "TranscriptRef",
7
+ "TranscriptSliceRef",
7
8
  "ReadingResultRef",
8
9
  "ResultRef",
9
10
  "Prompt",
@@ -17,4 +18,5 @@ from docent.sdk.llm_context import (
17
18
  ReadingResultRef,
18
19
  ResultRef,
19
20
  TranscriptRef,
21
+ TranscriptSliceRef,
20
22
  )
@@ -35,6 +35,11 @@ class ContextWindowException(LLMException):
35
35
  user_message = "Context window exceeded."
36
36
 
37
37
 
38
+ class InvalidPromptException(LLMException):
39
+ error_type_id = "invalid_prompt"
40
+ user_message = "The model provider rejected this prompt for safety reasons."
41
+
42
+
38
43
  class NoResponseException(LLMException):
39
44
  error_type_id = "no_response"
40
45
  user_message = "The model returned an empty response. Please try again later."
@@ -45,6 +50,17 @@ class DocentUsageLimitException(LLMException):
45
50
  user_message = "Free daily usage limit reached. Add your own API key in settings or contact us for increased limits."
46
51
 
47
52
 
53
+ class ProviderAuthenticationException(LLMException):
54
+ error_type_id = "provider_authentication"
55
+
56
+ def __init__(self, message: str = ""):
57
+ super().__init__(message)
58
+ self.user_message = (
59
+ "The model provider API key could not be authenticated. "
60
+ "If you added your own key, update it in Settings > Model providers."
61
+ )
62
+
63
+
48
64
  class ValidationFailedException(LLMException):
49
65
  error_type_id = "validation_failed"
50
66
  user_message = "The model returned invalid output that failed validation."
@@ -64,8 +80,10 @@ LLM_ERROR_TYPES: list[type[LLMException]] = [
64
80
  CompletionTooLongException,
65
81
  RateLimitException,
66
82
  ContextWindowException,
83
+ InvalidPromptException,
67
84
  NoResponseException,
68
85
  DocentUsageLimitException,
86
+ ProviderAuthenticationException,
69
87
  ValidationFailedException,
70
88
  TimeoutException,
71
89
  ]
@@ -154,7 +154,7 @@ class LLMOutput:
154
154
  ]
155
155
  errors_to_log = [e for e in errors if e not in error_types_to_not_log]
156
156
  if errors_to_log:
157
- logger.error(f"Loading LLM output with errors: {errors}")
157
+ logger.error("Loading LLM output with errors: %s", errors)
158
158
  errors = [error_type_map.get(e, LLMException)() for e in errors]
159
159
 
160
160
  completions = data.get("completions", [])
@@ -208,7 +208,7 @@ async def _parallelize_calls(
208
208
  except asyncio.TimeoutError as e:
209
209
  timeout_exception = TimeoutException(str(e) or "Request timed out")
210
210
  timeout_exception.__cause__ = e
211
- logger.error(f"Call to {model_name} timed out")
211
+ logger.error("Call to %s timed out", model_name)
212
212
  result = LLMOutput(
213
213
  model=model_name,
214
214
  completions=[],
@@ -218,7 +218,9 @@ async def _parallelize_calls(
218
218
  except Exception as e:
219
219
  if not isinstance(e, LLMException):
220
220
  logger.error(
221
- f"LLM call raised an exception that is not an LLMException: {e}. Failure traceback:\n{traceback.format_exc()}"
221
+ "LLM call raised an exception that is not an LLMException: %s. Failure traceback:\n%s",
222
+ e,
223
+ traceback.format_exc(),
222
224
  )
223
225
  llm_exception = LLMException(e)
224
226
  llm_exception.__cause__ = e
@@ -346,7 +348,7 @@ class BaseLLMService:
346
348
  return None
347
349
 
348
350
  new_model_option = model_options[current_model_option_index]
349
- logger.warning(f"Switched to next model {new_model_option.model_name}")
351
+ logger.warning("Switched to next model %s", new_model_option.model_name)
350
352
  return new_model_option
351
353
 
352
354
  while True:
@@ -410,7 +412,7 @@ class BaseLLMService:
410
412
  )
411
413
  )
412
414
  if num_rotation_errors > 0:
413
- logger.warning(f"{model_name}: {num_rotation_errors} API errors")
415
+ logger.warning("%s: %s API errors", model_name, num_rotation_errors)
414
416
  if not _rotate_model_option():
415
417
  break
416
418
  else:
@@ -183,7 +183,7 @@ def get_model_info(model_name: str) -> Optional[ModelInfo]:
183
183
  def get_context_window(model_name: str) -> int:
184
184
  info = get_model_info(model_name)
185
185
  if info is None:
186
- logger.warning(f"No context window found for model {model_name}")
186
+ logger.warning("No context window found for model %s", model_name)
187
187
  return 100_000
188
188
  return info.context_window
189
189
 
@@ -196,11 +196,11 @@ def get_rates_for_model_name(model_name: str) -> Optional[ModelRate]:
196
196
  def estimate_cost_cents(model_name: str, token_count: int, token_type: TokenType) -> float:
197
197
  rate = get_rates_for_model_name(model_name)
198
198
  if rate is None:
199
- logger.warning(f"No rate found for model {model_name}")
199
+ logger.warning("No rate found for model %s", model_name)
200
200
  return 0.0
201
201
  usd_per_mtok = rate.get(token_type)
202
202
  if usd_per_mtok is None:
203
- logger.warning(f"No rate found for model {model_name} token type {token_type}")
203
+ logger.warning("No rate found for model %s token type %s", model_name, token_type)
204
204
  return 0.0
205
205
  cents_per_token = usd_per_mtok * 100 / 1_000_000.0
206
206
  return token_count * cents_per_token
@@ -41,6 +41,7 @@ from docent._llm_util.data_models.exceptions import (
41
41
  CompletionTooLongException,
42
42
  ContextWindowException,
43
43
  NoResponseException,
44
+ ProviderAuthenticationException,
44
45
  RateLimitException,
45
46
  )
46
47
  from docent._llm_util.data_models.llm_output import (
@@ -78,7 +79,9 @@ ANTHROPIC_STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"
78
79
 
79
80
  def _print_backoff_message(e: Details):
80
81
  logger.warning(
81
- f"Anthropic backing off for {e['wait']:.2f}s due to {e['exception'].__class__.__name__}" # type: ignore
82
+ "Anthropic backing off for %.2fs due to %s",
83
+ e["wait"], # type: ignore
84
+ e["exception"].__class__.__name__, # type: ignore
82
85
  )
83
86
 
84
87
 
@@ -86,6 +89,7 @@ def _is_retryable_error(e: BaseException) -> bool:
86
89
  if (
87
90
  isinstance(e, BadRequestError)
88
91
  or isinstance(e, ContextWindowException)
92
+ or isinstance(e, ProviderAuthenticationException)
89
93
  or isinstance(e, AuthenticationError)
90
94
  or isinstance(e, NotImplementedError)
91
95
  or isinstance(e, PermissionDeniedError)
@@ -209,6 +213,8 @@ def _build_output_format(response_format: ResponseFormat | None) -> dict[str, An
209
213
 
210
214
 
211
215
  def _convert_anthropic_error(e: Exception):
216
+ if isinstance(e, (AuthenticationError, PermissionDeniedError)):
217
+ return ProviderAuthenticationException(e.message)
212
218
  if isinstance(e, BadRequestError):
213
219
  if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
214
220
  return ContextWindowException()
@@ -285,7 +291,7 @@ async def get_anthropic_chat_completion_streaming_async(
285
291
  if llm_output_partial:
286
292
  return finalize_llm_output_partial(llm_output_partial)
287
293
  return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
288
- except (RateLimitError, BadRequestError) as e:
294
+ except (RateLimitError, BadRequestError, AuthenticationError, PermissionDeniedError) as e:
289
295
  if e2 := _convert_anthropic_error(e):
290
296
  raise e2 from e
291
297
  raise
@@ -365,7 +371,7 @@ def update_llm_output(
365
371
  ):
366
372
  # This should not happen with a well-behaved API, log and skip
367
373
  logger.warning(
368
- f"Received InputJSONDelta before start event at index {index}, skipping"
374
+ "Received InputJSONDelta before start event at index %s, skipping", index
369
375
  )
370
376
  else:
371
377
  cur_tool_calls[index] = ToolCallPartial(
@@ -482,7 +488,7 @@ async def get_anthropic_chat_completion_async(
482
488
  )
483
489
 
484
490
  return output
485
- except (RateLimitError, BadRequestError) as e:
491
+ except (RateLimitError, BadRequestError, AuthenticationError, PermissionDeniedError) as e:
486
492
  if e2 := _convert_anthropic_error(e):
487
493
  raise e2 from e
488
494
  raise
@@ -11,6 +11,7 @@ from docent._llm_util.data_models.exceptions import (
11
11
  CompletionTooLongException,
12
12
  ContextWindowException,
13
13
  NoResponseException,
14
+ ProviderAuthenticationException,
14
15
  RateLimitException,
15
16
  )
16
17
  from docent._llm_util.data_models.llm_output import (
@@ -41,7 +42,9 @@ logger = get_logger(__name__)
41
42
 
42
43
 
43
44
  def _convert_google_error(e: errors.APIError):
44
- if e.code in [429, 502, 503, 504]:
45
+ if e.code in [401, 403]:
46
+ return ProviderAuthenticationException(str(e))
47
+ elif e.code in [429, 502, 503, 504]:
45
48
  return RateLimitException(e)
46
49
  elif e.code == 400 and "maximum number of tokens" in str(e).lower():
47
50
  return ContextWindowException()
@@ -50,12 +53,18 @@ def _convert_google_error(e: errors.APIError):
50
53
 
51
54
  def _print_backoff_message(e: Any):
52
55
  logger.warning(
53
- f"Google backing off for {e['wait']:.2f}s due to {e['exception'].__class__.__name__}" # type: ignore
56
+ "Google backing off for %.2fs due to %s",
57
+ e["wait"], # type: ignore
58
+ e["exception"].__class__.__name__, # type: ignore
54
59
  )
55
60
 
56
61
 
57
62
  def _is_retryable_error(exception: BaseException) -> bool:
58
63
  """Checks if the exception is a retryable error based on the criteria."""
64
+ if isinstance(exception, RateLimitException):
65
+ return True
66
+ if isinstance(exception, (ContextWindowException, CompletionTooLongException)):
67
+ return False
59
68
  if isinstance(exception, errors.APIError):
60
69
  return exception.code in [429, 500, 502, 503, 504]
61
70
  if isinstance(exception, requests.exceptions.ConnectionError):
@@ -112,39 +121,46 @@ async def get_google_chat_completion_async(
112
121
  model_name=model_name,
113
122
  )
114
123
 
115
- async with async_timeout_ctx(timeout):
116
- thinking_cfg = None
117
- if reasoning_effort:
118
- thinking_cfg = types.ThinkingConfig(
119
- include_thoughts=True,
120
- thinking_budget=reasoning_budget(max_new_tokens, reasoning_effort),
121
- )
124
+ try:
125
+ async with async_timeout_ctx(timeout):
126
+ thinking_cfg = None
127
+ if reasoning_effort:
128
+ thinking_cfg = types.ThinkingConfig(
129
+ include_thoughts=True,
130
+ thinking_budget=reasoning_budget(max_new_tokens, reasoning_effort),
131
+ )
122
132
 
123
- raw_output = await client.models.generate_content( # type: ignore
124
- model=model_name,
125
- contents=input_messages, # type: ignore
126
- config=types.GenerateContentConfig(
127
- temperature=temperature,
128
- thinking_config=thinking_cfg,
129
- max_output_tokens=max_new_tokens,
130
- system_instruction=system,
131
- tools=cast(Any, _parse_tools(tools)) if tools else None,
132
- tool_config=(
133
- types.ToolConfig(function_calling_config=_parse_tool_choice(tool_choice))
134
- if tool_choice is not None
135
- else None
133
+ raw_output = await client.models.generate_content( # type: ignore
134
+ model=model_name,
135
+ contents=input_messages, # type: ignore
136
+ config=types.GenerateContentConfig(
137
+ temperature=temperature,
138
+ thinking_config=thinking_cfg,
139
+ max_output_tokens=max_new_tokens,
140
+ system_instruction=system,
141
+ tools=cast(Any, _parse_tools(tools)) if tools else None,
142
+ tool_config=(
143
+ types.ToolConfig(
144
+ function_calling_config=_parse_tool_choice(tool_choice)
145
+ )
146
+ if tool_choice is not None
147
+ else None
148
+ ),
149
+ **response_format_config,
136
150
  ),
137
- **response_format_config,
138
- ),
139
- )
140
-
141
- output = _parse_google_completion(raw_output, model_name)
142
- if output.first and output.first.finish_reason == "length" and output.first.no_text:
143
- raise CompletionTooLongException(
144
- f"Completion empty due to truncation. Consider increasing max_new_tokens (currently {max_new_tokens})."
145
151
  )
146
152
 
147
- return output
153
+ output = _parse_google_completion(raw_output, model_name)
154
+ if output.first and output.first.finish_reason == "length" and output.first.no_text:
155
+ raise CompletionTooLongException(
156
+ f"Completion empty due to truncation. Consider increasing max_new_tokens (currently {max_new_tokens})."
157
+ )
158
+
159
+ return output
160
+ except errors.APIError as e:
161
+ if e2 := _convert_google_error(e):
162
+ raise e2 from e
163
+ raise
148
164
 
149
165
  return await retry_async(
150
166
  _call,
@@ -9,6 +9,7 @@ from backoff.types import Details
9
9
  # all errors: https://platform.openai.com/docs/guides/error-codes/api-errors#python-library-error-types
10
10
  from openai import (
11
11
  APIConnectionError,
12
+ APITimeoutError,
12
13
  AsyncAzureOpenAI,
13
14
  AsyncOpenAI,
14
15
  AuthenticationError,
@@ -48,8 +49,11 @@ from openai.types.shared_params.response_format_json_schema import (
48
49
  from docent._llm_util.data_models.exceptions import (
49
50
  CompletionTooLongException,
50
51
  ContextWindowException,
52
+ InvalidPromptException,
51
53
  NoResponseException,
54
+ ProviderAuthenticationException,
52
55
  RateLimitException,
56
+ TimeoutException,
53
57
  )
54
58
  from docent._llm_util.data_models.llm_output import (
55
59
  AsyncEmbeddingStreamingCallback,
@@ -83,7 +87,9 @@ MAX_EMBEDDING_TOKENS = 8000
83
87
 
84
88
  def _print_backoff_message(e: Details):
85
89
  logger.warning(
86
- f"OpenAI backing off for {e['wait']:.2f}s due to {e['exception'].__class__.__name__}" # type: ignore
90
+ "OpenAI backing off for %.2fs due to %s",
91
+ e["wait"], # type: ignore
92
+ e["exception"].__class__.__name__, # type: ignore
87
93
  )
88
94
 
89
95
 
@@ -91,6 +97,8 @@ def _is_retryable_error(e: BaseException) -> bool:
91
97
  if (
92
98
  isinstance(e, BadRequestError)
93
99
  or isinstance(e, ContextWindowException)
100
+ or isinstance(e, InvalidPromptException)
101
+ or isinstance(e, ProviderAuthenticationException)
94
102
  or isinstance(e, AuthenticationError)
95
103
  or isinstance(e, PermissionDeniedError)
96
104
  or isinstance(e, NotFoundError)
@@ -281,7 +289,13 @@ async def get_openai_chat_completion_streaming_async(
281
289
  if llm_output_partial:
282
290
  return finalize_llm_output_partial(llm_output_partial)
283
291
  return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
284
- except (RateLimitError, BadRequestError) as e:
292
+ except (
293
+ APITimeoutError,
294
+ RateLimitError,
295
+ BadRequestError,
296
+ AuthenticationError,
297
+ PermissionDeniedError,
298
+ ) as e:
285
299
  if e2 := _convert_openai_error(e):
286
300
  raise e2 from e
287
301
  raise
@@ -296,10 +310,19 @@ async def get_openai_chat_completion_streaming_async(
296
310
 
297
311
 
298
312
  def _convert_openai_error(e: Exception):
299
- if isinstance(e, RateLimitError):
313
+ if isinstance(e, (AuthenticationError, PermissionDeniedError)):
314
+ return ProviderAuthenticationException(e.message)
315
+ elif isinstance(e, RateLimitError):
300
316
  return RateLimitException(e)
301
- elif isinstance(e, BadRequestError) and e.code == "context_length_exceeded":
317
+ elif isinstance(e, APITimeoutError):
318
+ return TimeoutException(str(e) or "Request timed out")
319
+ elif isinstance(e, BadRequestError) and e.code in (
320
+ "context_length_exceeded",
321
+ "string_above_max_length",
322
+ ):
302
323
  return ContextWindowException()
324
+ elif isinstance(e, BadRequestError) and e.code == "invalid_prompt":
325
+ return InvalidPromptException()
303
326
  return None
304
327
 
305
328
 
@@ -473,7 +496,13 @@ async def get_openai_chat_completion_async(
473
496
  )
474
497
 
475
498
  return output
476
- except (RateLimitError, BadRequestError) as e:
499
+ except (
500
+ APITimeoutError,
501
+ RateLimitError,
502
+ BadRequestError,
503
+ AuthenticationError,
504
+ PermissionDeniedError,
505
+ ) as e:
477
506
  if e2 := _convert_openai_error(e):
478
507
  raise e2 from e
479
508
  raise
@@ -549,8 +578,10 @@ async def _get_openai_embeddings_async_one_batch(
549
578
  dimensions=dimensions if dimensions is not None else omit,
550
579
  )
551
580
  return [data.embedding for data in response.data]
552
- except RateLimitError as e:
553
- raise RateLimitException(e) from e
581
+ except (RateLimitError, AuthenticationError, PermissionDeniedError) as e:
582
+ if e2 := _convert_openai_error(e):
583
+ raise e2 from e
584
+ raise
554
585
 
555
586
 
556
587
  async def get_chunked_openai_embeddings_async(
@@ -6,7 +6,9 @@ from typing import Literal, cast
6
6
 
7
7
  from openai import AsyncOpenAI, AuthenticationError, BadRequestError
8
8
 
9
- from docent._llm_util.data_models.exceptions import ContextWindowException
9
+ from docent._llm_util.data_models.exceptions import (
10
+ ContextWindowException,
11
+ )
10
12
  from docent._llm_util.data_models.llm_output import (
11
13
  AsyncSingleLLMOutputStreamingCallback,
12
14
  LLMOutput,
@@ -252,7 +252,9 @@ class AgentRunTree(BaseModel):
252
252
  # This should never happen, but check anyways for safety; fallback to global root
253
253
  if par_id not in nodes:
254
254
  logger.error(
255
- f"Parent {par_id} not found for transcript {t_id}. Assigning to global root as a fallback"
255
+ "Parent %s not found for transcript %s. Assigning to global root as a fallback",
256
+ par_id,
257
+ t_id,
256
258
  )
257
259
  par_id = GLOBAL_ROOT_ID
258
260
  nodes[par_id].children_ids.append(t_id)
@@ -264,13 +266,13 @@ class AgentRunTree(BaseModel):
264
266
  if obj_type == NodeType.TRANSCRIPT_GROUP:
265
267
  # This should never happen, but check anyways for safety
266
268
  if obj_id not in tg_dict:
267
- logger.error(f"Transcript group {obj_id} not found")
269
+ logger.error("Transcript group %s not found", obj_id)
268
270
  return datetime.max
269
271
  return tg_dict[obj_id].created_at or datetime.max
270
272
  elif obj_type == NodeType.TRANSCRIPT:
271
273
  # This should never happen, but check anyways for safety
272
274
  if obj_id not in t_dict:
273
- logger.error(f"Transcript {obj_id} not found")
275
+ logger.error("Transcript %s not found", obj_id)
274
276
  return datetime.max
275
277
  return t_dict[obj_id].created_at or datetime.max
276
278
  else:
@@ -1,3 +1,4 @@
1
+ import re
1
2
  from datetime import datetime
2
3
  from typing import Annotated, Literal, Union
3
4
  from uuid import uuid4
@@ -22,6 +23,8 @@ class CitationTarget(BaseModel):
22
23
 
23
24
 
24
25
  class ParsedCitation(BaseModel):
26
+ """Citation parsed from text. start_idx and end_idx are UTF-16 code unit offsets for browser string slicing."""
27
+
25
28
  start_idx: int
26
29
  end_idx: int
27
30
  item_alias: str
@@ -107,6 +110,34 @@ RANGE_BEGIN = "<RANGE>"
107
110
  RANGE_END = "</RANGE>"
108
111
 
109
112
 
113
+ # Citation alias grammar (single source of truth).
114
+ # Each regex is anchored so it can be used independently of evaluation order.
115
+ CITATION_BLOCK_RE = re.compile(r"^T(\d+)B(\d+)$") # [T0B1]
116
+ CITATION_AGENT_RUN_METADATA_RE = re.compile(r"^R(\d+)M\.([^:]+)$") # [R0M.key]
117
+ CITATION_TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$") # [T0M.key]
118
+ CITATION_MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$") # [T0B1M.key]
119
+ CITATION_ANALYSIS_RESULT_RE = re.compile(r"^A(\d+)$") # [A0]
120
+
121
+ _CITATION_ALIAS_RES = (
122
+ CITATION_MESSAGE_METADATA_RE,
123
+ CITATION_TRANSCRIPT_METADATA_RE,
124
+ CITATION_AGENT_RUN_METADATA_RE,
125
+ CITATION_BLOCK_RE,
126
+ CITATION_ANALYSIS_RESULT_RE,
127
+ )
128
+
129
+
130
+ def is_valid_citation_alias(item_alias: str) -> bool:
131
+ """Whether `item_alias` matches one of the supported citation alias shapes."""
132
+ return any(rx.match(item_alias) for rx in _CITATION_ALIAS_RES)
133
+
134
+
135
+ def _utf16_code_unit_len(text: str) -> int:
136
+ """Return the number of UTF-16 code units in text. Used to ensure indices match browser string
137
+ slicing with non-BMP characters"""
138
+ return len(text.encode("utf-16-le")) // 2
139
+
140
+
110
141
  def scan_brackets(text: str) -> list[tuple[int, int, str]]:
111
142
  """Scan text for bracketed segments, respecting RANGE markers and nested brackets.
112
143
 
@@ -160,15 +191,14 @@ def parse_single_citation(part: str) -> tuple[str, CitationTargetTextRange | Non
160
191
  """
161
192
  Parse a single citation token inside a bracket and return its components.
162
193
 
163
- Returns ParsedCitation or None if invalid.
164
- For metadata citations, transcript_idx may be None (for agent run metadata).
194
+ Returns (item_alias, text_range) or None if the token is not a syntactically
195
+ valid citation alias (see `is_valid_citation_alias`).
165
196
  Supports optional text range for all valid citation kinds.
166
197
  """
167
198
  token = part.strip()
168
199
  if not token:
169
200
  return None
170
201
 
171
- # Extract optional range part
172
202
  item_alias = token
173
203
  text_range: CitationTargetTextRange | None = None
174
204
  if ":" in token:
@@ -176,6 +206,9 @@ def parse_single_citation(part: str) -> tuple[str, CitationTargetTextRange | Non
176
206
  item_alias = left.strip()
177
207
  text_range = _extract_range_pattern(right)
178
208
 
209
+ if not is_valid_citation_alias(item_alias):
210
+ return None
211
+
179
212
  return item_alias, text_range
180
213
 
181
214
 
@@ -196,9 +229,8 @@ def parse_citations(text: str) -> tuple[str, list[ParsedCitation]]:
196
229
  text: The text to parse citations from
197
230
 
198
231
  Returns:
199
- A tuple of (cleaned_text, citations) where cleaned_text has brackets and range markers removed
200
- and citations have start_idx and end_idx representing character positions
201
- in the cleaned text
232
+ A tuple of (cleaned_text, citations) where cleaned_text has brackets and range markers removed.
233
+ Citation start_idx and end_idx are UTF-16 code unit offsets for browser string slicing.
202
234
  """
203
235
  citations: list[ParsedCitation] = []
204
236
 
@@ -212,7 +244,12 @@ def parse_citations(text: str) -> tuple[str, list[ParsedCitation]]:
212
244
  label, text_range = parsed
213
245
 
214
246
  citations.append(
215
- ParsedCitation(start_idx=start, end_idx=end, item_alias=label, text_range=text_range)
247
+ ParsedCitation(
248
+ start_idx=_utf16_code_unit_len(text[:start]),
249
+ end_idx=_utf16_code_unit_len(text[:end]),
250
+ item_alias=label,
251
+ text_range=text_range,
252
+ )
216
253
  )
217
254
 
218
255
  # We're not cleaning the text right now but may do that later
@@ -323,24 +323,46 @@ class ReadingStepSubmission(BaseModel):
323
323
  return _with_legacy_max_new_tokens_default(value)
324
324
 
325
325
  @model_validator(mode="after")
326
- def _validate_dql_source(self) -> "ReadingStepSubmission":
327
- if self.source_reading_preset_version is not None and self.source_reading_preset_id is None:
326
+ def _validate(self) -> "ReadingStepSubmission":
327
+ # Scripted and template are mutually exclusive
328
+ if (self.requests is None) == (self.prompt_template_segments is None):
328
329
  raise ValueError(
329
- "ReadingStepSubmission: source_reading_preset_version cannot be set "
330
- "without source_reading_preset_id"
330
+ "ReadingStepSubmission: must set one of requests / prompt_template_segments"
331
331
  )
332
+
333
+ # Validate scripted reading
332
334
  if self.requests is not None:
333
- if self.dql_query is not None or self.dql_step_alias is not None:
335
+ if (
336
+ self.dql_query is not None
337
+ or self.dql_step_alias is not None
338
+ or self.context_configs is not None
339
+ ):
334
340
  raise ValueError(
335
- "Scripted reading submissions must not set dql_query or dql_step_alias"
341
+ "ReadingStepSubmission: scripted readings must not set dql_query, dql_step_alias, or context_configs"
336
342
  )
337
- return self
338
- if self.dql_query is not None and self.dql_step_alias is not None:
339
- raise ValueError("ReadingStepSubmission: set exactly one of dql_query / dql_step_alias")
340
- if self.dql_query is None and self.dql_step_alias is None:
341
- raise ValueError(
342
- "ReadingStepSubmission: template entries must set one of dql_query / dql_step_alias"
343
- )
343
+ if (
344
+ self.source_reading_preset_version is not None
345
+ or self.source_reading_preset_id is not None
346
+ ):
347
+ raise ValueError(
348
+ "ReadingStepSubmission: scripted readings cannot be associated with a reading preset"
349
+ )
350
+
351
+ # Validate template reading
352
+ else:
353
+ if (
354
+ self.source_reading_preset_version is not None
355
+ and self.source_reading_preset_id is None
356
+ ):
357
+ raise ValueError(
358
+ "ReadingStepSubmission: source_reading_preset_version cannot be set "
359
+ "without source_reading_preset_id"
360
+ )
361
+ if (self.dql_query is None) == (self.dql_step_alias is None):
362
+ raise ValueError(
363
+ "ReadingStepSubmission: template readings must set exactly one of dql_query / dql_step_alias"
364
+ )
365
+
344
366
  return self
345
367
 
346
368
 
@@ -362,28 +384,16 @@ class PresetReadingStepSubmission(BaseModel):
362
384
  cache_mode: ReadingCacheMode = "reading"
363
385
 
364
386
  @model_validator(mode="after")
365
- def _validate_dql_source(self) -> "PresetReadingStepSubmission":
366
- if self.source_reading_preset_id is None and self.source_reading_preset_name is None:
367
- raise ValueError(
368
- "PresetReadingStepSubmission: set one of "
369
- "source_reading_preset_id / source_reading_preset_name"
370
- )
371
- if (
372
- self.source_reading_preset_id is not None
373
- and self.source_reading_preset_name is not None
374
- ):
387
+ def _validate(self) -> "PresetReadingStepSubmission":
388
+ if (self.source_reading_preset_id is None) == (self.source_reading_preset_name is None):
375
389
  raise ValueError(
376
- "PresetReadingStepSubmission: set only one of "
390
+ "PresetReadingStepSubmission: set exactly one of "
377
391
  "source_reading_preset_id / source_reading_preset_name"
378
392
  )
379
- if self.dql_query is not None and self.dql_step_alias is not None:
393
+ if (self.dql_query is None) == (self.dql_step_alias is None):
380
394
  raise ValueError(
381
395
  "PresetReadingStepSubmission: set exactly one of dql_query / dql_step_alias"
382
396
  )
383
- if self.dql_query is None and self.dql_step_alias is None:
384
- raise ValueError(
385
- "PresetReadingStepSubmission: must set one of dql_query / dql_step_alias"
386
- )
387
397
  return self
388
398
 
389
399