docent-python 0.1.47a0__tar.gz → 0.1.49a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/model_registry.py +8 -0
  3. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/anthropic.py +36 -8
  4. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/google.py +31 -8
  5. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/_tiktoken_util.py +6 -6
  6. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/agent_run.py +11 -1
  7. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/response_format.py +1 -0
  8. docent_python-0.1.49a0/docent/data_models/metadata_util.py +32 -0
  9. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/client.py +23 -1
  10. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/pyproject.toml +1 -1
  11. docent_python-0.1.47a0/docent/data_models/metadata_util.py +0 -16
  12. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/.gitignore +0 -0
  13. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/LICENSE.md +0 -0
  14. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/README.md +0 -0
  15. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/__init__.py +0 -0
  16. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/__init__.py +0 -0
  17. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/data_models/__init__.py +0 -0
  18. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  19. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  20. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/llm_cache.py +0 -0
  21. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/llm_svc.py +0 -0
  22. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/__init__.py +0 -0
  23. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/common.py +0 -0
  24. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/openai.py +0 -0
  25. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/openrouter.py +0 -0
  26. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/preference_types.py +0 -0
  27. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  28. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_log_util/__init__.py +0 -0
  29. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_log_util/logger.py +0 -0
  30. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/__init__.py +0 -0
  31. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/__init__.py +0 -0
  32. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/content.py +0 -0
  33. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/message.py +0 -0
  34. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/tool.py +0 -0
  35. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/citation.py +0 -0
  36. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/formatted_objects.py +0 -0
  37. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/judge.py +0 -0
  38. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/regex.py +0 -0
  39. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/transcript.py +0 -0
  40. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/util.py +0 -0
  41. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/__init__.py +0 -0
  42. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/analysis.py +0 -0
  43. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/impl.py +0 -0
  44. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/runner.py +0 -0
  45. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/stats.py +0 -0
  46. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/types.py +0 -0
  47. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/forgiving_json.py +0 -0
  48. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/meta_schema.json +0 -0
  49. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/meta_schema.py +0 -0
  50. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/parse_output.py +0 -0
  51. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/template_formatter.py +0 -0
  52. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/voting.py +0 -0
  53. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/loaders/load_inspect.py +0 -0
  54. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/mcp/__init__.py +0 -0
  55. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/mcp/__main__.py +0 -0
  56. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/mcp/server.py +0 -0
  57. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/py.typed +0 -0
  58. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/__init__.py +0 -0
  59. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/load.py +0 -0
  60. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/log.eval +0 -0
  61. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/tb_airline.json +0 -0
  62. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/__init__.py +0 -0
  63. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/agent_run_writer.py +0 -0
  64. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/llm_context.py +0 -0
  65. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/llm_request.py +0 -0
  66. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/trace.py +0 -0
  67. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/trace_temp.py +0 -0
  68. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/uv.lock +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.47a0
+Version: 0.1.49a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
docent/_llm_util/model_registry.py
@@ -30,6 +30,10 @@ class ModelInfo:
 
 # Note: some providers charge extra for long prompts/outputs. We don't account for this yet.
 _REGISTRY: list[tuple[str, ModelInfo]] = [
+    (
+        "gpt-5-chat-latest",
+        ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
+    ),
     (
         "gpt-5-nano",
         ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
@@ -62,6 +66,10 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-haiku-4-5",
         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
     ),
+    (
+        "claude-opus-4-5-20251101",
+        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
+    ),
     (
         "gemini-2.5-flash-lite",
         ModelInfo(
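Note: the rate values appear to be USD per million tokens (the new entries match the providers' published pricing). A minimal sketch of how a caller could turn a registry entry into a dollar estimate under that assumption; estimate_cost_usd is illustrative and not part of the SDK:

def estimate_cost_usd(rate: dict[str, float], input_tokens: int, output_tokens: int) -> float:
    # Assumes rate values are USD per 1M tokens, as the registry entries suggest.
    return (rate["input"] * input_tokens + rate["output"] * output_tokens) / 1_000_000

# claude-opus-4-5-20251101 with 10_000 input and 2_000 output tokens:
# 5.0 * 0.01 + 25.0 * 0.002 = $0.10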
docent/_llm_util/providers/anthropic.py
@@ -73,6 +73,7 @@ from docent.data_models.chat import (
 from docent.data_models.chat.response_format import ResponseFormat
 
 logger = get_logger(__name__)
+ANTHROPIC_STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"
 
 
 def _print_backoff_message(e: Details):
@@ -188,6 +189,25 @@ def _parse_tool_choice(tool_choice: Literal["auto", "required"] | None) -> ToolC
     return ToolChoiceAnyParam(type="any")
 
 
+def _build_output_format(response_format: ResponseFormat | None) -> dict[str, Any] | None:
+    if response_format is None:
+        return None
+    if response_format.strict is False:
+        raise NotImplementedError(
+            "Anthropic structured outputs do not support strict=False; "
+            "set ResponseFormat.strict=True."
+        )
+    if response_format.type != "json_schema":
+        raise ValueError(
+            f"Unsupported response format type: {response_format.type}. "
+            "Only 'json_schema' is currently supported."
+        )
+    return {
+        "type": "json_schema",
+        "schema": response_format.schema_,
+    }
+
+
 def _convert_anthropic_error(e: Exception):
     if isinstance(e, BadRequestError):
         if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
@@ -220,10 +240,6 @@ async def get_anthropic_chat_completion_streaming_async(
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
 ):
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Anthropic yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Anthropic yet."
@@ -251,6 +267,14 @@ async def get_anthropic_chat_completion_streaming_async(
         create_kwargs["tool_choice"] = tool_choice_param
     if system is not None:
         create_kwargs["system"] = system
+    if response_format is not None:
+        output_format = _build_output_format(response_format)
+        extra_headers = dict(create_kwargs.get("extra_headers", {}))
+        extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+        create_kwargs["extra_headers"] = extra_headers
+        extra_body = dict(create_kwargs.get("extra_body", {}))
+        extra_body["output_format"] = output_format
+        create_kwargs["extra_body"] = extra_body
 
     stream = cast(
         AsyncStream[RawMessageStreamEvent],
@@ -420,10 +444,6 @@ async def get_anthropic_chat_completion_async(
     We should actually implement this at some point, but it does not work.
     """
 
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Anthropic yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Anthropic yet."
@@ -450,6 +470,14 @@ async def get_anthropic_chat_completion_async(
         create_kwargs["tool_choice"] = tool_choice_param
     if system is not None:
         create_kwargs["system"] = system
+    if response_format is not None:
+        output_format = _build_output_format(response_format)
+        extra_headers = dict(create_kwargs.get("extra_headers", {}))
+        extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+        create_kwargs["extra_headers"] = extra_headers
+        extra_body = dict(create_kwargs.get("extra_body", {}))
+        extra_body["output_format"] = output_format
+        create_kwargs["extra_body"] = extra_body
 
     raw_output = cast(Message, await client.messages.create(**create_kwargs))
 
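Both Anthropic entry points now share the same wiring: _build_output_format validates the unified ResponseFormat, and the request gains a beta header plus an output_format body field. A rough sketch of the effect, with a hypothetical schema (the field names come from the diff; the exact ResponseFormat constructor call is an assumption):

# Hypothetical example, not from the package.
fmt = ResponseFormat(
    type="json_schema",
    schema_={"type": "object", "properties": {"answer": {"type": "string"}}},
)
assert _build_output_format(fmt) == {"type": "json_schema", "schema": fmt.schema_}
# The messages.create(...) call then carries:
#   extra_headers={"anthropic-beta": "structured-outputs-2025-11-13"}
#   extra_body={"output_format": {"type": "json_schema", "schema": ...}}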
docent/_llm_util/providers/google.py
@@ -63,6 +63,27 @@ def _is_retryable_error(exception: BaseException) -> bool:
     return False
 
 
+def _build_response_format_config(
+    response_format: ResponseFormat | None,
+    *,
+    model_name: str,
+) -> dict[str, Any]:
+    """Build Gemini structured output config from unified ResponseFormat."""
+    if response_format is None:
+        return {}
+
+    if response_format.type != "json_schema":
+        raise ValueError(
+            f"Unsupported response format type: {response_format.type} for model {model_name}. "
+            "Only 'json_schema' is currently supported."
+        )
+
+    return {
+        "response_mime_type": "application/json",
+        "response_json_schema": response_format.schema_,
+    }
+
+
 @backoff.on_exception(
     backoff.expo,
     exception=(Exception),
@@ -85,16 +106,16 @@ async def get_google_chat_completion_async(
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Google yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Google yet."
         )
 
     system, input_messages = _parse_chat_messages(messages, tools_provided=bool(tools))
+    response_format_config = _build_response_format_config(
+        response_format,
+        model_name=model_name,
+    )
 
     async with async_timeout_ctx(timeout):
         thinking_cfg = None
@@ -118,6 +139,7 @@ async def get_google_chat_completion_async(
                 if tool_choice is not None
                 else None
             ),
+            **response_format_config,
         ),
     )
 
@@ -153,16 +175,16 @@ async def get_google_chat_completion_streaming_async(
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Google yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Google yet."
         )
 
     system, input_messages = _parse_chat_messages(messages, tools_provided=bool(tools))
+    response_format_config = _build_response_format_config(
+        response_format,
+        model_name=model_name,
+    )
 
     try:
         async with async_timeout_ctx(timeout):
@@ -187,6 +209,7 @@ async def get_google_chat_completion_streaming_async(
                 if tool_choice is not None
                 else None
            ),
+            **response_format_config,
         ),
     )
 
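The Gemini path mirrors the Anthropic one, except the helper's kwargs are spread into the generation config instead of going through headers. A quick sketch of its output (the ResponseFormat construction is again an assumption):

# Hypothetical example, not from the package.
cfg = _build_response_format_config(
    ResponseFormat(type="json_schema", schema_={"type": "object"}),
    model_name="gemini-2.5-flash-lite",
)
# cfg == {"response_mime_type": "application/json",
#         "response_json_schema": {"type": "object"}}
# With response_format=None the helper returns {}, so the config is unchanged.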
docent/data_models/_tiktoken_util.py
@@ -9,15 +9,15 @@ def get_token_count(text: str, model: str = "gpt-4") -> int:
     return len(encoding.encode(text, disallowed_special=()))
 
 
-def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
+def truncate_to_token_limit(
+    text: str, max_tokens: int, model: str = "gpt-4"
+) -> tuple[str, int, int]:
     """Truncate text to stay within the specified token limit."""
     encoding = tiktoken.encoding_for_model(model)
     tokens = encoding.encode(text, disallowed_special=())
-
-    if len(tokens) <= max_tokens:
-        return text
-
-    return encoding.decode(tokens[:max_tokens])
+    orig_num_tokens = len(tokens)
+    new_tokens = tokens[:max_tokens]
+    return encoding.decode(new_tokens), len(new_tokens), orig_num_tokens
 
 
 class MessageRange:
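This is a breaking signature change: truncate_to_token_limit now always returns a (text, kept_tokens, original_tokens) tuple, even when nothing is cut, so callers can detect truncation by comparing the two counts. An illustrative call pattern, not from the package:

text, kept, original = truncate_to_token_limit(long_text, max_tokens=1000)
if kept < original:
    # The text was actually truncated.
    print(f"Truncated from {original} to {kept} tokens")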
docent/data_models/agent_run.py
@@ -21,7 +21,7 @@ from docent.data_models.citation import (
     TranscriptBlockMetadataItem,
     TranscriptMetadataItem,
 )
-from docent.data_models.metadata_util import dump_metadata
+from docent.data_models.metadata_util import deep_merge_metadata, dump_metadata
 from docent.data_models.transcript import Transcript, TranscriptGroup, render_metadata_comments
 
 logger = get_logger(__name__)
@@ -100,6 +100,16 @@ class AgentRun(BaseModel):
         """Returns a dictionary mapping transcript group IDs to TranscriptGroup objects."""
         return {tg.id: tg for tg in self.transcript_groups}
 
+    def merge_metadata(self, metadata: dict[str, Any] | None) -> None:
+        """
+        Merge metadata into the agent run metadata in-place.
+
+        Uses a deep merge so nested dictionaries accumulate without losing existing keys.
+        """
+        if not metadata:
+            return
+        deep_merge_metadata(self.metadata, metadata)
+
     def to_text(
         self,
         children_text: str,
docent/data_models/chat/response_format.py
@@ -15,6 +15,7 @@ class ResponseFormat(BaseModel):
     - OpenAI: response_format parameter
     - Anthropic: output_format parameter (with beta header)
     - OpenRouter: response_format parameter (same as OpenAI)
+    - Google Gemini: response_mime_type + response_json_schema in GenerateContentConfig
 
     Attributes:
         type: The format type. Currently only "json_schema" is supported.
docent/data_models/metadata_util.py (new file in 0.1.49a0)
@@ -0,0 +1,32 @@
+import json
+from typing import Any, cast
+
+from pydantic_core import to_jsonable_python
+
+
+def dump_metadata(metadata: dict[str, Any]) -> str | None:
+    """
+    Dump metadata to a JSON string.
+    We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
+    """
+    if not metadata:
+        return None
+    metadata_obj = to_jsonable_python(metadata)
+    text = json.dumps(metadata_obj, indent=2)
+    return text.strip()
+
+
+def deep_merge_metadata(destination: dict[str, Any], source: dict[str, Any]) -> dict[str, Any]:
+    """
+    Recursively merge metadata dictionaries in-place.
+
+    Nested dictionaries are merged to preserve existing keys while allowing
+    later values to override earlier ones.
+    """
+    for key, value in source.items():
+        dest_value = destination.get(key)
+        if isinstance(dest_value, dict) and isinstance(value, dict):
+            deep_merge_metadata(cast(dict[str, Any], dest_value), cast(dict[str, Any], value))
+        else:
+            destination[key] = value
+    return destination
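The merge semantics in a nutshell, with made-up metadata:

dest = {"scores": {"accuracy": 0.9}, "tag": "old"}
deep_merge_metadata(dest, {"scores": {"f1": 0.8}, "tag": "new"})
# dest == {"scores": {"accuracy": 0.9, "f1": 0.8}, "tag": "new"}
# Nested dicts merge key-by-key; non-dict values are overwritten.
# AgentRun.merge_metadata (above) delegates here after skipping falsy input.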
docent/sdk/client.py
@@ -253,6 +253,28 @@ class Docent:
 
         raise requests.HTTPError(f"HTTP {response.status_code}: {detail}", response=response)
 
+    def _post_with_retry(
+        self,
+        url: str,
+        max_retries: int = 3,
+        backoff_factor: float = 1.0,
+        **kwargs: Any,
+    ) -> requests.Response:
+        """POST with retries on 5xx errors."""
+        last_response: requests.Response | None = None
+        for attempt in range(max_retries + 1):
+            last_response = self._session.post(url, **kwargs)
+            if last_response.status_code < 500 or attempt == max_retries:
+                return last_response
+            wait = backoff_factor * (2**attempt)
+            self._logger.warning(
+                f"Server error {last_response.status_code} on POST {url}, "
+                f"retrying in {wait:.1f}s (attempt {attempt + 1}/{max_retries})"
+            )
+            time.sleep(wait)
+        assert last_response is not None
+        return last_response
+
 
     def _login(self, api_key: str):
         """Login with email/password to establish session."""
@@ -405,7 +427,7 @@ class Docent:
         else:
             raise ValueError(f"Unsupported compression '{compression}'")
 
-        response = self._session.post(url, **request_kwargs)
+        response = self._post_with_retry(url, **request_kwargs)
         self._handle_response_errors(response)
 
         # Server returns 202 with job_id for async processing
pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.47-alpha"
+version = "0.1.49-alpha"
 authors = [
     { name="Transluce", email="info@transluce.org" },
 ]
docent/data_models/metadata_util.py (removed from the 0.1.47a0 tree)
@@ -1,16 +0,0 @@
-import json
-from typing import Any
-
-from pydantic_core import to_jsonable_python
-
-
-def dump_metadata(metadata: dict[str, Any]) -> str | None:
-    """
-    Dump metadata to a JSON string.
-    We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
-    """
-    if not metadata:
-        return None
-    metadata_obj = to_jsonable_python(metadata)
-    text = json.dumps(metadata_obj, indent=2)
-    return text.strip()