docent-python 0.1.57a0__tar.gz → 0.1.59a0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (77)
  1. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/model_registry.py +37 -13
  3. docent_python-0.1.59a0/docent/_llm_util/providers/preference_types.py +268 -0
  4. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/reading.py +19 -1
  5. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/client.py +40 -95
  6. docent_python-0.1.59a0/docent/sdk/integrations/__init__.py +27 -0
  7. docent_python-0.1.59a0/docent/sdk/integrations/harbor.py +893 -0
  8. docent_python-0.1.59a0/docent/sdk/integrations/inspect.py +148 -0
  9. docent_python-0.1.59a0/docent/sdk/integrations/nemogym.py +611 -0
  10. docent_python-0.1.59a0/docent/sdk/integrations/util.py +84 -0
  11. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/llm_request.py +9 -2
  12. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/reading.py +16 -2
  13. docent_python-0.1.59a0/docent/sdk/util.py +16 -0
  14. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/pyproject.toml +1 -1
  15. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/uv.lock +149 -149
  16. docent_python-0.1.57a0/docent/_llm_util/providers/preference_types.py +0 -110
  17. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/.gitignore +0 -0
  18. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/LICENSE.md +0 -0
  19. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/README.md +0 -0
  20. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/__init__.py +0 -0
  21. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/__init__.py +0 -0
  22. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/__init__.py +0 -0
  23. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  24. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  25. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_cache.py +0 -0
  26. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_svc.py +0 -0
  27. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/__init__.py +0 -0
  28. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/anthropic.py +0 -0
  29. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/common.py +0 -0
  30. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/google.py +0 -0
  31. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openai.py +0 -0
  32. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openrouter.py +0 -0
  33. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  34. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_log_util/__init__.py +0 -0
  35. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_log_util/logger.py +0 -0
  36. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/__init__.py +0 -0
  37. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/_tiktoken_util.py +0 -0
  38. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/agent_run.py +0 -0
  39. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/__init__.py +0 -0
  40. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/content.py +0 -0
  41. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/message.py +0 -0
  42. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/response_format.py +0 -0
  43. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/tool.py +0 -0
  44. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/citation.py +0 -0
  45. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/feedback.py +0 -0
  46. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/formatted_objects.py +0 -0
  47. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/judge.py +0 -0
  48. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/metadata_util.py +0 -0
  49. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/regex.py +0 -0
  50. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/transcript.py +0 -0
  51. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/util.py +0 -0
  52. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/__init__.py +0 -0
  53. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/analysis.py +0 -0
  54. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/impl.py +0 -0
  55. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/runner.py +0 -0
  56. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/stats.py +0 -0
  57. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/types.py +0 -0
  58. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/forgiving_json.py +0 -0
  59. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.json +0 -0
  60. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.py +0 -0
  61. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/parse_output.py +0 -0
  62. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/template_formatter.py +0 -0
  63. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/voting.py +0 -0
  64. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/loaders/load_inspect.py +0 -0
  65. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/mcp/__init__.py +0 -0
  66. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/mcp/__main__.py +0 -0
  67. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/mcp/server.py +0 -0
  68. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/py.typed +0 -0
  69. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/__init__.py +0 -0
  70. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/load.py +0 -0
  71. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/log.eval +0 -0
  72. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/tb_airline.json +0 -0
  73. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/__init__.py +0 -0
  74. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/agent_run_writer.py +0 -0
  75. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/llm_context.py +0 -0
  76. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/trace.py +0 -0
  77. {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/trace_temp.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.57a0
3
+ Version: 0.1.59a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
3
4
  from dataclasses import dataclass
4
5
  from functools import lru_cache
5
6
  from typing import Optional
@@ -9,6 +10,8 @@ from docent._log_util import get_logger
9
10
 
10
11
  logger = get_logger(__name__)
11
12
 
13
+ _CLAUDE_VERSION_PATTERN = re.compile(r"(claude-(?:haiku|sonnet|opus)-4)[.-](\d+)\b")
14
+
12
15
 
13
16
  """
14
17
  Values are USD per million tokens
@@ -34,6 +37,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
34
37
  "gpt-5-chat-latest",
35
38
  ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
36
39
  ),
40
+ (
41
+ "gpt-5.4-mini",
42
+ ModelInfo(rate={"input": 0.75, "output": 4.50}, context_window=400_000),
43
+ ),
44
+ (
45
+ "gpt-5.4",
46
+ ModelInfo(rate={"input": 2.50, "output": 15.0}, context_window=1_050_000),
47
+ ),
37
48
  (
38
49
  "gpt-5-nano",
39
50
  ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
@@ -62,18 +73,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
62
73
  "claude-sonnet-4-5",
63
74
  ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
64
75
  ),
65
- (
66
- "claude-sonnet-4-6",
67
- ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
68
- ),
69
- (
70
- "claude-opus-4-6",
71
- ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
72
- ),
73
- (
74
- "claude-haiku-4-5",
75
- ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
76
- ),
76
+ ("claude-sonnet-4-6", ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000)),
77
+ ("claude-opus-4-6", ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=1_000_000)),
78
+ ("claude-haiku-4-5", ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000)),
77
79
  (
78
80
  "claude-opus-4-5-20251101",
79
81
  ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
@@ -108,6 +110,13 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
108
110
  context_window=1_048_576,
109
111
  ),
110
112
  ),
113
+ (
114
+ "gemini-3.1-pro-preview",
115
+ ModelInfo(
116
+ rate={"input": 2.00, "output": 12.00},
117
+ context_window=1_048_576,
118
+ ),
119
+ ),
111
120
  (
112
121
  "gemini-3-flash-preview",
113
122
  ModelInfo(
@@ -146,12 +155,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
146
155
  ]
147
156
 
148
157
 
158
+ def normalize_model_name(model_name: str) -> str:
159
+ """Normalize provider-specific naming differences before registry lookup."""
160
+
161
+ return _CLAUDE_VERSION_PATTERN.sub(r"\1-\2", model_name)
162
+
163
+
164
+ def model_names_match(expected: str, actual: str) -> bool:
165
+ """Match a configured model name against a provider-reported model string."""
166
+
167
+ normalized_expected = normalize_model_name(expected)
168
+ normalized_actual = normalize_model_name(actual)
169
+ return normalized_expected in normalized_actual
170
+
171
+
149
172
  @lru_cache(maxsize=None)
150
173
  def get_model_info(model_name: str) -> Optional[ModelInfo]:
174
+ normalized_model_name = normalize_model_name(model_name)
151
175
  for registry_model_name, info in sorted(
152
176
  _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
153
177
  ):
154
- if registry_model_name in model_name:
178
+ if registry_model_name in normalized_model_name:
155
179
  return info
156
180
  return None
157
181
 
@@ -0,0 +1,268 @@
1
+ """Provides preferences of which LLM models to use for different Docent functions."""
2
+
3
+ from functools import cached_property
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, ConfigDict
7
+
8
+ from docent._llm_util.model_registry import get_context_window
9
+ from docent._log_util import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
+ class ModelOption(BaseModel):
15
+ """Configuration for a specific model from a provider. Not to be confused with ModelInfo.
16
+
17
+ Attributes:
18
+ provider: The name of the LLM provider (e.g., "openai", "anthropic").
19
+ model_name: The specific model to use from the provider.
20
+ reasoning_effort: Optional indication of computational effort to use.
21
+ """
22
+
23
+ model_config = ConfigDict(extra="ignore")
24
+
25
+ provider: str
26
+ model_name: str
27
+ reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
28
+
29
+
30
+ class ModelOptionWithContext(BaseModel):
31
+ """Enhanced model option that includes context window information for frontend use.
32
+ Not to be confused with ModelInfo or ModelOption.
33
+
34
+ Attributes:
35
+ provider: The name of the LLM provider (e.g., "openai", "anthropic").
36
+ model_name: The specific model to use from the provider.
37
+ reasoning_effort: Optional indication of computational effort to use.
38
+ context_window: The context window size in tokens.
39
+ uses_byok: Whether this model would use the user's own API key.
40
+ """
41
+
42
+ provider: str
43
+ model_name: str
44
+ reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
45
+ context_window: int
46
+ uses_byok: bool
47
+
48
+ @classmethod
49
+ def from_model_option(
50
+ cls, model_option: ModelOption, uses_byok: bool = False
51
+ ) -> "ModelOptionWithContext":
52
+ """Create a ModelOptionWithContext from a ModelOption.
53
+
54
+ Args:
55
+ model_option: The base model option
56
+ uses_byok: Whether this model requires bring-your-own-key
57
+
58
+ Returns:
59
+ ModelOptionWithContext with context window looked up from global mapping
60
+ """
61
+ context_window = get_context_window(model_option.model_name)
62
+
63
+ return cls(
64
+ provider=model_option.provider,
65
+ model_name=model_option.model_name,
66
+ reasoning_effort=model_option.reasoning_effort,
67
+ context_window=context_window,
68
+ uses_byok=uses_byok,
69
+ )
70
+
71
+
72
+ def merge_models_with_byok(
73
+ defaults: list[ModelOption],
74
+ byok: list[ModelOption],
75
+ api_keys: dict[str, str] | None,
76
+ ) -> list[ModelOptionWithContext]:
77
+ user_keys = api_keys or {}
78
+
79
+ merged: list[ModelOption] = list(defaults)
80
+ if user_keys:
81
+ merged.extend([m for m in byok if m.provider in user_keys])
82
+
83
+ return [ModelOptionWithContext.from_model_option(m, m.provider in user_keys) for m in merged]
84
+
85
+
86
+ class PublicProviderPreferences(BaseModel):
87
+ @cached_property
88
+ def default_judge_models(self) -> list[ModelOption]:
89
+ """Judge models that any user can access without providing their own API key"""
90
+
91
+ return [
92
+ ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="low"),
93
+ ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="medium"),
94
+ ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="high"),
95
+ ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="low"),
96
+ ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="medium"),
97
+ ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="high"),
98
+ ModelOption(
99
+ provider="anthropic",
100
+ model_name="claude-haiku-4-5",
101
+ reasoning_effort="low",
102
+ ),
103
+ ModelOption(
104
+ provider="anthropic",
105
+ model_name="claude-haiku-4-5",
106
+ reasoning_effort="medium",
107
+ ),
108
+ ModelOption(
109
+ provider="anthropic",
110
+ model_name="claude-haiku-4-5",
111
+ reasoning_effort="high",
112
+ ),
113
+ ModelOption(
114
+ provider="anthropic",
115
+ model_name="claude-opus-4-6",
116
+ reasoning_effort="low",
117
+ ),
118
+ ModelOption(
119
+ provider="anthropic",
120
+ model_name="claude-opus-4-6",
121
+ reasoning_effort="medium",
122
+ ),
123
+ ModelOption(
124
+ provider="anthropic",
125
+ model_name="claude-opus-4-6",
126
+ reasoning_effort="high",
127
+ ),
128
+ ModelOption(
129
+ provider="anthropic",
130
+ model_name="claude-sonnet-4-6",
131
+ reasoning_effort="low",
132
+ ),
133
+ ModelOption(
134
+ provider="anthropic",
135
+ model_name="claude-sonnet-4-6",
136
+ reasoning_effort="medium",
137
+ ),
138
+ ModelOption(
139
+ provider="anthropic",
140
+ model_name="claude-sonnet-4-6",
141
+ reasoning_effort="high",
142
+ ),
143
+ ModelOption(
144
+ provider="google",
145
+ model_name="gemini-3-flash-preview",
146
+ reasoning_effort="low",
147
+ ),
148
+ ModelOption(
149
+ provider="google",
150
+ model_name="gemini-3-flash-preview",
151
+ reasoning_effort="medium",
152
+ ),
153
+ ModelOption(
154
+ provider="google",
155
+ model_name="gemini-3-flash-preview",
156
+ reasoning_effort="high",
157
+ ),
158
+ ModelOption(
159
+ provider="google",
160
+ model_name="gemini-3.1-pro-preview",
161
+ reasoning_effort="low",
162
+ ),
163
+ ModelOption(
164
+ provider="google",
165
+ model_name="gemini-3.1-pro-preview",
166
+ reasoning_effort="medium",
167
+ ),
168
+ ModelOption(
169
+ provider="google",
170
+ model_name="gemini-3.1-pro-preview",
171
+ reasoning_effort="high",
172
+ ),
173
+ # Open Router equivalents
174
+ ModelOption(
175
+ provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="low"
176
+ ),
177
+ ModelOption(
178
+ provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="medium"
179
+ ),
180
+ ModelOption(
181
+ provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="high"
182
+ ),
183
+ ModelOption(provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="low"),
184
+ ModelOption(
185
+ provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="medium"
186
+ ),
187
+ ModelOption(
188
+ provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="high"
189
+ ),
190
+ ModelOption(
191
+ provider="openrouter",
192
+ model_name="anthropic/claude-haiku-4.5",
193
+ reasoning_effort="low",
194
+ ),
195
+ ModelOption(
196
+ provider="openrouter",
197
+ model_name="anthropic/claude-haiku-4.5",
198
+ reasoning_effort="medium",
199
+ ),
200
+ ModelOption(
201
+ provider="openrouter",
202
+ model_name="anthropic/claude-haiku-4.5",
203
+ reasoning_effort="high",
204
+ ),
205
+ ModelOption(
206
+ provider="openrouter",
207
+ model_name="anthropic/claude-opus-4.6",
208
+ reasoning_effort="low",
209
+ ),
210
+ ModelOption(
211
+ provider="openrouter",
212
+ model_name="anthropic/claude-opus-4.6",
213
+ reasoning_effort="medium",
214
+ ),
215
+ ModelOption(
216
+ provider="openrouter",
217
+ model_name="anthropic/claude-opus-4.6",
218
+ reasoning_effort="high",
219
+ ),
220
+ ModelOption(
221
+ provider="openrouter",
222
+ model_name="anthropic/claude-sonnet-4.6",
223
+ reasoning_effort="low",
224
+ ),
225
+ ModelOption(
226
+ provider="openrouter",
227
+ model_name="anthropic/claude-sonnet-4.6",
228
+ reasoning_effort="medium",
229
+ ),
230
+ ModelOption(
231
+ provider="openrouter",
232
+ model_name="anthropic/claude-sonnet-4.6",
233
+ reasoning_effort="high",
234
+ ),
235
+ ModelOption(
236
+ provider="openrouter",
237
+ model_name="google/gemini-3-flash-preview",
238
+ reasoning_effort="low",
239
+ ),
240
+ ModelOption(
241
+ provider="openrouter",
242
+ model_name="google/gemini-3-flash-preview",
243
+ reasoning_effort="medium",
244
+ ),
245
+ ModelOption(
246
+ provider="openrouter",
247
+ model_name="google/gemini-3-flash-preview",
248
+ reasoning_effort="high",
249
+ ),
250
+ ModelOption(
251
+ provider="openrouter",
252
+ model_name="google/gemini-3.1-pro-preview",
253
+ reasoning_effort="low",
254
+ ),
255
+ ModelOption(
256
+ provider="openrouter",
257
+ model_name="google/gemini-3.1-pro-preview",
258
+ reasoning_effort="medium",
259
+ ),
260
+ ModelOption(
261
+ provider="openrouter",
262
+ model_name="google/gemini-3.1-pro-preview",
263
+ reasoning_effort="high",
264
+ ),
265
+ ]
266
+
267
+
268
+ PUBLIC_PROVIDER_PREFERENCES = PublicProviderPreferences()
@@ -185,6 +185,25 @@ class ReadingStep(BaseModel):
185
185
  approved_at: datetime | None = None
186
186
  submitted_at: datetime | None = None
187
187
 
188
+ def to_submission(self, *, dql_query: str | None = None) -> "ReadingStepSubmission":
189
+ """Convert to a ReadingStepSubmission for resolve_reading_entry.
190
+
191
+ Optionally overrides dql_query (e.g. after alias substitution).
192
+ """
193
+ return ReadingStepSubmission(
194
+ alias=self.alias,
195
+ name=self.name,
196
+ model=self.model,
197
+ output_schema=self.output_schema,
198
+ max_new_tokens=self.max_new_tokens,
199
+ user_metadata=self.user_metadata,
200
+ prompt_template_segments=self.prompt_template_segments,
201
+ context_config=self.context_config,
202
+ dql_query=dql_query if dql_query is not None else self.dql_query,
203
+ source_reading_preset_id=self.source_reading_preset_id,
204
+ cache_mode=self.cache_mode,
205
+ )
206
+
188
207
 
189
208
  PlanStep: TypeAlias = BeginGroupStep | EndGroupStep | DqlOnlyStep | ReadingStep
190
209
 
@@ -285,7 +304,6 @@ class PlanSubmissionRequest(BaseModel):
285
304
  plan_name: str | None = None
286
305
  source_script: str | None = None
287
306
  entries: list[PlanStepSubmission]
288
- upsert_by_name: bool = False
289
307
 
290
308
 
291
309
  class PlanStepSubmissionStatus(BaseModel):
@@ -5,10 +5,9 @@ import os
5
5
  import sys
6
6
  import time
7
7
  import webbrowser
8
- from itertools import islice
9
8
  from pathlib import Path
10
9
  from textwrap import dedent
11
- from typing import IO, TYPE_CHECKING, Any, Iterable, Iterator, Literal, TypeVar, cast
10
+ from typing import IO, TYPE_CHECKING, Any, Iterator, Literal, cast
12
11
  from urllib.parse import urlsplit
13
12
 
14
13
  if TYPE_CHECKING:
@@ -46,7 +45,7 @@ from docent.data_models.reading import (
46
45
  StepGroupSubmission,
47
46
  )
48
47
  from docent.judges.util.meta_schema import validate_judge_result_schema
49
- from docent.loaders import load_inspect
48
+ from docent.sdk.integrations.inspect import ingest_inspect_directory
50
49
  from docent.sdk.llm_context import ContextItemRef, LLMContext, LLMContextItem, Prompt
51
50
  from docent.sdk.llm_request import ExternalAnalysisResult, LLMRequest
52
51
  from docent.sdk.reading import (
@@ -63,25 +62,17 @@ from docent.sdk.reading import (
63
62
  _PendingReading, # pyright: ignore[reportPrivateUsage]
64
63
  _PendingStepGroup, # pyright: ignore[reportPrivateUsage]
65
64
  )
65
+ from docent.sdk.util import batched as _batched
66
66
 
67
67
  MAX_AGENT_RUN_PAYLOAD_BYTES = 100 * 1024 * 1024 # 100MB backend limit
68
68
  _AGENT_RUNS_PAYLOAD_PREFIX = b'{"agent_runs":['
69
69
  _AGENT_RUNS_PAYLOAD_SUFFIX = b"]}"
70
+ batched = _batched
70
71
 
71
72
 
72
- _T = TypeVar("_T")
73
73
  _LOCAL_DOMAINS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
74
74
 
75
75
 
76
- def batched(iterable: Iterable[_T], n: int) -> Iterator[tuple[_T, ...]]:
77
- """Backport of itertools.batched for Python <3.12."""
78
- if n < 1:
79
- raise ValueError("n must be at least one")
80
- it = iter(iterable)
81
- while batch := tuple(islice(it, n)):
82
- yield batch
83
-
84
-
85
76
  def _domain_host(domain: str) -> str:
86
77
  """Extract normalized host from a domain string, handling optional port and IPv6 brackets."""
87
78
  normalized = domain.strip().lower()
@@ -373,7 +364,6 @@ class Docent:
373
364
  self._plan_name_sent: bool = False
374
365
  self._is_notebook: bool = False
375
366
  self._notebook_hook_registered: bool = False
376
- self._flushed_names: set[str] = set()
377
367
 
378
368
  self._register_notebook_hook()
379
369
 
@@ -2057,67 +2047,20 @@ class Docent:
2057
2047
  ValueError: If the path doesn't exist or isn't a directory.
2058
2048
  requests.exceptions.HTTPError: If any API requests fail.
2059
2049
  """
2060
- root_path = Path(fpath)
2061
- if not root_path.exists():
2062
- raise ValueError(f"Path does not exist: {fpath}")
2063
- if not root_path.is_dir():
2064
- raise ValueError(f"Path is not a directory: {fpath}")
2065
-
2066
- # Find all .eval files recursively
2067
- eval_files = list(root_path.rglob("*.eval"))
2068
-
2069
- if not eval_files:
2070
- self._logger.info(f"No .eval files found in {fpath}")
2071
- return
2072
-
2073
- self._logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
2074
-
2075
- total_runs_added = 0
2076
- batch_size = 100
2077
-
2078
- # Process each .eval file
2079
- for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
2080
- # Get total samples for progress tracking
2081
- total_samples = load_inspect.get_total_samples(eval_file, format="eval")
2082
-
2083
- if total_samples == 0:
2084
- self._logger.info(f"No samples found in {eval_file}")
2085
- continue
2086
-
2087
- # Load runs from file
2088
- with open(eval_file, "rb") as f:
2089
- _, runs_generator = load_inspect.runs_from_file(f, format="eval")
2090
-
2091
- # Process runs in batches
2092
- runs_from_file = 0
2093
- batches = batched(runs_generator, batch_size)
2094
-
2095
- with tqdm(
2096
- total=total_samples,
2097
- desc=f"Processing {eval_file.name}",
2098
- unit="runs",
2099
- leave=False,
2100
- ) as file_pbar:
2101
- for batch in batches:
2102
- batch_list = list(batch) # Convert generator batch to list
2103
- if not batch_list:
2104
- break
2105
-
2106
- # Add batch to collection
2107
- url = f"{self._api_url}/{collection_id}/agent_runs"
2108
- payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
2109
-
2110
- response = self._session.post(url, json=payload)
2111
- self._handle_response_errors(response)
2112
-
2113
- runs_from_file += len(batch_list)
2114
- file_pbar.update(len(batch_list))
2115
2050
 
2116
- total_runs_added += runs_from_file
2117
- self._logger.info(f"Added {runs_from_file} runs from {eval_file}")
2118
-
2119
- self._logger.info(
2120
- f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
2051
+ def _upload_agent_run_batch(agent_runs: list[AgentRun]) -> None:
2052
+ url = f"{self._api_url}/{collection_id}/agent_runs"
2053
+ payload = {
2054
+ "agent_runs": [agent_run.model_dump(mode="json") for agent_run in agent_runs]
2055
+ }
2056
+ response = self._session.post(url, json=payload)
2057
+ self._handle_response_errors(response)
2058
+
2059
+ ingest_inspect_directory(
2060
+ collection_id,
2061
+ fpath,
2062
+ upload_agent_run_batch=_upload_agent_run_batch,
2063
+ logger=self._logger,
2121
2064
  )
2122
2065
 
2123
2066
  def start_chat(
@@ -2279,7 +2222,10 @@ class Docent:
2279
2222
  output_schema: dict[str, Any] | None = None,
2280
2223
  max_concurrency: int | None = None,
2281
2224
  ) -> dict[str, Any]:
2282
- """Submit LLM requests for processing.
2225
+ """
2226
+ Deprecated - use readings instead.
2227
+
2228
+ Submit LLM requests for processing.
2283
2229
 
2284
2230
  Creates a result set and submits requests for background LLM processing.
2285
2231
  Prints the result set URL and returns submission details.
@@ -2380,7 +2326,10 @@ class Docent:
2380
2326
  result_set_name: str | None = None,
2381
2327
  exists_ok: bool = False,
2382
2328
  ) -> dict[str, Any]:
2383
- """Submit pre-computed results directly.
2329
+ """
2330
+ Deprecated.
2331
+
2332
+ Submit pre-computed results directly.
2384
2333
 
2385
2334
  For use when you've run analysis locally (e.g., with a local LLM)
2386
2335
  and want to upload the results to Docent for viewing.
@@ -2431,7 +2380,10 @@ class Docent:
2431
2380
  collection_id: str,
2432
2381
  name_or_id: str,
2433
2382
  ) -> dict[str, Any]:
2434
- """Get a result set by name or ID.
2383
+ """
2384
+ Deprecated - use readings instead.
2385
+
2386
+ Get a result set by name or ID.
2435
2387
 
2436
2388
  Args:
2437
2389
  collection_id: ID of the Collection.
@@ -2456,7 +2408,10 @@ class Docent:
2456
2408
  with_auto_joins: bool = False,
2457
2409
  include_incomplete: bool = False,
2458
2410
  ) -> "pd.DataFrame":
2459
- """Get result set contents as a pandas DataFrame.
2411
+ """
2412
+ Deprecated - use readings instead.
2413
+
2414
+ Get result set contents as a pandas DataFrame.
2460
2415
 
2461
2416
  Args:
2462
2417
  collection_id: ID of the Collection.
@@ -2563,7 +2518,10 @@ class Docent:
2563
2518
  collection_id: str,
2564
2519
  name_or_id: str,
2565
2520
  ) -> str:
2566
- """Open a result set in the browser.
2521
+ """
2522
+ Deprecated - use readings instead.
2523
+
2524
+ Open a result set in the browser.
2567
2525
 
2568
2526
  Args:
2569
2527
  collection_id: ID of the Collection.
@@ -2665,14 +2623,6 @@ class Docent:
2665
2623
  return alias
2666
2624
 
2667
2625
  def _enqueue_pending(self, entry: PendingEntry) -> None:
2668
- """Add a pending entry, replacing any existing entry with the same name."""
2669
- name: str | None = getattr(entry, "name", None)
2670
- if name is not None:
2671
- for i, existing in enumerate(self._pending):
2672
- existing_name: str | None = getattr(existing, "name", None)
2673
- if existing_name == name:
2674
- self._pending[i] = entry
2675
- return
2676
2626
  self._pending.append(entry)
2677
2627
 
2678
2628
  def _register_atexit(self) -> None:
@@ -2900,11 +2850,13 @@ class Docent:
2900
2850
 
2901
2851
  param_name = seg.column_name
2902
2852
  param_type = seg.type_annotation or "unknown"
2853
+ # Unknown type means "defer to server-side inference"; false here does not
2854
+ # mean the caller explicitly declared a scalar placeholder.
2903
2855
  segments.append(
2904
2856
  {
2905
2857
  "param_name": param_name,
2906
2858
  "param_type": param_type,
2907
- "is_list": False,
2859
+ "is_list": seg.is_list_annotation if seg.type_annotation else False,
2908
2860
  }
2909
2861
  )
2910
2862
  if context_config and param_name not in param_configs:
@@ -3119,14 +3071,11 @@ class Docent:
3119
3071
  else None
3120
3072
  )
3121
3073
 
3122
- upsert_by_name = bool(self._flushed_names)
3123
-
3124
3074
  request_body = PlanSubmissionRequest(
3125
3075
  plan_id=self._plan_id,
3126
3076
  plan_name=plan_name,
3127
3077
  source_script=source_script,
3128
3078
  entries=entries,
3129
- upsert_by_name=upsert_by_name,
3130
3079
  )
3131
3080
  is_first_flush_for_plan = self._plan_id is None
3132
3081
 
@@ -3141,10 +3090,6 @@ class Docent:
3141
3090
  self._flushed_collection_id = collection_id
3142
3091
  self._plan_name_sent = True
3143
3092
 
3144
- for p in self._pending:
3145
- entry_name: str | None = getattr(p, "name", None)
3146
- if entry_name is not None:
3147
- self._flushed_names.add(entry_name)
3148
3093
  self._pending.clear()
3149
3094
 
3150
3095
  for status_entry in result.get("entry_statuses", []):
@@ -0,0 +1,27 @@
1
+ """Integrations for converting and ingesting external trace formats."""
2
+
3
+ from docent.sdk.integrations.harbor import (
4
+ convert_atif_to_agent_run,
5
+ convert_harbor_directory_to_agent_runs,
6
+ convert_harbor_trial_to_agent_run,
7
+ )
8
+ from docent.sdk.integrations.inspect import (
9
+ convert_inspect_directory_to_agent_runs,
10
+ convert_inspect_eval_file_to_agent_runs,
11
+ ingest_inspect_directory,
12
+ )
13
+ from docent.sdk.integrations.nemogym import (
14
+ convert_nemogym_jsonl_file_to_agent_runs,
15
+ convert_nemogym_rollout_to_agent_run,
16
+ )
17
+
18
+ __all__ = [
19
+ "convert_atif_to_agent_run",
20
+ "convert_harbor_directory_to_agent_runs",
21
+ "convert_harbor_trial_to_agent_run",
22
+ "convert_inspect_directory_to_agent_runs",
23
+ "convert_inspect_eval_file_to_agent_runs",
24
+ "convert_nemogym_jsonl_file_to_agent_runs",
25
+ "convert_nemogym_rollout_to_agent_run",
26
+ "ingest_inspect_directory",
27
+ ]