docent-python 0.1.58a0__tar.gz → 0.1.59a0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/model_registry.py +37 -13
  3. docent_python-0.1.59a0/docent/_llm_util/providers/preference_types.py +268 -0
  4. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/client.py +17 -73
  5. docent_python-0.1.59a0/docent/sdk/integrations/__init__.py +27 -0
  6. docent_python-0.1.59a0/docent/sdk/integrations/harbor.py +893 -0
  7. docent_python-0.1.59a0/docent/sdk/integrations/inspect.py +148 -0
  8. docent_python-0.1.59a0/docent/sdk/integrations/nemogym.py +611 -0
  9. docent_python-0.1.59a0/docent/sdk/integrations/util.py +84 -0
  10. docent_python-0.1.59a0/docent/sdk/util.py +16 -0
  11. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/pyproject.toml +1 -1
  12. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/uv.lock +148 -148
  13. docent_python-0.1.58a0/docent/_llm_util/providers/preference_types.py +0 -110
  14. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/.gitignore +0 -0
  15. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/LICENSE.md +0 -0
  16. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/README.md +0 -0
  17. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/__init__.py +0 -0
  18. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/__init__.py +0 -0
  19. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/__init__.py +0 -0
  20. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  21. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  22. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_cache.py +0 -0
  23. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_svc.py +0 -0
  24. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/__init__.py +0 -0
  25. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/anthropic.py +0 -0
  26. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/common.py +0 -0
  27. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/google.py +0 -0
  28. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openai.py +0 -0
  29. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openrouter.py +0 -0
  30. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  31. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_log_util/__init__.py +0 -0
  32. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_log_util/logger.py +0 -0
  33. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/__init__.py +0 -0
  34. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/_tiktoken_util.py +0 -0
  35. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/agent_run.py +0 -0
  36. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/__init__.py +0 -0
  37. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/content.py +0 -0
  38. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/message.py +0 -0
  39. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/response_format.py +0 -0
  40. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/tool.py +0 -0
  41. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/citation.py +0 -0
  42. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/feedback.py +0 -0
  43. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/formatted_objects.py +0 -0
  44. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/judge.py +0 -0
  45. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/metadata_util.py +0 -0
  46. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/reading.py +0 -0
  47. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/regex.py +0 -0
  48. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/transcript.py +0 -0
  49. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/util.py +0 -0
  50. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/__init__.py +0 -0
  51. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/analysis.py +0 -0
  52. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/impl.py +0 -0
  53. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/runner.py +0 -0
  54. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/stats.py +0 -0
  55. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/types.py +0 -0
  56. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/forgiving_json.py +0 -0
  57. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.json +0 -0
  58. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.py +0 -0
  59. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/parse_output.py +0 -0
  60. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/template_formatter.py +0 -0
  61. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/voting.py +0 -0
  62. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/loaders/load_inspect.py +0 -0
  63. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/mcp/__init__.py +0 -0
  64. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/mcp/__main__.py +0 -0
  65. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/mcp/server.py +0 -0
  66. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/py.typed +0 -0
  67. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/__init__.py +0 -0
  68. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/load.py +0 -0
  69. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/log.eval +0 -0
  70. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/tb_airline.json +0 -0
  71. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/__init__.py +0 -0
  72. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/agent_run_writer.py +0 -0
  73. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/llm_context.py +0 -0
  74. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/llm_request.py +0 -0
  75. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/reading.py +0 -0
  76. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/trace.py +0 -0
  77. {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/trace_temp.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.58a0
3
+ Version: 0.1.59a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
3
4
  from dataclasses import dataclass
4
5
  from functools import lru_cache
5
6
  from typing import Optional
@@ -9,6 +10,8 @@ from docent._log_util import get_logger
9
10
 
10
11
  logger = get_logger(__name__)
11
12
 
13
+ _CLAUDE_VERSION_PATTERN = re.compile(r"(claude-(?:haiku|sonnet|opus)-4)[.-](\d+)\b")
14
+
12
15
 
13
16
  """
14
17
  Values are USD per million tokens
@@ -34,6 +37,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
34
37
  "gpt-5-chat-latest",
35
38
  ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
36
39
  ),
40
+ (
41
+ "gpt-5.4-mini",
42
+ ModelInfo(rate={"input": 0.75, "output": 4.50}, context_window=400_000),
43
+ ),
44
+ (
45
+ "gpt-5.4",
46
+ ModelInfo(rate={"input": 2.50, "output": 15.0}, context_window=1_050_000),
47
+ ),
37
48
  (
38
49
  "gpt-5-nano",
39
50
  ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
@@ -62,18 +73,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
62
73
  "claude-sonnet-4-5",
63
74
  ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
64
75
  ),
65
- (
66
- "claude-sonnet-4-6",
67
- ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
68
- ),
69
- (
70
- "claude-opus-4-6",
71
- ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
72
- ),
73
- (
74
- "claude-haiku-4-5",
75
- ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
76
- ),
76
+ ("claude-sonnet-4-6", ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000)),
77
+ ("claude-opus-4-6", ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=1_000_000)),
78
+ ("claude-haiku-4-5", ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000)),
77
79
  (
78
80
  "claude-opus-4-5-20251101",
79
81
  ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
@@ -108,6 +110,13 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
108
110
  context_window=1_048_576,
109
111
  ),
110
112
  ),
113
+ (
114
+ "gemini-3.1-pro-preview",
115
+ ModelInfo(
116
+ rate={"input": 2.00, "output": 12.00},
117
+ context_window=1_048_576,
118
+ ),
119
+ ),
111
120
  (
112
121
  "gemini-3-flash-preview",
113
122
  ModelInfo(
@@ -146,12 +155,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
146
155
  ]
147
156
 
148
157
 
158
+ def normalize_model_name(model_name: str) -> str:
159
+ """Normalize provider-specific naming differences before registry lookup."""
160
+
161
+ return _CLAUDE_VERSION_PATTERN.sub(r"\1-\2", model_name)
162
+
163
+
164
+ def model_names_match(expected: str, actual: str) -> bool:
165
+ """Match a configured model name against a provider-reported model string."""
166
+
167
+ normalized_expected = normalize_model_name(expected)
168
+ normalized_actual = normalize_model_name(actual)
169
+ return normalized_expected in normalized_actual
170
+
171
+
149
172
  @lru_cache(maxsize=None)
150
173
  def get_model_info(model_name: str) -> Optional[ModelInfo]:
174
+ normalized_model_name = normalize_model_name(model_name)
151
175
  for registry_model_name, info in sorted(
152
176
  _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
153
177
  ):
154
- if registry_model_name in model_name:
178
+ if registry_model_name in normalized_model_name:
155
179
  return info
156
180
  return None
157
181
 
@@ -0,0 +1,268 @@
1
+ """Provides preferences of which LLM models to use for different Docent functions."""
2
+
3
+ from functools import cached_property
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, ConfigDict
7
+
8
+ from docent._llm_util.model_registry import get_context_window
9
+ from docent._log_util import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
+ class ModelOption(BaseModel):
15
+ """Configuration for a specific model from a provider. Not to be confused with ModelInfo.
16
+
17
+ Attributes:
18
+ provider: The name of the LLM provider (e.g., "openai", "anthropic").
19
+ model_name: The specific model to use from the provider.
20
+ reasoning_effort: Optional indication of computational effort to use.
21
+ """
22
+
23
+ model_config = ConfigDict(extra="ignore")
24
+
25
+ provider: str
26
+ model_name: str
27
+ reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
28
+
29
+
30
+ class ModelOptionWithContext(BaseModel):
31
+ """Enhanced model option that includes context window information for frontend use.
32
+ Not to be confused with ModelInfo or ModelOption.
33
+
34
+ Attributes:
35
+ provider: The name of the LLM provider (e.g., "openai", "anthropic").
36
+ model_name: The specific model to use from the provider.
37
+ reasoning_effort: Optional indication of computational effort to use.
38
+ context_window: The context window size in tokens.
39
+ uses_byok: Whether this model would use the user's own API key.
40
+ """
41
+
42
+ provider: str
43
+ model_name: str
44
+ reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
45
+ context_window: int
46
+ uses_byok: bool
47
+
48
+ @classmethod
49
+ def from_model_option(
50
+ cls, model_option: ModelOption, uses_byok: bool = False
51
+ ) -> "ModelOptionWithContext":
52
+ """Create a ModelOptionWithContext from a ModelOption.
53
+
54
+ Args:
55
+ model_option: The base model option
56
+ uses_byok: Whether this model requires bring-your-own-key
57
+
58
+ Returns:
59
+ ModelOptionWithContext with context window looked up from global mapping
60
+ """
61
+ context_window = get_context_window(model_option.model_name)
62
+
63
+ return cls(
64
+ provider=model_option.provider,
65
+ model_name=model_option.model_name,
66
+ reasoning_effort=model_option.reasoning_effort,
67
+ context_window=context_window,
68
+ uses_byok=uses_byok,
69
+ )
70
+
71
+
72
+ def merge_models_with_byok(
73
+ defaults: list[ModelOption],
74
+ byok: list[ModelOption],
75
+ api_keys: dict[str, str] | None,
76
+ ) -> list[ModelOptionWithContext]:
77
+ user_keys = api_keys or {}
78
+
79
+ merged: list[ModelOption] = list(defaults)
80
+ if user_keys:
81
+ merged.extend([m for m in byok if m.provider in user_keys])
82
+
83
+ return [ModelOptionWithContext.from_model_option(m, m.provider in user_keys) for m in merged]
84
+
85
+
86
+ class PublicProviderPreferences(BaseModel):
87
+ @cached_property
88
+ def default_judge_models(self) -> list[ModelOption]:
89
+ """Judge models that any user can access without providing their own API key"""
90
+
91
+ return [
92
+ ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="low"),
93
+ ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="medium"),
94
+ ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="high"),
95
+ ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="low"),
96
+ ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="medium"),
97
+ ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="high"),
98
+ ModelOption(
99
+ provider="anthropic",
100
+ model_name="claude-haiku-4-5",
101
+ reasoning_effort="low",
102
+ ),
103
+ ModelOption(
104
+ provider="anthropic",
105
+ model_name="claude-haiku-4-5",
106
+ reasoning_effort="medium",
107
+ ),
108
+ ModelOption(
109
+ provider="anthropic",
110
+ model_name="claude-haiku-4-5",
111
+ reasoning_effort="high",
112
+ ),
113
+ ModelOption(
114
+ provider="anthropic",
115
+ model_name="claude-opus-4-6",
116
+ reasoning_effort="low",
117
+ ),
118
+ ModelOption(
119
+ provider="anthropic",
120
+ model_name="claude-opus-4-6",
121
+ reasoning_effort="medium",
122
+ ),
123
+ ModelOption(
124
+ provider="anthropic",
125
+ model_name="claude-opus-4-6",
126
+ reasoning_effort="high",
127
+ ),
128
+ ModelOption(
129
+ provider="anthropic",
130
+ model_name="claude-sonnet-4-6",
131
+ reasoning_effort="low",
132
+ ),
133
+ ModelOption(
134
+ provider="anthropic",
135
+ model_name="claude-sonnet-4-6",
136
+ reasoning_effort="medium",
137
+ ),
138
+ ModelOption(
139
+ provider="anthropic",
140
+ model_name="claude-sonnet-4-6",
141
+ reasoning_effort="high",
142
+ ),
143
+ ModelOption(
144
+ provider="google",
145
+ model_name="gemini-3-flash-preview",
146
+ reasoning_effort="low",
147
+ ),
148
+ ModelOption(
149
+ provider="google",
150
+ model_name="gemini-3-flash-preview",
151
+ reasoning_effort="medium",
152
+ ),
153
+ ModelOption(
154
+ provider="google",
155
+ model_name="gemini-3-flash-preview",
156
+ reasoning_effort="high",
157
+ ),
158
+ ModelOption(
159
+ provider="google",
160
+ model_name="gemini-3.1-pro-preview",
161
+ reasoning_effort="low",
162
+ ),
163
+ ModelOption(
164
+ provider="google",
165
+ model_name="gemini-3.1-pro-preview",
166
+ reasoning_effort="medium",
167
+ ),
168
+ ModelOption(
169
+ provider="google",
170
+ model_name="gemini-3.1-pro-preview",
171
+ reasoning_effort="high",
172
+ ),
173
+ # Open Router equivalents
174
+ ModelOption(
175
+ provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="low"
176
+ ),
177
+ ModelOption(
178
+ provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="medium"
179
+ ),
180
+ ModelOption(
181
+ provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="high"
182
+ ),
183
+ ModelOption(provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="low"),
184
+ ModelOption(
185
+ provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="medium"
186
+ ),
187
+ ModelOption(
188
+ provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="high"
189
+ ),
190
+ ModelOption(
191
+ provider="openrouter",
192
+ model_name="anthropic/claude-haiku-4.5",
193
+ reasoning_effort="low",
194
+ ),
195
+ ModelOption(
196
+ provider="openrouter",
197
+ model_name="anthropic/claude-haiku-4.5",
198
+ reasoning_effort="medium",
199
+ ),
200
+ ModelOption(
201
+ provider="openrouter",
202
+ model_name="anthropic/claude-haiku-4.5",
203
+ reasoning_effort="high",
204
+ ),
205
+ ModelOption(
206
+ provider="openrouter",
207
+ model_name="anthropic/claude-opus-4.6",
208
+ reasoning_effort="low",
209
+ ),
210
+ ModelOption(
211
+ provider="openrouter",
212
+ model_name="anthropic/claude-opus-4.6",
213
+ reasoning_effort="medium",
214
+ ),
215
+ ModelOption(
216
+ provider="openrouter",
217
+ model_name="anthropic/claude-opus-4.6",
218
+ reasoning_effort="high",
219
+ ),
220
+ ModelOption(
221
+ provider="openrouter",
222
+ model_name="anthropic/claude-sonnet-4.6",
223
+ reasoning_effort="low",
224
+ ),
225
+ ModelOption(
226
+ provider="openrouter",
227
+ model_name="anthropic/claude-sonnet-4.6",
228
+ reasoning_effort="medium",
229
+ ),
230
+ ModelOption(
231
+ provider="openrouter",
232
+ model_name="anthropic/claude-sonnet-4.6",
233
+ reasoning_effort="high",
234
+ ),
235
+ ModelOption(
236
+ provider="openrouter",
237
+ model_name="google/gemini-3-flash-preview",
238
+ reasoning_effort="low",
239
+ ),
240
+ ModelOption(
241
+ provider="openrouter",
242
+ model_name="google/gemini-3-flash-preview",
243
+ reasoning_effort="medium",
244
+ ),
245
+ ModelOption(
246
+ provider="openrouter",
247
+ model_name="google/gemini-3-flash-preview",
248
+ reasoning_effort="high",
249
+ ),
250
+ ModelOption(
251
+ provider="openrouter",
252
+ model_name="google/gemini-3.1-pro-preview",
253
+ reasoning_effort="low",
254
+ ),
255
+ ModelOption(
256
+ provider="openrouter",
257
+ model_name="google/gemini-3.1-pro-preview",
258
+ reasoning_effort="medium",
259
+ ),
260
+ ModelOption(
261
+ provider="openrouter",
262
+ model_name="google/gemini-3.1-pro-preview",
263
+ reasoning_effort="high",
264
+ ),
265
+ ]
266
+
267
+
268
+ PUBLIC_PROVIDER_PREFERENCES = PublicProviderPreferences()
@@ -5,10 +5,9 @@ import os
5
5
  import sys
6
6
  import time
7
7
  import webbrowser
8
- from itertools import islice
9
8
  from pathlib import Path
10
9
  from textwrap import dedent
11
- from typing import IO, TYPE_CHECKING, Any, Iterable, Iterator, Literal, TypeVar, cast
10
+ from typing import IO, TYPE_CHECKING, Any, Iterator, Literal, cast
12
11
  from urllib.parse import urlsplit
13
12
 
14
13
  if TYPE_CHECKING:
@@ -46,7 +45,7 @@ from docent.data_models.reading import (
46
45
  StepGroupSubmission,
47
46
  )
48
47
  from docent.judges.util.meta_schema import validate_judge_result_schema
49
- from docent.loaders import load_inspect
48
+ from docent.sdk.integrations.inspect import ingest_inspect_directory
50
49
  from docent.sdk.llm_context import ContextItemRef, LLMContext, LLMContextItem, Prompt
51
50
  from docent.sdk.llm_request import ExternalAnalysisResult, LLMRequest
52
51
  from docent.sdk.reading import (
@@ -63,25 +62,17 @@ from docent.sdk.reading import (
63
62
  _PendingReading, # pyright: ignore[reportPrivateUsage]
64
63
  _PendingStepGroup, # pyright: ignore[reportPrivateUsage]
65
64
  )
65
+ from docent.sdk.util import batched as _batched
66
66
 
67
67
  MAX_AGENT_RUN_PAYLOAD_BYTES = 100 * 1024 * 1024 # 100MB backend limit
68
68
  _AGENT_RUNS_PAYLOAD_PREFIX = b'{"agent_runs":['
69
69
  _AGENT_RUNS_PAYLOAD_SUFFIX = b"]}"
70
+ batched = _batched
70
71
 
71
72
 
72
- _T = TypeVar("_T")
73
73
  _LOCAL_DOMAINS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
74
74
 
75
75
 
76
- def batched(iterable: Iterable[_T], n: int) -> Iterator[tuple[_T, ...]]:
77
- """Backport of itertools.batched for Python <3.12."""
78
- if n < 1:
79
- raise ValueError("n must be at least one")
80
- it = iter(iterable)
81
- while batch := tuple(islice(it, n)):
82
- yield batch
83
-
84
-
85
76
  def _domain_host(domain: str) -> str:
86
77
  """Extract normalized host from a domain string, handling optional port and IPv6 brackets."""
87
78
  normalized = domain.strip().lower()
@@ -2056,67 +2047,20 @@ class Docent:
2056
2047
  ValueError: If the path doesn't exist or isn't a directory.
2057
2048
  requests.exceptions.HTTPError: If any API requests fail.
2058
2049
  """
2059
- root_path = Path(fpath)
2060
- if not root_path.exists():
2061
- raise ValueError(f"Path does not exist: {fpath}")
2062
- if not root_path.is_dir():
2063
- raise ValueError(f"Path is not a directory: {fpath}")
2064
-
2065
- # Find all .eval files recursively
2066
- eval_files = list(root_path.rglob("*.eval"))
2067
-
2068
- if not eval_files:
2069
- self._logger.info(f"No .eval files found in {fpath}")
2070
- return
2071
-
2072
- self._logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
2073
-
2074
- total_runs_added = 0
2075
- batch_size = 100
2076
-
2077
- # Process each .eval file
2078
- for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
2079
- # Get total samples for progress tracking
2080
- total_samples = load_inspect.get_total_samples(eval_file, format="eval")
2081
-
2082
- if total_samples == 0:
2083
- self._logger.info(f"No samples found in {eval_file}")
2084
- continue
2085
2050
 
2086
- # Load runs from file
2087
- with open(eval_file, "rb") as f:
2088
- _, runs_generator = load_inspect.runs_from_file(f, format="eval")
2089
-
2090
- # Process runs in batches
2091
- runs_from_file = 0
2092
- batches = batched(runs_generator, batch_size)
2093
-
2094
- with tqdm(
2095
- total=total_samples,
2096
- desc=f"Processing {eval_file.name}",
2097
- unit="runs",
2098
- leave=False,
2099
- ) as file_pbar:
2100
- for batch in batches:
2101
- batch_list = list(batch) # Convert generator batch to list
2102
- if not batch_list:
2103
- break
2104
-
2105
- # Add batch to collection
2106
- url = f"{self._api_url}/{collection_id}/agent_runs"
2107
- payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
2108
-
2109
- response = self._session.post(url, json=payload)
2110
- self._handle_response_errors(response)
2111
-
2112
- runs_from_file += len(batch_list)
2113
- file_pbar.update(len(batch_list))
2114
-
2115
- total_runs_added += runs_from_file
2116
- self._logger.info(f"Added {runs_from_file} runs from {eval_file}")
2117
-
2118
- self._logger.info(
2119
- f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
2051
+ def _upload_agent_run_batch(agent_runs: list[AgentRun]) -> None:
2052
+ url = f"{self._api_url}/{collection_id}/agent_runs"
2053
+ payload = {
2054
+ "agent_runs": [agent_run.model_dump(mode="json") for agent_run in agent_runs]
2055
+ }
2056
+ response = self._session.post(url, json=payload)
2057
+ self._handle_response_errors(response)
2058
+
2059
+ ingest_inspect_directory(
2060
+ collection_id,
2061
+ fpath,
2062
+ upload_agent_run_batch=_upload_agent_run_batch,
2063
+ logger=self._logger,
2120
2064
  )
2121
2065
 
2122
2066
  def start_chat(
@@ -0,0 +1,27 @@
1
+ """Integrations for converting and ingesting external trace formats."""
2
+
3
+ from docent.sdk.integrations.harbor import (
4
+ convert_atif_to_agent_run,
5
+ convert_harbor_directory_to_agent_runs,
6
+ convert_harbor_trial_to_agent_run,
7
+ )
8
+ from docent.sdk.integrations.inspect import (
9
+ convert_inspect_directory_to_agent_runs,
10
+ convert_inspect_eval_file_to_agent_runs,
11
+ ingest_inspect_directory,
12
+ )
13
+ from docent.sdk.integrations.nemogym import (
14
+ convert_nemogym_jsonl_file_to_agent_runs,
15
+ convert_nemogym_rollout_to_agent_run,
16
+ )
17
+
18
+ __all__ = [
19
+ "convert_atif_to_agent_run",
20
+ "convert_harbor_directory_to_agent_runs",
21
+ "convert_harbor_trial_to_agent_run",
22
+ "convert_inspect_directory_to_agent_runs",
23
+ "convert_inspect_eval_file_to_agent_runs",
24
+ "convert_nemogym_jsonl_file_to_agent_runs",
25
+ "convert_nemogym_rollout_to_agent_run",
26
+ "ingest_inspect_directory",
27
+ ]