docent-python 0.1.57a0__tar.gz → 0.1.59a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/PKG-INFO +1 -1
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/model_registry.py +37 -13
- docent_python-0.1.59a0/docent/_llm_util/providers/preference_types.py +268 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/reading.py +19 -1
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/client.py +40 -95
- docent_python-0.1.59a0/docent/sdk/integrations/__init__.py +27 -0
- docent_python-0.1.59a0/docent/sdk/integrations/harbor.py +893 -0
- docent_python-0.1.59a0/docent/sdk/integrations/inspect.py +148 -0
- docent_python-0.1.59a0/docent/sdk/integrations/nemogym.py +611 -0
- docent_python-0.1.59a0/docent/sdk/integrations/util.py +84 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/llm_request.py +9 -2
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/reading.py +16 -2
- docent_python-0.1.59a0/docent/sdk/util.py +16 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/pyproject.toml +1 -1
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/uv.lock +149 -149
- docent_python-0.1.57a0/docent/_llm_util/providers/preference_types.py +0 -110
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/.gitignore +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/LICENSE.md +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/README.md +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/llm_output.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_svc.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/anthropic.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/google.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openai.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openrouter.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/agent_run.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/feedback.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/mcp/server.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/py.typed +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/llm_context.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/trace.py +0 -0
- {docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/trace_temp.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from functools import lru_cache
|
|
5
6
|
from typing import Optional
|
|
@@ -9,6 +10,8 @@ from docent._log_util import get_logger
|
|
|
9
10
|
|
|
10
11
|
logger = get_logger(__name__)
|
|
11
12
|
|
|
13
|
+
_CLAUDE_VERSION_PATTERN = re.compile(r"(claude-(?:haiku|sonnet|opus)-4)[.-](\d+)\b")
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
"""
|
|
14
17
|
Values are USD per million tokens
|
|
@@ -34,6 +37,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
34
37
|
"gpt-5-chat-latest",
|
|
35
38
|
ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
|
|
36
39
|
),
|
|
40
|
+
(
|
|
41
|
+
"gpt-5.4-mini",
|
|
42
|
+
ModelInfo(rate={"input": 0.75, "output": 4.50}, context_window=400_000),
|
|
43
|
+
),
|
|
44
|
+
(
|
|
45
|
+
"gpt-5.4",
|
|
46
|
+
ModelInfo(rate={"input": 2.50, "output": 15.0}, context_window=1_050_000),
|
|
47
|
+
),
|
|
37
48
|
(
|
|
38
49
|
"gpt-5-nano",
|
|
39
50
|
ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
|
|
@@ -62,18 +73,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
62
73
|
"claude-sonnet-4-5",
|
|
63
74
|
ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
|
|
64
75
|
),
|
|
65
|
-
(
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
),
|
|
69
|
-
(
|
|
70
|
-
"claude-opus-4-6",
|
|
71
|
-
ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
|
|
72
|
-
),
|
|
73
|
-
(
|
|
74
|
-
"claude-haiku-4-5",
|
|
75
|
-
ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
|
|
76
|
-
),
|
|
76
|
+
("claude-sonnet-4-6", ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000)),
|
|
77
|
+
("claude-opus-4-6", ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=1_000_000)),
|
|
78
|
+
("claude-haiku-4-5", ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000)),
|
|
77
79
|
(
|
|
78
80
|
"claude-opus-4-5-20251101",
|
|
79
81
|
ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
|
|
@@ -108,6 +110,13 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
108
110
|
context_window=1_048_576,
|
|
109
111
|
),
|
|
110
112
|
),
|
|
113
|
+
(
|
|
114
|
+
"gemini-3.1-pro-preview",
|
|
115
|
+
ModelInfo(
|
|
116
|
+
rate={"input": 2.00, "output": 12.00},
|
|
117
|
+
context_window=1_048_576,
|
|
118
|
+
),
|
|
119
|
+
),
|
|
111
120
|
(
|
|
112
121
|
"gemini-3-flash-preview",
|
|
113
122
|
ModelInfo(
|
|
@@ -146,12 +155,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
146
155
|
]
|
|
147
156
|
|
|
148
157
|
|
|
158
|
+
def normalize_model_name(model_name: str) -> str:
    """Canonicalize Claude 4.x version separators ahead of a registry lookup.

    Every match of ``_CLAUDE_VERSION_PATTERN`` (a ``claude-{haiku,sonnet,opus}-4``
    prefix, then ``.`` or ``-``, then a run of digits) is rewritten so the
    separator is a dash; e.g. ``claude-haiku-4.5`` becomes ``claude-haiku-4-5``.
    Text the pattern does not match is returned untouched.

    Args:
        model_name: Model identifier as configured or as reported by a provider.

    Returns:
        The identifier with dashed Claude version separators.
    """
    # Group 1 is the family prefix, group 2 the digits after the separator.
    return _CLAUDE_VERSION_PATTERN.sub(
        lambda match: f"{match.group(1)}-{match.group(2)}",
        model_name,
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def model_names_match(expected: str, actual: str) -> bool:
    """Check whether a configured model name appears inside a provider-reported one.

    Both names are passed through ``normalize_model_name`` first, then compared
    by substring containment rather than equality, so ``actual`` may carry
    extra text around the expected name.

    Args:
        expected: The model name from configuration.
        actual: The model string reported back by the provider.

    Returns:
        True when the normalized ``expected`` is a substring of the normalized
        ``actual``.
    """
    return normalize_model_name(expected) in normalize_model_name(actual)
|
|
170
|
+
|
|
171
|
+
|
|
149
172
|
@lru_cache(maxsize=None)
def get_model_info(model_name: str) -> Optional[ModelInfo]:
    """Look up the registry entry that best matches ``model_name``.

    The name is normalized with ``normalize_model_name`` and then compared
    against every ``_REGISTRY`` key by substring containment. Results are
    memoized per distinct ``model_name`` by ``lru_cache``.

    Args:
        model_name: Model identifier to resolve.

    Returns:
        The ``ModelInfo`` of the longest registry name contained in the
        normalized identifier, or None when no registry name is contained in it.
    """
    candidate = normalize_model_name(model_name)
    # Longest names first, so a more specific entry wins over a shorter
    # registry name that is also a substring of the candidate.
    most_specific_first = sorted(_REGISTRY, key=lambda entry: len(entry[0]), reverse=True)
    return next(
        (info for known_name, info in most_specific_first if known_name in candidate),
        None,
    )
|
|
157
181
|
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""Provides preferences of which LLM models to use for different Docent functions."""
|
|
2
|
+
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, ConfigDict
|
|
7
|
+
|
|
8
|
+
from docent._llm_util.model_registry import get_context_window
|
|
9
|
+
from docent._log_util import get_logger
|
|
10
|
+
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelOption(BaseModel):
    """A provider/model pairing, optionally pinned to a reasoning effort.

    This names *which* model to call. It is a different thing from ModelInfo,
    so do not mix the two up.

    Attributes:
        provider: The name of the LLM provider (e.g., "openai", "anthropic").
        model_name: The specific model to use from the provider.
        reasoning_effort: Optional effort level, one of "minimal", "low",
            "medium" or "high"; None when unspecified.
    """

    # Keys that are not declared fields are dropped on input instead of rejected.
    model_config = ConfigDict(extra="ignore")

    provider: str
    model_name: str
    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ModelOptionWithContext(BaseModel):
    """A model option enriched with context-window and key-ownership details for the frontend.

    Separate from both ModelInfo and ModelOption; it repeats ModelOption's
    fields and adds two more.

    Attributes:
        provider: The name of the LLM provider (e.g., "openai", "anthropic").
        model_name: The specific model to use from the provider.
        reasoning_effort: Optional effort level; None when unspecified.
        context_window: The context window size in tokens.
        uses_byok: Whether this model would use the user's own API key.
    """

    provider: str
    model_name: str
    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
    context_window: int
    uses_byok: bool

    @classmethod
    def from_model_option(
        cls, model_option: ModelOption, uses_byok: bool = False
    ) -> "ModelOptionWithContext":
        """Build an instance from a ModelOption, filling in its context window.

        Args:
            model_option: The base option whose provider, model name and
                reasoning effort are copied over.
            uses_byok: Value to store in the ``uses_byok`` field.

        Returns:
            A ModelOptionWithContext whose ``context_window`` comes from
            ``get_context_window`` called with the option's model name.
        """
        return cls(
            provider=model_option.provider,
            model_name=model_option.model_name,
            reasoning_effort=model_option.reasoning_effort,
            context_window=get_context_window(model_option.model_name),
            uses_byok=uses_byok,
        )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def merge_models_with_byok(
    defaults: list[ModelOption],
    byok: list[ModelOption],
    api_keys: dict[str, str] | None,
) -> list[ModelOptionWithContext]:
    """Combine default models with the BYOK models the user holds a key for.

    Args:
        defaults: Options that are always included, in their given order.
        byok: Options included only when their provider is a key of ``api_keys``.
        api_keys: Mapping keyed by provider name; None is treated as empty.

    Returns:
        All ``defaults`` followed by the qualifying ``byok`` options, each
        converted with ``ModelOptionWithContext.from_model_option``. ``uses_byok``
        is True for every option (default or BYOK) whose provider is a key of
        ``api_keys``.
    """
    keyed_providers = api_keys or {}

    # With no keys at all the membership test rejects every BYOK option, so the
    # result is just the defaults.
    unlocked = [option for option in byok if option.provider in keyed_providers]

    return [
        ModelOptionWithContext.from_model_option(option, option.provider in keyed_providers)
        for option in [*defaults, *unlocked]
    ]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class PublicProviderPreferences(BaseModel):
    """Holds the default judge-model list offered to every user."""

    @cached_property
    def default_judge_models(self) -> list[ModelOption]:
        """Judge models that any user can access without providing their own API key.

        Each (provider, model) pair below is emitted three times, once per
        reasoning effort in the order low, medium, high. Pairs keep the order
        in which they are listed: direct providers first, then their
        OpenRouter equivalents.
        """
        efforts: tuple[Literal["low", "medium", "high"], ...] = ("low", "medium", "high")

        provider_models: list[tuple[str, str]] = [
            ("openai", "gpt-5.4-mini"),
            ("openai", "gpt-5.4"),
            ("anthropic", "claude-haiku-4-5"),
            ("anthropic", "claude-opus-4-6"),
            ("anthropic", "claude-sonnet-4-6"),
            ("google", "gemini-3-flash-preview"),
            ("google", "gemini-3.1-pro-preview"),
            # Open Router equivalents
            ("openrouter", "openai/gpt-5.4-mini"),
            ("openrouter", "openai/gpt-5.4"),
            ("openrouter", "anthropic/claude-haiku-4.5"),
            ("openrouter", "anthropic/claude-opus-4.6"),
            ("openrouter", "anthropic/claude-sonnet-4.6"),
            ("openrouter", "google/gemini-3-flash-preview"),
            ("openrouter", "google/gemini-3.1-pro-preview"),
        ]

        return [
            ModelOption(provider=provider, model_name=model_name, reasoning_effort=effort)
            for provider, model_name in provider_models
            for effort in efforts
        ]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
PUBLIC_PROVIDER_PREFERENCES = PublicProviderPreferences()
|
|
@@ -185,6 +185,25 @@ class ReadingStep(BaseModel):
|
|
|
185
185
|
approved_at: datetime | None = None
|
|
186
186
|
submitted_at: datetime | None = None
|
|
187
187
|
|
|
188
|
+
def to_submission(self, *, dql_query: str | None = None) -> "ReadingStepSubmission":
    """Build the ReadingStepSubmission counterpart of this step, for resolve_reading_entry.

    Args:
        dql_query: Replacement query (e.g. after alias substitution). When
            None, this step's own ``dql_query`` is used.

    Returns:
        A ReadingStepSubmission carrying this step's alias, name, model,
        output schema, token limit, user metadata, prompt template segments,
        context config, source preset id and cache mode, plus the chosen query.
    """
    # Only None falls back to the stored query; any other value, including an
    # empty string, is taken as an explicit override.
    effective_query = self.dql_query if dql_query is None else dql_query

    return ReadingStepSubmission(
        alias=self.alias,
        name=self.name,
        model=self.model,
        output_schema=self.output_schema,
        max_new_tokens=self.max_new_tokens,
        user_metadata=self.user_metadata,
        prompt_template_segments=self.prompt_template_segments,
        context_config=self.context_config,
        dql_query=effective_query,
        source_reading_preset_id=self.source_reading_preset_id,
        cache_mode=self.cache_mode,
    )
|
|
206
|
+
|
|
188
207
|
|
|
189
208
|
PlanStep: TypeAlias = BeginGroupStep | EndGroupStep | DqlOnlyStep | ReadingStep
|
|
190
209
|
|
|
@@ -285,7 +304,6 @@ class PlanSubmissionRequest(BaseModel):
|
|
|
285
304
|
plan_name: str | None = None
|
|
286
305
|
source_script: str | None = None
|
|
287
306
|
entries: list[PlanStepSubmission]
|
|
288
|
-
upsert_by_name: bool = False
|
|
289
307
|
|
|
290
308
|
|
|
291
309
|
class PlanStepSubmissionStatus(BaseModel):
|
|
@@ -5,10 +5,9 @@ import os
|
|
|
5
5
|
import sys
|
|
6
6
|
import time
|
|
7
7
|
import webbrowser
|
|
8
|
-
from itertools import islice
|
|
9
8
|
from pathlib import Path
|
|
10
9
|
from textwrap import dedent
|
|
11
|
-
from typing import IO, TYPE_CHECKING, Any,
|
|
10
|
+
from typing import IO, TYPE_CHECKING, Any, Iterator, Literal, cast
|
|
12
11
|
from urllib.parse import urlsplit
|
|
13
12
|
|
|
14
13
|
if TYPE_CHECKING:
|
|
@@ -46,7 +45,7 @@ from docent.data_models.reading import (
|
|
|
46
45
|
StepGroupSubmission,
|
|
47
46
|
)
|
|
48
47
|
from docent.judges.util.meta_schema import validate_judge_result_schema
|
|
49
|
-
from docent.
|
|
48
|
+
from docent.sdk.integrations.inspect import ingest_inspect_directory
|
|
50
49
|
from docent.sdk.llm_context import ContextItemRef, LLMContext, LLMContextItem, Prompt
|
|
51
50
|
from docent.sdk.llm_request import ExternalAnalysisResult, LLMRequest
|
|
52
51
|
from docent.sdk.reading import (
|
|
@@ -63,25 +62,17 @@ from docent.sdk.reading import (
|
|
|
63
62
|
_PendingReading, # pyright: ignore[reportPrivateUsage]
|
|
64
63
|
_PendingStepGroup, # pyright: ignore[reportPrivateUsage]
|
|
65
64
|
)
|
|
65
|
+
from docent.sdk.util import batched as _batched
|
|
66
66
|
|
|
67
67
|
MAX_AGENT_RUN_PAYLOAD_BYTES = 100 * 1024 * 1024 # 100MB backend limit
|
|
68
68
|
_AGENT_RUNS_PAYLOAD_PREFIX = b'{"agent_runs":['
|
|
69
69
|
_AGENT_RUNS_PAYLOAD_SUFFIX = b"]}"
|
|
70
|
+
batched = _batched
|
|
70
71
|
|
|
71
72
|
|
|
72
|
-
_T = TypeVar("_T")
|
|
73
73
|
_LOCAL_DOMAINS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
|
|
74
74
|
|
|
75
75
|
|
|
76
|
-
def batched(iterable: Iterable[_T], n: int) -> Iterator[tuple[_T, ...]]:
|
|
77
|
-
"""Backport of itertools.batched for Python <3.12."""
|
|
78
|
-
if n < 1:
|
|
79
|
-
raise ValueError("n must be at least one")
|
|
80
|
-
it = iter(iterable)
|
|
81
|
-
while batch := tuple(islice(it, n)):
|
|
82
|
-
yield batch
|
|
83
|
-
|
|
84
|
-
|
|
85
76
|
def _domain_host(domain: str) -> str:
|
|
86
77
|
"""Extract normalized host from a domain string, handling optional port and IPv6 brackets."""
|
|
87
78
|
normalized = domain.strip().lower()
|
|
@@ -373,7 +364,6 @@ class Docent:
|
|
|
373
364
|
self._plan_name_sent: bool = False
|
|
374
365
|
self._is_notebook: bool = False
|
|
375
366
|
self._notebook_hook_registered: bool = False
|
|
376
|
-
self._flushed_names: set[str] = set()
|
|
377
367
|
|
|
378
368
|
self._register_notebook_hook()
|
|
379
369
|
|
|
@@ -2057,67 +2047,20 @@ class Docent:
|
|
|
2057
2047
|
ValueError: If the path doesn't exist or isn't a directory.
|
|
2058
2048
|
requests.exceptions.HTTPError: If any API requests fail.
|
|
2059
2049
|
"""
|
|
2060
|
-
root_path = Path(fpath)
|
|
2061
|
-
if not root_path.exists():
|
|
2062
|
-
raise ValueError(f"Path does not exist: {fpath}")
|
|
2063
|
-
if not root_path.is_dir():
|
|
2064
|
-
raise ValueError(f"Path is not a directory: {fpath}")
|
|
2065
|
-
|
|
2066
|
-
# Find all .eval files recursively
|
|
2067
|
-
eval_files = list(root_path.rglob("*.eval"))
|
|
2068
|
-
|
|
2069
|
-
if not eval_files:
|
|
2070
|
-
self._logger.info(f"No .eval files found in {fpath}")
|
|
2071
|
-
return
|
|
2072
|
-
|
|
2073
|
-
self._logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
|
|
2074
|
-
|
|
2075
|
-
total_runs_added = 0
|
|
2076
|
-
batch_size = 100
|
|
2077
|
-
|
|
2078
|
-
# Process each .eval file
|
|
2079
|
-
for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
|
|
2080
|
-
# Get total samples for progress tracking
|
|
2081
|
-
total_samples = load_inspect.get_total_samples(eval_file, format="eval")
|
|
2082
|
-
|
|
2083
|
-
if total_samples == 0:
|
|
2084
|
-
self._logger.info(f"No samples found in {eval_file}")
|
|
2085
|
-
continue
|
|
2086
|
-
|
|
2087
|
-
# Load runs from file
|
|
2088
|
-
with open(eval_file, "rb") as f:
|
|
2089
|
-
_, runs_generator = load_inspect.runs_from_file(f, format="eval")
|
|
2090
|
-
|
|
2091
|
-
# Process runs in batches
|
|
2092
|
-
runs_from_file = 0
|
|
2093
|
-
batches = batched(runs_generator, batch_size)
|
|
2094
|
-
|
|
2095
|
-
with tqdm(
|
|
2096
|
-
total=total_samples,
|
|
2097
|
-
desc=f"Processing {eval_file.name}",
|
|
2098
|
-
unit="runs",
|
|
2099
|
-
leave=False,
|
|
2100
|
-
) as file_pbar:
|
|
2101
|
-
for batch in batches:
|
|
2102
|
-
batch_list = list(batch) # Convert generator batch to list
|
|
2103
|
-
if not batch_list:
|
|
2104
|
-
break
|
|
2105
|
-
|
|
2106
|
-
# Add batch to collection
|
|
2107
|
-
url = f"{self._api_url}/{collection_id}/agent_runs"
|
|
2108
|
-
payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
|
|
2109
|
-
|
|
2110
|
-
response = self._session.post(url, json=payload)
|
|
2111
|
-
self._handle_response_errors(response)
|
|
2112
|
-
|
|
2113
|
-
runs_from_file += len(batch_list)
|
|
2114
|
-
file_pbar.update(len(batch_list))
|
|
2115
2050
|
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2051
|
+
def _upload_agent_run_batch(agent_runs: list[AgentRun]) -> None:
|
|
2052
|
+
url = f"{self._api_url}/{collection_id}/agent_runs"
|
|
2053
|
+
payload = {
|
|
2054
|
+
"agent_runs": [agent_run.model_dump(mode="json") for agent_run in agent_runs]
|
|
2055
|
+
}
|
|
2056
|
+
response = self._session.post(url, json=payload)
|
|
2057
|
+
self._handle_response_errors(response)
|
|
2058
|
+
|
|
2059
|
+
ingest_inspect_directory(
|
|
2060
|
+
collection_id,
|
|
2061
|
+
fpath,
|
|
2062
|
+
upload_agent_run_batch=_upload_agent_run_batch,
|
|
2063
|
+
logger=self._logger,
|
|
2121
2064
|
)
|
|
2122
2065
|
|
|
2123
2066
|
def start_chat(
|
|
@@ -2279,7 +2222,10 @@ class Docent:
|
|
|
2279
2222
|
output_schema: dict[str, Any] | None = None,
|
|
2280
2223
|
max_concurrency: int | None = None,
|
|
2281
2224
|
) -> dict[str, Any]:
|
|
2282
|
-
"""
|
|
2225
|
+
"""
|
|
2226
|
+
Deprecated - use readings instead.
|
|
2227
|
+
|
|
2228
|
+
Submit LLM requests for processing.
|
|
2283
2229
|
|
|
2284
2230
|
Creates a result set and submits requests for background LLM processing.
|
|
2285
2231
|
Prints the result set URL and returns submission details.
|
|
@@ -2380,7 +2326,10 @@ class Docent:
|
|
|
2380
2326
|
result_set_name: str | None = None,
|
|
2381
2327
|
exists_ok: bool = False,
|
|
2382
2328
|
) -> dict[str, Any]:
|
|
2383
|
-
"""
|
|
2329
|
+
"""
|
|
2330
|
+
Deprecated.
|
|
2331
|
+
|
|
2332
|
+
Submit pre-computed results directly.
|
|
2384
2333
|
|
|
2385
2334
|
For use when you've run analysis locally (e.g., with a local LLM)
|
|
2386
2335
|
and want to upload the results to Docent for viewing.
|
|
@@ -2431,7 +2380,10 @@ class Docent:
|
|
|
2431
2380
|
collection_id: str,
|
|
2432
2381
|
name_or_id: str,
|
|
2433
2382
|
) -> dict[str, Any]:
|
|
2434
|
-
"""
|
|
2383
|
+
"""
|
|
2384
|
+
Deprecated - use readings instead.
|
|
2385
|
+
|
|
2386
|
+
Get a result set by name or ID.
|
|
2435
2387
|
|
|
2436
2388
|
Args:
|
|
2437
2389
|
collection_id: ID of the Collection.
|
|
@@ -2456,7 +2408,10 @@ class Docent:
|
|
|
2456
2408
|
with_auto_joins: bool = False,
|
|
2457
2409
|
include_incomplete: bool = False,
|
|
2458
2410
|
) -> "pd.DataFrame":
|
|
2459
|
-
"""
|
|
2411
|
+
"""
|
|
2412
|
+
Deprecated - use readings instead.
|
|
2413
|
+
|
|
2414
|
+
Get result set contents as a pandas DataFrame.
|
|
2460
2415
|
|
|
2461
2416
|
Args:
|
|
2462
2417
|
collection_id: ID of the Collection.
|
|
@@ -2563,7 +2518,10 @@ class Docent:
|
|
|
2563
2518
|
collection_id: str,
|
|
2564
2519
|
name_or_id: str,
|
|
2565
2520
|
) -> str:
|
|
2566
|
-
"""
|
|
2521
|
+
"""
|
|
2522
|
+
Deprecated - use readings instead.
|
|
2523
|
+
|
|
2524
|
+
Open a result set in the browser.
|
|
2567
2525
|
|
|
2568
2526
|
Args:
|
|
2569
2527
|
collection_id: ID of the Collection.
|
|
@@ -2665,14 +2623,6 @@ class Docent:
|
|
|
2665
2623
|
return alias
|
|
2666
2624
|
|
|
2667
2625
|
def _enqueue_pending(self, entry: PendingEntry) -> None:
|
|
2668
|
-
"""Add a pending entry, replacing any existing entry with the same name."""
|
|
2669
|
-
name: str | None = getattr(entry, "name", None)
|
|
2670
|
-
if name is not None:
|
|
2671
|
-
for i, existing in enumerate(self._pending):
|
|
2672
|
-
existing_name: str | None = getattr(existing, "name", None)
|
|
2673
|
-
if existing_name == name:
|
|
2674
|
-
self._pending[i] = entry
|
|
2675
|
-
return
|
|
2676
2626
|
self._pending.append(entry)
|
|
2677
2627
|
|
|
2678
2628
|
def _register_atexit(self) -> None:
|
|
@@ -2900,11 +2850,13 @@ class Docent:
|
|
|
2900
2850
|
|
|
2901
2851
|
param_name = seg.column_name
|
|
2902
2852
|
param_type = seg.type_annotation or "unknown"
|
|
2853
|
+
# Unknown type means "defer to server-side inference"; false here does not
|
|
2854
|
+
# mean the caller explicitly declared a scalar placeholder.
|
|
2903
2855
|
segments.append(
|
|
2904
2856
|
{
|
|
2905
2857
|
"param_name": param_name,
|
|
2906
2858
|
"param_type": param_type,
|
|
2907
|
-
"is_list": False,
|
|
2859
|
+
"is_list": seg.is_list_annotation if seg.type_annotation else False,
|
|
2908
2860
|
}
|
|
2909
2861
|
)
|
|
2910
2862
|
if context_config and param_name not in param_configs:
|
|
@@ -3119,14 +3071,11 @@ class Docent:
|
|
|
3119
3071
|
else None
|
|
3120
3072
|
)
|
|
3121
3073
|
|
|
3122
|
-
upsert_by_name = bool(self._flushed_names)
|
|
3123
|
-
|
|
3124
3074
|
request_body = PlanSubmissionRequest(
|
|
3125
3075
|
plan_id=self._plan_id,
|
|
3126
3076
|
plan_name=plan_name,
|
|
3127
3077
|
source_script=source_script,
|
|
3128
3078
|
entries=entries,
|
|
3129
|
-
upsert_by_name=upsert_by_name,
|
|
3130
3079
|
)
|
|
3131
3080
|
is_first_flush_for_plan = self._plan_id is None
|
|
3132
3081
|
|
|
@@ -3141,10 +3090,6 @@ class Docent:
|
|
|
3141
3090
|
self._flushed_collection_id = collection_id
|
|
3142
3091
|
self._plan_name_sent = True
|
|
3143
3092
|
|
|
3144
|
-
for p in self._pending:
|
|
3145
|
-
entry_name: str | None = getattr(p, "name", None)
|
|
3146
|
-
if entry_name is not None:
|
|
3147
|
-
self._flushed_names.add(entry_name)
|
|
3148
3093
|
self._pending.clear()
|
|
3149
3094
|
|
|
3150
3095
|
for status_entry in result.get("entry_statuses", []):
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Integrations for converting and ingesting external trace formats."""
|
|
2
|
+
|
|
3
|
+
from docent.sdk.integrations.harbor import (
|
|
4
|
+
convert_atif_to_agent_run,
|
|
5
|
+
convert_harbor_directory_to_agent_runs,
|
|
6
|
+
convert_harbor_trial_to_agent_run,
|
|
7
|
+
)
|
|
8
|
+
from docent.sdk.integrations.inspect import (
|
|
9
|
+
convert_inspect_directory_to_agent_runs,
|
|
10
|
+
convert_inspect_eval_file_to_agent_runs,
|
|
11
|
+
ingest_inspect_directory,
|
|
12
|
+
)
|
|
13
|
+
from docent.sdk.integrations.nemogym import (
|
|
14
|
+
convert_nemogym_jsonl_file_to_agent_runs,
|
|
15
|
+
convert_nemogym_rollout_to_agent_run,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"convert_atif_to_agent_run",
|
|
20
|
+
"convert_harbor_directory_to_agent_runs",
|
|
21
|
+
"convert_harbor_trial_to_agent_run",
|
|
22
|
+
"convert_inspect_directory_to_agent_runs",
|
|
23
|
+
"convert_inspect_eval_file_to_agent_runs",
|
|
24
|
+
"convert_nemogym_jsonl_file_to_agent_runs",
|
|
25
|
+
"convert_nemogym_rollout_to_agent_run",
|
|
26
|
+
"ingest_inspect_directory",
|
|
27
|
+
]
|