docent-python 0.1.58a0__tar.gz → 0.1.59a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/PKG-INFO +1 -1
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/model_registry.py +37 -13
- docent_python-0.1.59a0/docent/_llm_util/providers/preference_types.py +268 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/client.py +17 -73
- docent_python-0.1.59a0/docent/sdk/integrations/__init__.py +27 -0
- docent_python-0.1.59a0/docent/sdk/integrations/harbor.py +893 -0
- docent_python-0.1.59a0/docent/sdk/integrations/inspect.py +148 -0
- docent_python-0.1.59a0/docent/sdk/integrations/nemogym.py +611 -0
- docent_python-0.1.59a0/docent/sdk/integrations/util.py +84 -0
- docent_python-0.1.59a0/docent/sdk/util.py +16 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/pyproject.toml +1 -1
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/uv.lock +148 -148
- docent_python-0.1.58a0/docent/_llm_util/providers/preference_types.py +0 -110
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/.gitignore +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/LICENSE.md +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/README.md +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/data_models/llm_output.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/llm_svc.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/anthropic.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/google.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openai.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/openrouter.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/agent_run.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/feedback.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/reading.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/mcp/server.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/py.typed +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/llm_context.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/sdk/reading.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/trace.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.59a0}/docent/trace_temp.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from functools import lru_cache
|
|
5
6
|
from typing import Optional
|
|
@@ -9,6 +10,8 @@ from docent._log_util import get_logger
|
|
|
9
10
|
|
|
10
11
|
logger = get_logger(__name__)
|
|
11
12
|
|
|
13
|
+
_CLAUDE_VERSION_PATTERN = re.compile(r"(claude-(?:haiku|sonnet|opus)-4)[.-](\d+)\b")
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
"""
|
|
14
17
|
Values are USD per million tokens
|
|
@@ -34,6 +37,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
34
37
|
"gpt-5-chat-latest",
|
|
35
38
|
ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
|
|
36
39
|
),
|
|
40
|
+
(
|
|
41
|
+
"gpt-5.4-mini",
|
|
42
|
+
ModelInfo(rate={"input": 0.75, "output": 4.50}, context_window=400_000),
|
|
43
|
+
),
|
|
44
|
+
(
|
|
45
|
+
"gpt-5.4",
|
|
46
|
+
ModelInfo(rate={"input": 2.50, "output": 15.0}, context_window=1_050_000),
|
|
47
|
+
),
|
|
37
48
|
(
|
|
38
49
|
"gpt-5-nano",
|
|
39
50
|
ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
|
|
@@ -62,18 +73,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
62
73
|
"claude-sonnet-4-5",
|
|
63
74
|
ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
|
|
64
75
|
),
|
|
65
|
-
(
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
),
|
|
69
|
-
(
|
|
70
|
-
"claude-opus-4-6",
|
|
71
|
-
ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
|
|
72
|
-
),
|
|
73
|
-
(
|
|
74
|
-
"claude-haiku-4-5",
|
|
75
|
-
ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
|
|
76
|
-
),
|
|
76
|
+
("claude-sonnet-4-6", ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000)),
|
|
77
|
+
("claude-opus-4-6", ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=1_000_000)),
|
|
78
|
+
("claude-haiku-4-5", ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000)),
|
|
77
79
|
(
|
|
78
80
|
"claude-opus-4-5-20251101",
|
|
79
81
|
ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
|
|
@@ -108,6 +110,13 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
108
110
|
context_window=1_048_576,
|
|
109
111
|
),
|
|
110
112
|
),
|
|
113
|
+
(
|
|
114
|
+
"gemini-3.1-pro-preview",
|
|
115
|
+
ModelInfo(
|
|
116
|
+
rate={"input": 2.00, "output": 12.00},
|
|
117
|
+
context_window=1_048_576,
|
|
118
|
+
),
|
|
119
|
+
),
|
|
111
120
|
(
|
|
112
121
|
"gemini-3-flash-preview",
|
|
113
122
|
ModelInfo(
|
|
@@ -146,12 +155,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
|
|
|
146
155
|
]
|
|
147
156
|
|
|
148
157
|
|
|
158
|
+
def normalize_model_name(model_name: str) -> str:
|
|
159
|
+
"""Normalize provider-specific naming differences before registry lookup."""
|
|
160
|
+
|
|
161
|
+
return _CLAUDE_VERSION_PATTERN.sub(r"\1-\2", model_name)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def model_names_match(expected: str, actual: str) -> bool:
|
|
165
|
+
"""Match a configured model name against a provider-reported model string."""
|
|
166
|
+
|
|
167
|
+
normalized_expected = normalize_model_name(expected)
|
|
168
|
+
normalized_actual = normalize_model_name(actual)
|
|
169
|
+
return normalized_expected in normalized_actual
|
|
170
|
+
|
|
171
|
+
|
|
149
172
|
@lru_cache(maxsize=None)
|
|
150
173
|
def get_model_info(model_name: str) -> Optional[ModelInfo]:
|
|
174
|
+
normalized_model_name = normalize_model_name(model_name)
|
|
151
175
|
for registry_model_name, info in sorted(
|
|
152
176
|
_REGISTRY, key=lambda entry: len(entry[0]), reverse=True
|
|
153
177
|
):
|
|
154
|
-
if registry_model_name in model_name:
|
|
178
|
+
if registry_model_name in normalized_model_name:
|
|
155
179
|
return info
|
|
156
180
|
return None
|
|
157
181
|
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""Provides preferences of which LLM models to use for different Docent functions."""
|
|
2
|
+
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, ConfigDict
|
|
7
|
+
|
|
8
|
+
from docent._llm_util.model_registry import get_context_window
|
|
9
|
+
from docent._log_util import get_logger
|
|
10
|
+
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelOption(BaseModel):
|
|
15
|
+
"""Configuration for a specific model from a provider. Not to be confused with ModelInfo.
|
|
16
|
+
|
|
17
|
+
Attributes:
|
|
18
|
+
provider: The name of the LLM provider (e.g., "openai", "anthropic").
|
|
19
|
+
model_name: The specific model to use from the provider.
|
|
20
|
+
reasoning_effort: Optional indication of computational effort to use.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
model_config = ConfigDict(extra="ignore")
|
|
24
|
+
|
|
25
|
+
provider: str
|
|
26
|
+
model_name: str
|
|
27
|
+
reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ModelOptionWithContext(BaseModel):
|
|
31
|
+
"""Enhanced model option that includes context window information for frontend use.
|
|
32
|
+
Not to be confused with ModelInfo or ModelOption.
|
|
33
|
+
|
|
34
|
+
Attributes:
|
|
35
|
+
provider: The name of the LLM provider (e.g., "openai", "anthropic").
|
|
36
|
+
model_name: The specific model to use from the provider.
|
|
37
|
+
reasoning_effort: Optional indication of computational effort to use.
|
|
38
|
+
context_window: The context window size in tokens.
|
|
39
|
+
uses_byok: Whether this model would use the user's own API key.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
provider: str
|
|
43
|
+
model_name: str
|
|
44
|
+
reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
|
|
45
|
+
context_window: int
|
|
46
|
+
uses_byok: bool
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
def from_model_option(
|
|
50
|
+
cls, model_option: ModelOption, uses_byok: bool = False
|
|
51
|
+
) -> "ModelOptionWithContext":
|
|
52
|
+
"""Create a ModelOptionWithContext from a ModelOption.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
model_option: The base model option
|
|
56
|
+
uses_byok: Whether this model requires bring-your-own-key
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
ModelOptionWithContext with context window looked up from global mapping
|
|
60
|
+
"""
|
|
61
|
+
context_window = get_context_window(model_option.model_name)
|
|
62
|
+
|
|
63
|
+
return cls(
|
|
64
|
+
provider=model_option.provider,
|
|
65
|
+
model_name=model_option.model_name,
|
|
66
|
+
reasoning_effort=model_option.reasoning_effort,
|
|
67
|
+
context_window=context_window,
|
|
68
|
+
uses_byok=uses_byok,
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def merge_models_with_byok(
|
|
73
|
+
defaults: list[ModelOption],
|
|
74
|
+
byok: list[ModelOption],
|
|
75
|
+
api_keys: dict[str, str] | None,
|
|
76
|
+
) -> list[ModelOptionWithContext]:
|
|
77
|
+
user_keys = api_keys or {}
|
|
78
|
+
|
|
79
|
+
merged: list[ModelOption] = list(defaults)
|
|
80
|
+
if user_keys:
|
|
81
|
+
merged.extend([m for m in byok if m.provider in user_keys])
|
|
82
|
+
|
|
83
|
+
return [ModelOptionWithContext.from_model_option(m, m.provider in user_keys) for m in merged]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class PublicProviderPreferences(BaseModel):
|
|
87
|
+
@cached_property
|
|
88
|
+
def default_judge_models(self) -> list[ModelOption]:
|
|
89
|
+
"""Judge models that any user can access without providing their own API key"""
|
|
90
|
+
|
|
91
|
+
return [
|
|
92
|
+
ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="low"),
|
|
93
|
+
ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="medium"),
|
|
94
|
+
ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="high"),
|
|
95
|
+
ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="low"),
|
|
96
|
+
ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="medium"),
|
|
97
|
+
ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="high"),
|
|
98
|
+
ModelOption(
|
|
99
|
+
provider="anthropic",
|
|
100
|
+
model_name="claude-haiku-4-5",
|
|
101
|
+
reasoning_effort="low",
|
|
102
|
+
),
|
|
103
|
+
ModelOption(
|
|
104
|
+
provider="anthropic",
|
|
105
|
+
model_name="claude-haiku-4-5",
|
|
106
|
+
reasoning_effort="medium",
|
|
107
|
+
),
|
|
108
|
+
ModelOption(
|
|
109
|
+
provider="anthropic",
|
|
110
|
+
model_name="claude-haiku-4-5",
|
|
111
|
+
reasoning_effort="high",
|
|
112
|
+
),
|
|
113
|
+
ModelOption(
|
|
114
|
+
provider="anthropic",
|
|
115
|
+
model_name="claude-opus-4-6",
|
|
116
|
+
reasoning_effort="low",
|
|
117
|
+
),
|
|
118
|
+
ModelOption(
|
|
119
|
+
provider="anthropic",
|
|
120
|
+
model_name="claude-opus-4-6",
|
|
121
|
+
reasoning_effort="medium",
|
|
122
|
+
),
|
|
123
|
+
ModelOption(
|
|
124
|
+
provider="anthropic",
|
|
125
|
+
model_name="claude-opus-4-6",
|
|
126
|
+
reasoning_effort="high",
|
|
127
|
+
),
|
|
128
|
+
ModelOption(
|
|
129
|
+
provider="anthropic",
|
|
130
|
+
model_name="claude-sonnet-4-6",
|
|
131
|
+
reasoning_effort="low",
|
|
132
|
+
),
|
|
133
|
+
ModelOption(
|
|
134
|
+
provider="anthropic",
|
|
135
|
+
model_name="claude-sonnet-4-6",
|
|
136
|
+
reasoning_effort="medium",
|
|
137
|
+
),
|
|
138
|
+
ModelOption(
|
|
139
|
+
provider="anthropic",
|
|
140
|
+
model_name="claude-sonnet-4-6",
|
|
141
|
+
reasoning_effort="high",
|
|
142
|
+
),
|
|
143
|
+
ModelOption(
|
|
144
|
+
provider="google",
|
|
145
|
+
model_name="gemini-3-flash-preview",
|
|
146
|
+
reasoning_effort="low",
|
|
147
|
+
),
|
|
148
|
+
ModelOption(
|
|
149
|
+
provider="google",
|
|
150
|
+
model_name="gemini-3-flash-preview",
|
|
151
|
+
reasoning_effort="medium",
|
|
152
|
+
),
|
|
153
|
+
ModelOption(
|
|
154
|
+
provider="google",
|
|
155
|
+
model_name="gemini-3-flash-preview",
|
|
156
|
+
reasoning_effort="high",
|
|
157
|
+
),
|
|
158
|
+
ModelOption(
|
|
159
|
+
provider="google",
|
|
160
|
+
model_name="gemini-3.1-pro-preview",
|
|
161
|
+
reasoning_effort="low",
|
|
162
|
+
),
|
|
163
|
+
ModelOption(
|
|
164
|
+
provider="google",
|
|
165
|
+
model_name="gemini-3.1-pro-preview",
|
|
166
|
+
reasoning_effort="medium",
|
|
167
|
+
),
|
|
168
|
+
ModelOption(
|
|
169
|
+
provider="google",
|
|
170
|
+
model_name="gemini-3.1-pro-preview",
|
|
171
|
+
reasoning_effort="high",
|
|
172
|
+
),
|
|
173
|
+
# Open Router equivalents
|
|
174
|
+
ModelOption(
|
|
175
|
+
provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="low"
|
|
176
|
+
),
|
|
177
|
+
ModelOption(
|
|
178
|
+
provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="medium"
|
|
179
|
+
),
|
|
180
|
+
ModelOption(
|
|
181
|
+
provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="high"
|
|
182
|
+
),
|
|
183
|
+
ModelOption(provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="low"),
|
|
184
|
+
ModelOption(
|
|
185
|
+
provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="medium"
|
|
186
|
+
),
|
|
187
|
+
ModelOption(
|
|
188
|
+
provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="high"
|
|
189
|
+
),
|
|
190
|
+
ModelOption(
|
|
191
|
+
provider="openrouter",
|
|
192
|
+
model_name="anthropic/claude-haiku-4.5",
|
|
193
|
+
reasoning_effort="low",
|
|
194
|
+
),
|
|
195
|
+
ModelOption(
|
|
196
|
+
provider="openrouter",
|
|
197
|
+
model_name="anthropic/claude-haiku-4.5",
|
|
198
|
+
reasoning_effort="medium",
|
|
199
|
+
),
|
|
200
|
+
ModelOption(
|
|
201
|
+
provider="openrouter",
|
|
202
|
+
model_name="anthropic/claude-haiku-4.5",
|
|
203
|
+
reasoning_effort="high",
|
|
204
|
+
),
|
|
205
|
+
ModelOption(
|
|
206
|
+
provider="openrouter",
|
|
207
|
+
model_name="anthropic/claude-opus-4.6",
|
|
208
|
+
reasoning_effort="low",
|
|
209
|
+
),
|
|
210
|
+
ModelOption(
|
|
211
|
+
provider="openrouter",
|
|
212
|
+
model_name="anthropic/claude-opus-4.6",
|
|
213
|
+
reasoning_effort="medium",
|
|
214
|
+
),
|
|
215
|
+
ModelOption(
|
|
216
|
+
provider="openrouter",
|
|
217
|
+
model_name="anthropic/claude-opus-4.6",
|
|
218
|
+
reasoning_effort="high",
|
|
219
|
+
),
|
|
220
|
+
ModelOption(
|
|
221
|
+
provider="openrouter",
|
|
222
|
+
model_name="anthropic/claude-sonnet-4.6",
|
|
223
|
+
reasoning_effort="low",
|
|
224
|
+
),
|
|
225
|
+
ModelOption(
|
|
226
|
+
provider="openrouter",
|
|
227
|
+
model_name="anthropic/claude-sonnet-4.6",
|
|
228
|
+
reasoning_effort="medium",
|
|
229
|
+
),
|
|
230
|
+
ModelOption(
|
|
231
|
+
provider="openrouter",
|
|
232
|
+
model_name="anthropic/claude-sonnet-4.6",
|
|
233
|
+
reasoning_effort="high",
|
|
234
|
+
),
|
|
235
|
+
ModelOption(
|
|
236
|
+
provider="openrouter",
|
|
237
|
+
model_name="google/gemini-3-flash-preview",
|
|
238
|
+
reasoning_effort="low",
|
|
239
|
+
),
|
|
240
|
+
ModelOption(
|
|
241
|
+
provider="openrouter",
|
|
242
|
+
model_name="google/gemini-3-flash-preview",
|
|
243
|
+
reasoning_effort="medium",
|
|
244
|
+
),
|
|
245
|
+
ModelOption(
|
|
246
|
+
provider="openrouter",
|
|
247
|
+
model_name="google/gemini-3-flash-preview",
|
|
248
|
+
reasoning_effort="high",
|
|
249
|
+
),
|
|
250
|
+
ModelOption(
|
|
251
|
+
provider="openrouter",
|
|
252
|
+
model_name="google/gemini-3.1-pro-preview",
|
|
253
|
+
reasoning_effort="low",
|
|
254
|
+
),
|
|
255
|
+
ModelOption(
|
|
256
|
+
provider="openrouter",
|
|
257
|
+
model_name="google/gemini-3.1-pro-preview",
|
|
258
|
+
reasoning_effort="medium",
|
|
259
|
+
),
|
|
260
|
+
ModelOption(
|
|
261
|
+
provider="openrouter",
|
|
262
|
+
model_name="google/gemini-3.1-pro-preview",
|
|
263
|
+
reasoning_effort="high",
|
|
264
|
+
),
|
|
265
|
+
]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
PUBLIC_PROVIDER_PREFERENCES = PublicProviderPreferences()
|
|
@@ -5,10 +5,9 @@ import os
|
|
|
5
5
|
import sys
|
|
6
6
|
import time
|
|
7
7
|
import webbrowser
|
|
8
|
-
from itertools import islice
|
|
9
8
|
from pathlib import Path
|
|
10
9
|
from textwrap import dedent
|
|
11
|
-
from typing import IO, TYPE_CHECKING, Any, Iterable, Iterator, Literal, TypeVar, cast
|
|
10
|
+
from typing import IO, TYPE_CHECKING, Any, Iterator, Literal, cast
|
|
12
11
|
from urllib.parse import urlsplit
|
|
13
12
|
|
|
14
13
|
if TYPE_CHECKING:
|
|
@@ -46,7 +45,7 @@ from docent.data_models.reading import (
|
|
|
46
45
|
StepGroupSubmission,
|
|
47
46
|
)
|
|
48
47
|
from docent.judges.util.meta_schema import validate_judge_result_schema
|
|
49
|
-
from docent.loaders import load_inspect
|
|
48
|
+
from docent.sdk.integrations.inspect import ingest_inspect_directory
|
|
50
49
|
from docent.sdk.llm_context import ContextItemRef, LLMContext, LLMContextItem, Prompt
|
|
51
50
|
from docent.sdk.llm_request import ExternalAnalysisResult, LLMRequest
|
|
52
51
|
from docent.sdk.reading import (
|
|
@@ -63,25 +62,17 @@ from docent.sdk.reading import (
|
|
|
63
62
|
_PendingReading, # pyright: ignore[reportPrivateUsage]
|
|
64
63
|
_PendingStepGroup, # pyright: ignore[reportPrivateUsage]
|
|
65
64
|
)
|
|
65
|
+
from docent.sdk.util import batched as _batched
|
|
66
66
|
|
|
67
67
|
MAX_AGENT_RUN_PAYLOAD_BYTES = 100 * 1024 * 1024 # 100MB backend limit
|
|
68
68
|
_AGENT_RUNS_PAYLOAD_PREFIX = b'{"agent_runs":['
|
|
69
69
|
_AGENT_RUNS_PAYLOAD_SUFFIX = b"]}"
|
|
70
|
+
batched = _batched
|
|
70
71
|
|
|
71
72
|
|
|
72
|
-
_T = TypeVar("_T")
|
|
73
73
|
_LOCAL_DOMAINS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
|
|
74
74
|
|
|
75
75
|
|
|
76
|
-
def batched(iterable: Iterable[_T], n: int) -> Iterator[tuple[_T, ...]]:
|
|
77
|
-
"""Backport of itertools.batched for Python <3.12."""
|
|
78
|
-
if n < 1:
|
|
79
|
-
raise ValueError("n must be at least one")
|
|
80
|
-
it = iter(iterable)
|
|
81
|
-
while batch := tuple(islice(it, n)):
|
|
82
|
-
yield batch
|
|
83
|
-
|
|
84
|
-
|
|
85
76
|
def _domain_host(domain: str) -> str:
|
|
86
77
|
"""Extract normalized host from a domain string, handling optional port and IPv6 brackets."""
|
|
87
78
|
normalized = domain.strip().lower()
|
|
@@ -2056,67 +2047,20 @@ class Docent:
|
|
|
2056
2047
|
ValueError: If the path doesn't exist or isn't a directory.
|
|
2057
2048
|
requests.exceptions.HTTPError: If any API requests fail.
|
|
2058
2049
|
"""
|
|
2059
|
-
root_path = Path(fpath)
|
|
2060
|
-
if not root_path.exists():
|
|
2061
|
-
raise ValueError(f"Path does not exist: {fpath}")
|
|
2062
|
-
if not root_path.is_dir():
|
|
2063
|
-
raise ValueError(f"Path is not a directory: {fpath}")
|
|
2064
|
-
|
|
2065
|
-
# Find all .eval files recursively
|
|
2066
|
-
eval_files = list(root_path.rglob("*.eval"))
|
|
2067
|
-
|
|
2068
|
-
if not eval_files:
|
|
2069
|
-
self._logger.info(f"No .eval files found in {fpath}")
|
|
2070
|
-
return
|
|
2071
|
-
|
|
2072
|
-
self._logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
|
|
2073
|
-
|
|
2074
|
-
total_runs_added = 0
|
|
2075
|
-
batch_size = 100
|
|
2076
|
-
|
|
2077
|
-
# Process each .eval file
|
|
2078
|
-
for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
|
|
2079
|
-
# Get total samples for progress tracking
|
|
2080
|
-
total_samples = load_inspect.get_total_samples(eval_file, format="eval")
|
|
2081
|
-
|
|
2082
|
-
if total_samples == 0:
|
|
2083
|
-
self._logger.info(f"No samples found in {eval_file}")
|
|
2084
|
-
continue
|
|
2085
2050
|
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
) as file_pbar:
|
|
2100
|
-
for batch in batches:
|
|
2101
|
-
batch_list = list(batch) # Convert generator batch to list
|
|
2102
|
-
if not batch_list:
|
|
2103
|
-
break
|
|
2104
|
-
|
|
2105
|
-
# Add batch to collection
|
|
2106
|
-
url = f"{self._api_url}/{collection_id}/agent_runs"
|
|
2107
|
-
payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
|
|
2108
|
-
|
|
2109
|
-
response = self._session.post(url, json=payload)
|
|
2110
|
-
self._handle_response_errors(response)
|
|
2111
|
-
|
|
2112
|
-
runs_from_file += len(batch_list)
|
|
2113
|
-
file_pbar.update(len(batch_list))
|
|
2114
|
-
|
|
2115
|
-
total_runs_added += runs_from_file
|
|
2116
|
-
self._logger.info(f"Added {runs_from_file} runs from {eval_file}")
|
|
2117
|
-
|
|
2118
|
-
self._logger.info(
|
|
2119
|
-
f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
|
|
2051
|
+
def _upload_agent_run_batch(agent_runs: list[AgentRun]) -> None:
|
|
2052
|
+
url = f"{self._api_url}/{collection_id}/agent_runs"
|
|
2053
|
+
payload = {
|
|
2054
|
+
"agent_runs": [agent_run.model_dump(mode="json") for agent_run in agent_runs]
|
|
2055
|
+
}
|
|
2056
|
+
response = self._session.post(url, json=payload)
|
|
2057
|
+
self._handle_response_errors(response)
|
|
2058
|
+
|
|
2059
|
+
ingest_inspect_directory(
|
|
2060
|
+
collection_id,
|
|
2061
|
+
fpath,
|
|
2062
|
+
upload_agent_run_batch=_upload_agent_run_batch,
|
|
2063
|
+
logger=self._logger,
|
|
2120
2064
|
)
|
|
2121
2065
|
|
|
2122
2066
|
def start_chat(
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Integrations for converting and ingesting external trace formats."""
|
|
2
|
+
|
|
3
|
+
from docent.sdk.integrations.harbor import (
|
|
4
|
+
convert_atif_to_agent_run,
|
|
5
|
+
convert_harbor_directory_to_agent_runs,
|
|
6
|
+
convert_harbor_trial_to_agent_run,
|
|
7
|
+
)
|
|
8
|
+
from docent.sdk.integrations.inspect import (
|
|
9
|
+
convert_inspect_directory_to_agent_runs,
|
|
10
|
+
convert_inspect_eval_file_to_agent_runs,
|
|
11
|
+
ingest_inspect_directory,
|
|
12
|
+
)
|
|
13
|
+
from docent.sdk.integrations.nemogym import (
|
|
14
|
+
convert_nemogym_jsonl_file_to_agent_runs,
|
|
15
|
+
convert_nemogym_rollout_to_agent_run,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"convert_atif_to_agent_run",
|
|
20
|
+
"convert_harbor_directory_to_agent_runs",
|
|
21
|
+
"convert_harbor_trial_to_agent_run",
|
|
22
|
+
"convert_inspect_directory_to_agent_runs",
|
|
23
|
+
"convert_inspect_eval_file_to_agent_runs",
|
|
24
|
+
"convert_nemogym_jsonl_file_to_agent_runs",
|
|
25
|
+
"convert_nemogym_rollout_to_agent_run",
|
|
26
|
+
"ingest_inspect_directory",
|
|
27
|
+
]
|