docent-python 0.1.58a0__tar.gz → 0.1.60a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/PKG-INFO +1 -1
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/model_registry.py +37 -13
- docent_python-0.1.60a0/docent/_llm_util/providers/preference_types.py +268 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/reading.py +138 -2
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/mcp/server.py +13 -3
- docent_python-0.1.60a0/docent/sdk/_agent_runs.py +217 -0
- docent_python-0.1.60a0/docent/sdk/_base.py +299 -0
- docent_python-0.1.60a0/docent/sdk/_client_util.py +141 -0
- docent_python-0.1.60a0/docent/sdk/_collections.py +421 -0
- docent_python-0.1.60a0/docent/sdk/_dql.py +297 -0
- docent_python-0.1.60a0/docent/sdk/_feedback.py +157 -0
- docent_python-0.1.60a0/docent/sdk/_labels.py +225 -0
- docent_python-0.1.60a0/docent/sdk/_readings.py +1121 -0
- docent_python-0.1.60a0/docent/sdk/_results.py +311 -0
- docent_python-0.1.60a0/docent/sdk/_rubrics.py +320 -0
- docent_python-0.1.60a0/docent/sdk/_sharing.py +229 -0
- docent_python-0.1.60a0/docent/sdk/client.py +45 -0
- docent_python-0.1.60a0/docent/sdk/integrations/__init__.py +27 -0
- docent_python-0.1.60a0/docent/sdk/integrations/harbor.py +893 -0
- docent_python-0.1.60a0/docent/sdk/integrations/inspect.py +148 -0
- docent_python-0.1.60a0/docent/sdk/integrations/nemogym.py +611 -0
- docent_python-0.1.60a0/docent/sdk/integrations/util.py +84 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/sdk/reading.py +6 -4
- docent_python-0.1.60a0/docent/sdk/util.py +16 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/pyproject.toml +1 -1
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/uv.lock +149 -149
- docent_python-0.1.58a0/docent/_llm_util/providers/preference_types.py +0 -110
- docent_python-0.1.58a0/docent/sdk/client.py +0 -3337
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/.gitignore +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/LICENSE.md +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/README.md +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/data_models/llm_output.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/llm_svc.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/anthropic.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/google.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/openai.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/openrouter.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/agent_run.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/feedback.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/py.typed +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/sdk/llm_context.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/trace.py +0 -0
- {docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/trace_temp.py +0 -0
{docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/_llm_util/model_registry.py

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from dataclasses import dataclass
 from functools import lru_cache
 from typing import Optional
@@ -9,6 +10,8 @@ from docent._log_util import get_logger
 
 logger = get_logger(__name__)
 
+_CLAUDE_VERSION_PATTERN = re.compile(r"(claude-(?:haiku|sonnet|opus)-4)[.-](\d+)\b")
+
 
 """
 Values are USD per million tokens
@@ -34,6 +37,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "gpt-5-chat-latest",
         ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
     ),
+    (
+        "gpt-5.4-mini",
+        ModelInfo(rate={"input": 0.75, "output": 4.50}, context_window=400_000),
+    ),
+    (
+        "gpt-5.4",
+        ModelInfo(rate={"input": 2.50, "output": 15.0}, context_window=1_050_000),
+    ),
     (
         "gpt-5-nano",
         ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
@@ -62,18 +73,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4-5",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
-    (
-
-
-    ),
-    (
-        "claude-opus-4-6",
-        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
-    ),
-    (
-        "claude-haiku-4-5",
-        ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
-    ),
+    ("claude-sonnet-4-6", ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000)),
+    ("claude-opus-4-6", ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=1_000_000)),
+    ("claude-haiku-4-5", ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000)),
     (
         "claude-opus-4-5-20251101",
         ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
@@ -108,6 +110,13 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
             context_window=1_048_576,
         ),
     ),
+    (
+        "gemini-3.1-pro-preview",
+        ModelInfo(
+            rate={"input": 2.00, "output": 12.00},
+            context_window=1_048_576,
+        ),
+    ),
     (
         "gemini-3-flash-preview",
         ModelInfo(
@@ -146,12 +155,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
 ]
 
 
+def normalize_model_name(model_name: str) -> str:
+    """Normalize provider-specific naming differences before registry lookup."""
+
+    return _CLAUDE_VERSION_PATTERN.sub(r"\1-\2", model_name)
+
+
+def model_names_match(expected: str, actual: str) -> bool:
+    """Match a configured model name against a provider-reported model string."""
+
+    normalized_expected = normalize_model_name(expected)
+    normalized_actual = normalize_model_name(actual)
+    return normalized_expected in normalized_actual
+
+
 @lru_cache(maxsize=None)
 def get_model_info(model_name: str) -> Optional[ModelInfo]:
+    normalized_model_name = normalize_model_name(model_name)
     for registry_model_name, info in sorted(
         _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
     ):
-        if registry_model_name in model_name:
+        if registry_model_name in normalized_model_name:
             return info
     return None
 
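For orientation, a rough usage sketch of the normalization helpers added above (not part of the package; which registry entry resolves depends on the _REGISTRY contents, and ModelInfo is assumed to expose a context_window attribute as shown in the entries):

# Illustrative sketch only: exercising the new helpers from model_registry.py.
from docent._llm_util.model_registry import (
    get_model_info,
    model_names_match,
    normalize_model_name,
)

# Dotted Claude 4.x names (as reported by some providers) normalize to the
# dashed form used as registry keys.
assert normalize_model_name("anthropic/claude-opus-4.6") == "anthropic/claude-opus-4-6"

# Configured vs. provider-reported names are compared on their normalized forms.
assert model_names_match("claude-sonnet-4-6", "anthropic/claude-sonnet-4.6")

# get_model_info now normalizes before its substring lookup, so a dotted name
# should resolve to the same entry as the dashed one (assuming it is registered).
info = get_model_info("claude-opus-4.6")
if info is not None:
    print(info.context_window)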
docent_python-0.1.60a0/docent/_llm_util/providers/preference_types.py (new file)

@@ -0,0 +1,268 @@
+"""Provides preferences of which LLM models to use for different Docent functions."""
+
+from functools import cached_property
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+from docent._llm_util.model_registry import get_context_window
+from docent._log_util import get_logger
+
+logger = get_logger(__name__)
+
+
+class ModelOption(BaseModel):
+    """Configuration for a specific model from a provider. Not to be confused with ModelInfo.
+
+    Attributes:
+        provider: The name of the LLM provider (e.g., "openai", "anthropic").
+        model_name: The specific model to use from the provider.
+        reasoning_effort: Optional indication of computational effort to use.
+    """
+
+    model_config = ConfigDict(extra="ignore")
+
+    provider: str
+    model_name: str
+    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
+
+
+class ModelOptionWithContext(BaseModel):
+    """Enhanced model option that includes context window information for frontend use.
+    Not to be confused with ModelInfo or ModelOption.
+
+    Attributes:
+        provider: The name of the LLM provider (e.g., "openai", "anthropic").
+        model_name: The specific model to use from the provider.
+        reasoning_effort: Optional indication of computational effort to use.
+        context_window: The context window size in tokens.
+        uses_byok: Whether this model would use the user's own API key.
+    """
+
+    provider: str
+    model_name: str
+    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
+    context_window: int
+    uses_byok: bool
+
+    @classmethod
+    def from_model_option(
+        cls, model_option: ModelOption, uses_byok: bool = False
+    ) -> "ModelOptionWithContext":
+        """Create a ModelOptionWithContext from a ModelOption.
+
+        Args:
+            model_option: The base model option
+            uses_byok: Whether this model requires bring-your-own-key
+
+        Returns:
+            ModelOptionWithContext with context window looked up from global mapping
+        """
+        context_window = get_context_window(model_option.model_name)
+
+        return cls(
+            provider=model_option.provider,
+            model_name=model_option.model_name,
+            reasoning_effort=model_option.reasoning_effort,
+            context_window=context_window,
+            uses_byok=uses_byok,
+        )
+
+
+def merge_models_with_byok(
+    defaults: list[ModelOption],
+    byok: list[ModelOption],
+    api_keys: dict[str, str] | None,
+) -> list[ModelOptionWithContext]:
+    user_keys = api_keys or {}
+
+    merged: list[ModelOption] = list(defaults)
+    if user_keys:
+        merged.extend([m for m in byok if m.provider in user_keys])
+
+    return [ModelOptionWithContext.from_model_option(m, m.provider in user_keys) for m in merged]
+
+
+class PublicProviderPreferences(BaseModel):
+    @cached_property
+    def default_judge_models(self) -> list[ModelOption]:
+        """Judge models that any user can access without providing their own API key"""
+
+        return [
+            ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="low"),
+            ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="medium"),
+            ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="high"),
+            ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="low"),
+            ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="medium"),
+            ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="high"),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-haiku-4-5",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-haiku-4-5",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-haiku-4-5",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-opus-4-6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-opus-4-6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-opus-4-6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-sonnet-4-6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-sonnet-4-6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-sonnet-4-6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3-flash-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3-flash-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3-flash-preview",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3.1-pro-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3.1-pro-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3.1-pro-preview",
+                reasoning_effort="high",
+            ),
+            # Open Router equivalents
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="low"
+            ),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="medium"
+            ),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="high"
+            ),
+            ModelOption(provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="low"),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="medium"
+            ),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="high"
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-haiku-4.5",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-haiku-4.5",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-haiku-4.5",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-opus-4.6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-opus-4.6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-opus-4.6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-sonnet-4.6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-sonnet-4.6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-sonnet-4.6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3-flash-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3-flash-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3-flash-preview",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3.1-pro-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3.1-pro-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3.1-pro-preview",
+                reasoning_effort="high",
+            ),
+        ]
+
+
+PUBLIC_PROVIDER_PREFERENCES = PublicProviderPreferences()
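A rough sketch of how these pieces compose (not part of the package; the BYOK-only option below is hypothetical and exists only to show the filtering that merge_models_with_byok performs):

# Sketch: combining the public defaults with a hypothetical BYOK-only option.
from docent._llm_util.providers.preference_types import (
    PUBLIC_PROVIDER_PREFERENCES,
    ModelOption,
    merge_models_with_byok,
)

defaults = PUBLIC_PROVIDER_PREFERENCES.default_judge_models

# Hypothetical option that is only offered to users who supplied an OpenAI key.
byok_only = [ModelOption(provider="openai", model_name="gpt-5-chat-latest")]

options = merge_models_with_byok(defaults, byok_only, api_keys={"openai": "sk-..."})
for opt in options[:3]:
    # uses_byok is True whenever the option's provider matches a user-supplied key.
    print(opt.model_name, opt.context_window, opt.uses_byok)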
{docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/data_models/reading.py

@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Any, Literal, TypeAlias
+from typing import Annotated, Any, Literal, TypeAlias
 from uuid import uuid4
 
 from pydantic import BaseModel, Field
@@ -41,6 +41,7 @@ prompt segments, model config, output schema, and user-supplied arguments.
 re-evaluation.
 """
 ReadingCacheMode = Literal["reading", "results", "none"]
+ReadingStatus = Literal["completed", "failed", "pending", "cached", "needs_approval", "unresolved"]
 
 
 class ContextFilterSection(BaseModel):
@@ -73,6 +74,7 @@ class ReadingPreset(BaseModel):
     collection_id: str
     name: str
     created_at: datetime | None = None
+    created_by: str | None = None
     updated_at: datetime | None = None
 
 
@@ -117,6 +119,7 @@ class Reading(BaseModel):
     user_metadata: dict[str, Any] | None = None
     source_reading_preset_id: str | None = None
     created_at: datetime | None = None
+    created_by: str | None = None
 
 
 class ReadingResult(BaseModel):
@@ -226,6 +229,7 @@ class ReadingPlan(BaseModel):
     name: str | None = None
     steps: list[PlanStep] = Field(default_factory=list)  # type: ignore[reportUnknownVariableType]
     created_at: datetime | None = None
+    created_by: str | None = None
     updated_at: datetime | None = None
 
 
@@ -306,23 +310,140 @@ class PlanSubmissionRequest(BaseModel):
     entries: list[PlanStepSubmission]
 
 
+class DqlPreview(BaseModel):
+    columns: list[str]
+    rows: list[list[Any]]
+    truncated: bool
+    row_count: int
+
+
+class ReadingResultPreview(BaseModel):
+    id: str
+    output: dict[str, Any] | None = None
+    error: dict[str, Any] | None = None
+
+
 class PlanStepSubmissionStatus(BaseModel):
     alias: str
-
+    entry_type: str
+    status: ReadingStatus
     reading_id: str | None = None
+    result_count: int | None = None
+    dql_preview: DqlPreview | None = None
+    result_preview: list[ReadingResultPreview] | None = None
 
 
 class PlanSubmissionResponse(BaseModel):
     plan_id: str
+    plan_name: str | None = None
+    previous_latest_plan_id: str | None = None
+    has_active_listeners: bool = False
     entry_statuses: list[PlanStepSubmissionStatus]
 
 
+# ── Plan SSE stream events (server → SDK) ────────────────────────────
+
+
+class PlanStreamStepStatus(BaseModel):
+    """Minimal step shape carried inside a snapshot event."""
+
+    alias: str
+    reading_id: str | None = None
+    derived_status: str
+
+
+class PlanSnapshotEvent(BaseModel):
+    type: Literal["snapshot"] = "snapshot"
+    steps: list[PlanStreamStepStatus]
+
+
+class PlanStepError(BaseModel):
+    message: str
+
+
+class PlanStepStartedEvent(BaseModel):
+    type: Literal["step_started"] = "step_started"
+    plan_id: str
+    step_alias: str
+    job_id: str
+    reading_id: str
+
+
+class PlanStepCompletedEvent(BaseModel):
+    type: Literal["step_completed"] = "step_completed"
+    plan_id: str
+    step_alias: str
+    job_id: str
+    reading_id: str
+    result_count: int | None = None
+
+
+class PlanStepFailedEvent(BaseModel):
+    type: Literal["step_failed"] = "step_failed"
+    plan_id: str
+    step_alias: str
+    job_id: str
+    error: PlanStepError | None = None
+
+
+class PlanStepsUpdatedEvent(BaseModel):
+    type: Literal["steps_updated"] = "steps_updated"
+    plan_id: str
+
+
+class PlanJobStartedEvent(BaseModel):
+    type: Literal["job_started"] = "job_started"
+    plan_id: str
+    job_id: str
+
+
+class PlanJobCompletedEvent(BaseModel):
+    type: Literal["job_completed"] = "job_completed"
+    plan_id: str
+    job_id: str
+
+
+class PlanJobFailedEvent(BaseModel):
+    type: Literal["job_failed"] = "job_failed"
+    plan_id: str
+    job_id: str
+    error: PlanStepError | None = None
+
+
+class PlanSupersededEvent(BaseModel):
+    type: Literal["plan_superseded"] = "plan_superseded"
+    plan_id: str
+    superseded_by_plan_id: str
+    name: str | None = None
+
+
+class PlanJobCancelledEvent(BaseModel):
+    type: Literal["job_cancelled"] = "job_cancelled"
+    plan_id: str
+
+
+PlanStreamEvent: TypeAlias = Annotated[
+    PlanSnapshotEvent
+    | PlanStepStartedEvent
+    | PlanStepCompletedEvent
+    | PlanStepFailedEvent
+    | PlanStepsUpdatedEvent
+    | PlanJobStartedEvent
+    | PlanJobCompletedEvent
+    | PlanJobFailedEvent
+    | PlanJobCancelledEvent
+    | PlanSupersededEvent,
+    Field(discriminator="type"),
+]
+
+
 __all__ = [
     "AnnotatableReadingParamType",
     "BeginGroupStep",
     "ContextFilterSection",
     "DqlOnlyStep",
     "DqlOnlyStepSubmission",
+    "DqlPreview",
     "EndGroupStep",
     "EndStepGroupSubmission",
     "ScriptedRequest",
@@ -335,6 +456,8 @@ __all__ = [
     "ReadingCacheMode",
     "ReadingParamPlaceholder",
     "ReadingParamType",
+    "ReadingResultPreview",
+    "ReadingStatus",
     "ReadingStep",
     "ReadingStepSubmission",
     "ReadingTemplateSegment",
@@ -346,4 +469,17 @@ __all__ = [
     "ReadingResult",
     "StepGroupSubmission",
     "PresetReadingStepSubmission",
+    "PlanStreamEvent",
+    "PlanStreamStepStatus",
+    "PlanSnapshotEvent",
+    "PlanStepStartedEvent",
+    "PlanStepCompletedEvent",
+    "PlanStepError",
+    "PlanStepFailedEvent",
+    "PlanStepsUpdatedEvent",
+    "PlanJobStartedEvent",
+    "PlanJobCompletedEvent",
+    "PlanJobFailedEvent",
+    "PlanJobCancelledEvent",
+    "PlanSupersededEvent",
 ]
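Because PlanStreamEvent is a discriminated union on the "type" field, an SDK consumer can decode each SSE data payload with a single pydantic TypeAdapter; a sketch with a made-up payload:

# Sketch only; the JSON payload below is invented for illustration.
from pydantic import TypeAdapter

from docent.data_models.reading import PlanStreamEvent

adapter = TypeAdapter(PlanStreamEvent)

payload = (
    '{"type": "step_completed", "plan_id": "plan-1", "step_alias": "scores",'
    ' "job_id": "job-1", "reading_id": "rd-1", "result_count": 12}'
)
event = adapter.validate_json(payload)  # dispatches on the "type" discriminator

if event.type == "step_completed":
    print(event.step_alias, event.result_count)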
{docent_python-0.1.58a0 → docent_python-0.1.60a0}/docent/mcp/server.py

@@ -33,10 +33,13 @@ def get_metadata_fields(collection_id: str) -> str:
     """
     client = get_client()
     try:
-
+        response = client.get_metadata_fields(
            collection_id, include_sample_values=True, sample_limit=10
        )
 
+        fields = response.get("fields", [])
+        total_runs = response.get("total_runs")
+
         if not fields:
             return f"No metadata fields found for collection {collection_id}"
 
@@ -71,7 +74,10 @@ def get_metadata_fields(collection_id: str) -> str:
             lines.append(line)
 
         field_list = "\n".join(lines)
-
+        tool_output = f"Metadata fields for collection {collection_id}:\n{field_list}"
+        if total_runs is not None:
+            tool_output += f"\n\nTotal runs: {total_runs}"
+        return tool_output
     except Exception as e:
         error_msg = str(e)
         if "404" in error_msg:
@@ -321,7 +327,11 @@ def get_reading_plan_results(
     for step in steps:
         if step.get("type") == "reading" and step.get("reading_id"):
             try:
-                results = client.get_reading_results(
+                results = client.get_reading_results(
+                    collection_id,
+                    step["reading_id"],
+                    include_output=False,
+                )
                 result_counts[step["alias"]] = len(results)
             except Exception:
                 pass
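For context, the MCP tool now reads a dict from get_metadata_fields rather than a bare field list; a sketch of the shape it consumes (only "fields" and "total_runs" are read in the diff above, and the structure of each field entry here is an assumption):

# Sketch of the response shape the updated tool expects.
response = {
    "fields": [{"field": "task_id", "sample_values": ["swe-001", "swe-002"]}],  # assumed entry shape
    "total_runs": 42,
}

fields = response.get("fields", [])
total_runs = response.get("total_runs")
if total_runs is not None:
    print(f"{len(fields)} fields across {total_runs} runs")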