docent-python 0.1.0a7__py3-none-any.whl → 0.1.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docent/data_models/metadata.py +39 -1
- docent/data_models/transcript.py +2 -0
- docent/sdk/client.py +113 -91
- {docent_python-0.1.0a7.dist-info → docent_python-0.1.0a8.dist-info}/METADATA +2 -1
- {docent_python-0.1.0a7.dist-info → docent_python-0.1.0a8.dist-info}/RECORD +7 -7
- {docent_python-0.1.0a7.dist-info → docent_python-0.1.0a8.dist-info}/WHEEL +0 -0
- {docent_python-0.1.0a7.dist-info → docent_python-0.1.0a8.dist-info}/licenses/LICENSE.md +0 -0
docent/data_models/metadata.py
CHANGED
@@ -34,7 +34,7 @@ class BaseMetadata(BaseModel):
|
|
34
34
|
"""
|
35
35
|
|
36
36
|
model_config = ConfigDict(extra="allow")
|
37
|
-
allow_fields_without_descriptions: bool =
|
37
|
+
allow_fields_without_descriptions: bool = True
|
38
38
|
|
39
39
|
# Private attribute to store field descriptions
|
40
40
|
_field_descriptions: dict[str, str | None] | None = PrivateAttr(default=None)
|
@@ -189,3 +189,41 @@ class BaseAgentRunMetadata(BaseMetadata):
|
|
189
189
|
scores: dict[str, int | float | bool | None] = Field(
|
190
190
|
description="A dict of score_key -> score_value. Use one key for each metric you're tracking."
|
191
191
|
)
|
192
|
+
|
193
|
+
|
194
|
+
class InspectAgentRunMetadata(BaseAgentRunMetadata):
|
195
|
+
"""Extends BaseAgentRunMetadata with fields specific to Inspect runs.
|
196
|
+
|
197
|
+
Attributes:
|
198
|
+
task_id: The ID of the 'benchmark' or 'set of evals' that the transcript belongs to
|
199
|
+
sample_id: The specific task inside of the `task_id` benchmark that the transcript was run on
|
200
|
+
epoch_id: Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run.
|
201
|
+
model: The model that was used to generate the transcript
|
202
|
+
scoring_metadata: Additional metadata about the scoring process
|
203
|
+
additional_metadata: Additional metadata about the transcript
|
204
|
+
"""
|
205
|
+
|
206
|
+
task_id: str = Field(
|
207
|
+
description="The ID of the 'benchmark' or 'set of evals' that the transcript belongs to"
|
208
|
+
)
|
209
|
+
|
210
|
+
# Identification of this particular run
|
211
|
+
sample_id: str = Field(
|
212
|
+
description="The specific task inside of the `task_id` benchmark that the transcript was run on"
|
213
|
+
)
|
214
|
+
epoch_id: int = Field(
|
215
|
+
description="Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run."
|
216
|
+
)
|
217
|
+
|
218
|
+
# Parameters for the run
|
219
|
+
model: str = Field(description="The model that was used to generate the transcript")
|
220
|
+
|
221
|
+
# Outcome
|
222
|
+
scoring_metadata: dict[str, Any] | None = Field(
|
223
|
+
description="Additional metadata about the scoring process"
|
224
|
+
)
|
225
|
+
|
226
|
+
# Inspect metadata
|
227
|
+
additional_metadata: dict[str, Any] | None = Field(
|
228
|
+
description="Additional metadata about the transcript"
|
229
|
+
)
|
docent/data_models/transcript.py
CHANGED
@@ -110,6 +110,8 @@ class Transcript(BaseModel):
|
|
110
110
|
A unit of action represents a logical group of messages, such as a system message
|
111
111
|
on its own or a user message followed by assistant responses and tool outputs.
|
112
112
|
|
113
|
+
For precise details on how action units are determined, refer to the _compute_units_of_action method implementation.
|
114
|
+
|
113
115
|
Returns:
|
114
116
|
list[list[int]]: List of units of action, where each unit is a list of message indices.
|
115
117
|
"""
|
docent/sdk/client.py
CHANGED
@@ -12,7 +12,7 @@ logger = get_logger(__name__)
|
|
12
12
|
class Docent:
|
13
13
|
"""Client for interacting with the Docent API.
|
14
14
|
|
15
|
-
This client provides methods for creating and managing
|
15
|
+
This client provides methods for creating and managing Collections,
|
16
16
|
dimensions, agent runs, and filters in the Docent system.
|
17
17
|
|
18
18
|
Args:
|
@@ -26,70 +26,61 @@ class Docent:
|
|
26
26
|
self,
|
27
27
|
server_url: str = "https://aws-docent-backend.transluce.org",
|
28
28
|
web_url: str = "https://docent-alpha.transluce.org",
|
29
|
-
|
30
|
-
password: str | None = None,
|
29
|
+
api_key: str | None = None,
|
31
30
|
):
|
32
31
|
self._server_url = server_url.rstrip("/") + "/rest"
|
33
32
|
self._web_url = web_url.rstrip("/")
|
34
33
|
|
35
|
-
self._email = email or os.getenv("DOCENT_EMAIL")
|
36
|
-
if self._email is None:
|
37
|
-
raise ValueError(
|
38
|
-
"Email address must be provided through keyword argument or DOCENT_EMAIL environment variable"
|
39
|
-
)
|
40
|
-
|
41
|
-
self._password = password or os.getenv("DOCENT_PASSWORD")
|
42
|
-
if self._password is None:
|
43
|
-
raise ValueError(
|
44
|
-
"Password must be provided through keyword argument or DOCENT_PASSWORD environment variable"
|
45
|
-
)
|
46
|
-
|
47
34
|
# Use requests.Session for connection pooling and persistent headers
|
48
35
|
self._session = requests.Session()
|
49
|
-
self._login()
|
50
36
|
|
51
|
-
|
52
|
-
"""Login with email/password to establish session."""
|
53
|
-
login_url = f"{self._server_url}/login"
|
54
|
-
response = self._session.post(
|
55
|
-
login_url, json={"email": self._email, "password": self._password}
|
56
|
-
)
|
37
|
+
api_key = api_key or os.getenv("DOCENT_API_KEY")
|
57
38
|
|
58
|
-
if
|
39
|
+
if api_key is None:
|
59
40
|
raise ValueError(
|
60
|
-
"
|
61
|
-
|
41
|
+
"api_key is required. Please provide an "
|
42
|
+
"api_key or set the DOCENT_API_KEY environment variable."
|
62
43
|
)
|
63
44
|
|
45
|
+
self._login(api_key)
|
46
|
+
|
47
|
+
def _login(self, api_key: str):
|
48
|
+
"""Authenticate the session with an API key and verify the key against the server."""
|
49
|
+
self._session.headers.update({"Authorization": f"Bearer {api_key}"})
|
50
|
+
|
51
|
+
url = f"{self._server_url}/api-keys/test"
|
52
|
+
response = self._session.get(url)
|
64
53
|
response.raise_for_status()
|
65
|
-
logger.info(f"Successfully logged in as {self._email}")
|
66
54
|
|
67
|
-
|
55
|
+
logger.info("Logged in with API key")
|
56
|
+
return
|
57
|
+
|
58
|
+
def create_collection(
|
68
59
|
self,
|
69
|
-
|
60
|
+
collection_id: str | None = None,
|
70
61
|
name: str | None = None,
|
71
62
|
description: str | None = None,
|
72
63
|
) -> str:
|
73
|
-
"""Creates a new
|
64
|
+
"""Creates a new Collection.
|
74
65
|
|
75
|
-
Creates a new
|
66
|
+
Creates a new Collection and sets up a default MECE dimension
|
76
67
|
for grouping on the homepage.
|
77
68
|
|
78
69
|
Args:
|
79
|
-
|
80
|
-
name: Optional name for the
|
81
|
-
description: Optional description for the
|
70
|
+
collection_id: Optional ID for the new Collection. If not provided, one will be generated.
|
71
|
+
name: Optional name for the Collection.
|
72
|
+
description: Optional description for the Collection.
|
82
73
|
|
83
74
|
Returns:
|
84
|
-
str: The ID of the created
|
75
|
+
str: The ID of the created Collection.
|
85
76
|
|
86
77
|
Raises:
|
87
|
-
ValueError: If the response is missing the
|
78
|
+
ValueError: If the response is missing the Collection ID.
|
88
79
|
requests.exceptions.HTTPError: If the API request fails.
|
89
80
|
"""
|
90
81
|
url = f"{self._server_url}/create"
|
91
82
|
payload = {
|
92
|
-
"
|
83
|
+
"collection_id": collection_id,
|
93
84
|
"name": name,
|
94
85
|
"description": description,
|
95
86
|
}
|
@@ -98,42 +89,46 @@ class Docent:
|
|
98
89
|
response.raise_for_status()
|
99
90
|
|
100
91
|
response_data = response.json()
|
101
|
-
|
102
|
-
if
|
103
|
-
raise ValueError("Failed to create
|
92
|
+
collection_id = response_data.get("collection_id")
|
93
|
+
if collection_id is None:
|
94
|
+
raise ValueError("Failed to create collection: 'collection_id' missing in response.")
|
104
95
|
|
105
|
-
logger.info(f"Successfully created
|
96
|
+
logger.info(f"Successfully created Collection with id='{collection_id}'")
|
106
97
|
|
107
|
-
logger.info(
|
108
|
-
|
98
|
+
logger.info(
|
99
|
+
f"Collection creation complete. Frontend available at: {self._web_url}/dashboard/{collection_id}"
|
100
|
+
)
|
101
|
+
return collection_id
|
109
102
|
|
110
|
-
def set_io_bin_keys(
|
111
|
-
|
103
|
+
def set_io_bin_keys(
|
104
|
+
self, collection_id: str, inner_bin_key: str | None, outer_bin_key: str | None
|
105
|
+
):
|
106
|
+
"""Set inner and outer bin keys for a collection."""
|
112
107
|
response = self._session.post(
|
113
|
-
f"{self._server_url}/{
|
108
|
+
f"{self._server_url}/{collection_id}/set_io_bin_keys",
|
114
109
|
json={"inner_bin_key": inner_bin_key, "outer_bin_key": outer_bin_key},
|
115
110
|
)
|
116
111
|
response.raise_for_status()
|
117
112
|
|
118
|
-
def set_inner_bin_key(self,
|
119
|
-
"""Set the inner bin key for a
|
120
|
-
current_io_bin_keys = self.get_io_bin_keys(
|
113
|
+
def set_inner_bin_key(self, collection_id: str, dim: str):
|
114
|
+
"""Set the inner bin key for a collection."""
|
115
|
+
current_io_bin_keys = self.get_io_bin_keys(collection_id)
|
121
116
|
if current_io_bin_keys is None:
|
122
117
|
current_io_bin_keys = (None, None)
|
123
|
-
self.set_io_bin_keys(
|
118
|
+
self.set_io_bin_keys(collection_id, dim, current_io_bin_keys[1]) # Set inner, keep outer
|
124
119
|
|
125
|
-
def set_outer_bin_key(self,
|
126
|
-
"""Set the outer bin key for a
|
127
|
-
current_io_bin_keys = self.get_io_bin_keys(
|
120
|
+
def set_outer_bin_key(self, collection_id: str, dim: str):
|
121
|
+
"""Set the outer bin key for a collection."""
|
122
|
+
current_io_bin_keys = self.get_io_bin_keys(collection_id)
|
128
123
|
if current_io_bin_keys is None:
|
129
124
|
current_io_bin_keys = (None, None)
|
130
|
-
self.set_io_bin_keys(
|
125
|
+
self.set_io_bin_keys(collection_id, current_io_bin_keys[0], dim) # Keep inner, set outer
|
131
126
|
|
132
|
-
def get_io_bin_keys(self,
|
133
|
-
"""Gets the current inner and outer bin keys for a
|
127
|
+
def get_io_bin_keys(self, collection_id: str) -> tuple[str | None, str | None] | None:
|
128
|
+
"""Gets the current inner and outer bin keys for a Collection.
|
134
129
|
|
135
130
|
Args:
|
136
|
-
|
131
|
+
collection_id: ID of the Collection.
|
137
132
|
|
138
133
|
Returns:
|
139
134
|
tuple: (inner_bin_key | None, outer_bin_key | None)
|
@@ -141,20 +136,20 @@ class Docent:
|
|
141
136
|
Raises:
|
142
137
|
requests.exceptions.HTTPError: If the API request fails.
|
143
138
|
"""
|
144
|
-
url = f"{self._server_url}/{
|
139
|
+
url = f"{self._server_url}/{collection_id}/io_bin_keys"
|
145
140
|
response = self._session.get(url)
|
146
141
|
response.raise_for_status()
|
147
142
|
data = response.json()
|
148
143
|
return (data.get("inner_bin_key"), data.get("outer_bin_key"))
|
149
144
|
|
150
|
-
def add_agent_runs(self,
|
151
|
-
"""Adds agent runs to a
|
145
|
+
def add_agent_runs(self, collection_id: str, agent_runs: list[AgentRun]) -> dict[str, Any]:
|
146
|
+
"""Adds agent runs to a Collection.
|
152
147
|
|
153
148
|
Agent runs represent execution traces that can be visualized and analyzed.
|
154
|
-
This method batches the insertion in groups of
|
149
|
+
This method batches the insertion in groups of 1,000 for better performance.
|
155
150
|
|
156
151
|
Args:
|
157
|
-
|
152
|
+
collection_id: ID of the Collection.
|
158
153
|
agent_runs: List of AgentRun objects to add.
|
159
154
|
|
160
155
|
Returns:
|
@@ -165,7 +160,7 @@ class Docent:
|
|
165
160
|
"""
|
166
161
|
from tqdm import tqdm
|
167
162
|
|
168
|
-
url = f"{self._server_url}/{
|
163
|
+
url = f"{self._server_url}/{collection_id}/agent_runs"
|
169
164
|
batch_size = 1000
|
170
165
|
total_runs = len(agent_runs)
|
171
166
|
|
@@ -180,69 +175,96 @@ class Docent:
|
|
180
175
|
|
181
176
|
pbar.update(len(batch))
|
182
177
|
|
183
|
-
url = f"{self._server_url}/{
|
178
|
+
url = f"{self._server_url}/{collection_id}/compute_embeddings"
|
184
179
|
response = self._session.post(url)
|
185
180
|
response.raise_for_status()
|
186
181
|
|
187
|
-
logger.info(f"Successfully added {total_runs} agent runs to
|
182
|
+
logger.info(f"Successfully added {total_runs} agent runs to Collection '{collection_id}'")
|
188
183
|
return {"status": "success", "total_runs_added": total_runs}
|
189
184
|
|
190
|
-
def
|
191
|
-
"""Lists all available
|
185
|
+
def list_collections(self) -> list[dict[str, Any]]:
|
186
|
+
"""Lists all available Collections.
|
192
187
|
|
193
188
|
Returns:
|
194
|
-
list: List of dictionaries containing
|
189
|
+
list: List of dictionaries containing Collection information.
|
195
190
|
|
196
191
|
Raises:
|
197
192
|
requests.exceptions.HTTPError: If the API request fails.
|
198
193
|
"""
|
199
|
-
url = f"{self._server_url}/
|
194
|
+
url = f"{self._server_url}/collections"
|
200
195
|
response = self._session.get(url)
|
201
196
|
response.raise_for_status()
|
202
197
|
return response.json()
|
203
198
|
|
204
|
-
def
|
205
|
-
"""
|
199
|
+
def list_searches(self, collection_id: str) -> list[dict[str, Any]]:
|
200
|
+
"""List all searches for a given collection.
|
206
201
|
|
207
202
|
Args:
|
208
|
-
|
209
|
-
dim_ids: Optional list of dimension IDs to retrieve. If None, retrieves all dimensions.
|
203
|
+
collection_id: ID of the Collection.
|
210
204
|
|
211
205
|
Returns:
|
212
|
-
list: List of dictionaries containing
|
206
|
+
list: List of dictionaries containing search query information.
|
213
207
|
|
214
208
|
Raises:
|
215
209
|
requests.exceptions.HTTPError: If the API request fails.
|
216
210
|
"""
|
217
|
-
url = f"{self._server_url}/{
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
211
|
+
url = f"{self._server_url}/{collection_id}/list_search_queries"
|
212
|
+
response = self._session.get(url)
|
213
|
+
response.raise_for_status()
|
214
|
+
return response.json()
|
215
|
+
|
216
|
+
def get_search_results(self, collection_id: str, search_query: str) -> list[dict[str, Any]]:
|
217
|
+
"""Get search results for a given collection and search query.
|
218
|
+
Results are looked up by the search_query string.
|
219
|
+
|
220
|
+
Args:
|
221
|
+
collection_id: ID of the Collection.
|
222
|
+
search_query: The search query to get results for.
|
223
|
+
|
224
|
+
Returns:
|
225
|
+
list: List of dictionaries containing search result information.
|
226
|
+
|
227
|
+
Raises:
|
228
|
+
requests.exceptions.HTTPError: If the API request fails.
|
229
|
+
"""
|
230
|
+
url = f"{self._server_url}/{collection_id}/get_search_results"
|
231
|
+
response = self._session.post(url, json={"search_query": search_query})
|
222
232
|
response.raise_for_status()
|
223
233
|
return response.json()
|
224
234
|
|
225
|
-
def
|
226
|
-
|
227
|
-
|
228
|
-
"""Lists available attribute searches for a FrameGrid.
|
235
|
+
def list_search_clusters(self, collection_id: str, search_query: str) -> list[dict[str, Any]]:
|
236
|
+
"""List all search clusters for a given collection.
|
237
|
+
Clusters are looked up by the search_query string.
|
229
238
|
|
230
|
-
|
239
|
+
Args:
|
240
|
+
collection_id: ID of the Collection.
|
241
|
+
search_query: The search query to get clusters for.
|
242
|
+
|
243
|
+
Returns:
|
244
|
+
list: List of dictionaries containing search cluster information.
|
245
|
+
|
246
|
+
Raises:
|
247
|
+
requests.exceptions.HTTPError: If the API request fails.
|
248
|
+
"""
|
249
|
+
url = f"{self._server_url}/{collection_id}/list_search_clusters"
|
250
|
+
response = self._session.post(url, json={"search_query": search_query})
|
251
|
+
response.raise_for_status()
|
252
|
+
return response.json()
|
253
|
+
|
254
|
+
def get_cluster_matches(self, collection_id: str, centroid: str) -> list[dict[str, Any]]:
|
255
|
+
"""Get the matches for a given cluster.
|
231
256
|
|
232
257
|
Args:
|
233
|
-
|
234
|
-
|
258
|
+
collection_id: ID of the Collection.
|
259
|
+
centroid: The centroid identifying the cluster to get matches for.
|
235
260
|
|
236
261
|
Returns:
|
237
|
-
list: List of dictionaries containing
|
262
|
+
list: List of dictionaries containing the search results that match the cluster.
|
238
263
|
|
239
264
|
Raises:
|
240
265
|
requests.exceptions.HTTPError: If the API request fails.
|
241
266
|
"""
|
242
|
-
url = f"{self._server_url}/{
|
243
|
-
|
244
|
-
"base_data_only": base_data_only,
|
245
|
-
}
|
246
|
-
response = self._session.get(url, params=params)
|
267
|
+
url = f"{self._server_url}/{collection_id}/get_cluster_matches"
|
268
|
+
response = self._session.post(url, json={"centroid": centroid})
|
247
269
|
response.raise_for_status()
|
248
270
|
return response.json()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docent-python
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.0a8
|
4
4
|
Summary: Docent SDK
|
5
5
|
Project-URL: Homepage, https://github.com/TransluceAI/docent
|
6
6
|
Project-URL: Issues, https://github.com/TransluceAI/docent/issues
|
@@ -12,3 +12,4 @@ Requires-Python: >=3.11
|
|
12
12
|
Requires-Dist: pydantic>=2.11.7
|
13
13
|
Requires-Dist: pyyaml>=6.0.2
|
14
14
|
Requires-Dist: tiktoken>=0.7.0
|
15
|
+
Requires-Dist: tqdm>=4.67.1
|
@@ -6,10 +6,10 @@ docent/data_models/__init__.py,sha256=4NghEq_T9JqGTSo-hu_aZm35TDfahTzsbkrt8WB4Go
|
|
6
6
|
docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
|
7
7
|
docent/data_models/agent_run.py,sha256=sdvoUUpOhQAHqJHNR5KoHthCXrpJajdIREMacoR1ODk,9516
|
8
8
|
docent/data_models/citation.py,sha256=WsVQZcBT2EJD24ysyeVOC5Xfo165RI7P5_cOnJBgHj0,10015
|
9
|
-
docent/data_models/metadata.py,sha256=
|
9
|
+
docent/data_models/metadata.py,sha256=XVPfUPwGcs6JlJg2u9Ry0A8NdHNZTvELwmrsmb7TagE,8702
|
10
10
|
docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
|
11
11
|
docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
|
12
|
-
docent/data_models/transcript.py,sha256=
|
12
|
+
docent/data_models/transcript.py,sha256=7xYCPi6gIUftX2tjOcteryQE9GWV7ThRZv4PmpteJhM,13793
|
13
13
|
docent/data_models/chat/__init__.py,sha256=O04XQ2NmO8GTWqkkB_Iydj8j_CucZuLhoyMVTxJN_cs,570
|
14
14
|
docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
|
15
15
|
docent/data_models/chat/message.py,sha256=iAo38kbV6wYbFh8S23cxLy6HY4C_i3PzQ6RpSQG5dxM,3861
|
@@ -19,8 +19,8 @@ docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
|
|
19
19
|
docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
|
20
20
|
docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
|
21
21
|
docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
docent/sdk/client.py,sha256=
|
23
|
-
docent_python-0.1.
|
24
|
-
docent_python-0.1.
|
25
|
-
docent_python-0.1.
|
26
|
-
docent_python-0.1.
|
22
|
+
docent/sdk/client.py,sha256=2fS2bmO9wOvIdjoG_2mOGyeX2xcWXm-vzVAeqIf8BZQ,9784
|
23
|
+
docent_python-0.1.0a8.dist-info/METADATA,sha256=KtUqXmRDX0QpnxiDox667QOkwHpmu7VTh8k3nYQIMHY,521
|
24
|
+
docent_python-0.1.0a8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
25
|
+
docent_python-0.1.0a8.dist-info/licenses/LICENSE.md,sha256=vOHzq3K4Ndu0UV9hPrtXvlD7pHOjyDQmGjHuLSIkRQY,1087
|
26
|
+
docent_python-0.1.0a8.dist-info/RECORD,,
|
File without changes
|
File without changes
|