docent-python 0.1.0a7__tar.gz → 0.1.0a8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/PKG-INFO +2 -1
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/README.md +1 -1
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/metadata.py +39 -1
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/transcript.py +2 -0
- docent_python-0.1.0a8/docent/sdk/client.py +270 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/pyproject.toml +2 -1
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/uv.lock +24 -1
- docent_python-0.1.0a7/docent/sdk/client.py +0 -248
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/.gitignore +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/LICENSE.md +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/__init__.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/agent_run.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/data_models/shared_types.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/py.typed +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/samples/load.py +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/samples/log.eval +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.0a7 → docent_python-0.1.0a8}/docent/sdk/__init__.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docent-python
|
3
|
-
Version: 0.1.0a7
|
3
|
+
Version: 0.1.0a8
|
4
4
|
Summary: Docent SDK
|
5
5
|
Project-URL: Homepage, https://github.com/TransluceAI/docent
|
6
6
|
Project-URL: Issues, https://github.com/TransluceAI/docent/issues
|
@@ -12,3 +12,4 @@ Requires-Python: >=3.11
|
|
12
12
|
Requires-Dist: pydantic>=2.11.7
|
13
13
|
Requires-Dist: pyyaml>=6.0.2
|
14
14
|
Requires-Dist: tiktoken>=0.7.0
|
15
|
+
Requires-Dist: tqdm>=4.67.1
|
@@ -9,7 +9,7 @@ The official Python SDK for [Docent](https://github.com/TransluceAI/docent) - a
|
|
9
9
|
|
10
10
|
Docent helps you understand AI agent behavior by providing tools to collect, analyze, and visualize agent execution data. This SDK allows you to programmatically interact with the Docent platform to:
|
11
11
|
|
12
|
-
- Create and manage
|
12
|
+
- Create and manage collections of agent runs
|
13
13
|
- Upload agent execution traces and transcripts
|
14
14
|
- Define custom dimensions and filters
|
15
15
|
- Perform searches and analyses on agent behavior
|
@@ -34,7 +34,7 @@ class BaseMetadata(BaseModel):
|
|
34
34
|
"""
|
35
35
|
|
36
36
|
model_config = ConfigDict(extra="allow")
|
37
|
-
allow_fields_without_descriptions: bool =
|
37
|
+
allow_fields_without_descriptions: bool = True
|
38
38
|
|
39
39
|
# Private attribute to store field descriptions
|
40
40
|
_field_descriptions: dict[str, str | None] | None = PrivateAttr(default=None)
|
@@ -189,3 +189,41 @@ class BaseAgentRunMetadata(BaseMetadata):
|
|
189
189
|
scores: dict[str, int | float | bool | None] = Field(
|
190
190
|
description="A dict of score_key -> score_value. Use one key for each metric you're tracking."
|
191
191
|
)
|
192
|
+
|
193
|
+
|
194
|
+
class InspectAgentRunMetadata(BaseAgentRunMetadata):
|
195
|
+
"""Extends BaseAgentRunMetadata with fields specific to Inspect runs.
|
196
|
+
|
197
|
+
Attributes:
|
198
|
+
task_id: The ID of the 'benchmark' or 'set of evals' that the transcript belongs to
|
199
|
+
sample_id: The specific task inside of the `task_id` benchmark that the transcript was run on
|
200
|
+
epoch_id: Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run.
|
201
|
+
model: The model that was used to generate the transcript
|
202
|
+
scoring_metadata: Additional metadata about the scoring process
|
203
|
+
additional_metadata: Additional metadata about the transcript
|
204
|
+
"""
|
205
|
+
|
206
|
+
task_id: str = Field(
|
207
|
+
description="The ID of the 'benchmark' or 'set of evals' that the transcript belongs to"
|
208
|
+
)
|
209
|
+
|
210
|
+
# Identification of this particular run
|
211
|
+
sample_id: str = Field(
|
212
|
+
description="The specific task inside of the `task_id` benchmark that the transcript was run on"
|
213
|
+
)
|
214
|
+
epoch_id: int = Field(
|
215
|
+
description="Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run."
|
216
|
+
)
|
217
|
+
|
218
|
+
# Parameters for the run
|
219
|
+
model: str = Field(description="The model that was used to generate the transcript")
|
220
|
+
|
221
|
+
# Outcome
|
222
|
+
scoring_metadata: dict[str, Any] | None = Field(
|
223
|
+
description="Additional metadata about the scoring process"
|
224
|
+
)
|
225
|
+
|
226
|
+
# Inspect metadata
|
227
|
+
additional_metadata: dict[str, Any] | None = Field(
|
228
|
+
description="Additional metadata about the transcript"
|
229
|
+
)
|
@@ -110,6 +110,8 @@ class Transcript(BaseModel):
|
|
110
110
|
A unit of action represents a logical group of messages, such as a system message
|
111
111
|
on its own or a user message followed by assistant responses and tool outputs.
|
112
112
|
|
113
|
+
For precise details on how action units are determined, refer to the _compute_units_of_action method implementation.
|
114
|
+
|
113
115
|
Returns:
|
114
116
|
list[list[int]]: List of units of action, where each unit is a list of message indices.
|
115
117
|
"""
|
@@ -0,0 +1,270 @@
|
|
1
|
+
import os
|
2
|
+
from typing import Any
|
3
|
+
|
4
|
+
import requests
|
5
|
+
|
6
|
+
from docent._log_util.logger import get_logger
|
7
|
+
from docent.data_models.agent_run import AgentRun
|
8
|
+
|
9
|
+
logger = get_logger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
class Docent:
|
13
|
+
"""Client for interacting with the Docent API.
|
14
|
+
|
15
|
+
This client provides methods for creating and managing Collections,
|
16
|
+
dimensions, agent runs, and filters in the Docent system.
|
17
|
+
|
18
|
+
Args:
|
19
|
+
server_url: URL of the Docent API server.
|
20
|
+
web_url: URL of the Docent web UI.
|
21
|
+
api_key: API key used for authentication. If omitted, the value of the
|
22
|
+
DOCENT_API_KEY environment variable is used.
|
23
|
+
"""
|
24
|
+
|
25
|
+
def __init__(
|
26
|
+
self,
|
27
|
+
server_url: str = "https://aws-docent-backend.transluce.org",
|
28
|
+
web_url: str = "https://docent-alpha.transluce.org",
|
29
|
+
api_key: str | None = None,
|
30
|
+
):
|
31
|
+
self._server_url = server_url.rstrip("/") + "/rest"
|
32
|
+
self._web_url = web_url.rstrip("/")
|
33
|
+
|
34
|
+
# Use requests.Session for connection pooling and persistent headers
|
35
|
+
self._session = requests.Session()
|
36
|
+
|
37
|
+
api_key = api_key or os.getenv("DOCENT_API_KEY")
|
38
|
+
|
39
|
+
if api_key is None:
|
40
|
+
raise ValueError(
|
41
|
+
"api_key is required. Please provide an "
|
42
|
+
"api_key or set the DOCENT_API_KEY environment variable."
|
43
|
+
)
|
44
|
+
|
45
|
+
self._login(api_key)
|
46
|
+
|
47
|
+
def _login(self, api_key: str):
|
48
|
+
"""Attach the API key to the session as a Bearer token and verify it against the server."""
|
49
|
+
self._session.headers.update({"Authorization": f"Bearer {api_key}"})
|
50
|
+
|
51
|
+
url = f"{self._server_url}/api-keys/test"
|
52
|
+
response = self._session.get(url)
|
53
|
+
response.raise_for_status()
|
54
|
+
|
55
|
+
logger.info("Logged in with API key")
|
56
|
+
return
|
57
|
+
|
58
|
+
def create_collection(
|
59
|
+
self,
|
60
|
+
collection_id: str | None = None,
|
61
|
+
name: str | None = None,
|
62
|
+
description: str | None = None,
|
63
|
+
) -> str:
|
64
|
+
"""Creates a new Collection.
|
65
|
+
|
66
|
+
Creates a new Collection and sets up a default MECE dimension
|
67
|
+
for grouping on the homepage.
|
68
|
+
|
69
|
+
Args:
|
70
|
+
collection_id: Optional ID for the new Collection. If not provided, one will be generated.
|
71
|
+
name: Optional name for the Collection.
|
72
|
+
description: Optional description for the Collection.
|
73
|
+
|
74
|
+
Returns:
|
75
|
+
str: The ID of the created Collection.
|
76
|
+
|
77
|
+
Raises:
|
78
|
+
ValueError: If the response is missing the Collection ID.
|
79
|
+
requests.exceptions.HTTPError: If the API request fails.
|
80
|
+
"""
|
81
|
+
url = f"{self._server_url}/create"
|
82
|
+
payload = {
|
83
|
+
"collection_id": collection_id,
|
84
|
+
"name": name,
|
85
|
+
"description": description,
|
86
|
+
}
|
87
|
+
|
88
|
+
response = self._session.post(url, json=payload)
|
89
|
+
response.raise_for_status()
|
90
|
+
|
91
|
+
response_data = response.json()
|
92
|
+
collection_id = response_data.get("collection_id")
|
93
|
+
if collection_id is None:
|
94
|
+
raise ValueError("Failed to create collection: 'collection_id' missing in response.")
|
95
|
+
|
96
|
+
logger.info(f"Successfully created Collection with id='{collection_id}'")
|
97
|
+
|
98
|
+
logger.info(
|
99
|
+
f"Collection creation complete. Frontend available at: {self._web_url}/dashboard/{collection_id}"
|
100
|
+
)
|
101
|
+
return collection_id
|
102
|
+
|
103
|
+
def set_io_bin_keys(
|
104
|
+
self, collection_id: str, inner_bin_key: str | None, outer_bin_key: str | None
|
105
|
+
):
|
106
|
+
"""Set inner and outer bin keys for a collection."""
|
107
|
+
response = self._session.post(
|
108
|
+
f"{self._server_url}/{collection_id}/set_io_bin_keys",
|
109
|
+
json={"inner_bin_key": inner_bin_key, "outer_bin_key": outer_bin_key},
|
110
|
+
)
|
111
|
+
response.raise_for_status()
|
112
|
+
|
113
|
+
def set_inner_bin_key(self, collection_id: str, dim: str):
|
114
|
+
"""Set the inner bin key for a collection."""
|
115
|
+
current_io_bin_keys = self.get_io_bin_keys(collection_id)
|
116
|
+
if current_io_bin_keys is None:
|
117
|
+
current_io_bin_keys = (None, None)
|
118
|
+
self.set_io_bin_keys(collection_id, dim, current_io_bin_keys[1]) # Set inner, keep outer
|
119
|
+
|
120
|
+
def set_outer_bin_key(self, collection_id: str, dim: str):
|
121
|
+
"""Set the outer bin key for a collection."""
|
122
|
+
current_io_bin_keys = self.get_io_bin_keys(collection_id)
|
123
|
+
if current_io_bin_keys is None:
|
124
|
+
current_io_bin_keys = (None, None)
|
125
|
+
self.set_io_bin_keys(collection_id, current_io_bin_keys[0], dim) # Keep inner, set outer
|
126
|
+
|
127
|
+
def get_io_bin_keys(self, collection_id: str) -> tuple[str | None, str | None] | None:
|
128
|
+
"""Gets the current inner and outer bin keys for a Collection.
|
129
|
+
|
130
|
+
Args:
|
131
|
+
collection_id: ID of the Collection.
|
132
|
+
|
133
|
+
Returns:
|
134
|
+
tuple: (inner_bin_key | None, outer_bin_key | None)
|
135
|
+
|
136
|
+
Raises:
|
137
|
+
requests.exceptions.HTTPError: If the API request fails.
|
138
|
+
"""
|
139
|
+
url = f"{self._server_url}/{collection_id}/io_bin_keys"
|
140
|
+
response = self._session.get(url)
|
141
|
+
response.raise_for_status()
|
142
|
+
data = response.json()
|
143
|
+
return (data.get("inner_bin_key"), data.get("outer_bin_key"))
|
144
|
+
|
145
|
+
def add_agent_runs(self, collection_id: str, agent_runs: list[AgentRun]) -> dict[str, Any]:
|
146
|
+
"""Adds agent runs to a Collection.
|
147
|
+
|
148
|
+
Agent runs represent execution traces that can be visualized and analyzed.
|
149
|
+
This method batches the insertion in groups of 1,000 for better performance.
|
150
|
+
|
151
|
+
Args:
|
152
|
+
collection_id: ID of the Collection.
|
153
|
+
agent_runs: List of AgentRun objects to add.
|
154
|
+
|
155
|
+
Returns:
|
156
|
+
dict: Summary of the upload with keys "status" and "total_runs_added".
|
157
|
+
|
158
|
+
Raises:
|
159
|
+
requests.exceptions.HTTPError: If the API request fails.
|
160
|
+
"""
|
161
|
+
from tqdm import tqdm
|
162
|
+
|
163
|
+
url = f"{self._server_url}/{collection_id}/agent_runs"
|
164
|
+
batch_size = 1000
|
165
|
+
total_runs = len(agent_runs)
|
166
|
+
|
167
|
+
# Process agent runs in batches
|
168
|
+
with tqdm(total=total_runs, desc="Adding agent runs", unit="runs") as pbar:
|
169
|
+
for i in range(0, total_runs, batch_size):
|
170
|
+
batch = agent_runs[i : i + batch_size]
|
171
|
+
payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
|
172
|
+
|
173
|
+
response = self._session.post(url, json=payload)
|
174
|
+
response.raise_for_status()
|
175
|
+
|
176
|
+
pbar.update(len(batch))
|
177
|
+
|
178
|
+
url = f"{self._server_url}/{collection_id}/compute_embeddings"
|
179
|
+
response = self._session.post(url)
|
180
|
+
response.raise_for_status()
|
181
|
+
|
182
|
+
logger.info(f"Successfully added {total_runs} agent runs to Collection '{collection_id}'")
|
183
|
+
return {"status": "success", "total_runs_added": total_runs}
|
184
|
+
|
185
|
+
def list_collections(self) -> list[dict[str, Any]]:
|
186
|
+
"""Lists all available Collections.
|
187
|
+
|
188
|
+
Returns:
|
189
|
+
list: List of dictionaries containing Collection information.
|
190
|
+
|
191
|
+
Raises:
|
192
|
+
requests.exceptions.HTTPError: If the API request fails.
|
193
|
+
"""
|
194
|
+
url = f"{self._server_url}/collections"
|
195
|
+
response = self._session.get(url)
|
196
|
+
response.raise_for_status()
|
197
|
+
return response.json()
|
198
|
+
|
199
|
+
def list_searches(self, collection_id: str) -> list[dict[str, Any]]:
|
200
|
+
"""List all searches for a given collection.
|
201
|
+
|
202
|
+
Args:
|
203
|
+
collection_id: ID of the Collection.
|
204
|
+
|
205
|
+
Returns:
|
206
|
+
list: List of dictionaries containing search query information.
|
207
|
+
|
208
|
+
Raises:
|
209
|
+
requests.exceptions.HTTPError: If the API request fails.
|
210
|
+
"""
|
211
|
+
url = f"{self._server_url}/{collection_id}/list_search_queries"
|
212
|
+
response = self._session.get(url)
|
213
|
+
response.raise_for_status()
|
214
|
+
return response.json()
|
215
|
+
|
216
|
+
def get_search_results(self, collection_id: str, search_query: str) -> list[dict[str, Any]]:
|
217
|
+
"""Get search results for a given collection and search query.
|
218
|
+
The search is identified by its search_query text; this method does not accept a query_id.
|
219
|
+
|
220
|
+
Args:
|
221
|
+
collection_id: ID of the Collection.
|
222
|
+
search_query: The search query to get results for.
|
223
|
+
|
224
|
+
Returns:
|
225
|
+
list: List of dictionaries containing search result information.
|
226
|
+
|
227
|
+
Raises:
|
228
|
+
requests.exceptions.HTTPError: If the API request fails.
|
229
|
+
"""
|
230
|
+
url = f"{self._server_url}/{collection_id}/get_search_results"
|
231
|
+
response = self._session.post(url, json={"search_query": search_query})
|
232
|
+
response.raise_for_status()
|
233
|
+
return response.json()
|
234
|
+
|
235
|
+
def list_search_clusters(self, collection_id: str, search_query: str) -> list[dict[str, Any]]:
|
236
|
+
"""List all search clusters for a given collection.
|
237
|
+
The search is identified by its search_query text; this method does not accept a query_id.
|
238
|
+
|
239
|
+
Args:
|
240
|
+
collection_id: ID of the Collection.
|
241
|
+
search_query: The search query to get clusters for.
|
242
|
+
|
243
|
+
Returns:
|
244
|
+
list: List of dictionaries containing search cluster information.
|
245
|
+
|
246
|
+
Raises:
|
247
|
+
requests.exceptions.HTTPError: If the API request fails.
|
248
|
+
"""
|
249
|
+
url = f"{self._server_url}/{collection_id}/list_search_clusters"
|
250
|
+
response = self._session.post(url, json={"search_query": search_query})
|
251
|
+
response.raise_for_status()
|
252
|
+
return response.json()
|
253
|
+
|
254
|
+
def get_cluster_matches(self, collection_id: str, centroid: str) -> list[dict[str, Any]]:
|
255
|
+
"""Get the matches for a given cluster.
|
256
|
+
|
257
|
+
Args:
|
258
|
+
collection_id: ID of the Collection.
|
259
|
+
centroid: The centroid of the cluster to get matches for.
|
260
|
+
|
261
|
+
Returns:
|
262
|
+
list: List of dictionaries containing the search results that match the cluster.
|
263
|
+
|
264
|
+
Raises:
|
265
|
+
requests.exceptions.HTTPError: If the API request fails.
|
266
|
+
"""
|
267
|
+
url = f"{self._server_url}/{collection_id}/get_cluster_matches"
|
268
|
+
response = self._session.post(url, json={"centroid": centroid})
|
269
|
+
response.raise_for_status()
|
270
|
+
return response.json()
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[project]
|
2
2
|
name = "docent-python"
|
3
3
|
description = "Docent SDK"
|
4
|
-
version = "0.1.0-alpha.7"
|
4
|
+
version = "0.1.0-alpha.8"
|
5
5
|
authors = [
|
6
6
|
{ name="Transluce", email="info@transluce.org" },
|
7
7
|
]
|
@@ -13,6 +13,7 @@ dependencies = [
|
|
13
13
|
"pydantic>=2.11.7",
|
14
14
|
"pyyaml>=6.0.2",
|
15
15
|
"tiktoken>=0.7.0",
|
16
|
+
"tqdm>=4.67.1",
|
16
17
|
]
|
17
18
|
|
18
19
|
[build-system]
|
@@ -68,14 +68,24 @@ wheels = [
|
|
68
68
|
{ url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" },
|
69
69
|
]
|
70
70
|
|
71
|
+
[[package]]
|
72
|
+
name = "colorama"
|
73
|
+
version = "0.4.6"
|
74
|
+
source = { registry = "https://pypi.org/simple" }
|
75
|
+
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
76
|
+
wheels = [
|
77
|
+
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
78
|
+
]
|
79
|
+
|
71
80
|
[[package]]
|
72
81
|
name = "docent-python"
|
73
|
-
version = "0.1.
|
82
|
+
version = "0.1.0a7"
|
74
83
|
source = { editable = "." }
|
75
84
|
dependencies = [
|
76
85
|
{ name = "pydantic" },
|
77
86
|
{ name = "pyyaml" },
|
78
87
|
{ name = "tiktoken" },
|
88
|
+
{ name = "tqdm" },
|
79
89
|
]
|
80
90
|
|
81
91
|
[package.metadata]
|
@@ -83,6 +93,7 @@ requires-dist = [
|
|
83
93
|
{ name = "pydantic", specifier = ">=2.11.7" },
|
84
94
|
{ name = "pyyaml", specifier = ">=6.0.2" },
|
85
95
|
{ name = "tiktoken", specifier = ">=0.7.0" },
|
96
|
+
{ name = "tqdm", specifier = ">=4.67.1" },
|
86
97
|
]
|
87
98
|
|
88
99
|
[[package]]
|
@@ -307,6 +318,18 @@ wheels = [
|
|
307
318
|
{ url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669, upload-time = "2025-02-14T06:02:47.341Z" },
|
308
319
|
]
|
309
320
|
|
321
|
+
[[package]]
|
322
|
+
name = "tqdm"
|
323
|
+
version = "4.67.1"
|
324
|
+
source = { registry = "https://pypi.org/simple" }
|
325
|
+
dependencies = [
|
326
|
+
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
327
|
+
]
|
328
|
+
sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" }
|
329
|
+
wheels = [
|
330
|
+
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
|
331
|
+
]
|
332
|
+
|
310
333
|
[[package]]
|
311
334
|
name = "typing-extensions"
|
312
335
|
version = "4.14.0"
|
@@ -1,248 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
from typing import Any
|
3
|
-
|
4
|
-
import requests
|
5
|
-
|
6
|
-
from docent._log_util.logger import get_logger
|
7
|
-
from docent.data_models.agent_run import AgentRun
|
8
|
-
|
9
|
-
logger = get_logger(__name__)
|
10
|
-
|
11
|
-
|
12
|
-
class Docent:
|
13
|
-
"""Client for interacting with the Docent API.
|
14
|
-
|
15
|
-
This client provides methods for creating and managing FrameGrids,
|
16
|
-
dimensions, agent runs, and filters in the Docent system.
|
17
|
-
|
18
|
-
Args:
|
19
|
-
server_url: URL of the Docent API server.
|
20
|
-
web_url: URL of the Docent web UI.
|
21
|
-
email: Email address for authentication.
|
22
|
-
password: Password for authentication.
|
23
|
-
"""
|
24
|
-
|
25
|
-
def __init__(
|
26
|
-
self,
|
27
|
-
server_url: str = "https://aws-docent-backend.transluce.org",
|
28
|
-
web_url: str = "https://docent-alpha.transluce.org",
|
29
|
-
email: str | None = None,
|
30
|
-
password: str | None = None,
|
31
|
-
):
|
32
|
-
self._server_url = server_url.rstrip("/") + "/rest"
|
33
|
-
self._web_url = web_url.rstrip("/")
|
34
|
-
|
35
|
-
self._email = email or os.getenv("DOCENT_EMAIL")
|
36
|
-
if self._email is None:
|
37
|
-
raise ValueError(
|
38
|
-
"Email address must be provided through keyword argument or DOCENT_EMAIL environment variable"
|
39
|
-
)
|
40
|
-
|
41
|
-
self._password = password or os.getenv("DOCENT_PASSWORD")
|
42
|
-
if self._password is None:
|
43
|
-
raise ValueError(
|
44
|
-
"Password must be provided through keyword argument or DOCENT_PASSWORD environment variable"
|
45
|
-
)
|
46
|
-
|
47
|
-
# Use requests.Session for connection pooling and persistent headers
|
48
|
-
self._session = requests.Session()
|
49
|
-
self._login()
|
50
|
-
|
51
|
-
def _login(self):
|
52
|
-
"""Login with email/password to establish session."""
|
53
|
-
login_url = f"{self._server_url}/login"
|
54
|
-
response = self._session.post(
|
55
|
-
login_url, json={"email": self._email, "password": self._password}
|
56
|
-
)
|
57
|
-
|
58
|
-
if response.status_code == 401:
|
59
|
-
raise ValueError(
|
60
|
-
"Invalid username/password combination. "
|
61
|
-
f"If you don't already have an account, please sign up at {self._web_url}/signup"
|
62
|
-
)
|
63
|
-
|
64
|
-
response.raise_for_status()
|
65
|
-
logger.info(f"Successfully logged in as {self._email}")
|
66
|
-
|
67
|
-
def create_framegrid(
|
68
|
-
self,
|
69
|
-
fg_id: str | None = None,
|
70
|
-
name: str | None = None,
|
71
|
-
description: str | None = None,
|
72
|
-
) -> str:
|
73
|
-
"""Creates a new FrameGrid.
|
74
|
-
|
75
|
-
Creates a new FrameGrid and sets up a default MECE dimension
|
76
|
-
for grouping on the homepage.
|
77
|
-
|
78
|
-
Args:
|
79
|
-
fg_id: Optional ID for the new FrameGrid. If not provided, one will be generated.
|
80
|
-
name: Optional name for the FrameGrid.
|
81
|
-
description: Optional description for the FrameGrid.
|
82
|
-
|
83
|
-
Returns:
|
84
|
-
str: The ID of the created FrameGrid.
|
85
|
-
|
86
|
-
Raises:
|
87
|
-
ValueError: If the response is missing the FrameGrid ID.
|
88
|
-
requests.exceptions.HTTPError: If the API request fails.
|
89
|
-
"""
|
90
|
-
url = f"{self._server_url}/create"
|
91
|
-
payload = {
|
92
|
-
"fg_id": fg_id,
|
93
|
-
"name": name,
|
94
|
-
"description": description,
|
95
|
-
}
|
96
|
-
|
97
|
-
response = self._session.post(url, json=payload)
|
98
|
-
response.raise_for_status()
|
99
|
-
|
100
|
-
response_data = response.json()
|
101
|
-
fg_id = response_data.get("fg_id")
|
102
|
-
if fg_id is None:
|
103
|
-
raise ValueError("Failed to create frame grid: 'fg_id' missing in response.")
|
104
|
-
|
105
|
-
logger.info(f"Successfully created FrameGrid with id='{fg_id}'")
|
106
|
-
|
107
|
-
logger.info(f"FrameGrid creation complete. Frontend available at: {self._web_url}/{fg_id}")
|
108
|
-
return fg_id
|
109
|
-
|
110
|
-
def set_io_bin_keys(self, fg_id: str, inner_bin_key: str | None, outer_bin_key: str | None):
|
111
|
-
"""Set inner and outer bin keys for a frame grid."""
|
112
|
-
response = self._session.post(
|
113
|
-
f"{self._server_url}/{fg_id}/set_io_bin_keys",
|
114
|
-
json={"inner_bin_key": inner_bin_key, "outer_bin_key": outer_bin_key},
|
115
|
-
)
|
116
|
-
response.raise_for_status()
|
117
|
-
|
118
|
-
def set_inner_bin_key(self, fg_id: str, dim: str):
|
119
|
-
"""Set the inner bin key for a frame grid."""
|
120
|
-
current_io_bin_keys = self.get_io_bin_keys(fg_id)
|
121
|
-
if current_io_bin_keys is None:
|
122
|
-
current_io_bin_keys = (None, None)
|
123
|
-
self.set_io_bin_keys(fg_id, dim, current_io_bin_keys[1]) # Set inner, keep outer
|
124
|
-
|
125
|
-
def set_outer_bin_key(self, fg_id: str, dim: str):
|
126
|
-
"""Set the outer bin key for a frame grid."""
|
127
|
-
current_io_bin_keys = self.get_io_bin_keys(fg_id)
|
128
|
-
if current_io_bin_keys is None:
|
129
|
-
current_io_bin_keys = (None, None)
|
130
|
-
self.set_io_bin_keys(fg_id, current_io_bin_keys[0], dim) # Keep inner, set outer
|
131
|
-
|
132
|
-
def get_io_bin_keys(self, fg_id: str) -> tuple[str | None, str | None] | None:
|
133
|
-
"""Gets the current inner and outer bin keys for a FrameGrid.
|
134
|
-
|
135
|
-
Args:
|
136
|
-
fg_id: ID of the FrameGrid.
|
137
|
-
|
138
|
-
Returns:
|
139
|
-
tuple: (inner_bin_key | None, outer_bin_key | None)
|
140
|
-
|
141
|
-
Raises:
|
142
|
-
requests.exceptions.HTTPError: If the API request fails.
|
143
|
-
"""
|
144
|
-
url = f"{self._server_url}/{fg_id}/io_bin_keys"
|
145
|
-
response = self._session.get(url)
|
146
|
-
response.raise_for_status()
|
147
|
-
data = response.json()
|
148
|
-
return (data.get("inner_bin_key"), data.get("outer_bin_key"))
|
149
|
-
|
150
|
-
def add_agent_runs(self, fg_id: str, agent_runs: list[AgentRun]) -> dict[str, Any]:
|
151
|
-
"""Adds agent runs to a FrameGrid.
|
152
|
-
|
153
|
-
Agent runs represent execution traces that can be visualized and analyzed.
|
154
|
-
This method batches the insertion in groups of 5,000 for better performance.
|
155
|
-
|
156
|
-
Args:
|
157
|
-
fg_id: ID of the FrameGrid.
|
158
|
-
agent_runs: List of AgentRun objects to add.
|
159
|
-
|
160
|
-
Returns:
|
161
|
-
dict: API response data.
|
162
|
-
|
163
|
-
Raises:
|
164
|
-
requests.exceptions.HTTPError: If the API request fails.
|
165
|
-
"""
|
166
|
-
from tqdm import tqdm
|
167
|
-
|
168
|
-
url = f"{self._server_url}/{fg_id}/agent_runs"
|
169
|
-
batch_size = 1000
|
170
|
-
total_runs = len(agent_runs)
|
171
|
-
|
172
|
-
# Process agent runs in batches
|
173
|
-
with tqdm(total=total_runs, desc="Adding agent runs", unit="runs") as pbar:
|
174
|
-
for i in range(0, total_runs, batch_size):
|
175
|
-
batch = agent_runs[i : i + batch_size]
|
176
|
-
payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
|
177
|
-
|
178
|
-
response = self._session.post(url, json=payload)
|
179
|
-
response.raise_for_status()
|
180
|
-
|
181
|
-
pbar.update(len(batch))
|
182
|
-
|
183
|
-
url = f"{self._server_url}/{fg_id}/compute_embeddings"
|
184
|
-
response = self._session.post(url)
|
185
|
-
response.raise_for_status()
|
186
|
-
|
187
|
-
logger.info(f"Successfully added {total_runs} agent runs to FrameGrid '{fg_id}'")
|
188
|
-
return {"status": "success", "total_runs_added": total_runs}
|
189
|
-
|
190
|
-
def list_framegrids(self) -> list[dict[str, Any]]:
|
191
|
-
"""Lists all available FrameGrids.
|
192
|
-
|
193
|
-
Returns:
|
194
|
-
list: List of dictionaries containing FrameGrid information.
|
195
|
-
|
196
|
-
Raises:
|
197
|
-
requests.exceptions.HTTPError: If the API request fails.
|
198
|
-
"""
|
199
|
-
url = f"{self._server_url}/framegrids"
|
200
|
-
response = self._session.get(url)
|
201
|
-
response.raise_for_status()
|
202
|
-
return response.json()
|
203
|
-
|
204
|
-
def get_dimensions(self, fg_id: str, dim_ids: list[str] | None = None) -> list[dict[str, Any]]:
|
205
|
-
"""Retrieves dimensions for a FrameGrid.
|
206
|
-
|
207
|
-
Args:
|
208
|
-
fg_id: ID of the FrameGrid.
|
209
|
-
dim_ids: Optional list of dimension IDs to retrieve. If None, retrieves all dimensions.
|
210
|
-
|
211
|
-
Returns:
|
212
|
-
list: List of dictionaries containing dimension information.
|
213
|
-
|
214
|
-
Raises:
|
215
|
-
requests.exceptions.HTTPError: If the API request fails.
|
216
|
-
"""
|
217
|
-
url = f"{self._server_url}/{fg_id}/get_dimensions"
|
218
|
-
payload = {
|
219
|
-
"dim_ids": dim_ids,
|
220
|
-
}
|
221
|
-
response = self._session.post(url, json=payload)
|
222
|
-
response.raise_for_status()
|
223
|
-
return response.json()
|
224
|
-
|
225
|
-
def list_attribute_searches(
|
226
|
-
self, fg_id: str, base_data_only: bool = True
|
227
|
-
) -> list[dict[str, Any]]:
|
228
|
-
"""Lists available attribute searches for a FrameGrid.
|
229
|
-
|
230
|
-
Attribute searches allow finding frames with specific metadata attributes.
|
231
|
-
|
232
|
-
Args:
|
233
|
-
fg_id: ID of the FrameGrid.
|
234
|
-
base_data_only: If True, returns only basic search information.
|
235
|
-
|
236
|
-
Returns:
|
237
|
-
list: List of dictionaries containing attribute search information.
|
238
|
-
|
239
|
-
Raises:
|
240
|
-
requests.exceptions.HTTPError: If the API request fails.
|
241
|
-
"""
|
242
|
-
url = f"{self._server_url}/{fg_id}/attribute_searches"
|
243
|
-
params = {
|
244
|
-
"base_data_only": base_data_only,
|
245
|
-
}
|
246
|
-
response = self._session.get(url, params=params)
|
247
|
-
response.raise_for_status()
|
248
|
-
return response.json()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|