docent-python 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic. Click here for more details.
- docent/__init__.py +3 -0
- docent/_log_util/__init__.py +3 -0
- docent/_log_util/logger.py +141 -0
- docent/data_models/__init__.py +25 -0
- docent/data_models/_tiktoken_util.py +91 -0
- docent/data_models/agent_run.py +231 -0
- docent/data_models/chat/__init__.py +25 -0
- docent/data_models/chat/content.py +56 -0
- docent/data_models/chat/message.py +125 -0
- docent/data_models/chat/tool.py +109 -0
- docent/data_models/citation.py +223 -0
- docent/data_models/filters.py +205 -0
- docent/data_models/metadata.py +219 -0
- docent/data_models/regex.py +56 -0
- docent/data_models/shared_types.py +10 -0
- docent/data_models/transcript.py +347 -0
- docent/py.typed +0 -0
- docent/sdk/__init__.py +0 -0
- docent/sdk/client.py +285 -0
- docent_python-0.1.0a1.dist-info/METADATA +16 -0
- docent_python-0.1.0a1.dist-info/RECORD +23 -0
- docent_python-0.1.0a1.dist-info/WHEEL +4 -0
- docent_python-0.1.0a1.dist-info/licenses/LICENSE.md +7 -0
docent/sdk/client.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from docent._log_util.logger import get_logger
|
|
7
|
+
from docent.data_models.agent_run import AgentRun
|
|
8
|
+
from docent.data_models.filters import FrameFilter
|
|
9
|
+
|
|
10
|
+
# Module-level logger, named after this module, used by the Docent client below.
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Docent:
    """Client for interacting with the Docent API.

    This client provides methods for creating and managing FrameGrids,
    dimensions, agent runs, and filters in the Docent system.

    All requests go through a single ``requests.Session`` that is logged in
    when the client is constructed. Call :meth:`close`, or use the client as
    a context manager, to release the session's pooled connections.

    Args:
        server_url: URL of the Docent API server.
        web_url: URL of the Docent web UI.
        email: Email address for authentication. Falls back to the
            ``DOCENT_EMAIL`` environment variable when omitted.
        password: Password for authentication. Falls back to the
            ``DOCENT_PASSWORD`` environment variable when omitted.

    Raises:
        ValueError: If no email or no password is available.
        requests.exceptions.HTTPError: If the login request returns an
            error status.
    """

    def __init__(
        self, server_url: str, web_url: str, email: str | None = None, password: str | None = None
    ):
        # Every endpoint used by this client lives under the "/rest" prefix.
        self._server_url = server_url.rstrip("/") + "/rest"
        self._web_url = web_url.rstrip("/")

        # An empty string (from the argument or the environment) counts as
        # "not provided", so it is rejected here instead of at login time.
        self._email = email or os.getenv("DOCENT_EMAIL")
        if not self._email:
            raise ValueError(
                "Email address must be provided through keyword argument or DOCENT_EMAIL environment variable"
            )

        self._password = password or os.getenv("DOCENT_PASSWORD")
        if not self._password:
            raise ValueError(
                "Password must be provided through keyword argument or DOCENT_PASSWORD environment variable"
            )

        # Use requests.Session for connection pooling and persistent headers
        self._session = requests.Session()
        try:
            self._login()
        except Exception:
            # Do not leak the session when authentication fails.
            self._session.close()
            raise

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self._session.close()

    def __enter__(self) -> "Docent":
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the HTTP session when the ``with`` block exits."""
        self.close()

    def _login(self):
        """Login with email/password to establish session.

        Raises:
            requests.exceptions.HTTPError: If the login request fails.
        """
        login_url = f"{self._server_url}/login"
        response = self._session.post(
            login_url, json={"email": self._email, "password": self._password}
        )
        response.raise_for_status()
        logger.info(f"Successfully logged in as {self._email}")

    def create_framegrid(
        self,
        fg_id: str | None = None,
        name: str | None = None,
        description: str | None = None,
    ) -> str:
        """Creates a new FrameGrid.

        Posts the requested ID, name and description to the ``/create``
        endpoint and returns the ID reported by the server.

        NOTE(review): the original documentation says the server also sets up
        a default MECE dimension for grouping on the homepage; that happens
        (if at all) server-side and is not visible in this client.

        Args:
            fg_id: Optional ID for the new FrameGrid. If not provided, one will be generated.
            name: Optional name for the FrameGrid.
            description: Optional description for the FrameGrid.

        Returns:
            str: The ID of the created FrameGrid.

        Raises:
            ValueError: If the response is missing the FrameGrid ID.
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/create"
        payload = {
            "fg_id": fg_id,
            "name": name,
            "description": description,
        }

        response = self._session.post(url, json=payload)
        response.raise_for_status()

        # Keep the server-reported ID separate from the requested one.
        created_id = response.json().get("fg_id")
        if created_id is None:
            raise ValueError("Failed to create frame grid: 'fg_id' missing in response.")

        logger.info(f"Successfully created FrameGrid with id='{created_id}'")

        logger.info(
            f"FrameGrid creation complete. Frontend available at: {self._web_url}/{created_id}"
        )
        return created_id

    def set_io_bin_keys(self, fg_id: str, inner_bin_key: str | None, outer_bin_key: str | None):
        """Set inner and outer bin keys for a frame grid.

        Args:
            fg_id: ID of the FrameGrid.
            inner_bin_key: Inner bin key to send, or None.
            outer_bin_key: Outer bin key to send, or None.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        response = self._session.post(
            f"{self._server_url}/{fg_id}/set_io_bin_keys",
            json={"inner_bin_key": inner_bin_key, "outer_bin_key": outer_bin_key},
        )
        response.raise_for_status()

    def set_inner_bin_key(self, fg_id: str, dim: str):
        """Set the inner bin key for a frame grid, keeping the current outer key.

        Args:
            fg_id: ID of the FrameGrid.
            dim: Value to use as the new inner bin key.

        Raises:
            requests.exceptions.HTTPError: If either API request fails.
        """
        current_io_bin_keys = self.get_io_bin_keys(fg_id)
        if current_io_bin_keys is None:
            current_io_bin_keys = (None, None)
        self.set_io_bin_keys(fg_id, dim, current_io_bin_keys[1])  # Set inner, keep outer

    def set_outer_bin_key(self, fg_id: str, dim: str):
        """Set the outer bin key for a frame grid, keeping the current inner key.

        Args:
            fg_id: ID of the FrameGrid.
            dim: Value to use as the new outer bin key.

        Raises:
            requests.exceptions.HTTPError: If either API request fails.
        """
        current_io_bin_keys = self.get_io_bin_keys(fg_id)
        if current_io_bin_keys is None:
            current_io_bin_keys = (None, None)
        self.set_io_bin_keys(fg_id, current_io_bin_keys[0], dim)  # Keep inner, set outer

    def get_io_bin_keys(self, fg_id: str) -> tuple[str | None, str | None] | None:
        """Gets the current inner and outer bin keys for a FrameGrid.

        Args:
            fg_id: ID of the FrameGrid.

        Returns:
            tuple: (inner_bin_key | None, outer_bin_key | None). This
            implementation always returns a tuple; the ``| None`` in the
            annotation is kept only for interface compatibility.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/{fg_id}/io_bin_keys"
        response = self._session.get(url)
        response.raise_for_status()
        data = response.json()
        return (data.get("inner_bin_key"), data.get("outer_bin_key"))

    def add_agent_runs(self, fg_id: str, agent_runs: list[AgentRun]) -> dict[str, Any]:
        """Adds agent runs to a FrameGrid.

        Agent runs represent execution traces that can be visualized and analyzed.
        This method uploads the runs in batches of 1,000 per request and then
        posts to the FrameGrid's ``compute_embeddings`` endpoint.

        Args:
            fg_id: ID of the FrameGrid.
            agent_runs: List of AgentRun objects to add.

        Returns:
            dict: A summary built by the client, not the raw API response:
            ``{"status": "success", "total_runs_added": <number of runs>}``.

        Raises:
            requests.exceptions.HTTPError: If any API request fails. Batches
                uploaded before the failing request are not rolled back by
                this client.
        """
        # Imported lazily so tqdm is only needed when runs are uploaded.
        from tqdm import tqdm

        url = f"{self._server_url}/{fg_id}/agent_runs"
        batch_size = 1000
        total_runs = len(agent_runs)

        # Process agent runs in batches
        with tqdm(total=total_runs, desc="Adding agent runs", unit="runs") as pbar:
            for start in range(0, total_runs, batch_size):
                batch = agent_runs[start : start + batch_size]
                payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}

                response = self._session.post(url, json=payload)
                response.raise_for_status()

                pbar.update(len(batch))

        url = f"{self._server_url}/{fg_id}/compute_embeddings"
        response = self._session.post(url)
        response.raise_for_status()

        logger.info(f"Successfully added {total_runs} agent runs to FrameGrid '{fg_id}'")
        return {"status": "success", "total_runs_added": total_runs}

    def get_base_filter(self, fg_id: str) -> dict[str, Any] | None:
        """Retrieves the base filter for a FrameGrid.

        The base filter defines default filtering applied to all views.

        Args:
            fg_id: ID of the FrameGrid.

        Returns:
            dict or None: Filter data if a filter exists, None otherwise.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/{fg_id}/base_filter"
        response = self._session.get(url)
        response.raise_for_status()
        # The endpoint returns the filter model directly or null
        return response.json()

    def set_base_filter(self, fg_id: str, filter: FrameFilter | None) -> dict[str, Any]:
        """Sets the base filter for a FrameGrid.

        The base filter defines default filtering applied to all views.

        Args:
            fg_id: ID of the FrameGrid.
            filter: FrameFilter object defining the filter, or None to clear the filter.

        Returns:
            dict: API response data.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/{fg_id}/base_filter"
        payload = {
            # mode="json" (as in add_agent_runs) keeps the dump JSON-encodable.
            "filter": filter.model_dump(mode="json") if filter is not None else None,
        }

        response = self._session.post(url, json=payload)
        response.raise_for_status()

        logger.info(f"Successfully set base filter for FrameGrid '{fg_id}'")
        return response.json()

    def list_framegrids(self) -> list[dict[str, Any]]:
        """Lists all available FrameGrids.

        Returns:
            list: List of dictionaries containing FrameGrid information.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/framegrids"
        response = self._session.get(url)
        response.raise_for_status()
        return response.json()

    def get_dimensions(self, fg_id: str, dim_ids: list[str] | None = None) -> list[dict[str, Any]]:
        """Retrieves dimensions for a FrameGrid.

        Args:
            fg_id: ID of the FrameGrid.
            dim_ids: Optional list of dimension IDs to retrieve. If None, retrieves all dimensions.

        Returns:
            list: List of dictionaries containing dimension information.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/{fg_id}/get_dimensions"
        payload = {
            "dim_ids": dim_ids,
        }
        response = self._session.post(url, json=payload)
        response.raise_for_status()
        return response.json()

    def list_attribute_searches(
        self, fg_id: str, base_data_only: bool = True
    ) -> list[dict[str, Any]]:
        """Lists available attribute searches for a FrameGrid.

        Attribute searches allow finding frames with specific metadata attributes.

        Args:
            fg_id: ID of the FrameGrid.
            base_data_only: If True, returns only basic search information.

        Returns:
            list: List of dictionaries containing attribute search information.

        Raises:
            requests.exceptions.HTTPError: If the API request fails.
        """
        url = f"{self._server_url}/{fg_id}/attribute_searches"
        params = {
            "base_data_only": base_data_only,
        }
        response = self._session.get(url, params=params)
        response.raise_for_status()
        return response.json()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: docent-python
|
|
3
|
+
Version: 0.1.0a1
|
|
4
|
+
Summary: Docent SDK
|
|
5
|
+
Project-URL: Homepage, https://github.com/TransluceAI/docent
|
|
6
|
+
Project-URL: Issues, https://github.com/TransluceAI/docent/issues
|
|
7
|
+
Project-URL: Docs, https://transluce-docent.readthedocs-hosted.com/en/latest
|
|
8
|
+
Author-email: Transluce AI <info@transluce.org>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE.md
|
|
11
|
+
Requires-Python: >=3.11
|
|
12
|
+
Requires-Dist: logging>=0.4.9.6
|
|
13
|
+
Requires-Dist: pydantic>=2.11.7
|
|
14
|
+
Requires-Dist: pyyaml>=6.0.2
|
|
15
|
+
Requires-Dist: sqlalchemy>=2.0.41
|
|
16
|
+
Requires-Dist: tiktoken>=0.7.0
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
docent/__init__.py,sha256=J2BbO6rzilfw9WXRUeolr439EGFezqbMU_kCpCCryRA,59
|
|
2
|
+
docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,73
|
|
4
|
+
docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
|
|
5
|
+
docent/data_models/__init__.py,sha256=M84x4yoPZz97-e-2KQ4qXud8i6Ykdjl1ChbwZalu2NQ,689
|
|
6
|
+
docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
|
|
7
|
+
docent/data_models/agent_run.py,sha256=sdvoUUpOhQAHqJHNR5KoHthCXrpJajdIREMacoR1ODk,9516
|
|
8
|
+
docent/data_models/citation.py,sha256=WsVQZcBT2EJD24ysyeVOC5Xfo165RI7P5_cOnJBgHj0,10015
|
|
9
|
+
docent/data_models/filters.py,sha256=nZquRQji_xZwea6nBxM_SRECaYECDMEQ8Zt1TXl-_jI,7484
|
|
10
|
+
docent/data_models/metadata.py,sha256=yBEm5M_gSNuoG---Fezsjm0YDGUQoJeM_BL9rwbBt-U,8035
|
|
11
|
+
docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
|
|
12
|
+
docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
|
|
13
|
+
docent/data_models/transcript.py,sha256=HK1NvtD06XqVto6O5qzQ_wCmxlx0O88wSBB2NRTPs44,13667
|
|
14
|
+
docent/data_models/chat/__init__.py,sha256=O04XQ2NmO8GTWqkkB_Iydj8j_CucZuLhoyMVTxJN_cs,570
|
|
15
|
+
docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
|
|
16
|
+
docent/data_models/chat/message.py,sha256=iAo38kbV6wYbFh8S23cxLy6HY4C_i3PzQ6RpSQG5dxM,3861
|
|
17
|
+
docent/data_models/chat/tool.py,sha256=x7NKINswPe0Kqvcx4ubjHzB-n0-i4DbFodvaBb2vitk,3042
|
|
18
|
+
docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
docent/sdk/client.py,sha256=XoeO8d7vCghf0WZoc22lJdPRBt6i88S2QV-viQubuEA,10090
|
|
20
|
+
docent_python-0.1.0a1.dist-info/METADATA,sha256=NewmZvefKjD0vLDEHpc_VRf4v-asf_jf9DEEsVG2xmw,562
|
|
21
|
+
docent_python-0.1.0a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
+
docent_python-0.1.0a1.dist-info/licenses/LICENSE.md,sha256=-TCY5GNyVAczrb96jTgTPalccQeyZqVXgs0N_dhC1OI,1086
|
|
23
|
+
docent_python-0.1.0a1.dist-info/RECORD,,
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2025 Clarity AI Research, Inc. dba Transluce
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|