docent-python 0.1.13a0__py3-none-any.whl → 0.1.15a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docent/__init__.py +1 -1
- docent/data_models/agent_run.py +268 -99
- docent/data_models/remove_invalid_citation_ranges.py +3 -6
- docent/data_models/transcript.py +59 -37
- docent/data_models/yaml_util.py +12 -0
- docent/loaders/load_inspect.py +15 -10
- docent/sdk/client.py +90 -46
- docent/trace.py +4 -2
- {docent_python-0.1.13a0.dist-info → docent_python-0.1.15a0.dist-info}/METADATA +4 -2
- {docent_python-0.1.13a0.dist-info → docent_python-0.1.15a0.dist-info}/RECORD +13 -12
- docent_python-0.1.15a0.dist-info/licenses/LICENSE.md +13 -0
- docent_python-0.1.13a0.dist-info/licenses/LICENSE.md +0 -7
- /docent/{agent_run_writer.py → sdk/agent_run_writer.py} +0 -0
- {docent_python-0.1.13a0.dist-info → docent_python-0.1.15a0.dist-info}/WHEEL +0 -0
docent/__init__.py
CHANGED
docent/data_models/agent_run.py
CHANGED
@@ -1,5 +1,7 @@
-import json
 import sys
+import textwrap
+from datetime import datetime
+from queue import Queue
 from typing import Any, Literal, TypedDict, cast
 from uuid import uuid4

@@ -7,18 +9,18 @@ import yaml
 from pydantic import (
     BaseModel,
     Field,
-
+    PrivateAttr,
     field_validator,
     model_validator,
 )
+from pydantic_core import to_jsonable_python

+from docent._log_util import get_logger
 from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
-from docent.data_models.transcript import
-
-
-
-    fake_model_dump,
-)
+from docent.data_models.transcript import Transcript, TranscriptGroup
+from docent.data_models.yaml_util import yaml_dump_metadata
+
+logger = get_logger(__name__)


 class FilterableField(TypedDict):
@@ -36,8 +38,8 @@ class AgentRun(BaseModel):
         id: Unique identifier for the agent run, auto-generated by default.
         name: Optional human-readable name for the agent run.
         description: Optional description of the agent run.
-        transcripts:
-        transcript_groups:
+        transcripts: List of Transcript objects.
+        transcript_groups: List of TranscriptGroup objects.
         metadata: Additional structured metadata about the agent run as a JSON-serializable dictionary.
     """

@@ -45,36 +47,31 @@ class AgentRun(BaseModel):
     name: str | None = None
     description: str | None = None

-    transcripts:
-    transcript_groups:
+    transcripts: list[Transcript]
+    transcript_groups: list[TranscriptGroup] = Field(default_factory=list)
     metadata: dict[str, Any] = Field(default_factory=dict)

-    @
-    def serialize_metadata(self, metadata: dict[str, Any], _info: Any) -> dict[str, Any]:
-        """
-        Custom serializer for the metadata field - returns the dict as-is since it's already serializable.
-        """
-        return fake_model_dump(metadata)
-
-    @field_validator("metadata", mode="before")
+    @field_validator("transcripts", mode="before")
     @classmethod
-    def
-
-
-
-
-
-
-
-            raise ValueError(f"metadata must be a dictionary, got {type(v).__name__}")
-
-        # Check that the metadata is JSON serializable
-        try:
-            json.dumps(fake_model_dump(cast(dict[str, Any], v)))
-        except (TypeError, ValueError) as e:
-            raise ValueError(f"metadata must be JSON-serializable: {e}")
+    def _validate_transcripts_type(cls, v: Any) -> Any:
+        if isinstance(v, dict):
+            logger.warning(
+                "dict[str, Transcript] for transcripts is deprecated. Use list[Transcript] instead."
+            )
+            v = cast(dict[str, Transcript], v)
+            return [Transcript.model_validate(t) for t in v.values()]
+        return v

-
+    @field_validator("transcript_groups", mode="before")
+    @classmethod
+    def _validate_transcript_groups_type(cls, v: Any) -> Any:
+        if isinstance(v, dict):
+            logger.warning(
+                "dict[str, TranscriptGroup] for transcript_groups is deprecated. Use list[TranscriptGroup] instead."
+            )
+            v = cast(dict[str, TranscriptGroup], v)
+            return [TranscriptGroup.model_validate(tg) for tg in v.values()]
+        return v

     @model_validator(mode="after")
     def _validate_transcripts_not_empty(self):
@@ -90,6 +87,52 @@ class AgentRun(BaseModel):
             raise ValueError("AgentRun must have at least one transcript")
         return self

+    def get_filterable_fields(self, max_depth: int = 1) -> list[FilterableField]:
+        """Returns a list of all fields that can be used to filter the agent run,
+        by recursively exploring the model_dump() for singleton types in dictionaries.
+
+        Returns:
+            list[FilterableField]: A list of filterable fields, where each field is a
+                dictionary containing its 'name' (path) and 'type'.
+        """
+
+        result: list[FilterableField] = []
+
+        def _explore_dict(d: dict[str, Any], prefix: str, depth: int):
+            nonlocal result
+
+            if depth > max_depth:
+                return
+
+            for k, v in d.items():
+                if isinstance(v, (str, int, float, bool)):
+                    result.append(
+                        {
+                            "name": f"{prefix}.{k}",
+                            "type": cast(Literal["str", "bool", "int", "float"], type(v).__name__),
+                        }
+                    )
+                elif isinstance(v, dict):
+                    _explore_dict(cast(dict[str, Any], v), f"{prefix}.{k}", depth + 1)
+
+        # Look at the agent run metadata
+        _explore_dict(to_jsonable_python(self.metadata), "metadata", 0)
+        # Look at the transcript metadata
+        # TODO(mengk): restore this later when we have the ability to integrate with SQL.
+        # for t_id, t in self.transcripts.items():
+        #     _explore_dict(
+        #         t.metadata.model_dump(strip_internal_fields=True), f"transcript.{t_id}.metadata", 0
+        #     )
+
+        # Append the text field
+        result.append({"name": "text", "type": "str"})
+
+        return result
+
+    ######################
+    # Converting to text #
+    ######################
+
     def _to_text_impl(self, token_limit: int = sys.maxsize, use_blocks: bool = False) -> list[str]:
         """
         Core implementation for converting agent run to text representation.
@@ -103,7 +146,7 @@ class AgentRun(BaseModel):
         """
         # Generate transcript strings using appropriate method
         transcript_strs: list[str] = []
-        for i,
+        for i, t in enumerate(self.transcripts):
             if use_blocks:
                 transcript_content = t.to_str_blocks_with_token_limit(
                     token_limit=sys.maxsize,
@@ -116,14 +159,12 @@ class AgentRun(BaseModel):
                     transcript_idx=i,
                     agent_run_idx=None,
                 )[0]
-            transcript_strs.append(
-                f"<transcript {t_key}>\n{transcript_content}\n</transcript {t_key}>"
-            )
+            transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")

         transcripts_str = "\n\n".join(transcript_strs)

         # Gather metadata
-        metadata_obj =
+        metadata_obj = to_jsonable_python(self.metadata)
         if self.name is not None:
             metadata_obj["name"] = self.name
         if self.description is not None:
@@ -164,12 +205,12 @@ class AgentRun(BaseModel):
             assert (
                 msg_range.end == msg_range.start + 1
             ), "Ranges without metadata should be a single message"
-
+            t = self.transcripts[msg_range.start]
             if msg_range.num_tokens < token_limit - 50:
                 if use_blocks:
-                    transcript = f"<transcript
+                    transcript = f"<transcript>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
                 else:
-                    transcript = f"<transcript
+                    transcript = f"<transcript>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
                 result = (
                     f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                 )
@@ -184,7 +225,7 @@ class AgentRun(BaseModel):
                     token_limit=token_limit - 50,
                 )
                 for fragment in transcript_fragments:
-                    result = f"<transcript
+                    result = f"<transcript>\n{fragment}\n</transcript>"
                     result = (
                         f"Here is a partial agent run for analysis purposes only:\n{result}"
                     )
@@ -229,71 +270,199 @@ class AgentRun(BaseModel):
         """
         return self._to_text_impl(token_limit=sys.maxsize, use_blocks=True)[0]

-
-
+    ##############################
+    # New text rendering methods #
+    ##############################
+
+    # Transcript ID -> Transcript
+    _transcript_dict: dict[str, Transcript] | None = PrivateAttr(default=None)
+    # Transcript Group ID -> Transcript Group
+    _transcript_group_dict: dict[str, TranscriptGroup] | None = PrivateAttr(default=None)
+    # Canonical tree cache keyed by full_tree flag
+    _canonical_tree_cache: dict[bool, dict[str | None, list[tuple[Literal["t", "tg"], str]]]] = (
+        PrivateAttr(default_factory=dict)
+    )
+    # Transcript IDs (depth-first) cache keyed by full_tree flag
+    _transcript_ids_ordered_cache: dict[bool, list[str]] = PrivateAttr(default_factory=dict)
+
+    @property
+    def transcript_dict(self) -> dict[str, Transcript]:
+        """Lazily compute and cache a mapping from transcript ID to Transcript."""
+        if self._transcript_dict is None:
+            self._transcript_dict = {t.id: t for t in self.transcripts}
+        return self._transcript_dict
+
+    @property
+    def transcript_group_dict(self) -> dict[str, TranscriptGroup]:
+        """Lazily compute and cache a mapping from transcript group ID to TranscriptGroup."""
+        if self._transcript_group_dict is None:
+            self._transcript_group_dict = {tg.id: tg for tg in self.transcript_groups}
+        return self._transcript_group_dict
+
+    def get_canonical_tree(
+        self, full_tree: bool = False
+    ) -> dict[str | None, list[tuple[Literal["t", "tg"], str]]]:
+        """Compute and cache the canonical, sorted transcript group tree.

         Args:
-
-
+            full_tree: If True, include all transcript groups regardless of whether
+                they contain transcripts. If False, include only the minimal tree
+                that connects relevant groups and transcripts.

         Returns:
-
+            Canonical tree mapping parent group id (or "__global_root") to a list of
+            children (type, id) tuples sorted by creation time.
         """
-
+        if (
+            full_tree not in self._canonical_tree_cache
+            or full_tree not in self._transcript_ids_ordered_cache
+        ):
+            canonical_tree, transcript_idx_map = self._build_canonical_tree(full_tree=full_tree)
+            self._canonical_tree_cache[full_tree] = canonical_tree
+            self._transcript_ids_ordered_cache[full_tree] = list(transcript_idx_map.keys())
+        return self._canonical_tree_cache[full_tree]
+
+    def get_transcript_ids_ordered(self, full_tree: bool = False) -> list[str]:
+        """Compute and cache the depth-first transcript id ordering.

-
-
-        by recursively exploring the model_dump() for singleton types in dictionaries.
+        Args:
+            full_tree: Whether to compute based on the full tree or the minimal tree.

         Returns:
-
-            dictionary containing its 'name' (path) and 'type'.
+            List of transcript ids in depth-first order.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if (
+            full_tree not in self._transcript_ids_ordered_cache
+            or full_tree not in self._canonical_tree_cache
+        ):
+            canonical_tree, transcript_idx_map = self._build_canonical_tree(full_tree=full_tree)
+            self._canonical_tree_cache[full_tree] = canonical_tree
+            self._transcript_ids_ordered_cache[full_tree] = list(transcript_idx_map.keys())
+        return self._transcript_ids_ordered_cache[full_tree]
+
+    def _build_canonical_tree(self, full_tree: bool = False):
+        t_dict = self.transcript_dict
+        tg_dict = self.transcript_group_dict
+
+        # Find all transcript groups that have direct transcript children
+        # Also keep track of transcripts that are not in a group
+        tgs_to_transcripts: dict[str, set[str]] = {}
+        for transcript in t_dict.values():
+            if transcript.transcript_group_id is None:
+                tgs_to_transcripts.setdefault("__global_root", set()).add(transcript.id)
+            else:
+                tgs_to_transcripts.setdefault(transcript.transcript_group_id, set()).add(
+                    transcript.id
+                )
+
+        # tg_tree maps from parent -> children. A child can be a group or a transcript.
+        # A parent must be a group (or None, for transcripts that are not in a group).
+        tg_tree: dict[str, set[tuple[Literal["t", "tg"], str]]] = {}
+
+        if full_tree:
+            for tg_id, tg in tg_dict.items():
+                tg_tree.setdefault(tg.parent_transcript_group_id or "__global_root", set()).add(
+                    ("tg", tg_id)
+                )
+                for t_id in tgs_to_transcripts.get(tg_id, []):
+                    tg_tree.setdefault(tg_id, set()).add(("t", t_id))
+            for t_id, t in t_dict.items():
+                tg_tree.setdefault(t.transcript_group_id or "__global_root", set()).add(("t", t_id))
+        else:
+            # Initialize q with "important" tgs
+            q, seen = Queue[str](), set[str]()
+            for tg_id in tgs_to_transcripts.keys():
+                q.put(tg_id)
+                seen.add(tg_id)
+
+            # Do an "upwards BFS" from leaves up to the root. Builds a tree of only relevant nodes.
+            while q.qsize() > 0:
+                u_id = q.get()
+                u = tg_dict.get(u_id)  # None if __global_root
+
+                # Add the transcripts under this tg
+                for t_id in tgs_to_transcripts.get(u_id, []):
+                    tg_tree.setdefault(u_id, set()).add(("t", t_id))
+
+                # Add an edge from the parent
+                if u is not None:
+                    par_id = u.parent_transcript_group_id or "__global_root"
+                    # Mark u as a child of par
+                    tg_tree.setdefault(par_id, set()).add(("tg", u_id))
+                    # If we haven't investigated the parent before, add to q
+                    if par_id not in seen:
+                        q.put(par_id)
+                        seen.add(par_id)
+
+        # For each node, sort by created_at timestamp
+
+        def _cmp(element: tuple[Literal["t", "tg"], str]) -> datetime:
+            obj_type, obj_id = element
+            if obj_type == "tg":
+                return tg_dict[obj_id].created_at or datetime.max
+            else:
+                return t_dict[obj_id].created_at or datetime.max
+
+        c_tree: dict[str | None, list[tuple[Literal["t", "tg"], str]]] = {}
+        for tg_id in tg_tree:
+            children_ids = list(set(tg_tree[tg_id]))
+            sorted_children_ids = sorted(children_ids, key=_cmp)
+            c_tree[tg_id] = sorted_children_ids
+
+        # Compute transcript indices as the depth-first traversal index
+        transcript_idx_map: dict[str, int] = {}
+
+        def _assign_transcript_indices(cur_tg_id: str, next_idx: int) -> int:
+            children = c_tree.get(cur_tg_id, [])
+            for child_type, child_id in children:
+                if child_type == "tg":
+                    next_idx = _assign_transcript_indices(child_id, next_idx)
+                else:
+                    transcript_idx_map[child_id] = next_idx
+                    next_idx += 1
+            return next_idx
+
+        _assign_transcript_indices("__global_root", 0)
+
+        return c_tree, transcript_idx_map
+
+    def to_text_new(self, indent: int = 0, full_tree: bool = False):
+        c_tree = self.get_canonical_tree(full_tree=full_tree)
+        t_ids_ordered = self.get_transcript_ids_ordered(full_tree=full_tree)
+        t_idx_map = {t_id: i for i, t_id in enumerate(t_ids_ordered)}
+        t_dict = self.transcript_dict
+        tg_dict = self.transcript_group_dict
+
+        # Traverse the tree and render the string
+        def _recurse(tg_id: str) -> str:
+            children_ids = c_tree.get(tg_id, [])
+            children_texts: list[str] = []
+            for child_type, child_id in children_ids:
+                if child_type == "tg":
+                    children_texts.append(_recurse(child_id))
+                else:
+                    cur_text = t_dict[child_id].to_text_new(
+                        transcript_idx=t_idx_map[child_id],
+                        indent=indent,
                     )
-
-
-
-        # Look at the agent run metadata
-        _explore_dict(fake_model_dump(self.metadata), "metadata", 0)
-        # Look at the transcript metadata
-        # TODO(mengk): restore this later when we have the ability to integrate with SQL.
-        # for t_id, t in self.transcripts.items():
-        #     _explore_dict(
-        #         t.metadata.model_dump(strip_internal_fields=True), f"transcript.{t_id}.metadata", 0
-        #     )
-
-        # Append the text field
-        result.append({"name": "text", "type": "str"})
-
-        return result
+                    children_texts.append(cur_text)
+            children_text = "\n".join(children_texts)

+            # No wrapper for global root
+            if tg_id == "__global_root":
+                return children_text
+            # Delegate rendering to TranscriptGroup
+            else:
+                tg = tg_dict[tg_id]
+                return tg.to_text_new(children_text=children_text, indent=indent)

-
-    """
-    A version of AgentRun that doesn't have the model_validator on metadata.
-    Needed for sending/receiving agent runs via JSON, since they incorrectly trip the existing model_validator.
-    """
+        text = _recurse("__global_root")

-
+        # Append agent run metadata below the full content
+        yaml_text = yaml_dump_metadata(self.metadata)
+        if yaml_text is not None:
+            if indent > 0:
+                yaml_text = textwrap.indent(yaml_text, " " * indent)
+            text += f"\n<|agent run metadata|>\n{yaml_text}\n</|agent run metadata|>"

-
-    @classmethod
-    def _validate_metadata_type(cls, v: Any) -> Any:
-        # Bypass the model_validator
-        return v
+        return text
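The practical upshot for SDK users: AgentRun.transcripts is now a plain list (a dict keyed by transcript id still validates, but logs a deprecation warning and is converted by the "before" validator), and to_text_new renders the transcript-group tree with YAML metadata appended at each level. A minimal sketch, assuming the message dict accepted by parse_chat_message uses the usual role/content keys (that schema is not shown in this diff):

    from docent.data_models import AgentRun, Transcript
    from docent.data_models.chat import parse_chat_message

    run = AgentRun(
        transcripts=[
            Transcript(
                # role/content keys are an assumption for illustration
                messages=[parse_chat_message({"role": "user", "content": "Hello"})],
                metadata={},
            )
        ],
        metadata={"task": "demo"},
    )

    # Metadata keys become filterable fields, alongside the built-in "text" field.
    print(run.get_filterable_fields())
    # e.g. [{'name': 'metadata.task', 'type': 'str'}, {'name': 'text', 'type': 'str'}]

    # Depth-first tree rendering with per-level YAML metadata blocks.
    print(run.to_text_new())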
docent/data_models/remove_invalid_citation_ranges.py
CHANGED

@@ -66,16 +66,13 @@ def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) ->
         return None

     try:
-
-        if citation.transcript_idx >= len(transcript_keys):
+        if citation.transcript_idx >= len(agent_run.get_transcript_ids_ordered()):
             return None
+        transcript_id = agent_run.get_transcript_ids_ordered()[citation.transcript_idx]
+        transcript = agent_run.transcript_dict[transcript_id]

-        transcript_key = transcript_keys[citation.transcript_idx]
-
-        transcript = agent_run.transcripts[transcript_key]
         if citation.block_idx >= len(transcript.messages):
             return None
-
         message = transcript.messages[citation.block_idx]

         # Use the same formatting function that generates content for LLMs
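Citations now resolve transcript_idx through the run's depth-first transcript ordering rather than dict keys; the lookup path used above boils down to roughly:

    t_ids = agent_run.get_transcript_ids_ordered()
    if citation.transcript_idx < len(t_ids):
        transcript = agent_run.transcript_dict[t_ids[citation.transcript_idx]]
        message = transcript.messages[citation.block_idx]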
docent/data_models/transcript.py
CHANGED
@@ -1,10 +1,12 @@
 import sys
+import textwrap
 from datetime import datetime
 from typing import Any
 from uuid import uuid4

 import yaml
-from pydantic import BaseModel, Field, PrivateAttr,
+from pydantic import BaseModel, Field, PrivateAttr, field_validator
+from pydantic_core import to_jsonable_python

 from docent.data_models._tiktoken_util import (
     get_token_count,
@@ -13,12 +15,13 @@ from docent.data_models._tiktoken_util import (
 )
 from docent.data_models.chat import AssistantMessage, ChatMessage, ContentReasoning
 from docent.data_models.citation import RANGE_BEGIN, RANGE_END
+from docent.data_models.yaml_util import yaml_dump_metadata

 # Template for formatting individual transcript blocks
 TRANSCRIPT_BLOCK_TEMPLATE = """
-
+<|{index_label}; role: {role}|>
 {content}
-
+</|{index_label}; role: {role}|>
 """.strip()

 # Instructions for citing single transcript blocks
@@ -35,7 +38,7 @@ Important notes:
 - Each pair of brackets must contain only one citation. To cite multiple blocks, use multiple pairs of brackets, like [T0B0] [T0B1].
 """

-BLOCK_CITE_INSTRUCTION =
+BLOCK_CITE_INSTRUCTION = """Each transcript and each block has a unique index. Cite the relevant indices in brackets when relevant, like [T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [T<idx1>B<idx1>][T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."""


 def format_chat_message(
@@ -94,19 +97,11 @@ class TranscriptGroup(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     name: str | None = None
     description: str | None = None
-    collection_id: str
     agent_run_id: str
     parent_transcript_group_id: str | None = None
     created_at: datetime | None = None
     metadata: dict[str, Any] = Field(default_factory=dict)

-    @field_serializer("metadata")
-    def serialize_metadata(self, metadata: dict[str, Any], _info: Any) -> dict[str, Any]:
-        """
-        Custom serializer for the metadata field so the internal fields are explicitly preserved.
-        """
-        return fake_model_dump(metadata)
-
     @field_validator("metadata", mode="before")
     @classmethod
     def _validate_metadata_type(cls, v: Any) -> Any:
@@ -114,16 +109,33 @@ class TranscriptGroup(BaseModel):
             raise ValueError(f"metadata must be a dictionary, got {type(v).__name__}")
         return v  # type: ignore

+    def to_text_new(self, children_text: str, indent: int = 0) -> str:
+        """Render this transcript group with its children and metadata.

-
-    """
-    Emulate the action of pydantic.model_dump() for non-pydantic objects (to handle nested values)
-    """
+        Metadata appears below the rendered children content.

-
-
+        Args:
+            children_text: Pre-rendered text of this group's children (groups/transcripts).
+            indent: Number of spaces to indent the rendered output.
+
+        Returns:
+            str: XML-like wrapped text including the group's metadata.
+        """
+        # Prepare YAML metadata
+        yaml_text = yaml_dump_metadata(self.metadata)
+        if yaml_text is not None:
+            if indent > 0:
+                yaml_text = textwrap.indent(yaml_text, " " * indent)
+            inner = (
+                f"{children_text}\n<|{self.name} metadata|>\n{yaml_text}\n</|{self.name} metadata|>"
+            )
+        else:
+            inner = children_text

-
+        # Compose final text: content first, then metadata, all inside the group wrapper
+        if indent > 0:
+            inner = textwrap.indent(inner, " " * indent)
+        return f"<|{self.name}|>\n{inner}\n</|{self.name}|>"


 class Transcript(BaseModel):
@@ -152,13 +164,6 @@ class Transcript(BaseModel):
     metadata: dict[str, Any] = Field(default_factory=dict)
     _units_of_action: list[list[int]] | None = PrivateAttr(default=None)

-    @field_serializer("metadata")
-    def serialize_metadata(self, metadata: dict[str, Any], _info: Any) -> dict[str, Any]:
-        """
-        Custom serializer for the metadata field so the internal fields are explicitly preserved.
-        """
-        return fake_model_dump(metadata)
-
     @field_validator("metadata", mode="before")
     @classmethod
     def _validate_metadata_type(cls, v: Any) -> Any:
@@ -400,7 +405,7 @@ class Transcript(BaseModel):
         blocks_str = "\n".join(blocks)

         # Gather metadata
-        metadata_obj =
+        metadata_obj = to_jsonable_python(self.metadata)
         yaml_width = float("inf")
         block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
         metadata_str = f"<metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</metadata>"
@@ -484,15 +489,32 @@ class Transcript(BaseModel):
             use_action_units=False,
         )

+    ##############################
+    # New text rendering methods #
+    ##############################

-
-
-
-
-
+    def to_text_new(self, transcript_idx: int = 0, indent: int = 0) -> str:
+        # Format individual message blocks
+        blocks: list[str] = []
+        for msg_idx, message in enumerate(self.messages):
+            block_text = format_chat_message(message, msg_idx, transcript_idx)
+            blocks.append(block_text)
+        blocks_str = "\n".join(blocks)
+        if indent > 0:
+            blocks_str = textwrap.indent(blocks_str, " " * indent)
+
+        content_str = f"<|T{transcript_idx} blocks|>\n{blocks_str}\n</|T{transcript_idx} blocks|>"
+
+        # Gather metadata and add to content
+        yaml_text = yaml_dump_metadata(self.metadata)
+        if yaml_text is not None:
+            if indent > 0:
+                yaml_text = textwrap.indent(yaml_text, " " * indent)
+            content_str += (
+                f"\n<|T{transcript_idx} metadata|>\n{yaml_text}\n</|T{transcript_idx} metadata|>"
+            )

-
-
-
-
-        return v
+        # Format content and return
+        if indent > 0:
+            content_str = textwrap.indent(content_str, " " * indent)
+        return f"<|T{transcript_idx}|>\n{content_str}\n</|T{transcript_idx}|>\n"
docent/data_models/yaml_util.py
ADDED

@@ -0,0 +1,12 @@
+from typing import Any
+
+import yaml
+from pydantic_core import to_jsonable_python
+
+
+def yaml_dump_metadata(metadata: dict[str, Any]) -> str | None:
+    if not metadata:
+        return None
+    metadata_obj = to_jsonable_python(metadata)
+    yaml_text = yaml.dump(metadata_obj, width=float("inf"))
+    return yaml_text.strip()
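The helper returns None for empty metadata so callers can skip the metadata wrapper entirely; a quick usage sketch:

    from docent.data_models.yaml_util import yaml_dump_metadata

    yaml_dump_metadata({})                              # None: nothing to render
    yaml_dump_metadata({"task": "demo", "score": 0.5})  # "score: 0.5\ntask: demo" (stripped YAML, keys sorted, no line wrapping)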
docent/loaders/load_inspect.py
CHANGED
@@ -6,9 +6,12 @@ from zipfile import ZipFile
 from inspect_ai.log import EvalLog
 from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score

+from docent._log_util.logger import get_logger
 from docent.data_models import AgentRun, Transcript
 from docent.data_models.chat import parse_chat_message

+logger = get_logger(__name__)
+

 def _normalize_inspect_score(score: Score | dict[str, Any]) -> Any:
     """
@@ -83,12 +86,12 @@ def load_inspect_log(log: EvalLog) -> list[AgentRun]:

         agent_runs.append(
             AgentRun(
-                transcripts=
-
+                transcripts=[
+                    Transcript(
                         messages=[parse_chat_message(m.model_dump()) for m in s.messages],
                         metadata={},
                     )
-
+                ],
                 metadata=metadata,
             )
         )
@@ -120,11 +123,9 @@ def _read_sample_as_run(data: dict[str, Any], header_metadata: dict[str, Any] =
     }

     run = AgentRun(
-        transcripts=
-            "
-
-            ),
-        },
+        transcripts=[
+            Transcript(messages=[parse_chat_message(m) for m in data["messages"]], metadata={})
+        ],
         metadata=run_metadata,
     )
     return run
@@ -166,8 +167,12 @@ def _runs_from_eval_file(
     file: BinaryIO,
 ) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
     zip = ZipFile(file, mode="r")
-
-
+    try:
+        header: dict[str, Any] = json.load(zip.open("header.json", "r"))
+        header_metadata = _run_metadata_from_header(header)
+    except KeyError:
+        logger.warning(f"No header found in {file.name} file")
+        header_metadata = {}

     def _iter_runs() -> Generator[AgentRun, None, None]:
         try:
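A small sketch of the loader API as it is exercised by the SDK client below (the file path is illustrative; runs_from_file returns the header-derived metadata plus a generator of AgentRun objects with list-based transcripts):

    from docent.loaders import load_inspect

    with open("logs/example.eval", "rb") as f:
        header_metadata, runs = load_inspect.runs_from_file(f, format="eval")
        for run in runs:
            print(len(run.transcripts), run.metadata)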
docent/sdk/client.py
CHANGED
@@ -1,10 +1,14 @@
+import itertools
 import os
+from pathlib import Path
 from typing import Any

 import requests
+from tqdm import tqdm

 from docent._log_util.logger import get_logger
-from docent.data_models.agent_run import AgentRun
+from docent.data_models.agent_run import AgentRun
+from docent.loaders import load_inspect

 logger = get_logger(__name__)

@@ -100,49 +104,9 @@ class Docent:
         )
         return collection_id

-    def
-        self, collection_id: str,
-    ):
-        """Set inner and outer bin keys for a collection."""
-        response = self._session.post(
-            f"{self._server_url}/{collection_id}/set_io_bin_keys",
-            json={"inner_bin_key": inner_bin_key, "outer_bin_key": outer_bin_key},
-        )
-        response.raise_for_status()
-
-    def set_inner_bin_key(self, collection_id: str, dim: str):
-        """Set the inner bin key for a collection."""
-        current_io_bin_keys = self.get_io_bin_keys(collection_id)
-        if current_io_bin_keys is None:
-            current_io_bin_keys = (None, None)
-        self.set_io_bin_keys(collection_id, dim, current_io_bin_keys[1])  # Set inner, keep outer
-
-    def set_outer_bin_key(self, collection_id: str, dim: str):
-        """Set the outer bin key for a collection."""
-        current_io_bin_keys = self.get_io_bin_keys(collection_id)
-        if current_io_bin_keys is None:
-            current_io_bin_keys = (None, None)
-        self.set_io_bin_keys(collection_id, current_io_bin_keys[0], dim)  # Keep inner, set outer
-
-    def get_io_bin_keys(self, collection_id: str) -> tuple[str | None, str | None] | None:
-        """Gets the current inner and outer bin keys for a Collection.
-
-        Args:
-            collection_id: ID of the Collection.
-
-        Returns:
-            tuple: (inner_bin_key | None, outer_bin_key | None)
-
-        Raises:
-            requests.exceptions.HTTPError: If the API request fails.
-        """
-        url = f"{self._server_url}/{collection_id}/io_bin_keys"
-        response = self._session.get(url)
-        response.raise_for_status()
-        data = response.json()
-        return (data.get("inner_bin_key"), data.get("outer_bin_key"))
-
-    def add_agent_runs(self, collection_id: str, agent_runs: list[AgentRun]) -> dict[str, Any]:
+    def add_agent_runs(
+        self, collection_id: str, agent_runs: list[AgentRun], batch_size: int = 1000
+    ) -> dict[str, Any]:
         """Adds agent runs to a Collection.

         Agent runs represent execution traces that can be visualized and analyzed.
@@ -161,7 +125,6 @@ class Docent:
         from tqdm import tqdm

         url = f"{self._server_url}/{collection_id}/agent_runs"
-        batch_size = 1000
         total_runs = len(agent_runs)

         # Process agent runs in batches
@@ -302,7 +265,7 @@ class Docent:
         else:
             # We do this to avoid metadata validation failing
             # TODO(mengk): kinda hacky
-            return
+            return AgentRun.model_validate(response.json())

     def make_collection_public(self, collection_id: str) -> dict[str, Any]:
         """Make a collection publicly accessible to anyone with the link.
@@ -367,3 +330,84 @@ class Docent:
         response = self._session.get(url)
         response.raise_for_status()
         return response.json()
+
+    def recursively_ingest_inspect_logs(self, collection_id: str, fpath: str):
+        """Recursively search directory for .eval files and ingest them as agent runs.
+
+        Args:
+            collection_id: ID of the Collection to add agent runs to.
+            fpath: Path to directory to search recursively.
+
+        Raises:
+            ValueError: If the path doesn't exist or isn't a directory.
+            requests.exceptions.HTTPError: If any API requests fail.
+        """
+        root_path = Path(fpath)
+        if not root_path.exists():
+            raise ValueError(f"Path does not exist: {fpath}")
+        if not root_path.is_dir():
+            raise ValueError(f"Path is not a directory: {fpath}")
+
+        # Find all .eval files recursively
+        eval_files = list(root_path.rglob("*.eval"))
+
+        if not eval_files:
+            logger.info(f"No .eval files found in {fpath}")
+            return
+
+        logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
+
+        total_runs_added = 0
+        batch_size = 100
+
+        # Process each .eval file
+        for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
+            # Get total samples for progress tracking
+            total_samples = load_inspect.get_total_samples(eval_file, format="eval")
+
+            if total_samples == 0:
+                logger.info(f"No samples found in {eval_file}")
+                continue
+
+            # Load runs from file
+            with open(eval_file, "rb") as f:
+                _, runs_generator = load_inspect.runs_from_file(f, format="eval")
+
+                # Process runs in batches
+                runs_from_file = 0
+                batches = itertools.batched(runs_generator, batch_size)
+
+                with tqdm(
+                    total=total_samples,
+                    desc=f"Processing {eval_file.name}",
+                    unit="runs",
+                    leave=False,
+                ) as file_pbar:
+                    for batch in batches:
+                        batch_list = list(batch)  # Convert generator batch to list
+                        if not batch_list:
+                            break
+
+                        # Add batch to collection
+                        url = f"{self._server_url}/{collection_id}/agent_runs"
+                        payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
+
+                        response = self._session.post(url, json=payload)
+                        response.raise_for_status()
+
+                        runs_from_file += len(batch_list)
+                        file_pbar.update(len(batch_list))
+
+            total_runs_added += runs_from_file
+            logger.info(f"Added {runs_from_file} runs from {eval_file}")
+
+        # Compute embeddings after all files are processed
+        if total_runs_added > 0:
+            logger.info("Computing embeddings for added runs...")
+            url = f"{self._server_url}/{collection_id}/compute_embeddings"
+            response = self._session.post(url)
+            response.raise_for_status()
+
+        logger.info(
+            f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
+        )
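A usage sketch for the new ingestion path; the constructor arguments and the collection-creation helper are assumptions, since they are not shown in this diff:

    from docent.sdk.client import Docent

    client = Docent()  # hypothetical: server URL / credentials resolved from env or defaults
    collection_id = client.create_collection()  # hypothetical helper; any existing collection id works

    # Walks the directory tree, uploads each .eval file's runs in batches of 100,
    # then triggers embedding computation once everything is ingested.
    client.recursively_ingest_inspect_logs(collection_id, "path/to/eval/logs")

    # add_agent_runs now exposes batch_size (default 1000) instead of hard-coding it.
    # agent_runs: list[AgentRun] built elsewhere, e.g. via load_inspect.
    client.add_agent_runs(collection_id, agent_runs, batch_size=500)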
docent/trace.py
CHANGED
@@ -197,12 +197,14 @@ class DocentTracer:
         try:
             if "http" in endpoint.lower() or "https" in endpoint.lower():
                 http_exporter: HTTPExporter = HTTPExporter(
-                    endpoint=f"{endpoint}/v1/traces", headers=self.headers
+                    endpoint=f"{endpoint}/v1/traces", headers=self.headers, timeout=30
                 )
                 logger.debug(f"Initialized HTTP exporter for endpoint: {endpoint}/v1/traces")
                 return http_exporter
             else:
-                grpc_exporter: GRPCExporter = GRPCExporter(
+                grpc_exporter: GRPCExporter = GRPCExporter(
+                    endpoint=endpoint, headers=self.headers, timeout=30
+                )
                 logger.debug(f"Initialized gRPC exporter for endpoint: {endpoint}")
                 return grpc_exporter
         except Exception as e:
{docent_python-0.1.13a0.dist-info → docent_python-0.1.15a0.dist-info}/METADATA
CHANGED

@@ -1,14 +1,16 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.
+Version: 0.1.15a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
 Project-URL: Docs, https://transluce-docent.readthedocs-hosted.com/en/latest
 Author-email: Transluce <info@transluce.org>
-License-Expression:
+License-Expression: Apache-2.0
 License-File: LICENSE.md
 Requires-Python: >=3.11
+Requires-Dist: backoff>=2.2.1
+Requires-Dist: inspect-ai>=0.3.132
 Requires-Dist: opentelemetry-api>=1.34.1
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.34.1
 Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
{docent_python-0.1.13a0.dist-info → docent_python-0.1.15a0.dist-info}/RECORD
CHANGED

@@ -1,31 +1,32 @@
-docent/__init__.py,sha256=
-docent/agent_run_writer.py,sha256=QNCV4m36c9BuhzWCyuzs0wH9ql8uubzcQUXMhc3XVug,9135
+docent/__init__.py,sha256=fuhETwJPcesiB76Zxa64HBJxeaaTyRalIH-fs77TWsU,112
 docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/trace.py,sha256=
+docent/trace.py,sha256=bCO66QFgQ9L_4XM4PXnZToMi1Crtc9n0723kNjwCdm0,66823
 docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
 docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,73
 docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
 docent/data_models/__init__.py,sha256=4JbTDVzRhS5VZgo8MALwd_YI17GaN7X9E3rOc4Xl7kw,327
 docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
-docent/data_models/agent_run.py,sha256=
+docent/data_models/agent_run.py,sha256=bsZGL0D3HIO8oxfaeUzaUWRS82u-IiO7vs0-Lv9spks,19970
 docent/data_models/citation.py,sha256=zpF9WuvVEfktltw1M9P3hwpg5yywizFUKF5zROBR2cY,5062
 docent/data_models/metadata.py,sha256=r0SYC4i2x096dXMLfw_rAMtcJQCsoV6EOMPZuEngbGA,9062
 docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
-docent/data_models/remove_invalid_citation_ranges.py,sha256=
+docent/data_models/remove_invalid_citation_ranges.py,sha256=U-aIzRL-SuWFQZr1MqEGqXMNyIKQs7VQLxHDoFrMJwI,5658
 docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
-docent/data_models/transcript.py,sha256=
+docent/data_models/transcript.py,sha256=xA6fcGwYn8ewgqWdIgrXcq1Qbt7rByCKqDabffvCL0A,21387
+docent/data_models/yaml_util.py,sha256=6GrPWqbTZrryZh71cnSsiqbHkWVCd-8V3-6GeiEchUg,325
 docent/data_models/chat/__init__.py,sha256=GleyRzYqKRkwwSRm_tQJw5BudCbgu9WRSa71Fntz0L0,610
 docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
 docent/data_models/chat/message.py,sha256=xGt09keA6HRxw40xB_toNzEqA9ip7k53dnhXrEbKGO8,4157
 docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZyw,3049
-docent/loaders/load_inspect.py,sha256=
+docent/loaders/load_inspect.py,sha256=VLrtpvcVZ44n2DIPMwUivXqbvOWjaooGw6moY8UQ0VE,6789
 docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
 docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
 docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
 docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
 docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/sdk/
-
-docent_python-0.1.
-docent_python-0.1.
-docent_python-0.1.
+docent/sdk/agent_run_writer.py,sha256=QNCV4m36c9BuhzWCyuzs0wH9ql8uubzcQUXMhc3XVug,9135
+docent/sdk/client.py,sha256=fuJrTF87OtUojULFY7acZuqg5xmE8F-4HgEeEV8_gq0,14781
+docent_python-0.1.15a0.dist-info/METADATA,sha256=UP7y-vp_VE2RWb33COgh-mh0lhcJZhedCkKcsgBpUYc,1110
+docent_python-0.1.15a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+docent_python-0.1.15a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
+docent_python-0.1.15a0.dist-info/RECORD,,
docent_python-0.1.15a0.dist-info/licenses/LICENSE.md
ADDED

@@ -0,0 +1,13 @@
+Copyright 2025 Clarity AI Research Inc., dba Transluce
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
docent_python-0.1.13a0.dist-info/licenses/LICENSE.md
DELETED

@@ -1,7 +0,0 @@
-Copyright 2025 Clarity AI Research, Inc. dba Transluce
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/docent/{agent_run_writer.py → sdk/agent_run_writer.py}
File without changes
{docent_python-0.1.13a0.dist-info → docent_python-0.1.15a0.dist-info}/WHEEL
File without changes