docent-python 0.1.12a0__py3-none-any.whl → 0.1.14a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

docent/__init__.py CHANGED
@@ -1,3 +1,4 @@
-__all__ = ["Docent"]
+__all__ = ["Docent", "init"]
 
+from docent.sdk.agent_run_writer import init
 from docent.sdk.client import Docent
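
Since `init` is now re-exported at the package root, the writer can be started with a single call. A minimal sketch (assumes DOCENT_API_KEY is set in the environment):

import docent

# Creates (or reuses) a collection and starts the background AgentRunWriter
# defined in docent/sdk/agent_run_writer.py below.
writer = docent.init(collection_name="my-agent-runs")
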
docent/data_models/agent_run.py CHANGED
@@ -1,5 +1,7 @@
-import json
 import sys
+import textwrap
+from datetime import datetime
+from queue import Queue
 from typing import Any, Literal, TypedDict, cast
 from uuid import uuid4
 
@@ -7,18 +9,18 @@ import yaml
 from pydantic import (
     BaseModel,
     Field,
-    field_serializer,
+    PrivateAttr,
     field_validator,
     model_validator,
 )
+from pydantic_core import to_jsonable_python
 
+from docent._log_util import get_logger
 from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
-from docent.data_models.transcript import (
-    Transcript,
-    TranscriptGroup,
-    TranscriptWithoutMetadataValidator,
-    fake_model_dump,
-)
+from docent.data_models.transcript import Transcript, TranscriptGroup
+from docent.data_models.yaml_util import yaml_dump_metadata
+
+logger = get_logger(__name__)
 
 
 class FilterableField(TypedDict):
@@ -36,8 +38,8 @@ class AgentRun(BaseModel):
         id: Unique identifier for the agent run, auto-generated by default.
         name: Optional human-readable name for the agent run.
         description: Optional description of the agent run.
-        transcripts: Dict mapping transcript IDs to Transcript objects.
-        transcript_groups: Dict mapping transcript group IDs to TranscriptGroup objects.
+        transcripts: List of Transcript objects.
+        transcript_groups: List of TranscriptGroup objects.
         metadata: Additional structured metadata about the agent run as a JSON-serializable dictionary.
     """
@@ -45,36 +47,31 @@ class AgentRun(BaseModel):
     name: str | None = None
     description: str | None = None
 
-    transcripts: dict[str, Transcript]
-    transcript_groups: dict[str, TranscriptGroup] = Field(default_factory=dict)
+    transcripts: list[Transcript]
+    transcript_groups: list[TranscriptGroup] = Field(default_factory=list)
     metadata: dict[str, Any] = Field(default_factory=dict)
 
-    @field_serializer("metadata")
-    def serialize_metadata(self, metadata: dict[str, Any], _info: Any) -> dict[str, Any]:
-        """
-        Custom serializer for the metadata field - returns the dict as-is since it's already serializable.
-        """
-        return fake_model_dump(metadata)
-
-    @field_validator("metadata", mode="before")
+    @field_validator("transcripts", mode="before")
     @classmethod
-    def _validate_metadata_json_serializable(cls, v: Any) -> dict[str, Any]:
-        """
-        Validates that metadata is a dictionary and is JSON-serializable.
-        """
-        if v is None:
-            return {}
-
-        if not isinstance(v, dict):
-            raise ValueError(f"metadata must be a dictionary, got {type(v).__name__}")
-
-        # Check that the metadata is JSON serializable
-        try:
-            json.dumps(fake_model_dump(cast(dict[str, Any], v)))
-        except (TypeError, ValueError) as e:
-            raise ValueError(f"metadata must be JSON-serializable: {e}")
+    def _validate_transcripts_type(cls, v: Any) -> Any:
+        if isinstance(v, dict):
+            logger.warning(
+                "dict[str, Transcript] for transcripts is deprecated. Use list[Transcript] instead."
+            )
+            v = cast(dict[str, Transcript], v)
+            return [Transcript.model_validate(t) for t in v.values()]
+        return v
 
-        return cast(dict[str, Any], v)
+    @field_validator("transcript_groups", mode="before")
+    @classmethod
+    def _validate_transcript_groups_type(cls, v: Any) -> Any:
+        if isinstance(v, dict):
+            logger.warning(
+                "dict[str, TranscriptGroup] for transcript_groups is deprecated. Use list[TranscriptGroup] instead."
+            )
+            v = cast(dict[str, TranscriptGroup], v)
+            return [TranscriptGroup.model_validate(tg) for tg in v.values()]
+        return v
 
     @model_validator(mode="after")
     def _validate_transcripts_not_empty(self):
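
The two mode="before" validators above keep the old dict-based construction working: a dict keyed by transcript ID is converted to a plain list (after logging a deprecation warning) before normal validation runs. A hedged sketch of both accepted forms; the message dict is illustrative:

from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import parse_chat_message

messages = [parse_chat_message({"role": "user", "content": "hello"})]

# Deprecated dict form: still accepted, logs a warning, converted to a list
legacy = AgentRun(transcripts={"main": Transcript(messages=messages, metadata={})})

# New canonical list form
current = AgentRun(transcripts=[Transcript(messages=messages, metadata={})])
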
@@ -90,6 +87,52 @@ class AgentRun(BaseModel):
             raise ValueError("AgentRun must have at least one transcript")
         return self
 
+    def get_filterable_fields(self, max_depth: int = 1) -> list[FilterableField]:
+        """Returns a list of all fields that can be used to filter the agent run,
+        by recursively exploring the model_dump() for singleton types in dictionaries.
+
+        Returns:
+            list[FilterableField]: A list of filterable fields, where each field is a
+                dictionary containing its 'name' (path) and 'type'.
+        """
+
+        result: list[FilterableField] = []
+
+        def _explore_dict(d: dict[str, Any], prefix: str, depth: int):
+            nonlocal result
+
+            if depth > max_depth:
+                return
+
+            for k, v in d.items():
+                if isinstance(v, (str, int, float, bool)):
+                    result.append(
+                        {
+                            "name": f"{prefix}.{k}",
+                            "type": cast(Literal["str", "bool", "int", "float"], type(v).__name__),
+                        }
+                    )
+                elif isinstance(v, dict):
+                    _explore_dict(cast(dict[str, Any], v), f"{prefix}.{k}", depth + 1)
+
+        # Look at the agent run metadata
+        _explore_dict(to_jsonable_python(self.metadata), "metadata", 0)
+        # Look at the transcript metadata
+        # TODO(mengk): restore this later when we have the ability to integrate with SQL.
+        # for t_id, t in self.transcripts.items():
+        #     _explore_dict(
+        #         t.metadata.model_dump(strip_internal_fields=True), f"transcript.{t_id}.metadata", 0
+        #     )
+
+        # Append the text field
+        result.append({"name": "text", "type": "str"})
+
+        return result
+
+    ######################
+    # Converting to text #
+    ######################
+
     def _to_text_impl(self, token_limit: int = sys.maxsize, use_blocks: bool = False) -> list[str]:
         """
         Core implementation for converting agent run to text representation.
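
For reference, get_filterable_fields explores one level of metadata nesting by default (max_depth=1) and always appends the synthetic "text" field. A sketch with hypothetical metadata values:

from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import parse_chat_message

messages = [parse_chat_message({"role": "user", "content": "hello"})]
run = AgentRun(
    transcripts=[Transcript(messages=messages, metadata={})],
    metadata={"model": "gpt-4o", "score": 0.95, "config": {"seed": 1}},
)
# Expected result (order follows dict iteration):
# [{"name": "metadata.model", "type": "str"},
#  {"name": "metadata.score", "type": "float"},
#  {"name": "metadata.config.seed", "type": "int"},
#  {"name": "text", "type": "str"}]
print(run.get_filterable_fields())
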
@@ -103,7 +146,7 @@ class AgentRun(BaseModel):
         """
         # Generate transcript strings using appropriate method
         transcript_strs: list[str] = []
-        for i, (t_key, t) in enumerate(self.transcripts.items()):
+        for i, t in enumerate(self.transcripts):
             if use_blocks:
                 transcript_content = t.to_str_blocks_with_token_limit(
                     token_limit=sys.maxsize,
@@ -116,14 +159,12 @@ class AgentRun(BaseModel):
                     transcript_idx=i,
                     agent_run_idx=None,
                 )[0]
-            transcript_strs.append(
-                f"<transcript {t_key}>\n{transcript_content}\n</transcript {t_key}>"
-            )
+            transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")
 
         transcripts_str = "\n\n".join(transcript_strs)
 
         # Gather metadata
-        metadata_obj = fake_model_dump(self.metadata)
+        metadata_obj = to_jsonable_python(self.metadata)
         if self.name is not None:
             metadata_obj["name"] = self.name
         if self.description is not None:
@@ -164,12 +205,12 @@ class AgentRun(BaseModel):
                 assert (
                     msg_range.end == msg_range.start + 1
                 ), "Ranges without metadata should be a single message"
-                t_id, t = list(self.transcripts.items())[msg_range.start]
+                t = self.transcripts[msg_range.start]
                 if msg_range.num_tokens < token_limit - 50:
                     if use_blocks:
-                        transcript = f"<transcript {t_id}>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript {t_id}>"
+                        transcript = f"<transcript>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
                     else:
-                        transcript = f"<transcript {t_id}>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript {t_id}>"
+                        transcript = f"<transcript>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
                     result = (
                         f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                     )
@@ -184,7 +225,7 @@ class AgentRun(BaseModel):
                         token_limit=token_limit - 50,
                     )
                     for fragment in transcript_fragments:
-                        result = f"<transcript {t_id}>\n{fragment}\n</transcript {t_id}>"
+                        result = f"<transcript>\n{fragment}\n</transcript>"
                         result = (
                             f"Here is a partial agent run for analysis purposes only:\n{result}"
                         )
@@ -229,71 +270,199 @@ class AgentRun(BaseModel):
         """
         return self._to_text_impl(token_limit=sys.maxsize, use_blocks=True)[0]
 
-    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
-        """Extends the parent model_dump method to include the text property.
+    ##############################
+    # New text rendering methods #
+    ##############################
+
+    # Transcript ID -> Transcript
+    _transcript_dict: dict[str, Transcript] | None = PrivateAttr(default=None)
+    # Transcript Group ID -> Transcript Group
+    _transcript_group_dict: dict[str, TranscriptGroup] | None = PrivateAttr(default=None)
+    # Canonical tree cache keyed by full_tree flag
+    _canonical_tree_cache: dict[bool, dict[str | None, list[tuple[Literal["t", "tg"], str]]]] = (
+        PrivateAttr(default_factory=dict)
+    )
+    # Transcript IDs (depth-first) cache keyed by full_tree flag
+    _transcript_ids_ordered_cache: dict[bool, list[str]] = PrivateAttr(default_factory=dict)
+
+    @property
+    def transcript_dict(self) -> dict[str, Transcript]:
+        """Lazily compute and cache a mapping from transcript ID to Transcript."""
+        if self._transcript_dict is None:
+            self._transcript_dict = {t.id: t for t in self.transcripts}
+        return self._transcript_dict
+
+    @property
+    def transcript_group_dict(self) -> dict[str, TranscriptGroup]:
+        """Lazily compute and cache a mapping from transcript group ID to TranscriptGroup."""
+        if self._transcript_group_dict is None:
+            self._transcript_group_dict = {tg.id: tg for tg in self.transcript_groups}
+        return self._transcript_group_dict
+
+    def get_canonical_tree(
+        self, full_tree: bool = False
+    ) -> dict[str | None, list[tuple[Literal["t", "tg"], str]]]:
+        """Compute and cache the canonical, sorted transcript group tree.
 
         Args:
-            *args: Variable length argument list passed to parent method.
-            **kwargs: Arbitrary keyword arguments passed to parent method.
+            full_tree: If True, include all transcript groups regardless of whether
+                they contain transcripts. If False, include only the minimal tree
+                that connects relevant groups and transcripts.
 
         Returns:
-            dict[str, Any]: Dictionary representation of the model including the text property.
+            Canonical tree mapping parent group id (or "__global_root") to a list of
+            children (type, id) tuples sorted by creation time.
         """
-        return super().model_dump(*args, **kwargs) | {"text": self.text}
+        if (
+            full_tree not in self._canonical_tree_cache
+            or full_tree not in self._transcript_ids_ordered_cache
+        ):
+            canonical_tree, transcript_idx_map = self._build_canonical_tree(full_tree=full_tree)
+            self._canonical_tree_cache[full_tree] = canonical_tree
+            self._transcript_ids_ordered_cache[full_tree] = list(transcript_idx_map.keys())
+        return self._canonical_tree_cache[full_tree]
+
+    def get_transcript_ids_ordered(self, full_tree: bool = False) -> list[str]:
+        """Compute and cache the depth-first transcript id ordering.
 
-    def get_filterable_fields(self, max_depth: int = 1) -> list[FilterableField]:
-        """Returns a list of all fields that can be used to filter the agent run,
-        by recursively exploring the model_dump() for singleton types in dictionaries.
+        Args:
+            full_tree: Whether to compute based on the full tree or the minimal tree.
 
         Returns:
-            list[FilterableField]: A list of filterable fields, where each field is a
-                dictionary containing its 'name' (path) and 'type'.
+            List of transcript ids in depth-first order.
         """
-
-        result: list[FilterableField] = []
-
-        def _explore_dict(d: dict[str, Any], prefix: str, depth: int):
-            nonlocal result
-
-            if depth > max_depth:
-                return
-
-            for k, v in d.items():
-                if isinstance(v, (str, int, float, bool)):
-                    result.append(
-                        {
-                            "name": f"{prefix}.{k}",
-                            "type": cast(Literal["str", "bool", "int", "float"], type(v).__name__),
-                        }
+        if (
+            full_tree not in self._transcript_ids_ordered_cache
+            or full_tree not in self._canonical_tree_cache
+        ):
+            canonical_tree, transcript_idx_map = self._build_canonical_tree(full_tree=full_tree)
+            self._canonical_tree_cache[full_tree] = canonical_tree
+            self._transcript_ids_ordered_cache[full_tree] = list(transcript_idx_map.keys())
+        return self._transcript_ids_ordered_cache[full_tree]
+
+    def _build_canonical_tree(self, full_tree: bool = False):
+        t_dict = self.transcript_dict
+        tg_dict = self.transcript_group_dict
+
+        # Find all transcript groups that have direct transcript children
+        # Also keep track of transcripts that are not in a group
+        tgs_to_transcripts: dict[str, set[str]] = {}
+        for transcript in t_dict.values():
+            if transcript.transcript_group_id is None:
+                tgs_to_transcripts.setdefault("__global_root", set()).add(transcript.id)
+            else:
+                tgs_to_transcripts.setdefault(transcript.transcript_group_id, set()).add(
+                    transcript.id
+                )
+
+        # tg_tree maps from parent -> children. A child can be a group or a transcript.
+        # A parent must be a group (or None, for transcripts that are not in a group).
+        tg_tree: dict[str, set[tuple[Literal["t", "tg"], str]]] = {}
+
+        if full_tree:
+            for tg_id, tg in tg_dict.items():
+                tg_tree.setdefault(tg.parent_transcript_group_id or "__global_root", set()).add(
+                    ("tg", tg_id)
+                )
+                for t_id in tgs_to_transcripts.get(tg_id, []):
+                    tg_tree.setdefault(tg_id, set()).add(("t", t_id))
+            for t_id, t in t_dict.items():
+                tg_tree.setdefault(t.transcript_group_id or "__global_root", set()).add(("t", t_id))
+        else:
+            # Initialize q with "important" tgs
+            q, seen = Queue[str](), set[str]()
+            for tg_id in tgs_to_transcripts.keys():
+                q.put(tg_id)
+                seen.add(tg_id)
+
+            # Do an "upwards BFS" from leaves up to the root. Builds a tree of only relevant nodes.
+            while q.qsize() > 0:
+                u_id = q.get()
+                u = tg_dict.get(u_id)  # None if __global_root
+
+                # Add the transcripts under this tg
+                for t_id in tgs_to_transcripts.get(u_id, []):
+                    tg_tree.setdefault(u_id, set()).add(("t", t_id))
+
+                # Add an edge from the parent
+                if u is not None:
+                    par_id = u.parent_transcript_group_id or "__global_root"
+                    # Mark u as a child of par
+                    tg_tree.setdefault(par_id, set()).add(("tg", u_id))
+                    # If we haven't investigated the parent before, add to q
+                    if par_id not in seen:
+                        q.put(par_id)
+                        seen.add(par_id)
+
+        # For each node, sort by created_at timestamp
+
+        def _cmp(element: tuple[Literal["t", "tg"], str]) -> datetime:
+            obj_type, obj_id = element
+            if obj_type == "tg":
+                return tg_dict[obj_id].created_at or datetime.max
+            else:
+                return t_dict[obj_id].created_at or datetime.max
+
+        c_tree: dict[str | None, list[tuple[Literal["t", "tg"], str]]] = {}
+        for tg_id in tg_tree:
+            children_ids = list(set(tg_tree[tg_id]))
+            sorted_children_ids = sorted(children_ids, key=_cmp)
+            c_tree[tg_id] = sorted_children_ids
+
+        # Compute transcript indices as the depth-first traversal index
+        transcript_idx_map: dict[str, int] = {}
+
+        def _assign_transcript_indices(cur_tg_id: str, next_idx: int) -> int:
+            children = c_tree.get(cur_tg_id, [])
+            for child_type, child_id in children:
+                if child_type == "tg":
+                    next_idx = _assign_transcript_indices(child_id, next_idx)
+                else:
+                    transcript_idx_map[child_id] = next_idx
+                    next_idx += 1
+            return next_idx
+
+        _assign_transcript_indices("__global_root", 0)
+
+        return c_tree, transcript_idx_map
+
+    def to_text_new(self, indent: int = 0, full_tree: bool = False):
+        c_tree = self.get_canonical_tree(full_tree=full_tree)
+        t_ids_ordered = self.get_transcript_ids_ordered(full_tree=full_tree)
+        t_idx_map = {t_id: i for i, t_id in enumerate(t_ids_ordered)}
+        t_dict = self.transcript_dict
+        tg_dict = self.transcript_group_dict
+
+        # Traverse the tree and render the string
+        def _recurse(tg_id: str) -> str:
+            children_ids = c_tree.get(tg_id, [])
+            children_texts: list[str] = []
+            for child_type, child_id in children_ids:
+                if child_type == "tg":
+                    children_texts.append(_recurse(child_id))
+                else:
+                    cur_text = t_dict[child_id].to_text_new(
+                        transcript_idx=t_idx_map[child_id],
+                        indent=indent,
                     )
-                elif isinstance(v, dict):
-                    _explore_dict(cast(dict[str, Any], v), f"{prefix}.{k}", depth + 1)
-
-        # Look at the agent run metadata
-        _explore_dict(fake_model_dump(self.metadata), "metadata", 0)
-        # Look at the transcript metadata
-        # TODO(mengk): restore this later when we have the ability to integrate with SQL.
-        # for t_id, t in self.transcripts.items():
-        #     _explore_dict(
-        #         t.metadata.model_dump(strip_internal_fields=True), f"transcript.{t_id}.metadata", 0
-        #     )
-
-        # Append the text field
-        result.append({"name": "text", "type": "str"})
-
-        return result
+                    children_texts.append(cur_text)
+            children_text = "\n".join(children_texts)
 
+            # No wrapper for global root
+            if tg_id == "__global_root":
+                return children_text
+            # Delegate rendering to TranscriptGroup
+            else:
+                tg = tg_dict[tg_id]
+                return tg.to_text_new(children_text=children_text, indent=indent)
 
-class AgentRunWithoutMetadataValidator(AgentRun):
-    """
-    A version of AgentRun that doesn't have the model_validator on metadata.
-    Needed for sending/receiving agent runs via JSON, since they incorrectly trip the existing model_validator.
-    """
+        text = _recurse("__global_root")
 
-    transcripts: dict[str, TranscriptWithoutMetadataValidator]  # type: ignore
+        # Append agent run metadata below the full content
+        yaml_text = yaml_dump_metadata(self.metadata)
+        if yaml_text is not None:
+            if indent > 0:
+                yaml_text = textwrap.indent(yaml_text, " " * indent)
+            text += f"\n<|agent run metadata|>\n{yaml_text}\n</|agent run metadata|>"
 
-    @field_validator("metadata", mode="before")
-    @classmethod
-    def _validate_metadata_type(cls, v: Any) -> Any:
-        # Bypass the model_validator
-        return v
+        return text
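
The canonical tree above hangs everything off a synthetic "__global_root" parent: ungrouped transcripts attach to it directly, and with full_tree=False an upwards BFS keeps only groups reachable from a transcript. A hedged sketch of the resulting structures, assuming the constructor fields shown in this diff:

from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import parse_chat_message
from docent.data_models.transcript import TranscriptGroup

messages = [parse_chat_message({"role": "user", "content": "hello"})]
tg = TranscriptGroup(id="tg1", name="episode-1", agent_run_id="run-1")
t = Transcript(messages=messages, transcript_group_id="tg1", metadata={})
run = AgentRun(transcripts=[t], transcript_groups=[tg])

# Canonical tree: {"__global_root": [("tg", "tg1")], "tg1": [("t", t.id)]}
print(run.get_canonical_tree())
# Depth-first order of transcript ids determines the T<idx> labels used below
print(run.get_transcript_ids_ordered())
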
docent/data_models/chat/tool.py CHANGED
@@ -20,9 +20,9 @@ class ToolCall:
     """
 
     id: str
-    type: Literal["function"] | None
     function: str
     arguments: dict[str, Any]
+    type: Literal["function"] | None = None
     parse_error: str | None = None
     view: ToolCallContent | None = None
 
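
Because `type` now carries a default and sits after the required fields, it can be omitted at construction time. A sketch:

from docent.data_models.chat.tool import ToolCall

# `type` defaults to None, so only id, function, and arguments are required
call = ToolCall(id="call_1", function="search", arguments={"query": "docs"})
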
docent/data_models/remove_invalid_citation_ranges.py CHANGED
@@ -66,16 +66,13 @@ def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) ->
         return None
 
     try:
-        transcript_keys = list(agent_run.transcripts.keys())
-        if citation.transcript_idx >= len(transcript_keys):
+        if citation.transcript_idx >= len(agent_run.get_transcript_ids_ordered()):
             return None
+        transcript_id = agent_run.get_transcript_ids_ordered()[citation.transcript_idx]
+        transcript = agent_run.transcript_dict[transcript_id]
 
-        transcript_key = transcript_keys[citation.transcript_idx]
-
-        transcript = agent_run.transcripts[transcript_key]
         if citation.block_idx >= len(transcript.messages):
             return None
-
         message = transcript.messages[citation.block_idx]
 
         # Use the same formatting function that generates content for LLMs
docent/data_models/transcript.py CHANGED
@@ -1,10 +1,12 @@
 import sys
+import textwrap
 from datetime import datetime
 from typing import Any
 from uuid import uuid4
 
 import yaml
-from pydantic import BaseModel, Field, PrivateAttr, field_serializer, field_validator
+from pydantic import BaseModel, Field, PrivateAttr, field_validator
+from pydantic_core import to_jsonable_python
 
 from docent.data_models._tiktoken_util import (
     get_token_count,
@@ -13,12 +15,13 @@ from docent.data_models._tiktoken_util import (
 )
 from docent.data_models.chat import AssistantMessage, ChatMessage, ContentReasoning
 from docent.data_models.citation import RANGE_BEGIN, RANGE_END
+from docent.data_models.yaml_util import yaml_dump_metadata
 
 # Template for formatting individual transcript blocks
 TRANSCRIPT_BLOCK_TEMPLATE = """
-<{index_label} | role: {role}>
+<|{index_label}; role: {role}|>
 {content}
-</{index_label}>
+</|{index_label}; role: {role}|>
 """.strip()
 
 # Instructions for citing single transcript blocks
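
Filling in the new template with an index_label of "T0B0" (presumably matching the [T<idx>B<idx>] citation format) and role "user", a block would render as follows; note the closing tag now repeats both the label and the role:

<|T0B0; role: user|>
...message content...
</|T0B0; role: user|>
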
@@ -35,7 +38,7 @@ Important notes:
 - Each pair of brackets must contain only one citation. To cite multiple blocks, use multiple pairs of brackets, like [T0B0] [T0B1].
 """
 
-BLOCK_CITE_INSTRUCTION = f"""Each transcript and each block has a unique index. Cite the relevant indices in brackets when relevant, like [T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [T<idx1>B<idx1>][T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."""
+BLOCK_CITE_INSTRUCTION = """Each transcript and each block has a unique index. Cite the relevant indices in brackets when relevant, like [T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [T<idx1>B<idx1>][T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."""
 
 
 def format_chat_message(
@@ -94,19 +97,11 @@ class TranscriptGroup(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     name: str | None = None
     description: str | None = None
-    collection_id: str
     agent_run_id: str
     parent_transcript_group_id: str | None = None
     created_at: datetime | None = None
     metadata: dict[str, Any] = Field(default_factory=dict)
 
-    @field_serializer("metadata")
-    def serialize_metadata(self, metadata: dict[str, Any], _info: Any) -> dict[str, Any]:
-        """
-        Custom serializer for the metadata field so the internal fields are explicitly preserved.
-        """
-        return fake_model_dump(metadata)
-
     @field_validator("metadata", mode="before")
     @classmethod
     def _validate_metadata_type(cls, v: Any) -> Any:
@@ -114,16 +109,33 @@ class TranscriptGroup(BaseModel):
             raise ValueError(f"metadata must be a dictionary, got {type(v).__name__}")
         return v  # type: ignore
 
+    def to_text_new(self, children_text: str, indent: int = 0) -> str:
+        """Render this transcript group with its children and metadata.
 
-def fake_model_dump(obj: dict[str, Any]) -> dict[str, Any]:
-    """
-    Emulate the action of pydantic.model_dump() for non-pydantic objects (to handle nested values)
-    """
+        Metadata appears below the rendered children content.
 
-    class _FakeModel(BaseModel):
-        data: dict[str, Any]
+        Args:
+            children_text: Pre-rendered text of this group's children (groups/transcripts).
+            indent: Number of spaces to indent the rendered output.
+
+        Returns:
+            str: XML-like wrapped text including the group's metadata.
+        """
+        # Prepare YAML metadata
+        yaml_text = yaml_dump_metadata(self.metadata)
+        if yaml_text is not None:
+            if indent > 0:
+                yaml_text = textwrap.indent(yaml_text, " " * indent)
+            inner = (
+                f"{children_text}\n<|{self.name} metadata|>\n{yaml_text}\n</|{self.name} metadata|>"
+            )
+        else:
+            inner = children_text
 
-    return _FakeModel(data=obj).model_dump()["data"]
+        # Compose final text: content first, then metadata, all inside the group wrapper
+        if indent > 0:
+            inner = textwrap.indent(inner, " " * indent)
+        return f"<|{self.name}|>\n{inner}\n</|{self.name}|>"
 
 
 class Transcript(BaseModel):
@@ -152,13 +164,6 @@ class Transcript(BaseModel):
     metadata: dict[str, Any] = Field(default_factory=dict)
     _units_of_action: list[list[int]] | None = PrivateAttr(default=None)
 
-    @field_serializer("metadata")
-    def serialize_metadata(self, metadata: dict[str, Any], _info: Any) -> dict[str, Any]:
-        """
-        Custom serializer for the metadata field so the internal fields are explicitly preserved.
-        """
-        return fake_model_dump(metadata)
-
     @field_validator("metadata", mode="before")
     @classmethod
     def _validate_metadata_type(cls, v: Any) -> Any:
@@ -400,7 +405,7 @@ class Transcript(BaseModel):
         blocks_str = "\n".join(blocks)
 
         # Gather metadata
-        metadata_obj = fake_model_dump(self.metadata)
+        metadata_obj = to_jsonable_python(self.metadata)
         yaml_width = float("inf")
         block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
         metadata_str = f"<metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</metadata>"
@@ -484,15 +489,32 @@ class Transcript(BaseModel):
             use_action_units=False,
         )
 
+    ##############################
+    # New text rendering methods #
+    ##############################
 
-class TranscriptWithoutMetadataValidator(Transcript):
-    """
-    A version of Transcript that doesn't have the model_validator on metadata.
-    Needed for sending/receiving transcripts via JSON, since they incorrectly trip the existing model_validator.
-    """
+    def to_text_new(self, transcript_idx: int = 0, indent: int = 0) -> str:
+        # Format individual message blocks
+        blocks: list[str] = []
+        for msg_idx, message in enumerate(self.messages):
+            block_text = format_chat_message(message, msg_idx, transcript_idx)
+            blocks.append(block_text)
+        blocks_str = "\n".join(blocks)
+        if indent > 0:
+            blocks_str = textwrap.indent(blocks_str, " " * indent)
+
+        content_str = f"<|T{transcript_idx} blocks|>\n{blocks_str}\n</|T{transcript_idx} blocks|>"
+
+        # Gather metadata and add to content
+        yaml_text = yaml_dump_metadata(self.metadata)
+        if yaml_text is not None:
+            if indent > 0:
+                yaml_text = textwrap.indent(yaml_text, " " * indent)
+            content_str += (
+                f"\n<|T{transcript_idx} metadata|>\n{yaml_text}\n</|T{transcript_idx} metadata|>"
+            )
 
-    @field_validator("metadata", mode="before")
-    @classmethod
-    def _validate_metadata_type(cls, v: Any) -> Any:
-        # Bypass the model_validator
-        return v
+        # Format content and return
+        if indent > 0:
+            content_str = textwrap.indent(content_str, " " * indent)
+        return f"<|T{transcript_idx}|>\n{content_str}\n</|T{transcript_idx}|>\n"
docent/data_models/yaml_util.py ADDED
@@ -0,0 +1,12 @@
+from typing import Any
+
+import yaml
+from pydantic_core import to_jsonable_python
+
+
+def yaml_dump_metadata(metadata: dict[str, Any]) -> str | None:
+    if not metadata:
+        return None
+    metadata_obj = to_jsonable_python(metadata)
+    yaml_text = yaml.dump(metadata_obj, width=float("inf"))
+    return yaml_text.strip()
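
Behavior sketch for the new helper: empty metadata yields None, so callers can skip the metadata wrapper entirely; otherwise the dict is converted to JSON-able values and dumped as stripped YAML:

from docent.data_models.yaml_util import yaml_dump_metadata

assert yaml_dump_metadata({}) is None
print(yaml_dump_metadata({"k": [1, 2]}))
# k:
# - 1
# - 2
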
docent/loaders/load_inspect.py CHANGED
@@ -6,9 +6,12 @@ from zipfile import ZipFile
 from inspect_ai.log import EvalLog
 from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score
 
+from docent._log_util.logger import get_logger
 from docent.data_models import AgentRun, Transcript
 from docent.data_models.chat import parse_chat_message
 
+logger = get_logger(__name__)
+
 
 
 def _normalize_inspect_score(score: Score | dict[str, Any]) -> Any:
@@ -83,12 +86,12 @@ def load_inspect_log(log: EvalLog) -> list[AgentRun]:
 
         agent_runs.append(
             AgentRun(
-                transcripts={
-                    "main": Transcript(
+                transcripts=[
+                    Transcript(
                         messages=[parse_chat_message(m.model_dump()) for m in s.messages],
                         metadata={},
                     )
-                },
+                ],
                 metadata=metadata,
             )
         )
@@ -120,11 +123,9 @@ def _read_sample_as_run(data: dict[str, Any], header_metadata: dict[str, Any] =
     }
 
     run = AgentRun(
-        transcripts={
-            "main": Transcript(
-                messages=[parse_chat_message(m) for m in data["messages"]], metadata={}
-            ),
-        },
+        transcripts=[
+            Transcript(messages=[parse_chat_message(m) for m in data["messages"]], metadata={})
+        ],
         metadata=run_metadata,
     )
     return run
@@ -166,8 +167,12 @@ def _runs_from_eval_file(
     file: BinaryIO,
 ) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
     zip = ZipFile(file, mode="r")
-    header: dict[str, Any] = json.load(zip.open("header.json", "r"))
-    header_metadata = _run_metadata_from_header(header)
+    try:
+        header: dict[str, Any] = json.load(zip.open("header.json", "r"))
+        header_metadata = _run_metadata_from_header(header)
+    except KeyError:
+        logger.warning(f"No header found in {file.name} file")
+        header_metadata = {}
 
     def _iter_runs() -> Generator[AgentRun, None, None]:
         try:
docent/sdk/agent_run_writer.py ADDED
@@ -0,0 +1,266 @@
+import atexit
+import os
+import queue
+import signal
+import threading
+import time
+from typing import Any, Callable, Coroutine, Optional
+
+import anyio
+import backoff
+import httpx
+from backoff.types import Details
+
+from docent._log_util.logger import get_logger
+from docent.data_models.agent_run import AgentRun
+from docent.sdk.client import Docent
+
+logger = get_logger(__name__)
+
+
+def _giveup(exc: BaseException) -> bool:
+    """Give up on client errors."""
+
+    if isinstance(exc, httpx.HTTPStatusError):
+        status = exc.response.status_code
+        return status < 500 and status != 429
+    return False
+
+
+def _print_backoff_message(e: Details):
+    logger.warning(
+        f"AgentRunWriter backing off for {e['wait']:.2f}s due to {e['exception'].__class__.__name__}"  # type: ignore
+    )
+
+
+class AgentRunWriter:
+    """Background thread for logging agent runs.
+
+    Args:
+        api_key (str): API key for the Docent API.
+        collection_id (str): ID of the collection to log agent runs to.
+        server_url (str): URL of the Docent server.
+        num_workers (int): Max number of concurrent tasks to run,
+            managed by anyio.CapacityLimiter.
+        queue_maxsize (int): Maximum size of the queue.
+            If maxsize is <= 0, the queue size is infinite.
+        request_timeout (float): Timeout for the HTTP request.
+        flush_interval (float): Interval to flush the queue.
+        batch_size (int): Number of agent runs to batch together.
+        max_retries (int): Maximum number of retries for the HTTP request.
+        shutdown_timeout (int): Timeout to wait for the background thread to finish
+            after the main thread has requested shutdown.
+    """
+
+    _instance: Optional["AgentRunWriter"] = None
+    _instance_lock = threading.Lock()
+
+    def __init__(
+        self,
+        api_key: str,
+        collection_id: str,
+        server_url: str = "https://api.docent.transluce.org",
+        num_workers: int = 2,
+        queue_maxsize: int = 20_000,
+        request_timeout: float = 30.0,
+        flush_interval: float = 1.0,
+        batch_size: int = 1_000,
+        max_retries: int = 5,
+        shutdown_timeout: int = 60,
+    ) -> None:
+        with self._instance_lock:
+            if AgentRunWriter._instance is not None:
+                return
+            AgentRunWriter._instance = self
+
+        # Request parameters
+        self._headers = {"Authorization": f"Bearer {api_key}"}
+        self._base_url = server_url.rstrip("/") + "/rest"
+        self._endpoint = f"{collection_id}/agent_runs"
+
+        self._num_workers = num_workers
+        self._request_timeout = request_timeout
+        self._flush_interval = flush_interval
+        self._batch_size = batch_size
+        self._max_retries = max_retries
+        self._shutdown_timeout = shutdown_timeout
+
+        self._queue: queue.Queue[AgentRun] = queue.Queue(maxsize=queue_maxsize)
+        self._cancel_event = threading.Event()
+
+        # Start background thread
+        self._thread = threading.Thread(
+            target=lambda: anyio.run(self._async_main),
+            name="AgentRunWriterThread",
+            daemon=True,
+        )
+        self._thread.start()
+        logger.info("AgentRunWriter thread started")
+
+        self._register_shutdown_hooks()
+
+    def _register_shutdown_hooks(self) -> None:
+        """Register shutdown hooks for atexit and signals."""
+
+        # Register shutdown hooks
+        atexit.register(self.finish)
+
+        # Register signal handlers for graceful shutdown
+        signal.signal(signal.SIGINT, lambda s, f: self._shutdown())  # Ctrl+C
+        signal.signal(signal.SIGTERM, lambda s, f: self._shutdown())  # Kill signal
+
+    def log_agent_runs(self, agent_runs: list[AgentRun]) -> None:
+        """Put a list of AgentRun objects into the queue.
+
+        If the queue is full, the method will block until the queue has space.
+
+        Args:
+            agent_runs (list[AgentRun]): List of AgentRun objects to put into the queue.
+        """
+
+        p_full = (
+            (self._queue.qsize() + len(agent_runs)) / self._queue.maxsize
+            if self._queue.maxsize > 0
+            else 0
+        )
+        if p_full >= 0.9:
+            logger.warning("AgentRunWriter queue is almost full (>=90%).")
+
+        for run in agent_runs:
+            try:
+                self._queue.put_nowait(run)
+            except queue.Full:
+                logger.warning("AgentRunWriter queue is full, blocking...")
+                self._queue.put(run, block=True)
+
+    def finish(self, force: bool = False) -> None:
+        """Request shutdown and wait up to timeout for pending tasks to complete.
+
+        Args:
+            force (bool): If True, shut down immediately. If False, wait for pending tasks to complete.
+        """
+        if not force:
+            # Wait for background thread to finish up to timeout
+            logger.info("Waiting for pending tasks to complete")
+
+            for i in range(0, self._shutdown_timeout, 5):
+                if not self._thread.is_alive():
+                    break
+
+                if self._queue.empty():
+                    break
+
+                logger.info(
+                    f"Waiting for pending tasks to complete " f"({i}/{self._shutdown_timeout})s"
+                )
+                time.sleep(5)
+
+        self._shutdown()
+
+    def _shutdown(self) -> None:
+        """Shutdown the AgentRunWriter thread."""
+        if self._thread.is_alive():
+            logger.info("Cancelling pending tasks...")
+            self._cancel_event.set()
+            n_pending = self._queue.qsize()
+            logger.info(f"Cancelled ~{n_pending} pending tasks")
+
+            # Give a brief moment to exit
+            logger.info("Waiting for thread to exit...")
+            self._thread.join(timeout=1.0)
+
+    def get_post_batch_fcn(
+        self, client: httpx.AsyncClient
+    ) -> Callable[[list[AgentRun], anyio.CapacityLimiter], Coroutine[Any, Any, None]]:
+        """Return a function that will post a batch of agent runs to the API."""
+
+        @backoff.on_exception(
+            backoff.expo,
+            exception=httpx.HTTPError,
+            giveup=_giveup,
+            max_tries=self._max_retries,
+            on_backoff=_print_backoff_message,
+        )
+        async def _post_batch(batch: list[AgentRun], limiter: anyio.CapacityLimiter) -> None:
+            async with limiter:
+                payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
+                resp = await client.post(
+                    self._endpoint, json=payload, timeout=self._request_timeout
+                )
+                resp.raise_for_status()
+
+        return _post_batch
+
+    async def _async_main(self) -> None:
+        """Main async function for the AgentRunWriter thread."""
+
+        limiter = anyio.CapacityLimiter(self._num_workers)
+
+        async with httpx.AsyncClient(base_url=self._base_url, headers=self._headers) as client:
+            async with anyio.create_task_group() as tg:
+                _post_batch = self.get_post_batch_fcn(client)
+
+                async def batch_loop() -> None:
+                    while not self._cancel_event.is_set():
+                        batch = await self._gather_next_batch_from_queue()
+                        if not batch:
+                            continue
+
+                        tg.start_soon(_post_batch, batch, limiter)
+
+                tg.start_soon(batch_loop)
+
+    async def _gather_next_batch_from_queue(self) -> list[AgentRun]:
+        """Gather a batch of agent runs from the queue.
+
+        Fetches items from the queue until the batch is full or the timeout expires.
+        """
+        batch: list[AgentRun] = []
+        with anyio.move_on_after(self._flush_interval):
+            while len(batch) < self._batch_size:
+                try:
+                    item = self._queue.get_nowait()
+                    batch.append(item)
+                except queue.Empty:
+                    await anyio.sleep(0.1)
+
+        return batch
+
+
+def init(
+    collection_name: str = "Agent Run Collection",
+    collection_id: str | None = None,
+    server_url: str = "https://api.docent.transluce.org",
+    web_url: str = "https://docent.transluce.org",
+    api_key: str | None = None,
+):
+    """Initialize the AgentRunWriter thread.
+
+    Args:
+        collection_name (str): Name of the agent run collection.
+        collection_id (str): ID of the agent run collection.
+        server_url (str): URL of the Docent server.
+        web_url (str): URL of the Docent web UI.
+        api_key (str): API key for the Docent API.
+    """
+    api_key = api_key or os.getenv("DOCENT_API_KEY")
+
+    if api_key is None:
+        raise ValueError(
+            "api_key is required. Please provide an "
+            "api_key or set the DOCENT_API_KEY environment variable."
+        )
+
+    sdk = Docent(
+        server_url=server_url,
+        web_url=web_url,
+        api_key=api_key,
+    )
+
+    collection_id = collection_id or sdk.create_collection(name=collection_name)
+
+    return AgentRunWriter(
+        api_key=api_key,
+        collection_id=collection_id,
+        server_url=server_url,
+    )
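
End to end, init() resolves the API key, creates or reuses a collection via the Docent client, and returns the process-wide AgentRunWriter, whose queue is drained in batches by the background anyio task. A hedged usage sketch (assumes DOCENT_API_KEY is set and the server is reachable):

from docent import init
from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import parse_chat_message

writer = init(collection_name="my-agent-runs")

run = AgentRun(
    transcripts=[
        Transcript(
            messages=[parse_chat_message({"role": "user", "content": "hi"})],
            metadata={},
        )
    ],
)
writer.log_agent_runs([run])  # enqueued; posted in batches by the worker
writer.finish()  # optional: finish() is also registered via atexit
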
docent/sdk/client.py CHANGED
@@ -1,10 +1,14 @@
+import itertools
 import os
+from pathlib import Path
 from typing import Any
 
 import requests
+from tqdm import tqdm
 
 from docent._log_util.logger import get_logger
-from docent.data_models.agent_run import AgentRun, AgentRunWithoutMetadataValidator
+from docent.data_models.agent_run import AgentRun
+from docent.loaders import load_inspect
 
 logger = get_logger(__name__)
 
@@ -100,49 +104,9 @@ class Docent:
         )
         return collection_id
 
-    def set_io_bin_keys(
-        self, collection_id: str, inner_bin_key: str | None, outer_bin_key: str | None
-    ):
-        """Set inner and outer bin keys for a collection."""
-        response = self._session.post(
-            f"{self._server_url}/{collection_id}/set_io_bin_keys",
-            json={"inner_bin_key": inner_bin_key, "outer_bin_key": outer_bin_key},
-        )
-        response.raise_for_status()
-
-    def set_inner_bin_key(self, collection_id: str, dim: str):
-        """Set the inner bin key for a collection."""
-        current_io_bin_keys = self.get_io_bin_keys(collection_id)
-        if current_io_bin_keys is None:
-            current_io_bin_keys = (None, None)
-        self.set_io_bin_keys(collection_id, dim, current_io_bin_keys[1])  # Set inner, keep outer
-
-    def set_outer_bin_key(self, collection_id: str, dim: str):
-        """Set the outer bin key for a collection."""
-        current_io_bin_keys = self.get_io_bin_keys(collection_id)
-        if current_io_bin_keys is None:
-            current_io_bin_keys = (None, None)
-        self.set_io_bin_keys(collection_id, current_io_bin_keys[0], dim)  # Keep inner, set outer
-
-    def get_io_bin_keys(self, collection_id: str) -> tuple[str | None, str | None] | None:
-        """Gets the current inner and outer bin keys for a Collection.
-
-        Args:
-            collection_id: ID of the Collection.
-
-        Returns:
-            tuple: (inner_bin_key | None, outer_bin_key | None)
-
-        Raises:
-            requests.exceptions.HTTPError: If the API request fails.
-        """
-        url = f"{self._server_url}/{collection_id}/io_bin_keys"
-        response = self._session.get(url)
-        response.raise_for_status()
-        data = response.json()
-        return (data.get("inner_bin_key"), data.get("outer_bin_key"))
-
-    def add_agent_runs(self, collection_id: str, agent_runs: list[AgentRun]) -> dict[str, Any]:
+    def add_agent_runs(
+        self, collection_id: str, agent_runs: list[AgentRun], batch_size: int = 1000
+    ) -> dict[str, Any]:
         """Adds agent runs to a Collection.
 
         Agent runs represent execution traces that can be visualized and analyzed.
@@ -161,7 +125,6 @@
         from tqdm import tqdm
 
         url = f"{self._server_url}/{collection_id}/agent_runs"
-        batch_size = 1000
         total_runs = len(agent_runs)
 
         # Process agent runs in batches
@@ -302,7 +265,7 @@
         else:
             # We do this to avoid metadata validation failing
             # TODO(mengk): kinda hacky
-            return AgentRunWithoutMetadataValidator.model_validate(response.json())
+            return AgentRun.model_validate(response.json())
 
     def make_collection_public(self, collection_id: str) -> dict[str, Any]:
         """Make a collection publicly accessible to anyone with the link.
@@ -367,3 +330,84 @@
         response = self._session.get(url)
         response.raise_for_status()
         return response.json()
+
+    def recursively_ingest_inspect_logs(self, collection_id: str, fpath: str):
+        """Recursively search directory for .eval files and ingest them as agent runs.
+
+        Args:
+            collection_id: ID of the Collection to add agent runs to.
+            fpath: Path to directory to search recursively.
+
+        Raises:
+            ValueError: If the path doesn't exist or isn't a directory.
+            requests.exceptions.HTTPError: If any API requests fail.
+        """
+        root_path = Path(fpath)
+        if not root_path.exists():
+            raise ValueError(f"Path does not exist: {fpath}")
+        if not root_path.is_dir():
+            raise ValueError(f"Path is not a directory: {fpath}")
+
+        # Find all .eval files recursively
+        eval_files = list(root_path.rglob("*.eval"))
+
+        if not eval_files:
+            logger.info(f"No .eval files found in {fpath}")
+            return
+
+        logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
+
+        total_runs_added = 0
+        batch_size = 100
+
+        # Process each .eval file
+        for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
+            # Get total samples for progress tracking
+            total_samples = load_inspect.get_total_samples(eval_file, format="eval")
+
+            if total_samples == 0:
+                logger.info(f"No samples found in {eval_file}")
+                continue
+
+            # Load runs from file
+            with open(eval_file, "rb") as f:
+                _, runs_generator = load_inspect.runs_from_file(f, format="eval")
+
+                # Process runs in batches
+                runs_from_file = 0
+                batches = itertools.batched(runs_generator, batch_size)
+
+                with tqdm(
+                    total=total_samples,
+                    desc=f"Processing {eval_file.name}",
+                    unit="runs",
+                    leave=False,
+                ) as file_pbar:
+                    for batch in batches:
+                        batch_list = list(batch)  # Convert generator batch to list
+                        if not batch_list:
+                            break
+
+                        # Add batch to collection
+                        url = f"{self._server_url}/{collection_id}/agent_runs"
+                        payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
+
+                        response = self._session.post(url, json=payload)
+                        response.raise_for_status()
+
+                        runs_from_file += len(batch_list)
+                        file_pbar.update(len(batch_list))
+
+            total_runs_added += runs_from_file
+            logger.info(f"Added {runs_from_file} runs from {eval_file}")
+
+        # Compute embeddings after all files are processed
+        if total_runs_added > 0:
+            logger.info("Computing embeddings for added runs...")
+            url = f"{self._server_url}/{collection_id}/compute_embeddings"
+            response = self._session.post(url)
+            response.raise_for_status()
+
+        logger.info(
+            f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
+        )
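
A usage sketch for the bulk ingester (the directory path and key are placeholders): it walks the tree for *.eval files, posts runs in batches of 100, and triggers server-side embedding computation once at the end. Note that itertools.batched requires Python 3.12 or newer.

from docent import Docent

client = Docent(api_key="dk-...")  # placeholder key
collection_id = client.create_collection(name="inspect-logs")
client.recursively_ingest_inspect_logs(collection_id, "./logs")
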
docent/trace.py CHANGED
@@ -197,12 +197,14 @@ class DocentTracer:
         try:
             if "http" in endpoint.lower() or "https" in endpoint.lower():
                 http_exporter: HTTPExporter = HTTPExporter(
-                    endpoint=f"{endpoint}/v1/traces", headers=self.headers
+                    endpoint=f"{endpoint}/v1/traces", headers=self.headers, timeout=30
                 )
                 logger.debug(f"Initialized HTTP exporter for endpoint: {endpoint}/v1/traces")
                 return http_exporter
             else:
-                grpc_exporter: GRPCExporter = GRPCExporter(endpoint=endpoint, headers=self.headers)
+                grpc_exporter: GRPCExporter = GRPCExporter(
+                    endpoint=endpoint, headers=self.headers, timeout=30
+                )
                 logger.debug(f"Initialized gRPC exporter for endpoint: {endpoint}")
                 return grpc_exporter
         except Exception as e:
docent_python-0.1.12a0.dist-info/METADATA → docent_python-0.1.14a0.dist-info/METADATA RENAMED
@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.12a0
+Version: 0.1.14a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
 Project-URL: Docs, https://transluce-docent.readthedocs-hosted.com/en/latest
 Author-email: Transluce <info@transluce.org>
-License-Expression: MIT
+License-Expression: Apache-2.0
 License-File: LICENSE.md
 Requires-Python: >=3.11
 Requires-Dist: opentelemetry-api>=1.34.1
docent_python-0.1.12a0.dist-info/RECORD → docent_python-0.1.14a0.dist-info/RECORD RENAMED
@@ -1,30 +1,32 @@
-docent/__init__.py,sha256=J2BbO6rzilfw9WXRUeolr439EGFezqbMU_kCpCCryRA,59
+docent/__init__.py,sha256=fuhETwJPcesiB76Zxa64HBJxeaaTyRalIH-fs77TWsU,112
 docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/trace.py,sha256=C8oO5NuChSgyHIB5OI6qOfXWaIld7jdvlEqImK56a1E,66761
+docent/trace.py,sha256=bCO66QFgQ9L_4XM4PXnZToMi1Crtc9n0723kNjwCdm0,66823
 docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
 docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,73
 docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
 docent/data_models/__init__.py,sha256=4JbTDVzRhS5VZgo8MALwd_YI17GaN7X9E3rOc4Xl7kw,327
 docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
-docent/data_models/agent_run.py,sha256=AhokdyEscrlrg0q5aKaOv26cYvkA6LvAoQsz_WBg_pM,12240
+docent/data_models/agent_run.py,sha256=bsZGL0D3HIO8oxfaeUzaUWRS82u-IiO7vs0-Lv9spks,19970
 docent/data_models/citation.py,sha256=zpF9WuvVEfktltw1M9P3hwpg5yywizFUKF5zROBR2cY,5062
 docent/data_models/metadata.py,sha256=r0SYC4i2x096dXMLfw_rAMtcJQCsoV6EOMPZuEngbGA,9062
 docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
-docent/data_models/remove_invalid_citation_ranges.py,sha256=0cn4Xg_tgg45nZvc-sjtqLgr1rywBBrsLJ_WBKEF0pY,5673
+docent/data_models/remove_invalid_citation_ranges.py,sha256=U-aIzRL-SuWFQZr1MqEGqXMNyIKQs7VQLxHDoFrMJwI,5658
 docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
-docent/data_models/transcript.py,sha256=Gmy4lYdlvC5SXzpnerFJ83lIMPPiYUPgjOUbwg6aWJQ,20238
+docent/data_models/transcript.py,sha256=xA6fcGwYn8ewgqWdIgrXcq1Qbt7rByCKqDabffvCL0A,21387
+docent/data_models/yaml_util.py,sha256=6GrPWqbTZrryZh71cnSsiqbHkWVCd-8V3-6GeiEchUg,325
 docent/data_models/chat/__init__.py,sha256=GleyRzYqKRkwwSRm_tQJw5BudCbgu9WRSa71Fntz0L0,610
 docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
 docent/data_models/chat/message.py,sha256=xGt09keA6HRxw40xB_toNzEqA9ip7k53dnhXrEbKGO8,4157
-docent/data_models/chat/tool.py,sha256=x7NKINswPe0Kqvcx4ubjHzB-n0-i4DbFodvaBb2vitk,3042
-docent/loaders/load_inspect.py,sha256=_cK2Qd6gyLQuJVzOlsvEZz7TrqzNmH6ZsLTkSCWAPqQ,6628
+docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZyw,3049
+docent/loaders/load_inspect.py,sha256=VLrtpvcVZ44n2DIPMwUivXqbvOWjaooGw6moY8UQ0VE,6789
 docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
 docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
 docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
 docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
 docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/sdk/client.py,sha256=rvOFXvyAr9QxCijN0_CWENbm8y3YQvR1msfFSBDZvOw,13309
-docent_python-0.1.12a0.dist-info/METADATA,sha256=OnxdikeOy69TZy_HGoWUxi7VDu20k8nwrikLcIPLXho,1038
-docent_python-0.1.12a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-docent_python-0.1.12a0.dist-info/licenses/LICENSE.md,sha256=vOHzq3K4Ndu0UV9hPrtXvlD7pHOjyDQmGjHuLSIkRQY,1087
-docent_python-0.1.12a0.dist-info/RECORD,,
+docent/sdk/agent_run_writer.py,sha256=QNCV4m36c9BuhzWCyuzs0wH9ql8uubzcQUXMhc3XVug,9135
+docent/sdk/client.py,sha256=fuJrTF87OtUojULFY7acZuqg5xmE8F-4HgEeEV8_gq0,14781
+docent_python-0.1.14a0.dist-info/METADATA,sha256=OpxpQ56Pzl7Af2VTR0mi1_VpZRFj6NlEjlqcLRY5Oec,1045
+docent_python-0.1.14a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+docent_python-0.1.14a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
+docent_python-0.1.14a0.dist-info/RECORD,,
docent_python-0.1.14a0.dist-info/licenses/LICENSE.md ADDED
@@ -0,0 +1,13 @@
+Copyright 2025 Clarity AI Research Inc., dba Transluce
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
@@ -1,7 +0,0 @@
1
- Copyright 2025 Clarity AI Research, Inc. dba Transluce
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
-
5
- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
-
7
- THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.