docent-python 0.1.18a0 (tar.gz) → 0.1.19a0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. (The original page links to more details here; the link target is not included in this extract.)

Files changed (36)
  1. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/__init__.py +2 -0
  3. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/agent_run.py +5 -5
  4. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/__init__.py +6 -1
  5. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/citation.py +103 -22
  6. docent_python-0.1.19a0/docent/data_models/judge.py +16 -0
  7. docent_python-0.1.19a0/docent/data_models/metadata_util.py +16 -0
  8. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/remove_invalid_citation_ranges.py +23 -10
  9. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/transcript.py +18 -16
  10. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/sdk/agent_run_writer.py +18 -5
  11. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/sdk/client.py +104 -20
  12. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/trace.py +54 -49
  13. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/pyproject.toml +1 -1
  14. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/uv.lock +1 -1
  15. docent_python-0.1.18a0/docent/data_models/metadata.py +0 -229
  16. docent_python-0.1.18a0/docent/data_models/yaml_util.py +0 -12
  17. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/.gitignore +0 -0
  18. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/LICENSE.md +0 -0
  19. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/README.md +0 -0
  20. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/__init__.py +0 -0
  21. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/_log_util/__init__.py +0 -0
  22. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/_log_util/logger.py +0 -0
  23. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/_tiktoken_util.py +0 -0
  24. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/content.py +0 -0
  25. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/message.py +0 -0
  26. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/tool.py +0 -0
  27. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/regex.py +0 -0
  28. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/shared_types.py +0 -0
  29. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/loaders/load_inspect.py +0 -0
  30. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/py.typed +0 -0
  31. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/__init__.py +0 -0
  32. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/load.py +0 -0
  33. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/log.eval +0 -0
  34. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/tb_airline.json +0 -0
  35. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/sdk/__init__.py +0 -0
  36. {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/trace_temp.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.18a0
3
+ Version: 0.1.19a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -1,11 +1,13 @@
1
1
  from docent.data_models.agent_run import AgentRun
2
2
  from docent.data_models.citation import Citation
3
+ from docent.data_models.judge import JudgeRunLabel
3
4
  from docent.data_models.regex import RegexSnippet
4
5
  from docent.data_models.transcript import Transcript, TranscriptGroup
5
6
 
6
7
  __all__ = [
7
8
  "AgentRun",
8
9
  "Citation",
10
+ "JudgeRunLabel",
9
11
  "RegexSnippet",
10
12
  "Transcript",
11
13
  "TranscriptGroup",
@@ -17,8 +17,8 @@ from pydantic_core import to_jsonable_python
17
17
 
18
18
  from docent._log_util import get_logger
19
19
  from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
20
+ from docent.data_models.metadata_util import dump_metadata
20
21
  from docent.data_models.transcript import Transcript, TranscriptGroup
21
- from docent.data_models.yaml_util import yaml_dump_metadata
22
22
 
23
23
  logger = get_logger(__name__)
24
24
 
@@ -446,10 +446,10 @@ class AgentRun(BaseModel):
446
446
  text = _recurse("__global_root")
447
447
 
448
448
  # Append agent run metadata below the full content
449
- yaml_text = yaml_dump_metadata(self.metadata)
450
- if yaml_text is not None:
449
+ metadata_text = dump_metadata(self.metadata)
450
+ if metadata_text is not None:
451
451
  if indent > 0:
452
- yaml_text = textwrap.indent(yaml_text, " " * indent)
453
- text += f"\n<|agent run metadata|>\n{yaml_text}\n</|agent run metadata|>"
452
+ metadata_text = textwrap.indent(metadata_text, " " * indent)
453
+ text += f"\n<|agent run metadata|>\n{metadata_text}\n</|agent run metadata|>"
454
454
 
455
455
  return text
@@ -7,7 +7,12 @@ from docent.data_models.chat.message import (
7
7
  UserMessage,
8
8
  parse_chat_message,
9
9
  )
10
- from docent.data_models.chat.tool import ToolCall, ToolCallContent, ToolInfo, ToolParams
10
+ from docent.data_models.chat.tool import (
11
+ ToolCall,
12
+ ToolCallContent,
13
+ ToolInfo,
14
+ ToolParams,
15
+ )
11
16
 
12
17
  __all__ = [
13
18
  "ChatMessage",
@@ -1,15 +1,27 @@
1
1
  import re
2
+ from dataclasses import dataclass
2
3
 
3
4
  from pydantic import BaseModel
4
5
 
5
6
 
7
+ @dataclass
8
+ class ParsedCitation:
9
+ """Represents a parsed citation before conversion to full Citation object."""
10
+
11
+ transcript_idx: int | None
12
+ block_idx: int | None
13
+ metadata_key: str | None = None
14
+ start_pattern: str | None = None
15
+
16
+
6
17
  class Citation(BaseModel):
7
18
  start_idx: int
8
19
  end_idx: int
9
20
  agent_run_idx: int | None = None
10
21
  transcript_idx: int | None = None
11
- block_idx: int
22
+ block_idx: int | None = None
12
23
  action_unit_idx: int | None = None
24
+ metadata_key: str | None = None
13
25
  start_pattern: str | None = None
14
26
 
15
27
 
@@ -17,6 +29,9 @@ RANGE_BEGIN = "<RANGE>"
17
29
  RANGE_END = "</RANGE>"
18
30
 
19
31
  _SINGLE_RE = re.compile(r"T(\d+)B(\d+)")
32
+ _METADATA_RE = re.compile(r"^M\.([^:]+)$") # [M.key]
33
+ _TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$") # [T0M.key]
34
+ _MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$") # [T0B1M.key]
20
35
  _RANGE_CONTENT_RE = re.compile(r":\s*" + re.escape(RANGE_BEGIN) + r".*?" + re.escape(RANGE_END))
21
36
 
22
37
 
@@ -70,41 +85,93 @@ def scan_brackets(text: str) -> list[tuple[int, int, str]]:
70
85
  return matches
71
86
 
72
87
 
73
- def parse_single_citation(part: str) -> tuple[int, int, str | None] | None:
88
+ def parse_single_citation(part: str) -> ParsedCitation | None:
74
89
  """
75
90
  Parse a single citation token inside a bracket and return its components.
76
91
 
77
- Returns (transcript_idx, block_idx, start_pattern) or None if invalid.
92
+ Returns ParsedCitation or None if invalid.
93
+ For metadata citations, transcript_idx may be None (for agent run metadata).
94
+ Supports optional text range for all valid citation kinds.
78
95
  """
79
96
  token = part.strip()
80
97
  if not token:
81
98
  return None
82
99
 
100
+ # Extract optional range part
101
+ start_pattern: str | None = None
102
+ citation_part = token
83
103
  if ":" in token:
84
- citation_part, range_part = token.split(":", 1)
85
- single_match = _SINGLE_RE.match(citation_part.strip())
86
- if not single_match:
104
+ left, right = token.split(":", 1)
105
+ citation_part = left.strip()
106
+ start_pattern = _extract_range_pattern(right)
107
+
108
+ # Try matches in order of specificity
109
+ # 1) Message metadata [T0B0M.key]
110
+ m = _MESSAGE_METADATA_RE.match(citation_part)
111
+ if m:
112
+ transcript_idx = int(m.group(1))
113
+ block_idx = int(m.group(2))
114
+ metadata_key = m.group(3)
115
+ # Disallow nested keys like status.code per instruction
116
+ if "." in metadata_key:
87
117
  return None
88
- transcript_idx = int(single_match.group(1))
89
- block_idx = int(single_match.group(2))
90
- start_pattern = _extract_range_pattern(range_part)
91
- return transcript_idx, block_idx, start_pattern
92
- else:
93
- single_match = _SINGLE_RE.match(token)
94
- if not single_match:
118
+ return ParsedCitation(
119
+ transcript_idx=transcript_idx,
120
+ block_idx=block_idx,
121
+ metadata_key=metadata_key,
122
+ start_pattern=start_pattern,
123
+ )
124
+
125
+ # 2) Transcript metadata [T0M.key]
126
+ m = _TRANSCRIPT_METADATA_RE.match(citation_part)
127
+ if m:
128
+ transcript_idx = int(m.group(1))
129
+ metadata_key = m.group(2)
130
+ if "." in metadata_key:
95
131
  return None
96
- transcript_idx = int(single_match.group(1))
97
- block_idx = int(single_match.group(2))
98
- return transcript_idx, block_idx, None
132
+ return ParsedCitation(
133
+ transcript_idx=transcript_idx,
134
+ block_idx=None,
135
+ metadata_key=metadata_key,
136
+ start_pattern=start_pattern,
137
+ )
138
+
139
+ # 3) Agent run metadata [M.key]
140
+ m = _METADATA_RE.match(citation_part)
141
+ if m:
142
+ metadata_key = m.group(1)
143
+ if "." in metadata_key:
144
+ return None
145
+ return ParsedCitation(
146
+ transcript_idx=None,
147
+ block_idx=None,
148
+ metadata_key=metadata_key,
149
+ start_pattern=start_pattern,
150
+ )
151
+
152
+ # 4) Regular transcript block [T0B0]
153
+ m = _SINGLE_RE.match(citation_part)
154
+ if m:
155
+ transcript_idx = int(m.group(1))
156
+ block_idx = int(m.group(2))
157
+ return ParsedCitation(
158
+ transcript_idx=transcript_idx, block_idx=block_idx, start_pattern=start_pattern
159
+ )
160
+
161
+ return None
99
162
 
100
163
 
101
164
  def parse_citations(text: str) -> tuple[str, list[Citation]]:
102
165
  """
103
- Parse citations from text in the format described by BLOCK_RANGE_CITE_INSTRUCTION.
166
+ Parse citations from text in the format described by TEXT_RANGE_CITE_INSTRUCTION.
104
167
 
105
168
  Supported formats:
106
169
  - Single block: [T<key>B<idx>]
107
170
  - Text range with start pattern: [T<key>B<idx>:<RANGE>start_pattern</RANGE>]
171
+ - Agent run metadata: [M.key]
172
+ - Transcript metadata: [T<key>M.key]
173
+ - Message metadata: [T<key>B<idx>M.key]
174
+ - Message metadata with text range: [T<key>B<idx>M.key:<RANGE>start_pattern</RANGE>]
108
175
 
109
176
  Args:
110
177
  text: The text to parse citations from
@@ -127,8 +194,21 @@ def parse_citations(text: str) -> tuple[str, list[Citation]]:
127
194
  # Parse a single citation token inside the bracket
128
195
  parsed = parse_single_citation(bracket_content)
129
196
  if parsed:
130
- transcript_idx, block_idx, start_pattern = parsed
131
- replacement = f"T{transcript_idx}B{block_idx}"
197
+ # Create appropriate replacement text based on citation type
198
+ if parsed.metadata_key:
199
+ if parsed.transcript_idx is None:
200
+ # Agent run metadata [M.key]
201
+ replacement = "run metadata"
202
+ elif parsed.block_idx is None:
203
+ # Transcript metadata [T0M.key]
204
+ replacement = f"T{parsed.transcript_idx}"
205
+ else:
206
+ # Message metadata [T0B1M.key]
207
+ replacement = f"T{parsed.transcript_idx}B{parsed.block_idx}"
208
+ else:
209
+ # Regular transcript block [T0B1]
210
+ replacement = f"T{parsed.transcript_idx}B{parsed.block_idx}"
211
+
132
212
  # Current absolute start position for this replacement in the cleaned text
133
213
  start_idx = len(cleaned_text)
134
214
  end_idx = start_idx + len(replacement)
@@ -137,10 +217,11 @@ def parse_citations(text: str) -> tuple[str, list[Citation]]:
137
217
  start_idx=start_idx,
138
218
  end_idx=end_idx,
139
219
  agent_run_idx=None,
140
- transcript_idx=transcript_idx,
141
- block_idx=block_idx,
220
+ transcript_idx=parsed.transcript_idx,
221
+ block_idx=parsed.block_idx,
142
222
  action_unit_idx=None,
143
- start_pattern=start_pattern,
223
+ metadata_key=parsed.metadata_key,
224
+ start_pattern=parsed.start_pattern,
144
225
  )
145
226
  )
146
227
  cleaned_text += replacement
@@ -0,0 +1,16 @@
1
+ """Judge-related data models shared across Docent components."""
2
+
3
+ from typing import Any
4
+ from uuid import uuid4
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
+ class JudgeRunLabel(BaseModel):
10
+ id: str = Field(default_factory=lambda: str(uuid4()))
11
+ agent_run_id: str
12
+ rubric_id: str
13
+ label: dict[str, Any]
14
+
15
+
16
+ __all__ = ["JudgeRunLabel"]
@@ -0,0 +1,16 @@
1
+ import json
2
+ from typing import Any
3
+
4
+ from pydantic_core import to_jsonable_python
5
+
6
+
7
+ def dump_metadata(metadata: dict[str, Any]) -> str | None:
8
+ """
9
+ Dump metadata to a JSON string.
10
+ We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
11
+ """
12
+ if not metadata:
13
+ return None
14
+ metadata_obj = to_jsonable_python(metadata)
15
+ text = json.dumps(metadata_obj, indent=2)
16
+ return text.strip()
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import re
2
3
 
3
4
  from docent.data_models.agent_run import AgentRun
@@ -52,7 +53,7 @@ def find_citation_matches_in_text(text: str, start_pattern: str) -> list[tuple[i
52
53
 
53
54
  def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) -> str | None:
54
55
  """
55
- Get the text content of a specific transcript block from an AgentRun,
56
+ Get the text content of a specific transcript block (or transcript/run metadata) from an AgentRun,
56
57
  using the same formatting as shown to LLMs via format_chat_message.
57
58
 
58
59
  Args:
@@ -62,19 +63,28 @@ def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) ->
62
63
  Returns:
63
64
  Text content of the specified block (including tool calls), or None if not found
64
65
  """
65
- if citation.transcript_idx is None:
66
- return None
67
-
68
66
  try:
69
- if citation.transcript_idx >= len(agent_run.get_transcript_ids_ordered()):
67
+ if citation.transcript_idx is None:
68
+ # At the run level, can only cite metadata
69
+ if citation.metadata_key is not None:
70
+ return json.dumps(agent_run.metadata.get(citation.metadata_key))
70
71
  return None
72
+
71
73
  transcript_id = agent_run.get_transcript_ids_ordered()[citation.transcript_idx]
72
74
  transcript = agent_run.transcript_dict[transcript_id]
73
75
 
74
- if citation.block_idx >= len(transcript.messages):
76
+ if citation.block_idx is None:
77
+ # At the transcript level, can only cite metadata
78
+ if citation.metadata_key is not None:
79
+ return json.dumps(transcript.metadata.get(citation.metadata_key))
75
80
  return None
81
+
76
82
  message = transcript.messages[citation.block_idx]
77
83
 
84
+ # At the message level, can cite metadata or content
85
+ if citation.metadata_key is not None:
86
+ return json.dumps(message.metadata.get(citation.metadata_key))
87
+
78
88
  # Use the same formatting function that generates content for LLMs
79
89
  # This ensures consistent formatting between citation validation and LLM serialization
80
90
  return format_chat_message(
@@ -99,6 +109,9 @@ def validate_citation_text_range(agent_run: AgentRun, citation: Citation) -> boo
99
109
  if not citation.start_pattern:
100
110
  # Nothing to validate
101
111
  return True
112
+ if citation.metadata_key is not None:
113
+ # We don't need to remove invalid metadata citation ranges
114
+ return True
102
115
 
103
116
  text = get_transcript_text_for_citation(agent_run, citation)
104
117
  if text is None:
@@ -130,16 +143,16 @@ def remove_invalid_citation_ranges(text: str, agent_run: AgentRun) -> str:
130
143
  # Parse this bracket content to get citation info
131
144
  parsed = parse_single_citation(bracket_content)
132
145
  if parsed:
133
- transcript_idx, block_idx, start_pattern = parsed
134
146
  # The citation spans from start to end in the original text
135
147
  citation = Citation(
136
148
  start_idx=start,
137
149
  end_idx=end,
138
150
  agent_run_idx=None,
139
- transcript_idx=transcript_idx,
140
- block_idx=block_idx,
151
+ transcript_idx=parsed.transcript_idx,
152
+ block_idx=parsed.block_idx,
141
153
  action_unit_idx=None,
142
- start_pattern=start_pattern,
154
+ metadata_key=parsed.metadata_key,
155
+ start_pattern=parsed.start_pattern,
143
156
  )
144
157
  citations.append(citation)
145
158
 
@@ -15,7 +15,7 @@ from docent.data_models._tiktoken_util import (
15
15
  )
16
16
  from docent.data_models.chat import AssistantMessage, ChatMessage, ContentReasoning
17
17
  from docent.data_models.citation import RANGE_BEGIN, RANGE_END
18
- from docent.data_models.yaml_util import yaml_dump_metadata
18
+ from docent.data_models.metadata_util import dump_metadata
19
19
 
20
20
  # Template for formatting individual transcript blocks
21
21
  TRANSCRIPT_BLOCK_TEMPLATE = """
@@ -29,6 +29,12 @@ TEXT_RANGE_CITE_INSTRUCTION = f"""Anytime you quote the transcript, or refer to
29
29
 
30
30
  A citation may include a specific range of text within a block. Use {RANGE_BEGIN} and {RANGE_END} to mark the specific range of text. Add it after the block ID separated by a colon. For example, to cite the part of transcript 0, block 1, where the agent says "I understand the task", write [T0B1:{RANGE_BEGIN}I understand the task{RANGE_END}]. Citations must follow this exact format. The markers {RANGE_BEGIN} and {RANGE_END} must be used ONLY inside the brackets of a citation.
31
31
 
32
+ - You may cite a top-level key in the agent run metadata like this: [M.task_description].
33
+ - You may cite a top-level key in transcript metadata. For example, for transcript 0: [T0M.start_time].
34
+ - You may cite a top-level key in message metadata for a block. For example, for transcript 0, block 1: [T0B1M.status].
35
+ - You may not cite nested keys. For example, [T0B1M.status.code] is invalid.
36
+ - Within a top-level metadata key you may cite a range of text that appears in the value. For example, [T0B1M.status:{RANGE_BEGIN}"running":false{RANGE_END}].
37
+
32
38
  Important notes:
33
39
  - You must include the full content of the text range {RANGE_BEGIN} and {RANGE_END}, EXACTLY as it appears in the transcript, word-for-word, including any markers or punctuation that appear in the middle of the text.
34
40
  - Citations must be as specific as possible. This means you should usually cite a specific text range within a block.
@@ -73,9 +79,9 @@ def format_chat_message(
73
79
  cur_content += f"\n<tool call>\n{tool_call.function}({args})\n</tool call>"
74
80
 
75
81
  if message.metadata:
76
- metadata_yaml = yaml_dump_metadata(message.metadata)
77
- if metadata_yaml is not None:
78
- cur_content += f"\n<|message metadata|>\n{metadata_yaml}\n</|message metadata|>"
82
+ metadata_text = dump_metadata(message.metadata)
83
+ if metadata_text is not None:
84
+ cur_content += f"\n<|message metadata|>\n{metadata_text}\n</|message metadata|>"
79
85
 
80
86
  return TRANSCRIPT_BLOCK_TEMPLATE.format(
81
87
  index_label=index_label, role=message.role, content=cur_content
@@ -127,13 +133,11 @@ class TranscriptGroup(BaseModel):
127
133
  str: XML-like wrapped text including the group's metadata.
128
134
  """
129
135
  # Prepare YAML metadata
130
- yaml_text = yaml_dump_metadata(self.metadata)
131
- if yaml_text is not None:
136
+ metadata_text = dump_metadata(self.metadata)
137
+ if metadata_text is not None:
132
138
  if indent > 0:
133
- yaml_text = textwrap.indent(yaml_text, " " * indent)
134
- inner = (
135
- f"{children_text}\n<|{self.name} metadata|>\n{yaml_text}\n</|{self.name} metadata|>"
136
- )
139
+ metadata_text = textwrap.indent(metadata_text, " " * indent)
140
+ inner = f"{children_text}\n<|{self.name} metadata|>\n{metadata_text}\n</|{self.name} metadata|>"
137
141
  else:
138
142
  inner = children_text
139
143
 
@@ -447,13 +451,11 @@ class Transcript(BaseModel):
447
451
  content_str = f"<|T{transcript_idx} blocks|>\n{blocks_str}\n</|T{transcript_idx} blocks|>"
448
452
 
449
453
  # Gather metadata and add to content
450
- yaml_text = yaml_dump_metadata(self.metadata)
451
- if yaml_text is not None:
454
+ metadata_text = dump_metadata(self.metadata)
455
+ if metadata_text is not None:
452
456
  if indent > 0:
453
- yaml_text = textwrap.indent(yaml_text, " " * indent)
454
- content_str += (
455
- f"\n<|T{transcript_idx} metadata|>\n{yaml_text}\n</|T{transcript_idx} metadata|>"
456
- )
457
+ metadata_text = textwrap.indent(metadata_text, " " * indent)
458
+ content_str += f"\n<|T{transcript_idx} metadata|>\n{metadata_text}\n</|T{transcript_idx} metadata|>"
457
459
 
458
460
  # Format content and return
459
461
  if indent > 0:
@@ -4,11 +4,12 @@ import queue
4
4
  import signal
5
5
  import threading
6
6
  import time
7
- from typing import Any, Callable, Coroutine, Optional
7
+ from typing import Any, AsyncGenerator, Callable, Coroutine, Optional
8
8
 
9
9
  import anyio
10
10
  import backoff
11
11
  import httpx
12
+ import orjson
12
13
  from backoff.types import Details
13
14
 
14
15
  from docent._log_util.logger import get_logger
@@ -38,6 +39,15 @@ def _print_backoff_message(e: Details):
38
39
  )
39
40
 
40
41
 
42
+ async def _generate_payload_chunks(runs: list[AgentRun]) -> AsyncGenerator[bytes, None]:
43
+ yield b'{"agent_runs": ['
44
+ for i, ar in enumerate(runs):
45
+ if i > 0:
46
+ yield b","
47
+ yield orjson.dumps(ar.model_dump(mode="json"))
48
+ yield b"]}"
49
+
50
+
41
51
  class AgentRunWriter:
42
52
  """Background thread for logging agent runs.
43
53
 
@@ -175,7 +185,7 @@ class AgentRunWriter:
175
185
  logger.info("Cancelling pending tasks...")
176
186
  self._cancel_event.set()
177
187
  n_pending = self._queue.qsize()
178
- logger.info(f"Cancelled ~{n_pending} pending tasks")
188
+ logger.info(f"Cancelled ~{n_pending} pending runs")
179
189
 
180
190
  # Give a brief moment to exit
181
191
  logger.info("Waiting for thread to exit...")
@@ -194,8 +204,11 @@ class AgentRunWriter:
194
204
  on_backoff=_print_backoff_message,
195
205
  )
196
206
  async def _post_batch(batch: list[AgentRun]) -> None:
197
- payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
198
- resp = await client.post(self._endpoint, json=payload, timeout=self._request_timeout)
207
+ resp = await client.post(
208
+ self._endpoint,
209
+ content=_generate_payload_chunks(batch),
210
+ timeout=self._request_timeout,
211
+ )
199
212
  resp.raise_for_status()
200
213
 
201
214
  return _post_batch
@@ -246,7 +259,7 @@ def init(
246
259
  web_url: str = "https://docent.transluce.org",
247
260
  api_key: str | None = None,
248
261
  # Writer arguments
249
- num_workers: int = 2,
262
+ num_workers: int = 4,
250
263
  queue_maxsize: int = 20_000,
251
264
  request_timeout: float = 30.0,
252
265
  flush_interval: float = 1.0,
@@ -8,6 +8,7 @@ from tqdm import tqdm
8
8
 
9
9
  from docent._log_util.logger import get_logger
10
10
  from docent.data_models.agent_run import AgentRun
11
+ from docent.data_models.judge import JudgeRunLabel
11
12
  from docent.loaders import load_inspect
12
13
 
13
14
  logger = get_logger(__name__)
@@ -48,13 +49,18 @@ class Docent:
48
49
 
49
50
  self._login(api_key)
50
51
 
52
+ def _handle_response_errors(self, response: requests.Response):
53
+ """Handle API response and raise informative errors.
54
+ TODO: make this more informative."""
55
+ response.raise_for_status()
56
+
51
57
  def _login(self, api_key: str):
52
58
  """Login with email/password to establish session."""
53
59
  self._session.headers.update({"Authorization": f"Bearer {api_key}"})
54
60
 
55
61
  url = f"{self._server_url}/api-keys/test"
56
62
  response = self._session.get(url)
57
- response.raise_for_status()
63
+ self._handle_response_errors(response)
58
64
 
59
65
  logger.info("Logged in with API key")
60
66
  return
@@ -90,7 +96,7 @@ class Docent:
90
96
  }
91
97
 
92
98
  response = self._session.post(url, json=payload)
93
- response.raise_for_status()
99
+ self._handle_response_errors(response)
94
100
 
95
101
  response_data = response.json()
96
102
  collection_id = response_data.get("collection_id")
@@ -134,13 +140,13 @@ class Docent:
134
140
  payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
135
141
 
136
142
  response = self._session.post(url, json=payload)
137
- response.raise_for_status()
143
+ self._handle_response_errors(response)
138
144
 
139
145
  pbar.update(len(batch))
140
146
 
141
147
  url = f"{self._server_url}/{collection_id}/compute_embeddings"
142
148
  response = self._session.post(url)
143
- response.raise_for_status()
149
+ self._handle_response_errors(response)
144
150
 
145
151
  logger.info(f"Successfully added {total_runs} agent runs to Collection '{collection_id}'")
146
152
  return {"status": "success", "total_runs_added": total_runs}
@@ -156,7 +162,7 @@ class Docent:
156
162
  """
157
163
  url = f"{self._server_url}/collections"
158
164
  response = self._session.get(url)
159
- response.raise_for_status()
165
+ self._handle_response_errors(response)
160
166
  return response.json()
161
167
 
162
168
  def list_rubrics(self, collection_id: str) -> list[dict[str, Any]]:
@@ -173,7 +179,7 @@ class Docent:
173
179
  """
174
180
  url = f"{self._server_url}/rubric/{collection_id}/rubrics"
175
181
  response = self._session.get(url)
176
- response.raise_for_status()
182
+ self._handle_response_errors(response)
177
183
  return response.json()
178
184
 
179
185
  def get_rubric_run_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:
@@ -191,7 +197,7 @@ class Docent:
191
197
  """
192
198
  url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/rubric_run_state"
193
199
  response = self._session.get(url)
194
- response.raise_for_status()
200
+ self._handle_response_errors(response)
195
201
  return response.json()
196
202
 
197
203
  def get_clustering_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:
@@ -209,7 +215,7 @@ class Docent:
209
215
  """
210
216
  url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/clustering_job"
211
217
  response = self._session.get(url)
212
- response.raise_for_status()
218
+ self._handle_response_errors(response)
213
219
  return response.json()
214
220
 
215
221
  def get_cluster_centroids(self, collection_id: str, rubric_id: str) -> list[dict[str, Any]]:
@@ -244,6 +250,90 @@ class Docent:
244
250
  clustering_state = self.get_clustering_state(collection_id, rubric_id)
245
251
  return clustering_state.get("assignments", {})
246
252
 
253
+ def add_label(
254
+ self,
255
+ collection_id: str,
256
+ rubric_id: str,
257
+ label: JudgeRunLabel,
258
+ ) -> dict[str, Any]:
259
+ """Attach a manual label to an agent run for a rubric.
260
+
261
+ Args:
262
+ collection_id: ID of the Collection that owns the rubric.
263
+ rubric_id: ID of the rubric the label applies to.
264
+ label: A `JudgeRunLabel` that must comply with the rubric's output schema.
265
+
266
+ Returns:
267
+ dict: API response containing a status message.
268
+
269
+ Raises:
270
+ ValueError: If the label does not target the rubric specified in the path.
271
+ requests.exceptions.HTTPError: If the API request fails or validation errors occur.
272
+ """
273
+ if label.rubric_id != rubric_id:
274
+ raise ValueError("Label rubric_id must match the rubric_id argument")
275
+
276
+ url = f"{self._server_url}/rubric/{collection_id}/rubric/{rubric_id}/label"
277
+ payload = {"label": label.model_dump(mode="json")}
278
+ response = self._session.post(url, json=payload)
279
+ self._handle_response_errors(response)
280
+ return response.json()
281
+
282
+ def add_labels(
283
+ self,
284
+ collection_id: str,
285
+ rubric_id: str,
286
+ labels: list[JudgeRunLabel],
287
+ ) -> dict[str, Any]:
288
+ """Attach multiple manual labels to a rubric.
289
+
290
+ Args:
291
+ collection_id: ID of the Collection that owns the rubric.
292
+ rubric_id: ID of the rubric the labels apply to.
293
+ labels: List of `JudgeRunLabel` objects.
294
+
295
+ Returns:
296
+ dict: API response containing status information.
297
+
298
+ Raises:
299
+ ValueError: If no labels are provided.
300
+ ValueError: If any label targets a different rubric.
301
+ requests.exceptions.HTTPError: If the API request fails.
302
+ """
303
+ if not labels:
304
+ raise ValueError("labels must contain at least one entry")
305
+
306
+ rubric_ids = {label.rubric_id for label in labels}
307
+ if rubric_ids != {rubric_id}:
308
+ raise ValueError(
309
+ "All labels must specify the same rubric_id that is provided to add_labels"
310
+ )
311
+
312
+ payload = {"labels": [l.model_dump(mode="json") for l in labels]}
313
+
314
+ url = f"{self._server_url}/rubric/{collection_id}/rubric/{rubric_id}/labels"
315
+ response = self._session.post(url, json=payload)
316
+ self._handle_response_errors(response)
317
+ return response.json()
318
+
319
+ def get_labels(self, collection_id: str, rubric_id: str) -> list[dict[str, Any]]:
320
+ """Retrieve all manual labels for a rubric.
321
+
322
+ Args:
323
+ collection_id: ID of the Collection that owns the rubric.
324
+ rubric_id: ID of the rubric to fetch labels for.
325
+
326
+ Returns:
327
+ list: List of label dictionaries. Each includes agent_run_id and label content.
328
+
329
+ Raises:
330
+ requests.exceptions.HTTPError: If the API request fails.
331
+ """
332
+ url = f"{self._server_url}/rubric/{collection_id}/rubric/{rubric_id}/labels"
333
+ response = self._session.get(url)
334
+ self._handle_response_errors(response)
335
+ return response.json()
336
+
247
337
  def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
248
338
  """Get a specific agent run by its ID.
249
339
 
@@ -259,7 +349,7 @@ class Docent:
259
349
  """
260
350
  url = f"{self._server_url}/{collection_id}/agent_run"
261
351
  response = self._session.get(url, params={"agent_run_id": agent_run_id})
262
- response.raise_for_status()
352
+ self._handle_response_errors(response)
263
353
  if response.json() is None:
264
354
  return None
265
355
  else:
@@ -281,7 +371,7 @@ class Docent:
281
371
  """
282
372
  url = f"{self._server_url}/{collection_id}/make_public"
283
373
  response = self._session.post(url)
284
- response.raise_for_status()
374
+ self._handle_response_errors(response)
285
375
 
286
376
  logger.info(f"Successfully made Collection '{collection_id}' public")
287
377
  return response.json()
@@ -303,13 +393,7 @@ class Docent:
303
393
  payload = {"email": email}
304
394
  response = self._session.post(url, json=payload)
305
395
 
306
- try:
307
- response.raise_for_status()
308
- except requests.exceptions.HTTPError:
309
- if response.status_code == 404:
310
- raise ValueError(f"The user you are trying to share with ({email}) does not exist.")
311
- else:
312
- raise # Re-raise the original exception
396
+ self._handle_response_errors(response)
313
397
 
314
398
  logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
315
399
  return response.json()
@@ -328,7 +412,7 @@ class Docent:
328
412
  """
329
413
  url = f"{self._server_url}/{collection_id}/agent_run_ids"
330
414
  response = self._session.get(url)
331
- response.raise_for_status()
415
+ self._handle_response_errors(response)
332
416
  return response.json()
333
417
 
334
418
  def recursively_ingest_inspect_logs(self, collection_id: str, fpath: str):
@@ -393,7 +477,7 @@ class Docent:
393
477
  payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
394
478
 
395
479
  response = self._session.post(url, json=payload)
396
- response.raise_for_status()
480
+ self._handle_response_errors(response)
397
481
 
398
482
  runs_from_file += len(batch_list)
399
483
  file_pbar.update(len(batch_list))
@@ -406,7 +490,7 @@ class Docent:
406
490
  logger.info("Computing embeddings for added runs...")
407
491
  url = f"{self._server_url}/{collection_id}/compute_embeddings"
408
492
  response = self._session.post(url)
409
- response.raise_for_status()
493
+ self._handle_response_errors(response)
410
494
 
411
495
  logger.info(
412
496
  f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
@@ -21,7 +21,7 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExport
21
21
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
22
22
  from opentelemetry.instrumentation.threading import ThreadingInstrumentor
23
23
  from opentelemetry.sdk.resources import Resource
24
- from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
24
+ from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
25
25
  from opentelemetry.sdk.trace.export import (
26
26
  BatchSpanProcessor,
27
27
  ConsoleSpanExporter,
@@ -29,20 +29,13 @@ from opentelemetry.sdk.trace.export import (
29
29
  )
30
30
  from opentelemetry.trace import Span
31
31
 
32
- # Configure logging
33
32
  logger = logging.getLogger(__name__)
34
- logger.setLevel(logging.ERROR)
35
33
 
36
34
  # Default configuration
37
35
  DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
38
36
  DEFAULT_COLLECTION_NAME = "default-collection-name"
39
37
 
40
38
 
41
- def _is_tracing_disabled() -> bool:
42
- """Check if tracing is disabled via environment variable."""
43
- return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
44
-
45
-
46
39
  class Instruments(Enum):
47
40
  """Enumeration of available instrument types."""
48
41
 
@@ -52,16 +45,10 @@ class Instruments(Enum):
52
45
  LANGCHAIN = "langchain"
53
46
 
54
47
 
55
- def _is_notebook() -> bool:
56
- """Check if we're running in a Jupyter notebook."""
57
- try:
58
- return "ipykernel" in sys.modules
59
- except Exception:
60
- return False
61
-
62
-
63
48
  class DocentTracer:
64
- """Manages Docent tracing setup and provides tracing utilities."""
49
+ """
50
+ Manages Docent tracing setup and provides tracing utilities.
51
+ """
65
52
 
66
53
  def __init__(
67
54
  self,
@@ -77,22 +64,6 @@ class DocentTracer:
77
64
  instruments: Optional[Set[Instruments]] = None,
78
65
  block_instruments: Optional[Set[Instruments]] = None,
79
66
  ):
80
- """
81
- Initialize Docent tracing manager.
82
-
83
- Args:
84
- collection_name: Name of the collection for resource attributes
85
- collection_id: Optional collection ID (auto-generated if not provided)
86
- agent_run_id: Optional agent_run_id to use for code outside of an agent run context (auto-generated if not provided)
87
- endpoint: OTLP endpoint URL(s) - can be a single string or list of strings for multiple endpoints
88
- headers: Optional headers for authentication
89
- api_key: Optional API key for bearer token authentication (takes precedence over env var)
90
- enable_console_export: Whether to export to console
91
- enable_otlp_export: Whether to export to OTLP endpoint
92
- disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
93
- instruments: Set of instruments to enable (None = all instruments)
94
- block_instruments: Set of instruments to explicitly disable
95
- """
96
67
  self._initialized: bool = False
97
68
  # Check if tracing is disabled via environment variable
98
69
  if _is_tracing_disabled():
@@ -163,8 +134,12 @@ class DocentTracer:
163
134
  """
164
135
  Get the current agent run ID from context.
165
136
 
137
+ Retrieves the agent run ID that was set in the current execution context.
138
+ If no agent run context is active, returns the default agent run ID.
139
+
166
140
  Returns:
167
- The current agent run ID if available, None otherwise
141
+ The current agent run ID if available, or the default agent run ID
142
+ if no context is active.
168
143
  """
169
144
  try:
170
145
  return self._agent_run_id_var.get()
@@ -249,12 +224,23 @@ class DocentTracer:
249
224
  return
250
225
 
251
226
  try:
227
+
228
+ # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
229
+ default_attribute_limit = 1024
230
+ env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
231
+ env_limit = int(env_value) if env_value.isdigit() else 0
232
+ attribute_limit = max(env_limit, default_attribute_limit)
233
+
234
+ span_limits = SpanLimits(
235
+ max_attributes=attribute_limit,
236
+ )
237
+
252
238
  # Create our own isolated tracer provider
253
239
  self._tracer_provider = TracerProvider(
254
- resource=Resource.create({"service.name": self.collection_name})
240
+ resource=Resource.create({"service.name": self.collection_name}),
241
+ span_limits=span_limits,
255
242
  )
256
243
 
257
- # Add custom span processor for agent_run_id and transcript_id
258
244
  class ContextSpanProcessor(SpanProcessor):
259
245
  def __init__(self, manager: "DocentTracer"):
260
246
  self.manager: "DocentTracer" = manager
@@ -312,11 +298,7 @@ class DocentTracer:
312
298
  )
313
299
 
314
300
  def on_end(self, span: ReadableSpan) -> None:
315
- # Debug logging for span completion
316
- span_attrs = span.attributes or {}
317
- logger.debug(
318
- f"Completed span: name='{span.name}', collection_id={span_attrs.get('collection_id')}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}, duration_ns={span.end_time - span.start_time if span.end_time and span.start_time else 'unknown'}"
319
- )
301
+ pass
320
302
 
321
303
  def shutdown(self) -> None:
322
304
  pass
@@ -422,7 +404,17 @@ class DocentTracer:
422
404
  raise
423
405
 
424
406
  def cleanup(self):
425
- """Clean up Docent tracing resources and signal trace completion to backend."""
407
+ """
408
+ Clean up Docent tracing resources.
409
+
410
+ Flushes all pending spans to exporters and shuts down the tracer provider.
411
+ This method is automatically called during application shutdown via atexit
412
+ handlers, but can also be called manually for explicit cleanup.
413
+
414
+ The cleanup process:
415
+ 1. Flushes all span processors to ensure data is exported
416
+ 2. Shuts down the tracer provider and releases resources
417
+ """
426
418
  if self._disabled:
427
419
  return
428
420
 
@@ -473,7 +465,7 @@ class DocentTracer:
473
465
  if disabled and self._initialized:
474
466
  self.cleanup()
475
467
 
476
- def verify_initialized(self) -> bool:
468
+ def is_initialized(self) -> bool:
477
469
  """Verify if the manager is properly initialized."""
478
470
  return self._initialized
479
471
 
@@ -1063,8 +1055,9 @@ def initialize_tracing(
1063
1055
  collection_id: Optional collection ID (auto-generated if not provided)
1064
1056
  endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
1065
1057
  headers: Optional headers for authentication
1066
- api_key: Optional API key for bearer token authentication (takes precedence over env var)
1067
- enable_console_export: Whether to export spans to console
1058
+ api_key: Optional API key for bearer token authentication (takes precedence
1059
+ over DOCENT_API_KEY environment variable)
1060
+ enable_console_export: Whether to export spans to console for debugging
1068
1061
  enable_otlp_export: Whether to export spans to OTLP endpoint
1069
1062
  disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
1070
1063
  instruments: Set of instruments to enable (None = all instruments).
@@ -1074,7 +1067,6 @@ def initialize_tracing(
1074
1067
  The initialized Docent tracer
1075
1068
 
1076
1069
  Example:
1077
- # Basic setup
1078
1070
  initialize_tracing("my-collection")
1079
1071
  """
1080
1072
 
@@ -1137,17 +1129,17 @@ def close_tracing() -> None:
1137
1129
  def flush_tracing() -> None:
1138
1130
  """Force flush all spans to exporters."""
1139
1131
  if _global_tracer:
1140
- logger.debug("Flushing global tracer")
1132
+ logger.debug("Flushing Docent tracer")
1141
1133
  _global_tracer.flush()
1142
1134
  else:
1143
1135
  logger.debug("No global tracer available to flush")
1144
1136
 
1145
1137
 
1146
- def verify_initialized() -> bool:
1138
+ def is_initialized() -> bool:
1147
1139
  """Verify if the global Docent tracer is properly initialized."""
1148
1140
  if _global_tracer is None:
1149
1141
  return False
1150
- return _global_tracer.verify_initialized()
1142
+ return _global_tracer.is_initialized()
1151
1143
 
1152
1144
 
1153
1145
  def is_disabled() -> bool:
@@ -1764,3 +1756,16 @@ def transcript_group_context(
1764
1756
  return TranscriptGroupContext(
1765
1757
  name, transcript_group_id, description, metadata, parent_transcript_group_id
1766
1758
  )
1759
+
1760
+
1761
+ def _is_tracing_disabled() -> bool:
1762
+ """Check if tracing is disabled via environment variable."""
1763
+ return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
1764
+
1765
+
1766
+ def _is_notebook() -> bool:
1767
+ """Check if we're running in a Jupyter notebook."""
1768
+ try:
1769
+ return "ipykernel" in sys.modules
1770
+ except Exception:
1771
+ return False
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "docent-python"
3
3
  description = "Docent SDK"
4
- version = "0.1.18-alpha"
4
+ version = "0.1.19-alpha"
5
5
  authors = [
6
6
  { name="Transluce", email="info@transluce.org" },
7
7
  ]
@@ -360,7 +360,7 @@ wheels = [
360
360
 
361
361
  [[package]]
362
362
  name = "docent-python"
363
- version = "0.1.15a0"
363
+ version = "0.1.18a0"
364
364
  source = { editable = "." }
365
365
  dependencies = [
366
366
  { name = "backoff" },
@@ -1,229 +0,0 @@
1
- # import traceback
2
- # from typing import Any, Optional
3
-
4
- # from pydantic import (
5
- # BaseModel,
6
- # ConfigDict,
7
- # Field,
8
- # PrivateAttr,
9
- # SerializerFunctionWrapHandler,
10
- # model_serializer,
11
- # model_validator,
12
- # )
13
-
14
- # from docent._log_util import get_logger
15
-
16
- # logger = get_logger(__name__)
17
-
18
- # SINGLETONS = (int, float, str, bool)
19
-
20
-
21
- # class BaseMetadata(BaseModel):
22
- # """Provides common functionality for accessing and validating metadata fields.
23
- # All metadata classes should inherit from this class.
24
-
25
- # Serialization Behavior:
26
- # - Field descriptions are highly recommended and stored in serialized versions of the object.
27
- # - When a subclass of BaseMetadata is uploaded to a server, all extra fields and their descriptions are retained.
28
- # - To recover the original structure with proper typing upon download, use:
29
- # `CustomMetadataClass.model_validate(obj.model_dump())`.
30
-
31
- # Attributes:
32
- # model_config: Pydantic configuration that allows extra fields.
33
- # allow_fields_without_descriptions: Boolean indicating whether to allow fields without descriptions.
34
- # """
35
-
36
- # model_config = ConfigDict(extra="allow")
37
- # allow_fields_without_descriptions: bool = True
38
-
39
- # # Private attribute to store field descriptions
40
- # _field_descriptions: dict[str, str | None] | None = PrivateAttr(default=None)
41
- # _internal_basemetadata_fields: set[str] = PrivateAttr(
42
- # default={
43
- # "allow_fields_without_descriptions",
44
- # "model_config",
45
- # "_field_descriptions",
46
- # }
47
- # )
48
-
49
- # @model_validator(mode="after")
50
- # def _validate_field_types_and_descriptions(self):
51
- # """Validates that all fields have descriptions and proper types.
52
-
53
- # Returns:
54
- # Self: The validated model instance.
55
-
56
- # Raises:
57
- # ValueError: If any field is missing a description or has an invalid type.
58
- # """
59
- # # Validate each field in the model
60
- # for field_name, field_info in self.__class__.model_fields.items():
61
- # if field_name in self._internal_basemetadata_fields:
62
- # continue
63
-
64
- # # Check that field has a description
65
- # if field_info.description is None:
66
- # if not self.allow_fields_without_descriptions:
67
- # raise ValueError(
68
- # f"Field `{field_name}` needs a description in the definition of `{self.__class__.__name__}`, like `{field_name}: T = Field(description=..., default=...)`. "
69
- # "To allow un-described fields, set `allow_fields_without_descriptions = True` on the instance or in your metadata class definition."
70
- # )
71
-
72
- # # Validate that the metadata is JSON serializable
73
- # try:
74
- # self.model_dump_json()
75
- # except Exception as e:
76
- # raise ValueError(
77
- # f"Metadata is not JSON serializable: {e}. Traceback: {traceback.format_exc()}"
78
- # )
79
-
80
- # return self
81
-
82
- # def model_post_init(self, __context: Any) -> None:
83
- # """Initializes field descriptions from extra data after model initialization.
84
-
85
- # Args:
86
- # __context: The context provided by Pydantic's post-initialization hook.
87
- # """
88
- # fd = self.model_extra.pop("_field_descriptions", None) if self.model_extra else None
89
- # if fd is not None:
90
- # self._field_descriptions = fd
91
-
92
- # @model_serializer(mode="wrap")
93
- # def _serialize_model(self, handler: SerializerFunctionWrapHandler):
94
- # # Call the default serializer
95
- # data = handler(self)
96
-
97
- # # Dump the field descriptions
98
- # if self._field_descriptions is None:
99
- # self._field_descriptions = self._compute_field_descriptions()
100
- # data["_field_descriptions"] = self._field_descriptions
101
-
102
- # return data
103
-
104
- # def model_dump(
105
- # self, *args: Any, strip_internal_fields: bool = False, **kwargs: Any
106
- # ) -> dict[str, Any]:
107
- # data = super().model_dump(*args, **kwargs)
108
-
109
- # # Remove internal fields if requested
110
- # if strip_internal_fields:
111
- # for field in self._internal_basemetadata_fields:
112
- # if field in data:
113
- # data.pop(field)
114
-
115
- # return data
116
-
117
- # def get(self, key: str, default_value: Any = None) -> Any:
118
- # """Gets a value from the metadata by key.
119
-
120
- # Args:
121
- # key: The key to look up in the metadata.
122
- # default_value: Value to return if the key is not found. Defaults to None.
123
-
124
- # Returns:
125
- # Any: The value associated with the key, or the default value if not found.
126
- # """
127
- # # Check if the field exists in the model's fields
128
- # if key in self.__class__.model_fields or (
129
- # self.model_extra is not None and key in self.model_extra
130
- # ):
131
- # # Field exists, return its value (even if None)
132
- # return getattr(self, key)
133
-
134
- # logger.warning(f"Field '{key}' not found in {self.__class__.__name__}")
135
- # return default_value
136
-
137
- # def get_field_description(self, field_name: str) -> str | None:
138
- # """Gets the description of a field defined in the model schema.
139
-
140
- # Args:
141
- # field_name: The name of the field.
142
-
143
- # Returns:
144
- # str or None: The description string if the field is defined in the model schema
145
- # and has a description, otherwise None.
146
- # """
147
- # if self._field_descriptions is None:
148
- # self._field_descriptions = self._compute_field_descriptions()
149
-
150
- # if field_name in self._field_descriptions:
151
- # return self._field_descriptions[field_name]
152
-
153
- # logger.warning(
154
- # f"Field description for '{field_name}' not found in {self.__class__.__name__}"
155
- # )
156
- # return None
157
-
158
- # def get_all_field_descriptions(self) -> dict[str, str | None]:
159
- # """Gets descriptions for all fields defined in the model schema.
160
-
161
- # Returns:
162
- # dict: A dictionary mapping field names to their descriptions.
163
- # Only includes fields that have descriptions defined in the schema.
164
- # """
165
- # if self._field_descriptions is None:
166
- # self._field_descriptions = self._compute_field_descriptions()
167
- # return self._field_descriptions
168
-
169
- # def _compute_field_descriptions(self) -> dict[str, str | None]:
170
- # """Computes descriptions for all fields in the model.
171
-
172
- # Returns:
173
- # dict: A dictionary mapping field names to their descriptions.
174
- # """
175
- # field_descriptions: dict[str, Optional[str]] = {}
176
- # for field_name, field_info in self.__class__.model_fields.items():
177
- # if field_name not in self._internal_basemetadata_fields:
178
- # field_descriptions[field_name] = field_info.description
179
- # return field_descriptions
180
-
181
-
182
- # class BaseAgentRunMetadata(BaseMetadata):
183
- # """Extends BaseMetadata with fields specific to agent evaluation runs.
184
-
185
- # Attributes:
186
- # scores: Dictionary of evaluation metrics.
187
- # """
188
-
189
- # scores: dict[str, int | float | bool | None] = Field(
190
- # description="A dict of score_key -> score_value. Use one key for each metric you're tracking."
191
- # )
192
-
193
-
194
- # class InspectAgentRunMetadata(BaseAgentRunMetadata):
195
- # """Extends BaseAgentRunMetadata with fields specific to Inspect runs.
196
-
197
- # Attributes:
198
- # task_id: The ID of the 'benchmark' or 'set of evals' that the transcript belongs to
199
- # sample_id: The specific task inside of the `task_id` benchmark that the transcript was run on
200
- # epoch_id: Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run.
201
- # model: The model that was used to generate the transcript
202
- # scoring_metadata: Additional metadata about the scoring process
203
- # additional_metadata: Additional metadata about the transcript
204
- # """
205
-
206
- # task_id: str = Field(
207
- # description="The ID of the 'benchmark' or 'set of evals' that the transcript belongs to"
208
- # )
209
-
210
- # # Identification of this particular run
211
- # sample_id: str = Field(
212
- # description="The specific task inside of the `task_id` benchmark that the transcript was run on"
213
- # )
214
- # epoch_id: int = Field(
215
- # description="Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run."
216
- # )
217
-
218
- # # Parameters for the run
219
- # model: str = Field(description="The model that was used to generate the transcript")
220
-
221
- # # Scoring
222
- # scoring_metadata: dict[str, Any] | None = Field(
223
- # description="Additional metadata about the scoring process"
224
- # )
225
-
226
- # # Inspect metadata
227
- # additional_metadata: dict[str, Any] | None = Field(
228
- # description="Additional metadata about the transcript"
229
- # )
@@ -1,12 +0,0 @@
1
- from typing import Any
2
-
3
- import yaml
4
- from pydantic_core import to_jsonable_python
5
-
6
-
7
- def yaml_dump_metadata(metadata: dict[str, Any]) -> str | None:
8
- if not metadata:
9
- return None
10
- metadata_obj = to_jsonable_python(metadata)
11
- yaml_text = yaml.dump(metadata_obj, width=float("inf"))
12
- return yaml_text.strip()