docent-python 0.1.41a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

Files changed (59) hide show
  1. docent/__init__.py +4 -0
  2. docent/_llm_util/__init__.py +0 -0
  3. docent/_llm_util/data_models/__init__.py +0 -0
  4. docent/_llm_util/data_models/exceptions.py +48 -0
  5. docent/_llm_util/data_models/llm_output.py +331 -0
  6. docent/_llm_util/llm_cache.py +193 -0
  7. docent/_llm_util/llm_svc.py +472 -0
  8. docent/_llm_util/model_registry.py +134 -0
  9. docent/_llm_util/providers/__init__.py +0 -0
  10. docent/_llm_util/providers/anthropic.py +537 -0
  11. docent/_llm_util/providers/common.py +41 -0
  12. docent/_llm_util/providers/google.py +530 -0
  13. docent/_llm_util/providers/openai.py +745 -0
  14. docent/_llm_util/providers/openrouter.py +375 -0
  15. docent/_llm_util/providers/preference_types.py +104 -0
  16. docent/_llm_util/providers/provider_registry.py +164 -0
  17. docent/_log_util/__init__.py +3 -0
  18. docent/_log_util/logger.py +141 -0
  19. docent/data_models/__init__.py +14 -0
  20. docent/data_models/_tiktoken_util.py +91 -0
  21. docent/data_models/agent_run.py +473 -0
  22. docent/data_models/chat/__init__.py +37 -0
  23. docent/data_models/chat/content.py +56 -0
  24. docent/data_models/chat/message.py +191 -0
  25. docent/data_models/chat/tool.py +109 -0
  26. docent/data_models/citation.py +187 -0
  27. docent/data_models/formatted_objects.py +84 -0
  28. docent/data_models/judge.py +17 -0
  29. docent/data_models/metadata_util.py +16 -0
  30. docent/data_models/regex.py +56 -0
  31. docent/data_models/transcript.py +305 -0
  32. docent/data_models/util.py +170 -0
  33. docent/judges/__init__.py +23 -0
  34. docent/judges/analysis.py +77 -0
  35. docent/judges/impl.py +587 -0
  36. docent/judges/runner.py +129 -0
  37. docent/judges/stats.py +205 -0
  38. docent/judges/types.py +320 -0
  39. docent/judges/util/forgiving_json.py +108 -0
  40. docent/judges/util/meta_schema.json +86 -0
  41. docent/judges/util/meta_schema.py +29 -0
  42. docent/judges/util/parse_output.py +68 -0
  43. docent/judges/util/voting.py +139 -0
  44. docent/loaders/load_inspect.py +215 -0
  45. docent/py.typed +0 -0
  46. docent/samples/__init__.py +3 -0
  47. docent/samples/load.py +9 -0
  48. docent/samples/log.eval +0 -0
  49. docent/samples/tb_airline.json +1 -0
  50. docent/sdk/__init__.py +0 -0
  51. docent/sdk/agent_run_writer.py +317 -0
  52. docent/sdk/client.py +1186 -0
  53. docent/sdk/llm_context.py +432 -0
  54. docent/trace.py +2741 -0
  55. docent/trace_temp.py +1086 -0
  56. docent_python-0.1.41a0.dist-info/METADATA +33 -0
  57. docent_python-0.1.41a0.dist-info/RECORD +59 -0
  58. docent_python-0.1.41a0.dist-info/WHEEL +4 -0
  59. docent_python-0.1.41a0.dist-info/licenses/LICENSE.md +13 -0
@@ -0,0 +1,432 @@
1
+ import json
2
+ import re
3
+ import sys
4
+ import textwrap
5
+ from typing import Any
6
+
7
+ from docent.data_models.agent_run import AgentRun
8
+ from docent.data_models.citation import (
9
+ AgentRunMetadataItem,
10
+ CitationTarget,
11
+ InlineCitation,
12
+ ResolvedCitationItemUnion,
13
+ TranscriptBlockContentItem,
14
+ TranscriptBlockMetadataItem,
15
+ TranscriptMetadataItem,
16
+ parse_citations,
17
+ )
18
+ from docent.data_models.formatted_objects import FormattedAgentRun, FormattedTranscript
19
+ from docent.data_models.transcript import Transcript, format_chat_message
20
+
21
# Markers that wrap the quoted text range inside a citation,
# e.g. [T0B1:<RANGE>some text</RANGE>].
RANGE_BEGIN = "<RANGE>"
RANGE_END = "</RANGE>"

# The kinds of objects that can be placed in an LLMContext.
LLMContextItem = AgentRun | Transcript

# Item-alias patterns. The three metadata patterns are anchored at both ends and
# capture everything after "M." up to (but excluding) any colon; the plain block
# pattern has no anchors of its own.
_SINGLE_RE = re.compile(r"T(\d+)B(\d+)")  # [T0B1]
_AGENT_RUN_METADATA_RE = re.compile(r"^R(\d+)M\.([^:]+)$")  # [R0M.key]
_TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$")  # [T0M.key]
_MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$")  # [T0B1M.key]

# Matches the ":<RANGE>...</RANGE>" suffix of a citation (non-greedy body).
_RANGE_CONTENT_RE = re.compile(rf":\s*{re.escape(RANGE_BEGIN)}.*?{re.escape(RANGE_END)}")
31
+
32
+
33
class LLMContext:
    """Manages a collection of objects (agent runs, transcripts) for LLM consumption.

    This class provides:
    - Assignment of local IDs (T0, T1, R0, etc.) for citations
    - Serialization to a plain dictionary
    - Conversion to an LLM-ready string
    - Citation resolution mapping local IDs back to the stored object IDs

    Example usage:
        context = LLMContext()
        context.add(agent_run1)
        context.add(agent_run2)

        # Get string representation for LLM
        llm_input = context.to_str()

        # Get system message with citation instructions
        system_msg = context.get_system_message()

        # Serialize for storage
        serialized = context.to_dict()
    """

    def __init__(self, items: list[LLMContextItem] | None = None):
        """Create a context, optionally pre-populated with ``items`` (added in order)."""
        # Aliases ("R0", "T3", ...) of the items passed to add(), in insertion order.
        self.root_items: list[str] = []

        # Numeric alias index -> object. Indices are assigned sequentially per kind.
        self.transcript_aliases: dict[int, Transcript] = {}
        self.agent_run_aliases: dict[int, AgentRun] = {}

        self.agent_run_collection_ids: dict[str, str] = {}  # agent_run_id -> collection_id
        self.transcript_to_agent_run: dict[str, str] = {}  # transcript_id -> agent_run_id

        if items is not None:
            for item in items:
                self.add(item)

    def add(self, item: LLMContextItem) -> None:
        """Add an object to the context.

        Accepts AgentRun, Transcript, FormattedAgentRun, or FormattedTranscript.
        For an agent run, every transcript returned by
        ``get_transcript_ids_ordered(full_tree=False)`` also receives its own
        ``T<n>`` alias and is recorded as belonging to that run.

        Raises:
            ValueError: If ``item`` is neither an AgentRun nor a Transcript.
        """
        alias = self._create_alias(item)

        if isinstance(item, AgentRun):
            # Assign transcript aliases in the order the agent run reports them.
            for t_id in item.get_transcript_ids_ordered(full_tree=False):
                self._create_alias(item.transcript_dict[t_id])
                self.transcript_to_agent_run[t_id] = item.id

        self.root_items.append(alias)

    def _create_alias(self, item: LLMContextItem) -> str:
        """Register ``item`` under the next free index of its kind and return the alias."""
        if isinstance(item, AgentRun):
            idx = len(self.agent_run_aliases)
            alias = "R" + str(idx)
            self.agent_run_aliases[idx] = item
        elif isinstance(item, Transcript):  # type: ignore
            idx = len(self.transcript_aliases)
            alias = "T" + str(idx)
            self.transcript_aliases[idx] = item
        else:
            raise ValueError(f"Unknown item type: {type(item)}")
        return alias

    def get_item_by_alias(self, alias: str) -> LLMContextItem:
        """Return the object registered under ``alias`` (e.g. ``"R0"`` or ``"T2"``).

        Raises:
            ValueError: If the alias is empty, its numeric part cannot be parsed,
                its prefix is not ``R``/``T``, or no object has that index.
        """
        if not alias:
            raise ValueError("Alias cannot be empty")

        prefix = alias[0]
        try:
            idx = int(alias[1:])
        except ValueError as exc:
            raise ValueError(f"Invalid alias format: {alias}") from exc

        if prefix == "R":
            if idx not in self.agent_run_aliases:
                raise ValueError(f"Unknown agent run alias: {alias}")
            return self.agent_run_aliases[idx]

        if prefix == "T":
            if idx not in self.transcript_aliases:
                raise ValueError(f"Unknown transcript alias: {alias}")
            return self.transcript_aliases[idx]

        raise ValueError(f"Unknown alias type: {alias}")

    def to_str(self, token_limit: int = sys.maxsize) -> str:
        """Format all root objects for LLM consumption.

        Args:
            token_limit: Accepted for interface compatibility. NOTE: this
                implementation does not read it, so no truncation happens here.

        Returns:
            The rendered root items, in insertion order, joined by blank lines.

        Raises:
            ValueError: If a root alias resolves to an unsupported object type.
        """
        # Transcript id -> alias index. It does not change while rendering, so it is
        # built once here rather than once per agent run.
        id_to_idx_map = {t.id: i for i, t in self.transcript_aliases.items()}

        sections: list[str] = []
        for alias in self.root_items:
            item = self.get_item_by_alias(alias)
            if isinstance(item, Transcript):
                sections.append(item.to_text_new(alias))
            elif isinstance(item, AgentRun):  # type: ignore
                sections.append(item.to_text_new(alias, t_idx_map=id_to_idx_map))
            else:
                raise ValueError(f"Unknown item type: {type(item)}")

        return "\n\n".join(sections)

    def get_system_message(self) -> str:
        """Generate a system prompt with citation instructions for multi-object context.

        Returns:
            System message string with instructions on how to cite objects
        """
        context_description = "You are a helpful assistant that specializes in analyzing transcripts of AI agent behavior."

        citation_instructions = textwrap.dedent(
            f"""
            Anytime you quote an item that has an ID, or make any claim about such an item, add an inline citation.

            To cite an item, write the item ID in brackets. For example, to cite T0B1, write [T0B1].

            You may cite a specific range of text within an item. Use {RANGE_BEGIN} and {RANGE_END} to mark the specific range of text. Add it after the item ID separated by a colon. For example, to cite the part of T0B1 where the agent says "I understand the task", write [T0B1:{RANGE_BEGIN}I understand the task{RANGE_END}]. Citations must follow this exact format. The markers {RANGE_BEGIN} and {RANGE_END} must be used ONLY inside the brackets of a citation.

            - When citing metadata (that is, an item whose ID ends with M), you must cite a top-level key with dot syntax. For example, for agent run 0 metadata: [R0M.task_description].
            - You may not cite nested keys. For example, [T0B1M.status.code] is invalid.
            - Within a top-level metadata key you may cite a range of text that appears in the value. For example, [T0B1M.status:{RANGE_BEGIN}\"running\":false{RANGE_END}].

            Important notes:
            - You must include the full content of the text range {RANGE_BEGIN} and {RANGE_END}, EXACTLY as it appears in the transcript, word-for-word, including any markers or punctuation that appear in the middle of the text.
            - Citations must be as specific as possible. This means you should usually cite a specific text range.
            - A citation is not a quote. For brevity, text ranges will not be rendered inline. The user will have to click on the citation to see the full text range.
            - Citations are self-contained. Do NOT label them as citation or evidence. Just insert the citation by itself at the appropriate place in the text.
            - Citations must come immediately after the part of a claim that they support. This may be in the middle of a sentence.
            - Each pair of brackets must contain only one citation. To cite multiple items, use multiple pairs of brackets, like [T0B0] [T0B1].
            - Outside of citations, do not refer to item IDs.
            - Outside of citations, avoid quoting or paraphrasing the transcript.
            """
        )

        return f"{context_description}\n\n{citation_instructions}"

    def to_dict(self) -> dict[str, Any]:
        """Serialize the context to a plain dictionary.

        Alias tables are emitted as ``{str(index): object_id}``. Formatted objects
        (FormattedAgentRun / FormattedTranscript) are additionally dumped in full
        under ``formatted_data``; regular objects are represented only by their IDs.

        Returns:
            Dictionary with string keys, suitable for JSON-style storage
        """
        # JSON requires string keys, so the integer alias indices are stringified.
        transcript_aliases_serialized = {
            str(idx): transcript.id for idx, transcript in self.transcript_aliases.items()
        }
        agent_run_aliases_serialized = {
            str(idx): agent_run.id for idx, agent_run in self.agent_run_aliases.items()
        }

        formatted_data: dict[str, Any] = {}

        # Formatted agent runs are dumped whole; remember which transcripts they
        # already carry so those are not dumped a second time below.
        serialized_transcript_ids: set[str] = set()
        for agent_run in self.agent_run_aliases.values():
            if isinstance(agent_run, FormattedAgentRun):
                formatted_data[agent_run.id] = agent_run.model_dump(mode="json")
                serialized_transcript_ids.update(t.id for t in agent_run.transcripts)

        # Formatted transcripts that are not already part of a dumped agent run.
        for transcript in self.transcript_aliases.values():
            if transcript.id in serialized_transcript_ids:
                continue
            if isinstance(transcript, FormattedTranscript):
                formatted_data[transcript.id] = transcript.model_dump(mode="json")

        return {
            "version": "1",
            "root_items": self.root_items,
            "transcript_aliases": transcript_aliases_serialized,
            "agent_run_aliases": agent_run_aliases_serialized,
            "formatted_data": formatted_data,
            "agent_run_collection_ids": self.agent_run_collection_ids,
            "transcript_to_agent_run": self.transcript_to_agent_run,
        }

    @staticmethod
    def _require_top_level_key(metadata_key: str, item_alias: str) -> None:
        """Raise ValueError if ``metadata_key`` contains a dot (e.g. ``status.code``)."""
        if "." in metadata_key:
            raise ValueError(f"Nested keys are not allowed: {item_alias}")

    def _transcript_provenance(self, transcript_idx: int) -> tuple[str, str, str]:
        """Return ``(transcript_id, agent_run_id, collection_id)`` for a transcript index.

        The agent run and collection IDs fall back to ``""`` when unknown.

        Raises:
            KeyError: If no transcript is registered under ``transcript_idx``.
        """
        transcript = self.transcript_aliases[transcript_idx]
        agent_run_id = self.transcript_to_agent_run.get(transcript.id, "")
        collection_id = self.agent_run_collection_ids.get(agent_run_id, "")
        return transcript.id, agent_run_id, collection_id

    def resolve_item_alias(self, item_alias: str) -> ResolvedCitationItemUnion:
        """Map a local citation alias to a resolved citation item.

        Recognized forms, tried in this order: ``T0B0M.key`` (message metadata),
        ``T0M.key`` (transcript metadata), ``R0M.key`` (agent run metadata) and
        ``T0B0`` (message content).

        Raises:
            ValueError: If the alias matches none of the forms, or a metadata key
                contains a dot.
            KeyError: If the alias refers to an index that is not registered.
        """
        # 1) T0B0M.key
        if m := _MESSAGE_METADATA_RE.match(item_alias):
            metadata_key = m.group(3)
            self._require_top_level_key(metadata_key, item_alias)
            transcript_id, agent_run_id, collection_id = self._transcript_provenance(
                int(m.group(1))
            )
            return TranscriptBlockMetadataItem(
                agent_run_id=agent_run_id,
                collection_id=collection_id,
                transcript_id=transcript_id,
                block_idx=int(m.group(2)),
                metadata_key=metadata_key,
            )

        # 2) T0M.key
        if m := _TRANSCRIPT_METADATA_RE.match(item_alias):
            metadata_key = m.group(2)
            self._require_top_level_key(metadata_key, item_alias)
            transcript_id, agent_run_id, collection_id = self._transcript_provenance(
                int(m.group(1))
            )
            return TranscriptMetadataItem(
                agent_run_id=agent_run_id,
                collection_id=collection_id,
                transcript_id=transcript_id,
                metadata_key=metadata_key,
            )

        # 3) R0M.key
        if m := _AGENT_RUN_METADATA_RE.match(item_alias):
            metadata_key = m.group(2)
            self._require_top_level_key(metadata_key, item_alias)
            agent_run = self.agent_run_aliases[int(m.group(1))]
            return AgentRunMetadataItem(
                agent_run_id=agent_run.id,
                collection_id=self.agent_run_collection_ids.get(agent_run.id, ""),
                metadata_key=metadata_key,
            )

        # 4) T0B0 -- checked last because this pattern is a prefix match and would
        # otherwise also accept the metadata forms above.
        if m := _SINGLE_RE.match(item_alias):
            transcript_id, agent_run_id, collection_id = self._transcript_provenance(
                int(m.group(1))
            )
            return TranscriptBlockContentItem(
                agent_run_id=agent_run_id,
                collection_id=collection_id,
                transcript_id=transcript_id,
                block_idx=int(m.group(2)),
            )

        raise ValueError(f"Unknown item alias: {item_alias}")
304
+
305
+
306
def _build_whitespace_flexible_regex(pattern: str) -> re.Pattern[str]:
    """Compile ``pattern`` as a literal in which any whitespace run matches ``\\s+``.

    Every non-whitespace character is escaped so it matches itself; each maximal
    run of whitespace characters collapses into a single ``\\s+``. The result is
    compiled with ``re.DOTALL``.
    """
    pieces: list[str] = []
    previous_was_space = False
    for char in pattern:
        if char.isspace():
            # Only the first character of a whitespace run emits a token.
            if not previous_was_space:
                pieces.append(r"\s+")
            previous_was_space = True
        else:
            pieces.append(re.escape(char))
            previous_was_space = False
    return re.compile("".join(pieces), re.DOTALL)
320
+
321
+
322
def _find_pattern_in_text(text: str, pattern: str | None) -> list[tuple[int, int]]:
    """Locate every whitespace-flexible occurrence of ``pattern`` in ``text``.

    Returns a list of ``(start_index, end_index)`` tuples, one per match whose
    matched text is not purely whitespace. An empty or missing pattern, or a
    pattern that fails to compile, yields an empty list.
    """
    if not pattern:
        return []

    try:
        flexible = _build_whitespace_flexible_regex(pattern)
    except re.error:
        return []

    return [
        (hit.start(), hit.end())
        for hit in flexible.finditer(text)
        if hit.group().strip()
    ]
341
+
342
+
343
def _get_text_for_citation_target(target: CitationTarget, context: LLMContext) -> str | None:
    """Return the text a citation target points at, or None if it cannot be found.

    Metadata targets yield the JSON encoding of the cited metadata value; a
    message-content target yields the message as rendered by
    ``format_chat_message`` under its ``T<idx>B<block>`` label.
    """
    cited = target.item

    if isinstance(cited, AgentRunMetadataItem):
        for run in context.agent_run_aliases.values():
            if run.id != cited.agent_run_id:
                continue
            value = run.metadata.get(cited.metadata_key)
            if value is not None:
                return json.dumps(value)
        return None

    if isinstance(cited, TranscriptMetadataItem):
        for candidate in context.transcript_aliases.values():
            if candidate.id != cited.transcript_id:
                continue
            value = candidate.metadata.get(cited.metadata_key)
            if value is not None:
                return json.dumps(value)
        return None

    if isinstance(cited, TranscriptBlockMetadataItem):
        for candidate in context.transcript_aliases.values():
            if candidate.id != cited.transcript_id:
                continue
            # Skip block indices that fall outside this transcript's messages.
            if not 0 <= cited.block_idx < len(candidate.messages):
                continue
            block = candidate.messages[cited.block_idx]
            value = block.metadata.get(cited.metadata_key) if block.metadata else None
            if value is not None:
                return json.dumps(value)
        return None

    # Anything else is treated as a TranscriptBlockContentItem.
    for alias_idx, candidate in context.transcript_aliases.items():
        id_matches = candidate.id == cited.transcript_id
        if id_matches and 0 <= cited.block_idx < len(candidate.messages):
            label = f"T{alias_idx}B{cited.block_idx}"
            return format_chat_message(candidate.messages[cited.block_idx], label)

    return None
383
+
384
+
385
def resolve_citations_with_context(
    text: str, context: LLMContext, validate_text_ranges: bool = True
) -> tuple[str, list[InlineCitation]]:
    """Parse the citations in ``text`` and resolve their aliases through ``context``.

    Builds on ``parse_citations``: each parsed citation's local alias is resolved
    with ``context.resolve_item_alias``. Citations whose alias cannot be resolved
    (``KeyError`` / ``ValueError``) are dropped.

    Args:
        text: The text to parse citations from
        context: LLMContext used to resolve local aliases
        validate_text_ranges: If True, a citation's text range is cleared (set to
            None) when the target text is unavailable or the range's start pattern
            does not occur in it

    Returns:
        A tuple of (cleaned_text, citations) with the successfully resolved citations
    """
    cleaned_text, parsed_citations = parse_citations(text)
    resolved: list[InlineCitation] = []

    for parsed in parsed_citations:
        try:
            item = context.resolve_item_alias(parsed.item_alias)
            cited_range = parsed.text_range
            target = CitationTarget(item=item, text_range=cited_range)

            if validate_text_ranges and cited_range is not None:
                source_text = _get_text_for_citation_target(target, context)
                range_found = source_text is not None and bool(
                    _find_pattern_in_text(source_text, cited_range.start_pattern)
                )
                if not range_found:
                    # Keep the citation but drop the range that could not be verified.
                    target.text_range = None

            resolved.append(
                InlineCitation(
                    start_idx=parsed.start_idx,
                    end_idx=parsed.end_idx,
                    target=target,
                )
            )
        except (KeyError, ValueError):
            # Unable to resolve citation target
            continue

    return cleaned_text, resolved