docent-python 0.1.35a0__tar.gz → 0.1.36a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/PKG-INFO +1 -1
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/__init__.py +2 -2
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/agent_run.py +22 -41
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/chat/__init__.py +6 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/chat/message.py +64 -5
- docent_python-0.1.36a0/docent/data_models/citation.py +187 -0
- docent_python-0.1.36a0/docent/data_models/formatted_objects.py +84 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/transcript.py +31 -191
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/types.py +14 -5
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/util/parse_output.py +5 -24
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/sdk/client.py +68 -1
- docent_python-0.1.36a0/docent/sdk/llm_context.py +430 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/pyproject.toml +1 -1
- docent_python-0.1.35a0/docent/data_models/citation.py +0 -233
- docent_python-0.1.35a0/docent/data_models/remove_invalid_citation_ranges.py +0 -176
- docent_python-0.1.35a0/docent/data_models/shared_types.py +0 -10
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/.gitignore +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/LICENSE.md +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/README.md +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/data_models/llm_output.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/llm_svc.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/model_registry.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/anthropic.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/google.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/openai.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/openrouter.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/collection.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/py.typed +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/trace.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/docent/trace_temp.py +0 -0
- {docent_python-0.1.35a0 → docent_python-0.1.36a0}/uv.lock +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from docent.data_models.agent_run import AgentRun
|
|
2
|
-
from docent.data_models.citation import Citation
|
|
2
|
+
from docent.data_models.citation import InlineCitation
|
|
3
3
|
from docent.data_models.collection import Collection
|
|
4
4
|
from docent.data_models.judge import Label
|
|
5
5
|
from docent.data_models.regex import RegexSnippet
|
|
@@ -7,8 +7,8 @@ from docent.data_models.transcript import Transcript, TranscriptGroup
|
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"AgentRun",
|
|
10
|
-
"Citation",
|
|
11
10
|
"Collection",
|
|
11
|
+
"InlineCitation",
|
|
12
12
|
"Label",
|
|
13
13
|
"RegexSnippet",
|
|
14
14
|
"Transcript",
|
|
@@ -134,7 +134,7 @@ class AgentRun(BaseModel):
|
|
|
134
134
|
# Converting to text #
|
|
135
135
|
######################
|
|
136
136
|
|
|
137
|
-
def _to_text_impl(self, token_limit: int = sys.maxsize
|
|
137
|
+
def _to_text_impl(self, token_limit: int = sys.maxsize) -> list[str]:
|
|
138
138
|
"""
|
|
139
139
|
Core implementation for converting agent run to text representation.
|
|
140
140
|
|
|
@@ -151,8 +151,6 @@ class AgentRun(BaseModel):
|
|
|
151
151
|
transcript_content = t.to_str(
|
|
152
152
|
token_limit=sys.maxsize,
|
|
153
153
|
transcript_idx=i,
|
|
154
|
-
agent_run_idx=None,
|
|
155
|
-
use_action_units=not use_blocks,
|
|
156
154
|
)[0]
|
|
157
155
|
transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")
|
|
158
156
|
|
|
@@ -202,15 +200,16 @@ class AgentRun(BaseModel):
|
|
|
202
200
|
), "Ranges without metadata should be a single message"
|
|
203
201
|
t = self.transcripts[msg_range.start]
|
|
204
202
|
if msg_range.num_tokens < token_limit - 50:
|
|
205
|
-
transcript =
|
|
203
|
+
transcript = (
|
|
204
|
+
f"<transcript>\n{t.to_str(token_limit=sys.maxsize)[0]}\n</transcript>"
|
|
205
|
+
)
|
|
206
206
|
result = (
|
|
207
207
|
f"Here is a partial agent run for analysis purposes only:\n{transcript}"
|
|
208
208
|
)
|
|
209
209
|
results.append(result)
|
|
210
210
|
else:
|
|
211
|
-
transcript_fragments = t.to_str(
|
|
211
|
+
transcript_fragments: list[str] = t.to_str(
|
|
212
212
|
token_limit=token_limit - 50,
|
|
213
|
-
use_action_units=not use_blocks,
|
|
214
213
|
)
|
|
215
214
|
for fragment in transcript_fragments:
|
|
216
215
|
result = f"<transcript>\n{fragment}\n</transcript>"
|
|
@@ -220,26 +219,6 @@ class AgentRun(BaseModel):
|
|
|
220
219
|
results.append(result)
|
|
221
220
|
return results
|
|
222
221
|
|
|
223
|
-
def to_text(self, token_limit: int = sys.maxsize) -> list[str]:
|
|
224
|
-
"""
|
|
225
|
-
Represents an agent run as a list of strings, each of which is at most token_limit tokens
|
|
226
|
-
under the GPT-4 tokenization scheme.
|
|
227
|
-
|
|
228
|
-
We'll try to split up long AgentRuns along transcript boundaries and include metadata.
|
|
229
|
-
For very long transcripts, we'll have to split them up further and remove metadata.
|
|
230
|
-
"""
|
|
231
|
-
return self._to_text_impl(token_limit=token_limit, use_blocks=False)
|
|
232
|
-
|
|
233
|
-
def to_text_blocks(self, token_limit: int = sys.maxsize) -> list[str]:
|
|
234
|
-
"""
|
|
235
|
-
Represents an agent run as a list of strings using individual message blocks,
|
|
236
|
-
each of which is at most token_limit tokens under the GPT-4 tokenization scheme.
|
|
237
|
-
|
|
238
|
-
Unlike to_text() which uses action units, this method formats each message
|
|
239
|
-
as an individual block.
|
|
240
|
-
"""
|
|
241
|
-
return self._to_text_impl(token_limit=token_limit, use_blocks=True)
|
|
242
|
-
|
|
243
222
|
@property
|
|
244
223
|
def text(self) -> str:
|
|
245
224
|
"""Concatenates all transcript texts with double newlines as separators.
|
|
@@ -247,16 +226,7 @@ class AgentRun(BaseModel):
|
|
|
247
226
|
Returns:
|
|
248
227
|
str: A string representation of all transcripts.
|
|
249
228
|
"""
|
|
250
|
-
return self._to_text_impl(token_limit=sys.maxsize, use_blocks=False)[0]
|
|
251
|
-
|
|
252
|
-
@property
|
|
253
|
-
def text_blocks(self) -> str:
|
|
254
|
-
"""Concatenates all transcript texts using individual blocks format.
|
|
255
|
-
|
|
256
|
-
Returns:
|
|
257
|
-
str: A string representation of all transcripts using individual message blocks.
|
|
258
|
-
"""
|
|
259
|
-
return self._to_text_impl(token_limit=sys.maxsize, use_blocks=True)[0]
|
|
229
|
+
return self._to_text_impl(token_limit=sys.maxsize)[0]
|
|
260
230
|
|
|
261
231
|
##############################
|
|
262
232
|
# New text rendering methods #
|
|
@@ -414,10 +384,20 @@ class AgentRun(BaseModel):
|
|
|
414
384
|
|
|
415
385
|
return c_tree, transcript_idx_map
|
|
416
386
|
|
|
417
|
-
def to_text_new(
|
|
387
|
+
def to_text_new(
|
|
388
|
+
self,
|
|
389
|
+
agent_run_alias: int | str = 0,
|
|
390
|
+
t_idx_map: dict[str, int] | None = None,
|
|
391
|
+
indent: int = 0,
|
|
392
|
+
full_tree: bool = False,
|
|
393
|
+
):
|
|
394
|
+
if isinstance(agent_run_alias, int):
|
|
395
|
+
agent_run_alias = f"R{agent_run_alias}"
|
|
396
|
+
|
|
418
397
|
c_tree = self.get_canonical_tree(full_tree=full_tree)
|
|
419
398
|
t_ids_ordered = self.get_transcript_ids_ordered(full_tree=full_tree)
|
|
420
|
-
t_idx_map = {t_id: i for i, t_id in enumerate(t_ids_ordered)}
|
|
399
|
+
if t_idx_map is None:
|
|
400
|
+
t_idx_map = {t_id: i for i, t_id in enumerate(t_ids_ordered)}
|
|
421
401
|
t_dict = self.transcript_dict
|
|
422
402
|
tg_dict = self.transcript_group_dict
|
|
423
403
|
|
|
@@ -430,7 +410,7 @@ class AgentRun(BaseModel):
|
|
|
430
410
|
children_texts.append(_recurse(child_id))
|
|
431
411
|
else:
|
|
432
412
|
cur_text = t_dict[child_id].to_text_new(
|
|
433
|
-
|
|
413
|
+
transcript_alias=t_idx_map[child_id],
|
|
434
414
|
indent=indent,
|
|
435
415
|
)
|
|
436
416
|
children_texts.append(cur_text)
|
|
@@ -451,6 +431,7 @@ class AgentRun(BaseModel):
|
|
|
451
431
|
if metadata_text is not None:
|
|
452
432
|
if indent > 0:
|
|
453
433
|
metadata_text = textwrap.indent(metadata_text, " " * indent)
|
|
454
|
-
|
|
434
|
+
metadata_alias = f"{agent_run_alias}M"
|
|
435
|
+
text += f"\n<|agent run metadata {metadata_alias}|>\n{metadata_text}\n</|agent run metadata {metadata_alias}|>"
|
|
455
436
|
|
|
456
|
-
return text
|
|
437
|
+
return f"<|agent run {agent_run_alias}|>\n{text}\n</|agent run {agent_run_alias}|>\n"
|
|
@@ -2,10 +2,13 @@ from docent.data_models.chat.content import Content, ContentReasoning, ContentTe
|
|
|
2
2
|
from docent.data_models.chat.message import (
|
|
3
3
|
AssistantMessage,
|
|
4
4
|
ChatMessage,
|
|
5
|
+
DocentAssistantMessage,
|
|
6
|
+
DocentChatMessage,
|
|
5
7
|
SystemMessage,
|
|
6
8
|
ToolMessage,
|
|
7
9
|
UserMessage,
|
|
8
10
|
parse_chat_message,
|
|
11
|
+
parse_docent_chat_message,
|
|
9
12
|
)
|
|
10
13
|
from docent.data_models.chat.tool import (
|
|
11
14
|
ToolCall,
|
|
@@ -16,7 +19,9 @@ from docent.data_models.chat.tool import (
|
|
|
16
19
|
|
|
17
20
|
__all__ = [
|
|
18
21
|
"ChatMessage",
|
|
22
|
+
"DocentChatMessage",
|
|
19
23
|
"AssistantMessage",
|
|
24
|
+
"DocentAssistantMessage",
|
|
20
25
|
"SystemMessage",
|
|
21
26
|
"ToolMessage",
|
|
22
27
|
"UserMessage",
|
|
@@ -28,4 +33,5 @@ __all__ = [
|
|
|
28
33
|
"ToolInfo",
|
|
29
34
|
"ToolParams",
|
|
30
35
|
"parse_chat_message",
|
|
36
|
+
"parse_docent_chat_message",
|
|
31
37
|
]
|
|
@@ -5,7 +5,7 @@ from pydantic import BaseModel, Discriminator, Field
|
|
|
5
5
|
|
|
6
6
|
from docent.data_models.chat.content import Content
|
|
7
7
|
from docent.data_models.chat.tool import ToolCall
|
|
8
|
-
from docent.data_models.citation import Citation
|
|
8
|
+
from docent.data_models.citation import InlineCitation
|
|
9
9
|
|
|
10
10
|
logger = getLogger(__name__)
|
|
11
11
|
|
|
@@ -69,14 +69,24 @@ class AssistantMessage(BaseChatMessage):
|
|
|
69
69
|
role: Always set to "assistant".
|
|
70
70
|
model: Optional identifier for the model that generated this message.
|
|
71
71
|
tool_calls: Optional list of tool calls made by the assistant.
|
|
72
|
-
citations: Optional list of citations referenced in the message content.
|
|
73
|
-
suggested_messages: Optional list of suggested followup messages.
|
|
74
72
|
"""
|
|
75
73
|
|
|
76
74
|
role: Literal["assistant"] = "assistant" # type: ignore
|
|
77
75
|
model: str | None = None
|
|
78
76
|
tool_calls: list[ToolCall] | None = None
|
|
79
|
-
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class DocentAssistantMessage(AssistantMessage):
|
|
80
|
+
"""Assistant message in a chat session with additional chat-specific metadata.
|
|
81
|
+
|
|
82
|
+
This extends AssistantMessage with fields that are only relevant in Docent chat contexts.
|
|
83
|
+
|
|
84
|
+
Attributes:
|
|
85
|
+
citations: Optional list of citations referenced in the message content.
|
|
86
|
+
suggested_messages: Optional list of suggested followup messages.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
citations: list[InlineCitation] | None = None
|
|
80
90
|
suggested_messages: list[str] | None = None
|
|
81
91
|
|
|
82
92
|
|
|
@@ -101,12 +111,25 @@ ChatMessage = Annotated[
|
|
|
101
111
|
SystemMessage | UserMessage | AssistantMessage | ToolMessage,
|
|
102
112
|
Discriminator("role"),
|
|
103
113
|
]
|
|
104
|
-
"""Type alias for any chat message type, discriminated by the role field.
|
|
114
|
+
"""Type alias for any chat message type, discriminated by the role field.
|
|
115
|
+
|
|
116
|
+
This is the base message union used in Transcript and AgentRun contexts.
|
|
117
|
+
For chat sessions, use DocentChatMessage instead.
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
DocentChatMessage = Annotated[
|
|
121
|
+
SystemMessage | UserMessage | DocentAssistantMessage | ToolMessage,
|
|
122
|
+
Discriminator("role"),
|
|
123
|
+
]
|
|
124
|
+
"""Type alias for chat session messages with chat-specific assistant metadata."""
|
|
105
125
|
|
|
106
126
|
|
|
107
127
|
def parse_chat_message(message_data: dict[str, Any] | ChatMessage) -> ChatMessage:
|
|
108
128
|
"""Parse a message dictionary or object into the appropriate ChatMessage subclass.
|
|
109
129
|
|
|
130
|
+
This parses base messages without chat-specific fields. For chat sessions,
|
|
131
|
+
use parse_docent_chat_message instead.
|
|
132
|
+
|
|
110
133
|
Args:
|
|
111
134
|
message_data: A dictionary or ChatMessage object representing a chat message.
|
|
112
135
|
|
|
@@ -130,3 +153,39 @@ def parse_chat_message(message_data: dict[str, Any] | ChatMessage) -> ChatMessag
|
|
|
130
153
|
return ToolMessage.model_validate(message_data)
|
|
131
154
|
else:
|
|
132
155
|
raise ValueError(f"Unknown message role: {role}")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def parse_docent_chat_message(
|
|
159
|
+
message_data: dict[str, Any] | DocentChatMessage,
|
|
160
|
+
) -> DocentChatMessage:
|
|
161
|
+
"""Parse a message dictionary or object into the appropriate DocentChatMessage subclass.
|
|
162
|
+
|
|
163
|
+
This handles chat session messages which may include DocentAssistantMessage with
|
|
164
|
+
citations and suggested_messages fields.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
message_data: A dictionary or DocentChatMessage object representing a chat session message.
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
DocentChatMessage: An instance of a DocentChatMessage subclass based on the role.
|
|
171
|
+
|
|
172
|
+
Raises:
|
|
173
|
+
ValueError: If the message role is unknown.
|
|
174
|
+
"""
|
|
175
|
+
if isinstance(
|
|
176
|
+
message_data,
|
|
177
|
+
(SystemMessage, UserMessage, DocentAssistantMessage, AssistantMessage, ToolMessage),
|
|
178
|
+
):
|
|
179
|
+
return message_data
|
|
180
|
+
|
|
181
|
+
role = message_data.get("role")
|
|
182
|
+
if role == "system":
|
|
183
|
+
return SystemMessage.model_validate(message_data)
|
|
184
|
+
elif role == "user":
|
|
185
|
+
return UserMessage.model_validate(message_data)
|
|
186
|
+
elif role == "assistant":
|
|
187
|
+
return DocentAssistantMessage.model_validate(message_data)
|
|
188
|
+
elif role == "tool":
|
|
189
|
+
return ToolMessage.model_validate(message_data)
|
|
190
|
+
else:
|
|
191
|
+
raise ValueError(f"Unknown message role: {role}")
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from typing import Annotated, Literal, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Discriminator
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CitationTargetTextRange(BaseModel):
|
|
7
|
+
start_pattern: str | None = None
|
|
8
|
+
end_pattern: str | None = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ResolvedCitationItem(BaseModel):
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CitationTarget(BaseModel):
|
|
16
|
+
item: "ResolvedCitationItemUnion"
|
|
17
|
+
text_range: CitationTargetTextRange | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ParsedCitation(BaseModel):
|
|
21
|
+
start_idx: int
|
|
22
|
+
end_idx: int
|
|
23
|
+
item_alias: str
|
|
24
|
+
text_range: CitationTargetTextRange | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class InlineCitation(BaseModel):
|
|
28
|
+
start_idx: int
|
|
29
|
+
end_idx: int
|
|
30
|
+
target: CitationTarget
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class AgentRunMetadataItem(ResolvedCitationItem):
|
|
34
|
+
item_type: Literal["agent_run_metadata"] = "agent_run_metadata"
|
|
35
|
+
agent_run_id: str
|
|
36
|
+
collection_id: str
|
|
37
|
+
metadata_key: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class TranscriptMetadataItem(ResolvedCitationItem):
|
|
41
|
+
item_type: Literal["transcript_metadata"] = "transcript_metadata"
|
|
42
|
+
agent_run_id: str
|
|
43
|
+
collection_id: str
|
|
44
|
+
transcript_id: str
|
|
45
|
+
metadata_key: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TranscriptBlockMetadataItem(ResolvedCitationItem):
|
|
49
|
+
item_type: Literal["block_metadata"] = "block_metadata"
|
|
50
|
+
agent_run_id: str
|
|
51
|
+
collection_id: str
|
|
52
|
+
transcript_id: str
|
|
53
|
+
block_idx: int
|
|
54
|
+
metadata_key: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TranscriptBlockContentItem(ResolvedCitationItem):
|
|
58
|
+
item_type: Literal["block_content"] = "block_content"
|
|
59
|
+
agent_run_id: str
|
|
60
|
+
collection_id: str
|
|
61
|
+
transcript_id: str
|
|
62
|
+
block_idx: int
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
ResolvedCitationItemUnion = Annotated[
|
|
66
|
+
Union[
|
|
67
|
+
AgentRunMetadataItem,
|
|
68
|
+
TranscriptMetadataItem,
|
|
69
|
+
TranscriptBlockMetadataItem,
|
|
70
|
+
TranscriptBlockContentItem,
|
|
71
|
+
],
|
|
72
|
+
Discriminator("item_type"),
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
RANGE_BEGIN = "<RANGE>"
|
|
76
|
+
RANGE_END = "</RANGE>"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def scan_brackets(text: str) -> list[tuple[int, int, str]]:
|
|
80
|
+
"""Scan text for bracketed segments, respecting RANGE markers and nested brackets.
|
|
81
|
+
|
|
82
|
+
Returns a list of (start_index, end_index_exclusive, inner_content).
|
|
83
|
+
"""
|
|
84
|
+
matches: list[tuple[int, int, str]] = []
|
|
85
|
+
i = 0
|
|
86
|
+
while i < len(text):
|
|
87
|
+
if text[i] == "[":
|
|
88
|
+
start = i
|
|
89
|
+
bracket_count = 1
|
|
90
|
+
j = i + 1
|
|
91
|
+
in_range = False
|
|
92
|
+
|
|
93
|
+
while j < len(text) and bracket_count > 0:
|
|
94
|
+
if text[j : j + len(RANGE_BEGIN)] == RANGE_BEGIN:
|
|
95
|
+
in_range = True
|
|
96
|
+
elif text[j : j + len(RANGE_END)] == RANGE_END:
|
|
97
|
+
in_range = False
|
|
98
|
+
elif text[j] == "[" and not in_range:
|
|
99
|
+
bracket_count += 1
|
|
100
|
+
elif text[j] == "]" and not in_range:
|
|
101
|
+
bracket_count -= 1
|
|
102
|
+
j += 1
|
|
103
|
+
|
|
104
|
+
if bracket_count == 0:
|
|
105
|
+
end_exclusive = j
|
|
106
|
+
bracket_content = text[start + 1 : end_exclusive - 1]
|
|
107
|
+
matches.append((start, end_exclusive, bracket_content))
|
|
108
|
+
i = j
|
|
109
|
+
else:
|
|
110
|
+
i += 1
|
|
111
|
+
else:
|
|
112
|
+
i += 1
|
|
113
|
+
return matches
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _extract_range_pattern(range_part: str) -> CitationTargetTextRange | None:
|
|
117
|
+
if RANGE_BEGIN in range_part and RANGE_END in range_part:
|
|
118
|
+
range_begin_idx = range_part.find(RANGE_BEGIN)
|
|
119
|
+
range_end_idx = range_part.find(RANGE_END)
|
|
120
|
+
if range_begin_idx != -1 and range_end_idx != -1:
|
|
121
|
+
range_content = range_part[range_begin_idx + len(RANGE_BEGIN) : range_end_idx]
|
|
122
|
+
start_pattern = range_content if range_content else None
|
|
123
|
+
return CitationTargetTextRange(start_pattern=start_pattern)
|
|
124
|
+
|
|
125
|
+
return None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def parse_single_citation(part: str) -> tuple[str, CitationTargetTextRange | None] | None:
|
|
129
|
+
"""
|
|
130
|
+
Parse a single citation token inside a bracket and return its components.
|
|
131
|
+
|
|
132
|
+
Returns an (item_alias, text_range) tuple, or None if the token is empty.
|
|
133
|
+
For metadata citations, transcript_idx may be None (for agent run metadata).
|
|
134
|
+
Supports optional text range for all valid citation kinds.
|
|
135
|
+
"""
|
|
136
|
+
token = part.strip()
|
|
137
|
+
if not token:
|
|
138
|
+
return None
|
|
139
|
+
|
|
140
|
+
# Extract optional range part
|
|
141
|
+
item_alias = token
|
|
142
|
+
text_range: CitationTargetTextRange | None = None
|
|
143
|
+
if ":" in token:
|
|
144
|
+
left, right = token.split(":", 1)
|
|
145
|
+
item_alias = left.strip()
|
|
146
|
+
text_range = _extract_range_pattern(right)
|
|
147
|
+
|
|
148
|
+
return item_alias, text_range
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def parse_citations(text: str) -> tuple[str, list[ParsedCitation]]:
|
|
152
|
+
"""
|
|
153
|
+
Parse citations from text in the format described by TEXT_RANGE_CITE_INSTRUCTION.
|
|
154
|
+
|
|
155
|
+
Supported formats:
|
|
156
|
+
- Single block: [T<key>B<idx>]
|
|
157
|
+
- Text range with start pattern: [T<key>B<idx>:<RANGE>start_pattern</RANGE>]
|
|
158
|
+
- Agent run metadata: [M.key]
|
|
159
|
+
- Transcript metadata: [T<key>M.key]
|
|
160
|
+
- Message metadata: [T<key>B<idx>M.key]
|
|
161
|
+
- Message metadata with text range: [T<key>B<idx>M.key:<RANGE>start_pattern</RANGE>]
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
text: The text to parse citations from
|
|
165
|
+
|
|
166
|
+
Returns:
|
|
167
|
+
A tuple of (text, citations). The text is currently returned unchanged (brackets and
|
|
168
|
+
range markers are not removed yet), and each citation's start_idx and end_idx are the
|
|
169
|
+
character positions of its bracketed span (end exclusive) in that returned text
|
|
170
|
+
"""
|
|
171
|
+
citations: list[ParsedCitation] = []
|
|
172
|
+
|
|
173
|
+
bracket_matches = scan_brackets(text)
|
|
174
|
+
|
|
175
|
+
for start, end, bracket_content in bracket_matches:
|
|
176
|
+
# Parse a single citation token inside the bracket
|
|
177
|
+
parsed = parse_single_citation(bracket_content)
|
|
178
|
+
if not parsed:
|
|
179
|
+
continue
|
|
180
|
+
label, text_range = parsed
|
|
181
|
+
|
|
182
|
+
citations.append(
|
|
183
|
+
ParsedCitation(start_idx=start, end_idx=end, item_alias=label, text_range=text_range)
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
# We're not cleaning the text right now but may do that later
|
|
187
|
+
return text, citations
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from uuid import uuid4
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, model_validator
|
|
4
|
+
|
|
5
|
+
from docent.data_models.agent_run import AgentRun
|
|
6
|
+
from docent.data_models.transcript import Transcript
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FormattedTranscript(Transcript):
|
|
10
|
+
"""A Transcript that preserves original message indices during edits.
|
|
11
|
+
|
|
12
|
+
This class extends Transcript to support customization while maintaining accurate
|
|
13
|
+
citations. Each message retains its original index from the source transcript,
|
|
14
|
+
even if messages are added, removed, or reordered.
|
|
15
|
+
|
|
16
|
+
Use this class when you need to customize which parts of a transcript are visible
|
|
17
|
+
to an LLM while ensuring citations remain valid.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
id_to_original_index: dict[str, int]
|
|
21
|
+
|
|
22
|
+
@classmethod
|
|
23
|
+
def from_transcript(cls, transcript: Transcript) -> "FormattedTranscript":
|
|
24
|
+
"""Create a FormattedTranscript from a regular Transcript."""
|
|
25
|
+
# Ensure all messages have IDs and build id_to_original_index
|
|
26
|
+
id_to_original_index: dict[str, int] = {}
|
|
27
|
+
for idx, msg in enumerate(transcript.messages):
|
|
28
|
+
if msg.id is None:
|
|
29
|
+
msg.id = str(uuid4())
|
|
30
|
+
id_to_original_index[msg.id] = idx
|
|
31
|
+
|
|
32
|
+
return cls(
|
|
33
|
+
id=transcript.id,
|
|
34
|
+
name=transcript.name,
|
|
35
|
+
description=transcript.description,
|
|
36
|
+
transcript_group_id=transcript.transcript_group_id,
|
|
37
|
+
created_at=transcript.created_at,
|
|
38
|
+
messages=transcript.messages,
|
|
39
|
+
metadata=transcript.metadata,
|
|
40
|
+
id_to_original_index=id_to_original_index,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
@model_validator(mode="after")
|
|
44
|
+
def _validate_id_to_original_index(self) -> "FormattedTranscript":
|
|
45
|
+
"""Ensure id_to_original_index covers all messages."""
|
|
46
|
+
for msg in self.messages:
|
|
47
|
+
if msg.id not in self.id_to_original_index:
|
|
48
|
+
raise ValueError(
|
|
49
|
+
f"Message {msg.id} missing from id_to_original_index. "
|
|
50
|
+
"Use FormattedTranscript.from_transcript() to create a new instance."
|
|
51
|
+
)
|
|
52
|
+
return self
|
|
53
|
+
|
|
54
|
+
def _enumerate_messages(self):
|
|
55
|
+
"""Yield (original index, message) for each message."""
|
|
56
|
+
for message in self.messages:
|
|
57
|
+
assert message.id is not None
|
|
58
|
+
original_idx = self.id_to_original_index[message.id]
|
|
59
|
+
yield (original_idx, message)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class FormattedAgentRun(AgentRun):
|
|
63
|
+
"""An AgentRun that allows customization while tracking original identifiers.
|
|
64
|
+
|
|
65
|
+
This class extends AgentRun to support modifications to what an LLM sees
|
|
66
|
+
while maintaining accurate citations back to the original agent run.
|
|
67
|
+
|
|
68
|
+
Use this class when you need to customize which parts of an agent run are visible
|
|
69
|
+
to an LLM (e.g., hiding metadata, truncating long outputs).
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
transcripts: list[FormattedTranscript] = Field(default_factory=list) # type: ignore[assignment]
|
|
73
|
+
|
|
74
|
+
@classmethod
|
|
75
|
+
def from_agent_run(cls, agent_run: AgentRun) -> "FormattedAgentRun":
|
|
76
|
+
"""Create a FormattedAgentRun from a regular AgentRun."""
|
|
77
|
+
return cls(
|
|
78
|
+
id=agent_run.id,
|
|
79
|
+
name=agent_run.name,
|
|
80
|
+
description=agent_run.description,
|
|
81
|
+
transcripts=[FormattedTranscript.from_transcript(t) for t in agent_run.transcripts],
|
|
82
|
+
transcript_groups=agent_run.transcript_groups,
|
|
83
|
+
metadata=agent_run.metadata,
|
|
84
|
+
)
|