docent-python 0.1.41a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic. Click here for more details.
- docent/__init__.py +4 -0
- docent/_llm_util/__init__.py +0 -0
- docent/_llm_util/data_models/__init__.py +0 -0
- docent/_llm_util/data_models/exceptions.py +48 -0
- docent/_llm_util/data_models/llm_output.py +331 -0
- docent/_llm_util/llm_cache.py +193 -0
- docent/_llm_util/llm_svc.py +472 -0
- docent/_llm_util/model_registry.py +134 -0
- docent/_llm_util/providers/__init__.py +0 -0
- docent/_llm_util/providers/anthropic.py +537 -0
- docent/_llm_util/providers/common.py +41 -0
- docent/_llm_util/providers/google.py +530 -0
- docent/_llm_util/providers/openai.py +745 -0
- docent/_llm_util/providers/openrouter.py +375 -0
- docent/_llm_util/providers/preference_types.py +104 -0
- docent/_llm_util/providers/provider_registry.py +164 -0
- docent/_log_util/__init__.py +3 -0
- docent/_log_util/logger.py +141 -0
- docent/data_models/__init__.py +14 -0
- docent/data_models/_tiktoken_util.py +91 -0
- docent/data_models/agent_run.py +473 -0
- docent/data_models/chat/__init__.py +37 -0
- docent/data_models/chat/content.py +56 -0
- docent/data_models/chat/message.py +191 -0
- docent/data_models/chat/tool.py +109 -0
- docent/data_models/citation.py +187 -0
- docent/data_models/formatted_objects.py +84 -0
- docent/data_models/judge.py +17 -0
- docent/data_models/metadata_util.py +16 -0
- docent/data_models/regex.py +56 -0
- docent/data_models/transcript.py +305 -0
- docent/data_models/util.py +170 -0
- docent/judges/__init__.py +23 -0
- docent/judges/analysis.py +77 -0
- docent/judges/impl.py +587 -0
- docent/judges/runner.py +129 -0
- docent/judges/stats.py +205 -0
- docent/judges/types.py +320 -0
- docent/judges/util/forgiving_json.py +108 -0
- docent/judges/util/meta_schema.json +86 -0
- docent/judges/util/meta_schema.py +29 -0
- docent/judges/util/parse_output.py +68 -0
- docent/judges/util/voting.py +139 -0
- docent/loaders/load_inspect.py +215 -0
- docent/py.typed +0 -0
- docent/samples/__init__.py +3 -0
- docent/samples/load.py +9 -0
- docent/samples/log.eval +0 -0
- docent/samples/tb_airline.json +1 -0
- docent/sdk/__init__.py +0 -0
- docent/sdk/agent_run_writer.py +317 -0
- docent/sdk/client.py +1186 -0
- docent/sdk/llm_context.py +432 -0
- docent/trace.py +2741 -0
- docent/trace_temp.py +1086 -0
- docent_python-0.1.41a0.dist-info/METADATA +33 -0
- docent_python-0.1.41a0.dist-info/RECORD +59 -0
- docent_python-0.1.41a0.dist-info/WHEEL +4 -0
- docent_python-0.1.41a0.dist-info/licenses/LICENSE.md +13 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from logging import getLogger
|
|
2
|
+
from typing import Annotated, Any, Literal
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Discriminator
|
|
5
|
+
|
|
6
|
+
from docent.data_models.chat.content import Content
|
|
7
|
+
from docent.data_models.chat.tool import ToolCall
|
|
8
|
+
from docent.data_models.citation import InlineCitation
|
|
9
|
+
|
|
10
|
+
logger = getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BaseChatMessage(BaseModel):
    """Common shape shared by every chat message variant.

    Attributes:
        id: Optional unique identifier for the message.
        content: The message body, as plain text or a list of Content parts.
        role: The role of the message sender (system, user, assistant, tool).
        metadata: Optional free-form structured metadata about the message.
    """

    id: str | None = None
    content: str | list[Content]
    role: Literal["system", "user", "assistant", "tool"]
    metadata: dict[str, Any] | None = None

    @property
    def text(self) -> str:
        """Return the message's textual content.

        String content is returned unchanged; structured content is reduced
        to its "text"-typed parts, joined with newlines.
        """
        if isinstance(self.content, str):
            return self.content
        return "\n".join(part.text for part in self.content if part.type == "text")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SystemMessage(BaseChatMessage):
    """System message in a chat conversation.

    Attributes:
        role: Always set to "system".
    """

    # Narrows the base class's role Literal to the single "system" value;
    # the ignore silences the type checker's invariant-override complaint.
    role: Literal["system"] = "system"  # type: ignore
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class UserMessage(BaseChatMessage):
    """User message in a chat conversation.

    Attributes:
        role: Always set to "user".
        tool_call_id: Optional list of tool call IDs this message is responding to.
    """

    # Narrows the base class's role Literal to "user".
    role: Literal["user"] = "user"  # type: ignore
    # NOTE(review): a list here (vs. ToolMessage's single str) presumably lets one
    # user message answer several tool calls — confirm against callers.
    tool_call_id: list[str] | None = None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class AssistantMessage(BaseChatMessage):
    """Assistant message in a chat conversation.

    Attributes:
        role: Always set to "assistant".
        model: Optional identifier for the model that generated this message.
        tool_calls: Optional list of tool calls made by the assistant.
    """

    # Narrows the base class's role Literal to "assistant".
    role: Literal["assistant"] = "assistant"  # type: ignore
    model: str | None = None
    tool_calls: list[ToolCall] | None = None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class DocentAssistantMessage(AssistantMessage):
    """Assistant message in a chat session with additional chat-specific metadata.

    This extends AssistantMessage with fields that are only relevant in Docent
    chat contexts.

    Attributes:
        citations: Optional list of citations referenced in the message content.
        suggested_messages: Optional list of suggested followup messages.
    """

    citations: list[InlineCitation] | None = None
    suggested_messages: list[str] | None = None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ToolMessage(BaseChatMessage):
    """Tool message in a chat conversation.

    Attributes:
        role: Always set to "tool".
        tool_call_id: Optional ID of the tool call this message is responding to.
        function: Optional name of the function that was called.
        error: Optional error information if the tool call failed.
    """

    # Narrows the base class's role Literal to "tool".
    role: Literal["tool"] = "tool"  # type: ignore

    # Single ID here, unlike UserMessage's list: one tool result per message.
    tool_call_id: str | None = None
    function: str | None = None
    error: dict[str, Any] | None = None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Discriminated unions: pydantic selects the concrete message class based on
# the value of the "role" field during validation.
ChatMessage = Annotated[
    SystemMessage | UserMessage | AssistantMessage | ToolMessage,
    Discriminator("role"),
]
"""Type alias for any chat message type, discriminated by the role field.

This is the base message union used in Transcript and AgentRun contexts.
For chat sessions, use DocentChatMessage instead.
"""

DocentChatMessage = Annotated[
    SystemMessage | UserMessage | DocentAssistantMessage | ToolMessage,
    Discriminator("role"),
]
"""Type alias for chat session messages with chat-specific assistant metadata."""
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def parse_chat_message(message_data: dict[str, Any] | ChatMessage) -> ChatMessage:
    """Coerce a dict or message object into the matching ChatMessage subclass.

    Parses base messages without chat-session-specific fields. For chat
    sessions, use parse_docent_chat_message instead.

    Args:
        message_data: A dictionary or ChatMessage object representing a chat message.

    Returns:
        ChatMessage: A validated instance of the subclass selected by the role.

    Raises:
        ValueError: If the message role is unknown.
    """
    # Already a concrete message object: nothing to do.
    if isinstance(message_data, (SystemMessage, UserMessage, AssistantMessage, ToolMessage)):
        return message_data

    role = message_data.get("role")
    for expected_role, model_cls in (
        ("system", SystemMessage),
        ("user", UserMessage),
        ("assistant", AssistantMessage),
        ("tool", ToolMessage),
    ):
        if role == expected_role:
            return model_cls.model_validate(message_data)
    raise ValueError(f"Unknown message role: {role}")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def parse_docent_chat_message(
    message_data: dict[str, Any] | DocentChatMessage,
) -> DocentChatMessage:
    """Coerce a dict or message object into the matching DocentChatMessage subclass.

    Handles chat session messages, where the assistant variant is
    DocentAssistantMessage (with citations and suggested_messages fields).

    Args:
        message_data: A dictionary or message object representing a chat session message.

    Returns:
        DocentChatMessage: A validated instance of the subclass selected by the role.

    Raises:
        ValueError: If the message role is unknown.
    """
    # Already a concrete message object: return as-is. NOTE(review): plain
    # AssistantMessage instances are also passed through unconverted, which the
    # return annotation does not reflect — confirm this is intentional.
    if isinstance(
        message_data,
        (SystemMessage, UserMessage, DocentAssistantMessage, AssistantMessage, ToolMessage),
    ):
        return message_data

    role = message_data.get("role")
    for expected_role, model_cls in (
        ("system", SystemMessage),
        ("user", UserMessage),
        ("assistant", DocentAssistantMessage),
        ("tool", ToolMessage),
    ):
        if role == expected_role:
            return model_cls.model_validate(message_data)
    raise ValueError(f"Unknown message role: {role}")
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Literal
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ToolCall:
    """Tool call information.

    Attributes:
        id: Unique identifier for tool call.
        type: Type of tool call. Can only be "function" or None.
        function: Function called.
        arguments: Arguments to function.
        parse_error: Error which occurred parsing tool call.
        view: Custom view of tool call input.
    """

    id: str
    function: str
    arguments: dict[str, Any]
    type: Literal["function"] | None = None
    parse_error: str | None = None
    # Forward reference: ToolCallContent is defined below; the annotation stays
    # lazy thanks to `from __future__ import annotations` at the top of the module.
    view: ToolCallContent | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ToolCallContent(BaseModel):
    """Content to include in tool call view.

    Attributes:
        title: Optional (plain text) title for tool call content.
        format: Format (text or markdown).
        content: Text or markdown content.
    """

    title: str | None = None
    # Rendering format of `content`.
    format: Literal["text", "markdown"]
    content: str
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ToolParam(BaseModel):
    """A parameter for a tool function.

    Args:
        name: The name of the parameter.
        description: A description of what the parameter does.
        input_schema: JSON Schema describing the parameter's type and validation rules.
    """

    name: str
    description: str
    # Raw JSON Schema dict (not validated here beyond being a dict).
    input_schema: dict[str, Any]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ToolParams(BaseModel):
    """Description of tool parameters object in JSON Schema format.

    Args:
        type: The type of the parameters object, always 'object'.
        properties: Dictionary mapping parameter names to their ToolParam definitions.
        required: List of required parameter names.
        additionalProperties: Whether additional properties are allowed beyond those
            specified. Always False.
    """

    type: Literal["object"] = "object"
    # default_factory avoids sharing one mutable dict/list across instances.
    properties: dict[str, ToolParam] = Field(default_factory=dict)
    required: list[str] = Field(default_factory=list)
    # Deliberately camelCase to match the JSON Schema key verbatim on dump.
    additionalProperties: bool = False
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ToolInfo(BaseModel):
    """Specification of a tool (JSON Schema compatible).

    If you are implementing a ModelAPI, most LLM libraries can
    be passed this object (dumped to a dict) directly as a function
    specification. For example, in the OpenAI provider:

    ```python
    ChatCompletionToolParam(
        type="function",
        function=tool.model_dump(exclude_none=True),
    )
    ```

    In some cases the field names don't match up exactly. In that case
    call `model_dump()` on the `parameters` field. For example, in the
    Anthropic provider:

    ```python
    ToolParam(
        name=tool.name,
        description=tool.description,
        input_schema=tool.parameters.model_dump(exclude_none=True),
    )
    ```

    Attributes:
        name: Name of tool.
        description: Short description of tool.
        parameters: JSON Schema of tool parameters object.
    """

    name: str
    description: str
    # Defaults to an empty (no-parameter) schema; default_factory avoids a
    # shared mutable default across instances.
    parameters: ToolParams = Field(default_factory=ToolParams)
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from typing import Annotated, Literal, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Discriminator
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CitationTargetTextRange(BaseModel):
    """Optional text patterns that narrow a citation to a sub-span of its target."""

    # Pattern marking where the cited span begins; None when not supplied.
    start_pattern: str | None = None
    # Pattern marking where the cited span ends; None when not supplied.
    end_pattern: str | None = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ResolvedCitationItem(BaseModel):
    """Marker base class for concrete resolved citation targets (see subclasses below)."""

    pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CitationTarget(BaseModel):
    """A resolved citation destination plus an optional narrowing text range."""

    # Forward reference: ResolvedCitationItemUnion is defined later in this module.
    item: "ResolvedCitationItemUnion"
    text_range: CitationTargetTextRange | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ParsedCitation(BaseModel):
    """A citation as extracted from raw text, before its alias is resolved."""

    # Character offsets of the bracketed token in the source text
    # (end_idx is exclusive, as produced by scan_brackets).
    start_idx: int
    end_idx: int
    # The token inside the brackets naming the cited item.
    item_alias: str
    text_range: CitationTargetTextRange | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class InlineCitation(BaseModel):
    """A resolved citation anchored at character offsets within message text."""

    # Character offsets of the citation within the containing text.
    start_idx: int
    end_idx: int
    target: CitationTarget
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class AgentRunMetadataItem(ResolvedCitationItem):
    """Citation target: a metadata key on an agent run."""

    # Discriminator value for ResolvedCitationItemUnion.
    item_type: Literal["agent_run_metadata"] = "agent_run_metadata"
    agent_run_id: str
    collection_id: str
    metadata_key: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class TranscriptMetadataItem(ResolvedCitationItem):
    """Citation target: a metadata key on a transcript."""

    # Discriminator value for ResolvedCitationItemUnion.
    item_type: Literal["transcript_metadata"] = "transcript_metadata"
    agent_run_id: str
    collection_id: str
    transcript_id: str
    metadata_key: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TranscriptBlockMetadataItem(ResolvedCitationItem):
    """Citation target: a metadata key on a specific block within a transcript."""

    # Discriminator value for ResolvedCitationItemUnion.
    item_type: Literal["block_metadata"] = "block_metadata"
    agent_run_id: str
    collection_id: str
    transcript_id: str
    block_idx: int
    metadata_key: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TranscriptBlockContentItem(ResolvedCitationItem):
    """Citation target: the content of a specific block within a transcript."""

    # Discriminator value for ResolvedCitationItemUnion.
    item_type: Literal["block_content"] = "block_content"
    agent_run_id: str
    collection_id: str
    transcript_id: str
    block_idx: int
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Discriminated union over all concrete citation item types; pydantic selects
# the subclass via the "item_type" literal field.
ResolvedCitationItemUnion = Annotated[
    Union[
        AgentRunMetadataItem,
        TranscriptMetadataItem,
        TranscriptBlockMetadataItem,
        TranscriptBlockContentItem,
    ],
    Discriminator("item_type"),
]
|
|
74
|
+
|
|
75
|
+
# Markers that delimit a literal text range inside a citation token; brackets
# between them do not count toward nesting.
RANGE_BEGIN = "<RANGE>"
RANGE_END = "</RANGE>"


def scan_brackets(text: str) -> list[tuple[int, int, str]]:
    """Locate bracketed segments, honoring RANGE markers and nested brackets.

    Brackets appearing between RANGE_BEGIN and RANGE_END are treated as
    literal characters and do not affect nesting depth.

    Returns:
        A list of (start_index, end_index_exclusive, inner_content) tuples.
    """
    results: list[tuple[int, int, str]] = []
    n = len(text)
    pos = 0
    while pos < n:
        if text[pos] != "[":
            pos += 1
            continue

        depth = 1
        cursor = pos + 1
        suppressed = False  # True while inside a <RANGE>...</RANGE> span
        while cursor < n and depth:
            if text.startswith(RANGE_BEGIN, cursor):
                suppressed = True
            elif text.startswith(RANGE_END, cursor):
                suppressed = False
            elif not suppressed:
                ch = text[cursor]
                if ch == "[":
                    depth += 1
                elif ch == "]":
                    depth -= 1
            cursor += 1

        if depth == 0:
            results.append((pos, cursor, text[pos + 1 : cursor - 1]))
            pos = cursor
        else:
            # Unclosed bracket: step one char forward and rescan, so inner
            # complete brackets are still found.
            pos += 1
    return results
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _extract_range_pattern(range_part: str) -> CitationTargetTextRange | None:
    """Extract a text range from the suffix of a citation token.

    Looks for a <RANGE>...</RANGE> span; the text between the markers becomes
    the start pattern (or None when empty). Returns None when either marker
    is absent.
    """
    begin_idx = range_part.find(RANGE_BEGIN)
    end_idx = range_part.find(RANGE_END)
    if begin_idx == -1 or end_idx == -1:
        return None
    inner = range_part[begin_idx + len(RANGE_BEGIN) : end_idx]
    return CitationTargetTextRange(start_pattern=inner or None)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def parse_single_citation(part: str) -> tuple[str, CitationTargetTextRange | None] | None:
    """Split one bracketed citation token into its alias and optional text range.

    Returns:
        A (item_alias, text_range) tuple, or None when the token is blank.
        The text range is parsed from an optional ``:``-separated suffix;
        it is None when no valid RANGE markers are present.
    """
    token = part.strip()
    if not token:
        return None

    # token is "alias" or "alias:<RANGE>...</RANGE>"
    alias, sep, suffix = token.partition(":")
    if not sep:
        return token, None
    return alias.strip(), _extract_range_pattern(suffix)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def parse_citations(text: str) -> tuple[str, list[ParsedCitation]]:
    """Parse citations from text in the format described by TEXT_RANGE_CITE_INSTRUCTION.

    Supported formats:
    - Single block: [T<key>B<idx>]
    - Text range with start pattern: [T<key>B<idx>:<RANGE>start_pattern</RANGE>]
    - Agent run metadata: [M.key]
    - Transcript metadata: [T<key>M.key]
    - Message metadata: [T<key>B<idx>M.key]
    - Message metadata with text range: [T<key>B<idx>M.key:<RANGE>start_pattern</RANGE>]

    Args:
        text: The text to parse citations from.

    Returns:
        A (cleaned_text, citations) tuple. Citations carry start_idx/end_idx
        character positions in the returned text. The text is currently
        returned unmodified; cleaning may be added later.
    """
    found: list[ParsedCitation] = []
    for start, end, inner in scan_brackets(text):
        token = parse_single_citation(inner)
        if token is None:
            # Blank bracket contents are not citations.
            continue
        alias, rng = token
        found.append(
            ParsedCitation(start_idx=start, end_idx=end, item_alias=alias, text_range=rng)
        )
    return text, found
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from uuid import uuid4
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, model_validator
|
|
4
|
+
|
|
5
|
+
from docent.data_models.agent_run import AgentRun
|
|
6
|
+
from docent.data_models.transcript import Transcript
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FormattedTranscript(Transcript):
    """A Transcript that preserves original message indices during edits.

    This class extends Transcript to support customization while maintaining accurate
    citations. Each message retains its original index from the source transcript,
    even if messages are added, removed, or reordered.

    Use this class when you need to customize which parts of a transcript are visible
    to an LLM while ensuring citations remain valid.
    """

    # Maps each message id to the index it held in the source transcript.
    id_to_original_index: dict[str, int]

    @classmethod
    def from_transcript(cls, transcript: Transcript) -> "FormattedTranscript":
        """Create a FormattedTranscript from a regular Transcript.

        NOTE(review): this mutates the input — messages missing an id are
        assigned a fresh uuid in place — and the new instance shares the same
        message list/objects rather than copying them. Confirm callers expect
        that aliasing.
        """
        # Ensure all messages have IDs and build id_to_original_index
        id_to_original_index: dict[str, int] = {}
        for idx, msg in enumerate(transcript.messages):
            if msg.id is None:
                msg.id = str(uuid4())
            id_to_original_index[msg.id] = idx

        return cls(
            id=transcript.id,
            name=transcript.name,
            description=transcript.description,
            transcript_group_id=transcript.transcript_group_id,
            created_at=transcript.created_at,
            messages=transcript.messages,
            metadata=transcript.metadata,
            id_to_original_index=id_to_original_index,
        )

    @model_validator(mode="after")
    def _validate_id_to_original_index(self) -> "FormattedTranscript":
        """Ensure id_to_original_index covers all messages.

        Raises:
            ValueError: If any message id (including None) is absent from the
                mapping; from_transcript() is the supported constructor.
        """
        for msg in self.messages:
            if msg.id not in self.id_to_original_index:
                raise ValueError(
                    f"Message {msg.id} missing from id_to_original_index. "
                    "Use FormattedTranscript.from_transcript() to create a new instance."
                )
        return self

    def _enumerate_messages(self):
        """Yield (original index, message) for each message, in current order."""
        for message in self.messages:
            # Non-None ids are guaranteed by the model validator above (a None
            # id would have failed the mapping-membership check).
            assert message.id is not None
            original_idx = self.id_to_original_index[message.id]
            yield (original_idx, message)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class FormattedAgentRun(AgentRun):
    """An AgentRun that allows customization while tracking original identifiers.

    This class extends AgentRun to support modifications to what an LLM sees
    while maintaining accurate citations back to the original agent run.

    Use this class when you need to customize which parts of an agent run are visible
    to an LLM (e.g., hiding metadata, truncating long outputs).
    """

    # Overrides the base field so transcripts carry original-index bookkeeping.
    transcripts: list[FormattedTranscript] = Field(default_factory=list)  # type: ignore[assignment]

    @classmethod
    def from_agent_run(cls, agent_run: AgentRun) -> "FormattedAgentRun":
        """Create a FormattedAgentRun from a regular AgentRun.

        NOTE(review): FormattedTranscript.from_transcript assigns missing
        message ids on the source transcripts in place — the input is not
        left fully untouched.
        """
        return cls(
            id=agent_run.id,
            name=agent_run.name,
            description=agent_run.description,
            transcripts=[FormattedTranscript.from_transcript(t) for t in agent_run.transcripts],
            transcript_groups=agent_run.transcript_groups,
            metadata=agent_run.metadata,
        )
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Judge-related data models shared across Docent components."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Label(BaseModel):
    """A single label value attached to an agent run within a label set."""

    # Unique identifier; a fresh UUID is generated when not supplied.
    id: str = Field(default_factory=lambda: str(uuid4()))

    # The label set this label belongs to.
    label_set_id: str
    # Free-form label payload.
    label_value: dict[str, Any]
    # The agent run being labeled.
    agent_run_id: str


__all__ = ["Label"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from pydantic_core import to_jsonable_python
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def dump_metadata(metadata: dict[str, Any]) -> str | None:
    """Serialize metadata to a pretty-printed JSON string.

    Returns None for empty metadata. JSON is used (rather than YAML, which
    saved tokens) because the frontend works in JSON, which makes cited
    ranges easier to locate there.
    """
    if not metadata:
        return None
    jsonable = to_jsonable_python(metadata)
    return json.dumps(jsonable, indent=2).strip()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from docent._log_util import get_logger
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RegexSnippet(BaseModel):
    """A regex match together with its surrounding context window."""

    # Text window containing the match plus surrounding context.
    snippet: str
    # Match offsets relative to `snippet` (not to the original text).
    match_start: int
    match_end: int
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_regex_snippets(text: str, pattern: str, window_size: int = 50) -> list[RegexSnippet]:
    """Extracts snippets from text that match a regex pattern, with surrounding context.

    Args:
        text: The text to search in.
        pattern: The regex pattern to match. Compiled with IGNORECASE | DOTALL.
        window_size: The number of characters to include before and after the match.

    Returns:
        A list of RegexSnippet objects containing the snippets and match positions
        (offsets relative to each snippet). Returns an empty list when the pattern
        is invalid or matches nothing.
    """
    # Keep the try narrow: re.error can only be raised while compiling the
    # pattern, not while iterating matches or slicing snippets.
    try:
        compiled = re.compile(pattern, re.IGNORECASE | re.DOTALL)
    except re.error as e:
        logger.error(f"Got regex error: {e}")
        return []

    matches = list(compiled.finditer(text))
    if not matches:
        # Callers are expected to pass patterns known to occur in `text`.
        logger.warning(f"No regex matches found for {pattern}: this shouldn't happen!")
        return []

    snippets: list[RegexSnippet] = []
    for match in matches:
        start, end = match.span()

        # Window of context around the match, clamped to the text bounds.
        snippet_start = max(0, start - window_size)
        snippet_end = min(len(text), end + window_size)

        # Match indices are re-based onto the snippet's coordinate space.
        snippets.append(
            RegexSnippet(
                snippet=text[snippet_start:snippet_end],
                match_start=start - snippet_start,
                match_end=end - snippet_start,
            )
        )

    return snippets
|