docent-python 0.1.16a0__py3-none-any.whl → 0.1.17a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic. Click here for more details.
- docent/data_models/agent_run.py +11 -24
- docent/data_models/chat/message.py +3 -1
- docent/data_models/transcript.py +8 -67
- {docent_python-0.1.16a0.dist-info → docent_python-0.1.17a0.dist-info}/METADATA +1 -1
- {docent_python-0.1.16a0.dist-info → docent_python-0.1.17a0.dist-info}/RECORD +7 -7
- {docent_python-0.1.16a0.dist-info → docent_python-0.1.17a0.dist-info}/WHEEL +0 -0
- {docent_python-0.1.16a0.dist-info → docent_python-0.1.17a0.dist-info}/licenses/LICENSE.md +0 -0
docent/data_models/agent_run.py
CHANGED
|
@@ -147,18 +147,12 @@ class AgentRun(BaseModel):
|
|
|
147
147
|
# Generate transcript strings using appropriate method
|
|
148
148
|
transcript_strs: list[str] = []
|
|
149
149
|
for i, t in enumerate(self.transcripts):
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
else:
|
|
157
|
-
transcript_content = t.to_str_with_token_limit(
|
|
158
|
-
token_limit=sys.maxsize,
|
|
159
|
-
transcript_idx=i,
|
|
160
|
-
agent_run_idx=None,
|
|
161
|
-
)[0]
|
|
150
|
+
transcript_content = t.to_str(
|
|
151
|
+
token_limit=sys.maxsize,
|
|
152
|
+
transcript_idx=i,
|
|
153
|
+
agent_run_idx=None,
|
|
154
|
+
use_action_units=not use_blocks,
|
|
155
|
+
)[0]
|
|
162
156
|
transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")
|
|
163
157
|
|
|
164
158
|
transcripts_str = "\n\n".join(transcript_strs)
|
|
@@ -207,23 +201,16 @@ class AgentRun(BaseModel):
|
|
|
207
201
|
), "Ranges without metadata should be a single message"
|
|
208
202
|
t = self.transcripts[msg_range.start]
|
|
209
203
|
if msg_range.num_tokens < token_limit - 50:
|
|
210
|
-
|
|
211
|
-
transcript = f"<transcript>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
|
|
212
|
-
else:
|
|
213
|
-
transcript = f"<transcript>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
|
|
204
|
+
transcript = f"<transcript>\n{t.to_str(token_limit=sys.maxsize, use_action_units=not use_blocks)[0]}\n</transcript>"
|
|
214
205
|
result = (
|
|
215
206
|
f"Here is a partial agent run for analysis purposes only:\n{transcript}"
|
|
216
207
|
)
|
|
217
208
|
results.append(result)
|
|
218
209
|
else:
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
else:
|
|
224
|
-
transcript_fragments = t.to_str_with_token_limit(
|
|
225
|
-
token_limit=token_limit - 50,
|
|
226
|
-
)
|
|
210
|
+
transcript_fragments = t.to_str(
|
|
211
|
+
token_limit=token_limit - 50,
|
|
212
|
+
use_action_units=not use_blocks,
|
|
213
|
+
)
|
|
227
214
|
for fragment in transcript_fragments:
|
|
228
215
|
result = f"<transcript>\n{fragment}\n</transcript>"
|
|
229
216
|
result = (
|
docent/data_models/chat/message.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from logging import getLogger
|
|
2
2
|
from typing import Annotated, Any, Literal
|
|
3
3
|
|
|
4
|
-
from pydantic import BaseModel, Discriminator
|
|
4
|
+
from pydantic import BaseModel, Discriminator, Field
|
|
5
5
|
|
|
6
6
|
from docent.data_models.chat.content import Content
|
|
7
7
|
from docent.data_models.chat.tool import ToolCall
|
|
@@ -17,11 +17,13 @@ class BaseChatMessage(BaseModel):
|
|
|
17
17
|
id: Optional unique identifier for the message.
|
|
18
18
|
content: The message content, either as a string or list of Content objects.
|
|
19
19
|
role: The role of the message sender (system, user, assistant, tool).
|
|
20
|
+
metadata: Additional structured metadata about the message.
|
|
20
21
|
"""
|
|
21
22
|
|
|
22
23
|
id: str | None = None
|
|
23
24
|
content: str | list[Content]
|
|
24
25
|
role: Literal["system", "user", "assistant", "tool"]
|
|
26
|
+
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
25
27
|
|
|
26
28
|
@property
|
|
27
29
|
def text(self) -> str:
|
docent/data_models/transcript.py
CHANGED
|
@@ -72,6 +72,11 @@ def format_chat_message(
|
|
|
72
72
|
args = ", ".join([f"{k}={v}" for k, v in tool_call.arguments.items()])
|
|
73
73
|
cur_content += f"\n<tool call>\n{tool_call.function}({args})\n</tool call>"
|
|
74
74
|
|
|
75
|
+
if message.metadata:
|
|
76
|
+
metadata_yaml = yaml_dump_metadata(message.metadata)
|
|
77
|
+
if metadata_yaml is not None:
|
|
78
|
+
cur_content += f"\n<|message metadata|>\n{metadata_yaml}\n</|message metadata|>"
|
|
79
|
+
|
|
75
80
|
return TRANSCRIPT_BLOCK_TEMPLATE.format(
|
|
76
81
|
index_label=index_label, role=message.role, content=cur_content
|
|
77
82
|
)
|
|
@@ -301,20 +306,6 @@ class Transcript(BaseModel):
|
|
|
301
306
|
self.messages = messages
|
|
302
307
|
self._units_of_action = self._compute_units_of_action()
|
|
303
308
|
|
|
304
|
-
def to_str(
|
|
305
|
-
self,
|
|
306
|
-
transcript_idx: int = 0,
|
|
307
|
-
agent_run_idx: int | None = None,
|
|
308
|
-
highlight_action_unit: int | None = None,
|
|
309
|
-
) -> str:
|
|
310
|
-
return self._to_str_with_token_limit_impl(
|
|
311
|
-
token_limit=sys.maxsize,
|
|
312
|
-
transcript_idx=transcript_idx,
|
|
313
|
-
agent_run_idx=agent_run_idx,
|
|
314
|
-
use_action_units=True,
|
|
315
|
-
highlight_action_unit=highlight_action_unit,
|
|
316
|
-
)[0]
|
|
317
|
-
|
|
318
309
|
def _generate_formatted_blocks(
|
|
319
310
|
self,
|
|
320
311
|
transcript_idx: int = 0,
|
|
@@ -379,9 +370,9 @@ class Transcript(BaseModel):
|
|
|
379
370
|
|
|
380
371
|
return blocks
|
|
381
372
|
|
|
382
|
-
def _to_str_with_token_limit_impl(
|
|
373
|
+
def to_str(
|
|
383
374
|
self,
|
|
384
|
-
token_limit: int,
|
|
375
|
+
token_limit: int = sys.maxsize,
|
|
385
376
|
transcript_idx: int = 0,
|
|
386
377
|
agent_run_idx: int | None = None,
|
|
387
378
|
use_action_units: bool = True,
|
|
@@ -408,7 +399,7 @@ class Transcript(BaseModel):
|
|
|
408
399
|
metadata_obj = to_jsonable_python(self.metadata)
|
|
409
400
|
yaml_width = float("inf")
|
|
410
401
|
block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
|
|
411
|
-
metadata_str = f"
|
|
402
|
+
metadata_str = f"<|transcript metadata|>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</|transcript metadata|>"
|
|
412
403
|
|
|
413
404
|
if token_limit == sys.maxsize:
|
|
414
405
|
return [f"{block_str}" f"{metadata_str}"]
|
|
@@ -439,56 +430,6 @@ class Transcript(BaseModel):
|
|
|
439
430
|
|
|
440
431
|
return results
|
|
441
432
|
|
|
442
|
-
def to_str_blocks(
|
|
443
|
-
self,
|
|
444
|
-
transcript_idx: int = 0,
|
|
445
|
-
agent_run_idx: int | None = None,
|
|
446
|
-
) -> str:
|
|
447
|
-
"""Represents the transcript as a string using individual message blocks.
|
|
448
|
-
|
|
449
|
-
Unlike to_str() which groups messages into action units, this method
|
|
450
|
-
formats each message as an individual block.
|
|
451
|
-
|
|
452
|
-
Returns:
|
|
453
|
-
str: A string representation with individual message blocks.
|
|
454
|
-
"""
|
|
455
|
-
return self._to_str_with_token_limit_impl(
|
|
456
|
-
token_limit=sys.maxsize,
|
|
457
|
-
transcript_idx=transcript_idx,
|
|
458
|
-
agent_run_idx=agent_run_idx,
|
|
459
|
-
use_action_units=False,
|
|
460
|
-
)[0]
|
|
461
|
-
|
|
462
|
-
def to_str_with_token_limit(
|
|
463
|
-
self,
|
|
464
|
-
token_limit: int,
|
|
465
|
-
transcript_idx: int = 0,
|
|
466
|
-
agent_run_idx: int | None = None,
|
|
467
|
-
highlight_action_unit: int | None = None,
|
|
468
|
-
) -> list[str]:
|
|
469
|
-
"""Represents the transcript as a list of strings using action units with token limit handling."""
|
|
470
|
-
return self._to_str_with_token_limit_impl(
|
|
471
|
-
token_limit=token_limit,
|
|
472
|
-
transcript_idx=transcript_idx,
|
|
473
|
-
agent_run_idx=agent_run_idx,
|
|
474
|
-
use_action_units=True,
|
|
475
|
-
highlight_action_unit=highlight_action_unit,
|
|
476
|
-
)
|
|
477
|
-
|
|
478
|
-
def to_str_blocks_with_token_limit(
|
|
479
|
-
self,
|
|
480
|
-
token_limit: int,
|
|
481
|
-
transcript_idx: int = 0,
|
|
482
|
-
agent_run_idx: int | None = None,
|
|
483
|
-
) -> list[str]:
|
|
484
|
-
"""Represents the transcript as individual blocks with token limit handling."""
|
|
485
|
-
return self._to_str_with_token_limit_impl(
|
|
486
|
-
token_limit=token_limit,
|
|
487
|
-
transcript_idx=transcript_idx,
|
|
488
|
-
agent_run_idx=agent_run_idx,
|
|
489
|
-
use_action_units=False,
|
|
490
|
-
)
|
|
491
|
-
|
|
492
433
|
##############################
|
|
493
434
|
# New text rendering methods #
|
|
494
435
|
##############################
|
{docent_python-0.1.16a0.dist-info → docent_python-0.1.17a0.dist-info}/RECORD
CHANGED
|
@@ -6,17 +6,17 @@ docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,
|
|
|
6
6
|
docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
|
|
7
7
|
docent/data_models/__init__.py,sha256=4JbTDVzRhS5VZgo8MALwd_YI17GaN7X9E3rOc4Xl7kw,327
|
|
8
8
|
docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
|
|
9
|
-
docent/data_models/agent_run.py,sha256=
|
|
9
|
+
docent/data_models/agent_run.py,sha256=F5oGsKVpWc-IUCX3QxChkgw6UR30MGTlP4YS2KeaUkQ,19315
|
|
10
10
|
docent/data_models/citation.py,sha256=zpF9WuvVEfktltw1M9P3hwpg5yywizFUKF5zROBR2cY,5062
|
|
11
11
|
docent/data_models/metadata.py,sha256=r0SYC4i2x096dXMLfw_rAMtcJQCsoV6EOMPZuEngbGA,9062
|
|
12
12
|
docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
|
|
13
13
|
docent/data_models/remove_invalid_citation_ranges.py,sha256=U-aIzRL-SuWFQZr1MqEGqXMNyIKQs7VQLxHDoFrMJwI,5658
|
|
14
14
|
docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
|
|
15
|
-
docent/data_models/transcript.py,sha256=
|
|
15
|
+
docent/data_models/transcript.py,sha256=eT48m8rZ3STGdElH-B1aOlCuwdaCc673GNVdQTBgAt0,19429
|
|
16
16
|
docent/data_models/yaml_util.py,sha256=6GrPWqbTZrryZh71cnSsiqbHkWVCd-8V3-6GeiEchUg,325
|
|
17
17
|
docent/data_models/chat/__init__.py,sha256=GleyRzYqKRkwwSRm_tQJw5BudCbgu9WRSa71Fntz0L0,610
|
|
18
18
|
docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
|
|
19
|
-
docent/data_models/chat/message.py,sha256=
|
|
19
|
+
docent/data_models/chat/message.py,sha256=_72xeTdgv8ogQd4WLl1P3yXfIDkIEQrHlWgdvObeQxY,4291
|
|
20
20
|
docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZyw,3049
|
|
21
21
|
docent/loaders/load_inspect.py,sha256=VLrtpvcVZ44n2DIPMwUivXqbvOWjaooGw6moY8UQ0VE,6789
|
|
22
22
|
docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
|
|
@@ -26,7 +26,7 @@ docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5
|
|
|
26
26
|
docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
docent/sdk/agent_run_writer.py,sha256=0X5IP6wegoDsPiN_mblEvJa4sNWNxjPLbfNuYyA-qgY,9345
|
|
28
28
|
docent/sdk/client.py,sha256=fuJrTF87OtUojULFY7acZuqg5xmE8F-4HgEeEV8_gq0,14781
|
|
29
|
-
docent_python-0.1.
|
|
30
|
-
docent_python-0.1.
|
|
31
|
-
docent_python-0.1.
|
|
32
|
-
docent_python-0.1.
|
|
29
|
+
docent_python-0.1.17a0.dist-info/METADATA,sha256=D4flqV8wDdL0j8KMrVJbEezjQmdLhPGcCyZL7zlnMyA,1110
|
|
30
|
+
docent_python-0.1.17a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
31
|
+
docent_python-0.1.17a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
|
|
32
|
+
docent_python-0.1.17a0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|