docent-python 0.1.15a0__tar.gz → 0.1.17a0__tar.gz

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.


This version of docent-python might be problematic.

Files changed (34)
  1. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/agent_run.py +11 -24
  3. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/message.py +3 -1
  4. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/transcript.py +8 -67
  5. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/sdk/agent_run_writer.py +10 -2
  6. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/pyproject.toml +1 -1
  7. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/.gitignore +0 -0
  8. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/LICENSE.md +0 -0
  9. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/README.md +0 -0
  10. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/__init__.py +0 -0
  11. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/_log_util/__init__.py +0 -0
  12. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/_log_util/logger.py +0 -0
  13. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/__init__.py +0 -0
  14. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/_tiktoken_util.py +0 -0
  15. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/__init__.py +0 -0
  16. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/content.py +0 -0
  17. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/tool.py +0 -0
  18. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/citation.py +0 -0
  19. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/metadata.py +0 -0
  20. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/regex.py +0 -0
  21. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/remove_invalid_citation_ranges.py +0 -0
  22. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/shared_types.py +0 -0
  23. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/yaml_util.py +0 -0
  24. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/loaders/load_inspect.py +0 -0
  25. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/py.typed +0 -0
  26. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/__init__.py +0 -0
  27. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/load.py +0 -0
  28. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/log.eval +0 -0
  29. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/tb_airline.json +0 -0
  30. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/sdk/__init__.py +0 -0
  31. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/sdk/client.py +0 -0
  32. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/trace.py +0 -0
  33. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/trace_temp.py +0 -0
  34. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/uv.lock +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.15a0
+Version: 0.1.17a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
docent/data_models/agent_run.py
@@ -147,18 +147,12 @@ class AgentRun(BaseModel):
         # Generate transcript strings using appropriate method
         transcript_strs: list[str] = []
         for i, t in enumerate(self.transcripts):
-            if use_blocks:
-                transcript_content = t.to_str_blocks_with_token_limit(
-                    token_limit=sys.maxsize,
-                    transcript_idx=i,
-                    agent_run_idx=None,
-                )[0]
-            else:
-                transcript_content = t.to_str_with_token_limit(
-                    token_limit=sys.maxsize,
-                    transcript_idx=i,
-                    agent_run_idx=None,
-                )[0]
+            transcript_content = t.to_str(
+                token_limit=sys.maxsize,
+                transcript_idx=i,
+                agent_run_idx=None,
+                use_action_units=not use_blocks,
+            )[0]
             transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")

         transcripts_str = "\n\n".join(transcript_strs)
@@ -207,23 +201,16 @@ class AgentRun(BaseModel):
             ), "Ranges without metadata should be a single message"
             t = self.transcripts[msg_range.start]
             if msg_range.num_tokens < token_limit - 50:
-                if use_blocks:
-                    transcript = f"<transcript>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
-                else:
-                    transcript = f"<transcript>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
+                transcript = f"<transcript>\n{t.to_str(token_limit=sys.maxsize, use_action_units=not use_blocks)[0]}\n</transcript>"
                 result = (
                     f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                 )
                 results.append(result)
             else:
-                if use_blocks:
-                    transcript_fragments = t.to_str_blocks_with_token_limit(
-                        token_limit=token_limit - 50,
-                    )
-                else:
-                    transcript_fragments = t.to_str_with_token_limit(
-                        token_limit=token_limit - 50,
-                    )
+                transcript_fragments = t.to_str(
+                    token_limit=token_limit - 50,
+                    use_action_units=not use_blocks,
+                )
                 for fragment in transcript_fragments:
                     result = f"<transcript>\n{fragment}\n</transcript>"
                     result = (
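
Both call sites in agent_run.py now route through a single Transcript.to_str entry point, with use_action_units=not use_blocks choosing between action-unit grouping and per-message blocks. A minimal sketch of the new rendering loop under that signature; the helper name and setup below are illustrative, not part of the package:

import sys

from docent.data_models.transcript import Transcript


def render_transcripts(transcripts: list[Transcript], use_blocks: bool) -> str:
    # Mirrors the updated loop: one to_str() call per transcript, wrapped in
    # <transcript> tags and joined with blank lines.
    transcript_strs: list[str] = []
    for i, t in enumerate(transcripts):
        content = t.to_str(
            token_limit=sys.maxsize,          # no truncation
            transcript_idx=i,
            agent_run_idx=None,
            use_action_units=not use_blocks,  # True -> action units, False -> per-message blocks
        )[0]
        transcript_strs.append(f"<transcript>\n{content}\n</transcript>")
    return "\n\n".join(transcript_strs)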
docent/data_models/chat/message.py
@@ -1,7 +1,7 @@
 from logging import getLogger
 from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Discriminator
+from pydantic import BaseModel, Discriminator, Field

 from docent.data_models.chat.content import Content
 from docent.data_models.chat.tool import ToolCall
@@ -17,11 +17,13 @@ class BaseChatMessage(BaseModel):
         id: Optional unique identifier for the message.
         content: The message content, either as a string or list of Content objects.
         role: The role of the message sender (system, user, assistant, tool).
+        metadata: Additional structured metadata about the message.
     """

     id: str | None = None
     content: str | list[Content]
     role: Literal["system", "user", "assistant", "tool"]
+    metadata: dict[str, Any] = Field(default_factory=dict)

     @property
     def text(self) -> str:
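
A short sketch of the new per-message metadata field; instantiating BaseChatMessage directly and the example keys are purely illustrative (real transcripts presumably use the role-specific subclasses):

from docent.data_models.chat.message import BaseChatMessage

msg = BaseChatMessage(
    content="Ran the migration script successfully.",
    role="assistant",
    metadata={"model": "gpt-4o", "latency_ms": 1270},  # arbitrary example keys
)
print(msg.metadata)  # {'model': 'gpt-4o', 'latency_ms': 1270}

# The field defaults to an empty dict, so existing payloads still validate.
print(BaseChatMessage(content="hi", role="user").metadata)  # {}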
docent/data_models/transcript.py
@@ -72,6 +72,11 @@ def format_chat_message(
         args = ", ".join([f"{k}={v}" for k, v in tool_call.arguments.items()])
         cur_content += f"\n<tool call>\n{tool_call.function}({args})\n</tool call>"

+    if message.metadata:
+        metadata_yaml = yaml_dump_metadata(message.metadata)
+        if metadata_yaml is not None:
+            cur_content += f"\n<|message metadata|>\n{metadata_yaml}\n</|message metadata|>"
+
     return TRANSCRIPT_BLOCK_TEMPLATE.format(
         index_label=index_label, role=message.role, content=cur_content
     )
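
Roughly how a rendered block picks up the new section, assuming yaml_dump_metadata behaves like a plain YAML dump that returns None when nothing serializes; everything below except the tag names is illustrative:

import yaml

message_metadata = {"tool_runtime_s": 3.2, "retries": 1}
cur_content = "Called the search tool and summarized the results."

# Stand-in for yaml_dump_metadata(); serialize the dict and skip empty output.
metadata_yaml = yaml.dump(message_metadata, width=float("inf")).strip() or None
if metadata_yaml is not None:
    cur_content += f"\n<|message metadata|>\n{metadata_yaml}\n</|message metadata|>"

print(cur_content)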
@@ -301,20 +306,6 @@ class Transcript(BaseModel):
         self.messages = messages
         self._units_of_action = self._compute_units_of_action()

-    def to_str(
-        self,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-        highlight_action_unit: int | None = None,
-    ) -> str:
-        return self._to_str_with_token_limit_impl(
-            token_limit=sys.maxsize,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=True,
-            highlight_action_unit=highlight_action_unit,
-        )[0]
-
     def _generate_formatted_blocks(
         self,
         transcript_idx: int = 0,
@@ -379,9 +370,9 @@ class Transcript(BaseModel):

         return blocks

-    def _to_str_with_token_limit_impl(
+    def to_str(
         self,
-        token_limit: int,
+        token_limit: int = sys.maxsize,
         transcript_idx: int = 0,
         agent_run_idx: int | None = None,
         use_action_units: bool = True,
@@ -408,7 +399,7 @@ class Transcript(BaseModel):
         metadata_obj = to_jsonable_python(self.metadata)
         yaml_width = float("inf")
         block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
-        metadata_str = f"<metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</metadata>"
+        metadata_str = f"<|transcript metadata|>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</|transcript metadata|>"

         if token_limit == sys.maxsize:
             return [f"{block_str}" f"{metadata_str}"]
@@ -439,56 +430,6 @@ class Transcript(BaseModel):

         return results

-    def to_str_blocks(
-        self,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-    ) -> str:
-        """Represents the transcript as a string using individual message blocks.
-
-        Unlike to_str() which groups messages into action units, this method
-        formats each message as an individual block.
-
-        Returns:
-            str: A string representation with individual message blocks.
-        """
-        return self._to_str_with_token_limit_impl(
-            token_limit=sys.maxsize,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=False,
-        )[0]
-
-    def to_str_with_token_limit(
-        self,
-        token_limit: int,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-        highlight_action_unit: int | None = None,
-    ) -> list[str]:
-        """Represents the transcript as a list of strings using action units with token limit handling."""
-        return self._to_str_with_token_limit_impl(
-            token_limit=token_limit,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=True,
-            highlight_action_unit=highlight_action_unit,
-        )
-
-    def to_str_blocks_with_token_limit(
-        self,
-        token_limit: int,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-    ) -> list[str]:
-        """Represents the transcript as individual blocks with token limit handling."""
-        return self._to_str_with_token_limit_impl(
-            token_limit=token_limit,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=False,
-        )
-
     ##############################
     # New text rendering methods #
     ##############################
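
A migration sketch for callers of the removed wrappers, based only on the signatures visible in this diff; the function below is illustrative and `t` stands for any existing Transcript:

from docent.data_models.transcript import Transcript


def migrate_examples(t: Transcript, limit: int) -> None:
    # to_str_blocks(...)                      -> to_str(use_action_units=False)[0]
    blocks_str = t.to_str(use_action_units=False)[0]

    # to_str_with_token_limit(limit, ...)     -> to_str(token_limit=limit)
    action_unit_fragments = t.to_str(token_limit=limit)

    # to_str_blocks_with_token_limit(limit)   -> to_str(token_limit=limit, use_action_units=False)
    block_fragments = t.to_str(token_limit=limit, use_action_units=False)

    print(blocks_str, len(action_unit_fragments), len(block_fragments))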
docent/sdk/agent_run_writer.py
@@ -105,9 +105,17 @@ class AgentRunWriter:
         # Register shutdown hooks
         atexit.register(self.finish)

+        def _handle_sigint(s: int, f: object) -> None:
+            self._shutdown()
+            raise KeyboardInterrupt
+
+        def _handle_sigterm(s: int, f: object) -> None:
+            self._shutdown()
+            raise SystemExit(0)
+
         # Register signal handlers for graceful shutdown
-        signal.signal(signal.SIGINT, lambda s, f: self._shutdown())  # Ctrl+C
-        signal.signal(signal.SIGTERM, lambda s, f: self._shutdown())  # Kill signal
+        signal.signal(signal.SIGINT, _handle_sigint)  # Ctrl+C
+        signal.signal(signal.SIGTERM, _handle_sigterm)  # Kill signal

     def log_agent_runs(self, agent_runs: list[AgentRun]) -> None:
         """Put a list of AgentRun objects into the queue.
pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.15-alpha"
+version = "0.1.17-alpha"
 authors = [
     { name="Transluce", email="info@transluce.org" },
 ]