docent-python 0.1.49a0__tar.gz → 0.1.51a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/chat/content.py +6 -0
  3. docent_python-0.1.51a0/docent/data_models/feedback.py +369 -0
  4. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/transcript.py +1 -1
  5. docent_python-0.1.51a0/docent/judges/util/voting.py +351 -0
  6. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/mcp/server.py +1 -1
  7. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/sdk/client.py +527 -79
  8. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/trace.py +43 -0
  9. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/pyproject.toml +1 -1
  10. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/uv.lock +64 -67
  11. docent_python-0.1.49a0/docent/judges/util/voting.py +0 -140
  12. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/.gitignore +0 -0
  13. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/LICENSE.md +0 -0
  14. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/README.md +0 -0
  15. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/__init__.py +0 -0
  16. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/__init__.py +0 -0
  17. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/data_models/__init__.py +0 -0
  18. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  19. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  20. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/llm_cache.py +0 -0
  21. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/llm_svc.py +0 -0
  22. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/model_registry.py +0 -0
  23. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/__init__.py +0 -0
  24. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/anthropic.py +0 -0
  25. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/common.py +0 -0
  26. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/google.py +0 -0
  27. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/openai.py +0 -0
  28. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/openrouter.py +0 -0
  29. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/preference_types.py +0 -0
  30. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  31. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_log_util/__init__.py +0 -0
  32. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/_log_util/logger.py +0 -0
  33. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/__init__.py +0 -0
  34. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/_tiktoken_util.py +0 -0
  35. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/agent_run.py +0 -0
  36. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/chat/__init__.py +0 -0
  37. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/chat/message.py +0 -0
  38. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/chat/response_format.py +0 -0
  39. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/chat/tool.py +0 -0
  40. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/citation.py +0 -0
  41. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/formatted_objects.py +0 -0
  42. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/judge.py +0 -0
  43. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/metadata_util.py +0 -0
  44. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/regex.py +0 -0
  45. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/data_models/util.py +0 -0
  46. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/__init__.py +0 -0
  47. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/analysis.py +0 -0
  48. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/impl.py +0 -0
  49. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/runner.py +0 -0
  50. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/stats.py +0 -0
  51. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/types.py +0 -0
  52. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/util/forgiving_json.py +0 -0
  53. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/util/meta_schema.json +0 -0
  54. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/util/meta_schema.py +0 -0
  55. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/util/parse_output.py +0 -0
  56. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/judges/util/template_formatter.py +0 -0
  57. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/loaders/load_inspect.py +0 -0
  58. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/mcp/__init__.py +0 -0
  59. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/mcp/__main__.py +0 -0
  60. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/py.typed +0 -0
  61. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/samples/__init__.py +0 -0
  62. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/samples/load.py +0 -0
  63. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/samples/log.eval +0 -0
  64. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/samples/tb_airline.json +0 -0
  65. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/sdk/__init__.py +0 -0
  66. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/sdk/agent_run_writer.py +0 -0
  67. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/sdk/llm_context.py +0 -0
  68. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/sdk/llm_request.py +0 -0
  69. {docent_python-0.1.49a0 → docent_python-0.1.51a0}/docent/trace_temp.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.49a0
3
+ Version: 0.1.51a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -39,15 +39,21 @@ class ContentReasoning(BaseContent):
39
39
  Attributes:
40
40
  type: Fixed as "reasoning" to identify this content type.
41
41
  reasoning: The actual reasoning text.
42
+ summary: Optional human-readable reasoning summary.
42
43
  signature: Optional signature associated with the reasoning.
43
44
  redacted: Flag indicating if the reasoning has been redacted.
44
45
  """
45
46
 
46
47
  type: Literal["reasoning"] = "reasoning" # type: ignore
47
48
  reasoning: str
49
+ summary: str | None = None
48
50
  signature: str | None = None
49
51
  redacted: bool = False
50
52
 
53
@property
def display_reasoning(self) -> str:
    """Return the text that should be displayed for this reasoning content.

    The summary is returned only when the content is flagged as redacted and
    a non-empty summary is available; in every other case the ``reasoning``
    text is returned unchanged.
    """
    if self.redacted and self.summary:
        return self.summary
    return self.reasoning
56
+
51
57
 
52
58
  # Content type discriminated union
53
59
  Content = Annotated[ContentText | ContentReasoning, Discriminator("type")]
@@ -0,0 +1,369 @@
1
+ """Data structures for run-centric feedback elicitation and user context inference."""
2
+
3
+ import json
4
+ from collections.abc import Iterator
5
+ from datetime import datetime
6
+ from typing import Any, Literal
7
+
8
+ from pydantic import BaseModel, Field, model_validator
9
+
10
+ from docent.data_models.citation import InlineCitation
11
+ from docent.judges.util.voting import OutputDistribution
12
+
13
+
14
def _stable_json(value: Any) -> str:
    """Serialize ``value`` to JSON text with dictionary keys sorted.

    Sorting the keys makes the output independent of dict insertion order,
    so equal payloads always render to the same string.

    Args:
        value: Any object accepted by ``json.dumps``.

    Returns:
        The JSON-encoded string.

    Raises:
        TypeError: If ``value`` contains an object ``json.dumps`` cannot encode.
    """
    return json.dumps(value, sort_keys=True)
16
+
17
+
18
def _indent_lines(lines: list[str], indent: int) -> list[str]:
    """Prefix every non-empty line with ``indent`` spaces.

    Args:
        lines: Lines to indent.
        indent: Number of spaces to prepend; negative values are treated as 0.

    Returns:
        A new list of the same length. Empty strings are left empty rather
        than being padded with spaces.
    """
    prefix = " " * max(0, indent)
    return [f"{prefix}{line}" if line else "" for line in lines]
21
+
22
+
23
def _tag_block(tag: str, body_lines: list[str], indent: int) -> list[str]:
    """Wrap ``body_lines`` between ``<tag>`` and ``</tag>`` marker lines.

    Args:
        tag: Text placed inside the opening and closing markers.
        body_lines: Lines of the block body. When empty, a single ``N/A``
            line is used instead so the block always has a body.
        indent: Number of spaces applied to the body lines. The marker lines
            themselves are not indented.

    Returns:
        A new list: the opening marker, the indented body, the closing marker.
    """
    return [
        f"<{tag}>",
        # An empty body falls back to the "N/A" placeholder.
        *_indent_lines(body_lines or ["N/A"], indent),
        f"</{tag}>",
    ]
31
+
32
+
33
+ def _text_or_na(text: str | None) -> str:
34
+ if text is None:
35
+ return "N/A"
36
+ stripped = text.strip()
37
+ return stripped if stripped else "N/A"
38
+
39
+
40
def _text_lines_or_na(text: str | None) -> list[str]:
    """Normalize ``text`` with ``_text_or_na`` and split it into lines.

    Args:
        text: Optional text to normalize.

    Returns:
        The lines of the normalized text. Because missing or blank input is
        normalized to ``"N/A"``, the result always has at least one line.
    """
    return _text_or_na(text).splitlines()
42
+
43
+
44
def _render_citations_block(citations: list[InlineCitation], indent: int) -> list[str]:
    """Render citations as a ``<Citations>`` block holding a single body line.

    Args:
        citations: Citations to serialize. Each one is dumped with
            ``model_dump(mode="json")`` and the resulting list is encoded as
            one JSON string with sorted keys.
        indent: Number of spaces applied to the body line.

    Returns:
        The block lines. The body is ``N/A`` when ``citations`` is empty.
    """
    if not citations:
        return _tag_block("Citations", ["N/A"], indent)
    payload = [citation.model_dump(mode="json") for citation in citations]
    return _tag_block("Citations", [_stable_json(payload)], indent)
48
+
49
+
50
def _render_user_distribution_block(
    user_distribution: OutputDistribution | None,
    indent: int,
) -> list[str]:
    """Render the distribution as an ``<Estimated user distribution p_u>`` block.

    Args:
        user_distribution: Distribution to serialize, or ``None``. A present
            distribution is dumped with ``model_dump(mode="json")`` and
            encoded as one JSON string with sorted keys.
        indent: Number of spaces applied to the body line.

    Returns:
        The block lines. The body is ``N/A`` when no distribution is given.
    """
    if user_distribution is None:
        distribution_text = "N/A"
    else:
        distribution_text = _stable_json(user_distribution.model_dump(mode="json"))
    return _tag_block("Estimated user distribution p_u", [distribution_text], indent)
60
+
61
+
62
def _render_user_distribution_reasoning_block(
    reasoning: str | None,
    reasoning_citations: list[InlineCitation] | None,
    indent: int,
) -> list[str]:
    """Render the reasoning text and its citations as a ``<p_u reasoning>`` block.

    Args:
        reasoning: Optional reasoning text; missing or blank text renders as
            ``N/A``.
        reasoning_citations: Optional citations; ``None`` is treated as an
            empty list, which renders a ``<Citations>`` block containing
            ``N/A``.
        indent: Number of spaces applied to each nesting level.

    Returns:
        The block lines: the reasoning lines followed by the nested
        citations block, wrapped in the ``p_u reasoning`` markers.
    """
    body_lines = [
        *_text_lines_or_na(reasoning),
        *_render_citations_block(reasoning_citations or [], indent),
    ]
    return _tag_block("p_u reasoning", body_lines, indent)
70
+
71
+
72
class LabelingRequestFocusItem(BaseModel):
    """Specific rubric-related question the human labeler should inspect.

    Attributes:
        question: The question text.
        citations: Citations rendered together with the question; defaults
            to an empty list.
        sample_answers: Candidate answers rendered after the question;
            defaults to an empty list.
    """

    question: str
    citations: list[InlineCitation] = Field(default_factory=list[InlineCitation])
    sample_answers: list[str] = Field(default_factory=list[str])

    def to_str(self, indent: int = 0) -> str:
        """Render focus item in a deterministic LLM-facing format.

        The output is a ``<Question>`` block (question text plus a nested
        ``<Citations>`` block) followed by a ``<Sample Answers>`` block.

        Args:
            indent: Number of spaces applied to the body of each block.

        Returns:
            The rendered lines joined with newlines.
        """
        # Render the question and its citations as one nested block.
        question_lines = [
            *_text_lines_or_na(self.question),
            *_render_citations_block(self.citations, indent),
        ]

        # Answers are numbered from 1; an empty list renders as "N/A".
        sample_answers_lines = [
            f"Answer {sample_idx}: {sample_answer}"
            for sample_idx, sample_answer in enumerate(self.sample_answers, start=1)
        ]

        return "\n".join(
            [
                *_tag_block("Question", question_lines, indent),
                *_tag_block("Sample Answers", sample_answers_lines or ["N/A"], indent),
            ]
        )
98
+
99
+
100
class QAPair(BaseModel):
    """A single review-focus answer captured for one run.

    Attributes:
        focus_item: The focus item the user was shown.
        selected_sample_index: Optional index of a selected sample answer.
            NOTE(review): presumably an index into
            ``focus_item.sample_answers``; the base (0 or 1) is not
            established in this module.
        is_custom_response: Flag for a response that is not one of the
            sample answers; defaults to ``False``.
        answer: The user's answer text.
        explanation: Optional explanation supplied with the answer.
        status: Either ``"answered"`` or ``"skipped"``.
        timestamp: Defaults to ``datetime.now()`` (a naive local time) when
            the pair is created.
    """

    # What the user was shown
    focus_item: LabelingRequestFocusItem

    # Whether the user selected a sample answer or not
    selected_sample_index: int | None = None
    is_custom_response: bool = False

    # What the user responded
    answer: str
    explanation: str | None = None

    # The user could have skipped this question and provided nothing
    status: Literal["answered", "skipped"]
    timestamp: datetime = Field(default_factory=datetime.now)

    def to_str(self, indent: int = 0) -> str:
        """Render QA pair in a deterministic LLM-facing format.

        The rendered focus item is followed by a ``User answer`` line and a
        ``User explanation`` line; blank values render as ``N/A``. The
        status, selection fields and timestamp are not part of the output.

        Args:
            indent: Number of spaces passed through to the focus item.

        Returns:
            The rendered lines joined with newlines.
        """
        return "\n".join(
            [
                *self.focus_item.to_str(indent=indent).splitlines(),
                f"User answer: {_text_or_na(self.answer)}",
                f"User explanation: {_text_or_na(self.explanation)}",
            ]
        )
124
+
125
+
126
class LabelingRequest(BaseModel):
    """Structured labeling request shown to the user.

    Attributes:
        title: Title of the request.
        review_context: Context text rendered in the ``Review Context`` block.
        review_context_citations: Citations rendered with the review
            context; defaults to an empty list.
        review_focus: Focus items rendered as numbered ``Focus N`` blocks;
            defaults to an empty list.
        user_distribution: Optional estimated user distribution (p_u).
        user_distribution_reasoning: Optional reasoning text for p_u.
    """

    title: str
    review_context: str
    review_context_citations: list[InlineCitation] = Field(default_factory=list[InlineCitation])
    review_focus: list[LabelingRequestFocusItem] = Field(
        default_factory=list[LabelingRequestFocusItem]
    )
    user_distribution: OutputDistribution | None = None
    user_distribution_reasoning: str | None = None

    def to_str(self, indent: int = 0) -> str:
        """Render labeling request in a deterministic LLM-facing format.

        The ``<Labeling Request>`` block contains, in order: the title line,
        the review context with its citations, the numbered focus items, the
        estimated user distribution, and the distribution reasoning.

        Args:
            indent: Number of spaces applied to the body of each block.

        Returns:
            The rendered lines joined with newlines.
        """
        body_lines: list[str] = [f"Title: {_text_or_na(self.title)}"]

        review_context_lines = [
            *_text_lines_or_na(self.review_context),
            *_render_citations_block(self.review_context_citations, indent),
        ]
        body_lines.extend(_tag_block("Review Context", review_context_lines, indent))

        # Focus items are numbered from 1; no items renders as "N/A".
        review_focus_lines: list[str] = []
        for focus_idx, focus_item in enumerate(self.review_focus, start=1):
            focus_lines = focus_item.to_str(indent=indent).splitlines()
            review_focus_lines.extend(_tag_block(f"Focus {focus_idx}", focus_lines, indent))
        body_lines.extend(_tag_block("Review Focus", review_focus_lines or ["N/A"], indent))

        body_lines.extend(_render_user_distribution_block(self.user_distribution, indent))
        # No citations are stored for the reasoning, so its citations block
        # always renders as "N/A".
        body_lines.extend(
            _render_user_distribution_reasoning_block(
                self.user_distribution_reasoning,
                reasoning_citations=None,
                indent=indent,
            )
        )

        return "\n".join(_tag_block("Labeling Request", body_lines, indent))
166
+
167
+
168
class LabeledRun(BaseModel):
    """A human label for one agent run.

    Attributes:
        agent_run_id: ID of the labeled agent run.
        timestamp: Defaults to ``datetime.now()`` (a naive local time) when
            the label is created.
        label_value: The label payload as a dictionary.
        explanation: Optional explanation supplied with the label.
    """

    agent_run_id: str
    timestamp: datetime = Field(default_factory=datetime.now)

    # What the user responded
    label_value: dict[str, Any]
    explanation: str | None = None

    def to_str(
        self,
        labeling_request: LabelingRequest | None = None,
        indent: int = 0,
    ) -> str:
        """Render user label in a deterministic LLM-facing format.

        Args:
            labeling_request: Optional request the label answers. When given,
                its estimated user distribution and distribution reasoning
                are appended inside the ``<Label>`` block.
            indent: Number of spaces applied to the body of each block.

        Returns:
            The rendered ``<Label>`` block joined with newlines.
        """
        body_lines = [
            f"User label: {_stable_json(self.label_value)}",
            f"User explanation: {_text_or_na(self.explanation)}",
        ]
        if labeling_request is not None:
            body_lines.extend(
                _render_user_distribution_block(labeling_request.user_distribution, indent)
            )
            body_lines.extend(
                _render_user_distribution_reasoning_block(
                    labeling_request.user_distribution_reasoning,
                    reasoning_citations=None,
                    indent=indent,
                )
            )
        return "\n".join(_tag_block("Label", body_lines, indent))
202
+
203
+
204
class AgentRunFeedbackContext(BaseModel):
    """All feedback collected for a single agent run.

    Attributes:
        agent_run_id: ID of the agent run the feedback belongs to.
        round: Round number of this feedback entry.
        created_at: Defaults to ``datetime.now()`` at construction.
        last_updated: Defaults to ``datetime.now()`` at construction.
        labeling_request: The request the user was shown.
        qa_pairs: Question/answer pairs the user provided; defaults to an
            empty list.
        label: Optional label for the run. When present, its
            ``agent_run_id`` must equal this object's ``agent_run_id``.
    """

    agent_run_id: str
    round: int
    created_at: datetime = Field(default_factory=datetime.now)
    last_updated: datetime = Field(default_factory=datetime.now)

    # What the user was shown
    labeling_request: LabelingRequest

    # What the user responded
    qa_pairs: list[QAPair] = Field(default_factory=list[QAPair])
    label: LabeledRun | None = None

    @model_validator(mode="after")
    def validate_nested_agent_run_ids(self) -> "AgentRunFeedbackContext":
        """Ensure nested run IDs are consistent with the top-level run ID.

        Raises:
            ValueError: If a label is present and its ``agent_run_id``
                differs from this object's ``agent_run_id``.
        """
        if self.label is not None and self.label.agent_run_id != self.agent_run_id:
            raise ValueError("label.agent_run_id must match agent_run_id")
        return self

    def to_str(self, indent: int = 0) -> str:
        """Render full feedback entry in a deterministic LLM-facing format.

        The output is the rendered labeling request, then a
        ``<Question Answer Pairs>`` block, then a ``<Label>`` block.

        Args:
            indent: Number of spaces applied to the body of each block.

        Returns:
            The rendered lines joined with newlines.
        """
        lines = self.labeling_request.to_str(indent=indent).splitlines()

        # QA pairs are numbered from 1; no pairs renders as "N/A".
        qa_lines: list[str] = []
        for qa_idx, qa_pair in enumerate(self.qa_pairs, start=1):
            qa_entry_lines = qa_pair.to_str(indent=indent).splitlines()
            qa_lines.extend(_tag_block(f"QA {qa_idx}", qa_entry_lines, indent))
        lines.extend(_tag_block("Question Answer Pairs", qa_lines or ["N/A"], indent))

        if self.label is not None:
            lines.extend(
                self.label.to_str(
                    labeling_request=self.labeling_request,
                    indent=indent,
                ).splitlines()
            )
            return "\n".join(lines)

        # Without a label, emit the same block layout with "N/A" placeholders
        # for the label and explanation.
        label_body_lines = [
            "User label: N/A",
            "User explanation: N/A",
        ]
        label_body_lines.extend(
            _render_user_distribution_block(self.labeling_request.user_distribution, indent)
        )
        label_body_lines.extend(
            _render_user_distribution_reasoning_block(
                self.labeling_request.user_distribution_reasoning,
                reasoning_citations=None,
                indent=indent,
            )
        )
        lines.extend(_tag_block("Label", label_body_lines, indent))
        return "\n".join(lines)
263
+
264
+
265
class FeedbackContext(BaseModel):
    """Feedback context returned by the feedback REST API.

    All fields are required; no defaults are generated by this model.

    Attributes:
        feedback_context_id: ID of this feedback context.
        feedback_session_id: ID of the feedback session.
        agent_run_id: ID of the agent run.
        labeling_request: The labeling request attached to this context.
        created_at: Creation timestamp.
        updated_at: Last-update timestamp.
    """

    feedback_context_id: str
    feedback_session_id: str
    agent_run_id: str
    labeling_request: LabelingRequest
    created_at: datetime
    updated_at: datetime
274
+
275
+
276
class FeedbackContextsResponse(BaseModel):
    """Round-scoped feedback contexts returned by the feedback REST API.

    Attributes:
        current_round: The current round number.
        contexts: Feedback contexts in the response; defaults to an empty
            list.
    """

    current_round: int
    contexts: list[FeedbackContext] = Field(default_factory=list[FeedbackContext])
281
+
282
+
283
class UserData(BaseModel):
    """User Data (U) for user-context inference and downstream evaluation.

    Attributes:
        initial_rubric: The initial rubric text.
        agent_run_feedbacks: Feedback entries for agent runs; defaults to an
            empty list.
        created_at: Defaults to ``datetime.now()`` at construction.
        last_updated: Defaults to ``datetime.now()`` at construction and is
            refreshed by ``upsert_run_feedback``.
    """

    initial_rubric: str
    agent_run_feedbacks: list[AgentRunFeedbackContext] = Field(
        default_factory=list[AgentRunFeedbackContext]
    )
    created_at: datetime = Field(default_factory=datetime.now)
    last_updated: datetime = Field(default_factory=datetime.now)

    def upsert_run_feedback(self, agent_run_feedback: AgentRunFeedbackContext) -> None:
        """Insert or replace feedback for an agent run ID, updating timestamps.

        A deep copy of ``agent_run_feedback`` is stored, so the caller's
        object is not modified. When an entry with the same ``agent_run_id``
        already exists, the first such entry is replaced in place and its
        ``created_at`` is carried over; otherwise the copy is appended.

        Args:
            agent_run_feedback: Feedback entry to store.
        """
        now = datetime.now()
        upserted_feedback = agent_run_feedback.model_copy(deep=True)
        upserted_feedback.last_updated = now

        for idx, existing in enumerate(self.agent_run_feedbacks):
            if existing.agent_run_id == upserted_feedback.agent_run_id:
                # Keep the original creation time of the replaced entry.
                upserted_feedback.created_at = existing.created_at
                self.agent_run_feedbacks[idx] = upserted_feedback
                break
        else:
            self.agent_run_feedbacks.append(upserted_feedback)

        self.last_updated = now

    def validate_against_agreement_keys(self, agreement_keys: set[str]) -> None:
        """Validate stored labels and p_u outcomes against rubric agreement keys.

        For every feedback entry, the keys of the label payload (if any) and
        the keys of each user-distribution outcome must be a subset of
        ``agreement_keys``. Outcome values must additionally be ``str``,
        ``bool``, ``int`` or ``float``; label values are not type-checked.

        Args:
            agreement_keys: The allowed keys.

        Raises:
            ValueError: On the first entry that violates one of the rules.
        """
        for feedback in self.agent_run_feedbacks:
            run_id = feedback.agent_run_id

            label = feedback.label
            if label is not None:
                invalid_label_keys = sorted(set(label.label_value.keys()) - agreement_keys)
                if invalid_label_keys:
                    raise ValueError(
                        f"Run {run_id} has label_value keys outside rubric agreement keys: "
                        + ", ".join(invalid_label_keys)
                    )

            user_distribution = feedback.labeling_request.user_distribution
            if user_distribution is None:
                continue

            for outcome_idx, outcome in enumerate(user_distribution.outcomes, start=1):
                invalid_output_keys = sorted(set(outcome.output.keys()) - agreement_keys)
                if invalid_output_keys:
                    raise ValueError(
                        f"Run {run_id} has user_distribution outcome #{outcome_idx} keys outside "
                        "rubric agreement keys: " + ", ".join(invalid_output_keys)
                    )
                for key, value in outcome.output.items():
                    if not isinstance(value, (str, bool, int, float)):
                        raise ValueError(
                            f"Run {run_id} has user_distribution outcome #{outcome_idx} "
                            f"non-scalar value for key '{key}': {type(value).__name__}"
                        )

    def _iter_qa_entries(
        self, status: Literal["answered", "skipped"]
    ) -> Iterator[tuple[AgentRunFeedbackContext, QAPair]]:
        """Yield ``(feedback, qa_pair)`` for every QA pair with the given status."""
        for feedback in self.agent_run_feedbacks:
            for qa_pair in feedback.qa_pairs:
                if qa_pair.status == status:
                    yield feedback, qa_pair

    def iter_answered_qa_entries(self) -> Iterator[tuple[AgentRunFeedbackContext, QAPair]]:
        """Iterate answered QA pairs with their parent run feedback."""
        yield from self._iter_qa_entries("answered")

    def iter_skipped_qa_entries(self) -> Iterator[tuple[AgentRunFeedbackContext, QAPair]]:
        """Iterate skipped QA pairs with their parent run feedback."""
        yield from self._iter_qa_entries("skipped")

    def iter_labeled_entries(self) -> Iterator[tuple[AgentRunFeedbackContext, LabeledRun]]:
        """Iterate labeled run entries with their parent run feedback.

        Entries whose ``label`` is ``None`` are skipped.
        """
        for feedback in self.agent_run_feedbacks:
            if feedback.label is not None:
                yield feedback, feedback.label
366
+
367
+
368
# Backward-compatible alias used by older callers/scripts. Both names refer
# to the same class object.
AgentRunFeedback = AgentRunFeedbackContext
@@ -179,7 +179,7 @@ def format_chat_message(
179
179
  if isinstance(message, AssistantMessage) and message.content:
180
180
  for content in message.content:
181
181
  if isinstance(content, ContentReasoning):
182
- cur_content = f"<reasoning>\n{content.reasoning}\n</reasoning>\n"
182
+ cur_content = f"<reasoning>\n{content.display_reasoning}\n</reasoning>\n"
183
183
 
184
184
  # Main content text
185
185
  cur_content += message.text