prompture 0.0.47.dev1__py3-none-any.whl → 0.0.47.dev3__py3-none-any.whl

This diff compares the published contents of two publicly available versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
prompture/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.0.47.dev1'
-__version_tuple__ = version_tuple = (0, 0, 47, 'dev1')
+__version__ = version = '0.0.47.dev3'
+__version_tuple__ = version_tuple = (0, 0, 47, 'dev3')
 
 __commit_id__ = commit_id = None
@@ -55,6 +55,7 @@ class AsyncConversation:
         callbacks: DriverCallbacks | None = None,
         tools: ToolRegistry | None = None,
         max_tool_rounds: int = 10,
+        simulated_tools: bool | Literal["auto"] = "auto",
         conversation_id: str | None = None,
         auto_save: str | Path | None = None,
         tags: list[str] | None = None,
@@ -106,6 +107,10 @@ class AsyncConversation:
         }
         self._tools = tools or ToolRegistry()
         self._max_tool_rounds = max_tool_rounds
+        self._simulated_tools = simulated_tools
+
+        # Reasoning content from last response
+        self._last_reasoning: str | None = None
 
         # Persistence
         self._conversation_id = conversation_id or str(uuid.uuid4())
@@ -119,6 +124,11 @@ class AsyncConversation:
     # Public helpers
     # ------------------------------------------------------------------
 
+    @property
+    def last_reasoning(self) -> str | None:
+        """The reasoning/thinking content from the last LLM response, if any."""
+        return self._last_reasoning
+
     @property
     def messages(self) -> list[dict[str, Any]]:
         """Read-only view of the conversation history."""
@@ -324,8 +334,15 @@ class AsyncConversation:
         If tools are registered and the driver supports tool use,
         dispatches to the async tool execution loop.
         """
-        if self._tools and getattr(self._driver, "supports_tool_use", False):
-            return await self._ask_with_tools(content, options, images=images)
+        self._last_reasoning = None
+
+        # Route to appropriate tool handling
+        if self._tools:
+            use_native = getattr(self._driver, "supports_tool_use", False)
+            if self._simulated_tools is True or (self._simulated_tools == "auto" and not use_native):
+                return await self._ask_with_simulated_tools(content, options, images=images)
+            elif use_native and self._simulated_tools is not True:
+                return await self._ask_with_tools(content, options, images=images)
 
         merged = {**self._options, **(options or {})}
         messages = self._build_messages(content, images=images)
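
The new routing reduces to a small decision table. The sketch below mirrors the diff's conditionals exactly, so the behavior of the default simulated_tools="auto" can be checked in isolation:

    def tool_mode(simulated_tools, supports_native: bool) -> str:
        # Mirrors the branch added to ask() above.
        if simulated_tools is True or (simulated_tools == "auto" and not supports_native):
            return "simulated"
        if supports_native and simulated_tools is not True:
            return "native"
        return "none"  # tools registered but disabled: falls through to a plain ask

    assert tool_mode("auto", supports_native=True) == "native"
    assert tool_mode("auto", supports_native=False) == "simulated"
    assert tool_mode(True, supports_native=True) == "simulated"
    assert tool_mode(False, supports_native=False) == "none"
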
@@ -333,6 +350,7 @@ class AsyncConversation:
 
         text = resp.get("text", "")
         meta = resp.get("meta", {})
+        self._last_reasoning = resp.get("reasoning_content")
 
         user_content = self._build_content_with_images(content, images)
         self._messages.append({"role": "user", "content": user_content})
@@ -365,6 +383,7 @@ class AsyncConversation:
             text = resp.get("text", "")
 
             if not tool_calls:
+                self._last_reasoning = resp.get("reasoning_content")
                 self._messages.append({"role": "assistant", "content": text})
                 return text
 
@@ -377,6 +396,11 @@ class AsyncConversation:
                 }
                 for tc in tool_calls
             ]
+            # Preserve reasoning_content for providers that require it
+            # on subsequent requests (e.g. Moonshot reasoning models).
+            if resp.get("reasoning_content") is not None:
+                assistant_msg["reasoning_content"] = resp["reasoning_content"]
+
             self._messages.append(assistant_msg)
             msgs.append(assistant_msg)
 
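
With this hunk, an assistant turn that both called tools and produced reasoning is replayed to the provider with the reasoning attached, roughly in the following shape. The tool-call key name and entries are illustrative, since the comprehension that builds them is only partially visible here:

    assistant_msg = {
        "role": "assistant",
        "content": "",
        "tool_calls": [...],  # provider-format entries from the comprehension above (key name assumed)
        "reasoning_content": "First, add the two numbers...",  # attached only when present
    }
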
@@ -397,6 +421,63 @@ class AsyncConversation:
 
         raise RuntimeError(f"Tool execution loop exceeded {self._max_tool_rounds} rounds")
 
+    async def _ask_with_simulated_tools(
+        self,
+        content: str,
+        options: dict[str, Any] | None = None,
+        images: list[ImageInput] | None = None,
+    ) -> str:
+        """Async prompt-based tool calling for drivers without native tool use."""
+        from .simulated_tools import build_tool_prompt, format_tool_result, parse_simulated_response
+
+        merged = {**self._options, **(options or {})}
+        tool_prompt = build_tool_prompt(self._tools)
+
+        # Augment system prompt with tool descriptions
+        augmented_system = tool_prompt
+        if self._system_prompt:
+            augmented_system = f"{self._system_prompt}\n\n{tool_prompt}"
+
+        # Record user message in history
+        user_content = self._build_content_with_images(content, images)
+        self._messages.append({"role": "user", "content": user_content})
+
+        for _round in range(self._max_tool_rounds):
+            # Build messages with the augmented system prompt
+            msgs: list[dict[str, Any]] = []
+            msgs.append({"role": "system", "content": augmented_system})
+            msgs.extend(self._messages)
+
+            resp = await self._driver.generate_messages_with_hooks(msgs, merged)
+            text = resp.get("text", "")
+            meta = resp.get("meta", {})
+            self._accumulate_usage(meta)
+
+            parsed = parse_simulated_response(text, self._tools)
+
+            if parsed["type"] == "final_answer":
+                answer = parsed["content"]
+                self._messages.append({"role": "assistant", "content": answer})
+                return answer
+
+            # Tool call
+            tool_name = parsed["name"]
+            tool_args = parsed["arguments"]
+
+            # Record assistant's tool call as an assistant message
+            self._messages.append({"role": "assistant", "content": text})
+
+            try:
+                result = self._tools.execute(tool_name, tool_args)
+                result_msg = format_tool_result(tool_name, result)
+            except Exception as exc:
+                result_msg = format_tool_result(tool_name, f"Error: {exc}")
+
+            # Record tool result as a user message
+            self._messages.append({"role": "user", "content": result_msg})
+
+        raise RuntimeError(f"Simulated tool execution loop exceeded {self._max_tool_rounds} rounds")
+
     def _build_messages_raw(self) -> list[dict[str, Any]]:
         """Build messages array from system prompt + full history (including tool messages)."""
         msgs: list[dict[str, Any]] = []
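
The loop above relies on a small contract from prompture.simulated_tools: parse_simulated_response returns one of two dict shapes. The parsing rules live in that module and are not part of this diff; the non-final "type" value below is an assumption, since the loop only tests for "final_answer":

    # Final answer: appended to history and returned to the caller.
    final = {"type": "final_answer", "content": "The total is 4."}

    # Tool call: executed via ToolRegistry.execute, with the formatted result
    # fed back as a user message for the next round.
    call = {
        "type": "tool_call",  # exact value assumed; anything other than "final_answer" takes this path
        "name": "add",
        "arguments": {"a": 2, "b": 2},
    }
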
@@ -457,6 +538,8 @@ class AsyncConversation:
         images: list[ImageInput] | None = None,
     ) -> dict[str, Any]:
         """Send a message with schema enforcement and get structured JSON back (async)."""
+        self._last_reasoning = None
+
         merged = {**self._options, **(options or {})}
 
         schema_string = json.dumps(json_schema, indent=2)
@@ -494,6 +577,7 @@ class AsyncConversation:
 
         text = resp.get("text", "")
         meta = resp.get("meta", {})
+        self._last_reasoning = resp.get("reasoning_content")
 
         user_content = self._build_content_with_images(content, images)
         self._messages.append({"role": "user", "content": user_content})
@@ -528,6 +612,7 @@ class AsyncConversation:
             "json_object": json_obj,
             "usage": usage,
             "output_format": output_format,
+            "reasoning": self._last_reasoning,
         }
 
         if output_format == "toon":
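
Structured asks surface the same reasoning under a "reasoning" key in their result dict. A hedged sketch; the positional argument order of ask_for_json is assumed, while the result keys match the diff:

    async def extract_name(conv):
        result = await conv.ask_for_json(
            "Extract the user's name.",
            {"type": "object", "properties": {"name": {"type": "string"}}},
        )
        print(result["reasoning"])  # None unless the driver returned reasoning_content
        return result["json_object"]
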
prompture/conversation.py CHANGED
@@ -56,6 +56,7 @@ class Conversation:
         callbacks: DriverCallbacks | None = None,
         tools: ToolRegistry | None = None,
         max_tool_rounds: int = 10,
+        simulated_tools: bool | Literal["auto"] = "auto",
        conversation_id: str | None = None,
        auto_save: str | Path | None = None,
        tags: list[str] | None = None,
@@ -109,6 +110,10 @@ class Conversation:
         }
         self._tools = tools or ToolRegistry()
         self._max_tool_rounds = max_tool_rounds
+        self._simulated_tools = simulated_tools
+
+        # Reasoning content from last response
+        self._last_reasoning: str | None = None
 
         # Persistence
         self._conversation_id = conversation_id or str(uuid.uuid4())
@@ -122,6 +127,11 @@ class Conversation:
     # Public helpers
     # ------------------------------------------------------------------
 
+    @property
+    def last_reasoning(self) -> str | None:
+        """The reasoning/thinking content from the last LLM response, if any."""
+        return self._last_reasoning
+
     @property
     def messages(self) -> list[dict[str, Any]]:
         """Read-only view of the conversation history."""
@@ -338,8 +348,15 @@ class Conversation:
         images: Optional list of images to include (bytes, path, URL,
             base64 string, or :class:`ImageContent`).
         """
-        if self._tools and getattr(self._driver, "supports_tool_use", False):
-            return self._ask_with_tools(content, options, images=images)
+        self._last_reasoning = None
+
+        # Route to appropriate tool handling
+        if self._tools:
+            use_native = getattr(self._driver, "supports_tool_use", False)
+            if self._simulated_tools is True or (self._simulated_tools == "auto" and not use_native):
+                return self._ask_with_simulated_tools(content, options, images=images)
+            elif use_native and self._simulated_tools is not True:
+                return self._ask_with_tools(content, options, images=images)
 
         merged = {**self._options, **(options or {})}
         messages = self._build_messages(content, images=images)
@@ -347,6 +364,7 @@ class Conversation:
 
         text = resp.get("text", "")
         meta = resp.get("meta", {})
+        self._last_reasoning = resp.get("reasoning_content")
 
         # Record in history — store content with images for context
         user_content = self._build_content_with_images(content, images)
@@ -382,6 +400,7 @@ class Conversation:
 
             if not tool_calls:
                 # No tool calls -> final response
+                self._last_reasoning = resp.get("reasoning_content")
                 self._messages.append({"role": "assistant", "content": text})
                 return text
 
@@ -395,6 +414,11 @@ class Conversation:
                 }
                 for tc in tool_calls
             ]
+            # Preserve reasoning_content for providers that require it
+            # on subsequent requests (e.g. Moonshot reasoning models).
+            if resp.get("reasoning_content") is not None:
+                assistant_msg["reasoning_content"] = resp["reasoning_content"]
+
             self._messages.append(assistant_msg)
             msgs.append(assistant_msg)
 
@@ -416,6 +440,63 @@ class Conversation:
 
         raise RuntimeError(f"Tool execution loop exceeded {self._max_tool_rounds} rounds")
 
+    def _ask_with_simulated_tools(
+        self,
+        content: str,
+        options: dict[str, Any] | None = None,
+        images: list[ImageInput] | None = None,
+    ) -> str:
+        """Prompt-based tool calling for drivers without native tool use."""
+        from .simulated_tools import build_tool_prompt, format_tool_result, parse_simulated_response
+
+        merged = {**self._options, **(options or {})}
+        tool_prompt = build_tool_prompt(self._tools)
+
+        # Augment system prompt with tool descriptions
+        augmented_system = tool_prompt
+        if self._system_prompt:
+            augmented_system = f"{self._system_prompt}\n\n{tool_prompt}"
+
+        # Record user message in history
+        user_content = self._build_content_with_images(content, images)
+        self._messages.append({"role": "user", "content": user_content})
+
+        for _round in range(self._max_tool_rounds):
+            # Build messages with the augmented system prompt
+            msgs: list[dict[str, Any]] = []
+            msgs.append({"role": "system", "content": augmented_system})
+            msgs.extend(self._messages)
+
+            resp = self._driver.generate_messages_with_hooks(msgs, merged)
+            text = resp.get("text", "")
+            meta = resp.get("meta", {})
+            self._accumulate_usage(meta)
+
+            parsed = parse_simulated_response(text, self._tools)
+
+            if parsed["type"] == "final_answer":
+                answer = parsed["content"]
+                self._messages.append({"role": "assistant", "content": answer})
+                return answer
+
+            # Tool call
+            tool_name = parsed["name"]
+            tool_args = parsed["arguments"]
+
+            # Record assistant's tool call as an assistant message
+            self._messages.append({"role": "assistant", "content": text})
+
+            try:
+                result = self._tools.execute(tool_name, tool_args)
+                result_msg = format_tool_result(tool_name, result)
+            except Exception as exc:
+                result_msg = format_tool_result(tool_name, f"Error: {exc}")
+
+            # Record tool result as a user message (all drivers understand user/assistant)
+            self._messages.append({"role": "user", "content": result_msg})
+
+        raise RuntimeError(f"Simulated tool execution loop exceeded {self._max_tool_rounds} rounds")
+
     def _build_messages_raw(self) -> list[dict[str, Any]]:
         """Build messages array from system prompt + full history (including tool messages)."""
         msgs: list[dict[str, Any]] = []
@@ -484,6 +565,8 @@ class Conversation:
             context clean for subsequent turns.
         """
 
+        self._last_reasoning = None
+
         merged = {**self._options, **(options or {})}
 
         # Build the full prompt with schema instructions inline (handled by ask_for_json)
@@ -525,6 +608,7 @@ class Conversation:
 
         text = resp.get("text", "")
         meta = resp.get("meta", {})
+        self._last_reasoning = resp.get("reasoning_content")
 
         # Store original content (without schema boilerplate) for cleaner context
         # Include images in history so subsequent turns can reference them
@@ -563,6 +647,7 @@ class Conversation:
             "json_object": json_obj,
             "usage": usage,
             "output_format": output_format,
+            "reasoning": self._last_reasoning,
         }
 
         if output_format == "toon":
@@ -95,8 +95,17 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
             "model_name": model,
         }
 
-        text = resp["choices"][0]["message"]["content"]
-        return {"text": text, "meta": meta}
+        message = resp["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
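
The driver changes in this release (Grok here; Groq, LM Studio, and Moonshot further below) converge on one normalized return shape. A representative value, with illustrative field contents and the meta dict trimmed:

    result = {
        "text": "The answer is 4.",  # falls back to the reasoning text when content is empty
        "meta": {"model_name": "grok-3-mini"},  # usage/cost fields omitted here
        "reasoning_content": "First, compute 2 + 2...",  # present only when the provider sent it
    }
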
@@ -173,15 +182,20 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
                 args = json.loads(tc["function"]["arguments"])
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append({
-                "id": tc["id"],
-                "name": tc["function"]["name"],
-                "arguments": args,
-            })
-
-        return {
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        if choice["message"].get("reasoning_content") is not None:
+            result["reasoning_content"] = choice["message"]["reasoning_content"]
+        return result
@@ -88,8 +88,16 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
             "model_name": model,
         }
 
-        text = resp.choices[0].message.content
-        return {"text": text, "meta": meta}
+        text = resp.choices[0].message.content or ""
+        reasoning_content = getattr(resp.choices[0].message, "reasoning_content", None)
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -152,15 +160,21 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
                 args = json.loads(tc.function.arguments)
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append({
-                "id": tc.id,
-                "name": tc.function.name,
-                "arguments": args,
-            })
-
-        return {
+            tool_calls_out.append(
+                {
+                    "id": tc.id,
+                    "name": tc.function.name,
+                    "arguments": args,
+                }
+            )
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        reasoning_content = getattr(choice.message, "reasoning_content", None)
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
@@ -98,7 +98,12 @@ class AsyncLMStudioDriver(AsyncDriver):
         if "choices" not in response_data or not response_data["choices"]:
             raise ValueError(f"Unexpected response format: {response_data}")
 
-        text = response_data["choices"][0]["message"]["content"]
+        message = response_data["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        if not text and reasoning_content:
+            text = reasoning_content
 
         usage = response_data.get("usage", {})
         prompt_tokens = usage.get("prompt_tokens", 0)
@@ -114,7 +119,10 @@ class AsyncLMStudioDriver(AsyncDriver):
             "model_name": merged_options.get("model", self.model),
         }
 
-        return {"text": text, "meta": meta}
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # -- Model management (LM Studio 0.4.0+) ----------------------------------
 
@@ -138,10 +138,11 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
 
         message = resp["choices"][0]["message"]
         text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
 
         # Reasoning models may return content in reasoning_content when content is empty
-        if not text and message.get("reasoning_content"):
-            text = message["reasoning_content"]
+        if not text and reasoning_content:
+            text = reasoning_content
 
         # Structured output fallback: if we used json_schema mode and got an
         # empty response, retry with json_object mode and schema in the prompt.
@@ -184,8 +185,9 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
             resp = fb_resp
             fb_message = fb_resp["choices"][0]["message"]
             text = fb_message.get("content") or ""
-            if not text and fb_message.get("reasoning_content"):
-                text = fb_message["reasoning_content"]
+            reasoning_content = fb_message.get("reasoning_content")
+            if not text and reasoning_content:
+                text = reasoning_content
 
         total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
 
@@ -198,7 +200,10 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
             "model_name": model,
         }
 
-        return {"text": text, "meta": meta}
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -271,11 +276,12 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
         }
 
         choice = resp["choices"][0]
-        text = choice["message"].get("content") or ""
+        message = choice["message"]
+        text = message.get("content") or ""
         stop_reason = choice.get("finish_reason")
 
         tool_calls_out: list[dict[str, Any]] = []
-        for tc in choice["message"].get("tool_calls", []):
+        for tc in message.get("tool_calls", []):
             try:
                 args = json.loads(tc["function"]["arguments"])
             except (json.JSONDecodeError, TypeError):
@@ -288,13 +294,21 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
                 }
             )
 
-        return {
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
 
+        # Preserve reasoning_content for reasoning models so the
+        # conversation loop can include it when sending the assistant
+        # message back (Moonshot requires it on subsequent requests).
+        if message.get("reasoning_content") is not None:
+            result["reasoning_content"] = message["reasoning_content"]
+
+        return result
+
     # ------------------------------------------------------------------
     # Streaming
     # ------------------------------------------------------------------
  # ------------------------------------------------------------------
@@ -325,6 +339,7 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
325
339
  data["temperature"] = opts["temperature"]
326
340
 
327
341
  full_text = ""
342
+ full_reasoning = ""
328
343
  prompt_tokens = 0
329
344
  completion_tokens = 0
330
345
 
@@ -359,9 +374,11 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
                 if choices:
                     delta = choices[0].get("delta", {})
                     content = delta.get("content") or ""
-                    # Reasoning models stream thinking via reasoning_content
-                    if not content:
-                        content = delta.get("reasoning_content") or ""
+                    reasoning_chunk = delta.get("reasoning_content") or ""
+                    if reasoning_chunk:
+                        full_reasoning += reasoning_chunk
+                    if not content and reasoning_chunk:
+                        content = reasoning_chunk
                     if content:
                         full_text += content
                         yield {"type": "delta", "text": content}
@@ -369,7 +386,7 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
 
-        yield {
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -381,3 +398,6 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
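
A consumer of the Moonshot stream can now recover the full reasoning from the final chunk. This sketch assumes only the chunk dicts emitted above; the name of the public streaming entry point is not shown in this diff:

    async def collect(stream):
        text_parts: list[str] = []
        reasoning = None
        async for chunk in stream:
            if chunk["type"] == "delta":
                text_parts.append(chunk["text"])
            elif chunk["type"] == "done":
                reasoning = chunk.get("reasoning_content")  # accumulated full_reasoning, if any
        return "".join(text_parts), reasoning
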