prompture 0.0.47.dev1__py3-none-any.whl → 0.0.47.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/_version.py +2 -2
- prompture/async_conversation.py +87 -2
- prompture/conversation.py +87 -2
- prompture/drivers/async_grok_driver.py +23 -9
- prompture/drivers/async_groq_driver.py +23 -9
- prompture/drivers/async_lmstudio_driver.py +10 -2
- prompture/drivers/async_moonshot_driver.py +32 -12
- prompture/drivers/async_openrouter_driver.py +43 -17
- prompture/drivers/grok_driver.py +23 -9
- prompture/drivers/groq_driver.py +23 -9
- prompture/drivers/lmstudio_driver.py +11 -2
- prompture/drivers/moonshot_driver.py +32 -12
- prompture/drivers/openrouter_driver.py +34 -10
- prompture/simulated_tools.py +115 -0
- prompture/tools_schema.py +22 -0
- {prompture-0.0.47.dev1.dist-info → prompture-0.0.47.dev3.dist-info}/METADATA +35 -2
- {prompture-0.0.47.dev1.dist-info → prompture-0.0.47.dev3.dist-info}/RECORD +21 -20
- {prompture-0.0.47.dev1.dist-info → prompture-0.0.47.dev3.dist-info}/WHEEL +0 -0
- {prompture-0.0.47.dev1.dist-info → prompture-0.0.47.dev3.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.47.dev1.dist-info → prompture-0.0.47.dev3.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.47.dev1.dist-info → prompture-0.0.47.dev3.dist-info}/top_level.txt +0 -0
prompture/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.0.47.dev1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 0, 47, 'dev1')
|
|
31
|
+
__version__ = version = '0.0.47.dev3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 0, 47, 'dev3')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
prompture/async_conversation.py
CHANGED
|
@@ -55,6 +55,7 @@ class AsyncConversation:
|
|
|
55
55
|
callbacks: DriverCallbacks | None = None,
|
|
56
56
|
tools: ToolRegistry | None = None,
|
|
57
57
|
max_tool_rounds: int = 10,
|
|
58
|
+
simulated_tools: bool | Literal["auto"] = "auto",
|
|
58
59
|
conversation_id: str | None = None,
|
|
59
60
|
auto_save: str | Path | None = None,
|
|
60
61
|
tags: list[str] | None = None,
|
|
@@ -106,6 +107,10 @@ class AsyncConversation:
|
|
|
106
107
|
}
|
|
107
108
|
self._tools = tools or ToolRegistry()
|
|
108
109
|
self._max_tool_rounds = max_tool_rounds
|
|
110
|
+
self._simulated_tools = simulated_tools
|
|
111
|
+
|
|
112
|
+
# Reasoning content from last response
|
|
113
|
+
self._last_reasoning: str | None = None
|
|
109
114
|
|
|
110
115
|
# Persistence
|
|
111
116
|
self._conversation_id = conversation_id or str(uuid.uuid4())
|
|
@@ -119,6 +124,11 @@ class AsyncConversation:
|
|
|
119
124
|
# Public helpers
|
|
120
125
|
# ------------------------------------------------------------------
|
|
121
126
|
|
|
127
|
+
@property
|
|
128
|
+
def last_reasoning(self) -> str | None:
|
|
129
|
+
"""The reasoning/thinking content from the last LLM response, if any."""
|
|
130
|
+
return self._last_reasoning
|
|
131
|
+
|
|
122
132
|
@property
|
|
123
133
|
def messages(self) -> list[dict[str, Any]]:
|
|
124
134
|
"""Read-only view of the conversation history."""
|
|
@@ -324,8 +334,15 @@ class AsyncConversation:
|
|
|
324
334
|
If tools are registered and the driver supports tool use,
|
|
325
335
|
dispatches to the async tool execution loop.
|
|
326
336
|
"""
|
|
327
|
-
|
|
328
|
-
|
|
337
|
+
self._last_reasoning = None
|
|
338
|
+
|
|
339
|
+
# Route to appropriate tool handling
|
|
340
|
+
if self._tools:
|
|
341
|
+
use_native = getattr(self._driver, "supports_tool_use", False)
|
|
342
|
+
if self._simulated_tools is True or (self._simulated_tools == "auto" and not use_native):
|
|
343
|
+
return await self._ask_with_simulated_tools(content, options, images=images)
|
|
344
|
+
elif use_native and self._simulated_tools is not True:
|
|
345
|
+
return await self._ask_with_tools(content, options, images=images)
|
|
329
346
|
|
|
330
347
|
merged = {**self._options, **(options or {})}
|
|
331
348
|
messages = self._build_messages(content, images=images)
|
|
@@ -333,6 +350,7 @@ class AsyncConversation:
|
|
|
333
350
|
|
|
334
351
|
text = resp.get("text", "")
|
|
335
352
|
meta = resp.get("meta", {})
|
|
353
|
+
self._last_reasoning = resp.get("reasoning_content")
|
|
336
354
|
|
|
337
355
|
user_content = self._build_content_with_images(content, images)
|
|
338
356
|
self._messages.append({"role": "user", "content": user_content})
|
|
@@ -365,6 +383,7 @@ class AsyncConversation:
|
|
|
365
383
|
text = resp.get("text", "")
|
|
366
384
|
|
|
367
385
|
if not tool_calls:
|
|
386
|
+
self._last_reasoning = resp.get("reasoning_content")
|
|
368
387
|
self._messages.append({"role": "assistant", "content": text})
|
|
369
388
|
return text
|
|
370
389
|
|
|
@@ -377,6 +396,11 @@ class AsyncConversation:
|
|
|
377
396
|
}
|
|
378
397
|
for tc in tool_calls
|
|
379
398
|
]
|
|
399
|
+
# Preserve reasoning_content for providers that require it
|
|
400
|
+
# on subsequent requests (e.g. Moonshot reasoning models).
|
|
401
|
+
if resp.get("reasoning_content") is not None:
|
|
402
|
+
assistant_msg["reasoning_content"] = resp["reasoning_content"]
|
|
403
|
+
|
|
380
404
|
self._messages.append(assistant_msg)
|
|
381
405
|
msgs.append(assistant_msg)
|
|
382
406
|
|
|
@@ -397,6 +421,63 @@ class AsyncConversation:
|
|
|
397
421
|
|
|
398
422
|
raise RuntimeError(f"Tool execution loop exceeded {self._max_tool_rounds} rounds")
|
|
399
423
|
|
|
424
|
+
async def _ask_with_simulated_tools(
|
|
425
|
+
self,
|
|
426
|
+
content: str,
|
|
427
|
+
options: dict[str, Any] | None = None,
|
|
428
|
+
images: list[ImageInput] | None = None,
|
|
429
|
+
) -> str:
|
|
430
|
+
"""Async prompt-based tool calling for drivers without native tool use."""
|
|
431
|
+
from .simulated_tools import build_tool_prompt, format_tool_result, parse_simulated_response
|
|
432
|
+
|
|
433
|
+
merged = {**self._options, **(options or {})}
|
|
434
|
+
tool_prompt = build_tool_prompt(self._tools)
|
|
435
|
+
|
|
436
|
+
# Augment system prompt with tool descriptions
|
|
437
|
+
augmented_system = tool_prompt
|
|
438
|
+
if self._system_prompt:
|
|
439
|
+
augmented_system = f"{self._system_prompt}\n\n{tool_prompt}"
|
|
440
|
+
|
|
441
|
+
# Record user message in history
|
|
442
|
+
user_content = self._build_content_with_images(content, images)
|
|
443
|
+
self._messages.append({"role": "user", "content": user_content})
|
|
444
|
+
|
|
445
|
+
for _round in range(self._max_tool_rounds):
|
|
446
|
+
# Build messages with the augmented system prompt
|
|
447
|
+
msgs: list[dict[str, Any]] = []
|
|
448
|
+
msgs.append({"role": "system", "content": augmented_system})
|
|
449
|
+
msgs.extend(self._messages)
|
|
450
|
+
|
|
451
|
+
resp = await self._driver.generate_messages_with_hooks(msgs, merged)
|
|
452
|
+
text = resp.get("text", "")
|
|
453
|
+
meta = resp.get("meta", {})
|
|
454
|
+
self._accumulate_usage(meta)
|
|
455
|
+
|
|
456
|
+
parsed = parse_simulated_response(text, self._tools)
|
|
457
|
+
|
|
458
|
+
if parsed["type"] == "final_answer":
|
|
459
|
+
answer = parsed["content"]
|
|
460
|
+
self._messages.append({"role": "assistant", "content": answer})
|
|
461
|
+
return answer
|
|
462
|
+
|
|
463
|
+
# Tool call
|
|
464
|
+
tool_name = parsed["name"]
|
|
465
|
+
tool_args = parsed["arguments"]
|
|
466
|
+
|
|
467
|
+
# Record assistant's tool call as an assistant message
|
|
468
|
+
self._messages.append({"role": "assistant", "content": text})
|
|
469
|
+
|
|
470
|
+
try:
|
|
471
|
+
result = self._tools.execute(tool_name, tool_args)
|
|
472
|
+
result_msg = format_tool_result(tool_name, result)
|
|
473
|
+
except Exception as exc:
|
|
474
|
+
result_msg = format_tool_result(tool_name, f"Error: {exc}")
|
|
475
|
+
|
|
476
|
+
# Record tool result as a user message
|
|
477
|
+
self._messages.append({"role": "user", "content": result_msg})
|
|
478
|
+
|
|
479
|
+
raise RuntimeError(f"Simulated tool execution loop exceeded {self._max_tool_rounds} rounds")
|
|
480
|
+
|
|
400
481
|
def _build_messages_raw(self) -> list[dict[str, Any]]:
|
|
401
482
|
"""Build messages array from system prompt + full history (including tool messages)."""
|
|
402
483
|
msgs: list[dict[str, Any]] = []
|
|
@@ -457,6 +538,8 @@ class AsyncConversation:
|
|
|
457
538
|
images: list[ImageInput] | None = None,
|
|
458
539
|
) -> dict[str, Any]:
|
|
459
540
|
"""Send a message with schema enforcement and get structured JSON back (async)."""
|
|
541
|
+
self._last_reasoning = None
|
|
542
|
+
|
|
460
543
|
merged = {**self._options, **(options or {})}
|
|
461
544
|
|
|
462
545
|
schema_string = json.dumps(json_schema, indent=2)
|
|
@@ -494,6 +577,7 @@ class AsyncConversation:
|
|
|
494
577
|
|
|
495
578
|
text = resp.get("text", "")
|
|
496
579
|
meta = resp.get("meta", {})
|
|
580
|
+
self._last_reasoning = resp.get("reasoning_content")
|
|
497
581
|
|
|
498
582
|
user_content = self._build_content_with_images(content, images)
|
|
499
583
|
self._messages.append({"role": "user", "content": user_content})
|
|
@@ -528,6 +612,7 @@ class AsyncConversation:
|
|
|
528
612
|
"json_object": json_obj,
|
|
529
613
|
"usage": usage,
|
|
530
614
|
"output_format": output_format,
|
|
615
|
+
"reasoning": self._last_reasoning,
|
|
531
616
|
}
|
|
532
617
|
|
|
533
618
|
if output_format == "toon":
|
prompture/conversation.py
CHANGED
|
@@ -56,6 +56,7 @@ class Conversation:
|
|
|
56
56
|
callbacks: DriverCallbacks | None = None,
|
|
57
57
|
tools: ToolRegistry | None = None,
|
|
58
58
|
max_tool_rounds: int = 10,
|
|
59
|
+
simulated_tools: bool | Literal["auto"] = "auto",
|
|
59
60
|
conversation_id: str | None = None,
|
|
60
61
|
auto_save: str | Path | None = None,
|
|
61
62
|
tags: list[str] | None = None,
|
|
@@ -109,6 +110,10 @@ class Conversation:
|
|
|
109
110
|
}
|
|
110
111
|
self._tools = tools or ToolRegistry()
|
|
111
112
|
self._max_tool_rounds = max_tool_rounds
|
|
113
|
+
self._simulated_tools = simulated_tools
|
|
114
|
+
|
|
115
|
+
# Reasoning content from last response
|
|
116
|
+
self._last_reasoning: str | None = None
|
|
112
117
|
|
|
113
118
|
# Persistence
|
|
114
119
|
self._conversation_id = conversation_id or str(uuid.uuid4())
|
|
@@ -122,6 +127,11 @@ class Conversation:
|
|
|
122
127
|
# Public helpers
|
|
123
128
|
# ------------------------------------------------------------------
|
|
124
129
|
|
|
130
|
+
@property
|
|
131
|
+
def last_reasoning(self) -> str | None:
|
|
132
|
+
"""The reasoning/thinking content from the last LLM response, if any."""
|
|
133
|
+
return self._last_reasoning
|
|
134
|
+
|
|
125
135
|
@property
|
|
126
136
|
def messages(self) -> list[dict[str, Any]]:
|
|
127
137
|
"""Read-only view of the conversation history."""
|
|
@@ -338,8 +348,15 @@ class Conversation:
|
|
|
338
348
|
images: Optional list of images to include (bytes, path, URL,
|
|
339
349
|
base64 string, or :class:`ImageContent`).
|
|
340
350
|
"""
|
|
341
|
-
|
|
342
|
-
|
|
351
|
+
self._last_reasoning = None
|
|
352
|
+
|
|
353
|
+
# Route to appropriate tool handling
|
|
354
|
+
if self._tools:
|
|
355
|
+
use_native = getattr(self._driver, "supports_tool_use", False)
|
|
356
|
+
if self._simulated_tools is True or (self._simulated_tools == "auto" and not use_native):
|
|
357
|
+
return self._ask_with_simulated_tools(content, options, images=images)
|
|
358
|
+
elif use_native and self._simulated_tools is not True:
|
|
359
|
+
return self._ask_with_tools(content, options, images=images)
|
|
343
360
|
|
|
344
361
|
merged = {**self._options, **(options or {})}
|
|
345
362
|
messages = self._build_messages(content, images=images)
|
|
@@ -347,6 +364,7 @@ class Conversation:
|
|
|
347
364
|
|
|
348
365
|
text = resp.get("text", "")
|
|
349
366
|
meta = resp.get("meta", {})
|
|
367
|
+
self._last_reasoning = resp.get("reasoning_content")
|
|
350
368
|
|
|
351
369
|
# Record in history — store content with images for context
|
|
352
370
|
user_content = self._build_content_with_images(content, images)
|
|
@@ -382,6 +400,7 @@ class Conversation:
|
|
|
382
400
|
|
|
383
401
|
if not tool_calls:
|
|
384
402
|
# No tool calls -> final response
|
|
403
|
+
self._last_reasoning = resp.get("reasoning_content")
|
|
385
404
|
self._messages.append({"role": "assistant", "content": text})
|
|
386
405
|
return text
|
|
387
406
|
|
|
@@ -395,6 +414,11 @@ class Conversation:
|
|
|
395
414
|
}
|
|
396
415
|
for tc in tool_calls
|
|
397
416
|
]
|
|
417
|
+
# Preserve reasoning_content for providers that require it
|
|
418
|
+
# on subsequent requests (e.g. Moonshot reasoning models).
|
|
419
|
+
if resp.get("reasoning_content") is not None:
|
|
420
|
+
assistant_msg["reasoning_content"] = resp["reasoning_content"]
|
|
421
|
+
|
|
398
422
|
self._messages.append(assistant_msg)
|
|
399
423
|
msgs.append(assistant_msg)
|
|
400
424
|
|
|
@@ -416,6 +440,63 @@ class Conversation:
|
|
|
416
440
|
|
|
417
441
|
raise RuntimeError(f"Tool execution loop exceeded {self._max_tool_rounds} rounds")
|
|
418
442
|
|
|
443
|
+
def _ask_with_simulated_tools(
|
|
444
|
+
self,
|
|
445
|
+
content: str,
|
|
446
|
+
options: dict[str, Any] | None = None,
|
|
447
|
+
images: list[ImageInput] | None = None,
|
|
448
|
+
) -> str:
|
|
449
|
+
"""Prompt-based tool calling for drivers without native tool use."""
|
|
450
|
+
from .simulated_tools import build_tool_prompt, format_tool_result, parse_simulated_response
|
|
451
|
+
|
|
452
|
+
merged = {**self._options, **(options or {})}
|
|
453
|
+
tool_prompt = build_tool_prompt(self._tools)
|
|
454
|
+
|
|
455
|
+
# Augment system prompt with tool descriptions
|
|
456
|
+
augmented_system = tool_prompt
|
|
457
|
+
if self._system_prompt:
|
|
458
|
+
augmented_system = f"{self._system_prompt}\n\n{tool_prompt}"
|
|
459
|
+
|
|
460
|
+
# Record user message in history
|
|
461
|
+
user_content = self._build_content_with_images(content, images)
|
|
462
|
+
self._messages.append({"role": "user", "content": user_content})
|
|
463
|
+
|
|
464
|
+
for _round in range(self._max_tool_rounds):
|
|
465
|
+
# Build messages with the augmented system prompt
|
|
466
|
+
msgs: list[dict[str, Any]] = []
|
|
467
|
+
msgs.append({"role": "system", "content": augmented_system})
|
|
468
|
+
msgs.extend(self._messages)
|
|
469
|
+
|
|
470
|
+
resp = self._driver.generate_messages_with_hooks(msgs, merged)
|
|
471
|
+
text = resp.get("text", "")
|
|
472
|
+
meta = resp.get("meta", {})
|
|
473
|
+
self._accumulate_usage(meta)
|
|
474
|
+
|
|
475
|
+
parsed = parse_simulated_response(text, self._tools)
|
|
476
|
+
|
|
477
|
+
if parsed["type"] == "final_answer":
|
|
478
|
+
answer = parsed["content"]
|
|
479
|
+
self._messages.append({"role": "assistant", "content": answer})
|
|
480
|
+
return answer
|
|
481
|
+
|
|
482
|
+
# Tool call
|
|
483
|
+
tool_name = parsed["name"]
|
|
484
|
+
tool_args = parsed["arguments"]
|
|
485
|
+
|
|
486
|
+
# Record assistant's tool call as an assistant message
|
|
487
|
+
self._messages.append({"role": "assistant", "content": text})
|
|
488
|
+
|
|
489
|
+
try:
|
|
490
|
+
result = self._tools.execute(tool_name, tool_args)
|
|
491
|
+
result_msg = format_tool_result(tool_name, result)
|
|
492
|
+
except Exception as exc:
|
|
493
|
+
result_msg = format_tool_result(tool_name, f"Error: {exc}")
|
|
494
|
+
|
|
495
|
+
# Record tool result as a user message (all drivers understand user/assistant)
|
|
496
|
+
self._messages.append({"role": "user", "content": result_msg})
|
|
497
|
+
|
|
498
|
+
raise RuntimeError(f"Simulated tool execution loop exceeded {self._max_tool_rounds} rounds")
|
|
499
|
+
|
|
419
500
|
def _build_messages_raw(self) -> list[dict[str, Any]]:
|
|
420
501
|
"""Build messages array from system prompt + full history (including tool messages)."""
|
|
421
502
|
msgs: list[dict[str, Any]] = []
|
|
@@ -484,6 +565,8 @@ class Conversation:
|
|
|
484
565
|
context clean for subsequent turns.
|
|
485
566
|
"""
|
|
486
567
|
|
|
568
|
+
self._last_reasoning = None
|
|
569
|
+
|
|
487
570
|
merged = {**self._options, **(options or {})}
|
|
488
571
|
|
|
489
572
|
# Build the full prompt with schema instructions inline (handled by ask_for_json)
|
|
@@ -525,6 +608,7 @@ class Conversation:
|
|
|
525
608
|
|
|
526
609
|
text = resp.get("text", "")
|
|
527
610
|
meta = resp.get("meta", {})
|
|
611
|
+
self._last_reasoning = resp.get("reasoning_content")
|
|
528
612
|
|
|
529
613
|
# Store original content (without schema boilerplate) for cleaner context
|
|
530
614
|
# Include images in history so subsequent turns can reference them
|
|
@@ -563,6 +647,7 @@ class Conversation:
|
|
|
563
647
|
"json_object": json_obj,
|
|
564
648
|
"usage": usage,
|
|
565
649
|
"output_format": output_format,
|
|
650
|
+
"reasoning": self._last_reasoning,
|
|
566
651
|
}
|
|
567
652
|
|
|
568
653
|
if output_format == "toon":
|
|
@@ -95,8 +95,17 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
|
|
|
95
95
|
"model_name": model,
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
|
|
99
|
-
|
|
98
|
+
message = resp["choices"][0]["message"]
|
|
99
|
+
text = message.get("content") or ""
|
|
100
|
+
reasoning_content = message.get("reasoning_content")
|
|
101
|
+
|
|
102
|
+
if not text and reasoning_content:
|
|
103
|
+
text = reasoning_content
|
|
104
|
+
|
|
105
|
+
result: dict[str, Any] = {"text": text, "meta": meta}
|
|
106
|
+
if reasoning_content is not None:
|
|
107
|
+
result["reasoning_content"] = reasoning_content
|
|
108
|
+
return result
|
|
100
109
|
|
|
101
110
|
# ------------------------------------------------------------------
|
|
102
111
|
# Tool use
|
|
@@ -173,15 +182,20 @@ class AsyncGrokDriver(CostMixin, AsyncDriver):
|
|
|
173
182
|
args = json.loads(tc["function"]["arguments"])
|
|
174
183
|
except (json.JSONDecodeError, TypeError):
|
|
175
184
|
args = {}
|
|
176
|
-
tool_calls_out.append(
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
185
|
+
tool_calls_out.append(
|
|
186
|
+
{
|
|
187
|
+
"id": tc["id"],
|
|
188
|
+
"name": tc["function"]["name"],
|
|
189
|
+
"arguments": args,
|
|
190
|
+
}
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
result: dict[str, Any] = {
|
|
183
194
|
"text": text,
|
|
184
195
|
"meta": meta,
|
|
185
196
|
"tool_calls": tool_calls_out,
|
|
186
197
|
"stop_reason": stop_reason,
|
|
187
198
|
}
|
|
199
|
+
if choice["message"].get("reasoning_content") is not None:
|
|
200
|
+
result["reasoning_content"] = choice["message"]["reasoning_content"]
|
|
201
|
+
return result
|
|
@@ -88,8 +88,16 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
|
|
|
88
88
|
"model_name": model,
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
-
text = resp.choices[0].message.content
|
|
92
|
-
|
|
91
|
+
text = resp.choices[0].message.content or ""
|
|
92
|
+
reasoning_content = getattr(resp.choices[0].message, "reasoning_content", None)
|
|
93
|
+
|
|
94
|
+
if not text and reasoning_content:
|
|
95
|
+
text = reasoning_content
|
|
96
|
+
|
|
97
|
+
result: dict[str, Any] = {"text": text, "meta": meta}
|
|
98
|
+
if reasoning_content is not None:
|
|
99
|
+
result["reasoning_content"] = reasoning_content
|
|
100
|
+
return result
|
|
93
101
|
|
|
94
102
|
# ------------------------------------------------------------------
|
|
95
103
|
# Tool use
|
|
@@ -152,15 +160,21 @@ class AsyncGroqDriver(CostMixin, AsyncDriver):
|
|
|
152
160
|
args = json.loads(tc.function.arguments)
|
|
153
161
|
except (json.JSONDecodeError, TypeError):
|
|
154
162
|
args = {}
|
|
155
|
-
tool_calls_out.append(
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
163
|
+
tool_calls_out.append(
|
|
164
|
+
{
|
|
165
|
+
"id": tc.id,
|
|
166
|
+
"name": tc.function.name,
|
|
167
|
+
"arguments": args,
|
|
168
|
+
}
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
result: dict[str, Any] = {
|
|
162
172
|
"text": text,
|
|
163
173
|
"meta": meta,
|
|
164
174
|
"tool_calls": tool_calls_out,
|
|
165
175
|
"stop_reason": stop_reason,
|
|
166
176
|
}
|
|
177
|
+
reasoning_content = getattr(choice.message, "reasoning_content", None)
|
|
178
|
+
if reasoning_content is not None:
|
|
179
|
+
result["reasoning_content"] = reasoning_content
|
|
180
|
+
return result
|
|
@@ -98,7 +98,12 @@ class AsyncLMStudioDriver(AsyncDriver):
|
|
|
98
98
|
if "choices" not in response_data or not response_data["choices"]:
|
|
99
99
|
raise ValueError(f"Unexpected response format: {response_data}")
|
|
100
100
|
|
|
101
|
-
|
|
101
|
+
message = response_data["choices"][0]["message"]
|
|
102
|
+
text = message.get("content") or ""
|
|
103
|
+
reasoning_content = message.get("reasoning_content")
|
|
104
|
+
|
|
105
|
+
if not text and reasoning_content:
|
|
106
|
+
text = reasoning_content
|
|
102
107
|
|
|
103
108
|
usage = response_data.get("usage", {})
|
|
104
109
|
prompt_tokens = usage.get("prompt_tokens", 0)
|
|
@@ -114,7 +119,10 @@ class AsyncLMStudioDriver(AsyncDriver):
|
|
|
114
119
|
"model_name": merged_options.get("model", self.model),
|
|
115
120
|
}
|
|
116
121
|
|
|
117
|
-
|
|
122
|
+
result: dict[str, Any] = {"text": text, "meta": meta}
|
|
123
|
+
if reasoning_content is not None:
|
|
124
|
+
result["reasoning_content"] = reasoning_content
|
|
125
|
+
return result
|
|
118
126
|
|
|
119
127
|
# -- Model management (LM Studio 0.4.0+) ----------------------------------
|
|
120
128
|
|
|
@@ -138,10 +138,11 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
138
138
|
|
|
139
139
|
message = resp["choices"][0]["message"]
|
|
140
140
|
text = message.get("content") or ""
|
|
141
|
+
reasoning_content = message.get("reasoning_content")
|
|
141
142
|
|
|
142
143
|
# Reasoning models may return content in reasoning_content when content is empty
|
|
143
|
-
if not text and
|
|
144
|
-
text =
|
|
144
|
+
if not text and reasoning_content:
|
|
145
|
+
text = reasoning_content
|
|
145
146
|
|
|
146
147
|
# Structured output fallback: if we used json_schema mode and got an
|
|
147
148
|
# empty response, retry with json_object mode and schema in the prompt.
|
|
@@ -184,8 +185,9 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
184
185
|
resp = fb_resp
|
|
185
186
|
fb_message = fb_resp["choices"][0]["message"]
|
|
186
187
|
text = fb_message.get("content") or ""
|
|
187
|
-
|
|
188
|
-
|
|
188
|
+
reasoning_content = fb_message.get("reasoning_content")
|
|
189
|
+
if not text and reasoning_content:
|
|
190
|
+
text = reasoning_content
|
|
189
191
|
|
|
190
192
|
total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
|
|
191
193
|
|
|
@@ -198,7 +200,10 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
198
200
|
"model_name": model,
|
|
199
201
|
}
|
|
200
202
|
|
|
201
|
-
|
|
203
|
+
result: dict[str, Any] = {"text": text, "meta": meta}
|
|
204
|
+
if reasoning_content is not None:
|
|
205
|
+
result["reasoning_content"] = reasoning_content
|
|
206
|
+
return result
|
|
202
207
|
|
|
203
208
|
# ------------------------------------------------------------------
|
|
204
209
|
# Tool use
|
|
@@ -271,11 +276,12 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
271
276
|
}
|
|
272
277
|
|
|
273
278
|
choice = resp["choices"][0]
|
|
274
|
-
|
|
279
|
+
message = choice["message"]
|
|
280
|
+
text = message.get("content") or ""
|
|
275
281
|
stop_reason = choice.get("finish_reason")
|
|
276
282
|
|
|
277
283
|
tool_calls_out: list[dict[str, Any]] = []
|
|
278
|
-
for tc in
|
|
284
|
+
for tc in message.get("tool_calls", []):
|
|
279
285
|
try:
|
|
280
286
|
args = json.loads(tc["function"]["arguments"])
|
|
281
287
|
except (json.JSONDecodeError, TypeError):
|
|
@@ -288,13 +294,21 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
288
294
|
}
|
|
289
295
|
)
|
|
290
296
|
|
|
291
|
-
|
|
297
|
+
result: dict[str, Any] = {
|
|
292
298
|
"text": text,
|
|
293
299
|
"meta": meta,
|
|
294
300
|
"tool_calls": tool_calls_out,
|
|
295
301
|
"stop_reason": stop_reason,
|
|
296
302
|
}
|
|
297
303
|
|
|
304
|
+
# Preserve reasoning_content for reasoning models so the
|
|
305
|
+
# conversation loop can include it when sending the assistant
|
|
306
|
+
# message back (Moonshot requires it on subsequent requests).
|
|
307
|
+
if message.get("reasoning_content") is not None:
|
|
308
|
+
result["reasoning_content"] = message["reasoning_content"]
|
|
309
|
+
|
|
310
|
+
return result
|
|
311
|
+
|
|
298
312
|
# ------------------------------------------------------------------
|
|
299
313
|
# Streaming
|
|
300
314
|
# ------------------------------------------------------------------
|
|
@@ -325,6 +339,7 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
325
339
|
data["temperature"] = opts["temperature"]
|
|
326
340
|
|
|
327
341
|
full_text = ""
|
|
342
|
+
full_reasoning = ""
|
|
328
343
|
prompt_tokens = 0
|
|
329
344
|
completion_tokens = 0
|
|
330
345
|
|
|
@@ -359,9 +374,11 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
359
374
|
if choices:
|
|
360
375
|
delta = choices[0].get("delta", {})
|
|
361
376
|
content = delta.get("content") or ""
|
|
362
|
-
|
|
363
|
-
if
|
|
364
|
-
|
|
377
|
+
reasoning_chunk = delta.get("reasoning_content") or ""
|
|
378
|
+
if reasoning_chunk:
|
|
379
|
+
full_reasoning += reasoning_chunk
|
|
380
|
+
if not content and reasoning_chunk:
|
|
381
|
+
content = reasoning_chunk
|
|
365
382
|
if content:
|
|
366
383
|
full_text += content
|
|
367
384
|
yield {"type": "delta", "text": content}
|
|
@@ -369,7 +386,7 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
369
386
|
total_tokens = prompt_tokens + completion_tokens
|
|
370
387
|
total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
|
|
371
388
|
|
|
372
|
-
|
|
389
|
+
done_chunk: dict[str, Any] = {
|
|
373
390
|
"type": "done",
|
|
374
391
|
"text": full_text,
|
|
375
392
|
"meta": {
|
|
@@ -381,3 +398,6 @@ class AsyncMoonshotDriver(CostMixin, AsyncDriver):
|
|
|
381
398
|
"model_name": model,
|
|
382
399
|
},
|
|
383
400
|
}
|
|
401
|
+
if full_reasoning:
|
|
402
|
+
done_chunk["reasoning_content"] = full_reasoning
|
|
403
|
+
yield done_chunk
|