prompture 0.0.47.dev2__py3-none-any.whl → 0.0.48__py3-none-any.whl
This diff shows the changes between package versions as published to a supported public registry. It is provided for informational purposes only.
- prompture/_version.py +2 -2
- prompture/async_conversation.py +16 -0
- prompture/conversation.py +16 -0
- prompture/drivers/async_claude_driver.py +32 -7
- prompture/drivers/async_grok_driver.py +23 -9
- prompture/drivers/async_groq_driver.py +23 -9
- prompture/drivers/async_lmstudio_driver.py +10 -2
- prompture/drivers/async_moonshot_driver.py +20 -9
- prompture/drivers/async_ollama_driver.py +27 -3
- prompture/drivers/async_openrouter_driver.py +43 -17
- prompture/drivers/claude_driver.py +43 -7
- prompture/drivers/grok_driver.py +23 -9
- prompture/drivers/groq_driver.py +23 -9
- prompture/drivers/lmstudio_driver.py +11 -2
- prompture/drivers/moonshot_driver.py +27 -16
- prompture/drivers/ollama_driver.py +42 -9
- prompture/drivers/openrouter_driver.py +34 -10
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/METADATA +1 -1
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/RECORD +23 -23
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/WHEEL +0 -0
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.47.dev2.dist-info → prompture-0.0.48.dist-info}/top_level.txt +0 -0
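The common thread of this release: every driver now surfaces an optional `reasoning_content` key next to `text` and `meta`, falls back to reasoning text when `content` is empty, and the Moonshot and Ollama drivers gain a caller-overridable request `timeout` (default 300 s). A minimal sketch of the result shape a consumer sees — the concrete values are invented for illustration, not taken from the diff:

```python
from typing import Any

# Shape of the dicts the drivers below now build: "reasoning_content" sits
# next to "text" and "meta", and is omitted entirely when the model emitted
# no reasoning tokens.
result: dict[str, Any] = {
    "text": "The sky is blue because of Rayleigh scattering.",
    "meta": {"total_tokens": 42},
    "reasoning_content": "Shorter wavelengths scatter more strongly...",
}

answer = result["text"]
reasoning = result.get("reasoning_content")  # None when the key is absent
if reasoning is not None:
    print("model reasoning:", reasoning)
```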
prompture/drivers/claude_driver.py
CHANGED

```diff
@@ -131,6 +131,13 @@ class ClaudeDriver(CostMixin, Driver):
         resp = client.messages.create(**common_kwargs)
         text = resp.content[0].text
 
+        # Extract reasoning/thinking content from content blocks
+        reasoning_content = self._extract_thinking(resp.content)
+
+        # Fallback: use reasoning as text if content is empty
+        if not text and reasoning_content:
+            text = reasoning_content
+
         # Extract token usage from Claude response
         prompt_tokens = resp.usage.input_tokens
         completion_tokens = resp.usage.output_tokens
@@ -149,12 +156,26 @@ class ClaudeDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Helpers
     # ------------------------------------------------------------------
 
+    @staticmethod
+    def _extract_thinking(content_blocks: list[Any]) -> str | None:
+        """Extract thinking/reasoning text from Claude content blocks."""
+        parts: list[str] = []
+        for block in content_blocks:
+            if getattr(block, "type", None) == "thinking":
+                thinking_text = getattr(block, "thinking", "")
+                if thinking_text:
+                    parts.append(thinking_text)
+        return "\n".join(parts) if parts else None
+
     def _extract_system_and_messages(
         self, messages: list[dict[str, Any]]
     ) -> tuple[str | None, list[dict[str, Any]]]:
@@ -246,12 +267,17 @@ class ClaudeDriver(CostMixin, Driver):
                     "arguments": block.input,
                 })
 
-
+        reasoning_content = self._extract_thinking(resp.content)
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": resp.stop_reason,
         }
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Streaming
@@ -282,6 +308,7 @@ class ClaudeDriver(CostMixin, Driver):
             kwargs["system"] = system_content
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
@@ -289,10 +316,16 @@ class ClaudeDriver(CostMixin, Driver):
         for event in stream:
             if hasattr(event, "type"):
                 if event.type == "content_block_delta" and hasattr(event, "delta"):
-
-                    if
-
-
+                    delta_type = getattr(event.delta, "type", "")
+                    if delta_type == "thinking_delta":
+                        thinking_text = getattr(event.delta, "thinking", "")
+                        if thinking_text:
+                            full_reasoning += thinking_text
+                    else:
+                        delta_text = getattr(event.delta, "text", "")
+                        if delta_text:
+                            full_text += delta_text
+                            yield {"type": "delta", "text": delta_text}
                 elif event.type == "message_delta" and hasattr(event, "usage"):
                     completion_tokens = getattr(event.usage, "output_tokens", 0)
                 elif event.type == "message_start" and hasattr(event, "message"):
@@ -303,7 +336,7 @@ class ClaudeDriver(CostMixin, Driver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("claude", model, prompt_tokens, completion_tokens)
 
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -315,3 +348,6 @@ class ClaudeDriver(CostMixin, Driver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
```
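The new `_extract_thinking` helper can be exercised on its own. A minimal sketch over stand-in content blocks — the `SimpleNamespace` mocks are illustrative, not the Anthropic SDK types:

```python
from types import SimpleNamespace
from typing import Any

def extract_thinking(content_blocks: list[Any]) -> str | None:
    """Standalone copy of ClaudeDriver._extract_thinking from the hunk above."""
    parts: list[str] = []
    for block in content_blocks:
        if getattr(block, "type", None) == "thinking":
            thinking_text = getattr(block, "thinking", "")
            if thinking_text:
                parts.append(thinking_text)
    return "\n".join(parts) if parts else None

# Mock Claude content blocks: one thinking block, one ordinary text block.
blocks = [
    SimpleNamespace(type="thinking", thinking="First, recall that..."),
    SimpleNamespace(type="text", text="The answer is 42."),
]
assert extract_thinking(blocks) == "First, recall that..."
assert extract_thinking([blocks[1]]) is None  # no thinking blocks -> None
```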
prompture/drivers/grok_driver.py
CHANGED

```diff
@@ -154,8 +154,17 @@ class GrokDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
-
+        message = resp["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -227,15 +236,20 @@ class GrokDriver(CostMixin, Driver):
                 args = json.loads(tc["function"]["arguments"])
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append(
-
-
-
-
-
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
        }
+        if choice["message"].get("reasoning_content") is not None:
+            result["reasoning_content"] = choice["message"]["reasoning_content"]
+        return result
```
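The same unpacking logic, lifted out of the driver as a standalone sketch over a plain response dict (the function name `unpack_message` is ours, not the driver's):

```python
from typing import Any

def unpack_message(resp: dict[str, Any], meta: dict[str, Any]) -> dict[str, Any]:
    """Mirrors the GrokDriver logic above on a raw chat-completions payload."""
    message = resp["choices"][0]["message"]
    text = message.get("content") or ""
    reasoning_content = message.get("reasoning_content")
    if not text and reasoning_content:
        text = reasoning_content  # fall back to reasoning when content is empty
    result: dict[str, Any] = {"text": text, "meta": meta}
    if reasoning_content is not None:
        result["reasoning_content"] = reasoning_content
    return result

# A reasoning model may leave "content" empty and answer in "reasoning_content".
resp = {"choices": [{"message": {"content": "", "reasoning_content": "Thinking..."}}]}
out = unpack_message(resp, meta={})
assert out["text"] == "Thinking..." and out["reasoning_content"] == "Thinking..."
```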
prompture/drivers/groq_driver.py
CHANGED

```diff
@@ -122,8 +122,16 @@ class GroqDriver(CostMixin, Driver):
         }
 
         # Extract generated text
-        text = resp.choices[0].message.content
-
+        text = resp.choices[0].message.content or ""
+        reasoning_content = getattr(resp.choices[0].message, "reasoning_content", None)
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -186,15 +194,21 @@ class GroqDriver(CostMixin, Driver):
                 args = json.loads(tc.function.arguments)
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append(
-
-
-
-
-
+            tool_calls_out.append(
+                {
+                    "id": tc.id,
+                    "name": tc.function.name,
+                    "arguments": args,
+                }
+            )
+
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        reasoning_content = getattr(choice.message, "reasoning_content", None)
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
```
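Groq differs from Grok and OpenRouter only in transport: its SDK hands back objects rather than dicts, hence the `getattr` probing for the optional field. A stand-in illustration using `SimpleNamespace` mocks (not the real SDK types):

```python
from types import SimpleNamespace

# Stand-in for resp.choices[0].message from a reasoning model whose final
# answer arrived only in reasoning_content.
message = SimpleNamespace(content=None, reasoning_content="step 1: ...")

text = message.content or ""
reasoning_content = getattr(message, "reasoning_content", None)
if not text and reasoning_content:
    text = reasoning_content

assert text == "step 1: ..."
# Non-reasoning models simply lack the attribute; getattr keeps that safe:
assert getattr(SimpleNamespace(content="hi"), "reasoning_content", None) is None
```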
prompture/drivers/lmstudio_driver.py
CHANGED

```diff
@@ -123,7 +123,13 @@ class LMStudioDriver(Driver):
             raise RuntimeError(f"LM Studio request failed: {e}") from e
 
         # Extract text
-
+        message = response_data["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        # Reasoning models (e.g. DeepSeek R1) may return content in reasoning_content
+        if not text and reasoning_content:
+            text = reasoning_content
 
         # Meta info
         usage = response_data.get("usage", {})
@@ -140,7 +146,10 @@ class LMStudioDriver(Driver):
             "model_name": merged_options.get("model", self.model),
         }
 
-
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
    # -- Model management (LM Studio 0.4.0+) ----------------------------------
 
```
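Note the result-shaping convention shared across all seven drivers: `reasoning_content` is added only when present, so its absence is itself meaningful. A sketch of that pattern (the helper name `shape_result` is ours):

```python
from typing import Any

def shape_result(text: str, meta: dict[str, Any], reasoning: str | None) -> dict[str, Any]:
    # The key is set only when reasoning exists, so downstream code can use
    # `"reasoning_content" in result` as a capability check.
    result: dict[str, Any] = {"text": text, "meta": meta}
    if reasoning is not None:
        result["reasoning_content"] = reasoning
    return result

assert "reasoning_content" not in shape_result("hi", {}, None)
assert shape_result("", {}, "because...")["reasoning_content"] == "because..."
```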
prompture/drivers/moonshot_driver.py
CHANGED

```diff
@@ -167,7 +167,7 @@ class MoonshotDriver(CostMixin, Driver):
             using_json_schema=bool(options.get("json_schema")),
         )
 
-        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+        opts = {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}
         opts = self._clamp_temperature(opts)
 
         data: dict[str, Any] = {
@@ -210,7 +210,7 @@ class MoonshotDriver(CostMixin, Driver):
             f"{self.base_url}/chat/completions",
             headers=self.headers,
             json=data,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         response.raise_for_status()
         resp = response.json()
@@ -228,10 +228,11 @@ class MoonshotDriver(CostMixin, Driver):
 
         message = resp["choices"][0]["message"]
         text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
 
         # Reasoning models may return content in reasoning_content when content is empty
-        if not text and
-        text =
+        if not text and reasoning_content:
+            text = reasoning_content
 
         # Structured output fallback: if we used json_schema mode and got an
         # empty response, retry with json_object mode and schema in the prompt.
@@ -260,7 +261,7 @@ class MoonshotDriver(CostMixin, Driver):
             f"{self.base_url}/chat/completions",
             headers=self.headers,
             json=fallback_data,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         fb_response.raise_for_status()
         fb_resp = fb_response.json()
@@ -275,8 +276,9 @@ class MoonshotDriver(CostMixin, Driver):
             resp = fb_resp
             fb_message = fb_resp["choices"][0]["message"]
             text = fb_message.get("content") or ""
-
-
+            reasoning_content = fb_message.get("reasoning_content")
+            if not text and reasoning_content:
+                text = reasoning_content
 
         total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
 
@@ -289,7 +291,10 @@ class MoonshotDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -312,7 +317,7 @@ class MoonshotDriver(CostMixin, Driver):
 
         self._validate_model_capabilities("moonshot", model, using_tool_use=True)
 
-        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+        opts = {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}
         opts = self._clamp_temperature(opts)
 
         sanitized_tools = self._sanitize_tools(tools)
@@ -337,7 +342,7 @@ class MoonshotDriver(CostMixin, Driver):
             f"{self.base_url}/chat/completions",
             headers=self.headers,
             json=data,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         response.raise_for_status()
         resp = response.json()
@@ -415,7 +420,7 @@ class MoonshotDriver(CostMixin, Driver):
         tokens_param = model_config["tokens_param"]
         supports_temperature = model_config["supports_temperature"]
 
-        opts = {"temperature": 1.0, "max_tokens": 512, **options}
+        opts = {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}
         opts = self._clamp_temperature(opts)
 
         data: dict[str, Any] = {
@@ -434,11 +439,12 @@ class MoonshotDriver(CostMixin, Driver):
             headers=self.headers,
             json=data,
             stream=True,
-            timeout=
+            timeout=opts.get("timeout", 300),
         )
         response.raise_for_status()
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
@@ -462,9 +468,11 @@ class MoonshotDriver(CostMixin, Driver):
                 if choices:
                     delta = choices[0].get("delta", {})
                     content = delta.get("content") or ""
-
-                    if
-
+                    reasoning_chunk = delta.get("reasoning_content") or ""
+                    if reasoning_chunk:
+                        full_reasoning += reasoning_chunk
+                    if not content and reasoning_chunk:
+                        content = reasoning_chunk
                     if content:
                         full_text += content
                         yield {"type": "delta", "text": content}
@@ -472,7 +480,7 @@ class MoonshotDriver(CostMixin, Driver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("moonshot", model, prompt_tokens, completion_tokens)
 
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -484,3 +492,6 @@ class MoonshotDriver(CostMixin, Driver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
```
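The Moonshot changes also thread a caller-overridable `timeout` (default 300 s) through `opts` into every `requests.post` call site. A quick demonstration of why the merge order makes a caller's value win:

```python
def merge_opts(options: dict) -> dict:
    # Mirrors the updated driver lines: defaults first, caller options last,
    # so a caller-supplied "timeout" overrides the new 300 s default.
    return {"temperature": 1.0, "max_tokens": 512, "timeout": 300, **options}

assert merge_opts({})["timeout"] == 300              # default applies
assert merge_opts({"timeout": 30})["timeout"] == 30  # caller override wins
# Each request site then reads it back with: timeout=opts.get("timeout", 300)
```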
prompture/drivers/ollama_driver.py
CHANGED

```diff
@@ -84,7 +84,7 @@ class OllamaDriver(Driver):
         logger.debug(f"Sending request to Ollama endpoint: {self.endpoint}")
         logger.debug(f"Request payload: {payload}")
 
-        r = requests.post(self.endpoint, json=payload, timeout=
+        r = requests.post(self.endpoint, json=payload, timeout=merged_options.get("timeout", 300))
         logger.debug(f"Response status code: {r.status_code}")
 
         r.raise_for_status()
@@ -131,7 +131,17 @@ class OllamaDriver(Driver):
         }
 
         # Ollama returns text in "response"
-
+        text = response_data.get("response", "")
+        reasoning_content = response_data.get("thinking") or None
+
+        # Reasoning models may return content only in thinking
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -166,7 +176,7 @@ class OllamaDriver(Driver):
 
         try:
             logger.debug(f"Sending tool use request to Ollama endpoint: {chat_endpoint}")
-            r = requests.post(chat_endpoint, json=payload, timeout=
+            r = requests.post(chat_endpoint, json=payload, timeout=merged_options.get("timeout", 300))
             r.raise_for_status()
             response_data = r.json()
 
@@ -196,8 +206,12 @@ class OllamaDriver(Driver):
 
         message = response_data.get("message", {})
         text = message.get("content") or ""
+        reasoning_content = message.get("thinking") or None
         stop_reason = response_data.get("done_reason", "stop")
 
+        if not text and reasoning_content:
+            text = reasoning_content
+
         tool_calls_out: list[dict[str, Any]] = []
         for tc in message.get("tool_calls", []):
             func = tc.get("function", {})
@@ -215,12 +229,15 @@ class OllamaDriver(Driver):
                 "arguments": args,
             })
 
-
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Streaming
@@ -255,10 +272,11 @@ class OllamaDriver(Driver):
             payload["top_k"] = merged_options["top_k"]
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
-        r = requests.post(chat_endpoint, json=payload, timeout=
+        r = requests.post(chat_endpoint, json=payload, timeout=merged_options.get("timeout", 300), stream=True)
         r.raise_for_status()
 
         for line in r.iter_lines():
@@ -269,13 +287,17 @@ class OllamaDriver(Driver):
                 prompt_tokens = chunk.get("prompt_eval_count", 0)
                 completion_tokens = chunk.get("eval_count", 0)
             else:
-
+                msg = chunk.get("message", {})
+                thinking = msg.get("thinking", "")
+                if thinking:
+                    full_reasoning += thinking
+                content = msg.get("content", "")
                 if content:
                     full_text += content
                     yield {"type": "delta", "text": content}
 
         total_tokens = prompt_tokens + completion_tokens
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -287,6 +309,9 @@ class OllamaDriver(Driver):
                 "model_name": merged_options.get("model", self.model),
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
 
     def generate_messages(self, messages: list[dict[str, Any]], options: dict[str, Any]) -> dict[str, Any]:
         """Use Ollama's /api/chat endpoint for multi-turn conversations."""
@@ -318,7 +343,7 @@ class OllamaDriver(Driver):
 
         try:
             logger.debug(f"Sending chat request to Ollama endpoint: {chat_endpoint}")
-            r = requests.post(chat_endpoint, json=payload, timeout=
+            r = requests.post(chat_endpoint, json=payload, timeout=merged_options.get("timeout", 300))
             r.raise_for_status()
             response_data = r.json()
 
@@ -349,4 +374,12 @@ class OllamaDriver(Driver):
         # Chat endpoint returns response in message.content
         message = response_data.get("message", {})
         text = message.get("content", "")
-
+        reasoning_content = message.get("thinking") or None
+
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
```
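Ollama's streaming loop accumulates `thinking` into `full_reasoning` but, unlike the Moonshot and OpenRouter loops, does not promote it into the yielded text deltas; only `content` reaches the caller incrementally. A self-contained simulation over mock stream chunks (the chunk values are invented):

```python
# Mock /api/chat stream chunks shaped like Ollama's NDJSON lines.
chunks = [
    {"message": {"thinking": "Let me think. "}},
    {"message": {"thinking": "Blue light scatters."}},
    {"message": {"content": "Rayleigh scattering."}},
    {"done": True, "prompt_eval_count": 10, "eval_count": 7},
]

full_text, full_reasoning = "", ""
prompt_tokens = completion_tokens = 0
for chunk in chunks:
    if chunk.get("done"):
        prompt_tokens = chunk.get("prompt_eval_count", 0)
        completion_tokens = chunk.get("eval_count", 0)
    else:
        msg = chunk.get("message", {})
        thinking = msg.get("thinking", "")
        if thinking:
            full_reasoning += thinking  # kept aside, never yielded as a delta
        content = msg.get("content", "")
        if content:
            full_text += content        # only content becomes delta text

assert full_text == "Rayleigh scattering."
assert full_reasoning == "Let me think. Blue light scatters."
assert (prompt_tokens, completion_tokens) == (10, 7)
```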
prompture/drivers/openrouter_driver.py
CHANGED

```diff
@@ -181,8 +181,18 @@ class OpenRouterDriver(CostMixin, Driver):
             "model_name": model,
         }
 
-
-
+        message = resp["choices"][0]["message"]
+        text = message.get("content") or ""
+        reasoning_content = message.get("reasoning_content")
+
+        # Reasoning models may return content in reasoning_content when content is empty
+        if not text and reasoning_content:
+            text = reasoning_content
+
+        result: dict[str, Any] = {"text": text, "meta": meta}
+        if reasoning_content is not None:
+            result["reasoning_content"] = reasoning_content
+        return result
 
     # ------------------------------------------------------------------
     # Tool use
@@ -257,18 +267,23 @@ class OpenRouterDriver(CostMixin, Driver):
                 args = json.loads(tc["function"]["arguments"])
             except (json.JSONDecodeError, TypeError):
                 args = {}
-            tool_calls_out.append(
-
-
-
-
+            tool_calls_out.append(
+                {
+                    "id": tc["id"],
+                    "name": tc["function"]["name"],
+                    "arguments": args,
+                }
+            )
 
-
+        result: dict[str, Any] = {
             "text": text,
             "meta": meta,
             "tool_calls": tool_calls_out,
             "stop_reason": stop_reason,
         }
+        if choice["message"].get("reasoning_content") is not None:
+            result["reasoning_content"] = choice["message"]["reasoning_content"]
+        return result
 
     # ------------------------------------------------------------------
     # Streaming
@@ -311,13 +326,14 @@ class OpenRouterDriver(CostMixin, Driver):
         response.raise_for_status()
 
         full_text = ""
+        full_reasoning = ""
         prompt_tokens = 0
         completion_tokens = 0
 
         for line in response.iter_lines(decode_unicode=True):
             if not line or not line.startswith("data: "):
                 continue
-            payload = line[len("data: "):]
+            payload = line[len("data: ") :]
             if payload.strip() == "[DONE]":
                 break
             try:
@@ -335,6 +351,11 @@ class OpenRouterDriver(CostMixin, Driver):
                 if choices:
                     delta = choices[0].get("delta", {})
                     content = delta.get("content", "")
+                    reasoning_chunk = delta.get("reasoning_content") or ""
+                    if reasoning_chunk:
+                        full_reasoning += reasoning_chunk
+                    if not content and reasoning_chunk:
+                        content = reasoning_chunk
                     if content:
                         full_text += content
                         yield {"type": "delta", "text": content}
@@ -342,7 +363,7 @@ class OpenRouterDriver(CostMixin, Driver):
         total_tokens = prompt_tokens + completion_tokens
         total_cost = self._calculate_cost("openrouter", model, prompt_tokens, completion_tokens)
 
-
+        done_chunk: dict[str, Any] = {
             "type": "done",
             "text": full_text,
             "meta": {
@@ -354,3 +375,6 @@ class OpenRouterDriver(CostMixin, Driver):
                 "model_name": model,
             },
         }
+        if full_reasoning:
+            done_chunk["reasoning_content"] = full_reasoning
+        yield done_chunk
```
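OpenRouter's SSE handling, reduced to a runnable simulation: here reasoning deltas are accumulated *and* surfaced as text deltas whenever the regular content delta is empty. The mock `lines` stand in for `response.iter_lines()`:

```python
import json

# Mock SSE lines shaped like the OpenRouter stream.
lines = [
    'data: {"choices": [{"delta": {"reasoning_content": "hmm..."}}]}',
    'data: {"choices": [{"delta": {"content": "Answer."}}]}',
    "data: [DONE]",
]

full_text, full_reasoning = "", ""
for line in lines:
    if not line.startswith("data: "):
        continue
    payload = line[len("data: ") :]
    if payload.strip() == "[DONE]":
        break
    delta = json.loads(payload)["choices"][0].get("delta", {})
    content = delta.get("content", "")
    reasoning_chunk = delta.get("reasoning_content") or ""
    if reasoning_chunk:
        full_reasoning += reasoning_chunk
    if not content and reasoning_chunk:
        content = reasoning_chunk  # reasoning doubles as the visible delta
    if content:
        full_text += content

assert full_text == "hmm...Answer."
assert full_reasoning == "hmm..."
```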