prompture 0.0.40.dev1__py3-none-any.whl → 0.0.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompture/_version.py +2 -2
- prompture/agent.py +11 -11
- prompture/async_agent.py +11 -11
- prompture/async_groups.py +63 -0
- prompture/cost_mixin.py +25 -0
- prompture/drivers/__init__.py +39 -0
- prompture/drivers/async_azure_driver.py +3 -2
- prompture/drivers/async_modelscope_driver.py +286 -0
- prompture/drivers/async_moonshot_driver.py +312 -0
- prompture/drivers/async_openai_driver.py +3 -2
- prompture/drivers/async_openrouter_driver.py +192 -3
- prompture/drivers/async_registry.py +30 -0
- prompture/drivers/async_zai_driver.py +303 -0
- prompture/drivers/azure_driver.py +3 -2
- prompture/drivers/modelscope_driver.py +303 -0
- prompture/drivers/moonshot_driver.py +342 -0
- prompture/drivers/openai_driver.py +3 -2
- prompture/drivers/openrouter_driver.py +244 -40
- prompture/drivers/zai_driver.py +318 -0
- prompture/groups.py +42 -0
- prompture/model_rates.py +2 -0
- prompture/settings.py +16 -1
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.42.dist-info}/METADATA +1 -1
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.42.dist-info}/RECORD +28 -22
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.42.dist-info}/WHEEL +0 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.42.dist-info}/entry_points.txt +0 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.42.dist-info}/licenses/LICENSE +0 -0
- {prompture-0.0.40.dev1.dist-info → prompture-0.0.42.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""Async Z.ai (Zhipu AI) driver using httpx.
|
|
2
|
+
|
|
3
|
+
All pricing comes from models.dev (provider: "zai") — no hardcoded pricing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
from collections.abc import AsyncIterator
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from ..async_driver import AsyncDriver
|
|
16
|
+
from ..cost_mixin import CostMixin, prepare_strict_schema
|
|
17
|
+
from .zai_driver import ZaiDriver
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AsyncZaiDriver(CostMixin, AsyncDriver):
    """Async driver for the Z.ai (Zhipu AI) chat-completions HTTP API.

    Every request is POSTed with ``httpx`` to ``{endpoint}/chat/completions``
    using a bearer token. Token usage reported by the API is turned into a
    cost via the inherited ``_calculate_cost`` helper under the ``"zai"``
    provider key.
    """

    # Capability flags declared by this driver.
    supports_json_mode = True
    supports_json_schema = True
    supports_tool_use = True
    supports_streaming = True
    supports_vision = True
    supports_messages = True

    # Reuse the pricing table of the synchronous driver so both stay in sync.
    MODEL_PRICING = ZaiDriver.MODEL_PRICING

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "glm-4.7",
        endpoint: str = "https://api.z.ai/api/paas/v4",
    ):
        """Store credentials and connection settings.

        Args:
            api_key: API key; falls back to the ``ZHIPU_API_KEY`` env var.
            model: Default model, used when ``options`` carries no ``"model"``.
            endpoint: API base URL; a trailing slash is stripped.

        Raises:
            ValueError: If no API key is supplied and the env var is unset.
        """
        self.api_key = api_key or os.getenv("ZHIPU_API_KEY")
        if not self.api_key:
            raise ValueError("Zhipu API key not found. Set ZHIPU_API_KEY env var.")
        self.model = model
        self.base_url = endpoint.rstrip("/")
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Return ``messages`` passed through the OpenAI-style vision helper.

        NOTE(review): presumably converts image content into OpenAI-compatible
        message parts; the helper lives in ``vision_helpers`` and is not
        visible here.
        """
        # Imported lazily, as in the original implementation.
        from .vision_helpers import _prepare_openai_vision_messages

        return _prepare_openai_vision_messages(messages)

    async def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
        """Send ``prompt`` as a single user message and return the completion."""
        messages = [{"role": "user", "content": prompt}]
        return await self._do_generate(messages, options)

    async def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
        """Send a message list (after vision preparation) and return the completion."""
        return await self._do_generate(self._prepare_messages(messages), options)

    async def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
        """Perform one non-streaming chat-completion request.

        Args:
            messages: Chat messages sent to the API as-is.
            options: Per-call options. Keys read here: ``model``,
                ``temperature``, ``max_tokens``, ``json_mode``, ``json_schema``.

        Returns:
            ``{"text": <message content>, "meta": {...}}`` where ``meta``
            holds token counts, rounded cost, the raw response and model name.

        Raises:
            RuntimeError: If the HTTP request fails or the body is not JSON.
        """
        model = options.get("model", self.model)

        model_config = self._get_model_config("zai", model)
        tokens_param = model_config["tokens_param"]
        supports_temperature = model_config["supports_temperature"]

        self._validate_model_capabilities(
            "zai",
            model,
            using_json_schema=bool(options.get("json_schema")),
        )

        # Caller-supplied options override the defaults.
        opts = {"temperature": 1.0, "max_tokens": 512, **options}

        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
        }
        # The name of the token-limit field is model-specific.
        data[tokens_param] = opts.get("max_tokens", 512)

        if supports_temperature and "temperature" in opts:
            data["temperature"] = opts["temperature"]

        if options.get("json_mode"):
            json_schema = options.get("json_schema")
            if json_schema:
                schema_copy = prepare_strict_schema(json_schema)
                data["response_format"] = {
                    "type": "json_schema",
                    "json_schema": {
                        "name": "extraction",
                        "strict": True,
                        "schema": schema_copy,
                    },
                }
            else:
                data["response_format"] = {"type": "json_object"}

        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(
                    f"{self.base_url}/chat/completions",
                    headers=self.headers,
                    json=data,
                    timeout=120,
                )
                response.raise_for_status()
                resp = response.json()
            except Exception as e:
                # HTTP status errors and every other failure were already
                # reported with the same message, so one handler suffices.
                raise RuntimeError(f"Z.ai API request failed: {e!s}") from e

        usage = resp.get("usage", {})
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)
        total_tokens = usage.get("total_tokens", 0)

        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)

        meta = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "cost": round(total_cost, 6),
            "raw_response": resp,
            "model_name": model,
        }

        text = resp["choices"][0]["message"]["content"]
        return {"text": text, "meta": meta}

    # ------------------------------------------------------------------
    # Tool use
    # ------------------------------------------------------------------

    async def generate_messages_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        options: dict[str, Any],
    ) -> dict[str, Any]:
        """Generate a response that may include tool calls.

        Args:
            messages: Chat messages sent to the API as-is.
            tools: Tool definitions forwarded in the ``tools`` field.
            options: Per-call options. Keys read here: ``model``,
                ``temperature``, ``max_tokens``, ``tool_choice``.

        Returns:
            A dict with ``text`` (empty string when the message has no
            content), ``meta``, ``tool_calls`` (list of
            ``{"id", "name", "arguments"}``) and ``stop_reason``.

        Raises:
            RuntimeError: If the HTTP request fails or the body is not JSON.
        """
        model = options.get("model", self.model)
        model_config = self._get_model_config("zai", model)
        tokens_param = model_config["tokens_param"]
        supports_temperature = model_config["supports_temperature"]

        self._validate_model_capabilities("zai", model, using_tool_use=True)

        opts = {"temperature": 1.0, "max_tokens": 512, **options}

        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "tools": tools,
        }
        data[tokens_param] = opts.get("max_tokens", 512)

        if supports_temperature and "temperature" in opts:
            data["temperature"] = opts["temperature"]

        if "tool_choice" in options:
            data["tool_choice"] = options["tool_choice"]

        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(
                    f"{self.base_url}/chat/completions",
                    headers=self.headers,
                    json=data,
                    timeout=120,
                )
                response.raise_for_status()
                resp = response.json()
            except Exception as e:
                raise RuntimeError(f"Z.ai API request failed: {e!s}") from e

        usage = resp.get("usage", {})
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)
        total_tokens = usage.get("total_tokens", 0)
        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)

        meta = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "cost": round(total_cost, 6),
            "raw_response": resp,
            "model_name": model,
        }

        choice = resp["choices"][0]
        text = choice["message"].get("content") or ""
        stop_reason = choice.get("finish_reason")

        tool_calls_out: list[dict[str, Any]] = []
        # ``tool_calls`` may be present but null; treat that like "no calls"
        # instead of failing on iteration over None.
        for tc in choice["message"].get("tool_calls") or []:
            try:
                args = json.loads(tc["function"]["arguments"])
            except (json.JSONDecodeError, TypeError):
                # Unparseable arguments degrade to an empty mapping.
                args = {}
            tool_calls_out.append(
                {
                    "id": tc["id"],
                    "name": tc["function"]["name"],
                    "arguments": args,
                }
            )

        return {
            "text": text,
            "meta": meta,
            "tool_calls": tool_calls_out,
            "stop_reason": stop_reason,
        }

    # ------------------------------------------------------------------
    # Streaming
    # ------------------------------------------------------------------

    async def generate_messages_stream(
        self,
        messages: list[dict[str, Any]],
        options: dict[str, Any],
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield response chunks via Z.ai streaming API.

        Yields ``{"type": "delta", "text": ...}`` for every non-empty content
        fragment, then a final ``{"type": "done", "text": ..., "meta": ...}``
        carrying the accumulated text and usage. HTTP errors from
        ``raise_for_status`` propagate unchanged (they are not wrapped).
        """
        model = options.get("model", self.model)
        model_config = self._get_model_config("zai", model)
        tokens_param = model_config["tokens_param"]
        supports_temperature = model_config["supports_temperature"]

        opts = {"temperature": 1.0, "max_tokens": 512, **options}

        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": True,
            # Ask the API to append a usage record to the stream.
            "stream_options": {"include_usage": True},
        }
        data[tokens_param] = opts.get("max_tokens", 512)

        if supports_temperature and "temperature" in opts:
            data["temperature"] = opts["temperature"]

        full_text = ""
        prompt_tokens = 0
        completion_tokens = 0

        async with (
            httpx.AsyncClient() as client,
            client.stream(
                "POST",
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=data,
                timeout=120,
            ) as response,
        ):
            response.raise_for_status()
            async for line in response.aiter_lines():
                # Only server-sent-event data lines carry payloads.
                if not line or not line.startswith("data: "):
                    continue
                payload = line[len("data: ") :]
                if payload.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                except json.JSONDecodeError:
                    # Skip malformed fragments rather than abort the stream.
                    continue

                usage = chunk.get("usage")
                if usage:
                    prompt_tokens = usage.get("prompt_tokens", 0)
                    completion_tokens = usage.get("completion_tokens", 0)

                choices = chunk.get("choices", [])
                if choices:
                    delta = choices[0].get("delta", {})
                    content = delta.get("content", "")
                    if content:
                        full_text += content
                        yield {"type": "delta", "text": content}

        total_tokens = prompt_tokens + completion_tokens
        total_cost = self._calculate_cost("zai", model, prompt_tokens, completion_tokens)

        yield {
            "type": "done",
            "text": full_text,
            "meta": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens,
                "cost": round(total_cost, 6),
                "raw_response": {},
                "model_name": model,
            },
        }
|
|
@@ -10,7 +10,7 @@ try:
|
|
|
10
10
|
except Exception:
|
|
11
11
|
AzureOpenAI = None
|
|
12
12
|
|
|
13
|
-
from ..cost_mixin import CostMixin
|
|
13
|
+
from ..cost_mixin import CostMixin, prepare_strict_schema
|
|
14
14
|
from ..driver import Driver
|
|
15
15
|
|
|
16
16
|
|
|
@@ -128,12 +128,13 @@ class AzureDriver(CostMixin, Driver):
|
|
|
128
128
|
if options.get("json_mode"):
|
|
129
129
|
json_schema = options.get("json_schema")
|
|
130
130
|
if json_schema:
|
|
131
|
+
schema_copy = prepare_strict_schema(json_schema)
|
|
131
132
|
kwargs["response_format"] = {
|
|
132
133
|
"type": "json_schema",
|
|
133
134
|
"json_schema": {
|
|
134
135
|
"name": "extraction",
|
|
135
136
|
"strict": True,
|
|
136
|
-
"schema":
|
|
137
|
+
"schema": schema_copy,
|
|
137
138
|
},
|
|
138
139
|
}
|
|
139
140
|
else:
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""ModelScope (Alibaba Cloud) driver implementation.
|
|
2
|
+
Requires the `requests` package. Uses MODELSCOPE_API_KEY env var.
|
|
3
|
+
|
|
4
|
+
The ModelScope API-Inference endpoint is fully OpenAI-compatible (/v1/chat/completions).
|
|
5
|
+
No hardcoded pricing — ModelScope's free tier has no per-token cost.
|
|
6
|
+
|
|
7
|
+
Model IDs are namespace-prefixed (e.g. Qwen/Qwen3-235B-A22B-Instruct-2507).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from collections.abc import Iterator
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import requests
|
|
16
|
+
|
|
17
|
+
from ..cost_mixin import CostMixin
|
|
18
|
+
from ..driver import Driver
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ModelScopeDriver(CostMixin, Driver):
    """Synchronous driver for the ModelScope API-Inference chat endpoint.

    Every request is POSTed with ``requests`` to
    ``{endpoint}/chat/completions`` using a bearer token. Costs are computed
    through the inherited ``_calculate_cost`` helper under the
    ``"modelscope"`` provider key; the local pricing table is empty.
    """

    # Capability flags declared by this driver.
    supports_json_mode = True
    supports_json_schema = False
    supports_tool_use = True
    supports_streaming = True
    supports_vision = False
    supports_messages = True

    # No pricing data available — ModelScope free tier has no per-token cost
    MODEL_PRICING: dict[str, dict[str, Any]] = {}

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "Qwen/Qwen3-235B-A22B-Instruct-2507",
        endpoint: str = "https://api-inference.modelscope.cn/v1",
    ):
        """Initialize ModelScope driver.

        Args:
            api_key: ModelScope API key. If not provided, will look for MODELSCOPE_API_KEY env var.
            model: Model to use. Defaults to Qwen/Qwen3-235B-A22B-Instruct-2507.
            endpoint: API base URL. Defaults to https://api-inference.modelscope.cn/v1.

        Raises:
            ValueError: If no API key is supplied and the env var is unset.
        """
        self.api_key = api_key or os.getenv("MODELSCOPE_API_KEY")
        if not self.api_key:
            raise ValueError("ModelScope API key not found. Set MODELSCOPE_API_KEY env var.")

        self.model = model
        self.base_url = endpoint.rstrip("/")

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def generate(self, prompt: str, options: dict[str, Any]) -> dict[str, Any]:
        """Send ``prompt`` as a single user message and return the completion."""
        messages = [{"role": "user", "content": prompt}]
        return self._do_generate(messages, options)

    def generate_messages(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
        """Send a message list unchanged and return the completion."""
        return self._do_generate(messages, options)

    def _do_generate(self, messages: list[dict[str, str]], options: dict[str, Any]) -> dict[str, Any]:
        """Perform one non-streaming chat-completion request.

        Args:
            messages: Chat messages sent to the API as-is.
            options: Per-call options. Keys read here: ``model``,
                ``temperature``, ``max_tokens``, ``json_mode``, ``json_schema``
                (the latter only for capability validation).

        Returns:
            ``{"text": <message content>, "meta": {...}}`` where ``meta``
            holds token counts, rounded cost, the raw response and model name.

        Raises:
            RuntimeError: If the API key is missing or the HTTP request fails.
        """
        # Guards against the key being cleared after construction.
        if not self.api_key:
            raise RuntimeError("ModelScope API key not found")

        model = options.get("model", self.model)

        model_config = self._get_model_config("modelscope", model)
        tokens_param = model_config["tokens_param"]
        supports_temperature = model_config["supports_temperature"]

        self._validate_model_capabilities(
            "modelscope",
            model,
            using_json_schema=bool(options.get("json_schema")),
        )

        # Caller-supplied options override the defaults.
        opts = {"temperature": 1.0, "max_tokens": 512, **options}

        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
        }
        # The name of the token-limit field is model-specific.
        data[tokens_param] = opts.get("max_tokens", 512)

        if supports_temperature and "temperature" in opts:
            data["temperature"] = opts["temperature"]

        # Native JSON mode support
        if options.get("json_mode"):
            data["response_format"] = {"type": "json_object"}

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=data,
                timeout=120,
            )
            response.raise_for_status()
            resp = response.json()
        except requests.exceptions.RequestException as e:
            # HTTPError is a RequestException and was reported with the same
            # message, so a single handler covers both original branches.
            raise RuntimeError(f"ModelScope API request failed: {e!s}") from e

        usage = resp.get("usage", {})
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)
        total_tokens = usage.get("total_tokens", 0)

        total_cost = self._calculate_cost("modelscope", model, prompt_tokens, completion_tokens)

        meta = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "cost": round(total_cost, 6),
            "raw_response": resp,
            "model_name": model,
        }

        text = resp["choices"][0]["message"]["content"]
        return {"text": text, "meta": meta}

    # ------------------------------------------------------------------
    # Tool use
    # ------------------------------------------------------------------

    def generate_messages_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        options: dict[str, Any],
    ) -> dict[str, Any]:
        """Generate a response that may include tool calls.

        Args:
            messages: Chat messages sent to the API as-is.
            tools: Tool definitions forwarded in the ``tools`` field.
            options: Per-call options. Keys read here: ``model``,
                ``temperature``, ``max_tokens``, ``tool_choice``.

        Returns:
            A dict with ``text`` (empty string when the message has no
            content), ``meta``, ``tool_calls`` (list of
            ``{"id", "name", "arguments"}``) and ``stop_reason``.

        Raises:
            RuntimeError: If the API key is missing or the HTTP request fails.
        """
        if not self.api_key:
            raise RuntimeError("ModelScope API key not found")

        model = options.get("model", self.model)
        model_config = self._get_model_config("modelscope", model)
        tokens_param = model_config["tokens_param"]
        supports_temperature = model_config["supports_temperature"]

        self._validate_model_capabilities("modelscope", model, using_tool_use=True)

        opts = {"temperature": 1.0, "max_tokens": 512, **options}

        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "tools": tools,
        }
        data[tokens_param] = opts.get("max_tokens", 512)

        if supports_temperature and "temperature" in opts:
            data["temperature"] = opts["temperature"]

        if "tool_choice" in options:
            data["tool_choice"] = options["tool_choice"]

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=data,
                timeout=120,
            )
            response.raise_for_status()
            resp = response.json()
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"ModelScope API request failed: {e!s}") from e

        usage = resp.get("usage", {})
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)
        total_tokens = usage.get("total_tokens", 0)
        total_cost = self._calculate_cost("modelscope", model, prompt_tokens, completion_tokens)

        meta = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "cost": round(total_cost, 6),
            "raw_response": resp,
            "model_name": model,
        }

        choice = resp["choices"][0]
        text = choice["message"].get("content") or ""
        stop_reason = choice.get("finish_reason")

        tool_calls_out: list[dict[str, Any]] = []
        # ``tool_calls`` may be present but null; treat that like "no calls"
        # instead of failing on iteration over None.
        for tc in choice["message"].get("tool_calls") or []:
            try:
                args = json.loads(tc["function"]["arguments"])
            except (json.JSONDecodeError, TypeError):
                # Unparseable arguments degrade to an empty mapping.
                args = {}
            tool_calls_out.append(
                {
                    "id": tc["id"],
                    "name": tc["function"]["name"],
                    "arguments": args,
                }
            )

        return {
            "text": text,
            "meta": meta,
            "tool_calls": tool_calls_out,
            "stop_reason": stop_reason,
        }

    # ------------------------------------------------------------------
    # Streaming
    # ------------------------------------------------------------------

    def generate_messages_stream(
        self,
        messages: list[dict[str, Any]],
        options: dict[str, Any],
    ) -> Iterator[dict[str, Any]]:
        """Yield response chunks via ModelScope streaming API.

        Yields ``{"type": "delta", "text": ...}`` for every non-empty content
        fragment, then a final ``{"type": "done", "text": ..., "meta": ...}``
        carrying the accumulated text and usage. Request errors propagate
        unchanged (they are not wrapped in ``RuntimeError``).

        Raises:
            RuntimeError: If the API key is missing.
        """
        if not self.api_key:
            raise RuntimeError("ModelScope API key not found")

        model = options.get("model", self.model)
        model_config = self._get_model_config("modelscope", model)
        tokens_param = model_config["tokens_param"]
        supports_temperature = model_config["supports_temperature"]

        opts = {"temperature": 1.0, "max_tokens": 512, **options}

        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": True,
            # Ask the API to append a usage record to the stream.
            "stream_options": {"include_usage": True},
        }
        data[tokens_param] = opts.get("max_tokens", 512)

        if supports_temperature and "temperature" in opts:
            data["temperature"] = opts["temperature"]

        full_text = ""
        prompt_tokens = 0
        completion_tokens = 0

        # The context manager releases the connection even when the loop
        # stops early on "[DONE]" or the consumer abandons the generator.
        with requests.post(
            f"{self.base_url}/chat/completions",
            headers=self.headers,
            json=data,
            stream=True,
            timeout=120,
        ) as response:
            response.raise_for_status()

            # Event streams are UTF-8. Without this, requests falls back to
            # ISO-8859-1 for "text/*" bodies lacking a charset (garbling
            # non-ASCII text) or yields bytes when no encoding is known.
            response.encoding = "utf-8"

            for line in response.iter_lines(decode_unicode=True):
                # Only server-sent-event data lines carry payloads.
                if not line or not line.startswith("data: "):
                    continue
                payload = line[len("data: ") :]
                if payload.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                except json.JSONDecodeError:
                    # Skip malformed fragments rather than abort the stream.
                    continue

                usage = chunk.get("usage")
                if usage:
                    prompt_tokens = usage.get("prompt_tokens", 0)
                    completion_tokens = usage.get("completion_tokens", 0)

                choices = chunk.get("choices", [])
                if choices:
                    delta = choices[0].get("delta", {})
                    content = delta.get("content", "")
                    if content:
                        full_text += content
                        yield {"type": "delta", "text": content}

        total_tokens = prompt_tokens + completion_tokens
        total_cost = self._calculate_cost("modelscope", model, prompt_tokens, completion_tokens)

        yield {
            "type": "done",
            "text": full_text,
            "meta": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens,
                "cost": round(total_cost, 6),
                "raw_response": {},
                "model_name": model,
            },
        }
|