nous-genai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nous/__init__.py +3 -0
- nous/genai/__init__.py +56 -0
- nous/genai/__main__.py +3 -0
- nous/genai/_internal/__init__.py +1 -0
- nous/genai/_internal/capability_rules.py +476 -0
- nous/genai/_internal/config.py +102 -0
- nous/genai/_internal/errors.py +63 -0
- nous/genai/_internal/http.py +951 -0
- nous/genai/_internal/json_schema.py +54 -0
- nous/genai/cli.py +1316 -0
- nous/genai/client.py +719 -0
- nous/genai/mcp_cli.py +275 -0
- nous/genai/mcp_server.py +1080 -0
- nous/genai/providers/__init__.py +15 -0
- nous/genai/providers/aliyun.py +535 -0
- nous/genai/providers/anthropic.py +483 -0
- nous/genai/providers/gemini.py +1606 -0
- nous/genai/providers/openai.py +1909 -0
- nous/genai/providers/tuzi.py +1158 -0
- nous/genai/providers/volcengine.py +273 -0
- nous/genai/reference/__init__.py +17 -0
- nous/genai/reference/catalog.py +206 -0
- nous/genai/reference/mappings.py +467 -0
- nous/genai/reference/mode_overrides.py +26 -0
- nous/genai/reference/model_catalog.py +82 -0
- nous/genai/reference/model_catalog_data/__init__.py +1 -0
- nous/genai/reference/model_catalog_data/aliyun.py +98 -0
- nous/genai/reference/model_catalog_data/anthropic.py +10 -0
- nous/genai/reference/model_catalog_data/google.py +45 -0
- nous/genai/reference/model_catalog_data/openai.py +44 -0
- nous/genai/reference/model_catalog_data/tuzi_anthropic.py +21 -0
- nous/genai/reference/model_catalog_data/tuzi_google.py +19 -0
- nous/genai/reference/model_catalog_data/tuzi_openai.py +75 -0
- nous/genai/reference/model_catalog_data/tuzi_web.py +136 -0
- nous/genai/reference/model_catalog_data/volcengine.py +107 -0
- nous/genai/tools/__init__.py +13 -0
- nous/genai/tools/output_parser.py +119 -0
- nous/genai/types.py +416 -0
- nous/py.typed +1 -0
- nous_genai-0.1.0.dist-info/METADATA +200 -0
- nous_genai-0.1.0.dist-info/RECORD +45 -0
- nous_genai-0.1.0.dist-info/WHEEL +5 -0
- nous_genai-0.1.0.dist-info/entry_points.txt +4 -0
- nous_genai-0.1.0.dist-info/licenses/LICENSE +190 -0
- nous_genai-0.1.0.dist-info/top_level.txt +1 -0

nous/genai/providers/__init__.py
@@ -0,0 +1,15 @@
+from .aliyun import AliyunAdapter
+from .anthropic import AnthropicAdapter
+from .gemini import GeminiAdapter
+from .openai import OpenAIAdapter
+from .tuzi import TuziAdapter
+from .volcengine import VolcengineAdapter
+
+__all__ = [
+    "AliyunAdapter",
+    "AnthropicAdapter",
+    "GeminiAdapter",
+    "OpenAIAdapter",
+    "TuziAdapter",
+    "VolcengineAdapter",
+]
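
The hunk above is the public surface of nous/genai/providers: one adapter class per backend, re-exported from a single package. A minimal sketch of a provider-prefix lookup built on those exports, for orientation only; the adapters' constructor signatures and the actual routing in nous/genai/client.py are not part of this excerpt, so the mapping below is an assumption, not the package's own logic. The "provider:model" id form itself is taken from the aliyun.py hunk that follows (for example "aliyun:qwen-image-max").

from nous.genai.providers import (
    AliyunAdapter,
    AnthropicAdapter,
    GeminiAdapter,
    OpenAIAdapter,
    TuziAdapter,
    VolcengineAdapter,
)

# Hypothetical mapping from the provider prefix of a "provider:model" id to an
# adapter class; the real selection lives in the client/reference modules.
ADAPTER_BY_PREFIX = {
    "aliyun": AliyunAdapter,
    "anthropic": AnthropicAdapter,
    "gemini": GeminiAdapter,
    "openai": OpenAIAdapter,
    "tuzi": TuziAdapter,
    "volcengine": VolcengineAdapter,
}

def adapter_class_for(model: str):
    # "aliyun:qwen-image-max" -> AliyunAdapter
    provider, _, _model_id = model.partition(":")
    return ADAPTER_BY_PREFIX[provider]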

nous/genai/providers/aliyun.py
@@ -0,0 +1,535 @@
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass
+from typing import Any, Iterator
+from uuid import uuid4
+
+from .._internal.errors import (
+    invalid_request_error,
+    not_supported_error,
+    provider_error,
+)
+from .._internal.http import request_json
+from ..types import Capability, GenerateEvent, GenerateRequest, GenerateResponse
+from ..types import JobInfo, Message, Part, PartSourceBytes, PartSourceUrl
+from .openai import OpenAIAdapter
+
+
+@dataclass(frozen=True, slots=True)
+class AliyunAdapter:
+    """
+    Aliyun DashScope (Bailian / Model Studio).
+
+    Supported in this SDK:
+    - chat (text/image -> text), stream supported
+    - embeddings (text -> embedding)
+    - image generation (text -> image) via DashScope AIGC endpoint
+    - video generation (text -> video) via DashScope AIGC async task endpoint
+    - speech synthesis (text -> audio) via DashScope AIGC endpoint
+    - speech recognition (audio -> text) for Qwen-ASR models via OpenAI-compatible chat endpoint
+    """
+
+    openai: OpenAIAdapter
+
+    def capabilities(self, model_id: str) -> Capability:
+        if _is_embedding_model(model_id):
+            return Capability(
+                input_modalities={"text"},
+                output_modalities={"embedding"},
+                supports_stream=False,
+                supports_job=False,
+                supports_tools=False,
+                supports_json_schema=False,
+            )
+        if _is_speech_synthesis_model(model_id):
+            return Capability(
+                input_modalities={"text"},
+                output_modalities={"audio"},
+                supports_stream=False,
+                supports_job=False,
+                supports_tools=False,
+                supports_json_schema=False,
+            )
+        if _is_asr_model(model_id):
+            return Capability(
+                input_modalities={"audio"},
+                output_modalities={"text"},
+                supports_stream=False,
+                supports_job=False,
+                supports_tools=False,
+                supports_json_schema=False,
+            )
+        if _is_image_generation_model(model_id):
+            return Capability(
+                input_modalities={"text"},
+                output_modalities={"image"},
+                supports_stream=False,
+                supports_job=False,
+                supports_tools=False,
+                supports_json_schema=False,
+            )
+        if _is_video_generation_model(model_id):
+            return Capability(
+                input_modalities={"text"},
+                output_modalities={"video"},
+                supports_stream=False,
+                supports_job=True,
+                supports_tools=False,
+                supports_json_schema=False,
+            )
+        return Capability(
+            input_modalities={"text", "image"},
+            output_modalities={"text"},
+            supports_stream=True,
+            supports_job=False,
+            supports_tools=True,
+            supports_json_schema=True,
+        )
+
+    def list_models(self, *, timeout_ms: int | None = None) -> list[str]:
+        return self.openai.list_models(timeout_ms=timeout_ms)
+
+    def generate(
+        self, request: GenerateRequest, *, stream: bool
+    ) -> GenerateResponse | Iterator[GenerateEvent]:
+        modalities = set(request.output.modalities)
+        model_id = request.model_id()
+
+        if modalities == {"embedding"}:
+            if stream:
+                raise not_supported_error("Aliyun embeddings do not support streaming")
+            return self.openai.generate(request, stream=False)
+
+        if modalities == {"image"}:
+            if stream:
+                raise not_supported_error(
+                    "Aliyun image generation does not support streaming"
+                )
+            return self._image(request, model_id=model_id)
+
+        if modalities == {"video"}:
+            if stream:
+                raise not_supported_error(
+                    "Aliyun video generation does not support streaming"
+                )
+            return self._video(request, model_id=model_id)
+
+        if modalities == {"audio"}:
+            if stream:
+                raise not_supported_error(
+                    "Aliyun speech synthesis does not support streaming"
+                )
+            return self._audio(request, model_id=model_id)
+
+        if modalities != {"text"}:
+            raise not_supported_error(
+                "Aliyun only supports chat/embeddings/image/video/audio in this SDK"
+            )
+        if _is_embedding_model(model_id):
+            raise not_supported_error(
+                "Aliyun embedding models must be called with output.modalities=['embedding']"
+            )
+        if _is_speech_synthesis_model(model_id):
+            raise not_supported_error(
+                "Aliyun speech synthesis models must be called with output.modalities=['audio']"
+            )
+        if _is_image_generation_model(model_id):
+            raise not_supported_error(
+                "Aliyun image models must be called with output.modalities=['image']"
+            )
+        if _is_video_generation_model(model_id):
+            raise not_supported_error(
+                "Aliyun video models must be called with output.modalities=['video']"
+            )
+        if _has_audio_input(request) and not _is_asr_model(model_id):
+            raise not_supported_error(
+                "Aliyun chat input only supports audio for ASR models in this SDK"
+            )
+        return self.openai.generate(request, stream=stream)
+
+    def _image(self, request: GenerateRequest, *, model_id: str) -> GenerateResponse:
+        if not _is_image_generation_model(model_id):
+            raise not_supported_error(
+                'Aliyun image generation requires model like "aliyun:qwen-image-max"'
+            )
+
+        prompt = _single_text_prompt(request)
+        body: dict[str, Any] = {
+            "model": model_id,
+            "input": {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [{"text": prompt}],
+                    }
+                ]
+            },
+            "parameters": {
+                "watermark": False,
+            },
+        }
+
+        img = request.output.image
+        if img and img.size:
+            size = img.size.strip()
+            if "x" in size and "*" not in size:
+                size = size.replace("x", "*")
+            body["parameters"]["size"] = size
+
+        opts = request.provider_options.get("aliyun")
+        if isinstance(opts, dict):
+            _merge_provider_options(body=body, opts=opts)
+
+        obj = request_json(
+            method="POST",
+            url="https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation",
+            headers={**_aliyun_headers(self.openai.api_key, request=request)},
+            json_body=body,
+            timeout_ms=request.params.timeout_ms,
+            proxy_url=self.openai.proxy_url,
+        )
+
+        image_url = _extract_image_url(obj)
+        if not image_url:
+            raise provider_error("aliyun image response missing image url")
+
+        part = Part(type="image", source=PartSourceUrl(url=image_url))
+        return GenerateResponse(
+            id=f"sdk_{uuid4().hex}",
+            provider="aliyun",
+            model=f"aliyun:{model_id}",
+            status="completed",
+            output=[Message(role="assistant", content=[part])],
+            usage=None,
+        )
+
+    def _audio(self, request: GenerateRequest, *, model_id: str) -> GenerateResponse:
+        if not _is_speech_synthesis_model(model_id):
+            raise not_supported_error(
+                'Aliyun speech synthesis requires model like "aliyun:qwen3-tts-flash"'
+            )
+
+        prompt = _single_text_prompt(request)
+        audio = request.output.audio
+        if audio is None or not audio.voice:
+            raise invalid_request_error(
+                "output.audio.voice required for Aliyun speech synthesis"
+            )
+        if audio.format and audio.format.strip().lower() not in {"wav", "wave"}:
+            raise not_supported_error(
+                "Aliyun speech synthesis only supports wav output in this SDK"
+            )
+
+        body: dict[str, Any] = {
+            "model": model_id,
+            "input": {
+                "text": prompt,
+                "voice": audio.voice,
+            },
+        }
+        if audio.language:
+            body["input"]["language_type"] = _map_language_type(audio.language)
+
+        opts = request.provider_options.get("aliyun")
+        if isinstance(opts, dict):
+            _merge_provider_options(body=body, opts=opts)
+
+        obj = request_json(
+            method="POST",
+            url="https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation",
+            headers={**_aliyun_headers(self.openai.api_key, request=request)},
+            json_body=body,
+            timeout_ms=request.params.timeout_ms,
+            proxy_url=self.openai.proxy_url,
+        )
+
+        source, mime_type = _extract_audio_source(obj)
+        part = Part(type="audio", mime_type=mime_type, source=source)
+        return GenerateResponse(
+            id=f"sdk_{uuid4().hex}",
+            provider="aliyun",
+            model=f"aliyun:{model_id}",
+            status="completed",
+            output=[Message(role="assistant", content=[part])],
+            usage=None,
+        )
+
+    def _video(self, request: GenerateRequest, *, model_id: str) -> GenerateResponse:
+        if not _is_video_generation_model(model_id):
+            raise not_supported_error(
+                'Aliyun video generation requires model like "aliyun:wan2.5-t2v-preview"'
+            )
+
+        prompt = _single_text_prompt(request)
+        body: dict[str, Any] = {"model": model_id, "input": {"prompt": prompt}}
+
+        video = request.output.video
+        if video and video.duration_sec is not None:
+            body.setdefault("parameters", {})["duration"] = int(video.duration_sec)
+        if video and video.aspect_ratio:
+            body.setdefault("parameters", {})["ratio"] = video.aspect_ratio
+
+        opts = request.provider_options.get("aliyun")
+        if isinstance(opts, dict):
+            _merge_provider_options(body=body, opts=opts)
+
+        budget_ms = (
+            120_000 if request.params.timeout_ms is None else request.params.timeout_ms
+        )
+        deadline = time.time() + max(1, budget_ms) / 1000.0
+        obj = request_json(
+            method="POST",
+            url="https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis",
+            headers={
+                **_aliyun_headers(self.openai.api_key, request=request),
+                "X-DashScope-Async": "enable",
+            },
+            json_body=body,
+            timeout_ms=min(30_000, max(1, budget_ms)),
+            proxy_url=self.openai.proxy_url,
+        )
+        task_id = _extract_task_id(obj)
+        if not task_id:
+            raise provider_error("aliyun video response missing task_id")
+
+        if not request.wait:
+            return GenerateResponse(
+                id=f"sdk_{uuid4().hex}",
+                provider="aliyun",
+                model=f"aliyun:{model_id}",
+                status="running",
+                job=JobInfo(job_id=task_id, poll_after_ms=1_000),
+            )
+
+        final = _wait_task_done(
+            task_id=task_id,
+            api_key=self.openai.api_key,
+            deadline=deadline,
+            proxy_url=self.openai.proxy_url,
+        )
+        status = _extract_task_status(final)
+        if status != "SUCCEEDED":
+            if status == "FAILED":
+                raise provider_error(f"aliyun video generation failed: {final}")
+            return GenerateResponse(
+                id=f"sdk_{uuid4().hex}",
+                provider="aliyun",
+                model=f"aliyun:{model_id}",
+                status="running",
+                job=JobInfo(job_id=task_id, poll_after_ms=1_000),
+            )
+
+        video_url = _extract_video_url(final)
+        if not video_url:
+            raise provider_error("aliyun video task missing video_url")
+        part = Part(
+            type="video", mime_type="video/mp4", source=PartSourceUrl(url=video_url)
+        )
+        return GenerateResponse(
+            id=f"sdk_{uuid4().hex}",
+            provider="aliyun",
+            model=f"aliyun:{model_id}",
+            status="completed",
+            output=[Message(role="assistant", content=[part])],
+            usage=None,
+        )
+
+
+def _has_audio_input(request: GenerateRequest) -> bool:
+    for m in request.input:
+        for p in m.content:
+            if p.type == "audio":
+                return True
+    return False
+
+
+def _is_embedding_model(model_id: str) -> bool:
+    mid = model_id.lower()
+    return "embedding" in mid and "text" in mid
+
+
+def _is_speech_synthesis_model(model_id: str) -> bool:
+    return "tts" in model_id.lower()
+
+
+def _is_asr_model(model_id: str) -> bool:
+    return "asr" in model_id.lower()
+
+
+def _is_image_generation_model(model_id: str) -> bool:
+    mid = model_id.lower()
+    return "image" in mid and "embedding" not in mid
+
+
+def _is_video_generation_model(model_id: str) -> bool:
+    mid = model_id.lower()
+    return "t2v" in mid or "i2v" in mid
+
+
+def _single_text_prompt(request: GenerateRequest) -> str:
+    texts: list[str] = []
+    for m in request.input:
+        for p in m.content:
+            if p.type != "text":
+                raise invalid_request_error(
+                    "this operation requires exactly one text part"
+                )
+            t = p.require_text().strip()
+            if t:
+                texts.append(t)
+    if len(texts) != 1:
+        raise invalid_request_error("this operation requires exactly one text part")
+    return texts[0]
+
+
+def _map_language_type(language: str) -> str:
+    lang = language.strip().lower()
+    if lang.startswith("zh"):
+        return "Chinese"
+    if lang.startswith("en"):
+        return "English"
+    return language
+
+
+def _aliyun_headers(
+    api_key: str, *, request: GenerateRequest | None = None
+) -> dict[str, str]:
+    headers = {"Authorization": f"Bearer {api_key}"}
+    if request and request.params.idempotency_key:
+        headers["Idempotency-Key"] = request.params.idempotency_key
+    return headers
+
+
+def _merge_provider_options(*, body: dict[str, Any], opts: dict[str, Any]) -> None:
+    if "model" in opts and opts["model"] != body.get("model"):
+        raise invalid_request_error("provider_options cannot override model")
+    if "input" in opts:
+        if not isinstance(opts["input"], dict):
+            raise invalid_request_error("provider_options.input must be an object")
+        inp = body.setdefault("input", {})
+        if not isinstance(inp, dict):
+            raise invalid_request_error("internal error: body.input is not an object")
+        for k, v in opts["input"].items():
+            if k in inp:
+                raise invalid_request_error(
+                    f"provider_options cannot override input.{k}"
+                )
+            inp[k] = v
+    if "parameters" in opts:
+        if not isinstance(opts["parameters"], dict):
+            raise invalid_request_error("provider_options.parameters must be an object")
+        params = body.setdefault("parameters", {})
+        if not isinstance(params, dict):
+            raise invalid_request_error(
+                "internal error: body.parameters is not an object"
+            )
+        for k, v in opts["parameters"].items():
+            if k in params:
+                raise invalid_request_error(
+                    f"provider_options cannot override parameters.{k}"
+                )
+            params[k] = v
+    for k, v in opts.items():
+        if k in {"model", "input", "parameters"}:
+            continue
+        if k in body:
+            raise invalid_request_error(f"provider_options cannot override body.{k}")
+        body[k] = v
+
+
+def _extract_image_url(obj: dict[str, Any]) -> str | None:
+    output = obj.get("output")
+    if not isinstance(output, dict):
+        return None
+    choices = output.get("choices")
+    if not isinstance(choices, list) or not choices:
+        return None
+    first = choices[0]
+    if not isinstance(first, dict):
+        return None
+    msg = first.get("message")
+    if not isinstance(msg, dict):
+        return None
+    content = msg.get("content")
+    if not isinstance(content, list) or not content:
+        return None
+    item = content[0]
+    if not isinstance(item, dict):
+        return None
+    u = item.get("image")
+    if isinstance(u, str) and u:
+        return u
+    return None
+
+
+def _extract_audio_source(
+    obj: dict[str, Any],
+) -> tuple[PartSourceBytes | PartSourceUrl, str]:
+    output = obj.get("output")
+    if not isinstance(output, dict):
+        raise provider_error("aliyun audio response missing output")
+    audio = output.get("audio")
+    if not isinstance(audio, dict):
+        raise provider_error("aliyun audio response missing output.audio")
+
+    data = audio.get("data")
+    if isinstance(data, str) and data:
+        return PartSourceBytes(data=data, encoding="base64"), "audio/wav"
+
+    url = audio.get("url")
+    if isinstance(url, str) and url:
+        return PartSourceUrl(url=url), "audio/wav"
+    raise provider_error("aliyun audio response missing url/data")
+
+
+def _extract_task_id(obj: dict[str, Any]) -> str | None:
+    output = obj.get("output")
+    if not isinstance(output, dict):
+        return None
+    tid = output.get("task_id") or output.get("taskId")
+    if isinstance(tid, str) and tid:
+        return tid
+    return None
+
+
+def _extract_task_status(obj: dict[str, Any]) -> str | None:
+    output = obj.get("output")
+    if not isinstance(output, dict):
+        return None
+    st = output.get("task_status") or output.get("taskStatus")
+    if isinstance(st, str) and st:
+        return st
+    return None
+
+
+def _extract_video_url(obj: dict[str, Any]) -> str | None:
+    output = obj.get("output")
+    if not isinstance(output, dict):
+        return None
+    u = output.get("video_url") or output.get("videoUrl")
+    if isinstance(u, str) and u:
+        return u
+    return None
+
+
+def _wait_task_done(
+    *, task_id: str, api_key: str, deadline: float, proxy_url: str | None
+) -> dict[str, Any]:
+    url = f"https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}"
+    while True:
+        remaining_ms = int((deadline - time.time()) * 1000)
+        if remaining_ms <= 0:
+            break
+        obj = request_json(
+            method="GET",
+            url=url,
+            headers=_aliyun_headers(api_key),
+            timeout_ms=min(30_000, remaining_ms),
+            proxy_url=proxy_url,
+        )
+        st = _extract_task_status(obj)
+        if st in {"SUCCEEDED", "FAILED"}:
+            return obj
+        time.sleep(1.0)
+    return {"output": {"task_id": task_id, "task_status": "RUNNING"}}
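
Taken together, AliyunAdapter.capabilities and AliyunAdapter.generate route a request purely on the requested output modalities plus substring checks on the model id, then hand chat and embeddings to the OpenAI-compatible adapter and send image, audio, and video to DashScope's AIGC endpoints (video going through the async task API and a polling loop). A standalone sketch of that classification and of the image-size normalization ("1024x1024" becomes "1024*1024"), re-derived here purely for illustration; it mirrors the private helpers above rather than importing the package, and any model id not quoted in the error messages above is an assumed example.

def classify_aliyun_model(model_id: str) -> str:
    # Mirrors the _is_* helpers in aliyun.py above, in the same check order.
    mid = model_id.lower()
    if "embedding" in mid and "text" in mid:
        return "embedding"            # routed to the OpenAI-compatible embeddings path
    if "tts" in mid:
        return "speech_synthesis"     # DashScope AIGC endpoint, output.audio.voice required
    if "asr" in mid:
        return "speech_recognition"   # OpenAI-compatible chat endpoint with audio input
    if "image" in mid and "embedding" not in mid:
        return "image_generation"     # DashScope multimodal-generation endpoint
    if "t2v" in mid or "i2v" in mid:
        return "video_generation"     # async video-synthesis task + /api/v1/tasks polling
    return "chat"                     # default: OpenAI-compatible chat


def dashscope_size(size: str) -> str:
    # DashScope expects "W*H"; the adapter also accepts "WxH" and converts it.
    size = size.strip()
    if "x" in size and "*" not in size:
        size = size.replace("x", "*")
    return size


if __name__ == "__main__":
    for mid in ("qwen3-tts-flash", "qwen-image-max", "wan2.5-t2v-preview", "qwen-plus"):
        print(mid, "->", classify_aliyun_model(mid))
    print(dashscope_size("1024x1024"))  # -> 1024*1024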