nous_genai-0.1.0-py3-none-any.whl

This diff shows the content of a publicly released package version as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (45)
  1. nous/__init__.py +3 -0
  2. nous/genai/__init__.py +56 -0
  3. nous/genai/__main__.py +3 -0
  4. nous/genai/_internal/__init__.py +1 -0
  5. nous/genai/_internal/capability_rules.py +476 -0
  6. nous/genai/_internal/config.py +102 -0
  7. nous/genai/_internal/errors.py +63 -0
  8. nous/genai/_internal/http.py +951 -0
  9. nous/genai/_internal/json_schema.py +54 -0
  10. nous/genai/cli.py +1316 -0
  11. nous/genai/client.py +719 -0
  12. nous/genai/mcp_cli.py +275 -0
  13. nous/genai/mcp_server.py +1080 -0
  14. nous/genai/providers/__init__.py +15 -0
  15. nous/genai/providers/aliyun.py +535 -0
  16. nous/genai/providers/anthropic.py +483 -0
  17. nous/genai/providers/gemini.py +1606 -0
  18. nous/genai/providers/openai.py +1909 -0
  19. nous/genai/providers/tuzi.py +1158 -0
  20. nous/genai/providers/volcengine.py +273 -0
  21. nous/genai/reference/__init__.py +17 -0
  22. nous/genai/reference/catalog.py +206 -0
  23. nous/genai/reference/mappings.py +467 -0
  24. nous/genai/reference/mode_overrides.py +26 -0
  25. nous/genai/reference/model_catalog.py +82 -0
  26. nous/genai/reference/model_catalog_data/__init__.py +1 -0
  27. nous/genai/reference/model_catalog_data/aliyun.py +98 -0
  28. nous/genai/reference/model_catalog_data/anthropic.py +10 -0
  29. nous/genai/reference/model_catalog_data/google.py +45 -0
  30. nous/genai/reference/model_catalog_data/openai.py +44 -0
  31. nous/genai/reference/model_catalog_data/tuzi_anthropic.py +21 -0
  32. nous/genai/reference/model_catalog_data/tuzi_google.py +19 -0
  33. nous/genai/reference/model_catalog_data/tuzi_openai.py +75 -0
  34. nous/genai/reference/model_catalog_data/tuzi_web.py +136 -0
  35. nous/genai/reference/model_catalog_data/volcengine.py +107 -0
  36. nous/genai/tools/__init__.py +13 -0
  37. nous/genai/tools/output_parser.py +119 -0
  38. nous/genai/types.py +416 -0
  39. nous/py.typed +1 -0
  40. nous_genai-0.1.0.dist-info/METADATA +200 -0
  41. nous_genai-0.1.0.dist-info/RECORD +45 -0
  42. nous_genai-0.1.0.dist-info/WHEEL +5 -0
  43. nous_genai-0.1.0.dist-info/entry_points.txt +4 -0
  44. nous_genai-0.1.0.dist-info/licenses/LICENSE +190 -0
  45. nous_genai-0.1.0.dist-info/top_level.txt +1 -0
nous/genai/providers/__init__.py
@@ -0,0 +1,15 @@
+ from .aliyun import AliyunAdapter
+ from .anthropic import AnthropicAdapter
+ from .gemini import GeminiAdapter
+ from .openai import OpenAIAdapter
+ from .tuzi import TuziAdapter
+ from .volcengine import VolcengineAdapter
+
+ __all__ = [
+     "AliyunAdapter",
+     "AnthropicAdapter",
+     "GeminiAdapter",
+     "OpenAIAdapter",
+     "TuziAdapter",
+     "VolcengineAdapter",
+ ]
nous/genai/providers/aliyun.py
@@ -0,0 +1,535 @@
+ from __future__ import annotations
+
+ import time
+ from dataclasses import dataclass
+ from typing import Any, Iterator
+ from uuid import uuid4
+
+ from .._internal.errors import (
+     invalid_request_error,
+     not_supported_error,
+     provider_error,
+ )
+ from .._internal.http import request_json
+ from ..types import Capability, GenerateEvent, GenerateRequest, GenerateResponse
+ from ..types import JobInfo, Message, Part, PartSourceBytes, PartSourceUrl
+ from .openai import OpenAIAdapter
+
+
+ @dataclass(frozen=True, slots=True)
+ class AliyunAdapter:
+     """
+     Aliyun DashScope (Bailian / Model Studio).
+
+     Supported in this SDK:
+     - chat (text/image -> text), stream supported
+     - embeddings (text -> embedding)
+     - image generation (text -> image) via DashScope AIGC endpoint
+     - video generation (text -> video) via DashScope AIGC async task endpoint
+     - speech synthesis (text -> audio) via DashScope AIGC endpoint
+     - speech recognition (audio -> text) for Qwen-ASR models via OpenAI-compatible chat endpoint
+     """
+
+     openai: OpenAIAdapter
+
+     def capabilities(self, model_id: str) -> Capability:
+         if _is_embedding_model(model_id):
+             return Capability(
+                 input_modalities={"text"},
+                 output_modalities={"embedding"},
+                 supports_stream=False,
+                 supports_job=False,
+                 supports_tools=False,
+                 supports_json_schema=False,
+             )
+         if _is_speech_synthesis_model(model_id):
+             return Capability(
+                 input_modalities={"text"},
+                 output_modalities={"audio"},
+                 supports_stream=False,
+                 supports_job=False,
+                 supports_tools=False,
+                 supports_json_schema=False,
+             )
+         if _is_asr_model(model_id):
+             return Capability(
+                 input_modalities={"audio"},
+                 output_modalities={"text"},
+                 supports_stream=False,
+                 supports_job=False,
+                 supports_tools=False,
+                 supports_json_schema=False,
+             )
+         if _is_image_generation_model(model_id):
+             return Capability(
+                 input_modalities={"text"},
+                 output_modalities={"image"},
+                 supports_stream=False,
+                 supports_job=False,
+                 supports_tools=False,
+                 supports_json_schema=False,
+             )
+         if _is_video_generation_model(model_id):
+             return Capability(
+                 input_modalities={"text"},
+                 output_modalities={"video"},
+                 supports_stream=False,
+                 supports_job=True,
+                 supports_tools=False,
+                 supports_json_schema=False,
+             )
+         return Capability(
+             input_modalities={"text", "image"},
+             output_modalities={"text"},
+             supports_stream=True,
+             supports_job=False,
+             supports_tools=True,
+             supports_json_schema=True,
+         )
+
+     def list_models(self, *, timeout_ms: int | None = None) -> list[str]:
+         return self.openai.list_models(timeout_ms=timeout_ms)
+
+     def generate(
+         self, request: GenerateRequest, *, stream: bool
+     ) -> GenerateResponse | Iterator[GenerateEvent]:
+         modalities = set(request.output.modalities)
+         model_id = request.model_id()
+
+         if modalities == {"embedding"}:
+             if stream:
+                 raise not_supported_error("Aliyun embeddings do not support streaming")
+             return self.openai.generate(request, stream=False)
+
+         if modalities == {"image"}:
+             if stream:
+                 raise not_supported_error(
+                     "Aliyun image generation does not support streaming"
+                 )
+             return self._image(request, model_id=model_id)
+
+         if modalities == {"video"}:
+             if stream:
+                 raise not_supported_error(
+                     "Aliyun video generation does not support streaming"
+                 )
+             return self._video(request, model_id=model_id)
+
+         if modalities == {"audio"}:
+             if stream:
+                 raise not_supported_error(
+                     "Aliyun speech synthesis does not support streaming"
+                 )
+             return self._audio(request, model_id=model_id)
+
+         if modalities != {"text"}:
+             raise not_supported_error(
+                 "Aliyun only supports chat/embeddings/image/video/audio in this SDK"
+             )
+         if _is_embedding_model(model_id):
+             raise not_supported_error(
+                 "Aliyun embedding models must be called with output.modalities=['embedding']"
+             )
+         if _is_speech_synthesis_model(model_id):
+             raise not_supported_error(
+                 "Aliyun speech synthesis models must be called with output.modalities=['audio']"
+             )
+         if _is_image_generation_model(model_id):
+             raise not_supported_error(
+                 "Aliyun image models must be called with output.modalities=['image']"
+             )
+         if _is_video_generation_model(model_id):
+             raise not_supported_error(
+                 "Aliyun video models must be called with output.modalities=['video']"
+             )
+         if _has_audio_input(request) and not _is_asr_model(model_id):
+             raise not_supported_error(
+                 "Aliyun chat input only supports audio for ASR models in this SDK"
+             )
+         return self.openai.generate(request, stream=stream)
+
+     def _image(self, request: GenerateRequest, *, model_id: str) -> GenerateResponse:
+         if not _is_image_generation_model(model_id):
+             raise not_supported_error(
+                 'Aliyun image generation requires model like "aliyun:qwen-image-max"'
+             )
+
+         prompt = _single_text_prompt(request)
+         body: dict[str, Any] = {
+             "model": model_id,
+             "input": {
+                 "messages": [
+                     {
+                         "role": "user",
+                         "content": [{"text": prompt}],
+                     }
+                 ]
+             },
+             "parameters": {
+                 "watermark": False,
+             },
+         }
+
+         img = request.output.image
+         if img and img.size:
+             size = img.size.strip()
+             if "x" in size and "*" not in size:
+                 size = size.replace("x", "*")
+             body["parameters"]["size"] = size
+
+         opts = request.provider_options.get("aliyun")
+         if isinstance(opts, dict):
+             _merge_provider_options(body=body, opts=opts)
+
+         obj = request_json(
+             method="POST",
+             url="https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation",
+             headers={**_aliyun_headers(self.openai.api_key, request=request)},
+             json_body=body,
+             timeout_ms=request.params.timeout_ms,
+             proxy_url=self.openai.proxy_url,
+         )
+
+         image_url = _extract_image_url(obj)
+         if not image_url:
+             raise provider_error("aliyun image response missing image url")
+
+         part = Part(type="image", source=PartSourceUrl(url=image_url))
+         return GenerateResponse(
+             id=f"sdk_{uuid4().hex}",
+             provider="aliyun",
+             model=f"aliyun:{model_id}",
+             status="completed",
+             output=[Message(role="assistant", content=[part])],
+             usage=None,
+         )
+
+     def _audio(self, request: GenerateRequest, *, model_id: str) -> GenerateResponse:
+         if not _is_speech_synthesis_model(model_id):
+             raise not_supported_error(
+                 'Aliyun speech synthesis requires model like "aliyun:qwen3-tts-flash"'
+             )
+
+         prompt = _single_text_prompt(request)
+         audio = request.output.audio
+         if audio is None or not audio.voice:
+             raise invalid_request_error(
+                 "output.audio.voice required for Aliyun speech synthesis"
+             )
+         if audio.format and audio.format.strip().lower() not in {"wav", "wave"}:
+             raise not_supported_error(
+                 "Aliyun speech synthesis only supports wav output in this SDK"
+             )
+
+         body: dict[str, Any] = {
+             "model": model_id,
+             "input": {
+                 "text": prompt,
+                 "voice": audio.voice,
+             },
+         }
+         if audio.language:
+             body["input"]["language_type"] = _map_language_type(audio.language)
+
+         opts = request.provider_options.get("aliyun")
+         if isinstance(opts, dict):
+             _merge_provider_options(body=body, opts=opts)
+
+         obj = request_json(
+             method="POST",
+             url="https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation",
+             headers={**_aliyun_headers(self.openai.api_key, request=request)},
+             json_body=body,
+             timeout_ms=request.params.timeout_ms,
+             proxy_url=self.openai.proxy_url,
+         )
+
+         source, mime_type = _extract_audio_source(obj)
+         part = Part(type="audio", mime_type=mime_type, source=source)
+         return GenerateResponse(
+             id=f"sdk_{uuid4().hex}",
+             provider="aliyun",
+             model=f"aliyun:{model_id}",
+             status="completed",
+             output=[Message(role="assistant", content=[part])],
+             usage=None,
+         )
+
+     def _video(self, request: GenerateRequest, *, model_id: str) -> GenerateResponse:
+         if not _is_video_generation_model(model_id):
+             raise not_supported_error(
+                 'Aliyun video generation requires model like "aliyun:wan2.5-t2v-preview"'
+             )
+
+         prompt = _single_text_prompt(request)
+         body: dict[str, Any] = {"model": model_id, "input": {"prompt": prompt}}
+
+         video = request.output.video
+         if video and video.duration_sec is not None:
+             body.setdefault("parameters", {})["duration"] = int(video.duration_sec)
+         if video and video.aspect_ratio:
+             body.setdefault("parameters", {})["ratio"] = video.aspect_ratio
+
+         opts = request.provider_options.get("aliyun")
+         if isinstance(opts, dict):
+             _merge_provider_options(body=body, opts=opts)
+
+         budget_ms = (
+             120_000 if request.params.timeout_ms is None else request.params.timeout_ms
+         )
+         deadline = time.time() + max(1, budget_ms) / 1000.0
+         obj = request_json(
+             method="POST",
+             url="https://dashscope.aliyuncs.com/api/v1/services/aigc/video-generation/video-synthesis",
+             headers={
+                 **_aliyun_headers(self.openai.api_key, request=request),
+                 "X-DashScope-Async": "enable",
+             },
+             json_body=body,
+             timeout_ms=min(30_000, max(1, budget_ms)),
+             proxy_url=self.openai.proxy_url,
+         )
+         task_id = _extract_task_id(obj)
+         if not task_id:
+             raise provider_error("aliyun video response missing task_id")
+
+         if not request.wait:
+             return GenerateResponse(
+                 id=f"sdk_{uuid4().hex}",
+                 provider="aliyun",
+                 model=f"aliyun:{model_id}",
+                 status="running",
+                 job=JobInfo(job_id=task_id, poll_after_ms=1_000),
+             )
+
+         final = _wait_task_done(
+             task_id=task_id,
+             api_key=self.openai.api_key,
+             deadline=deadline,
+             proxy_url=self.openai.proxy_url,
+         )
+         status = _extract_task_status(final)
+         if status != "SUCCEEDED":
+             if status == "FAILED":
+                 raise provider_error(f"aliyun video generation failed: {final}")
+             return GenerateResponse(
+                 id=f"sdk_{uuid4().hex}",
+                 provider="aliyun",
+                 model=f"aliyun:{model_id}",
+                 status="running",
+                 job=JobInfo(job_id=task_id, poll_after_ms=1_000),
+             )
+
+         video_url = _extract_video_url(final)
+         if not video_url:
+             raise provider_error("aliyun video task missing video_url")
+         part = Part(
+             type="video", mime_type="video/mp4", source=PartSourceUrl(url=video_url)
+         )
+         return GenerateResponse(
+             id=f"sdk_{uuid4().hex}",
+             provider="aliyun",
+             model=f"aliyun:{model_id}",
+             status="completed",
+             output=[Message(role="assistant", content=[part])],
+             usage=None,
+         )
+
+
+ def _has_audio_input(request: GenerateRequest) -> bool:
+     for m in request.input:
+         for p in m.content:
+             if p.type == "audio":
+                 return True
+     return False
+
+
+ def _is_embedding_model(model_id: str) -> bool:
+     mid = model_id.lower()
+     return "embedding" in mid and "text" in mid
+
+
+ def _is_speech_synthesis_model(model_id: str) -> bool:
+     return "tts" in model_id.lower()
+
+
+ def _is_asr_model(model_id: str) -> bool:
+     return "asr" in model_id.lower()
+
+
+ def _is_image_generation_model(model_id: str) -> bool:
+     mid = model_id.lower()
+     return "image" in mid and "embedding" not in mid
+
+
+ def _is_video_generation_model(model_id: str) -> bool:
+     mid = model_id.lower()
+     return "t2v" in mid or "i2v" in mid
+
+
+ def _single_text_prompt(request: GenerateRequest) -> str:
+     texts: list[str] = []
+     for m in request.input:
+         for p in m.content:
+             if p.type != "text":
+                 raise invalid_request_error(
+                     "this operation requires exactly one text part"
+                 )
+             t = p.require_text().strip()
+             if t:
+                 texts.append(t)
+     if len(texts) != 1:
+         raise invalid_request_error("this operation requires exactly one text part")
+     return texts[0]
+
+
+ def _map_language_type(language: str) -> str:
+     lang = language.strip().lower()
+     if lang.startswith("zh"):
+         return "Chinese"
+     if lang.startswith("en"):
+         return "English"
+     return language
+
+
+ def _aliyun_headers(
+     api_key: str, *, request: GenerateRequest | None = None
+ ) -> dict[str, str]:
+     headers = {"Authorization": f"Bearer {api_key}"}
+     if request and request.params.idempotency_key:
+         headers["Idempotency-Key"] = request.params.idempotency_key
+     return headers
+
+
+ def _merge_provider_options(*, body: dict[str, Any], opts: dict[str, Any]) -> None:
+     if "model" in opts and opts["model"] != body.get("model"):
+         raise invalid_request_error("provider_options cannot override model")
+     if "input" in opts:
+         if not isinstance(opts["input"], dict):
+             raise invalid_request_error("provider_options.input must be an object")
+         inp = body.setdefault("input", {})
+         if not isinstance(inp, dict):
+             raise invalid_request_error("internal error: body.input is not an object")
+         for k, v in opts["input"].items():
+             if k in inp:
+                 raise invalid_request_error(
+                     f"provider_options cannot override input.{k}"
+                 )
+             inp[k] = v
+     if "parameters" in opts:
+         if not isinstance(opts["parameters"], dict):
+             raise invalid_request_error("provider_options.parameters must be an object")
+         params = body.setdefault("parameters", {})
+         if not isinstance(params, dict):
+             raise invalid_request_error(
+                 "internal error: body.parameters is not an object"
+             )
+         for k, v in opts["parameters"].items():
+             if k in params:
+                 raise invalid_request_error(
+                     f"provider_options cannot override parameters.{k}"
+                 )
+             params[k] = v
+     for k, v in opts.items():
+         if k in {"model", "input", "parameters"}:
+             continue
+         if k in body:
+             raise invalid_request_error(f"provider_options cannot override body.{k}")
+         body[k] = v
+
+
+ def _extract_image_url(obj: dict[str, Any]) -> str | None:
+     output = obj.get("output")
+     if not isinstance(output, dict):
+         return None
+     choices = output.get("choices")
+     if not isinstance(choices, list) or not choices:
+         return None
+     first = choices[0]
+     if not isinstance(first, dict):
+         return None
+     msg = first.get("message")
+     if not isinstance(msg, dict):
+         return None
+     content = msg.get("content")
+     if not isinstance(content, list) or not content:
+         return None
+     item = content[0]
+     if not isinstance(item, dict):
+         return None
+     u = item.get("image")
+     if isinstance(u, str) and u:
+         return u
+     return None
+
+
+ def _extract_audio_source(
+     obj: dict[str, Any],
+ ) -> tuple[PartSourceBytes | PartSourceUrl, str]:
+     output = obj.get("output")
+     if not isinstance(output, dict):
+         raise provider_error("aliyun audio response missing output")
+     audio = output.get("audio")
+     if not isinstance(audio, dict):
+         raise provider_error("aliyun audio response missing output.audio")
+
+     data = audio.get("data")
+     if isinstance(data, str) and data:
+         return PartSourceBytes(data=data, encoding="base64"), "audio/wav"
+
+     url = audio.get("url")
+     if isinstance(url, str) and url:
+         return PartSourceUrl(url=url), "audio/wav"
+     raise provider_error("aliyun audio response missing url/data")
+
+
+ def _extract_task_id(obj: dict[str, Any]) -> str | None:
+     output = obj.get("output")
+     if not isinstance(output, dict):
+         return None
+     tid = output.get("task_id") or output.get("taskId")
+     if isinstance(tid, str) and tid:
+         return tid
+     return None
+
+
+ def _extract_task_status(obj: dict[str, Any]) -> str | None:
+     output = obj.get("output")
+     if not isinstance(output, dict):
+         return None
+     st = output.get("task_status") or output.get("taskStatus")
+     if isinstance(st, str) and st:
+         return st
+     return None
+
+
+ def _extract_video_url(obj: dict[str, Any]) -> str | None:
+     output = obj.get("output")
+     if not isinstance(output, dict):
+         return None
+     u = output.get("video_url") or output.get("videoUrl")
+     if isinstance(u, str) and u:
+         return u
+     return None
+
+
+ def _wait_task_done(
+     *, task_id: str, api_key: str, deadline: float, proxy_url: str | None
+ ) -> dict[str, Any]:
+     url = f"https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}"
+     while True:
+         remaining_ms = int((deadline - time.time()) * 1000)
+         if remaining_ms <= 0:
+             break
+         obj = request_json(
+             method="GET",
+             url=url,
+             headers=_aliyun_headers(api_key),
+             timeout_ms=min(30_000, remaining_ms),
+             proxy_url=proxy_url,
+         )
+         st = _extract_task_status(obj)
+         if st in {"SUCCEEDED", "FAILED"}:
+             return obj
+         time.sleep(1.0)
+     return {"output": {"task_id": task_id, "task_status": "RUNNING"}}