@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Zhipu (GLM) Media Transport — CogView (image), CogVideoX (video), TTS, STT, Embedding, voice clone, document parsing, rerank.
3
+ *
4
+ * API reference (docs.bigmodel.cn):
5
+ * Image sync: POST /images/generations (CogView-4, cogview-3-flash)
6
+ * Image async: POST /async/images/generations (glm-image)
7
+ * Video async: POST /videos/generations (CogVideoX)
8
+ * TTS sync: POST /audio/speech (glm-tts, returns audio bytes)
9
+ * STT sync: POST /audio/transcriptions (glm-asr, multipart/form-data)
10
+ * Embedding: POST /embeddings (embedding-3/2)
11
+ * Async poll: GET /async-result/{id} (unified poll for all async tasks)
12
+ *
13
+ * Base URL: https://open.bigmodel.cn/api/paas/v4
14
+ * Auth: Authorization: Bearer $ZHIPU_API_KEY
15
+ */
16
+ import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
17
+ export interface ZhipuMediaConfig {
18
+ /** Base URL, e.g. "https://open.bigmodel.cn/api/paas/v4" */
19
+ baseUrl: string;
20
+ timeoutMs?: number;
21
+ }
22
+ export declare class ZhipuMediaTransport implements AsyncMediaTransport {
23
+ readonly supportedTypes: readonly MediaType[];
24
+ private baseUrl;
25
+ private timeoutMs;
26
+ constructor(config: ZhipuMediaConfig);
27
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
28
+ private generateImage;
29
+ /** CogView-4 / cogview-3-flash — sync, returns URL directly */
30
+ private generateImageSync;
31
+ /** glm-image — async submit + poll */
32
+ private generateImageAsync;
33
+ private generateVideo;
34
+ private generateTTS;
35
+ private generateSTT;
36
+ private generateEmbedding;
37
+ private generateVoiceClone;
38
+ private generateDocumentParsing;
39
+ private generateRerank;
40
+ private postJSON;
41
+ /**
42
+ * Unified async result polling — GET /async-result/{id}
43
+ * Returns the result object when task_status === "SUCCESS".
44
+ * Throws on "FAIL" or timeout.
45
+ */
46
+ private pollAsyncResult;
47
+ /**
48
+ * Query a single task status — GET /async-result/{id}
49
+ * Zhipu uses a unified async result endpoint for all task types.
50
+ */
51
+ getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
52
+ status: string;
53
+ task: Record<string, unknown>;
54
+ }>;
55
+ /**
56
+ * List recent tasks — Zhipu does not have a native list endpoint.
57
+ * Returns empty since individual task query via getTaskStatus() is the primary API.
58
+ */
59
+ listVideoTasks(_apiKey: string, _options?: {
60
+ after?: string;
61
+ limit?: number;
62
+ status?: string;
63
+ }, _signal?: AbortSignal): Promise<Record<string, unknown>>;
64
+ /**
65
+ * Cancel/delete is not supported by Zhipu's async API — throws an informative error.
66
+ * The /async-result/{id} endpoint is read-only.
67
+ */
68
+ deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
69
+ /**
70
+ * List cloned voices — GET /voice/list
71
+ * Returns all voice clones for the current user.
72
+ */
73
+ listVoices(apiKey: string, signal?: AbortSignal): Promise<Array<{
74
+ voice_id: string;
75
+ voice_name: string;
76
+ status: string;
77
+ }>>;
78
+ /**
79
+ * Delete a cloned voice — POST /voice/delete
80
+ */
81
+ deleteVoice(voiceId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
82
+ }
@@ -0,0 +1,522 @@
1
+ /**
2
+ * Zhipu (GLM) Media Transport — CogView (image), CogVideoX (video), TTS, STT, Embedding, voice clone, document parsing, rerank.
3
+ *
4
+ * API reference (docs.bigmodel.cn):
5
+ * Image sync: POST /images/generations (CogView-4, cogview-3-flash)
6
+ * Image async: POST /async/images/generations (glm-image)
7
+ * Video async: POST /videos/generations (CogVideoX)
8
+ * TTS sync: POST /audio/speech (glm-tts, returns audio bytes)
9
+ * STT sync: POST /audio/transcriptions (glm-asr, multipart/form-data)
10
+ * Embedding: POST /embeddings (embedding-3/2)
11
+ * Async poll: GET /async-result/{id} (unified poll for all async tasks)
12
+ *
13
+ * Base URL: https://open.bigmodel.cn/api/paas/v4
14
+ * Auth: Authorization: Bearer $ZHIPU_API_KEY
15
+ */
16
+ import { writeFileSync, mkdirSync } from "node:fs";
17
+ import { join } from "node:path";
18
+ import { randomUUID } from "node:crypto";
19
+ import { getUserCacheDir } from "../paths.js";
20
+ const DEFAULT_TIMEOUT_MS = 180_000;
21
+ const POLL_INTERVAL_MS = 3_000;
22
+ const MAX_POLL_MS = 600_000; // 10 min max for video
23
+ // Models that use async image generation (submit + poll)
24
+ const ASYNC_IMAGE_MODELS = new Set(["glm-image"]);
25
+ export class ZhipuMediaTransport {
26
+ supportedTypes = [
27
+ "image", "video", "tts", "stt", "embedding",
28
+ "voice_clone", "document_parsing", "rerank",
29
+ ];
30
+ baseUrl;
31
+ timeoutMs;
32
+ constructor(config) {
33
+ this.baseUrl = config.baseUrl.replace(/\/+$/, "");
34
+ this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
35
+ }
36
+ async generate(request, apiKey, signal) {
37
+ switch (request.mediaType) {
38
+ case "image":
39
+ return this.generateImage(request, apiKey, signal);
40
+ case "video":
41
+ return this.generateVideo(request, apiKey, signal);
42
+ case "tts":
43
+ return this.generateTTS(request, apiKey, signal);
44
+ case "stt":
45
+ return this.generateSTT(request, apiKey, signal);
46
+ case "embedding":
47
+ return this.generateEmbedding(request, apiKey, signal);
48
+ case "voice_clone":
49
+ return this.generateVoiceClone(request, apiKey, signal);
50
+ case "document_parsing":
51
+ return this.generateDocumentParsing(request, apiKey, signal);
52
+ case "rerank":
53
+ return this.generateRerank(request, apiKey, signal);
54
+ default:
55
+ throw new Error(`ZhipuMediaTransport: unsupported mediaType "${request.mediaType}"`);
56
+ }
57
+ }
58
+ // ── Image (CogView sync / glm-image async) ──────────────────────────
59
+ async generateImage(request, apiKey, signal) {
60
+ if (ASYNC_IMAGE_MODELS.has(request.model)) {
61
+ return this.generateImageAsync(request, apiKey, signal);
62
+ }
63
+ return this.generateImageSync(request, apiKey, signal);
64
+ }
65
+ /** CogView-4 / cogview-3-flash — sync, returns URL directly */
66
+ async generateImageSync(request, apiKey, signal) {
67
+ const start = Date.now();
68
+ const url = `${this.baseUrl}/images/generations`;
69
+ const body = {
70
+ model: request.model,
71
+ prompt: request.prompt,
72
+ };
73
+ if (request.size)
74
+ body.size = request.size;
75
+ // CogView-4 supports quality param (standard/hd); cogview-3-flash does not
76
+ if (request.quality && request.model.startsWith("cogview-4"))
77
+ body.quality = request.quality;
78
+ const data = await this.postJSON(url, body, apiKey, signal);
79
+ const items = data.data ?? [];
80
+ const mediaUrls = items.map(d => d.url).filter((u) => !!u);
81
+ return {
82
+ mediaUrls,
83
+ model: request.model,
84
+ size: request.size,
85
+ durationMs: Date.now() - start,
86
+ };
87
+ }
88
+ /** glm-image — async submit + poll */
89
+ async generateImageAsync(request, apiKey, signal) {
90
+ const start = Date.now();
91
+ const url = `${this.baseUrl}/async/images/generations`;
92
+ const body = {
93
+ model: request.model,
94
+ prompt: request.prompt,
95
+ };
96
+ if (request.size)
97
+ body.size = request.size;
98
+ if (request.quality)
99
+ body.quality = request.quality;
100
+ const submitData = await this.postJSON(url, body, apiKey, signal);
101
+ const taskId = submitData.id;
102
+ if (!taskId)
103
+ throw new Error("Zhipu async image: no task id in response");
104
+ const result = await this.pollAsyncResult(taskId, apiKey, signal, request.onProgress);
105
+ const items = (result.data) ?? [];
106
+ const mediaUrls = items.map(d => d.url).filter((u) => !!u);
107
+ return {
108
+ mediaUrls,
109
+ model: request.model,
110
+ size: request.size,
111
+ durationMs: Date.now() - start,
112
+ };
113
+ }
114
+ // ── Video (CogVideoX — async) ───────────────────────────────────────
115
+ async generateVideo(request, apiKey, signal) {
116
+ const start = Date.now();
117
+ const url = `${this.baseUrl}/videos/generations`;
118
+ const body = {
119
+ model: request.model,
120
+ prompt: request.prompt,
121
+ };
122
+ if (request.imageUrl)
123
+ body.image_url = request.imageUrl;
124
+ if (request.size)
125
+ body.size = request.size;
126
+ if (request.duration)
127
+ body.duration = request.duration;
128
+ const submitData = await this.postJSON(url, body, apiKey, signal);
129
+ const taskId = submitData.id;
130
+ if (!taskId)
131
+ throw new Error("Zhipu video generation: no task id in response");
132
+ const result = await this.pollAsyncResult(taskId, apiKey, signal, request.onProgress);
133
+ const videoResult = result.video_result;
134
+ const mediaUrls = (videoResult ?? []).map(v => v.url).filter((u) => !!u);
135
+ return {
136
+ mediaUrls,
137
+ model: request.model,
138
+ durationMs: Date.now() - start,
139
+ };
140
+ }
141
+ // ── TTS (glm-tts — sync, binary response) ───────────────────────────
142
+ async generateTTS(request, apiKey, signal) {
143
+ const start = Date.now();
144
+ const text = request.text || request.prompt;
145
+ if (!text)
146
+ throw new Error("ZhipuMediaTransport: text or prompt required for TTS");
147
+ const url = `${this.baseUrl}/audio/speech`;
148
+ const body = {
149
+ model: request.model || "glm-tts",
150
+ input: text,
151
+ };
152
+ if (request.voice)
153
+ body.voice = request.voice;
154
+ if (request.speed !== undefined)
155
+ body.speed = request.speed;
156
+ if (request.audioFormat)
157
+ body.response_format = request.audioFormat;
158
+ const res = await fetch(url, {
159
+ method: "POST",
160
+ headers: {
161
+ "Content-Type": "application/json",
162
+ Authorization: `Bearer ${apiKey}`,
163
+ },
164
+ body: JSON.stringify(body),
165
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
166
+ });
167
+ if (!res.ok) {
168
+ const errText = await res.text().catch(() => "");
169
+ throw new Error(`Zhipu TTS API error ${res.status}: ${errText}`);
170
+ }
171
+ // Response is raw audio bytes — save to a file under the user cache dir (tts/)
172
+ const audioBuffer = Buffer.from(await res.arrayBuffer());
173
+ const cacheDir = join(getUserCacheDir(), "tts");
174
+ mkdirSync(cacheDir, { recursive: true });
175
+ const fmt = request.audioFormat ?? "wav";
176
+ const filename = `zhipu-tts-${randomUUID()}.${fmt}`;
177
+ const filePath = join(cacheDir, filename);
178
+ writeFileSync(filePath, audioBuffer);
179
+ return {
180
+ mediaUrls: [`file://${filePath}`],
181
+ model: request.model || "glm-tts",
182
+ durationMs: Date.now() - start,
183
+ billingUnit: "per_character",
184
+ billingQuantity: text.length,
185
+ };
186
+ }
187
+ // ── STT (glm-asr — sync, multipart) ─────────────────────────────────
188
+ async generateSTT(request, apiKey, signal) {
189
+ const start = Date.now();
190
+ if (!request.audioUrl)
191
+ throw new Error("ZhipuMediaTransport: audioUrl required for STT");
192
+ const url = `${this.baseUrl}/audio/transcriptions`;
193
+ // Fetch audio file from URL, then send as multipart
194
+ const audioRes = await fetch(request.audioUrl, {
195
+ headers: { "User-Agent": "qlogicagent/1.0" },
196
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
197
+ });
198
+ if (!audioRes.ok)
199
+ throw new Error(`Failed to fetch audio from ${request.audioUrl}`);
200
+ const audioBlob = await audioRes.blob();
201
+ const formData = new FormData();
202
+ formData.append("model", request.model || "glm-asr-2512");
203
+ formData.append("file", audioBlob, "audio.wav");
204
+ const res = await fetch(url, {
205
+ method: "POST",
206
+ headers: {
207
+ Authorization: `Bearer ${apiKey}`,
208
+ },
209
+ body: formData,
210
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
211
+ });
212
+ if (!res.ok) {
213
+ const errText = await res.text().catch(() => "");
214
+ throw new Error(`Zhipu STT API error ${res.status}: ${errText}`);
215
+ }
216
+ const data = await res.json();
217
+ return {
218
+ mediaUrls: [],
219
+ model: request.model || "glm-asr-2512",
220
+ durationMs: Date.now() - start,
221
+ metadata: { transcription: data.text ?? "" },
222
+ };
223
+ }
224
+ // ── Embedding ───────────────────────────────────────────────────────
225
+ async generateEmbedding(request, apiKey, signal) {
226
+ const start = Date.now();
227
+ const text = request.text || request.prompt;
228
+ if (!text)
229
+ throw new Error("ZhipuMediaTransport: text or prompt required for embedding");
230
+ const url = `${this.baseUrl}/embeddings`;
231
+ const body = {
232
+ model: request.model || "embedding-3",
233
+ input: text,
234
+ };
235
+ const data = await this.postJSON(url, body, apiKey, signal);
236
+ const embeddings = data.data ?? [];
237
+ return {
238
+ mediaUrls: [],
239
+ model: request.model || "embedding-3",
240
+ durationMs: Date.now() - start,
241
+ metadata: {
242
+ embeddings: embeddings.map(e => e.embedding).filter(Boolean),
243
+ },
244
+ };
245
+ }
246
+ // ── Voice Clone (glm-tts-clone — sync) ──────────────────────────────
247
+ // Uploads the sample audio via POST /files, then POST /voice/clone with the returned file_id
248
+ // See: zhipu-ProviderMax.md §20 Voice Clone
249
+ async generateVoiceClone(request, apiKey, signal) {
250
+ const start = Date.now();
251
+ if (!request.audioUrl)
252
+ throw new Error("ZhipuMediaTransport: audioUrl required for voice_clone (sample audio)");
253
+ const input = request.text || request.prompt;
254
+ if (!input)
255
+ throw new Error("ZhipuMediaTransport: text or prompt required for voice_clone preview input");
256
+ // Fetch the sample audio
257
+ const audioRes = await fetch(request.audioUrl, {
258
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
259
+ });
260
+ if (!audioRes.ok)
261
+ throw new Error(`Failed to fetch audio sample from ${request.audioUrl}`);
262
+ const audioBlob = await audioRes.blob();
263
+ const uploadForm = new FormData();
264
+ uploadForm.append("purpose", "voice-clone-input");
265
+ uploadForm.append("file", audioBlob, "sample.wav");
266
+ const uploadRes = await fetch(`${this.baseUrl}/files`, {
267
+ method: "POST",
268
+ headers: { Authorization: `Bearer ${apiKey}` },
269
+ body: uploadForm,
270
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
271
+ });
272
+ if (!uploadRes.ok) {
273
+ const errText = await uploadRes.text().catch(() => "");
274
+ throw new Error(`Zhipu Voice Clone file upload error ${uploadRes.status}: ${errText}`);
275
+ }
276
+ const uploadData = await uploadRes.json();
277
+ if (uploadData.error) {
278
+ throw new Error(`Zhipu Voice Clone file upload rejected: ${uploadData.error.code ?? ""}: ${uploadData.error.message ?? ""}`);
279
+ }
280
+ if (!uploadData.id)
281
+ throw new Error("Zhipu Voice Clone file upload: no file id in response");
282
+ const body = {
283
+ model: request.model || "glm-tts-clone",
284
+ voice_name: String(request.metadata?.voiceName ?? `qlogic_clone_${Date.now()}`),
285
+ input,
286
+ file_id: uploadData.id,
287
+ text: typeof request.metadata?.sampleText === "string" ? request.metadata.sampleText : undefined,
288
+ };
289
+ const data = await this.postJSON(`${this.baseUrl}/voice/clone`, body, apiKey, signal);
290
+ if (data.error) {
291
+ throw new Error(`Zhipu Voice Clone rejected: ${data.error.code ?? ""}: ${data.error.message ?? ""}`);
292
+ }
293
+ return {
294
+ mediaUrls: [],
295
+ model: request.model || "glm-tts-clone",
296
+ durationMs: Date.now() - start,
297
+ metadata: {
298
+ voiceId: data.voice,
299
+ previewFileId: data.file_id,
300
+ previewFilePurpose: data.file_purpose,
301
+ requestId: data.request_id,
302
+ sampleFileId: uploadData.id,
303
+ },
304
+ };
305
+ }
306
+ // ── Document Parsing / OCR (glm-ocr — sync) ─────────────────────────
307
+ // POST /layout_parsing — accepts file_url or base64
308
+ // Returns structured page-level parsing results
309
+ // See: zhipu-ProviderMax.md §22 Layout Parsing
310
+ async generateDocumentParsing(request, apiKey, signal) {
311
+ const start = Date.now();
312
+ const file = request.imageUrl || request.audioUrl || request.prompt;
313
+ if (!file) {
314
+ throw new Error("ZhipuMediaTransport: imageUrl/audioUrl or prompt (data URL) required for document_parsing");
315
+ }
316
+ const url = `${this.baseUrl}/layout_parsing`;
317
+ const body = {
318
+ model: request.model || "glm-ocr",
319
+ file: normalizeZhipuLayoutFile(file, request.metadata),
320
+ };
321
+ if (request.metadata?.returnCropImages !== undefined)
322
+ body.return_crop_images = request.metadata.returnCropImages;
323
+ if (request.metadata?.needLayoutVisualization !== undefined)
324
+ body.need_layout_visualization = request.metadata.needLayoutVisualization;
325
+ if (request.metadata?.startPageId !== undefined)
326
+ body.start_page_id = request.metadata.startPageId;
327
+ if (request.metadata?.endPageId !== undefined)
328
+ body.end_page_id = request.metadata.endPageId;
329
+ const data = await this.postJSON(url, body, apiKey, signal);
330
+ const result = data;
331
+ return {
332
+ mediaUrls: [],
333
+ model: request.model || "glm-ocr",
334
+ durationMs: Date.now() - start,
335
+ metadata: {
336
+ content: result.md_results ?? "",
337
+ mdResults: result.md_results ?? "",
338
+ layoutDetails: result.layout_details,
339
+ layoutVisualization: result.layout_visualization,
340
+ dataInfo: result.data_info,
341
+ pages: result.data_info?.pages ?? [],
342
+ pageCount: result.data_info?.num_pages ?? result.data_info?.pages?.length ?? 0,
343
+ usage: result.usage,
344
+ requestId: result.request_id,
345
+ },
346
+ };
347
+ }
348
+ // ── Rerank ──────────────────────────────────────────────────────────
349
+ // POST /rerank — reranks documents by relevance to a query
350
+ // See: zhipu-ProviderMax.md §18 Rerank
351
+ async generateRerank(request, apiKey, signal) {
352
+ const start = Date.now();
353
+ const query = request.prompt;
354
+ if (!query)
355
+ throw new Error("ZhipuMediaTransport: prompt (query) required for rerank");
356
+ if (!request.metadata?.documents) {
357
+ throw new Error("ZhipuMediaTransport: metadata.documents required for rerank");
358
+ }
359
+ const url = `${this.baseUrl}/rerank`;
360
+ const body = {
361
+ model: request.model || "rerank",
362
+ query,
363
+ documents: request.metadata.documents,
364
+ };
365
+ if (request.metadata.topN !== undefined)
366
+ body.top_n = request.metadata.topN;
367
+ const data = await this.postJSON(url, body, apiKey, signal);
368
+ return {
369
+ mediaUrls: [],
370
+ model: request.model || "rerank",
371
+ durationMs: Date.now() - start,
372
+ metadata: {
373
+ results: (data.results ?? []).map(r => ({
374
+ index: r.index,
375
+ relevanceScore: r.relevance_score,
376
+ document: r.document,
377
+ })),
378
+ },
379
+ };
380
+ }
381
+ // ── Shared helpers ──────────────────────────────────────────────────
382
+ async postJSON(url, body, apiKey, signal) {
383
+ const res = await fetch(url, {
384
+ method: "POST",
385
+ headers: {
386
+ "Content-Type": "application/json",
387
+ Authorization: `Bearer ${apiKey}`,
388
+ },
389
+ body: JSON.stringify(body),
390
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
391
+ });
392
+ if (!res.ok) {
393
+ const errText = await res.text().catch(() => "");
394
+ throw new Error(`Zhipu API error ${res.status}: ${errText}`);
395
+ }
396
+ return res.json();
397
+ }
398
+ /**
399
+ * Unified async result polling — GET /async-result/{id}
400
+ * Returns the result object when task_status === "SUCCESS".
401
+ * Throws on "FAIL" or timeout.
402
+ */
403
+ async pollAsyncResult(taskId, apiKey, signal, onProgress) {
404
+ const deadline = Date.now() + MAX_POLL_MS;
405
+ const pollUrl = `${this.baseUrl}/async-result/${encodeURIComponent(taskId)}`;
406
+ while (Date.now() < deadline) {
407
+ signal?.throwIfAborted();
408
+ const res = await fetch(pollUrl, {
409
+ method: "GET",
410
+ headers: { Authorization: `Bearer ${apiKey}` },
411
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
412
+ });
413
+ if (!res.ok) {
414
+ const errText = await res.text().catch(() => "");
415
+ throw new Error(`Zhipu async-result error ${res.status}: ${errText}`);
416
+ }
417
+ const data = await res.json();
418
+ if (data.task_status === "SUCCESS") {
419
+ onProgress?.(100, "completed", taskId);
420
+ return data;
421
+ }
422
+ if (data.task_status === "FAIL") {
423
+ throw new Error("Zhipu async task failed");
424
+ }
425
+ // PROCESSING — report progress and wait
426
+ const elapsed = Date.now() - (deadline - MAX_POLL_MS);
427
+ onProgress?.(Math.min(95, Math.round((elapsed / MAX_POLL_MS) * 100)), data.task_status ?? "running", taskId);
428
+ await new Promise(r => setTimeout(r, POLL_INTERVAL_MS));
429
+ }
430
+ throw new Error(`Zhipu async task timed out after ${MAX_POLL_MS / 1000}s`);
431
+ }
432
+ // ── AsyncMediaTransport: Task Management ────────────────────────────
433
+ /**
434
+ * Query a single task status — GET /async-result/{id}
435
+ * Zhipu uses a unified async result endpoint for all task types.
436
+ */
437
+ async getTaskStatus(taskId, apiKey, signal) {
438
+ const url = `${this.baseUrl}/async-result/${encodeURIComponent(taskId)}`;
439
+ const res = await fetch(url, {
440
+ method: "GET",
441
+ headers: { Authorization: `Bearer ${apiKey}` },
442
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
443
+ });
444
+ if (!res.ok) {
445
+ const errText = await res.text().catch(() => "");
446
+ throw new Error(`Zhipu task query error ${res.status}: ${errText}`);
447
+ }
448
+ const data = await res.json();
449
+ const taskStatus = data.task_status ?? "unknown";
450
+ // Normalize Zhipu statuses: SUCCESS/FAIL/PROCESSING become succeeded/failed/running; any other value is lower-cased
451
+ const normalizedStatus = taskStatus === "SUCCESS" ? "succeeded"
452
+ : taskStatus === "FAIL" ? "failed"
453
+ : taskStatus === "PROCESSING" ? "running"
454
+ : taskStatus.toLowerCase();
455
+ return { status: normalizedStatus, task: data };
456
+ }
457
+ /**
458
+ * List recent tasks — Zhipu does not have a native list endpoint.
459
+ * Returns empty since individual task query via getTaskStatus() is the primary API.
460
+ */
461
+ async listVideoTasks(_apiKey, _options, _signal) {
462
+ // Zhipu has no list endpoint; callers should use getTaskStatus() for direct lookup
463
+ return { data: [] };
464
+ }
465
+ /**
466
+ * Cancel/delete is not supported by Zhipu's async API — throws an informative error.
467
+ * The /async-result/{id} endpoint is read-only.
468
+ */
469
+ async deleteVideoTask(_taskId, _apiKey, _signal) {
470
+ throw new Error("Zhipu does not support task cancellation. Tasks complete or timeout automatically.");
471
+ }
472
+ // ── T17: Voice Management API ───────────────────────────────────────
473
+ /**
474
+ * List cloned voices — GET /voice/list
475
+ * Returns all voice clones for the current user.
476
+ */
477
+ async listVoices(apiKey, signal) {
478
+ const url = `${this.baseUrl}/voice/list`;
479
+ const res = await fetch(url, {
480
+ method: "GET",
481
+ headers: { Authorization: `Bearer ${apiKey}` },
482
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
483
+ });
484
+ if (!res.ok) {
485
+ const errText = await res.text().catch(() => "");
486
+ throw new Error(`Zhipu voice list error ${res.status}: ${errText}`);
487
+ }
488
+ const data = await res.json();
489
+ return (data.voice_list ?? []).map(voice => ({
490
+ voice_id: voice.voice_id ?? voice.voice ?? "",
491
+ voice_name: voice.voice_name ?? "",
492
+ status: voice.status ?? "",
493
+ }));
494
+ }
495
+ /**
496
+ * Delete a cloned voice — POST /voice/delete
497
+ */
498
+ async deleteVoice(voiceId, apiKey, signal) {
499
+ const url = `${this.baseUrl}/voice/delete`;
500
+ const res = await fetch(url, {
501
+ method: "POST",
502
+ headers: {
503
+ "Content-Type": "application/json",
504
+ Authorization: `Bearer ${apiKey}`,
505
+ },
506
+ body: JSON.stringify({ voice: voiceId }),
507
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
508
+ });
509
+ if (!res.ok) {
510
+ const errText = await res.text().catch(() => "");
511
+ throw new Error(`Zhipu voice delete error ${res.status}: ${errText}`);
512
+ }
513
+ }
514
+ }
515
+ function normalizeZhipuLayoutFile(file, metadata) {
516
+ if (/^(https?:|data:)/i.test(file))
517
+ return file;
518
+ const mimeType = typeof metadata?.mimeType === "string"
519
+ ? metadata.mimeType
520
+ : "application/pdf";
521
+ return `data:${mimeType};base64,${file}`;
522
+ }
@@ -0,0 +1,35 @@
1
+ /**
2
+ * ZhipuToolAPI — Zhipu-specific utility endpoints.
3
+ *
4
+ * Implements ProviderToolAPI for Zhipu GLM platform independent APIs:
5
+ * C1: Web Search — POST /tools/web-search
6
+ * C2: Reader — POST /tools/reader (extract web page content)
7
+ * C3: Tokenizer — POST /tokenizer
8
+ * C4: Moderations — POST /moderations
9
+ *
10
+ * Base URL: https://open.bigmodel.cn/api/paas/v4
11
+ * Auth: Authorization: Bearer $ZHIPU_API_KEY
12
+ *
13
+ * C5 (File Parser) is handled by document_parsing media handler.
14
+ * C6 (Realtime API) requires WebSocket — out of scope for this interface.
15
+ */
16
+ import type { ProviderToolAPI, ProviderToolCapability, WebSearchResult, ReaderResult, TokenizerResult, ModerationResult } from "../provider-tool-api.js";
17
+ export interface ZhipuToolAPIConfig {
18
+ baseUrl: string;
19
+ apiKey: string;
20
+ timeoutMs?: number;
21
+ }
22
+ export declare class ZhipuToolAPI implements ProviderToolAPI {
23
+ readonly capabilities: readonly ProviderToolCapability[];
24
+ private baseUrl;
25
+ private apiKey;
26
+ private timeoutMs;
27
+ constructor(config: ZhipuToolAPIConfig);
28
+ webSearch(query: string, options?: {
29
+ maxResults?: number;
30
+ }): Promise<WebSearchResult[]>;
31
+ reader(pageUrl: string): Promise<ReaderResult>;
32
+ tokenize(text: string, model: string): Promise<TokenizerResult>;
33
+ moderate(text: string): Promise<ModerationResult>;
34
+ private postJSON;
35
+ }