@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,217 @@
1
+ /**
2
+ * LLM Client factory — resolves ProviderDef → creates LLMTransport instance.
3
+ *
4
+ * Replaces the old createAdminInferProxyClient() call chain.
5
+ * User API key + ProviderDef → direct provider connection.
6
+ */
7
+ import { OpenAIChatTransport } from "./transports/openai-chat.js";
8
+ import { OpenAIResponsesTransport } from "./transports/openai-responses.js";
9
+ import { AnthropicMessagesTransport } from "./transports/anthropic-messages.js";
10
+ import { VolcengineResponsesTransport } from "./transports/volcengine-responses.js";
11
+ import { GeminiGenerateContentTransport } from "./transports/gemini-generatecontent.js";
12
+ import { OpenAIFileUploadAdapter } from "./adapters/openai-file-upload-adapter.js";
13
+ import { VolcengineFileUploadAdapter } from "./adapters/volcengine-file-upload-adapter.js";
14
+ import { GeminiFileUploadAdapter } from "./adapters/gemini-file-upload-adapter.js";
15
+ import { HubOSSFileUploadAdapter } from "./adapters/hub-oss-file-upload-adapter.js";
16
+ import { createAliyunOSSAdapterFromEnv } from "./adapters/aliyun-oss-file-upload-adapter.js";
17
/**
 * Build an LLM client for `config.provider` from the definitions held by `registry`.
 *
 * Flow:
 *   1. Resolve the provider definition; an unknown id is an error.
 *   2. Choose the base URL: a non-empty `config.baseUrl` wins, otherwise the provider default.
 *   3. If the user URL carries no path but the builtin URL does, graft the builtin path
 *      onto the user's origin (e.g. "https://api.deepseek.com" regains "/anthropic").
 *   4. Create the transport for the (possibly overridden) provider definition.
 *
 * @param config - Reads `provider`, `baseUrl`, `apiKey` and `model`.
 * @param registry - Must provide `getProvider`, `listProviders` and `getBuiltinProvider`.
 * @returns An object with `transport`, `apiKey`, `resolvedModel` and `providerDef`.
 * @throws Error when `config.provider` is not known to the registry.
 */
export function createLLMClient(config, registry) {
    const providerDef = registry.getProvider(config.provider);
    if (!providerDef) {
        const knownIds = registry.listProviders().map((p) => p.id).join(", ");
        throw new Error(`Unknown LLM provider: "${config.provider}". Available: ${knownIds}`);
    }
    // Hermes parity: an empty-string override counts as "not set" (truthiness, not nullish).
    let baseUrl = config.baseUrl ? config.baseUrl : providerDef.baseUrl;
    const builtinBaseUrl = registry.getBuiltinProvider(config.provider)?.baseUrl;
    // Users often paste only the domain from vendor docs, while non-native providers on the
    // anthropic-messages transport need a path suffix. Restore it from the builtin URL.
    if (config.baseUrl && builtinBaseUrl) {
        try {
            const overridePath = new URL(config.baseUrl).pathname;
            const builtinPath = new URL(builtinBaseUrl).pathname;
            const overrideHasPath = overridePath !== "/" && overridePath !== "";
            const builtinHasPath = builtinPath !== "/" && builtinPath !== "";
            if (!overrideHasPath && builtinHasPath) {
                baseUrl = new URL(builtinPath, config.baseUrl).toString().replace(/\/$/, "");
            }
        }
        catch {
            // Unparseable URL — keep the value chosen above and use it as-is.
        }
    }
    // Only clone the definition when the caller actually supplied an override.
    const effectiveProviderDef = config.baseUrl ? { ...providerDef, baseUrl } : providerDef;
    return {
        transport: createTransport(effectiveProviderDef, baseUrl),
        apiKey: config.apiKey,
        resolvedModel: config.model,
        providerDef: effectiveProviderDef,
    };
}
59
+ /**
60
+ * Auto-detect provider from API key environment variables.
61
+ * Scans registry for providers whose env vars are set.
62
+ * Returns the first match.
63
+ */
64
+ export function autoDetectProvider(registry) {
65
+ for (const provider of registry.listProviders()) {
66
+ const key = registry.resolveApiKey(provider.id);
67
+ if (key) {
68
+ return {
69
+ providerId: provider.id,
70
+ apiKey: key,
71
+ defaultModel: provider.defaultModel ?? provider.models?.[0]?.id ?? "",
72
+ };
73
+ }
74
+ }
75
+ return undefined;
76
+ }
77
/**
 * Select the file-upload adapter that goes with a transport kind.
 *
 * - "openai-chat" / "openai-responses" → OpenAIFileUploadAdapter
 * - "volcengine-responses"             → VolcengineFileUploadAdapter
 * - "gemini-generatecontent"           → GeminiFileUploadAdapter
 * - "anthropic-messages" and any other value → the shared OSS adapter, which may be
 *   `undefined` when no OSS backend is configured.
 *
 * @param transport - Transport kind string from the provider definition.
 * @param baseUrl - Base URL handed to the native adapters.
 * @param providerDef - Supplies `id` and `extraHeaders` for the OpenAI adapter.
 */
function createFileUploadAdapter(transport, baseUrl, providerDef) {
    if (transport === "openai-chat" || transport === "openai-responses") {
        return new OpenAIFileUploadAdapter({
            baseUrl,
            provider: providerDef.id,
            extraHeaders: providerDef.extraHeaders,
        });
    }
    if (transport === "volcengine-responses") {
        return new VolcengineFileUploadAdapter({ baseUrl });
    }
    if (transport === "gemini-generatecontent") {
        return new GeminiFileUploadAdapter({ baseUrl });
    }
    // Providers without a native File Upload API use cloud OSS.
    // Priority: Hub OSS relay → direct Aliyun OSS → undefined (will error at call site).
    return getUniversalOSSAdapter();
}
97
/**
 * Universal OSS fallback for providers without a native File Upload API.
 * Priority: Hub OSS relay (if a hub URL is set) → direct Aliyun OSS (if the env-based
 * factory yields an adapter).
 *
 * Cache states of `_cachedOSSAdapter`:
 *   - `null`      → not resolved yet
 *   - adapter     → resolved and configured
 *   - `undefined` → resolved, nothing configured
 * The environment is therefore only inspected on the first call.
 */
let _cachedOSSAdapter = null;
function getUniversalOSSAdapter() {
    if (_cachedOSSAdapter !== null) {
        return _cachedOSSAdapter || undefined;
    }
    const env = process.env;
    // Option 1: Hub OSS relay
    const hubBaseUrl = (env.QLOGICAGENT_HUB_URL ?? env.HUB_BASE_URL ?? "").trim();
    if (hubBaseUrl) {
        const hubApiKey = (env.QLOGICAGENT_HUB_API_KEY ?? env.HUB_API_KEY ?? "").trim() || undefined;
        _cachedOSSAdapter = new HubOSSFileUploadAdapter({ hubBaseUrl, hubApiKey });
    }
    else {
        // Option 2: direct Aliyun OSS. A falsy result is stored as `undefined`, meaning
        // neither backend is configured (the transport will throw on a local URL instead
        // of falling back to base64).
        _cachedOSSAdapter = createAliyunOSSAdapterFromEnv() || undefined;
    }
    return _cachedOSSAdapter;
}
125
/**
 * Create the transport used to talk to a provider.
 *
 * The file-upload adapter is resolved first, then the primary transport is built from
 * `providerDef.transport`. Providers whose primary transport is "anthropic-messages" and
 * whose quirks set `supportsPrefixCompletion` (e.g. DeepSeek) get a composite transport:
 * prefix requests go to an OpenAI-chat transport, everything else to the primary.
 *
 * @param providerDef - Provider definition (transport kind, headers, quirks, ...).
 * @param baseUrl - Effective base URL for the primary transport.
 * @throws Error when `providerDef.transport` is not a supported kind.
 */
function createTransport(providerDef, baseUrl) {
    const fileUploadAdapter = createFileUploadAdapter(providerDef.transport, baseUrl, providerDef);
    const primary = instantiatePrimaryTransport(providerDef, baseUrl, fileUploadAdapter);
    const wantsPrefixRouting = providerDef.transport === "anthropic-messages" &&
        providerDef.quirks?.supportsPrefixCompletion;
    if (!wantsPrefixRouting) {
        return primary;
    }
    // Derive the OpenAI base URL by stripping the /anthropic suffix,
    // e.g. "https://api.deepseek.com/anthropic" → "https://api.deepseek.com".
    const openaiBaseUrl = baseUrl.replace(/\/anthropic\/?$/, "");
    const prefixTransport = new OpenAIChatTransport({
        baseUrl: openaiBaseUrl,
        quirks: providerDef.quirks,
    });
    return new PrefixRoutingTransport(primary, prefixTransport);
}
/** Instantiate the transport class matching `providerDef.transport`, or throw if unsupported. */
function instantiatePrimaryTransport(providerDef, baseUrl, fileUploadAdapter) {
    const { extraHeaders, quirks } = providerDef;
    switch (providerDef.transport) {
        case "openai-chat":
            return new OpenAIChatTransport({
                baseUrl,
                extraHeaders,
                supportsStreamOptions: providerDef.supportsStreamOptions,
                omitZeroTemperature: providerDef.omitZeroTemperature,
                quirks,
                fileUploadAdapter,
            });
        case "openai-responses":
            return new OpenAIResponsesTransport({
                baseUrl,
                extraHeaders,
                quirks,
                fileUploadAdapter,
            });
        case "anthropic-messages":
            return new AnthropicMessagesTransport({
                baseUrl,
                omitZeroTemperature: providerDef.omitZeroTemperature,
                quirks,
                fileUploadAdapter,
            });
        case "volcengine-responses":
            return new VolcengineResponsesTransport({
                baseUrl,
                extraHeaders,
                quirks,
                fileUploadAdapter,
            });
        case "gemini-generatecontent":
            return new GeminiGenerateContentTransport({
                baseUrl,
                quirks,
                fileUploadAdapter,
            });
        default:
            throw new Error(`Unsupported transport type: "${providerDef.transport}" for provider "${providerDef.id}"`);
    }
}
189
+ /**
190
+ * Composite transport: routes prefix-completion requests to an OpenAI-chat
191
+ * transport (DeepSeek /beta endpoint) while keeping the primary Anthropic
192
+ * transport for all other requests.
193
+ */
194
+ class PrefixRoutingTransport {
195
+ primary;
196
+ prefixTransport;
197
+ constructor(primary, prefixTransport) {
198
+ this.primary = primary;
199
+ this.prefixTransport = prefixTransport;
200
+ }
201
+ async *stream(request, apiKey, signal) {
202
+ const transport = request.prefixMessage
203
+ ? this.prefixTransport
204
+ : this.primary;
205
+ yield* transport.stream(request, apiKey, signal);
206
+ }
207
+ // Delegate FIM completion to the prefix transport (OpenAI-chat)
208
+ // which has the DeepSeek /beta endpoint.
209
+ async *complete(request, apiKey, signal) {
210
+ if (this.prefixTransport.complete) {
211
+ yield* this.prefixTransport.complete(request, apiKey, signal);
212
+ }
213
+ else {
214
+ throw new Error("FIM completion is not supported by this transport");
215
+ }
216
+ }
217
+ }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Media Client factory — resolves ProviderDef → creates MediaTransport instances.
3
+ *
4
+ * Parallel to createLLMClient() but for generation models (image/video/music/3D).
5
+ * Uses the same ProviderRegistry to look up provider config, then instantiates
6
+ * the correct media transport adapter.
7
+ *
8
+ * The MediaClient holds a provider-keyed transport cache so that repeated
9
+ * generation calls reuse the same adapter instance.
10
+ */
11
+ import type { MediaTransport } from "./media-transport.js";
12
+ import type { ProviderDef, MediaCapability, ModelInfo } from "./provider-def.js";
13
+ import type { ProviderRegistry } from "./provider-registry.js";
14
+ export interface MediaClientConfig { // options object accepted by the MediaClient constructor
15
+ registry: ProviderRegistry; // registry the client reads provider definitions and model lists from
16
+ }
17
+ export interface ResolvedMediaModel { // one model lookup result produced by MediaClient
18
+ providerId: string; // id of the provider that owns the model
19
+ providerDef: ProviderDef; // definition of that provider as returned by the registry
20
+ modelInfo: ModelInfo; // the matching model entry
21
+ mediaType: MediaCapability; // media type the model was matched or listed under
22
+ }
23
+ export declare class MediaClient { // resolves media models and hands out per-provider MediaTransport instances
24
+ private registry; // ProviderRegistry taken from the constructor config
25
+ private transports; // provider-keyed cache of created transports
26
+ constructor(config: MediaClientConfig);
27
+ /**
28
+ * Find a specific model by provider + model id for a given media type.
29
+ * Used by the user-designated model routing (no failover).
+ *
+ * @returns the resolved model, or `undefined` when the provider or model is not found.
30
+ */
31
+ resolveModelById(providerId: string, modelId: string, mediaType: MediaCapability): ResolvedMediaModel | undefined;
32
+ /**
33
+ * Get a MediaTransport for a specific provider.
34
+ * Creates and caches the adapter on first access.
+ *
+ * @returns the transport, or `undefined` when none is available for the provider.
35
+ */
36
+ getTransport(providerId: string): MediaTransport | undefined;
37
+ /**
38
+ * List all available generation models across all providers.
+ *
+ * @param mediaType - when given, restricts the result to models of that media type.
39
+ */
40
+ listMediaModels(mediaType?: MediaCapability): ResolvedMediaModel[];
41
+ private findModelInProvider; // internal lookup helper; signature not exposed in this declaration
42
+ }
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Media Client factory — resolves ProviderDef → creates MediaTransport instances.
3
+ *
4
+ * Parallel to createLLMClient() but for generation models (image/video/music/3D).
5
+ * Uses the same ProviderRegistry to look up provider config, then instantiates
6
+ * the correct media transport adapter.
7
+ *
8
+ * The MediaClient holds a provider-keyed transport cache so that repeated
9
+ * generation calls reuse the same adapter instance.
10
+ */
11
+ import { VolcengineMediaTransport } from "./transports/volcengine-media.js";
12
+ import { OpenAIMediaTransport } from "./transports/openai-media.js";
13
+ import { MiniMaxMediaTransport } from "./transports/minimax-media.js";
14
+ import { GeminiMediaTransport } from "./transports/gemini-media.js";
15
+ import { QwenMediaTransport } from "./transports/qwen-media.js";
16
+ import { ZhipuMediaTransport } from "./transports/zhipu-media.js";
17
+ // ── MediaClient ──────────────────────────────────────────────────────────────
18
/**
 * Registry-backed entry point for media (generation) models.
 *
 * Looks models up through the ProviderRegistry given at construction and keeps one
 * MediaTransport per provider id so repeated calls reuse the same adapter instance.
 */
export class MediaClient {
    registry;
    transports = new Map();
    /** @param config - Object whose `registry` property is stored on the instance. */
    constructor(config) {
        this.registry = config.registry;
    }
    /**
     * Look up one model by provider id, model id and media type.
     * Used by the user-designated model routing (no failover).
     *
     * @returns `{ providerId, providerDef, modelInfo, mediaType }`, or `undefined` when
     *          the provider is unknown or no model matches both id and media type.
     */
    resolveModelById(providerId, modelId, mediaType) {
        const providerDef = this.registry.getProvider(providerId);
        if (!providerDef) {
            return undefined;
        }
        const modelInfo = this.registry
            .listModels(providerId)
            .find((m) => m.id === modelId && m.mediaType === mediaType);
        if (!modelInfo) {
            return undefined;
        }
        return { providerId, providerDef, modelInfo, mediaType };
    }
    /**
     * Return the MediaTransport for a provider, creating and caching it on first use.
     *
     * @returns the transport, or `undefined` when the provider is unknown or has no
     *          media transport. Misses are not cached.
     */
    getTransport(providerId) {
        const existing = this.transports.get(providerId);
        if (existing) {
            return existing;
        }
        const providerDef = this.registry.getProvider(providerId);
        if (!providerDef) {
            return undefined;
        }
        const created = createMediaTransport(providerDef);
        if (!created) {
            return undefined;
        }
        this.transports.set(providerId, created);
        return created;
    }
    /**
     * List every model that declares a media type, across all providers.
     *
     * @param mediaType - Optional filter; when set, only models of that type are returned.
     */
    listMediaModels(mediaType) {
        const matches = [];
        for (const provider of this.registry.listProviders()) {
            for (const modelInfo of this.registry.listModels(provider.id)) {
                const kind = modelInfo.mediaType;
                const wanted = kind && (!mediaType || kind === mediaType);
                if (!wanted) {
                    continue;
                }
                matches.push({
                    providerId: provider.id,
                    providerDef: provider,
                    modelInfo,
                    mediaType: kind,
                });
            }
        }
        return matches;
    }
    // ── Internal ─────────────────────────────────────────────────────────────
    /**
     * Find the first model of `mediaType` in a provider that can serve `request`.
     *
     * A model qualifies when no specific operation can be inferred from the request,
     * when the model has no `mediaCapabilities` metadata (backward compatibility), or
     * when its capabilities support the inferred operation.
     */
    findModelInProvider(providerId, mediaType, request) {
        const providerDef = this.registry.getProvider(providerId);
        if (!providerDef) {
            return undefined;
        }
        const candidates = this.registry.listModels(providerId);
        const requiredOp = request ? inferRequiredOperation(request) : undefined;
        const modelInfo = candidates.find((m) => m.mediaType === mediaType &&
            (!requiredOp ||
                !m.mediaCapabilities ||
                modelSupportsOperation(m.mediaCapabilities, requiredOp)));
        if (!modelInfo) {
            return undefined;
        }
        return { providerId, providerDef, modelInfo, mediaType };
    }
}
99
+ // ── Capability matching helpers ──────────────────────────────────────────────
100
/**
 * Derive the operation name a model must support in order to serve `request`.
 *
 * Mapping by `request.mediaType`:
 *   - "video": explicit "merge" / "upscale" / "edit" pass through; otherwise
 *     "img2video" when `imageUrl` is set, else "text2video".
 *   - "image": operation "edit" → "inpainting"; otherwise "img2img" / "text2image".
 *   - "music": "cover" when `audioUrl` is set, else "text2music".
 *   - "music_realtime" → "realtime"; "tts" → "text2speech".
 *   - "3d": "img2_3d" when `imageUrl` is set, else "text2_3d".
 *
 * @returns the operation name, or `undefined` for any other media type.
 */
function inferRequiredOperation(request) {
    const { mediaType, operation, imageUrl, audioUrl } = request;
    if (mediaType === "video") {
        if (operation === "merge" || operation === "upscale" || operation === "edit") {
            return operation;
        }
        return imageUrl ? "img2video" : "text2video";
    }
    if (mediaType === "image") {
        if (operation === "edit") {
            return "inpainting";
        }
        return imageUrl ? "img2img" : "text2image";
    }
    if (mediaType === "music") {
        return audioUrl ? "cover" : "text2music";
    }
    if (mediaType === "music_realtime") {
        return "realtime";
    }
    if (mediaType === "tts") {
        return "text2speech";
    }
    if (mediaType === "3d") {
        return imageUrl ? "img2_3d" : "text2_3d";
    }
    return undefined;
}
130
/**
 * Tell whether a model's capability metadata allows `requiredOp`.
 *
 * Capabilities that carry no usable `operations` list are treated as permitting
 * everything; otherwise the operation must be included in the list.
 */
function modelSupportsOperation(caps, requiredOp) {
    const declaresOperations = "operations" in caps && Boolean(caps.operations);
    return declaresOperations ? caps.operations.includes(requiredOp) : true;
}
138
+ // ── Transport factory ────────────────────────────────────────────────────────
139
/**
 * Provider → MediaTransport mapping, keyed by `providerDef.id`.
 *
 * The provider's `baseUrl` is passed through unchanged except for the zhipu ids,
 * whose media endpoints live under /api/paas/v4:
 *   - "zhipu": a trailing /api/anthropic is rewritten to /api/paas/v4.
 *   - "zhipu-openai" / "zhipu-coding": a trailing /api/coding/paas/v4 is rewritten to
 *     /api/paas/v4; a URL already ending in /api/paas/v4 is left as it is.
 *
 * @returns the transport instance, or `undefined` for providers without media support.
 */
function createMediaTransport(providerDef) {
    const rawUrl = providerDef.baseUrl;
    const providerId = providerDef.id;
    if (providerId === "volcengine") {
        return new VolcengineMediaTransport({ baseUrl: rawUrl });
    }
    if (providerId === "openai") {
        return new OpenAIMediaTransport({ baseUrl: rawUrl });
    }
    if (providerId === "minimax") {
        return new MiniMaxMediaTransport({ baseUrl: rawUrl });
    }
    if (providerId === "google") {
        return new GeminiMediaTransport({ baseUrl: rawUrl });
    }
    if (providerId === "qwen") {
        return new QwenMediaTransport({ baseUrl: rawUrl });
    }
    if (providerId === "zhipu") {
        // Media models use the OpenAI-compat /paas/v4 endpoint, not the Anthropic endpoint.
        const mediaBaseUrl = rawUrl.replace(/\/api\/anthropic\/?$/, "/api/paas/v4");
        return new ZhipuMediaTransport({ baseUrl: mediaBaseUrl });
    }
    if (providerId === "zhipu-openai" || providerId === "zhipu-coding") {
        // Media endpoints are on /api/paas/v4 regardless of the chat base URL.
        const mediaBaseUrl = rawUrl.replace(/\/api\/coding\/paas\/v4\/?$/, "/api/paas/v4");
        return new ZhipuMediaTransport({ baseUrl: mediaBaseUrl });
    }
    // Provider doesn't support media generation
    return undefined;
}
@@ -0,0 +1,176 @@
1
+ /**
2
+ * MediaTransport — transport interface for generation APIs (image, video, music, 3D).
3
+ *
4
+ * Parallel to LLMTransport (chat/reasoning), MediaTransport handles
5
+ * non-chat generation endpoints that each vendor exposes differently:
6
+ * - Sync (OpenAI images, Volcengine Seedream, Gemini generateContent)
7
+ * - Async job (Volcengine Seedance/3D, MiniMax music) — submit → poll → result
8
+ *
9
+ * Each provider adapter implements this interface and hides vendor-specific
10
+ * auth, endpoint paths, request shapes, and polling logic.
11
+ */
12
+ export type MediaType = "image" | "video" | "music" | "music_realtime" | "tts" | "3d" | "stt" | "embedding" | "video_understanding" | "image_understanding" | "voice_clone" | "rerank" | "document_parsing" | "realtime_audio" | "realtime_video"; // values allowed for MediaRequest.mediaType and MediaTransport.supportedTypes
13
+ export interface MediaRequest {
14
+ /** Generation model id, e.g. "doubao-seedream-5-0-260128", "gpt-image-2" */
15
+ model: string;
16
+ /** What kind of media to generate */
17
+ mediaType: MediaType;
18
+ /** Text prompt for generation */
19
+ prompt: string;
20
+ /** Optional reference image URL (img2img, i2v, img-to-3d) */
21
+ imageUrl?: string;
22
+ /** Desired dimensions, e.g. "1024x1024" */
23
+ size?: string;
24
+ /** Aspect ratio for video, e.g. "16:9" */
25
+ aspectRatio?: string;
26
+ /** Duration in seconds (video, music) */
27
+ duration?: number;
28
+ /** Number of outputs (image) */
29
+ n?: number;
30
+ /** Visual/musical style */
31
+ style?: string;
32
+ /** Intended use / purpose */
33
+ purpose?: string;
34
+ /** Lyrics for music generation */
35
+ lyrics?: string;
36
+ /** Text for TTS */
37
+ text?: string;
38
+ /** TTS channel hint */
39
+ channel?: string;
40
+ /** Source video URLs for edit/merge operations */
41
+ sourceVideos?: string[];
42
+ /** Reference images (no role / first_frame / last_frame) */
43
+ referenceImages?: string[];
44
+ /** Reference image roles — parallel array with referenceImages. */
45
+ imageRoles?: Array<"first_frame" | "last_frame" | "reference_image">;
46
+ /** Reference video URLs for multimodal reference generation (Seedance 2.0) */
47
+ referenceVideos?: string[];
48
+ /** Reference audio URLs for multimodal reference generation (Seedance 2.0) */
49
+ referenceAudios?: string[];
50
+ /** Generate synchronized audio track (Seedance 2.0 / 1.5 pro) */
51
+ generateAudio?: boolean;
52
+ /** Output resolution for upscale, e.g. "1080p" */
53
+ resolution?: string;
54
+ /** Operation variant: generate (default), edit, merge, upscale, multimodal_reference, extend */
55
+ operation?: "generate" | "edit" | "merge" | "upscale" | "multimodal_reference" | "extend";
56
+ /** Image/video quality, e.g. "auto", "high", "low", "hd" */
57
+ quality?: string;
58
+ /** Seed for reproducible generation */
59
+ seed?: number;
60
+ /** TTS voice, e.g. "alloy", "nova", "shimmer" */
61
+ voice?: string;
62
+ /** TTS speech speed multiplier */
63
+ speed?: number;
64
+ /** Pure instrumental mode (MiniMax music) */
65
+ isInstrumental?: boolean;
66
+ /** Source audio URL for cover/remix (MiniMax music) */
67
+ audioUrl?: string;
68
+ /** Output audio format, e.g. "mp3", "wav", "flac" */
69
+ audioFormat?: string;
70
+ /** Video frames per second */
71
+ fps?: number;
72
+ /** Whether to add AI watermark */
73
+ watermark?: boolean;
74
+ /** img2img guidance/control strength (0-1), e.g. Volcengine */
75
+ guidanceScale?: number;
76
+ /** Auto-generate lyrics when none provided (MiniMax) */
77
+ lyricsOptimizer?: boolean;
78
+ /** 3D output format, e.g. "glb", "obj", "usd", "usdz" */
79
+ outputFormat?: string;
80
+ /** Image background mode (OpenAI gpt-image-2): "transparent", "opaque", "auto" */
81
+ background?: string;
82
+ /** Provider-specific request metadata (e.g. rerank documents, parsing options) */
83
+ metadata?: Record<string, unknown>;
84
+ /** Ask the provider to optimize/enhance the prompt before generation */
85
+ enhancePrompt?: boolean;
86
+ /**
87
+ * Progress callback for async polling operations (video gen, 3D gen, etc.).
88
+ * Called periodically with estimated progress percentage, status text, and
89
+ * the provider-specific task ID (available once the task is submitted).
90
+ */
91
+ onProgress?: (percent: number, status: string, taskId?: string) => void;
92
+ /** Enable progressive/streaming image generation (Volcengine Seedream stream:true) */
93
+ streamImage?: boolean;
94
+ /** Lock camera position (Seedance 1.0/1.5, not Seedance 2.0) */
95
+ cameraFixed?: boolean;
96
+ /** Return last frame URL for chaining continuous video segments */
97
+ returnLastFrame?: boolean;
98
+ /** Service tier: 'default' (online) or 'flex' (offline, ~50% cost) — not all models support flex */
99
+ serviceTier?: "default" | "flex";
100
+ /** Task expiration in seconds (for flex/offline scheduling) */
101
+ executionExpiresAfterSeconds?: number;
102
+ /** Draft mode — low-cost preview (Seedance 1.5 pro only) */
103
+ draft?: boolean;
104
+ /** Draft task ID to promote to final video */
105
+ draftTaskId?: string;
106
+ /** Video-level builtin tools, e.g. ["web_search"] (Seedance 2.0) */
107
+ videoTools?: string[];
108
+ /** End-user safety identifier for content moderation */
109
+ safetyIdentifier?: string;
110
+ /** Callback URL for async task status push notifications */
111
+ callbackUrl?: string;
112
+ /** Image detail level control: 'auto' | 'low' | 'high' */
113
+ detail?: "auto" | "low" | "high";
114
+ /** Max image pixels budget (Volcengine image_pixel_limit) */
115
+ imagePixelLimit?: number;
116
+ }
117
+ export interface MediaResult {
118
+ /** URLs of generated media files */
119
+ mediaUrls: string[];
120
+ /** Model actually used */
121
+ model?: string;
122
+ /** Output dimensions / format info */
123
+ size?: string;
124
+ /** Total generation time in ms */
125
+ durationMs?: number;
126
+ /** Billing unit type for non-token models */
127
+ billingUnit?: "per_call" | "per_second" | "per_character" | "per_pixel" | "per_token";
128
+ /** Quantity consumed (seconds, characters, pixels, etc.) */
129
+ billingQuantity?: number;
130
+ /** Provider-specific metadata */
131
+ metadata?: Record<string, unknown>;
132
+ /** Last frame image URL for chaining continuous video generation */
133
+ lastFrameUrl?: string;
134
+ /** Task ID (for continuing draft→final or querying) */
135
+ taskId?: string;
136
+ }
137
+ export interface MediaTransport {
138
+ /**
139
+ * Generate media content.
140
+ * Handles sync APIs directly and async job APIs (submit + poll) internally.
141
+ *
142
+ * @param request - Generation parameters
143
+ * @param apiKey - User API key (passed explicitly, not from env)
144
+ * @param signal - Optional abort signal
+ * @returns Promise resolving to the MediaResult for the request
145
+ */
146
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
147
+ /**
148
+ * Which media types this transport supports.
149
+ * Used by the media client factory to route requests.
+ * NOTE(review): the createMediaTransport() factory visible in media-client.js selects
+ * by provider id; confirm where supportedTypes is actually consulted.
150
+ */
151
+ readonly supportedTypes: readonly MediaType[];
152
+ }
153
+ /**
154
+ * Extended transport for providers that support async task management
155
+ * (submit → poll → cancel/query). Volcengine Seedance, MiniMax async, etc.
156
+ */
157
+ export interface AsyncMediaTransport extends MediaTransport {
158
+ /** Cancel / delete an async task. */
159
+ deleteVideoTask(taskId: string, apiKey: string, signal?: AbortSignal): Promise<void>;
160
+ /** List recent tasks for status queries. */
161
+ listVideoTasks(apiKey: string, options?: {
162
+ after?: string;
163
+ limit?: number;
164
+ status?: string;
165
+ }, signal?: AbortSignal): Promise<Record<string, unknown>>;
166
+ /**
167
+ * Query a single task by ID — preferred over listVideoTasks for direct lookups.
168
+ * Not all providers support listing; all async providers support single-task queries.
+ * NOTE(review): the member is declared optional, so callers must check that it exists
+ * before calling it.
169
+ */
170
+ getTaskStatus?(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
171
+ status: string;
172
+ task: Record<string, unknown>;
173
+ }>;
174
+ }
175
+ /** Type guard: narrows `t` to AsyncMediaTransport when it supports async task management. */
176
+ export declare function isAsyncMediaTransport(t: MediaTransport): t is AsyncMediaTransport;
@@ -0,0 +1,16 @@
1
+ /**
2
+ * MediaTransport — transport interface for generation APIs (image, video, music, 3D).
3
+ *
4
+ * Parallel to LLMTransport (chat/reasoning), MediaTransport handles
5
+ * non-chat generation endpoints that each vendor exposes differently:
6
+ * - Sync (OpenAI images, Volcengine Seedream, Gemini generateContent)
7
+ * - Async job (Volcengine Seedance/3D, MiniMax music) — submit → poll → result
8
+ *
9
+ * Each provider adapter implements this interface and hides vendor-specific
10
+ * auth, endpoint paths, request shapes, and polling logic.
11
+ */
12
/**
 * Type guard for transports that support async task management.
 *
 * A transport qualifies when both `deleteVideoTask` and `listVideoTasks` are functions.
 * `null` and `undefined` are rejected instead of raising a TypeError, so the result of
 * a lookup that may have found nothing can be passed in directly.
 *
 * @param t - Candidate transport (may be null/undefined).
 * @returns `true` when `t` exposes the async task-management methods.
 */
export function isAsyncMediaTransport(t) {
    if (t == null) {
        return false;
    }
    return typeof t.deleteVideoTask === "function"
        && typeof t.listVideoTasks === "function";
}
@@ -0,0 +1,2 @@
1
+ export type { MediaTransport, MediaRequest, MediaResult, MediaType } from "./media-transport.js";
2
+ export { isAsyncMediaTransport } from "./media-transport.js";
package/dist/media.js ADDED
@@ -0,0 +1 @@
1
+ export { isAsyncMediaTransport } from "./media-transport.js";
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Model detection helpers shared between OpenAI transport implementations.
3
+ *
4
+ * These are used by both openai-chat.ts and openai-responses.ts to detect
5
+ * model families and apply family-specific constraints:
6
+ * - GPT-5.x: unified reasoning, temperature allowed, reasoning object format
7
+ * - GPT-5.4-nano: reasoning effort capped at medium
8
+ * - o-series (legacy): reasoning_effort flat string, temperature suppressed
9
+ */
10
+ /** Predicate for GPT-5.x models (new generation with unified reasoning); `model` is the model id string. */
11
+ export declare function isGPT5xModel(model: string): boolean;
12
+ /**
13
+ * GPT-5.4-nano models — reasoning effort capped at medium.
14
+ * openai-ProviderMax §3: gpt-5.4-nano only supports none/low/medium effort.
15
+ */
16
+ export declare function isGPT5NanoModel(model: string): boolean;
17
+ /**
18
+ * OpenAI o-series reasoning models (legacy, kept for 3rd-party provider compat).
19
+ * These suppress temperature/top_p and use reasoning_effort as flat string.
20
+ * Matches: o1, o1-mini, o1-pro, o3, o3-mini, o3-pro, o4-mini, etc.
+ *
+ * @param model - model id string to test
21
+ */
22
+ export declare function isOpenAIReasoningModel(model: string): boolean;