@heyputer/puter.js 2.1.4 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/puter.cjs +2 -2
  2. package/index.d.ts +41 -15
  3. package/package.json +1 -1
  4. package/src/index.js +116 -79
  5. package/src/lib/APICallLogger.js +20 -21
  6. package/src/lib/EventListener.js +10 -10
  7. package/src/lib/filesystem/APIFS.js +11 -19
  8. package/src/lib/filesystem/CacheFS.js +25 -25
  9. package/src/lib/filesystem/PostMessageFS.js +11 -11
  10. package/src/lib/filesystem/definitions.js +11 -10
  11. package/src/lib/path.js +505 -446
  12. package/src/lib/polyfills/fileReaderPoly.js +40 -0
  13. package/src/lib/polyfills/localStorage.js +30 -33
  14. package/src/lib/polyfills/xhrshim.js +206 -207
  15. package/src/lib/utils.js +160 -151
  16. package/src/lib/xdrpc.js +9 -9
  17. package/src/modules/AI.js +473 -292
  18. package/src/modules/Apps.js +56 -56
  19. package/src/modules/Auth.js +17 -17
  20. package/src/modules/Debug.js +1 -1
  21. package/src/modules/Drivers.js +41 -41
  22. package/src/modules/FSItem.js +64 -62
  23. package/src/modules/FileSystem/index.js +22 -23
  24. package/src/modules/FileSystem/operations/copy.js +7 -7
  25. package/src/modules/FileSystem/operations/deleteFSEntry.js +14 -12
  26. package/src/modules/FileSystem/operations/getReadUrl.js +16 -14
  27. package/src/modules/FileSystem/operations/mkdir.js +11 -11
  28. package/src/modules/FileSystem/operations/move.js +12 -12
  29. package/src/modules/FileSystem/operations/read.js +10 -10
  30. package/src/modules/FileSystem/operations/readdir.js +28 -28
  31. package/src/modules/FileSystem/operations/rename.js +11 -11
  32. package/src/modules/FileSystem/operations/sign.js +33 -30
  33. package/src/modules/FileSystem/operations/space.js +7 -7
  34. package/src/modules/FileSystem/operations/stat.js +25 -25
  35. package/src/modules/FileSystem/operations/symlink.js +15 -17
  36. package/src/modules/FileSystem/operations/upload.js +151 -122
  37. package/src/modules/FileSystem/operations/write.js +16 -12
  38. package/src/modules/FileSystem/utils/getAbsolutePathForApp.js +10 -6
  39. package/src/modules/Hosting.js +29 -29
  40. package/src/modules/KV.js +23 -23
  41. package/src/modules/OS.js +15 -15
  42. package/src/modules/Perms.js +19 -21
  43. package/src/modules/PuterDialog.js +46 -48
  44. package/src/modules/Threads.js +17 -20
  45. package/src/modules/UI.js +156 -156
  46. package/src/modules/Util.js +3 -3
  47. package/src/modules/Workers.js +52 -49
  48. package/src/modules/networking/PSocket.js +38 -38
  49. package/src/modules/networking/PTLS.js +54 -47
  50. package/src/modules/networking/PWispHandler.js +49 -47
  51. package/src/modules/networking/parsers.js +110 -108
  52. package/src/modules/networking/requests.js +67 -78
  53. package/src/services/APIAccess.js +9 -9
  54. package/src/services/FSRelay.js +6 -6
  55. package/src/services/Filesystem.js +8 -8
  56. package/src/services/NoPuterYet.js +2 -2
  57. package/src/services/XDIncoming.js +1 -1
package/src/modules/AI.js CHANGED
@@ -1,12 +1,13 @@
1
1
  import * as utils from '../lib/utils.js';
2
2
 
3
3
  const normalizeTTSProvider = (value) => {
4
- if (typeof value !== 'string') {
4
+ if ( typeof value !== 'string' ) {
5
5
  return 'aws-polly';
6
6
  }
7
7
  const lower = value.toLowerCase();
8
- if (lower === 'openai') return 'openai';
9
- if (lower === 'aws' || lower === 'polly' || lower === 'aws-polly') return 'aws-polly';
8
+ if ( lower === 'openai' ) return 'openai';
9
+ if ( ['elevenlabs', 'eleven', '11labs', '11-labs', 'eleven-labs', 'elevenlabs-tts'].includes(lower) ) return 'elevenlabs';
10
+ if ( lower === 'aws' || lower === 'polly' || lower === 'aws-polly' ) return 'aws-polly';
10
11
  return value;
11
12
  };
12
13
 
@@ -20,6 +21,13 @@ const TOGETHER_IMAGE_MODEL_PREFIXES = [
20
21
  'sg161222/',
21
22
  'wavymulder/',
22
23
  'prompthero/',
24
+ 'bytedance-seed/',
25
+ 'hidream-ai/',
26
+ 'lykon/',
27
+ 'qwen/',
28
+ 'rundiffusion/',
29
+ 'google/',
30
+ 'ideogram/',
23
31
  ];
24
32
 
25
33
  const TOGETHER_IMAGE_MODEL_KEYWORDS = [
@@ -40,7 +48,7 @@ const TOGETHER_VIDEO_MODEL_PREFIXES = [
40
48
  'wan-ai/',
41
49
  ];
42
50
 
43
- class AI{
51
+ class AI {
44
52
  /**
45
53
  * Creates a new instance with the given authentication token, API origin, and app ID,
46
54
  *
@@ -68,7 +76,7 @@ class AI{
68
76
 
69
77
  /**
70
78
  * Sets the API origin.
71
- *
79
+ *
72
80
  * @param {string} APIOrigin - The new API origin.
73
81
  * @memberof [AI]
74
82
  * @returns {void}
@@ -77,100 +85,165 @@ class AI{
77
85
  this.APIOrigin = APIOrigin;
78
86
  }
79
87
 
80
- /**
88
+ /**
81
89
  * Returns a list of available AI models.
82
90
  * @param {string} provider - The provider to filter the models returned.
83
- * @returns {Object} Object containing lists of available models by provider
91
+ * @returns {Array} Array containing available model objects
84
92
  */
85
- async listModels(provider) {
86
- const modelsByProvider = {};
93
+ async listModels (provider) {
94
+ // Prefer the public API endpoint and fall back to the legacy driver call if needed.
95
+ const headers = this.authToken ? { Authorization: `Bearer ${this.authToken}` } : {};
96
+
97
+ const tryFetchModels = async () => {
98
+ const resp = await fetch(`${this.APIOrigin }/puterai/chat/models/details`, { headers });
99
+ if ( !resp.ok ) return null;
100
+ const data = await resp.json();
101
+ const models = Array.isArray(data?.models) ? data.models : [];
102
+ return provider ? models.filter(model => model.provider === provider) : models;
103
+ };
87
104
 
88
- const models = await puter.drivers.call('puter-chat-completion','ai-chat','models');
105
+ const tryDriverModels = async () => {
106
+ const models = await puter.drivers.call('puter-chat-completion', 'ai-chat', 'models');
107
+ const result = Array.isArray(models?.result) ? models.result : [];
108
+ return provider ? result.filter(model => model.provider === provider) : result;
109
+ };
89
110
 
90
- if (!models || !models.result || !Array.isArray(models.result)) {
91
- return modelsByProvider;
92
- }
93
- models.result.forEach(item => {
94
- if (!item.provider || !item.id) return;
95
- if (provider && item.provider !== provider) return;
96
- if (!modelsByProvider[item.provider]) modelsByProvider[item.provider] = [];
97
- modelsByProvider[item.provider].push(item.id);
98
- });
99
-
100
- return modelsByProvider;
111
+ const models = await (async () => {
112
+ try {
113
+ const apiModels = await tryFetchModels();
114
+ if ( apiModels !== null ) return apiModels;
115
+ } catch (e) {
116
+ // Ignore and fall back to the driver call below.
117
+ }
118
+ try {
119
+ return await tryDriverModels();
120
+ } catch (e) {
121
+ return [];
122
+ }
123
+ })();
124
+
125
+ return models;
101
126
  }
102
127
 
103
128
  /**
104
129
  * Returns a list of all available AI providers
105
130
  * @returns {Array} Array containing providers
106
131
  */
107
- async listModelProviders() {
108
- let providers = [];
109
- const models = await puter.drivers.call('puter-chat-completion','ai-chat','models');
110
-
111
- if (!models || !models.result || !Array.isArray(models.result)) return providers; // if models is invalid then return empty array
112
- providers = new Set(); // Use a Set to store unique providers
113
- models.result.forEach(item => {
114
- if (item.provider) providers.add(item.provider);
132
+ async listModelProviders () {
133
+ const models = await this.listModels();
134
+ const providers = new Set();
135
+ (models ?? []).forEach(item => {
136
+ if ( item?.provider ) providers.add(item.provider);
115
137
  });
116
- providers = Array.from(providers); // Convert Set to an array
117
- return providers;
138
+ return Array.from(providers);
118
139
  }
119
-
140
+
120
141
  img2txt = async (...args) => {
121
- let MAX_INPUT_SIZE = 10 * 1024 * 1024;
142
+ const MAX_INPUT_SIZE = 10 * 1024 * 1024;
143
+ if ( !args || args.length === 0 ) {
144
+ throw { message: 'Arguments are required', code: 'arguments_required' };
145
+ }
146
+
147
+ const isBlobLike = (value) => {
148
+ if ( typeof Blob === 'undefined' ) return false;
149
+ return value instanceof Blob || (typeof File !== 'undefined' && value instanceof File);
150
+ };
151
+ const isPlainObject = (value) => value && typeof value === 'object' && !Array.isArray(value) && !isBlobLike(value);
152
+ const normalizeProvider = (value) => {
153
+ if ( ! value ) return 'aws-textract';
154
+ const normalized = String(value).toLowerCase();
155
+ if ( ['aws', 'textract', 'aws-textract'].includes(normalized) ) return 'aws-textract';
156
+ if ( ['mistral', 'mistral-ocr'].includes(normalized) ) return 'mistral';
157
+ return 'aws-textract';
158
+ };
159
+
122
160
  let options = {};
123
- let testMode = false;
161
+ if ( isPlainObject(args[0]) ) {
162
+ options = { ...args[0] };
163
+ } else {
164
+ options.source = args[0];
165
+ }
124
166
 
125
- // Check that the argument is not undefined or null
126
- if(!args){
127
- throw({message: 'Arguments are required', code: 'arguments_required'});
167
+ let testMode = false;
168
+ for ( let i = 1; i < args.length; i++ ) {
169
+ const value = args[i];
170
+ if ( typeof value === 'boolean' ) {
171
+ testMode = testMode || value;
172
+ } else if ( isPlainObject(value) ) {
173
+ options = { ...options, ...value };
174
+ }
128
175
  }
129
176
 
130
- // if argument is string transform it to the object that the API expects
131
- if (typeof args[0] === 'string' || args[0] instanceof Blob) {
132
- options.source = args[0];
177
+ if ( typeof options.testMode === 'boolean' ) {
178
+ testMode = options.testMode;
133
179
  }
134
180
 
135
- // if input is a blob, transform it to a data URI
136
- if (args[0].source instanceof Blob) {
137
- options.source = await utils.blobToDataUri(args[0].source);
181
+ const provider = normalizeProvider(options.provider);
182
+ delete options.provider;
183
+ delete options.testMode;
184
+
185
+ if ( ! options.source ) {
186
+ throw { message: 'Source is required', code: 'source_required' };
138
187
  }
139
188
 
140
- // check input size
141
- if (options.source.length > this.MAX_INPUT_SIZE) {
142
- throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
189
+ if ( isBlobLike(options.source) ) {
190
+ options.source = await utils.blobToDataUri(options.source);
191
+ } else if ( options.source?.source && isBlobLike(options.source.source) ) {
192
+ // Support shape { source: Blob }
193
+ options.source = await utils.blobToDataUri(options.source.source);
143
194
  }
144
195
 
145
- // determine if test mode is enabled
146
- if (typeof args[1] === 'boolean' && args[1] === true ||
147
- typeof args[2] === 'boolean' && args[2] === true ||
148
- typeof args[3] === 'boolean' && args[3] === true) {
149
- testMode = true;
196
+ if ( typeof options.source === 'string' &&
197
+ options.source.startsWith('data:') &&
198
+ options.source.length > MAX_INPUT_SIZE ) {
199
+ throw { message: `Input size cannot be larger than ${ MAX_INPUT_SIZE}`, code: 'input_too_large' };
150
200
  }
151
-
152
- return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
153
- test_mode: testMode ?? false,
154
- transform: async (result) => {
201
+
202
+ const toText = (result) => {
203
+ if ( ! result ) return '';
204
+ if ( Array.isArray(result.blocks) && result.blocks.length ) {
155
205
  let str = '';
156
- for (let i = 0; i < result?.blocks?.length; i++) {
157
- if("text/textract:LINE" === result.blocks[i].type)
158
- str += result.blocks[i].text + "\n";
206
+ for ( const block of result.blocks ) {
207
+ if ( typeof block?.text !== 'string' ) continue;
208
+ if ( !block.type || block.type === 'text/textract:LINE' || block.type.startsWith('text/') ) {
209
+ str += `${block.text }\n`;
210
+ }
159
211
  }
160
- return str;
212
+ if ( str.trim() ) return str;
161
213
  }
162
- }).call(this, options);
163
- }
214
+ if ( Array.isArray(result.pages) && result.pages.length ) {
215
+ const markdown = result.pages
216
+ .map(page => (page?.markdown || '').trim())
217
+ .filter(Boolean)
218
+ .join('\n\n');
219
+ if ( markdown.trim() ) return markdown;
220
+ }
221
+ if ( typeof result.document_annotation === 'string' ) {
222
+ return result.document_annotation;
223
+ }
224
+ if ( typeof result.text === 'string' ) {
225
+ return result.text;
226
+ }
227
+ return '';
228
+ };
229
+
230
+ const driverCall = utils.make_driver_method(['source'], 'puter-ocr', provider, 'recognize', {
231
+ test_mode: testMode ?? false,
232
+ transform: async (result) => toText(result),
233
+ });
234
+
235
+ return await driverCall.call(this, options);
236
+ };
164
237
 
165
238
  txt2speech = async (...args) => {
166
239
  let MAX_INPUT_SIZE = 3000;
167
240
  let options = {};
168
241
  let testMode = false;
169
242
 
170
- if(!args){
171
- throw({message: 'Arguments are required', code: 'arguments_required'});
243
+ if ( ! args ) {
244
+ throw ({ message: 'Arguments are required', code: 'arguments_required' });
172
245
  }
173
-
246
+
174
247
  // Accept arguments in the following formats:
175
248
  // 1. Shorthand API
176
249
  // puter.ai.txt2speech("Hello world")
@@ -186,123 +259,258 @@ class AI{
186
259
  // puter.ai.txt2speech("Hello world", "en-US")
187
260
  // puter.ai.txt2speech("Hello world", "en-US", "Joanna")
188
261
  // puter.ai.txt2speech("Hello world", "en-US", "Joanna", "neural")
189
- //
262
+ //
190
263
  // Undefined parameters will be set to default values:
191
264
  // - voice: "Joanna"
192
265
  // - engine: "standard"
193
266
  // - language: "en-US"
194
267
 
195
-
196
- if (typeof args[0] === 'string') {
268
+ if ( typeof args[0] === 'string' ) {
197
269
  options = { text: args[0] };
198
270
  }
199
271
 
200
- if (args[1] && typeof args[1] === 'object' && !Array.isArray(args[1])) {
272
+ if ( args[1] && typeof args[1] === 'object' && !Array.isArray(args[1]) ) {
201
273
  // for verbose object API
202
274
  Object.assign(options, args[1]);
203
- } else if (args[1] && typeof args[1] === 'string') {
275
+ } else if ( args[1] && typeof args[1] === 'string' ) {
204
276
  // for legacy positional-arguments API
205
- //
277
+ //
206
278
  // puter.ai.txt2speech(<text>, <language>, <voice>, <engine>)
207
279
  options.language = args[1];
208
-
209
- if (args[2] && typeof args[2] === 'string') {
280
+
281
+ if ( args[2] && typeof args[2] === 'string' ) {
210
282
  options.voice = args[2];
211
283
  }
212
-
213
- if (args[3] && typeof args[3] === 'string') {
284
+
285
+ if ( args[3] && typeof args[3] === 'string' ) {
214
286
  options.engine = args[3];
215
287
  }
216
- } else if (args[1] && typeof args[1] !== 'boolean') {
288
+ } else if ( args[1] && typeof args[1] !== 'boolean' ) {
217
289
  // If second argument is not an object, string, or boolean, throw an error
218
290
  throw { message: 'Second argument must be an options object or language string. Use: txt2speech("text", { voice: "name", engine: "type", language: "code" }) or txt2speech("text", "language", "voice", "engine")', code: 'invalid_arguments' };
219
291
  }
220
292
 
221
293
  // Validate required text parameter
222
- if (!options.text) {
294
+ if ( ! options.text ) {
223
295
  throw { message: 'Text parameter is required', code: 'text_required' };
224
296
  }
225
297
 
226
298
  const validEngines = ['standard', 'neural', 'long-form', 'generative'];
227
299
  let provider = normalizeTTSProvider(options.provider);
228
300
 
229
- if (options.engine && normalizeTTSProvider(options.engine) === 'openai' && !options.provider) {
301
+ if ( options.engine && normalizeTTSProvider(options.engine) === 'openai' && !options.provider ) {
230
302
  provider = 'openai';
231
303
  }
232
304
 
233
- if (provider === 'openai') {
234
- if (!options.model && typeof options.engine === 'string') {
305
+ if ( options.engine && normalizeTTSProvider(options.engine) === 'elevenlabs' && !options.provider ) {
306
+ provider = 'elevenlabs';
307
+ }
308
+
309
+ if ( provider === 'openai' ) {
310
+ if ( !options.model && typeof options.engine === 'string' ) {
235
311
  options.model = options.engine;
236
312
  }
237
- if (!options.voice) {
313
+ if ( ! options.voice ) {
238
314
  options.voice = 'alloy';
239
315
  }
240
- if (!options.model) {
316
+ if ( ! options.model ) {
241
317
  options.model = 'gpt-4o-mini-tts';
242
318
  }
243
- if (!options.response_format) {
319
+ if ( ! options.response_format ) {
244
320
  options.response_format = 'mp3';
245
321
  }
246
322
  delete options.engine;
323
+ } else if ( provider === 'elevenlabs' ) {
324
+ if ( ! options.voice ) {
325
+ options.voice = '21m00Tcm4TlvDq8ikWAM';
326
+ }
327
+ if ( ! options.model && typeof options.engine === 'string' ) {
328
+ options.model = options.engine;
329
+ }
330
+ if ( ! options.model ) {
331
+ options.model = 'eleven_multilingual_v2';
332
+ }
333
+ if ( ! options.output_format && !options.response_format ) {
334
+ options.output_format = 'mp3_44100_128';
335
+ }
336
+ if ( options.response_format && !options.output_format ) {
337
+ options.output_format = options.response_format;
338
+ }
339
+ delete options.engine;
247
340
  } else {
248
341
  provider = 'aws-polly';
249
342
 
250
- if (options.engine && !validEngines.includes(options.engine)) {
251
- throw { message: 'Invalid engine. Must be one of: ' + validEngines.join(', '), code: 'invalid_engine' };
343
+ if ( options.engine && !validEngines.includes(options.engine) ) {
344
+ throw { message: `Invalid engine. Must be one of: ${ validEngines.join(', ')}`, code: 'invalid_engine' };
252
345
  }
253
346
 
254
- if (!options.voice) {
347
+ if ( ! options.voice ) {
255
348
  options.voice = 'Joanna';
256
349
  }
257
- if (!options.engine) {
350
+ if ( ! options.engine ) {
258
351
  options.engine = 'standard';
259
352
  }
260
- if (!options.language) {
353
+ if ( ! options.language ) {
261
354
  options.language = 'en-US';
262
355
  }
263
356
  }
264
357
 
265
358
  // check input size
266
- if (options.text.length > MAX_INPUT_SIZE) {
267
- throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
359
+ if ( options.text.length > MAX_INPUT_SIZE ) {
360
+ throw { message: `Input size cannot be larger than ${ MAX_INPUT_SIZE}`, code: 'input_too_large' };
268
361
  }
269
362
 
270
363
  // determine if test mode is enabled (check all arguments for boolean true)
271
- for (let i = 0; i < args.length; i++) {
272
- if (typeof args[i] === 'boolean' && args[i] === true) {
364
+ for ( let i = 0; i < args.length; i++ ) {
365
+ if ( typeof args[i] === 'boolean' && args[i] === true ) {
273
366
  testMode = true;
274
367
  break;
275
368
  }
276
369
  }
277
370
 
278
- const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
371
+ const driverName = provider === 'openai'
372
+ ? 'openai-tts'
373
+ : (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
279
374
 
280
375
  return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', {
281
376
  responseType: 'blob',
282
377
  test_mode: testMode ?? false,
283
378
  transform: async (result) => {
284
379
  let url;
285
- if (typeof result === 'string') {
380
+ if ( typeof result === 'string' ) {
286
381
  url = result;
287
- } else if (result instanceof Blob) {
382
+ } else if ( result instanceof Blob ) {
288
383
  url = await utils.blob_to_url(result);
289
- } else if (result instanceof ArrayBuffer) {
384
+ } else if ( result instanceof ArrayBuffer ) {
290
385
  const blob = new Blob([result]);
291
386
  url = await utils.blob_to_url(blob);
292
- } else if (result && typeof result === 'object' && typeof result.arrayBuffer === 'function') {
387
+ } else if ( result && typeof result === 'object' && typeof result.arrayBuffer === 'function' ) {
293
388
  const arrayBuffer = await result.arrayBuffer();
294
389
  const blob = new Blob([arrayBuffer], { type: result.type || undefined });
295
390
  url = await utils.blob_to_url(blob);
296
391
  } else {
297
392
  throw { message: 'Unexpected audio response format', code: 'invalid_audio_response' };
298
393
  }
299
- const audio = new Audio(url);
394
+ const audio = new (globalThis.Audio || Object)();
395
+ audio.src = url;
300
396
  audio.toString = () => url;
301
397
  audio.valueOf = () => url;
302
398
  return audio;
303
- }
399
+ },
304
400
  }).call(this, options);
305
- }
401
+ };
402
+
403
+ speech2speech = async (...args) => {
404
+ const MAX_INPUT_SIZE = 25 * 1024 * 1024;
405
+ if ( !args || !args.length ) {
406
+ throw ({ message: 'Arguments are required', code: 'arguments_required' });
407
+ }
408
+
409
+ const normalizeSource = async (value) => {
410
+ if ( value instanceof Blob ) {
411
+ return await utils.blobToDataUri(value);
412
+ }
413
+ return value;
414
+ };
415
+
416
+ const normalizeOptions = (opts = {}) => {
417
+ const normalized = { ...opts };
418
+ if ( normalized.voiceId && !normalized.voice && !normalized.voice_id ) normalized.voice = normalized.voiceId;
419
+ if ( normalized.modelId && !normalized.model && !normalized.model_id ) normalized.model = normalized.modelId;
420
+ if ( normalized.outputFormat && !normalized.output_format ) normalized.output_format = normalized.outputFormat;
421
+ if ( normalized.voiceSettings && !normalized.voice_settings ) normalized.voice_settings = normalized.voiceSettings;
422
+ if ( normalized.fileFormat && !normalized.file_format ) normalized.file_format = normalized.fileFormat;
423
+ if ( normalized.removeBackgroundNoise !== undefined && normalized.remove_background_noise === undefined ) {
424
+ normalized.remove_background_noise = normalized.removeBackgroundNoise;
425
+ }
426
+ if ( normalized.optimizeStreamingLatency !== undefined && normalized.optimize_streaming_latency === undefined ) {
427
+ normalized.optimize_streaming_latency = normalized.optimizeStreamingLatency;
428
+ }
429
+ if ( normalized.enableLogging !== undefined && normalized.enable_logging === undefined ) {
430
+ normalized.enable_logging = normalized.enableLogging;
431
+ }
432
+ delete normalized.voiceId;
433
+ delete normalized.modelId;
434
+ delete normalized.outputFormat;
435
+ delete normalized.voiceSettings;
436
+ delete normalized.fileFormat;
437
+ delete normalized.removeBackgroundNoise;
438
+ delete normalized.optimizeStreamingLatency;
439
+ delete normalized.enableLogging;
440
+ return normalized;
441
+ };
442
+
443
+ let options = {};
444
+ let testMode = false;
445
+
446
+ const primary = args[0];
447
+ if ( primary && typeof primary === 'object' && !Array.isArray(primary) && !(primary instanceof Blob) ) {
448
+ options = { ...primary };
449
+ } else {
450
+ options.audio = await normalizeSource(primary);
451
+ }
452
+
453
+ if ( args[1] && typeof args[1] === 'object' && !Array.isArray(args[1]) && !(args[1] instanceof Blob) ) {
454
+ options = { ...options, ...args[1] };
455
+ } else if ( typeof args[1] === 'boolean' ) {
456
+ testMode = args[1];
457
+ }
458
+
459
+ if ( typeof args[2] === 'boolean' ) {
460
+ testMode = args[2];
461
+ }
462
+
463
+ if ( options.file ) {
464
+ options.audio = await normalizeSource(options.file);
465
+ delete options.file;
466
+ }
467
+
468
+ if ( options.audio instanceof Blob ) {
469
+ options.audio = await normalizeSource(options.audio);
470
+ }
471
+
472
+ if ( ! options.audio ) {
473
+ throw { message: 'Audio input is required', code: 'audio_required' };
474
+ }
475
+
476
+ if ( typeof options.audio === 'string' && options.audio.startsWith('data:') ) {
477
+ const base64 = options.audio.split(',')[1] || '';
478
+ const padding = base64.endsWith('==') ? 2 : (base64.endsWith('=') ? 1 : 0);
479
+ const byteLength = Math.floor((base64.length * 3) / 4) - padding;
480
+ if ( byteLength > MAX_INPUT_SIZE ) {
481
+ throw { message: 'Input size cannot be larger than 25 MB', code: 'input_too_large' };
482
+ }
483
+ }
484
+
485
+ const driverArgs = normalizeOptions({ ...options });
486
+ delete driverArgs.provider;
487
+
488
+ return await utils.make_driver_method(['audio'], 'puter-speech2speech', 'elevenlabs-voice-changer', 'convert', {
489
+ responseType: 'blob',
490
+ test_mode: testMode,
491
+ transform: async (result) => {
492
+ let url;
493
+ if ( typeof result === 'string' ) {
494
+ url = result;
495
+ } else if ( result instanceof Blob ) {
496
+ url = await utils.blob_to_url(result);
497
+ } else if ( result instanceof ArrayBuffer ) {
498
+ const blob = new Blob([result]);
499
+ url = await utils.blob_to_url(blob);
500
+ } else if ( result && typeof result === 'object' && typeof result.arrayBuffer === 'function' ) {
501
+ const arrayBuffer = await result.arrayBuffer();
502
+ const blob = new Blob([arrayBuffer], { type: result.type || undefined });
503
+ url = await utils.blob_to_url(blob);
504
+ } else {
505
+ throw { message: 'Unexpected audio response format', code: 'invalid_audio_response' };
506
+ }
507
+ const audio = new Audio(url);
508
+ audio.toString = () => url;
509
+ audio.valueOf = () => url;
510
+ return audio;
511
+ },
512
+ }).call(this, driverArgs);
513
+ };
306
514
 
307
515
  speech2txt = async (...args) => {
308
516
  const MAX_INPUT_SIZE = 25 * 1024 * 1024;
@@ -346,7 +554,7 @@ class AI{
346
554
  options.file = await normalizeSource(options.file);
347
555
  }
348
556
 
349
- if ( !options.file ) {
557
+ if ( ! options.file ) {
350
558
  throw { message: 'Audio input is required', code: 'audio_required' };
351
559
  }
352
560
 
@@ -374,7 +582,7 @@ class AI{
374
582
  return result;
375
583
  },
376
584
  }).call(this, driverArgs);
377
- }
585
+ };
378
586
 
379
587
  // Add new methods for TTS engine management
380
588
  txt2speech = Object.assign(this.txt2speech, {
@@ -386,19 +594,25 @@ class AI{
386
594
  let provider = 'aws-polly';
387
595
  let params = {};
388
596
 
389
- if (typeof options === 'string') {
597
+ if ( typeof options === 'string' ) {
390
598
  provider = normalizeTTSProvider(options);
391
- } else if (options && typeof options === 'object') {
599
+ } else if ( options && typeof options === 'object' ) {
392
600
  provider = normalizeTTSProvider(options.provider) || provider;
393
601
  params = { ...options };
394
602
  delete params.provider;
395
603
  }
396
604
 
397
- if (provider === 'openai') {
605
+ if ( provider === 'openai' ) {
398
606
  params.provider = 'openai';
399
607
  }
400
608
 
401
- const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
609
+ if ( provider === 'elevenlabs' ) {
610
+ params.provider = 'elevenlabs';
611
+ }
612
+
613
+ const driverName = provider === 'openai'
614
+ ? 'openai-tts'
615
+ : (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
402
616
 
403
617
  return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', {
404
618
  responseType: 'text',
@@ -414,28 +628,33 @@ class AI{
414
628
  let provider = 'aws-polly';
415
629
  let params = {};
416
630
 
417
- if (typeof options === 'string') {
631
+ if ( typeof options === 'string' ) {
418
632
  params.engine = options;
419
- } else if (options && typeof options === 'object') {
633
+ } else if ( options && typeof options === 'object' ) {
420
634
  provider = normalizeTTSProvider(options.provider) || provider;
421
635
  params = { ...options };
422
636
  delete params.provider;
423
637
  }
424
638
 
425
- if (provider === 'openai') {
639
+ if ( provider === 'openai' ) {
426
640
  params.provider = 'openai';
427
641
  delete params.engine;
428
642
  }
429
643
 
430
- const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
644
+ if ( provider === 'elevenlabs' ) {
645
+ params.provider = 'elevenlabs';
646
+ }
647
+
648
+ const driverName = provider === 'openai'
649
+ ? 'openai-tts'
650
+ : (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
431
651
 
432
652
  return utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_voices', {
433
653
  responseType: 'text',
434
654
  }).call(this, params);
435
- }
655
+ },
436
656
  });
437
657
 
438
-
439
658
  // accepts either a string or an array of message objects
440
659
  // if string, it's treated as the prompt which is a shorthand for { messages: [{ content: prompt }] }
441
660
  // if object, it's treated as the full argument object that the API expects
@@ -450,100 +669,99 @@ class AI{
450
669
  let driver = 'openai-completion';
451
670
 
452
671
  // Check that the argument is not undefined or null
453
- if(!args){
454
- throw({message: 'Arguments are required', code: 'arguments_required'});
672
+ if ( ! args ) {
673
+ throw ({ message: 'Arguments are required', code: 'arguments_required' });
455
674
  }
456
675
 
457
676
  // ai.chat(prompt)
458
- if(typeof args[0] === 'string'){
677
+ if ( typeof args[0] === 'string' ) {
459
678
  requestParams = { messages: [{ content: args[0] }] };
460
679
  }
461
680
 
462
681
  // ai.chat(prompt, testMode)
463
- if (typeof args[0] === 'string' && (!args[1] || typeof args[1] === 'boolean')) {
682
+ if ( typeof args[0] === 'string' && (!args[1] || typeof args[1] === 'boolean') ) {
464
683
  requestParams = { messages: [{ content: args[0] }] };
465
684
  }
466
685
 
467
686
  // ai.chat(prompt, imageURL/File)
468
687
  // ai.chat(prompt, imageURL/File, testMode)
469
- else if (typeof args[0] === 'string' && (typeof args[1] === 'string' || args[1] instanceof File)) {
688
+ else if ( typeof args[0] === 'string' && (typeof args[1] === 'string' || args[1] instanceof File) ) {
470
689
  // if imageURL is a File, transform it to a data URI
471
- if(args[1] instanceof File){
690
+ if ( args[1] instanceof File ) {
472
691
  args[1] = await utils.blobToDataUri(args[1]);
473
692
  }
474
693
 
475
694
  // parse args[1] as an image_url object
476
- requestParams = {
695
+ requestParams = {
477
696
  vision: true,
478
697
  messages: [
479
- {
698
+ {
480
699
  content: [
481
700
  args[0],
482
701
  {
483
702
  image_url: {
484
- url: args[1]
485
- }
486
- }
487
- ],
488
- }
489
- ]
703
+ url: args[1],
704
+ },
705
+ },
706
+ ],
707
+ },
708
+ ],
490
709
  };
491
710
  }
492
711
  // chat(prompt, [imageURLs])
493
- else if (typeof args[0] === 'string' && Array.isArray(args[1])) {
712
+ else if ( typeof args[0] === 'string' && Array.isArray(args[1]) ) {
494
713
  // parse args[1] as an array of image_url objects
495
- for (let i = 0; i < args[1].length; i++) {
714
+ for ( let i = 0; i < args[1].length; i++ ) {
496
715
  args[1][i] = { image_url: { url: args[1][i] } };
497
716
  }
498
- requestParams = {
717
+ requestParams = {
499
718
  vision: true,
500
719
  messages: [
501
- {
720
+ {
502
721
  content: [
503
722
  args[0],
504
- ...args[1]
505
- ],
506
- }
507
- ]
723
+ ...args[1],
724
+ ],
725
+ },
726
+ ],
508
727
  };
509
728
  }
510
729
  // chat([messages])
511
- else if (Array.isArray(args[0])) {
730
+ else if ( Array.isArray(args[0]) ) {
512
731
  requestParams = { messages: args[0] };
513
732
  }
514
733
 
515
734
  // determine if testMode is enabled
516
- if (typeof args[1] === 'boolean' && args[1] === true ||
735
+ if ( typeof args[1] === 'boolean' && args[1] === true ||
517
736
  typeof args[2] === 'boolean' && args[2] === true ||
518
- typeof args[3] === 'boolean' && args[3] === true) {
737
+ typeof args[3] === 'boolean' && args[3] === true ) {
519
738
  testMode = true;
520
739
  }
521
-
740
+
522
741
  // if any of the args is an object, assume it's the user parameters object
523
742
  const is_object = v => {
524
743
  return typeof v === 'object' &&
525
744
  !Array.isArray(v) &&
526
745
  v !== null;
527
746
  };
528
- for (let i = 0; i < args.length; i++) {
529
- if (is_object(args[i])) {
747
+ for ( let i = 0; i < args.length; i++ ) {
748
+ if ( is_object(args[i]) ) {
530
749
  userParams = args[i];
531
750
  break;
532
751
  }
533
752
  }
534
753
 
535
-
536
754
  // Copy relevant parameters from userParams to requestParams
537
- if (userParams.model) {
755
+ if ( userParams.model ) {
538
756
  requestParams.model = userParams.model;
539
757
  }
540
- if (userParams.temperature) {
758
+ if ( userParams.temperature ) {
541
759
  requestParams.temperature = userParams.temperature;
542
760
  }
543
- if (userParams.max_tokens) {
761
+ if ( userParams.max_tokens ) {
544
762
  requestParams.max_tokens = userParams.max_tokens;
545
763
  }
546
-
764
+
547
765
  // convert undefined to empty string so that .startsWith works
548
766
  requestParams.model = requestParams.model ?? '';
549
767
 
@@ -552,21 +770,21 @@ class AI{
552
770
  // for example: "claude-3-5-sonnet" should become "anthropic/claude-3-5-sonnet"
553
771
  // but for now, we want to keep the old behavior
554
772
  // so we remove the "anthropic/" prefix if it exists
555
- if (requestParams.model && requestParams.model.startsWith('anthropic/')) {
773
+ if ( requestParams.model && requestParams.model.startsWith('anthropic/') ) {
556
774
  requestParams.model = requestParams.model.replace('anthropic/', '');
557
775
  }
558
776
 
559
777
  // convert to the correct model name if necessary
560
- if( requestParams.model === 'claude-3-5-sonnet'){
778
+ if ( requestParams.model === 'claude-3-5-sonnet' ) {
561
779
  requestParams.model = 'claude-3-5-sonnet-latest';
562
780
  }
563
- if( requestParams.model === 'claude-3-7-sonnet' || requestParams.model === 'claude'){
781
+ if ( requestParams.model === 'claude-3-7-sonnet' || requestParams.model === 'claude' ) {
564
782
  requestParams.model = 'claude-3-7-sonnet-latest';
565
783
  }
566
- if( requestParams.model === 'claude-sonnet-4' || requestParams.model === 'claude-sonnet-4-latest'){
784
+ if ( requestParams.model === 'claude-sonnet-4' || requestParams.model === 'claude-sonnet-4-latest' ) {
567
785
  requestParams.model = 'claude-sonnet-4-20250514';
568
786
  }
569
- if( requestParams.model === 'claude-opus-4' || requestParams.model === 'claude-opus-4-latest') {
787
+ if ( requestParams.model === 'claude-opus-4' || requestParams.model === 'claude-opus-4-latest' ) {
570
788
  requestParams.model = 'claude-opus-4-20250514';
571
789
  }
572
790
  if ( requestParams.model === 'mistral' ) {
@@ -580,73 +798,22 @@ class AI{
580
798
  }
581
799
 
582
800
  // o1-mini to openrouter:openai/o1-mini
583
- if ( requestParams.model === 'o1-mini') {
801
+ if ( requestParams.model === 'o1-mini' ) {
584
802
  requestParams.model = 'openrouter:openai/o1-mini';
585
803
  }
586
804
 
587
805
  // if a model is prepended with "openai/", remove it
588
- if (requestParams.model && requestParams.model.startsWith('openai/')) {
806
+ if ( requestParams.model && requestParams.model.startsWith('openai/') ) {
589
807
  requestParams.model = requestParams.model.replace('openai/', '');
590
808
  driver = 'openai-completion';
591
809
  }
592
-
593
- // if model starts with:
594
- // agentica-org/
595
- // ai21/
596
- // aion-labs/
597
- // alfredpros/
598
- // alpindale/
599
- // amazon/
600
- // anthracite-org/
601
- // arcee-ai/
602
- // arliai/
603
- // baidu/
604
- // bytedance/
605
- // cognitivecomputations/
606
- // cohere/
607
- // deepseek/
608
- // eleutherai/
609
- // google/
610
- // gryphe/
611
- // inception/
612
- // infermatic/
613
- // liquid/
614
- // mancer/
615
- // meta-llama/
616
- // microsoft/
617
- // minimax/
618
- // mistralai/
619
- // moonshotai/
620
- // morph/
621
- // neversleep/
622
- // nousresearch/
623
- // nvidia/
624
- // openrouter/
625
- // perplexity/
626
- // pygmalionai/
627
- // qwen/
628
- // raifle/
629
- // rekaai/
630
- // sao10k/
631
- // sarvamai/
632
- // scb10x/
633
- // shisa-ai/
634
- // sophosympatheia/
635
- // switchpoint/
636
- // tencent/
637
- // thedrummer/
638
- // thudm/
639
- // tngtech/
640
- // undi95/
641
- // x-ai/
642
- // z-ai/
643
-
644
- // prepend it with openrouter:
645
- if (
810
+ // For the following providers, we need to prepend "openrouter:" to the model name so that the backend driver can handle it
811
+ if (
646
812
  requestParams.model.startsWith('agentica-org/') ||
647
813
  requestParams.model.startsWith('ai21/') ||
648
814
  requestParams.model.startsWith('aion-labs/') ||
649
815
  requestParams.model.startsWith('alfredpros/') ||
816
+ requestParams.model.startsWith('allenai/') ||
650
817
  requestParams.model.startsWith('alpindale/') ||
651
818
  requestParams.model.startsWith('amazon/') ||
652
819
  requestParams.model.startsWith('anthracite-org/') ||
@@ -656,9 +823,9 @@ class AI{
656
823
  requestParams.model.startsWith('bytedance/') ||
657
824
  requestParams.model.startsWith('cognitivecomputations/') ||
658
825
  requestParams.model.startsWith('cohere/') ||
659
- requestParams.model.startsWith('deepseek/') ||
826
+ requestParams.model.startsWith('deepseek/') ||
660
827
  requestParams.model.startsWith('eleutherai/') ||
661
- requestParams.model.startsWith('google/') ||
828
+ requestParams.model.startsWith('google/') ||
662
829
  requestParams.model.startsWith('gryphe/') ||
663
830
  requestParams.model.startsWith('inception/') ||
664
831
  requestParams.model.startsWith('infermatic/') ||
@@ -676,7 +843,7 @@ class AI{
676
843
  requestParams.model.startsWith('openrouter/') ||
677
844
  requestParams.model.startsWith('perplexity/') ||
678
845
  requestParams.model.startsWith('pygmalionai/') ||
679
- requestParams.model.startsWith('qwen/') ||
846
+ requestParams.model.startsWith('qwen/') ||
680
847
  requestParams.model.startsWith('raifle/') ||
681
848
  requestParams.model.startsWith('rekaai/') ||
682
849
  requestParams.model.startsWith('sao10k/') ||
@@ -690,77 +857,85 @@ class AI{
690
857
  requestParams.model.startsWith('thudm/') ||
691
858
  requestParams.model.startsWith('tngtech/') ||
692
859
  requestParams.model.startsWith('undi95/') ||
693
- requestParams.model.startsWith('x-ai/') ||
860
+ requestParams.model.startsWith('x-ai/') ||
694
861
  requestParams.model.startsWith('z-ai/')
695
862
  ) {
696
- requestParams.model = 'openrouter:' + requestParams.model;
863
+ requestParams.model = `openrouter:${ requestParams.model}`;
697
864
  }
698
865
 
699
866
  // map model to the appropriate driver
700
- if (!requestParams.model || requestParams.model.startsWith('gpt-')) {
867
+ if ( !requestParams.model || requestParams.model.startsWith('gpt-') ) {
701
868
  driver = 'openai-completion';
702
- }else if(
869
+ } else if (
703
870
  requestParams.model.startsWith('claude-')
704
- ){
871
+ ) {
705
872
  driver = 'claude';
706
- }else if(requestParams.model === 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' || requestParams.model === 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' || requestParams.model === 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo' || requestParams.model === `google/gemma-2-27b-it`){
873
+ } else if ( requestParams.model === 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' || requestParams.model === 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' || requestParams.model === 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo' || requestParams.model === 'google/gemma-2-27b-it' ) {
707
874
  driver = 'together-ai';
708
- }else if(requestParams.model.startsWith('mistral-') || requestParams.model.startsWith('codestral-') || requestParams.model.startsWith('pixtral-') || requestParams.model.startsWith('magistral-') || requestParams.model.startsWith('devstral-') || requestParams.model.startsWith('mistral-ocr-') || requestParams.model.startsWith('open-mistral-')){
875
+ } else if ( requestParams.model.startsWith('mistral-') || requestParams.model.startsWith('codestral-') || requestParams.model.startsWith('pixtral-') || requestParams.model.startsWith('magistral-') || requestParams.model.startsWith('devstral-') || requestParams.model.startsWith('mistral-ocr-') || requestParams.model.startsWith('open-mistral-') ) {
709
876
  driver = 'mistral';
710
- }else if([
711
- "distil-whisper-large-v3-en",
712
- "gemma2-9b-it",
713
- "gemma-7b-it",
714
- "llama-3.1-70b-versatile",
715
- "llama-3.1-8b-instant",
716
- "llama3-70b-8192",
717
- "llama3-8b-8192",
718
- "llama3-groq-70b-8192-tool-use-preview",
719
- "llama3-groq-8b-8192-tool-use-preview",
720
- "llama-guard-3-8b",
721
- "mixtral-8x7b-32768",
722
- "whisper-large-v3"
723
- ].includes(requestParams.model)) {
877
+ } else if ( [
878
+ 'distil-whisper-large-v3-en',
879
+ 'gemma2-9b-it',
880
+ 'gemma-7b-it',
881
+ 'llama-3.1-70b-versatile',
882
+ 'llama-3.1-8b-instant',
883
+ 'llama3-70b-8192',
884
+ 'llama3-8b-8192',
885
+ 'llama3-groq-70b-8192-tool-use-preview',
886
+ 'llama3-groq-8b-8192-tool-use-preview',
887
+ 'llama-guard-3-8b',
888
+ 'mixtral-8x7b-32768',
889
+ 'whisper-large-v3',
890
+ ].includes(requestParams.model) ) {
724
891
  driver = 'groq';
725
- }else if(requestParams.model === 'grok-beta') {
892
+ } else if ( requestParams.model === 'grok-beta' ) {
726
893
  driver = 'xai';
727
894
  }
728
- else if(requestParams.model.startsWith('grok-')){
895
+ else if ( requestParams.model.startsWith('grok-') ) {
729
896
  driver = 'openrouter';
730
897
  }
731
- else if(
898
+ else if (
732
899
  requestParams.model === 'deepseek-chat' ||
733
900
  requestParams.model === 'deepseek-reasoner'
734
- ){
901
+ ) {
735
902
  driver = 'deepseek';
736
903
  }
737
- else if(
904
+ else if (
738
905
  requestParams.model === 'gemini-1.5-flash' ||
739
- requestParams.model === 'gemini-2.0-flash'
740
- ){
906
+ requestParams.model === 'gemini-2.0-flash' ||
907
+ requestParams.model === 'gemini-2.5-flash' ||
908
+ requestParams.model === 'gemini-2.5-flash-lite' ||
909
+ requestParams.model === 'gemini-2.0-flash-lite' ||
910
+ requestParams.model === 'gemini-3-pro-preview' ||
911
+ requestParams.model === 'gemini-2.5-pro'
912
+ ) {
741
913
  driver = 'gemini';
742
914
  }
743
915
  else if ( requestParams.model.startsWith('openrouter:') ) {
744
916
  driver = 'openrouter';
745
917
  }
918
+ else if ( requestParams.model.startsWith('ollama:') ) {
919
+ driver = 'ollama';
920
+ }
746
921
 
747
922
  // stream flag from userParams
748
- if(userParams.stream !== undefined && typeof userParams.stream === 'boolean'){
923
+ if ( userParams.stream !== undefined && typeof userParams.stream === 'boolean' ) {
749
924
  requestParams.stream = userParams.stream;
750
925
  }
751
-
926
+
752
927
  if ( userParams.driver ) {
753
928
  driver = userParams.driver;
754
929
  }
755
930
 
756
931
  // Additional parameters to pass from userParams to requestParams
757
- const PARAMS_TO_PASS = ['tools', 'response'];
932
+ const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
758
933
  for ( const name of PARAMS_TO_PASS ) {
759
934
  if ( userParams[name] ) {
760
935
  requestParams[name] = userParams[name];
761
936
  }
762
937
  }
763
-
938
+
764
939
  if ( requestParams.model === '' ) {
765
940
  delete requestParams.model;
766
941
  }
@@ -775,16 +950,16 @@ class AI{
775
950
 
776
951
  result.valueOf = () => {
777
952
  return result.message?.content;
778
- }
953
+ };
779
954
 
780
955
  return result;
781
- }
956
+ },
782
957
  }).call(this, requestParams);
783
- }
958
+ };
784
959
 
785
960
  /**
786
961
  * Generate images from text prompts or perform image-to-image generation
787
- *
962
+ *
788
963
  * @param {string|object} prompt - Text prompt or options object
789
964
  * @param {object|boolean} [options] - Generation options or test mode flag
790
965
  * @param {string} [options.prompt] - Text description of the image to generate
@@ -793,11 +968,11 @@ class AI{
793
968
  * @param {string} [options.input_image] - Base64 encoded input image for image-to-image generation
794
969
  * @param {string} [options.input_image_mime_type] - MIME type of input image (e.g., "image/png")
795
970
  * @returns {Promise<Image>} Generated image object with src property
796
- *
971
+ *
797
972
  * @example
798
973
  * // Text-to-image
799
974
  * const img = await puter.ai.txt2img("A beautiful sunset");
800
- *
975
+ *
801
976
  * @example
802
977
  * // Image-to-image
803
978
  * const img = await puter.ai.txt2img({
@@ -811,32 +986,38 @@ class AI{
811
986
  let options = {};
812
987
  let testMode = false;
813
988
 
814
- if(!args){
815
- throw({message: 'Arguments are required', code: 'arguments_required'});
989
+ if ( ! args ) {
990
+ throw ({ message: 'Arguments are required', code: 'arguments_required' });
816
991
  }
817
992
 
818
993
  // if argument is string transform it to the object that the API expects
819
- if (typeof args[0] === 'string') {
994
+ if ( typeof args[0] === 'string' ) {
820
995
  options = { prompt: args[0] };
821
996
  }
822
997
 
823
998
  // if second argument is string, it's the `testMode`
824
- if (typeof args[1] === 'boolean' && args[1] === true) {
999
+ if ( typeof args[1] === 'boolean' && args[1] === true ) {
825
1000
  testMode = true;
826
1001
  }
827
1002
 
828
- if (typeof args[0] === 'string' && typeof args[1] === "object") {
1003
+ if ( typeof args[0] === 'string' && typeof args[1] === 'object' ) {
829
1004
  options = args[1];
830
1005
  options.prompt = args[0];
831
1006
  }
832
1007
 
833
- if (typeof args[0] === 'object') {
834
- options = args[0]
1008
+ if ( typeof args[0] === 'object' ) {
1009
+ options = args[0];
1010
+ }
1011
+
1012
+ let AIService = 'openai-image-generation';
1013
+ if ( options.model === 'nano-banana' )
1014
+ {
1015
+ options.model = 'gemini-2.5-flash-image-preview';
835
1016
  }
836
1017
 
837
- let AIService = "openai-image-generation"
838
- if (options.model === "nano-banana")
839
- options.model = "gemini-2.5-flash-image-preview";
1018
+ if (options.model === "nano-banana-pro") {
1019
+ options.model = "gemini-3-pro-image-preview";
1020
+ }
840
1021
 
841
1022
  const driverHint = typeof options.driver === 'string' ? options.driver : undefined;
842
1023
  const providerRaw = typeof options.provider === 'string'
@@ -850,16 +1031,16 @@ class AI{
850
1031
  (TOGETHER_IMAGE_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)) ||
851
1032
  TOGETHER_IMAGE_MODEL_KEYWORDS.some(keyword => modelLower.includes(keyword)));
852
1033
 
853
- if (driverHint) {
1034
+ if ( driverHint ) {
854
1035
  AIService = driverHint;
855
- } else if (providerHint === 'gemini') {
856
- AIService = "gemini-image-generation";
857
- } else if (providerHint === 'together' || providerHint === 'together-ai') {
858
- AIService = "together-image-generation";
859
- } else if (options.model === "gemini-2.5-flash-image-preview") {
860
- AIService = "gemini-image-generation";
861
- } else if (looksLikeTogetherModel) {
862
- AIService = "together-image-generation";
1036
+ } else if ( providerHint === 'gemini' ) {
1037
+ AIService = 'gemini-image-generation';
1038
+ } else if ( providerHint === 'together' || providerHint === 'together-ai' ) {
1039
+ AIService = 'together-image-generation';
1040
+ } else if (options.model === 'gemini-2.5-flash-image-preview' || options.model === "gemini-3-pro-image-preview" ) {
1041
+ AIService = 'gemini-image-generation';
1042
+ } else if ( looksLikeTogetherModel ) {
1043
+ AIService = 'together-image-generation';
863
1044
  }
864
1045
  // Call the original chat.complete method
865
1046
  return await utils.make_driver_method(['prompt'], 'puter-image-generation', AIService, 'generate', {
@@ -881,49 +1062,49 @@ class AI{
881
1062
  } else {
882
1063
  throw { message: 'Unexpected image response format', code: 'invalid_image_response' };
883
1064
  }
884
- let img = new Image();
1065
+ let img = new (globalThis.Image || Object)();
885
1066
  img.src = url;
886
1067
  img.toString = () => img.src;
887
1068
  img.valueOf = () => img.src;
888
1069
  return img;
889
- }
1070
+ },
890
1071
  }).call(this, options);
891
- }
1072
+ };
892
1073
 
893
1074
  txt2vid = async (...args) => {
894
1075
  let options = {};
895
1076
  let testMode = false;
896
1077
 
897
- if(!args){
898
- throw({message: 'Arguments are required', code: 'arguments_required'});
1078
+ if ( ! args ) {
1079
+ throw ({ message: 'Arguments are required', code: 'arguments_required' });
899
1080
  }
900
1081
 
901
- if (typeof args[0] === 'string') {
1082
+ if ( typeof args[0] === 'string' ) {
902
1083
  options = { prompt: args[0] };
903
1084
  }
904
1085
 
905
- if (typeof args[1] === 'boolean' && args[1] === true) {
1086
+ if ( typeof args[1] === 'boolean' && args[1] === true ) {
906
1087
  testMode = true;
907
1088
  }
908
1089
 
909
- if (typeof args[0] === 'string' && typeof args[1] === "object") {
1090
+ if ( typeof args[0] === 'string' && typeof args[1] === 'object' ) {
910
1091
  options = args[1];
911
1092
  options.prompt = args[0];
912
1093
  }
913
1094
 
914
- if (typeof args[0] === 'object') {
1095
+ if ( typeof args[0] === 'object' ) {
915
1096
  options = args[0];
916
1097
  }
917
1098
 
918
- if (!options.prompt) {
919
- throw({message: 'Prompt parameter is required', code: 'prompt_required'});
1099
+ if ( ! options.prompt ) {
1100
+ throw ({ message: 'Prompt parameter is required', code: 'prompt_required' });
920
1101
  }
921
1102
 
922
- if (!options.model) {
1103
+ if ( ! options.model ) {
923
1104
  options.model = 'sora-2';
924
1105
  }
925
1106
 
926
- if (options.duration !== undefined && options.seconds === undefined) {
1107
+ if ( options.duration !== undefined && options.seconds === undefined ) {
927
1108
  options.seconds = options.duration;
928
1109
  }
929
1110
 
@@ -939,17 +1120,17 @@ class AI{
939
1120
  const looksLikeTogetherVideoModel = typeof options.model === 'string' &&
940
1121
  TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix));
941
1122
 
942
- if (driverHintLower === 'together' || driverHintLower === 'together-ai') {
1123
+ if ( driverHintLower === 'together' || driverHintLower === 'together-ai' ) {
943
1124
  videoService = 'together-video-generation';
944
- } else if (driverHintLower === 'together-video-generation') {
1125
+ } else if ( driverHintLower === 'together-video-generation' ) {
945
1126
  videoService = 'together-video-generation';
946
- } else if (driverHintLower === 'openai') {
1127
+ } else if ( driverHintLower === 'openai' ) {
947
1128
  videoService = 'openai-video-generation';
948
- } else if (driverHint) {
1129
+ } else if ( driverHint ) {
949
1130
  videoService = driverHint;
950
- } else if (providerHint === 'together' || providerHint === 'together-ai') {
1131
+ } else if ( providerHint === 'together' || providerHint === 'together-ai' ) {
951
1132
  videoService = 'together-video-generation';
952
- } else if (looksLikeTogetherVideoModel) {
1133
+ } else if ( looksLikeTogetherVideoModel ) {
953
1134
  videoService = 'together-video-generation';
954
1135
  }
955
1136
 
@@ -959,34 +1140,34 @@ class AI{
959
1140
  transform: async result => {
960
1141
  let sourceUrl = null;
961
1142
  let mimeType = null;
962
- if (result instanceof Blob) {
1143
+ if ( result instanceof Blob ) {
963
1144
  sourceUrl = await utils.blob_to_url(result);
964
1145
  mimeType = result.type || 'video/mp4';
965
- } else if (typeof result === 'string') {
1146
+ } else if ( typeof result === 'string' ) {
966
1147
  sourceUrl = result;
967
- } else if (result && typeof result === 'object') {
1148
+ } else if ( result && typeof result === 'object' ) {
968
1149
  sourceUrl = result.asset_url || result.url || result.href || null;
969
1150
  mimeType = result.mime_type || result.content_type || null;
970
1151
  }
971
1152
 
972
- if (!sourceUrl) {
1153
+ if ( ! sourceUrl ) {
973
1154
  return result;
974
1155
  }
975
1156
 
976
- const video = document.createElement('video');
1157
+ const video = (globalThis.document?.createElement('video') || {setAttribute: ()=>{}});
977
1158
  video.src = sourceUrl;
978
1159
  video.controls = true;
979
1160
  video.preload = 'metadata';
980
- if (mimeType) {
1161
+ if ( mimeType ) {
981
1162
  video.setAttribute('data-mime-type', mimeType);
982
1163
  }
983
1164
  video.setAttribute('data-source', sourceUrl);
984
1165
  video.toString = () => video.src;
985
1166
  video.valueOf = () => video.src;
986
1167
  return video;
987
- }
1168
+ },
988
1169
  }).call(this, options);
989
- }
1170
+ };
990
1171
  }
991
1172
 
992
1173
  export default AI;