npm - @heyputer/puter.js - Versions diffs - 2.1.2 → 2.1.6 - Mend

@heyputer/puter.js 2.1.2 → 2.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (6)

package/src/modules/AI.js (changed)

@@ -1,5 +1,45 @@
 import * as utils from '../lib/utils.js';
+const normalizeTTSProvider = (value) => {
+    if (typeof value !== 'string') {
+        return 'aws-polly';
+    }
+    const lower = value.toLowerCase();
+    if (lower === 'openai') return 'openai';
+    if (lower === 'aws' || lower === 'polly' || lower === 'aws-polly') return 'aws-polly';
+    return value;
+};
+const TOGETHER_IMAGE_MODEL_PREFIXES = [
+    'black-forest-labs/',
+    'stabilityai/',
+    'togethercomputer/',
+    'playgroundai/',
+    'runwayml/',
+    'lightricks/',
+    'sg161222/',
+    'wavymulder/',
+    'prompthero/',
+];
+const TOGETHER_IMAGE_MODEL_KEYWORDS = [
+    'flux',
+    'kling',
+    'sd3',
+    'stable-diffusion',
+    'kolors',
+];
+const TOGETHER_VIDEO_MODEL_PREFIXES = [
+    'minimax/',
+    'google/',
+    'bytedance/',
+    'pixverse/',
+    'kwaivgi/',
+    'vidu/',
+    'wan-ai/',
+];
 class AI{
     /**
      * Creates a new instance with the given authentication token, API origin, and app ID,
@@ -78,48 +118,100 @@ class AI{
     }
     img2txt = async (...args) => {
-        let MAX_INPUT_SIZE = 10 * 1024 * 1024;
+        const MAX_INPUT_SIZE = 10 * 1024 * 1024;
+        if (!args || args.length === 0) {
+            throw { message: 'Arguments are required', code: 'arguments_required' };
+        }
+        const isBlobLike = (value) => {
+            if (typeof Blob === 'undefined') return false;
+            return value instanceof Blob || (typeof File !== 'undefined' && value instanceof File);
+        };
+        const isPlainObject = (value) => value && typeof value === 'object' && !Array.isArray(value) && !isBlobLike(value);
+        const normalizeProvider = (value) => {
+            if (!value) return 'aws-textract';
+            const normalized = String(value).toLowerCase();
+            if (['aws', 'textract', 'aws-textract'].includes(normalized)) return 'aws-textract';
+            if (['mistral', 'mistral-ocr'].includes(normalized)) return 'mistral';
+            return 'aws-textract';
+        };
         let options = {};
+        if (isPlainObject(args[0])) {
+            options = { ...args[0] };
+        } else {
+            options.source = args[0];
+        }
         let testMode = false;
+        for (let i = 1; i < args.length; i++) {
+            const value = args[i];
+            if (typeof value === 'boolean') {
+                testMode = testMode || value;
+            } else if (isPlainObject(value)) {
+                options = { ...options, ...value };
+            }
+        }
-        // Check that the argument is not undefined or null
-        if(!args){
-            throw({message: 'Arguments are required', code: 'arguments_required'});
+        if (typeof options.testMode === 'boolean') {
+            testMode = options.testMode;
         }
-        // if argument is string transform it to the object that the API expects
-        if (typeof args[0] === 'string' || args[0] instanceof Blob) {
-            options.source = args[0];
+        const provider = normalizeProvider(options.provider);
+        delete options.provider;
+        delete options.testMode;
+        if (!options.source) {
+            throw { message: 'Source is required', code: 'source_required' };
         }
-        // if input is a blob, transform it to a data URI
-        if (args[0].source instanceof Blob) {
-            options.source = await utils.blobToDataUri(args[0].source);
+        if (isBlobLike(options.source)) {
+            options.source = await utils.blobToDataUri(options.source);
+        } else if (options.source?.source && isBlobLike(options.source.source)) {
+            // Support shape { source: Blob }
+            options.source = await utils.blobToDataUri(options.source.source);
         }
-        // check input size
-        if (options.source.length > this.MAX_INPUT_SIZE) {
+        if (typeof options.source === 'string' &&
+            options.source.startsWith('data:') &&
+            options.source.length > MAX_INPUT_SIZE) {
             throw { message: 'Input size cannot be larger than ' + MAX_INPUT_SIZE, code: 'input_too_large' };
         }
-        // determine if test mode is enabled
-        if (typeof args[1] === 'boolean' && args[1] === true ||
-            typeof args[2] === 'boolean' && args[2] === true ||
-            typeof args[3] === 'boolean' && args[3] === true) {
-            testMode = true;
-        }
-        return await utils.make_driver_method(['source'], 'puter-ocr', 'aws-textract', 'recognize', {
-            test_mode: testMode ?? false,
-            transform: async (result) => {
+        const toText = (result) => {
+            if (!result) return '';
+            if (Array.isArray(result.blocks) && result.blocks.length) {
                 let str = '';
-                for (let i = 0; i < result?.blocks?.length; i++) {
-                    if("text/textract:LINE" === result.blocks[i].type)
-                        str += result.blocks[i].text + "\n";
+                for (const block of result.blocks) {
+                    if (typeof block?.text !== 'string') continue;
+                    if (!block.type || block.type === 'text/textract:LINE' || block.type.startsWith('text/')) {
+                        str += block.text + '\n';
+                    }
                 }
-                return str;
+                if (str.trim()) return str;
             }
-        }).call(this, options);
+            if (Array.isArray(result.pages) && result.pages.length) {
+                const markdown = result.pages
+                    .map(page => (page?.markdown || '').trim())
+                    .filter(Boolean)
+                    .join('\n\n');
+                if (markdown.trim()) return markdown;
+            }
+            if (typeof result.document_annotation === 'string') {
+                return result.document_annotation;
+            }
+            if (typeof result.text === 'string') {
+                return result.text;
+            }
+            return '';
+        };
+        const driverCall = utils.make_driver_method(['source'], 'puter-ocr', provider, 'recognize', {
+            test_mode: testMode ?? false,
+            transform: async (result) => toText(result),
+        });
+        return await driverCall.call(this, options);
     }
     txt2speech = async (...args) => {
@@ -183,23 +275,43 @@ class AI{
             throw { message: 'Text parameter is required', code: 'text_required' };
         }
-        // Validate engine if provided
-        if (options.engine) {
-            const validEngines = ['standard', 'neural', 'long-form', 'generative'];
-            if (!validEngines.includes(options.engine)) {
+        const validEngines = ['standard', 'neural', 'long-form', 'generative'];
+        let provider = normalizeTTSProvider(options.provider);
+        if (options.engine && normalizeTTSProvider(options.engine) === 'openai' && !options.provider) {
+            provider = 'openai';
+        }
+        if (provider === 'openai') {
+            if (!options.model && typeof options.engine === 'string') {
+                options.model = options.engine;
+            }
+            if (!options.voice) {
+                options.voice = 'alloy';
+            }
+            if (!options.model) {
+                options.model = 'gpt-4o-mini-tts';
+            }
+            if (!options.response_format) {
+                options.response_format = 'mp3';
+            }
+            delete options.engine;
+        } else {
+            provider = 'aws-polly';
+            if (options.engine && !validEngines.includes(options.engine)) {
                 throw { message: 'Invalid engine. Must be one of: ' + validEngines.join(', '), code: 'invalid_engine' };
             }
-        }
-        // Set default values if not provided
-        if (!options.voice) {
-            options.voice = 'Joanna';
-        }
-        if (!options.engine) {
-            options.engine = 'standard';
-        }
-        if (!options.language) {
-            options.language = 'en-US';
+            if (!options.voice) {
+                options.voice = 'Joanna';
+            }
+            if (!options.engine) {
+                options.engine = 'standard';
+            }
+            if (!options.language) {
+                options.language = 'en-US';
+            }
         }
         // check input size
@@ -214,12 +326,28 @@ class AI{
                 break;
             }
         }
-        return await utils.make_driver_method(['source'], 'puter-tts', 'aws-polly', 'synthesize', {
+        const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
+        return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', {
             responseType: 'blob',
             test_mode: testMode ?? false,
             transform: async (result) => {
-                const url = await utils.blob_to_url(result);
+                let url;
+                if (typeof result === 'string') {
+                    url = result;
+                } else if (result instanceof Blob) {
+                    url = await utils.blob_to_url(result);
+                } else if (result instanceof ArrayBuffer) {
+                    const blob = new Blob([result]);
+                    url = await utils.blob_to_url(blob);
+                } else if (result && typeof result === 'object' && typeof result.arrayBuffer === 'function') {
+                    const arrayBuffer = await result.arrayBuffer();
+                    const blob = new Blob([arrayBuffer], { type: result.type || undefined });
+                    url = await utils.blob_to_url(blob);
+                } else {
+                    throw { message: 'Unexpected audio response format', code: 'invalid_audio_response' };
+                }
                 const audio = new Audio(url);
                 audio.toString = () => url;
                 audio.valueOf = () => url;
@@ -228,16 +356,105 @@ class AI{
         }).call(this, options);
     }
+    speech2txt = async (...args) => {
+        const MAX_INPUT_SIZE = 25 * 1024 * 1024;
+        if ( !args || !args.length ) {
+            throw ({ message: 'Arguments are required', code: 'arguments_required' });
+        }
+        const normalizeSource = async (value) => {
+            if ( value instanceof Blob ) {
+                return await utils.blobToDataUri(value);
+            }
+            return value;
+        };
+        let options = {};
+        let testMode = false;
+        const primary = args[0];
+        if ( primary && typeof primary === 'object' && !Array.isArray(primary) && !(primary instanceof Blob) ) {
+            options = { ...primary };
+        } else {
+            options.file = await normalizeSource(primary);
+        }
+        if ( args[1] && typeof args[1] === 'object' && !Array.isArray(args[1]) && !(args[1] instanceof Blob) ) {
+            options = { ...options, ...args[1] };
+        } else if ( typeof args[1] === 'boolean' ) {
+            testMode = args[1];
+        }
+        if ( typeof args[2] === 'boolean' ) {
+            testMode = args[2];
+        }
+        if ( options.audio ) {
+            options.file = await normalizeSource(options.audio);
+            delete options.audio;
+        }
+        if ( options.file instanceof Blob ) {
+            options.file = await normalizeSource(options.file);
+        }
+        if ( !options.file ) {
+            throw { message: 'Audio input is required', code: 'audio_required' };
+        }
+        if ( typeof options.file === 'string' && options.file.startsWith('data:') ) {
+            const base64 = options.file.split(',')[1] || '';
+            const padding = base64.endsWith('==') ? 2 : (base64.endsWith('=') ? 1 : 0);
+            const byteLength = Math.floor((base64.length * 3) / 4) - padding;
+            if ( byteLength > MAX_INPUT_SIZE ) {
+                throw { message: 'Input size cannot be larger than 25 MB', code: 'input_too_large' };
+            }
+        }
+        const driverMethod = options.translate ? 'translate' : 'transcribe';
+        const driverArgs = { ...options };
+        delete driverArgs.translate;
+        const responseFormat = driverArgs.response_format;
+        return await utils.make_driver_method([], 'puter-speech2txt', 'openai-speech2txt', driverMethod, {
+            test_mode: testMode,
+            transform: async (result) => {
+                if ( responseFormat === 'text' && result && typeof result === 'object' && typeof result.text === 'string' ) {
+                    return result.text;
+                }
+                return result;
+            },
+        }).call(this, driverArgs);
+    }
     // Add new methods for TTS engine management
     txt2speech = Object.assign(this.txt2speech, {
         /**
          * List available TTS engines with pricing information
          * @returns {Promise<Array>} Array of available engines
          */
-        listEngines: async () => {
-            return await utils.make_driver_method(['source'], 'puter-tts', 'aws-polly', 'list_engines', {
+        listEngines: async (options = {}) => {
+            let provider = 'aws-polly';
+            let params = {};
+            if (typeof options === 'string') {
+                provider = normalizeTTSProvider(options);
+            } else if (options && typeof options === 'object') {
+                provider = normalizeTTSProvider(options.provider) || provider;
+                params = { ...options };
+                delete params.provider;
+            }
+            if (provider === 'openai') {
+                params.provider = 'openai';
+            }
+            const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
+            return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', {
                 responseType: 'text',
-            }).call(this, {});
+            }).call(this, params);
         },
         /**
@@ -245,13 +462,26 @@ class AI{
          * @param {string} [engine] - Optional engine filter
          * @returns {Promise<Array>} Array of available voices
          */
-        listVoices: async (engine) => {
-            const params = {};
-            if (engine) {
-                params.engine = engine;
+        listVoices: async (options) => {
+            let provider = 'aws-polly';
+            let params = {};
+            if (typeof options === 'string') {
+                params.engine = options;
+            } else if (options && typeof options === 'object') {
+                provider = normalizeTTSProvider(options.provider) || provider;
+                params = { ...options };
+                delete params.provider;
             }
-            return utils.make_driver_method(['source'], 'puter-tts', 'aws-polly', 'list_voices', {
+            if (provider === 'openai') {
+                params.provider = 'openai';
+                delete params.engine;
+            }
+            const driverName = provider === 'openai' ? 'openai-tts' : 'aws-polly';
+            return utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_voices', {
                 responseType: 'text',
             }).call(this, params);
         }
@@ -565,6 +795,9 @@ class AI{
         else if ( requestParams.model.startsWith('openrouter:') ) {
             driver = 'openrouter';
         }
+        else if ( requestParams.model.startsWith('ollama:') ) {
+            driver = 'ollama';
+        }
         // stream flag from userParams
         if(userParams.stream !== undefined && typeof userParams.stream === 'boolean'){
@@ -576,7 +809,7 @@ class AI{
         }
         // Additional parameters to pass from userParams to requestParams
-        const PARAMS_TO_PASS = ['tools', 'response'];
+        const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
         for ( const name of PARAMS_TO_PASS ) {
             if ( userParams[name] ) {
                 requestParams[name] = userParams[name];
@@ -660,21 +893,155 @@ class AI{
         if (options.model === "nano-banana")
             options.model = "gemini-2.5-flash-image-preview";
-        if (options.model === "gemini-2.5-flash-image-preview")
+        const driverHint = typeof options.driver === 'string' ? options.driver : undefined;
+        const providerRaw = typeof options.provider === 'string'
+            ? options.provider
+            : (typeof options.service === 'string' ? options.service : undefined);
+        const providerHint = typeof providerRaw === 'string' ? providerRaw.toLowerCase() : undefined;
+        const modelLower = typeof options.model === 'string' ? options.model.toLowerCase() : '';
+        const looksLikeTogetherModel =
+            typeof options.model === 'string' &&
+            (TOGETHER_IMAGE_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)) ||
+                TOGETHER_IMAGE_MODEL_KEYWORDS.some(keyword => modelLower.includes(keyword)));
+        if (driverHint) {
+            AIService = driverHint;
+        } else if (providerHint === 'gemini') {
+            AIService = "gemini-image-generation";
+        } else if (providerHint === 'together' || providerHint === 'together-ai') {
+            AIService = "together-image-generation";
+        } else if (options.model === "gemini-2.5-flash-image-preview") {
             AIService = "gemini-image-generation";
+        } else if (looksLikeTogetherModel) {
+            AIService = "together-image-generation";
+        }
         // Call the original chat.complete method
         return await utils.make_driver_method(['prompt'], 'puter-image-generation', AIService, 'generate', {
             responseType: 'blob',
             test_mode: testMode ?? false,
-            transform: async blob => {
+            transform: async result => {
+                let url;
+                if ( typeof result === 'string' ) {
+                    url = result;
+                } else if ( result instanceof Blob ) {
+                    url = await utils.blob_to_url(result);
+                } else if ( result instanceof ArrayBuffer ) {
+                    const blob = new Blob([result]);
+                    url = await utils.blob_to_url(blob);
+                } else if ( result && typeof result === 'object' && typeof result.arrayBuffer === 'function' ) {
+                    const arrayBuffer = await result.arrayBuffer();
+                    const blob = new Blob([arrayBuffer], { type: result.type || undefined });
+                    url = await utils.blob_to_url(blob);
+                } else {
+                    throw { message: 'Unexpected image response format', code: 'invalid_image_response' };
+                }
                 let img = new Image();
-                img.src = await utils.blob_to_url(blob);
+                img.src = url;
                 img.toString = () => img.src;
                 img.valueOf = () => img.src;
                 return img;
             }
         }).call(this, options);
     }
+    txt2vid = async (...args) => {
+        let options = {};
+        let testMode = false;
+        if(!args){
+            throw({message: 'Arguments are required', code: 'arguments_required'});
+        }
+        if (typeof args[0] === 'string') {
+            options = { prompt: args[0] };
+        }
+        if (typeof args[1] === 'boolean' && args[1] === true) {
+            testMode = true;
+        }
+        if (typeof args[0] === 'string' && typeof args[1] === "object") {
+            options = args[1];
+            options.prompt = args[0];
+        }
+        if (typeof args[0] === 'object') {
+            options = args[0];
+        }
+        if (!options.prompt) {
+            throw({message: 'Prompt parameter is required', code: 'prompt_required'});
+        }
+        if (!options.model) {
+            options.model = 'sora-2';
+        }
+        if (options.duration !== undefined && options.seconds === undefined) {
+            options.seconds = options.duration;
+        }
+        let videoService = 'openai-video-generation';
+        const driverHint = typeof options.driver === 'string' ? options.driver : undefined;
+        const driverHintLower = driverHint ? driverHint.toLowerCase() : undefined;
+        const providerRaw = typeof options.provider === 'string'
+            ? options.provider
+            : (typeof options.service === 'string' ? options.service : undefined);
+        const providerHint = typeof providerRaw === 'string' ? providerRaw.toLowerCase() : undefined;
+        const modelLower = typeof options.model === 'string' ? options.model.toLowerCase() : '';
+        const looksLikeTogetherVideoModel = typeof options.model === 'string' &&
+            TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix));
+        if (driverHintLower === 'together' || driverHintLower === 'together-ai') {
+            videoService = 'together-video-generation';
+        } else if (driverHintLower === 'together-video-generation') {
+            videoService = 'together-video-generation';
+        } else if (driverHintLower === 'openai') {
+            videoService = 'openai-video-generation';
+        } else if (driverHint) {
+            videoService = driverHint;
+        } else if (providerHint === 'together' || providerHint === 'together-ai') {
+            videoService = 'together-video-generation';
+        } else if (looksLikeTogetherVideoModel) {
+            videoService = 'together-video-generation';
+        }
+        return await utils.make_driver_method(['prompt'], 'puter-video-generation', videoService, 'generate', {
+            responseType: 'blob',
+            test_mode: testMode ?? false,
+            transform: async result => {
+                let sourceUrl = null;
+                let mimeType = null;
+                if (result instanceof Blob) {
+                    sourceUrl = await utils.blob_to_url(result);
+                    mimeType = result.type || 'video/mp4';
+                } else if (typeof result === 'string') {
+                    sourceUrl = result;
+                } else if (result && typeof result === 'object') {
+                    sourceUrl = result.asset_url || result.url || result.href || null;
+                    mimeType = result.mime_type || result.content_type || null;
+                }
+                if (!sourceUrl) {
+                    return result;
+                }
+                const video = document.createElement('video');
+                video.src = sourceUrl;
+                video.controls = true;
+                video.preload = 'metadata';
+                if (mimeType) {
+                    video.setAttribute('data-mime-type', mimeType);
+                }
+                video.setAttribute('data-source', sourceUrl);
+                video.toString = () => video.src;
+                video.valueOf = () => video.src;
+                return video;
+            }
+        }).call(this, options);
+    }
 }
 export default AI;