prompt-api-polyfill 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -66
- package/backends/base.js +59 -0
- package/backends/defaults.js +9 -0
- package/backends/firebase.js +45 -0
- package/backends/gemini.js +48 -0
- package/backends/openai.js +340 -0
- package/backends/transformers.js +106 -0
- package/json-schema-converter.js +3 -1
- package/multimodal-converter.js +138 -12
- package/package.json +21 -5
- package/prompt-api-polyfill.js +482 -444
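
The new backends/ directory splits each provider behind a shared PolyfillBackend base class (backends/base.js, whose contents are not part of this excerpt). Judging purely from the two subclasses shown below, a backend implements roughly the following surface; this skeleton is an inference from the diff, not the published base.js contract:

// Sketch of the backend surface inferred from OpenAIBackend and TransformersBackend below.
// The names and exact signatures of base.js are assumptions.
import PolyfillBackend from './base.js';

export default class ExampleBackend extends PolyfillBackend {
  constructor(config) {
    super(config.modelName || 'example-default-model'); // base class appears to store this.modelName
  }

  // Reports 'available' / 'unavailable' for the requested session options.
  static availability(options = {}) {
    return 'available';
  }

  // Receives the Prompt API session options plus Gemini-style parameters
  // (systemInstruction, generationConfig) and prepares per-session state.
  createSession(options, inCloudParams) {}

  // Gemini-style `contents` in; resolves to { text, usage }.
  async generateContent(contents) {}

  // Gemini-style `contents` in; resolves to an async iterable of chunks
  // exposing text() and usageMetadata.
  async generateContentStream(contents) {}

  // Exact or approximate token count for `contents`.
  async countTokens(contents) {}
}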
package/backends/openai.js
ADDED
@@ -0,0 +1,340 @@
+import OpenAI from 'https://esm.run/openai';
+import PolyfillBackend from './base.js';
+import { DEFAULT_MODELS } from './defaults.js';
+
+/**
+ * OpenAI API Backend
+ */
+export default class OpenAIBackend extends PolyfillBackend {
+  #model;
+
+  constructor(config) {
+    super(config.modelName || DEFAULT_MODELS.openai);
+    this.config = config;
+    this.openai = new OpenAI({
+      apiKey: config.apiKey,
+      dangerouslyAllowBrowser: true, // Required for client-side usage
+    });
+  }
+
+  static availability(options = {}) {
+    if (options.expectedInputs) {
+      const hasAudio = options.expectedInputs.some(
+        (input) => input.type === 'audio'
+      );
+      const hasImage = options.expectedInputs.some(
+        (input) => input.type === 'image'
+      );
+      if (hasAudio && hasImage) {
+        return 'unavailable';
+      }
+    }
+    return 'available';
+  }
+
+  createSession(options, inCloudParams) {
+    // OpenAI doesn't have a "session" object like Gemini, so we return a context object
+    // tailored for our generate methods.
+    this.#model = {
+      model: options.modelName || this.modelName,
+      temperature: inCloudParams.generationConfig?.temperature,
+      top_p: 1.0, // Default to 1.0 as topK is not directly supported the same way
+      systemInstruction: inCloudParams.systemInstruction,
+    };
+
+    const config = inCloudParams.generationConfig || {};
+    if (config.responseSchema) {
+      const { schema, wrapped } = this.#fixSchemaForOpenAI(
+        config.responseSchema
+      );
+      this.#model.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: 'response',
+          strict: true,
+          schema: schema,
+        },
+      };
+      this.#model.response_wrapped = wrapped;
+    } else if (config.responseMimeType === 'application/json') {
+      this.#model.response_format = { type: 'json_object' };
+    }
+
+    return this.#model;
+  }
+
+  /**
+   * OpenAI Structured Outputs require:
+   * 1. All fields in objects to be marked as 'required'.
+   * 2. Objects to have 'additionalProperties: false'.
+   * 3. The root must be an 'object'.
+   */
+  #fixSchemaForOpenAI(schema) {
+    if (typeof schema !== 'object' || schema === null) {
+      return { schema, wrapped: false };
+    }
+
+    const processNode = (node) => {
+      if (node.type === 'object') {
+        if (node.properties) {
+          node.additionalProperties = false;
+          node.required = Object.keys(node.properties);
+          for (const key in node.properties) {
+            processNode(node.properties[key]);
+          }
+        } else {
+          node.additionalProperties = false;
+          node.required = [];
+        }
+      } else if (node.type === 'array' && node.items) {
+        processNode(node.items);
+      }
+      return node;
+    };
+
+    // Deep clone to avoid side effects
+    const cloned = JSON.parse(JSON.stringify(schema));
+
+    if (cloned.type !== 'object') {
+      // Wrap in object as OpenAI requires object root
+      return {
+        wrapped: true,
+        schema: {
+          type: 'object',
+          properties: { value: cloned },
+          required: ['value'],
+          additionalProperties: false,
+        },
+      };
+    }
+
+    return {
+      wrapped: false,
+      schema: processNode(cloned),
+    };
+  }
+
+  #validateContent(messages) {
+    let hasImage = false;
+    let hasAudio = false;
+
+    for (const msg of messages) {
+      if (Array.isArray(msg.content)) {
+        for (const part of msg.content) {
+          if (part.type === 'image_url') {
+            hasImage = true;
+          }
+          if (part.type === 'input_audio') {
+            hasAudio = true;
+          }
+        }
+      }
+    }
+
+    if (hasImage && hasAudio) {
+      throw new Error(
+        'OpenAI backend does not support mixing images and audio in the same session. Please start a new session.'
+      );
+    }
+
+    return { hasImage, hasAudio };
+  }
+
+  #routeModel(hasAudio) {
+    // If the user explicitly provided a model in the session options, respect it.
+    // Otherwise, pick based on content.
+    if (this.#model.model !== this.modelName) {
+      return this.#model.model;
+    }
+
+    return hasAudio ? `${this.modelName}-audio-preview` : this.modelName;
+  }
+
+  async generateContent(contents) {
+    const { messages } = this.#convertContentsToInput(
+      contents,
+      this.#model.systemInstruction
+    );
+    const { hasAudio } = this.#validateContent(messages);
+    const model = this.#routeModel(hasAudio);
+
+    if (
+      model === `${this.modelName}-audio-preview` &&
+      this.#model.response_format
+    ) {
+      throw new DOMException(
+        `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
+        'NotSupportedError'
+      );
+    }
+
+    const options = {
+      model: model,
+      messages: messages,
+    };
+
+    if (this.#model.temperature > 0) {
+      options.temperature = this.#model.temperature;
+    }
+
+    if (this.#model.response_format) {
+      options.response_format = this.#model.response_format;
+    }
+
+    try {
+      const response = await this.openai.chat.completions.create(options);
+
+      const choice = response.choices[0];
+      let text = choice.message.content;
+
+      if (this.#model.response_wrapped && text) {
+        try {
+          const parsed = JSON.parse(text);
+          if (parsed && typeof parsed === 'object' && 'value' in parsed) {
+            text = JSON.stringify(parsed.value);
+          }
+        } catch {
+          // Ignore parsing error, return raw text
+        }
+      }
+
+      const usage = response.usage?.prompt_tokens || 0;
+
+      return { text, usage };
+    } catch (error) {
+      console.error('OpenAI Generate Content Error:', error);
+      throw error;
+    }
+  }
+
+  async generateContentStream(contents) {
+    const { messages } = this.#convertContentsToInput(
+      contents,
+      this.#model.systemInstruction
+    );
+    const { hasAudio } = this.#validateContent(messages);
+    const model = this.#routeModel(hasAudio);
+
+    if (
+      model === `${this.modelName}-audio-preview` &&
+      this.#model.response_format
+    ) {
+      throw new DOMException(
+        `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
+        'NotSupportedError'
+      );
+    }
+
+    const options = {
+      model: model,
+      messages: messages,
+      stream: true,
+    };
+
+    if (this.#model.temperature > 0) {
+      options.temperature = this.#model.temperature;
+    }
+
+    if (this.#model.response_format) {
+      options.response_format = this.#model.response_format;
+    }
+
+    try {
+      const stream = await this.openai.chat.completions.create(options);
+
+      // Convert OpenAI stream to an AsyncIterable that yields chunks
+      return (async function* () {
+        let firstChunk = true;
+        for await (const chunk of stream) {
+          let text = chunk.choices[0]?.delta?.content;
+          if (text) {
+            // Note: Unwrapping a wrapped object in a stream is complex.
+            // For now, streaming wrapped results will yield the full JSON including the wrapper.
+            yield {
+              text: () => text,
+              usageMetadata: { totalTokenCount: 0 },
+            };
+          }
+        }
+      })();
+    } catch (error) {
+      console.error('OpenAI Generate Content Stream Error:', error);
+      throw error;
+    }
+  }
+
+  async countTokens(contents) {
+    // OpenAI does not provide a public API endpoint for counting tokens before generation.
+    // Implementing countTokens strictly requires a tokenizer like `tiktoken`.
+    // For this initial implementation, we use a character-based approximation (e.g., text.length / 4)
+    // to avoid adding heavy WASM dependencies (`tiktoken`) to the polyfill.
+    let totalText = '';
+    if (this.#model && this.#model.systemInstruction) {
+      totalText += this.#model.systemInstruction;
+    }
+
+    if (Array.isArray(contents)) {
+      for (const content of contents) {
+        if (!content.parts) {
+          continue;
+        }
+        for (const part of content.parts) {
+          if (part.text) {
+            totalText += part.text;
+          } else if (part.inlineData) {
+            // Approximate image token cost (e.g., ~1000 chars worth)
+            totalText += ' '.repeat(1000);
+          }
+        }
+      }
+    }
+
+    return Math.ceil(totalText.length / 4);
+  }
+
+  #convertContentsToInput(contents, systemInstruction) {
+    const messages = [];
+
+    // System instructions
+    if (systemInstruction) {
+      messages.push({
+        role: 'system',
+        content: systemInstruction,
+      });
+    }
+
+    for (const content of contents) {
+      const role = content.role === 'model' ? 'assistant' : 'user';
+      const contentParts = [];
+
+      for (const part of content.parts) {
+        if (part.text) {
+          contentParts.push({ type: 'text', text: part.text });
+        } else if (part.inlineData) {
+          const { data, mimeType } = part.inlineData;
+          if (mimeType.startsWith('image/')) {
+            contentParts.push({
+              type: 'image_url',
+              image_url: { url: `data:${mimeType};base64,${data}` },
+            });
+          } else if (mimeType.startsWith('audio/')) {
+            contentParts.push({
+              type: 'input_audio',
+              input_audio: {
+                data: data,
+                format: mimeType.split('/')[1] === 'mpeg' ? 'mp3' : 'wav',
+              },
+            });
+          }
+        }
+      }
+
+      // Simplification: if only one text part, just send string content for better compatibility
+      // but multimodal models usually prefer the array format.
+      // We'll keep the array format for consistency with multimodal inputs.
+      messages.push({ role, content: contentParts });
+    }
+
+    return { messages };
+  }
+}
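
As the comment block above #fixSchemaForOpenAI explains, OpenAI Structured Outputs insist on an object root, every property listed in required, and additionalProperties: false. A worked sketch of what that method does to an object-rooted responseSchema (values written out by hand from the code above, not captured from a run):

// Input: a responseSchema as a Prompt API responseConstraint might supply it.
const input = {
  type: 'object',
  properties: {
    name: { type: 'string' },
    tags: {
      type: 'array',
      items: { type: 'object', properties: { label: { type: 'string' } } },
    },
  },
};

// #fixSchemaForOpenAI(input) returns { wrapped: false, schema } with schema roughly:
const result = {
  type: 'object',
  properties: {
    name: { type: 'string' },
    tags: {
      type: 'array',
      items: {
        type: 'object',
        properties: { label: { type: 'string' } },
        required: ['label'],          // every property becomes required
        additionalProperties: false,  // objects are closed
      },
    },
  },
  required: ['name', 'tags'],
  additionalProperties: false,
};

Non-object roots take the early-return branch instead: they are wrapped as { value: <original schema> }, and generateContent later strips the value wrapper back out of the model's JSON reply.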
package/backends/transformers.js
ADDED
@@ -0,0 +1,106 @@
+import { pipeline, TextStreamer } from 'https://esm.run/@huggingface/transformers';
+import PolyfillBackend from './base.js';
+import { DEFAULT_MODELS } from './defaults.js';
+
+/**
+ * Transformers.js (ONNX Runtime) Backend
+ */
+export default class TransformersBackend extends PolyfillBackend {
+  #generator;
+  #tokenizer;
+
+  constructor(config) {
+    super(config.modelName || DEFAULT_MODELS.transformers);
+  }
+
+  async #ensureGenerator() {
+    if (!this.#generator) {
+      console.log(`[Transformers.js] Loading model: ${this.modelName}`);
+      this.#generator = await pipeline('text-generation', this.modelName, {
+        device: 'webgpu',
+      });
+      this.#tokenizer = this.#generator.tokenizer;
+    }
+    return this.#generator;
+  }
+
+  async createSession(options, inCloudParams) {
+    // Initializing the generator can be slow, so we do it lazily or here.
+    // For now, let's trigger the loading.
+    await this.#ensureGenerator();
+
+    // We don't really have "sessions" in the same way Gemini does,
+    // but we can store the generation config.
+    this.generationConfig = {
+      max_new_tokens: 512, // Default limit
+      temperature: inCloudParams.generationConfig?.temperature || 1.0,
+      top_p: 1.0,
+      do_sample: inCloudParams.generationConfig?.temperature > 0,
+    };
+
+    return this.#generator;
+  }
+
+  async generateContent(contents) {
+    const generator = await this.#ensureGenerator();
+    const prompt = this.#convertContentsToPrompt(contents);
+
+    const output = await generator(prompt, this.generationConfig);
+    const text = output[0].generated_text.slice(prompt.length);
+
+    // Approximate usage
+    const usage = await this.countTokens(contents);
+
+    return { text, usage };
+  }
+
+  async generateContentStream(contents) {
+    const generator = await this.#ensureGenerator();
+    const prompt = this.#convertContentsToPrompt(contents);
+
+    const streamer = new TextStreamer(this.#tokenizer, {
+      skip_prompt: true,
+      skip_special_tokens: true,
+    });
+
+    // Run generation in the background (don't await)
+    generator(prompt, {
+      ...this.generationConfig,
+      streamer,
+    });
+
+    // streamer is an AsyncIterable in Transformers.js v3
+    return (async function* () {
+      for await (const newText of streamer) {
+        yield {
+          text: () => newText,
+          usageMetadata: { totalTokenCount: 0 },
+        };
+      }
+    })();
+  }
+
+  async countTokens(contents) {
+    await this.#ensureGenerator();
+    const text = this.#convertContentsToPrompt(contents);
+    const { input_ids } = await this.#tokenizer(text);
+    return input_ids.size;
+  }
+
+  #convertContentsToPrompt(contents) {
+    // Simple ChatML-like format for Qwen/Llama
+    let prompt = '';
+    for (const content of contents) {
+      const role = content.role === 'model' ? 'assistant' : 'user';
+      prompt += `<|im_start|>${role}\n`;
+      for (const part of content.parts) {
+        if (part.text) {
+          prompt += part.text;
+        }
+      }
+      prompt += '<|im_end|>\n';
+    }
+    prompt += '<|im_start|>assistant\n';
+    return prompt;
+  }
+}
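
#convertContentsToPrompt flattens the polyfill's Gemini-style contents into a ChatML-style string for models such as Qwen. With a hypothetical three-turn history (the sample contents below are illustrative), the prompt it builds would look like this sketch:

// Illustrative input in the same shape the other backends consume.
const contents = [
  { role: 'user', parts: [{ text: 'Hello!' }] },
  { role: 'model', parts: [{ text: 'Hi, how can I help?' }] },
  { role: 'user', parts: [{ text: 'Write a haiku about rain.' }] },
];

// #convertContentsToPrompt(contents) would return the string:
// <|im_start|>user
// Hello!<|im_end|>
// <|im_start|>assistant
// Hi, how can I help?<|im_end|>
// <|im_start|>user
// Write a haiku about rain.<|im_end|>
// <|im_start|>assistant
//
// The trailing open assistant turn is what the local model then completes.
// Note the code appends <|im_end|> directly after the last text part, so it
// shares a line with that text rather than sitting on its own line.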
package/json-schema-converter.js
CHANGED
@@ -6,7 +6,9 @@ import { Schema } from 'https://esm.run/firebase/ai';
  * @returns {Schema} - The Firebase Vertex AI Schema instance.
  */
 export function convertJsonSchemaToVertexSchema(jsonSchema) {
-  if (!jsonSchema) return undefined;
+  if (!jsonSchema) {
+    return undefined;
+  }
 
   // Extract common base parameters supported by all Schema types
   const baseParams = {
package/multimodal-converter.js
CHANGED
@@ -1,7 +1,11 @@
 export default class MultimodalConverter {
   static async convert(type, value) {
-    if (type === 'image')
-
+    if (type === 'image') {
+      return this.processImage(value);
+    }
+    if (type === 'audio') {
+      return this.processAudio(value);
+    }
     throw new DOMException(
       `Unsupported media type: ${type}`,
       'NotSupportedError'
@@ -16,13 +20,16 @@ export default class MultimodalConverter {
 
     // BufferSource (ArrayBuffer/View) -> Sniff or Default
     if (ArrayBuffer.isView(source) || source instanceof ArrayBuffer) {
-      const
+      const u8 =
+        source instanceof ArrayBuffer
+          ? new Uint8Array(source)
+          : new Uint8Array(source.buffer, source.byteOffset, source.byteLength);
+      const buffer = u8.buffer.slice(
+        u8.byteOffset,
+        u8.byteOffset + u8.byteLength
+      );
       const base64 = this.arrayBufferToBase64(buffer);
-
-      const u8 = new Uint8Array(buffer);
-      let mimeType = 'image/png'; // Default
-      if (u8[0] === 0xff && u8[1] === 0xd8) mimeType = 'image/jpeg';
-      else if (u8[0] === 0x89 && u8[1] === 0x50) mimeType = 'image/png';
+      const mimeType = this.#sniffImageMimeType(u8) || 'image/png';
 
       return { inlineData: { data: base64, mimeType } };
     }
@@ -32,6 +39,111 @@ export default class MultimodalConverter {
     return this.canvasSourceToInlineData(source);
   }
 
+  static #sniffImageMimeType(u8) {
+    const len = u8.length;
+    if (len < 4) {
+      return null;
+    }
+
+    // JPEG: FF D8 FF
+    if (u8[0] === 0xff && u8[1] === 0xd8 && u8[2] === 0xff) {
+      return 'image/jpeg';
+    }
+
+    // PNG: 89 50 4E 47 0D 0A 1A 0A
+    if (
+      u8[0] === 0x89 &&
+      u8[1] === 0x50 &&
+      u8[2] === 0x4e &&
+      u8[3] === 0x47 &&
+      u8[4] === 0x0d &&
+      u8[5] === 0x0a &&
+      u8[6] === 0x1a &&
+      u8[7] === 0x0a
+    ) {
+      return 'image/png';
+    }
+
+    // GIF: GIF87a / GIF89a
+    if (u8[0] === 0x47 && u8[1] === 0x49 && u8[2] === 0x46 && u8[3] === 0x38) {
+      return 'image/gif';
+    }
+
+    // WebP: RIFF (offset 0) + WEBP (offset 8)
+    if (
+      u8[0] === 0x52 &&
+      u8[1] === 0x49 &&
+      u8[2] === 0x46 &&
+      u8[3] === 0x46 &&
+      u8[8] === 0x57 &&
+      u8[9] === 0x45 &&
+      u8[10] === 0x42 &&
+      u8[11] === 0x50
+    ) {
+      return 'image/webp';
+    }
+
+    // BMP: BM
+    if (u8[0] === 0x42 && u8[1] === 0x4d) {
+      return 'image/bmp';
+    }
+
+    // ICO: 00 00 01 00
+    if (u8[0] === 0x00 && u8[1] === 0x00 && u8[2] === 0x01 && u8[3] === 0x00) {
+      return 'image/x-icon';
+    }
+
+    // TIFF: II* (LE) / MM* (BE)
+    if (
+      (u8[0] === 0x49 && u8[1] === 0x49 && u8[2] === 0x2a) ||
+      (u8[0] === 0x4d && u8[1] === 0x4d && u8[2] === 0x2a)
+    ) {
+      return 'image/tiff';
+    }
+
+    // ISOBMFF (AVIF / HEIC / HEIF)
+    // "ftyp" at offset 4
+    if (u8[4] === 0x66 && u8[5] === 0x74 && u8[6] === 0x79 && u8[7] === 0x70) {
+      const type = String.fromCharCode(u8[8], u8[9], u8[10], u8[11]);
+      if (type === 'avif' || type === 'avis') {
+        return 'image/avif';
+      }
+      if (
+        type === 'heic' ||
+        type === 'heix' ||
+        type === 'hevc' ||
+        type === 'hevx'
+      ) {
+        return 'image/heic';
+      }
+      if (type === 'mif1' || type === 'msf1') {
+        return 'image/heif';
+      }
+    }
+
+    // JPEG XL: FF 0A or container bits
+    if (u8[0] === 0xff && u8[1] === 0x0a) {
+      return 'image/jxl';
+    }
+    // Container: 00 00 00 0c 4a 58 4c 20 0d 0a 87 0a (JXL )
+    if (u8[0] === 0x00 && u8[4] === 0x4a && u8[5] === 0x58 && u8[6] === 0x4c) {
+      return 'image/jxl';
+    }
+
+    // JPEG 2000
+    if (u8[0] === 0x00 && u8[4] === 0x6a && u8[5] === 0x50 && u8[6] === 0x20) {
+      return 'image/jp2';
+    }
+
+    // SVG: Check for <svg or <?xml (heuristics)
+    const preview = String.fromCharCode(...u8.slice(0, 100)).toLowerCase();
+    if (preview.includes('<svg') || preview.includes('<?xml')) {
+      return 'image/svg+xml';
+    }
+
+    return null;
+  }
+
   static async processAudio(source) {
     // Blob
     if (source instanceof Blob) {
@@ -46,8 +158,20 @@ export default class MultimodalConverter {
     }
 
     // BufferSource -> Assume it's already an audio file (mp3/wav)
-
-
+    const isArrayBuffer =
+      source instanceof ArrayBuffer ||
+      (source &&
+        source.constructor &&
+        source.constructor.name === 'ArrayBuffer');
+    const isView =
+      ArrayBuffer.isView(source) ||
+      (source &&
+        source.buffer &&
+        (source.buffer instanceof ArrayBuffer ||
+          source.buffer.constructor.name === 'ArrayBuffer'));
+
+    if (isArrayBuffer || isView) {
+      const buffer = isArrayBuffer ? source : source.buffer;
       return {
         inlineData: {
           data: this.arrayBufferToBase64(buffer),
@@ -65,14 +189,16 @@ export default class MultimodalConverter {
     return new Promise((resolve, reject) => {
       const reader = new FileReader();
       reader.onloadend = () => {
-        if (reader.error)
-
+        if (reader.error) {
+          reject(reader.error);
+        } else {
           resolve({
             inlineData: {
               data: reader.result.split(',')[1],
               mimeType: blob.type,
             },
           });
+        }
       };
       reader.readAsDataURL(blob);
     });
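
With the new #sniffImageMimeType, raw buffers are identified by magic bytes instead of the old two-signature check, so JPEG, PNG, GIF, WebP and the other listed formats get the right mimeType and only unrecognized data falls back to image/png. A small usage sketch of the public entry point, assuming a JPEG passed as a typed array (the byte payload is illustrative):

// A Uint8Array beginning with the JPEG signature FF D8 FF.
const bytes = new Uint8Array([0xff, 0xd8, 0xff, 0xe0 /* ...rest of the file... */]);

const part = await MultimodalConverter.convert('image', bytes);
// part would be roughly:
// {
//   inlineData: {
//     data: '<base64 of the bytes>',
//     mimeType: 'image/jpeg', // sniffed from the signature, not defaulted
//   },
// }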
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "prompt-api-polyfill",
-  "version": "0.1.0",
-  "description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic.",
+  "version": "0.3.0",
+  "description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API, or OpenAI API.",
   "type": "module",
   "main": "./prompt-api-polyfill.js",
   "module": "./prompt-api-polyfill.js",
@@ -14,7 +14,8 @@
     "json-schema-converter.js",
     "multimodal-converter.js",
     "prompt-api-polyfill.js",
-    "dot_env.json"
+    "dot_env.json",
+    "backends/"
   ],
   "sideEffects": true,
   "keywords": [
@@ -22,6 +23,8 @@
     "language-model",
     "polyfill",
     "firebase",
+    "gemini",
+    "openai",
     "web-ai"
   ],
   "repository": {
@@ -35,9 +38,22 @@
   "homepage": "https://github.com/GoogleChromeLabs/web-ai-demos/tree/main/prompt-api-polyfill/README.md",
   "license": "Apache-2.0",
   "scripts": {
-    "start": "npx http-server"
+    "start": "npx http-server",
+    "test:browser": "node scripts/list-backends.js && vitest run -c vitest.browser.config.js .browser.test.js",
+    "fix": "npx prettier --write ."
   },
   "devDependencies": {
-    "
+    "@firebase/ai": "^2.6.1",
+    "@google/generative-ai": "^0.24.1",
+    "@vitest/browser": "^4.0.17",
+    "@vitest/browser-playwright": "^4.0.17",
+    "ajv": "^8.17.1",
+    "firebase": "^12.7.0",
+    "http-server": "^14.1.1",
+    "jsdom": "^27.4.0",
+    "openai": "^6.16.0",
+    "playwright": "^1.57.0",
+    "prettier-plugin-curly": "^0.4.1",
+    "vitest": "^4.0.17"
   }
 }