prompt-api-polyfill 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backends/base.js +59 -0
- package/backends/defaults.js +9 -0
- package/backends/firebase.js +45 -0
- package/backends/gemini.js +48 -0
- package/backends/openai.js +340 -0
- package/backends/transformers.js +106 -0
- package/package.json +3 -2
- package/prompt-api-polyfill.js +4 -0
package/backends/base.js
ADDED
@@ -0,0 +1,59 @@
+ /**
+  * Abstract class representing a backend for the LanguageModel polyfill.
+  */
+ export default class PolyfillBackend {
+   #model;
+
+   /**
+    * @param {string} modelName - The name of the model.
+    */
+   constructor(modelName) {
+     this.modelName = modelName;
+   }
+
+   /**
+    * Checks if the backend is available given the options.
+    * @param {Object} options - LanguageModel options.
+    * @returns {string} 'available', 'unavailable', 'downloadable', or 'downloading'.
+    */
+   static availability(options) {
+     return 'available';
+   }
+
+   /**
+    * Creates a model session and stores it.
+    * @param {Object} options - LanguageModel options.
+    * @param {Object} inCloudParams - Parameters for the cloud model.
+    * @returns {any} The created session object.
+    */
+   createSession(options, inCloudParams) {
+     throw new Error('Not implemented');
+   }
+
+   /**
+    * Generates content (non-streaming).
+    * @param {Array} content - The history + new message content.
+    * @returns {Promise<{text: string, usage: number}>}
+    */
+   async generateContent(content) {
+     throw new Error('Not implemented');
+   }
+
+   /**
+    * Generates content stream.
+    * @param {Array} content - The history + new content.
+    * @returns {Promise<AsyncIterable>} Stream of chunks.
+    */
+   async generateContentStream(content) {
+     throw new Error('Not implemented');
+   }
+
+   /**
+    * Counts tokens.
+    * @param {Array} content - The content to count.
+    * @returns {Promise<number>} Total tokens.
+    */
+   async countTokens(content) {
+     throw new Error('Not implemented');
+   }
+ }
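Editorial note, not part of the package: the abstract contract above is what each concrete backend implements. A minimal sketch of a hypothetical subclass, assuming the same `{ text, usage }` return shape and streamed-chunk shape used by the real backends below; the `EchoBackend` name and its trivial bodies are invented for illustration only.

// Hypothetical EchoBackend, shown only to illustrate the contract defined in base.js.
import PolyfillBackend from './base.js';

export default class EchoBackend extends PolyfillBackend {
  constructor(config = {}) {
    super(config.modelName || 'echo-model'); // 'echo-model' is a placeholder name
  }

  createSession(_options, _inCloudParams) {
    return {}; // no real remote session is needed for an echo backend
  }

  async generateContent(contents) {
    // Echo the text parts of the last message; usage is a rough chars/4 estimate.
    const last = contents[contents.length - 1];
    const text = (last?.parts || []).map((p) => p.text || '').join('');
    return { text, usage: Math.ceil(text.length / 4) };
  }

  async generateContentStream(contents) {
    const { text } = await this.generateContent(contents);
    return (async function* () {
      yield { text: () => text, usageMetadata: { totalTokenCount: 0 } };
    })();
  }

  async countTokens(contents) {
    const { usage } = await this.generateContent(contents);
    return usage;
  }
}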
package/backends/firebase.js
ADDED
@@ -0,0 +1,45 @@
+ import { initializeApp } from 'https://esm.run/firebase/app';
+ import {
+   getAI,
+   getGenerativeModel,
+   GoogleAIBackend,
+   InferenceMode,
+ } from 'https://esm.run/firebase/ai';
+ import PolyfillBackend from './base.js';
+ import { DEFAULT_MODELS } from './defaults.js';
+
+ /**
+  * Firebase AI Logic Backend
+  */
+ export default class FirebaseBackend extends PolyfillBackend {
+   #model;
+
+   constructor(config) {
+     super(config.modelName || DEFAULT_MODELS.firebase);
+     this.ai = getAI(initializeApp(config), { backend: new GoogleAIBackend() });
+   }
+
+   createSession(_options, inCloudParams) {
+     this.#model = getGenerativeModel(this.ai, {
+       mode: InferenceMode.ONLY_IN_CLOUD,
+       inCloudParams,
+     });
+     return this.#model;
+   }
+
+   async generateContent(contents) {
+     const result = await this.#model.generateContent({ contents });
+     const usage = result.response.usageMetadata?.promptTokenCount || 0;
+     return { text: result.response.text(), usage };
+   }
+
+   async generateContentStream(contents) {
+     const result = await this.#model.generateContentStream({ contents });
+     return result.stream;
+   }
+
+   async countTokens(contents) {
+     const { totalTokens } = await this.#model.countTokens({ contents });
+     return totalTokens;
+   }
+ }
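Editorial note, not from the package: `initializeApp(config)` takes a standard Firebase web-app config, so this backend's constructor presumably expects that shape plus an optional `modelName`. A hedged sketch with placeholder values only:

import FirebaseBackend from './backends/firebase.js';

// Placeholder Firebase web-app config; every field value here is illustrative.
const backend = new FirebaseBackend({
  apiKey: 'YOUR_FIREBASE_API_KEY',
  authDomain: 'your-project.firebaseapp.com',
  projectId: 'your-project',
  appId: 'YOUR_FIREBASE_APP_ID',
  // modelName can optionally override DEFAULT_MODELS.firebase
});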
package/backends/gemini.js
ADDED
@@ -0,0 +1,48 @@
+ import { GoogleGenerativeAI } from 'https://esm.run/@google/generative-ai';
+ import PolyfillBackend from './base.js';
+ import { DEFAULT_MODELS } from './defaults.js';
+
+ /**
+  * Google Gemini API Backend
+  */
+ export default class GeminiBackend extends PolyfillBackend {
+   #model;
+
+   constructor(config) {
+     super(config.modelName || DEFAULT_MODELS.gemini);
+     this.genAI = new GoogleGenerativeAI(config.apiKey);
+   }
+
+   createSession(options, inCloudParams) {
+     const modelParams = {
+       model: options.modelName || this.modelName,
+       generationConfig: inCloudParams.generationConfig,
+       systemInstruction: inCloudParams.systemInstruction,
+     };
+     // Clean undefined systemInstruction
+     if (!modelParams.systemInstruction) {
+       delete modelParams.systemInstruction;
+     }
+
+     this.#model = this.genAI.getGenerativeModel(modelParams);
+     return this.#model;
+   }
+
+   async generateContent(contents) {
+     // Gemini SDK expects { role, parts: [...] } which matches our internal structure
+     const result = await this.#model.generateContent({ contents });
+     const response = await result.response;
+     const usage = response.usageMetadata?.promptTokenCount || 0;
+     return { text: response.text(), usage };
+   }
+
+   async generateContentStream(contents) {
+     const result = await this.#model.generateContentStream({ contents });
+     return result.stream;
+   }
+
+   async countTokens(contents) {
+     const { totalTokens } = await this.#model.countTokens({ contents });
+     return totalTokens;
+   }
+ }
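Editorial note, not from the package: the backends consume Gemini-style `{ role, parts }` content. A minimal sketch of driving the Gemini backend directly (normally the polyfill constructs and calls it internally); the API key and prompt are placeholders:

import GeminiBackend from './backends/gemini.js';

const backend = new GeminiBackend({ apiKey: 'YOUR_GEMINI_API_KEY' });
backend.createSession({}, { generationConfig: { temperature: 0.7 } });

// History plus the new message, in the { role, parts } shape the backends share.
const contents = [
  { role: 'user', parts: [{ text: 'Write a haiku about polyfills.' }] },
];
const { text, usage } = await backend.generateContent(contents);
console.log(text, usage);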
package/backends/openai.js
ADDED
@@ -0,0 +1,340 @@
+ import OpenAI from 'https://esm.run/openai';
+ import PolyfillBackend from './base.js';
+ import { DEFAULT_MODELS } from './defaults.js';
+
+ /**
+  * OpenAI API Backend
+  */
+ export default class OpenAIBackend extends PolyfillBackend {
+   #model;
+
+   constructor(config) {
+     super(config.modelName || DEFAULT_MODELS.openai);
+     this.config = config;
+     this.openai = new OpenAI({
+       apiKey: config.apiKey,
+       dangerouslyAllowBrowser: true, // Required for client-side usage
+     });
+   }
+
+   static availability(options = {}) {
+     if (options.expectedInputs) {
+       const hasAudio = options.expectedInputs.some(
+         (input) => input.type === 'audio'
+       );
+       const hasImage = options.expectedInputs.some(
+         (input) => input.type === 'image'
+       );
+       if (hasAudio && hasImage) {
+         return 'unavailable';
+       }
+     }
+     return 'available';
+   }
+
+   createSession(options, inCloudParams) {
+     // OpenAI doesn't have a "session" object like Gemini, so we return a context object
+     // tailored for our generate methods.
+     this.#model = {
+       model: options.modelName || this.modelName,
+       temperature: inCloudParams.generationConfig?.temperature,
+       top_p: 1.0, // Default to 1.0 as topK is not directly supported the same way
+       systemInstruction: inCloudParams.systemInstruction,
+     };
+
+     const config = inCloudParams.generationConfig || {};
+     if (config.responseSchema) {
+       const { schema, wrapped } = this.#fixSchemaForOpenAI(
+         config.responseSchema
+       );
+       this.#model.response_format = {
+         type: 'json_schema',
+         json_schema: {
+           name: 'response',
+           strict: true,
+           schema: schema,
+         },
+       };
+       this.#model.response_wrapped = wrapped;
+     } else if (config.responseMimeType === 'application/json') {
+       this.#model.response_format = { type: 'json_object' };
+     }
+
+     return this.#model;
+   }
+
+   /**
+    * OpenAI Structured Outputs require:
+    * 1. All fields in objects to be marked as 'required'.
+    * 2. Objects to have 'additionalProperties: false'.
+    * 3. The root must be an 'object'.
+    */
+   #fixSchemaForOpenAI(schema) {
+     if (typeof schema !== 'object' || schema === null) {
+       return { schema, wrapped: false };
+     }
+
+     const processNode = (node) => {
+       if (node.type === 'object') {
+         if (node.properties) {
+           node.additionalProperties = false;
+           node.required = Object.keys(node.properties);
+           for (const key in node.properties) {
+             processNode(node.properties[key]);
+           }
+         } else {
+           node.additionalProperties = false;
+           node.required = [];
+         }
+       } else if (node.type === 'array' && node.items) {
+         processNode(node.items);
+       }
+       return node;
+     };
+
+     // Deep clone to avoid side effects
+     const cloned = JSON.parse(JSON.stringify(schema));
+
+     if (cloned.type !== 'object') {
+       // Wrap in object as OpenAI requires object root
+       return {
+         wrapped: true,
+         schema: {
+           type: 'object',
+           properties: { value: cloned },
+           required: ['value'],
+           additionalProperties: false,
+         },
+       };
+     }
+
+     return {
+       wrapped: false,
+       schema: processNode(cloned),
+     };
+   }
+
+   #validateContent(messages) {
+     let hasImage = false;
+     let hasAudio = false;
+
+     for (const msg of messages) {
+       if (Array.isArray(msg.content)) {
+         for (const part of msg.content) {
+           if (part.type === 'image_url') {
+             hasImage = true;
+           }
+           if (part.type === 'input_audio') {
+             hasAudio = true;
+           }
+         }
+       }
+     }
+
+     if (hasImage && hasAudio) {
+       throw new Error(
+         'OpenAI backend does not support mixing images and audio in the same session. Please start a new session.'
+       );
+     }
+
+     return { hasImage, hasAudio };
+   }
+
+   #routeModel(hasAudio) {
+     // If the user explicitly provided a model in the session options, respect it.
+     // Otherwise, pick based on content.
+     if (this.#model.model !== this.modelName) {
+       return this.#model.model;
+     }
+
+     return hasAudio ? `${this.modelName}-audio-preview` : this.modelName;
+   }
+
+   async generateContent(contents) {
+     const { messages } = this.#convertContentsToInput(
+       contents,
+       this.#model.systemInstruction
+     );
+     const { hasAudio } = this.#validateContent(messages);
+     const model = this.#routeModel(hasAudio);
+
+     if (
+       model === `${this.modelName}-audio-preview` &&
+       this.#model.response_format
+     ) {
+       throw new DOMException(
+         `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
+         'NotSupportedError'
+       );
+     }
+
+     const options = {
+       model: model,
+       messages: messages,
+     };
+
+     if (this.#model.temperature > 0) {
+       options.temperature = this.#model.temperature;
+     }
+
+     if (this.#model.response_format) {
+       options.response_format = this.#model.response_format;
+     }
+
+     try {
+       const response = await this.openai.chat.completions.create(options);
+
+       const choice = response.choices[0];
+       let text = choice.message.content;
+
+       if (this.#model.response_wrapped && text) {
+         try {
+           const parsed = JSON.parse(text);
+           if (parsed && typeof parsed === 'object' && 'value' in parsed) {
+             text = JSON.stringify(parsed.value);
+           }
+         } catch {
+           // Ignore parsing error, return raw text
+         }
+       }
+
+       const usage = response.usage?.prompt_tokens || 0;
+
+       return { text, usage };
+     } catch (error) {
+       console.error('OpenAI Generate Content Error:', error);
+       throw error;
+     }
+   }
+
+   async generateContentStream(contents) {
+     const { messages } = this.#convertContentsToInput(
+       contents,
+       this.#model.systemInstruction
+     );
+     const { hasAudio } = this.#validateContent(messages);
+     const model = this.#routeModel(hasAudio);
+
+     if (
+       model === `${this.modelName}-audio-preview` &&
+       this.#model.response_format
+     ) {
+       throw new DOMException(
+         `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
+         'NotSupportedError'
+       );
+     }
+
+     const options = {
+       model: model,
+       messages: messages,
+       stream: true,
+     };
+
+     if (this.#model.temperature > 0) {
+       options.temperature = this.#model.temperature;
+     }
+
+     if (this.#model.response_format) {
+       options.response_format = this.#model.response_format;
+     }
+
+     try {
+       const stream = await this.openai.chat.completions.create(options);
+
+       // Convert OpenAI stream to an AsyncIterable that yields chunks
+       return (async function* () {
+         let firstChunk = true;
+         for await (const chunk of stream) {
+           let text = chunk.choices[0]?.delta?.content;
+           if (text) {
+             // Note: Unwrapping a wrapped object in a stream is complex.
+             // For now, streaming wrapped results will yield the full JSON including the wrapper.
+             yield {
+               text: () => text,
+               usageMetadata: { totalTokenCount: 0 },
+             };
+           }
+         }
+       })();
+     } catch (error) {
+       console.error('OpenAI Generate Content Stream Error:', error);
+       throw error;
+     }
+   }
+
+   async countTokens(contents) {
+     // OpenAI does not provide a public API endpoint for counting tokens before generation.
+     // Implementing countTokens strictly requires a tokenizer like `tiktoken`.
+     // For this initial implementation, we use a character-based approximation (e.g., text.length / 4)
+     // to avoid adding heavy WASM dependencies (`tiktoken`) to the polyfill.
+     let totalText = '';
+     if (this.#model && this.#model.systemInstruction) {
+       totalText += this.#model.systemInstruction;
+     }
+
+     if (Array.isArray(contents)) {
+       for (const content of contents) {
+         if (!content.parts) {
+           continue;
+         }
+         for (const part of content.parts) {
+           if (part.text) {
+             totalText += part.text;
+           } else if (part.inlineData) {
+             // Approximate image token cost (e.g., ~1000 chars worth)
+             totalText += ' '.repeat(1000);
+           }
+         }
+       }
+     }
+
+     return Math.ceil(totalText.length / 4);
+   }
+
+   #convertContentsToInput(contents, systemInstruction) {
+     const messages = [];
+
+     // System instructions
+     if (systemInstruction) {
+       messages.push({
+         role: 'system',
+         content: systemInstruction,
+       });
+     }
+
+     for (const content of contents) {
+       const role = content.role === 'model' ? 'assistant' : 'user';
+       const contentParts = [];
+
+       for (const part of content.parts) {
+         if (part.text) {
+           contentParts.push({ type: 'text', text: part.text });
+         } else if (part.inlineData) {
+           const { data, mimeType } = part.inlineData;
+           if (mimeType.startsWith('image/')) {
+             contentParts.push({
+               type: 'image_url',
+               image_url: { url: `data:${mimeType};base64,${data}` },
+             });
+           } else if (mimeType.startsWith('audio/')) {
+             contentParts.push({
+               type: 'input_audio',
+               input_audio: {
+                 data: data,
+                 format: mimeType.split('/')[1] === 'mpeg' ? 'mp3' : 'wav',
+               },
+             });
+           }
+         }
+       }
+
+       // Simplification: if only one text part, just send string content for better compatibility
+       // but multimodal models usually prefer the array format.
+       // We'll keep the array format for consistency with multimodal inputs.
+       messages.push({ role, content: contentParts });
+     }
+
+     return { messages };
+   }
+ }
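Editorial note, not from the package: to show what the schema fix-up above does, here is an illustrative `responseSchema` with a non-object root alongside the wrapped schema the backend would send to OpenAI; the example values are chosen only for demonstration.

// Input responseConstraint / responseSchema whose root is an array:
const responseSchema = { type: 'array', items: { type: 'string' } };

// What OpenAI receives after wrapping, since structured outputs require an object root:
const wrappedForOpenAI = {
  type: 'object',
  properties: { value: { type: 'array', items: { type: 'string' } } },
  required: ['value'],
  additionalProperties: false,
};
// generateContent() later strips the { value: ... } wrapper from the model's JSON reply.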
package/backends/transformers.js
ADDED
@@ -0,0 +1,106 @@
+ import { pipeline, TextStreamer } from 'https://esm.run/@huggingface/transformers';
+ import PolyfillBackend from './base.js';
+ import { DEFAULT_MODELS } from './defaults.js';
+
+ /**
+  * Transformers.js (ONNX Runtime) Backend
+  */
+ export default class TransformersBackend extends PolyfillBackend {
+   #generator;
+   #tokenizer;
+
+   constructor(config) {
+     super(config.modelName || DEFAULT_MODELS.transformers);
+   }
+
+   async #ensureGenerator() {
+     if (!this.#generator) {
+       console.log(`[Transformers.js] Loading model: ${this.modelName}`);
+       this.#generator = await pipeline('text-generation', this.modelName, {
+         device: 'webgpu',
+       });
+       this.#tokenizer = this.#generator.tokenizer;
+     }
+     return this.#generator;
+   }
+
+   async createSession(options, inCloudParams) {
+     // Initializing the generator can be slow, so we do it lazily or here.
+     // For now, let's trigger the loading.
+     await this.#ensureGenerator();
+
+     // We don't really have "sessions" in the same way Gemini does,
+     // but we can store the generation config.
+     this.generationConfig = {
+       max_new_tokens: 512, // Default limit
+       temperature: inCloudParams.generationConfig?.temperature || 1.0,
+       top_p: 1.0,
+       do_sample: inCloudParams.generationConfig?.temperature > 0,
+     };
+
+     return this.#generator;
+   }
+
+   async generateContent(contents) {
+     const generator = await this.#ensureGenerator();
+     const prompt = this.#convertContentsToPrompt(contents);
+
+     const output = await generator(prompt, this.generationConfig);
+     const text = output[0].generated_text.slice(prompt.length);
+
+     // Approximate usage
+     const usage = await this.countTokens(contents);
+
+     return { text, usage };
+   }
+
+   async generateContentStream(contents) {
+     const generator = await this.#ensureGenerator();
+     const prompt = this.#convertContentsToPrompt(contents);
+
+     const streamer = new TextStreamer(this.#tokenizer, {
+       skip_prompt: true,
+       skip_special_tokens: true,
+     });
+
+     // Run generation in the background (don't await)
+     generator(prompt, {
+       ...this.generationConfig,
+       streamer,
+     });
+
+     // streamer is an AsyncIterable in Transformers.js v3
+     return (async function* () {
+       for await (const newText of streamer) {
+         yield {
+           text: () => newText,
+           usageMetadata: { totalTokenCount: 0 },
+         };
+       }
+     })();
+   }
+
+   async countTokens(contents) {
+     await this.#ensureGenerator();
+     const text = this.#convertContentsToPrompt(contents);
+     const { input_ids } = await this.#tokenizer(text);
+     return input_ids.size;
+   }
+
+   #convertContentsToPrompt(contents) {
+     // Simple ChatML-like format for Qwen/Llama
+     let prompt = '';
+     for (const content of contents) {
+       const role = content.role === 'model' ? 'assistant' : 'user';
+       prompt += `<|im_start|>${role}\n`;
+       for (const part of content.parts) {
+         if (part.text) {
+           prompt += part.text;
+         }
+       }
+       prompt += '<|im_end|>\n';
+     }
+     prompt += '<|im_start|>assistant\n';
+     return prompt;
+   }
+ }
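Editorial note, not from the package: the prompt builder above emits a ChatML-style transcript. For a two-turn history it would produce roughly the following (shown as comments; the example turns are invented):

// contents:
//   [{ role: 'user',  parts: [{ text: 'Hi' }] },
//    { role: 'model', parts: [{ text: 'Hello!' }] }]
//
// resulting prompt string:
//   <|im_start|>user
//   Hi<|im_end|>
//   <|im_start|>assistant
//   Hello!<|im_end|>
//   <|im_start|>assistant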
package/package.json
CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "prompt-api-polyfill",
-   "version": "0.2.0",
+   "version": "0.3.0",
    "description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API, or OpenAI API.",
    "type": "module",
    "main": "./prompt-api-polyfill.js",
@@ -14,7 +14,8 @@
      "json-schema-converter.js",
      "multimodal-converter.js",
      "prompt-api-polyfill.js",
-     "dot_env.json"
+     "dot_env.json",
+     "backends/"
    ],
    "sideEffects": true,
    "keywords": [
package/prompt-api-polyfill.js
CHANGED
@@ -120,6 +120,10 @@ export class LanguageModel extends EventTarget {
        config: 'OPENAI_CONFIG',
        path: './backends/openai.js',
      },
+     {
+       config: 'TRANSFORMERS_CONFIG',
+       path: './backends/transformers.js',
+     },
    ];

    static #getBackendInfo() {