prompt-api-polyfill 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dot_env.json CHANGED
@@ -3,6 +3,9 @@
   "projectId": "",
   "appId": "",
   "modelName": "",
+  "useAppCheck": false,
+  "reCaptchaSiteKey": "",
+  "useLimitedUseAppCheckTokens": true,
   "device": "webgpu",
   "dtype": "q4f16"
 }
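The three new keys wire Firebase App Check into the configuration: `useAppCheck` toggles the feature, `reCaptchaSiteKey` supplies the site key for App Check's reCAPTCHA attestation provider, and `useLimitedUseAppCheckTokens` opts into single-use tokens. A filled-in dot_env.json might look like this sketch (every value is a placeholder):

{
  "projectId": "my-firebase-project",
  "appId": "1:1234567890:web:abcdef123456",
  "modelName": "",
  "useAppCheck": true,
  "reCaptchaSiteKey": "6LcEXAMPLEsitekey",
  "useLimitedUseAppCheckTokens": true,
  "device": "webgpu",
  "dtype": "q4f16"
}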
package/package.json CHANGED
@@ -1,21 +1,17 @@
 {
   "name": "prompt-api-polyfill",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API, OpenAI API, or Transformers.js.",
   "type": "module",
-  "main": "./prompt-api-polyfill.js",
-  "module": "./prompt-api-polyfill.js",
-  "browser": "./prompt-api-polyfill.js",
+  "main": "./dist/prompt-api-polyfill.js",
+  "module": "./dist/prompt-api-polyfill.js",
+  "browser": "./dist/prompt-api-polyfill.js",
   "exports": {
-    ".": "./prompt-api-polyfill.js"
+    ".": "./dist/prompt-api-polyfill.js"
   },
   "files": [
-    "async-iterator-polyfill.js",
-    "json-schema-converter.js",
-    "multimodal-converter.js",
-    "prompt-api-polyfill.js",
-    "dot_env.json",
-    "backends/"
+    "dist/",
+    "dot_env.json"
   ],
   "sideEffects": true,
   "keywords": [
@@ -36,17 +32,26 @@
   "bugs": {
     "url": "https://github.com/GoogleChromeLabs/web-ai-demos/issues"
   },
-  "homepage": "https://github.com/GoogleChromeLabs/web-ai-demos/tree/main/prompt-api-polyfill/README.md",
+  "homepage": "https://github.com/GoogleChromeLabs/web-ai-demos/tree/main/prompt-api-polyfill",
   "license": "Apache-2.0",
   "scripts": {
-    "start": "npx http-server",
+    "start": "vite",
+    "build": "vite build",
+    "prepublishOnly": "npm run build",
     "sync:wpt": "node scripts/sync-wpt.js",
     "generate:wpt": "node scripts/list-backends.js && node scripts/generate-wpt-wrappers.js",
-    "test:wpt": "npm run sync:wpt && npm run generate:wpt && npx http-server -o /tests/wpt/index.html",
+    "test:wpt": "npm run sync:wpt && npm run generate:wpt && npx vite --open /tests/wpt/index.html",
     "fix": "npx prettier --write ."
   },
   "devDependencies": {
-    "http-server": "^14.1.1",
-    "prettier-plugin-curly": "^0.4.1"
+    "prettier": "^3.8.1",
+    "prettier-plugin-curly": "^0.4.1",
+    "vite": "^7.3.1"
+  },
+  "dependencies": {
+    "@google/genai": "^1.41.0",
+    "@huggingface/transformers": "^3.8.1",
+    "firebase": "^12.9.0",
+    "openai": "^6.22.0"
   }
 }
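The runtime SDKs (firebase, openai, @google/genai, @huggingface/transformers) move from esm.run imports in the source files (deleted below) to regular npm dependencies bundled by Vite, and `prepublishOnly` rebuilds dist/ before every publish. Consumption stays a single side-effect import; a minimal sketch, assuming the bundle registers the `LanguageModel` global that the package description and `"sideEffects": true` imply:

// Resolved via the "exports" map to ./dist/prompt-api-polyfill.js
import 'prompt-api-polyfill';

// The Prompt API surface should now be patched in:
const availability = await LanguageModel.availability();
console.log(availability); // e.g. 'available'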
package/async-iterator-polyfill.js DELETED
@@ -1,16 +0,0 @@
-if (!ReadableStream.prototype[Symbol.asyncIterator]) {
-  ReadableStream.prototype[Symbol.asyncIterator] = async function* () {
-    const reader = this.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) {
-          return;
-        }
-        yield value;
-      }
-    } finally {
-      reader.releaseLock();
-    }
-  };
-}
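The deleted file above taught ReadableStream to work with `for await...of` in engines that lack a native `Symbol.asyncIterator` implementation; with the 1.1.0 layout it is no longer published as a standalone module (only dist/ and dot_env.json ship). What it enabled, as a small usage sketch:

// With the polyfill applied, any ReadableStream is async-iterable,
// including the stream a promptStreaming()-style API hands back:
const stream = new ReadableStream({
  start(controller) {
    controller.enqueue('Hello, ');
    controller.enqueue('world!');
    controller.close();
  },
});

for await (const chunk of stream) {
  console.log(chunk); // 'Hello, ' then 'world!'
}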
package/backends/defaults.js DELETED
@@ -1,13 +0,0 @@
-/**
- * Default model versions for each backend.
- */
-export const DEFAULT_MODELS = {
-  firebase: { modelName: 'gemini-2.5-flash-lite' },
-  gemini: { modelName: 'gemini-2.0-flash-lite-preview-02-05' },
-  openai: { modelName: 'gpt-4o' },
-  transformers: {
-    modelName: 'onnx-community/gemma-3-1b-it-ONNX-GQA',
-    device: 'webgpu',
-    dtype: 'q4f16',
-  },
-};
package/backends/firebase.js DELETED
@@ -1,49 +0,0 @@
-import { initializeApp } from 'https://esm.run/firebase/app';
-import {
-  getAI,
-  getGenerativeModel,
-  GoogleAIBackend,
-  InferenceMode,
-} from 'https://esm.run/firebase/ai';
-import PolyfillBackend from './base.js';
-import { DEFAULT_MODELS } from './defaults.js';
-
-/**
- * Firebase AI Logic Backend
- */
-export default class FirebaseBackend extends PolyfillBackend {
-  #model;
-  #sessionParams;
-
-  constructor(config) {
-    super(config.modelName || DEFAULT_MODELS.firebase.modelName);
-    this.ai = getAI(initializeApp(config), { backend: new GoogleAIBackend() });
-  }
-
-  createSession(_options, sessionParams) {
-    this.#sessionParams = sessionParams;
-    this.#model = getGenerativeModel(this.ai, {
-      mode: InferenceMode.ONLY_IN_CLOUD,
-      inCloudParams: sessionParams,
-    });
-    return this.#model;
-  }
-
-  async generateContent(contents) {
-    const result = await this.#model.generateContent({ contents });
-    const usage = result.response.usageMetadata?.promptTokenCount || 0;
-    return { text: result.response.text(), usage };
-  }
-
-  async generateContentStream(contents) {
-    const result = await this.#model.generateContentStream({ contents });
-    return result.stream;
-  }
-
-  async countTokens(contents) {
-    const { totalTokens } = await this.#model.countTokens({
-      contents,
-    });
-    return totalTokens;
-  }
-}
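FirebaseBackend passes the entire config object straight to initializeApp(), so the keys in dot_env.json double as the Firebase app options. A hypothetical instantiation (the import path, the apiKey field, and all values are assumptions for illustration):

import FirebaseBackend from './backends/firebase.js'; // path assumed

const backend = new FirebaseBackend({
  apiKey: 'AIzaExampleKey', // placeholder Firebase credentials
  projectId: 'my-firebase-project',
  appId: '1:1234567890:web:abcdef123456',
  modelName: '', // falsy -> DEFAULT_MODELS.firebase, 'gemini-2.5-flash-lite'
});

// createSession ignores its first argument (_options) and forwards the
// second as inCloudParams; the sessionParams shape here is a guess.
backend.createSession({}, { generationConfig: { temperature: 1.0 } });

const { text, usage } = await backend.generateContent([
  { role: 'user', parts: [{ text: 'Hello!' }] },
]);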
package/backends/gemini.js DELETED
@@ -1,52 +0,0 @@
-import { GoogleGenerativeAI } from 'https://esm.run/@google/generative-ai';
-import PolyfillBackend from './base.js';
-import { DEFAULT_MODELS } from './defaults.js';
-
-/**
- * Google Gemini API Backend
- */
-export default class GeminiBackend extends PolyfillBackend {
-  #model;
-  #sessionParams;
-
-  constructor(config) {
-    super(config.modelName || DEFAULT_MODELS.gemini.modelName);
-    this.genAI = new GoogleGenerativeAI(config.apiKey);
-  }
-
-  createSession(options, sessionParams) {
-    this.#sessionParams = sessionParams;
-    const modelParams = {
-      model: options.modelName || this.modelName,
-      generationConfig: sessionParams.generationConfig,
-      systemInstruction: sessionParams.systemInstruction,
-    };
-    // Clean undefined systemInstruction
-    if (!modelParams.systemInstruction) {
-      delete modelParams.systemInstruction;
-    }
-
-    this.#model = this.genAI.getGenerativeModel(modelParams);
-    return this.#model;
-  }
-
-  async generateContent(contents) {
-    // Gemini SDK expects { role, parts: [...] } which matches our internal structure
-    const result = await this.#model.generateContent({ contents });
-    const response = await result.response;
-    const usage = response.usageMetadata?.promptTokenCount || 0;
-    return { text: response.text(), usage };
-  }
-
-  async generateContentStream(contents) {
-    const result = await this.#model.generateContentStream({ contents });
-    return result.stream;
-  }
-
-  async countTokens(contents) {
-    const { totalTokens } = await this.#model.countTokens({
-      contents,
-    });
-    return totalTokens;
-  }
-}
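GeminiBackend was built on the now-deprecated `@google/generative-ai` client pulled from esm.run; the 1.1.0 dependency list pins its successor, `@google/genai`, instead. For orientation, a rough sketch of the equivalent one-shot call in the newer SDK (model name, key, and prompt are placeholders):

import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });
const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash-lite',
  contents: 'Hello!',
});
// In @google/genai, text is a property; in the deleted code above,
// response.text() was a method.
console.log(response.text);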
package/backends/openai.js DELETED
@@ -1,337 +0,0 @@
-import OpenAI from 'https://esm.run/openai';
-import PolyfillBackend from './base.js';
-import { DEFAULT_MODELS } from './defaults.js';
-
-/**
- * OpenAI API Backend
- */
-export default class OpenAIBackend extends PolyfillBackend {
-  #model;
-
-  constructor(config) {
-    super(config.modelName || DEFAULT_MODELS.openai.modelName);
-    this.config = config;
-    this.openai = new OpenAI({
-      apiKey: config.apiKey,
-      dangerouslyAllowBrowser: true, // Required for client-side usage
-    });
-  }
-
-  static availability(options = {}) {
-    if (options.expectedInputs) {
-      const hasAudio = options.expectedInputs.some(
-        (input) => input.type === 'audio'
-      );
-      const hasImage = options.expectedInputs.some(
-        (input) => input.type === 'image'
-      );
-      if (hasAudio && hasImage) {
-        return 'unavailable';
-      }
-    }
-    return 'available';
-  }
-
-  createSession(options, sessionParams) {
-    // OpenAI doesn't have a "session" object like Gemini, so we return a context object
-    // tailored for our generate methods.
-    this.#model = {
-      model: options.modelName || this.modelName,
-      temperature: sessionParams.generationConfig?.temperature,
-      top_p: 1.0, // Default to 1.0 as topK is not directly supported the same way
-      systemInstruction: sessionParams.systemInstruction,
-    };
-
-    const config = sessionParams.generationConfig || {};
-    if (config.responseSchema) {
-      const { schema, wrapped } = this.#fixSchemaForOpenAI(
-        config.responseSchema
-      );
-      this.#model.response_format = {
-        type: 'json_schema',
-        json_schema: {
-          name: 'response',
-          strict: true,
-          schema: schema,
-        },
-      };
-      this.#model.response_wrapped = wrapped;
-    } else if (config.responseMimeType === 'application/json') {
-      this.#model.response_format = { type: 'json_object' };
-    }
-
-    return this.#model;
-  }
-
-  /**
-   * OpenAI Structured Outputs require:
-   * 1. All fields in objects to be marked as 'required'.
-   * 2. Objects to have 'additionalProperties: false'.
-   * 3. The root must be an 'object'.
-   */
-  #fixSchemaForOpenAI(schema) {
-    if (typeof schema !== 'object' || schema === null) {
-      return { schema, wrapped: false };
-    }
-
-    const processNode = (node) => {
-      if (node.type === 'object') {
-        if (node.properties) {
-          node.additionalProperties = false;
-          node.required = Object.keys(node.properties);
-          for (const key in node.properties) {
-            processNode(node.properties[key]);
-          }
-        } else {
-          node.additionalProperties = false;
-          node.required = [];
-        }
-      } else if (node.type === 'array' && node.items) {
-        processNode(node.items);
-      }
-      return node;
-    };
-
-    // Deep clone to avoid side effects
-    const cloned = JSON.parse(JSON.stringify(schema));
-
-    if (cloned.type !== 'object') {
-      // Wrap in object as OpenAI requires object root
-      return {
-        wrapped: true,
-        schema: {
-          type: 'object',
-          properties: { value: cloned },
-          required: ['value'],
-          additionalProperties: false,
-        },
-      };
-    }
-
-    return {
-      wrapped: false,
-      schema: processNode(cloned),
-    };
-  }
-
-  #validateContent(messages) {
-    let hasImage = false;
-    let hasAudio = false;
-
-    for (const msg of messages) {
-      if (Array.isArray(msg.content)) {
-        for (const part of msg.content) {
-          if (part.type === 'image_url') {
-            hasImage = true;
-          }
-          if (part.type === 'input_audio') {
-            hasAudio = true;
-          }
-        }
-      }
-    }
-
-    if (hasImage && hasAudio) {
-      throw new Error(
-        'OpenAI backend does not support mixing images and audio in the same session. Please start a new session.'
-      );
-    }
-
-    return { hasImage, hasAudio };
-  }
-
-  #routeModel(hasAudio) {
-    // If the user explicitly provided a model in the session options, respect it.
-    // Otherwise, pick based on content.
-    if (this.#model.model !== this.modelName) {
-      return this.#model.model;
-    }
-
-    return hasAudio ? `${this.modelName}-audio-preview` : this.modelName;
-  }
-
-  async generateContent(contents) {
-    const { messages } = this.#convertContentsToInput(
-      contents,
-      this.#model.systemInstruction
-    );
-    const { hasAudio } = this.#validateContent(messages);
-    const model = this.#routeModel(hasAudio);
-
-    if (
-      model === `${this.modelName}-audio-preview` &&
-      this.#model.response_format
-    ) {
-      throw new DOMException(
-        `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
-        'NotSupportedError'
-      );
-    }
-
-    const options = {
-      model: model,
-      messages: messages,
-    };
-
-    if (this.#model.temperature > 0) {
-      options.temperature = this.#model.temperature;
-    }
-
-    if (this.#model.response_format) {
-      options.response_format = this.#model.response_format;
-    }
-
-    try {
-      const response = await this.openai.chat.completions.create(options);
-
-      const choice = response.choices[0];
-      let text = choice.message.content;
-
-      if (this.#model.response_wrapped && text) {
-        try {
-          const parsed = JSON.parse(text);
-          if (parsed && typeof parsed === 'object' && 'value' in parsed) {
-            text = JSON.stringify(parsed.value);
-          }
-        } catch {
-          // Ignore parsing error, return raw text
-        }
-      }
-
-      const usage = response.usage?.prompt_tokens || 0;
-
-      return { text, usage };
-    } catch (error) {
-      console.error('OpenAI Generate Content Error:', error);
-      throw error;
-    }
-  }
-
-  async generateContentStream(contents) {
-    const { messages } = this.#convertContentsToInput(
-      contents,
-      this.#model.systemInstruction
-    );
-    const { hasAudio } = this.#validateContent(messages);
-    const model = this.#routeModel(hasAudio);
-
-    if (
-      model === `${this.modelName}-audio-preview` &&
-      this.#model.response_format
-    ) {
-      throw new DOMException(
-        `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
-        'NotSupportedError'
-      );
-    }
-
-    const options = {
-      model: model,
-      messages: messages,
-      stream: true,
-    };
-
-    if (this.#model.temperature > 0) {
-      options.temperature = this.#model.temperature;
-    }
-
-    if (this.#model.response_format) {
-      options.response_format = this.#model.response_format;
-    }
-
-    try {
-      const stream = await this.openai.chat.completions.create(options);
-
-      // Convert OpenAI stream to an AsyncIterable that yields chunks
-      return (async function* () {
-        let firstChunk = true;
-        for await (const chunk of stream) {
-          let text = chunk.choices[0]?.delta?.content;
-          if (text) {
-            // Note: Unwrapping a wrapped object in a stream is complex.
-            // For now, streaming wrapped results will yield the full JSON including the wrapper.
-            yield {
-              text: () => text,
-              usageMetadata: { totalTokenCount: 0 },
-            };
-          }
-        }
-      })();
-    } catch (error) {
-      console.error('OpenAI Generate Content Stream Error:', error);
-      throw error;
-    }
-  }
-
-  async countTokens(contents) {
-    // OpenAI does not provide a public API endpoint for counting tokens before generation.
-    // Implementing countTokens strictly requires a tokenizer like `tiktoken`.
-    // For this initial implementation, we use a character-based approximation (e.g., text.length / 4)
-    // to avoid adding heavy WASM dependencies (`tiktoken`) to the polyfill.
-    let totalText = '';
-
-    if (Array.isArray(contents)) {
-      for (const content of contents) {
-        if (!content.parts) {
-          continue;
-        }
-        for (const part of content.parts) {
-          if (part.text) {
-            totalText += part.text;
-          } else if (part.inlineData) {
-            // Approximate image token cost (e.g., ~1000 chars worth)
-            totalText += ' '.repeat(1000);
-          }
-        }
-      }
-    }
-
-    return Math.ceil(totalText.length / 4);
-  }
-
-  #convertContentsToInput(contents, systemInstruction) {
-    const messages = [];
-
-    // System instructions
-    if (systemInstruction) {
-      messages.push({
-        role: 'system',
-        content: systemInstruction,
-      });
-    }
-
-    for (const content of contents) {
-      const role = content.role === 'model' ? 'assistant' : 'user';
-      const contentParts = [];
-
-      for (const part of content.parts) {
-        if (part.text) {
-          contentParts.push({ type: 'text', text: part.text });
-        } else if (part.inlineData) {
-          const { data, mimeType } = part.inlineData;
-          if (mimeType.startsWith('image/')) {
-            contentParts.push({
-              type: 'image_url',
-              image_url: { url: `data:${mimeType};base64,${data}` },
-            });
-          } else if (mimeType.startsWith('audio/')) {
-            contentParts.push({
-              type: 'input_audio',
-              input_audio: {
-                data: data,
-                format: mimeType.split('/')[1] === 'mpeg' ? 'mp3' : 'wav',
-              },
-            });
-          }
-        }
-      }
-
-      // Simplification: if only one text part, just send string content for better compatibility
-      // but multimodal models usually prefer the array format.
-      // We'll keep the array format for consistency with multimodal inputs.
-      messages.push({ role, content: contentParts });
-    }
-
-    return { messages };
-  }
-}
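The subtlest piece above is #fixSchemaForOpenAI: OpenAI's Structured Outputs mode rejects a schema unless every object marks all of its properties as required, sets additionalProperties: false, and has an object at the root. A worked example of what the transform produces (the input schema is invented for illustration):

// Hypothetical responseSchema passed via generationConfig:
const input = {
  type: 'object',
  properties: {
    name: { type: 'string' },
    tags: {
      type: 'array',
      items: { type: 'object', properties: { label: { type: 'string' } } },
    },
  },
};

// #fixSchemaForOpenAI(input) returns { wrapped: false, schema } where every
// object node (root and array items alike) has been patched in place:
// {
//   type: 'object',
//   properties: {
//     name: { type: 'string' },
//     tags: {
//       type: 'array',
//       items: {
//         type: 'object',
//         properties: { label: { type: 'string' } },
//         additionalProperties: false,
//         required: ['label'],
//       },
//     },
//   },
//   additionalProperties: false,
//   required: ['name', 'tags'],
// }

A non-object root such as { type: 'string' } instead comes back as { wrapped: true, schema: { type: 'object', properties: { value: { type: 'string' } }, required: ['value'], additionalProperties: false } }, and generateContent later strips the wrapper by re-serializing parsed.value from the model's JSON reply.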