prompt-api-polyfill 0.4.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +175 -15
- package/backends/base.js +3 -2
- package/backends/defaults.js +8 -3
- package/backends/firebase.js +8 -4
- package/backends/gemini.js +9 -5
- package/backends/openai.js +5 -8
- package/backends/transformers.js +451 -0
- package/dot_env.json +3 -1
- package/package.json +3 -2
- package/prompt-api-polyfill.js +188 -94
package/README.md
CHANGED
|
@@ -4,9 +4,10 @@ This package provides a browser polyfill for the
|
|
|
4
4
|
[Prompt API `LanguageModel`](https://github.com/webmachinelearning/prompt-api),
|
|
5
5
|
supporting dynamic backends:
|
|
6
6
|
|
|
7
|
-
- **Firebase AI Logic**
|
|
8
|
-
- **Google Gemini API**
|
|
9
|
-
- **OpenAI API**
|
|
7
|
+
- **Firebase AI Logic** (cloud)
|
|
8
|
+
- **Google Gemini API** (cloud)
|
|
9
|
+
- **OpenAI API** (cloud)
|
|
10
|
+
- **Transformers.js** (local after initial model download)
|
|
10
11
|
|
|
11
12
|
When loaded in the browser, it defines a global:
|
|
12
13
|
|
|
@@ -19,27 +20,34 @@ natively available.
|
|
|
19
20
|
|
|
20
21
|
## Supported Backends
|
|
21
22
|
|
|
22
|
-
### Firebase AI Logic
|
|
23
|
+
### Firebase AI Logic (cloud)
|
|
23
24
|
|
|
24
25
|
- **Uses**: `firebase/ai` SDK.
|
|
25
26
|
- **Select by setting**: `window.FIREBASE_CONFIG`.
|
|
26
27
|
- **Model**: Uses default if not specified (see
|
|
27
28
|
[`backends/defaults.js`](backends/defaults.js)).
|
|
28
29
|
|
|
29
|
-
### Google Gemini API
|
|
30
|
+
### Google Gemini API (cloud)
|
|
30
31
|
|
|
31
32
|
- **Uses**: `@google/generative-ai` SDK.
|
|
32
33
|
- **Select by setting**: `window.GEMINI_CONFIG`.
|
|
33
34
|
- **Model**: Uses default if not specified (see
|
|
34
35
|
[`backends/defaults.js`](backends/defaults.js)).
|
|
35
36
|
|
|
36
|
-
### OpenAI API
|
|
37
|
+
### OpenAI API (cloud)
|
|
37
38
|
|
|
38
39
|
- **Uses**: `openai` SDK.
|
|
39
40
|
- **Select by setting**: `window.OPENAI_CONFIG`.
|
|
40
41
|
- **Model**: Uses default if not specified (see
|
|
41
42
|
[`backends/defaults.js`](backends/defaults.js)).
|
|
42
43
|
|
|
44
|
+
### Transformers.js (local after initial model download)
|
|
45
|
+
|
|
46
|
+
- **Uses**: `@huggingface/transformers` SDK.
|
|
47
|
+
- **Select by setting**: `window.TRANSFORMERS_CONFIG`.
|
|
48
|
+
- **Model**: Uses default if not specified (see
|
|
49
|
+
[`backends/defaults.js`](backends/defaults.js)).
|
|
50
|
+
|
|
43
51
|
---
|
|
44
52
|
|
|
45
53
|
## Installation
|
|
@@ -52,7 +60,7 @@ npm install prompt-api-polyfill
|
|
|
52
60
|
|
|
53
61
|
## Quick start
|
|
54
62
|
|
|
55
|
-
### Backed by Firebase
|
|
63
|
+
### Backed by Firebase AI Logic (cloud)
|
|
56
64
|
|
|
57
65
|
1. **Create a Firebase project with Generative AI enabled**.
|
|
58
66
|
2. **Provide your Firebase config** on `window.FIREBASE_CONFIG`.
|
|
@@ -73,7 +81,7 @@ npm install prompt-api-polyfill
|
|
|
73
81
|
</script>
|
|
74
82
|
```
|
|
75
83
|
|
|
76
|
-
### Backed by Gemini API
|
|
84
|
+
### Backed by Gemini API (cloud)
|
|
77
85
|
|
|
78
86
|
1. **Get a Gemini API Key** from
|
|
79
87
|
[Google AI Studio](https://aistudio.google.com/).
|
|
@@ -94,7 +102,7 @@ npm install prompt-api-polyfill
|
|
|
94
102
|
</script>
|
|
95
103
|
```
|
|
96
104
|
|
|
97
|
-
### Backed by OpenAI API
|
|
105
|
+
### Backed by OpenAI API (cloud)
|
|
98
106
|
|
|
99
107
|
1. **Get an OpenAI API Key** from the
|
|
100
108
|
[OpenAI Platform](https://platform.openai.com/).
|
|
@@ -115,6 +123,29 @@ npm install prompt-api-polyfill
|
|
|
115
123
|
</script>
|
|
116
124
|
```
|
|
117
125
|
|
|
126
|
+
### Backed by Transformers.js (local after initial model download)
|
|
127
|
+
|
|
128
|
+
1. **Only a dummy API Key required** (runs locally in the browser).
|
|
129
|
+
2. **Provide configuration** on `window.TRANSFORMERS_CONFIG`.
|
|
130
|
+
3. **Import the polyfill**.
|
|
131
|
+
|
|
132
|
+
```html
|
|
133
|
+
<script type="module">
|
|
134
|
+
// Set TRANSFORMERS_CONFIG to select the Transformers.js backend
|
|
135
|
+
window.TRANSFORMERS_CONFIG = {
|
|
136
|
+
apiKey: 'dummy', // Required for now by the loader
|
|
137
|
+
device: 'webgpu', // 'webgpu' or 'cpu'
|
|
138
|
+
dtype: 'q4f16', // Quantization level
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
if (!('LanguageModel' in window)) {
|
|
142
|
+
await import('prompt-api-polyfill');
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
const session = await LanguageModel.create();
|
|
146
|
+
</script>
|
|
147
|
+
```
|
|
148
|
+
|
|
118
149
|
---
|
|
119
150
|
|
|
120
151
|
## Configuration
|
|
@@ -175,13 +206,17 @@ This repo ships with a template file:
|
|
|
175
206
|
```jsonc
|
|
176
207
|
// dot_env.json
|
|
177
208
|
{
|
|
178
|
-
// For Firebase:
|
|
209
|
+
// For Firebase AI Logic:
|
|
179
210
|
"projectId": "",
|
|
180
211
|
"appId": "",
|
|
181
212
|
"modelName": "",
|
|
182
213
|
|
|
183
|
-
// For Firebase OR Gemini OR OpenAI:
|
|
214
|
+
// For Firebase AI Logic OR Gemini OR OpenAI OR Transformers.js:
|
|
184
215
|
"apiKey": "",
|
|
216
|
+
|
|
217
|
+
// For Transformers.js:
|
|
218
|
+
"device": "webgpu",
|
|
219
|
+
"dtype": "q4f16",
|
|
185
220
|
}
|
|
186
221
|
```
|
|
187
222
|
|
|
@@ -198,7 +233,7 @@ cp dot_env.json .env.json
|
|
|
198
233
|
|
|
199
234
|
Then open `.env.json` and fill in the values.
|
|
200
235
|
|
|
201
|
-
**For Firebase:**
|
|
236
|
+
**For Firebase AI Logic:**
|
|
202
237
|
|
|
203
238
|
```json
|
|
204
239
|
{
|
|
@@ -227,13 +262,28 @@ Then open `.env.json` and fill in the values.
|
|
|
227
262
|
}
|
|
228
263
|
```
|
|
229
264
|
|
|
265
|
+
**For Transformers.js:**
|
|
266
|
+
|
|
267
|
+
```json
|
|
268
|
+
{
|
|
269
|
+
"apiKey": "dummy",
|
|
270
|
+
"modelName": "onnx-community/gemma-3-1b-it-ONNX-GQA",
|
|
271
|
+
"device": "webgpu",
|
|
272
|
+
"dtype": "q4f16"
|
|
273
|
+
}
|
|
274
|
+
```
|
|
275
|
+
|
|
230
276
|
### Field-by-field explanation
|
|
231
277
|
|
|
232
278
|
- `apiKey`:
|
|
233
|
-
- **Firebase**: Your Firebase Web API key.
|
|
279
|
+
- **Firebase AI Logic**: Your Firebase Web API key.
|
|
234
280
|
- **Gemini**: Your Gemini API Key.
|
|
235
281
|
- **OpenAI**: Your OpenAI API Key.
|
|
236
|
-
-
|
|
282
|
+
- **Transformers.js**: Use `"dummy"`.
|
|
283
|
+
- `projectId` / `appId`: **Firebase AI Logic only**.
|
|
284
|
+
|
|
285
|
+
- `device`: **Transformers.js only**. Either `"webgpu"` or `"cpu"`.
|
|
286
|
+
- `dtype`: **Transformers.js only**. Quantization level (e.g., `"q4f16"`).
|
|
237
287
|
|
|
238
288
|
- `modelName` (optional): The model ID to use. If not provided, the polyfill
|
|
239
289
|
uses the defaults defined in [`backends/defaults.js`](backends/defaults.js).
|
|
@@ -245,7 +295,8 @@ Then open `.env.json` and fill in the values.
|
|
|
245
295
|
### Wiring the config into the polyfill
|
|
246
296
|
|
|
247
297
|
Once `.env.json` is filled out, you can import it and expose it to the polyfill.
|
|
248
|
-
See the [Quick start](#quick-start) examples above.
|
|
298
|
+
See the [Quick start](#quick-start) examples above. For Transformers.js, ensure
|
|
299
|
+
you set `window.TRANSFORMERS_CONFIG`.
|
|
249
300
|
|
|
250
301
|
---
|
|
251
302
|
|
|
@@ -300,6 +351,115 @@ To see the browser and DevTools while testing, you can modify
|
|
|
300
351
|
|
|
301
352
|
---
|
|
302
353
|
|
|
354
|
+
## Create your own backend provider
|
|
355
|
+
|
|
356
|
+
If you want to add your own backend provider, these are the steps to follow.
|
|
357
|
+
|
|
358
|
+
### Extend the base backend class
|
|
359
|
+
|
|
360
|
+
Create a new file in the `backends/` directory, for example,
|
|
361
|
+
`backends/custom.js`. You need to extend the `PolyfillBackend` class and
|
|
362
|
+
implement the core methods that satisfy the expected interface.
|
|
363
|
+
|
|
364
|
+
```js
|
|
365
|
+
import PolyfillBackend from './base.js';
|
|
366
|
+
import { DEFAULT_MODELS } from './defaults.js';
|
|
367
|
+
|
|
368
|
+
export default class CustomBackend extends PolyfillBackend {
|
|
369
|
+
constructor(config) {
|
|
370
|
+
// config typically comes from a window global (e.g., window.CUSTOM_CONFIG)
|
|
371
|
+
super(config.modelName || DEFAULT_MODELS.custom.modelName);
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
// Check if the backend is configured (e.g., API key is present), if given
|
|
375
|
+
// combinations of modelName and options are supported, or, for local model,
|
|
376
|
+
// if the model is available.
|
|
377
|
+
static availability(options) {
|
|
378
|
+
return window.CUSTOM_CONFIG?.apiKey ? 'available' : 'unavailable';
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// Initialize the underlying SDK or API client. With local models, use
|
|
382
|
+
// monitorTarget to report model download progress to the polyfill.
|
|
383
|
+
createSession(options, sessionParams, monitorTarget) {
|
|
384
|
+
// Return the initialized session or client instance
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// Non-streaming prompt execution
|
|
388
|
+
async generateContent(contents) {
|
|
389
|
+
// contents: Array of { role: 'user'|'model', parts: [{ text: string }] }
|
|
390
|
+
// Return: { text: string, usage: number }
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
// Streaming prompt execution
|
|
394
|
+
async generateContentStream(contents) {
|
|
395
|
+
// Return: AsyncIterable yielding chunks
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
// Token counting for quota/usage tracking
|
|
399
|
+
async countTokens(contents) {
|
|
400
|
+
// Return: total token count (number)
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
### Register your backend
|
|
406
|
+
|
|
407
|
+
The polyfill uses a "First-Match Priority" strategy based on global
|
|
408
|
+
configuration. You need to register your backend in the `prompt-api-polyfill.js`
|
|
409
|
+
file by adding it to the static `#backends` array:
|
|
410
|
+
|
|
411
|
+
```js
|
|
412
|
+
// prompt-api-polyfill.js
|
|
413
|
+
static #backends = [
|
|
414
|
+
// ... existing backends
|
|
415
|
+
{
|
|
416
|
+
config: 'CUSTOM_CONFIG', // The global object to look for on `window`
|
|
417
|
+
path: './backends/custom.js',
|
|
418
|
+
},
|
|
419
|
+
];
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
### Set a default model
|
|
423
|
+
|
|
424
|
+
Define the fallback model identity in `backends/defaults.js`. This is used when
|
|
425
|
+
a user initializes a session without specifying a specific `modelName`.
|
|
426
|
+
|
|
427
|
+
```js
|
|
428
|
+
// backends/defaults.js
|
|
429
|
+
export const DEFAULT_MODELS = {
|
|
430
|
+
// ...
|
|
431
|
+
custom: { modelName: 'custom-model-pro-v1' },
|
|
432
|
+
};
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Enable local development and testing
|
|
436
|
+
|
|
437
|
+
The project uses a discovery script (`scripts/list-backends.js`) to generate
|
|
438
|
+
test matrices. To include your new backend in the test runner, create a
|
|
439
|
+
`.env-[name].json` file (for example, `.env-custom.json`) in the root directory:
|
|
440
|
+
|
|
441
|
+
```json
|
|
442
|
+
{
|
|
443
|
+
"apiKey": "your-api-key-here",
|
|
444
|
+
"modelName": "custom-model-pro-v1"
|
|
445
|
+
}
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
### Verify via Web Platform Tests (WPT)
|
|
449
|
+
|
|
450
|
+
The final step is ensuring compliance. Because the polyfill is spec-driven, any
|
|
451
|
+
new backend should pass the official (or tentative) Web Platform Tests:
|
|
452
|
+
|
|
453
|
+
```bash
|
|
454
|
+
npm run test:wpt
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
This verification step ensures that your backend handles things like
|
|
458
|
+
`AbortSignal`, system prompts, and history formatting exactly as the Prompt API
|
|
459
|
+
specification expects.
|
|
460
|
+
|
|
461
|
+
---
|
|
462
|
+
|
|
303
463
|
## License
|
|
304
464
|
|
|
305
465
|
Apache 2.0
|
package/backends/base.js
CHANGED
|
@@ -23,10 +23,11 @@ export default class PolyfillBackend {
|
|
|
23
23
|
/**
|
|
24
24
|
* Creates a model session and stores it.
|
|
25
25
|
* @param {Object} options - LanguageModel options.
|
|
26
|
-
* @param {Object}
|
|
26
|
+
* @param {Object} sessionParams - Parameters for the cloud or local model.
|
|
27
|
+
* @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
|
|
27
28
|
* @returns {any} The created session object.
|
|
28
29
|
*/
|
|
29
|
-
createSession(options,
|
|
30
|
+
createSession(options, sessionParams, monitorTarget) {
|
|
30
31
|
throw new Error('Not implemented');
|
|
31
32
|
}
|
|
32
33
|
|
package/backends/defaults.js
CHANGED
|
@@ -2,7 +2,12 @@
|
|
|
2
2
|
* Default model versions for each backend.
|
|
3
3
|
*/
|
|
4
4
|
export const DEFAULT_MODELS = {
|
|
5
|
-
firebase: 'gemini-2.5-flash-lite',
|
|
6
|
-
gemini: 'gemini-2.0-flash-lite-preview-02-05',
|
|
7
|
-
openai: 'gpt-4o',
|
|
5
|
+
firebase: { modelName: 'gemini-2.5-flash-lite' },
|
|
6
|
+
gemini: { modelName: 'gemini-2.0-flash-lite-preview-02-05' },
|
|
7
|
+
openai: { modelName: 'gpt-4o' },
|
|
8
|
+
transformers: {
|
|
9
|
+
modelName: 'onnx-community/gemma-3-1b-it-ONNX-GQA',
|
|
10
|
+
device: 'webgpu',
|
|
11
|
+
dtype: 'q4f16',
|
|
12
|
+
},
|
|
8
13
|
};
|
package/backends/firebase.js
CHANGED
|
@@ -13,16 +13,18 @@ import { DEFAULT_MODELS } from './defaults.js';
|
|
|
13
13
|
*/
|
|
14
14
|
export default class FirebaseBackend extends PolyfillBackend {
|
|
15
15
|
#model;
|
|
16
|
+
#sessionParams;
|
|
16
17
|
|
|
17
18
|
constructor(config) {
|
|
18
|
-
super(config.modelName || DEFAULT_MODELS.firebase);
|
|
19
|
+
super(config.modelName || DEFAULT_MODELS.firebase.modelName);
|
|
19
20
|
this.ai = getAI(initializeApp(config), { backend: new GoogleAIBackend() });
|
|
20
21
|
}
|
|
21
22
|
|
|
22
|
-
createSession(_options, inCloudParams) {
|
|
23
|
+
createSession(_options, sessionParams) {
|
|
24
|
+
this.#sessionParams = sessionParams;
|
|
23
25
|
this.#model = getGenerativeModel(this.ai, {
|
|
24
26
|
mode: InferenceMode.ONLY_IN_CLOUD,
|
|
25
|
-
inCloudParams,
|
|
27
|
+
inCloudParams: sessionParams,
|
|
26
28
|
});
|
|
27
29
|
return this.#model;
|
|
28
30
|
}
|
|
@@ -39,7 +41,9 @@ export default class FirebaseBackend extends PolyfillBackend {
|
|
|
39
41
|
}
|
|
40
42
|
|
|
41
43
|
async countTokens(contents) {
|
|
42
|
-
const { totalTokens } = await this.#model.countTokens({
|
|
44
|
+
const { totalTokens } = await this.#model.countTokens({
|
|
45
|
+
contents,
|
|
46
|
+
});
|
|
43
47
|
return totalTokens;
|
|
44
48
|
}
|
|
45
49
|
}
|
package/backends/gemini.js
CHANGED
|
@@ -7,17 +7,19 @@ import { DEFAULT_MODELS } from './defaults.js';
|
|
|
7
7
|
*/
|
|
8
8
|
export default class GeminiBackend extends PolyfillBackend {
|
|
9
9
|
#model;
|
|
10
|
+
#sessionParams;
|
|
10
11
|
|
|
11
12
|
constructor(config) {
|
|
12
|
-
super(config.modelName || DEFAULT_MODELS.gemini);
|
|
13
|
+
super(config.modelName || DEFAULT_MODELS.gemini.modelName);
|
|
13
14
|
this.genAI = new GoogleGenerativeAI(config.apiKey);
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
createSession(options,
|
|
17
|
+
createSession(options, sessionParams) {
|
|
18
|
+
this.#sessionParams = sessionParams;
|
|
17
19
|
const modelParams = {
|
|
18
20
|
model: options.modelName || this.modelName,
|
|
19
|
-
generationConfig:
|
|
20
|
-
systemInstruction:
|
|
21
|
+
generationConfig: sessionParams.generationConfig,
|
|
22
|
+
systemInstruction: sessionParams.systemInstruction,
|
|
21
23
|
};
|
|
22
24
|
// Clean undefined systemInstruction
|
|
23
25
|
if (!modelParams.systemInstruction) {
|
|
@@ -42,7 +44,9 @@ export default class GeminiBackend extends PolyfillBackend {
|
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
async countTokens(contents) {
|
|
45
|
-
const { totalTokens } = await this.#model.countTokens({
|
|
47
|
+
const { totalTokens } = await this.#model.countTokens({
|
|
48
|
+
contents,
|
|
49
|
+
});
|
|
46
50
|
return totalTokens;
|
|
47
51
|
}
|
|
48
52
|
}
|
package/backends/openai.js
CHANGED
|
@@ -9,7 +9,7 @@ export default class OpenAIBackend extends PolyfillBackend {
|
|
|
9
9
|
#model;
|
|
10
10
|
|
|
11
11
|
constructor(config) {
|
|
12
|
-
super(config.modelName || DEFAULT_MODELS.openai);
|
|
12
|
+
super(config.modelName || DEFAULT_MODELS.openai.modelName);
|
|
13
13
|
this.config = config;
|
|
14
14
|
this.openai = new OpenAI({
|
|
15
15
|
apiKey: config.apiKey,
|
|
@@ -32,17 +32,17 @@ export default class OpenAIBackend extends PolyfillBackend {
|
|
|
32
32
|
return 'available';
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
createSession(options,
|
|
35
|
+
createSession(options, sessionParams) {
|
|
36
36
|
// OpenAI doesn't have a "session" object like Gemini, so we return a context object
|
|
37
37
|
// tailored for our generate methods.
|
|
38
38
|
this.#model = {
|
|
39
39
|
model: options.modelName || this.modelName,
|
|
40
|
-
temperature:
|
|
40
|
+
temperature: sessionParams.generationConfig?.temperature,
|
|
41
41
|
top_p: 1.0, // Default to 1.0 as topK is not directly supported the same way
|
|
42
|
-
systemInstruction:
|
|
42
|
+
systemInstruction: sessionParams.systemInstruction,
|
|
43
43
|
};
|
|
44
44
|
|
|
45
|
-
const config =
|
|
45
|
+
const config = sessionParams.generationConfig || {};
|
|
46
46
|
if (config.responseSchema) {
|
|
47
47
|
const { schema, wrapped } = this.#fixSchemaForOpenAI(
|
|
48
48
|
config.responseSchema
|
|
@@ -269,9 +269,6 @@ export default class OpenAIBackend extends PolyfillBackend {
|
|
|
269
269
|
// For this initial implementation, we use a character-based approximation (e.g., text.length / 4)
|
|
270
270
|
// to avoid adding heavy WASM dependencies (`tiktoken`) to the polyfill.
|
|
271
271
|
let totalText = '';
|
|
272
|
-
if (this.#model && this.#model.systemInstruction) {
|
|
273
|
-
totalText += this.#model.systemInstruction;
|
|
274
|
-
}
|
|
275
272
|
|
|
276
273
|
if (Array.isArray(contents)) {
|
|
277
274
|
for (const content of contents) {
|
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
import {
|
|
2
|
+
pipeline,
|
|
3
|
+
TextStreamer,
|
|
4
|
+
} from 'https://esm.run/@huggingface/transformers';
|
|
5
|
+
import PolyfillBackend from './base.js';
|
|
6
|
+
import { DEFAULT_MODELS } from './defaults.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Transformers.js (ONNX Runtime) Backend.
 *
 * Wraps a Transformers.js `text-generation` pipeline behind the
 * `PolyfillBackend` interface. The pipeline is created lazily on first use and
 * then reused for every generate/count call made through this instance.
 */
export default class TransformersBackend extends PolyfillBackend {
  #generator;
  #tokenizer;
  #device;
  #dtype;
  #systemInstruction;

  /**
   * @param {Object} [config] - Backend configuration.
   * @param {string} [config.modelName] - Model ID; falls back to the default in `DEFAULT_MODELS.transformers`.
   * @param {string} [config.device] - Execution device; falls back to the default, then `'webgpu'`.
   * @param {string} [config.dtype] - Quantization level; falls back to the default, then `'q4f16'`.
   */
  constructor(config = {}) {
    super(config.modelName || DEFAULT_MODELS.transformers.modelName);
    this.#device =
      config.device || DEFAULT_MODELS.transformers.device || 'webgpu';
    this.#dtype = config.dtype || DEFAULT_MODELS.transformers.dtype || 'q4f16';
  }

  /**
   * Loaded models can be large, so we initialize them lazily.
   * @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
   * @returns {Promise<Object>} The generator.
   */
  async #ensureGenerator(monitorTarget) {
    if (!this.#generator) {
      // Per-file byte counters used to compute one aggregated progress value.
      const files = new Map();

      // The manifest is only used to pre-size the progress computation. If it
      // cannot be resolved (e.g. offline with no cached manifest), keep going:
      // the 'initiate' events below register files as the pipeline requests
      // them, and the model load itself must not fail because of it.
      let modelFiles = [];
      try {
        const resolved = await resolveModelFiles(this.modelName, {
          dtype: this.#dtype,
        });
        if (Array.isArray(resolved)) {
          modelFiles = resolved;
        }
      } catch (err) {
        console.warn(
          '[Transformers.js] Could not resolve model file sizes; download progress will be approximate:',
          err
        );
      }
      for (const { path, size } of modelFiles) {
        files.set(path, { loaded: 0, total: size });
      }

      const dispatch = (loaded) => {
        if (!monitorTarget) {
          return;
        }
        // Round down to a multiple of 1/0x10000 (65536) as required by WPT
        const precision = 1 / 65536;
        const roundedLoaded = Math.floor(loaded / precision) * precision;

        // Ensure strict monotonicity using the property set by the polyfill
        if (roundedLoaded <= monitorTarget.__lastProgressLoaded) {
          return;
        }

        monitorTarget.dispatchEvent(
          new ProgressEvent('downloadprogress', {
            loaded: roundedLoaded,
            total: 1,
            lengthComputable: true,
          })
        );
        monitorTarget.__lastProgressLoaded = roundedLoaded;
      };

      const progress_callback = (data) => {
        if (data.status === 'initiate') {
          if (files.has(data.file)) {
            const fileData = files.get(data.file);
            // Update with actual size if available, otherwise keep pre-fetched
            if (data.total) {
              fileData.total = data.total;
            }
          } else {
            files.set(data.file, { loaded: 0, total: data.total || 0 });
          }
        } else if (data.status === 'progress') {
          if (files.has(data.file)) {
            files.get(data.file).loaded = data.loaded;
          }
        } else if (data.status === 'done') {
          if (files.has(data.file)) {
            const fileData = files.get(data.file);
            fileData.loaded = fileData.total;
          }
        } else if (data.status === 'ready') {
          dispatch(1);
          return;
        }

        if (data.status === 'progress' || data.status === 'done') {
          let totalLoaded = 0;
          let totalSize = 0;
          for (const { loaded, total } of files.values()) {
            totalLoaded += loaded;
            totalSize += total;
          }

          if (totalSize > 0) {
            const globalProgress = totalLoaded / totalSize;
            // Cap at slightly less than 1.0 until 'ready'
            dispatch(Math.min(globalProgress, 0.9999));
          }
        }
      };

      // Initial 0% progress
      dispatch(0);

      this.#generator = await pipeline('text-generation', this.modelName, {
        device: this.#device,
        dtype: this.#dtype,
        progress_callback,
      });
      this.#tokenizer = this.#generator.tokenizer;
    }
    return this.#generator;
  }

  /**
   * Checks if the backend is available given the options.
   * @param {Object} options - LanguageModel options.
   * @returns {string} 'available' or 'unavailable'.
   */
  static availability(options) {
    if (options?.expectedInputs && Array.isArray(options.expectedInputs)) {
      for (const input of options.expectedInputs) {
        // Only text input is handled by this backend.
        if (input.type === 'audio' || input.type === 'image') {
          return 'unavailable';
        }
      }
    }
    return 'available';
  }

  /**
   * Creates a new session.
   * @param {Object} options - LanguageModel options.
   * @param {Object} sessionParams - Session parameters.
   * @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
   * @returns {Promise<Object>} The generator.
   */
  async createSession(options, sessionParams, monitorTarget) {
    if (options.responseConstraint) {
      console.warn(
        "The `responseConstraint` flag isn't supported by the Transformers.js backend and was ignored."
      );
    }
    // Loading the model is the slow part; trigger it here so download
    // progress is reported while the session is being created.
    await this.#ensureGenerator(monitorTarget);

    // We don't really have "sessions" in the same way Gemini does,
    // but we can store the generation config.
    this.generationConfig = {
      max_new_tokens: 512, // Default limit
      // `||` (not `??`) is deliberate: a temperature of 0 falls back to 1.0
      // here and is expressed through `do_sample: false` below instead.
      temperature: sessionParams.generationConfig?.temperature || 1.0,
      top_p: 1.0,
      do_sample: sessionParams.generationConfig?.temperature > 0,
      return_full_text: false,
    };
    this.#systemInstruction = sessionParams.systemInstruction;

    return this.#generator;
  }

  /**
   * Renders `contents` into a single prompt string via the tokenizer's chat
   * template, ending with the generation prompt.
   * @param {Object[]} contents - Array of { role, parts: [{ text }] }.
   * @returns {string} The templated prompt.
   */
  #buildPrompt(contents) {
    const messages = this.#contentsToMessages(contents);
    return this.#tokenizer.apply_chat_template(messages, {
      tokenize: false,
      add_generation_prompt: true,
    });
  }

  /**
   * Non-streaming prompt execution.
   * @param {Object[]} contents - Array of { role, parts: [{ text }] }.
   * @returns {Promise<{text: string, usage: number}>} Generated text and the input token count.
   */
  async generateContent(contents) {
    const generator = await this.#ensureGenerator();
    const prompt = this.#buildPrompt(contents);
    const output = await generator(prompt, {
      ...this.generationConfig,
      add_special_tokens: false,
    });
    const text = output[0].generated_text;

    // Approximate usage
    const usage = await this.countTokens(contents);

    return { text, usage };
  }

  /**
   * Streaming prompt execution.
   *
   * Chunks emitted by the streamer callback are queued and re-exposed as an
   * async iterable. If generation fails, the chunks received so far are
   * yielded and the error is then thrown to the consumer.
   * @param {Object[]} contents - Array of { role, parts: [{ text }] }.
   * @returns {Promise<AsyncGenerator<Object>>} Yields { text(), usageMetadata } chunks.
   */
  async generateContentStream(contents) {
    const generator = await this.#ensureGenerator();
    const prompt = this.#buildPrompt(contents);

    const queue = [];
    let wake = null; // resolver of the promise the consumer is parked on
    let isDone = false;
    let failure = null;

    const notify = () => {
      if (wake) {
        const resume = wake;
        wake = null;
        resume();
      }
    };

    const streamer = new TextStreamer(this.#tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function: (text) => {
        queue.push(text);
        notify();
      },
    });

    generator(prompt, {
      ...this.generationConfig,
      add_special_tokens: false,
      streamer,
    })
      .then(() => {
        isDone = true;
        notify();
      })
      .catch((err) => {
        console.error('[Transformers.js] Generation error:', err);
        // Remember the error so the consumer sees it instead of a stream that
        // silently ends early.
        failure = err;
        isDone = true;
        notify();
      });

    return (async function* () {
      while (true) {
        if (queue.length === 0 && !isDone) {
          // The check and the resolver registration run synchronously, so a
          // notification cannot slip in between them.
          await new Promise((resolve) => {
            wake = resolve;
          });
        }

        while (queue.length > 0) {
          const newText = queue.shift();
          yield {
            text: () => newText,
            usageMetadata: { totalTokenCount: 0 },
          };
        }

        if (isDone) {
          break;
        }
      }

      if (failure) {
        throw failure;
      }
    })();
  }

  /**
   * Token counting for quota/usage tracking.
   * @param {Object[]} contents - Array of { role, parts: [{ text }] }.
   * @returns {Promise<number>} Number of tokens in the chat-templated input.
   */
  async countTokens(contents) {
    await this.#ensureGenerator();
    const messages = this.#contentsToMessages(contents);
    const input_ids = this.#tokenizer.apply_chat_template(messages, {
      tokenize: true,
      add_generation_prompt: false,
      return_tensor: false,
    });
    return input_ids.length;
  }

  /**
   * Converts polyfill contents into chat messages ({ role, content }).
   *
   * 'model' becomes 'assistant', 'system' stays 'system', anything else is
   * 'user'. The session's system instruction is prepended when no system
   * message is present. For model names containing "gemma", the system
   * message is folded into the next user message (or turned into a user
   * message when none follows).
   * @param {Object[]} contents - Array of { role, parts: [{ text }] }.
   * @returns {Object[]} Chat messages.
   */
  #contentsToMessages(contents) {
    const messages = contents.map((c) => {
      const role =
        c.role === 'model'
          ? 'assistant'
          : c.role === 'system'
            ? 'system'
            : 'user';
      // Parts without text contribute nothing rather than the literal
      // string "undefined".
      const content = c.parts.map((p) => p.text ?? '').join('');
      return { role, content };
    });

    if (this.#systemInstruction && !messages.some((m) => m.role === 'system')) {
      messages.unshift({ role: 'system', content: this.#systemInstruction });
    }

    if (this.modelName.toLowerCase().includes('gemma')) {
      const systemIndex = messages.findIndex((m) => m.role === 'system');
      if (systemIndex !== -1) {
        const systemMsg = messages[systemIndex];
        const nextUserIndex = messages.findIndex(
          (m, i) => m.role === 'user' && i > systemIndex
        );
        if (nextUserIndex !== -1) {
          messages[nextUserIndex].content =
            systemMsg.content + '\n\n' + messages[nextUserIndex].content;
          messages.splice(systemIndex, 1);
        } else {
          // If there's no user message after the system message,
          // just convert the system message to a user message.
          systemMsg.content += '\n\n';
          systemMsg.role = 'user';
        }
      }
    }

    return messages;
  }
}
|
|
307
|
+
|
|
308
|
+
/**
 * Approximates the Transformers.js file resolution logic using the HF Tree API,
 * to learn which files a model load will fetch and how large they are.
 *
 * Results are cached in `localStorage` for one day. If the manifest cannot be
 * fetched or parsed, an expired cache entry is returned when one exists;
 * otherwise the error is rethrown.
 *
 * @param {string} modelId - The Hugging Face model ID.
 * @param {object} options - Configuration options.
 * @param {string} [options.dtype='q8'] - Quantization level used to pick the ONNX file suffix.
 * @param {string} [options.branch='main'] - Repository branch to list.
 * @returns {Promise<Object[]>} Array of { path, size } objects.
 * @throws {Error} If the manifest is unavailable and no cached copy exists.
 */
async function resolveModelFiles(modelId, options = {}) {
  const { dtype = 'q8', branch = 'main' } = options;

  let cachedData = null;
  const cacheKey = `transformers_model_files_${modelId}_${dtype}_${branch}`;
  try {
    const cached = localStorage.getItem(cacheKey);
    if (cached) {
      const parsed = JSON.parse(cached);
      // Only trust entries that actually carry a file list, so a malformed
      // entry is refetched instead of being handed to callers.
      if (Array.isArray(parsed?.files)) {
        cachedData = parsed;
        const oneDay = 24 * 60 * 60 * 1000;
        if (Date.now() - parsed.timestamp < oneDay) {
          return parsed.files;
        }
      }
    }
  } catch (e) {
    console.warn('Failed to read from localStorage cache:', e);
  }

  const manifestUrl = `https://huggingface.co/api/models/${modelId}/tree/${branch}?recursive=true`;

  let fileTree;
  try {
    const response = await fetch(manifestUrl);
    if (!response.ok) {
      throw new Error(`Manifest fetch failed: ${response.status}`);
    }
    // Parsing is inside the try so a broken body also uses the cache fallback.
    fileTree = await response.json();
    if (!Array.isArray(fileTree)) {
      throw new Error('Manifest response is not a file list');
    }
  } catch (e) {
    if (cachedData) {
      console.warn(
        `Failed to fetch manifest from network, falling back to cached data (expired):`,
        e
      );
      return cachedData.files;
    }
    throw e;
  }

  const fileMap = new Map(fileTree.map((f) => [f.path, f.size]));
  const finalFiles = [];

  // Helper: check existence and return { path, size }
  const exists = (path) => fileMap.has(path);
  const add = (path) => {
    if (exists(path)) {
      finalFiles.push({ path, size: fileMap.get(path) });
      return true;
    }
    return false;
  };

  // --- 1. Configs (added when present in the repository) ---
  add('config.json');
  add('generation_config.json');
  add('preprocessor_config.json');

  // --- 2. Tokenizer Resolution ---
  if (exists('tokenizer.json')) {
    add('tokenizer.json');
    add('tokenizer_config.json');
  } else {
    // Fallback: Legacy tokenizer files
    add('tokenizer_config.json');
    add('special_tokens_map.json');
    add('vocab.json');
    add('merges.txt');
    add('vocab.txt');
  }

  // --- 3. ONNX Model Resolution ---
  const onnxFolder = 'onnx';

  // Candidate filename suffixes, in priority order.
  let suffixes;
  if (dtype === 'fp32') {
    suffixes = [''];
  } else if (dtype === 'quantized') {
    suffixes = ['_quantized'];
  } else if (dtype === 'q8') {
    // Transformers.js stores q8 weights under the `_quantized` suffix; the
    // previous candidates are kept afterwards as fallbacks.
    suffixes = ['_quantized', '_q8', ''];
  } else {
    suffixes = [`_${dtype}`];
  }

  const components = [
    'model',
    'encoder_model',
    'decoder_model',
    'decoder_model_merged',
  ];

  // For each component keep the first suffix variant that exists.
  const foundComponents = [];
  for (const c of components) {
    for (const s of suffixes) {
      const filename = `${onnxFolder}/${c}${s}.onnx`;
      if (exists(filename)) {
        foundComponents.push(filename);
        break;
      }
    }
  }

  // When a merged decoder is present, drop the non-merged decoder file.
  const hasMerged = foundComponents.some((f) =>
    f.includes('decoder_model_merged')
  );
  const filteredComponents = foundComponents.filter(
    (f) => !(hasMerged && f.includes('decoder_model') && !f.includes('merged'))
  );

  for (const file of filteredComponents) {
    add(file);
    // External weight shards: `<file>_data`, then `<file>_data_1`, `_2`, ...
    const dataFile = `${file}_data`;
    if (add(dataFile)) {
      let i = 1;
      while (add(`${dataFile}_${i}`)) {
        i++;
      }
    }
  }

  try {
    localStorage.setItem(
      cacheKey,
      JSON.stringify({
        timestamp: Date.now(),
        files: finalFiles,
      })
    );
  } catch (e) {
    console.warn('Failed to write to localStorage cache:', e);
  }

  return finalFiles;
}
|
package/dot_env.json
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prompt-api-polyfill",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API,
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API, OpenAI API, or Transformers.js.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./prompt-api-polyfill.js",
|
|
7
7
|
"module": "./prompt-api-polyfill.js",
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
"firebase",
|
|
26
26
|
"gemini",
|
|
27
27
|
"openai",
|
|
28
|
+
"transformersjs",
|
|
28
29
|
"web-ai"
|
|
29
30
|
],
|
|
30
31
|
"repository": {
|
package/prompt-api-polyfill.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* - Firebase AI Logic (via `firebase/ai`)
|
|
5
5
|
* - Google Gemini API (via `@google/generative-ai`)
|
|
6
6
|
* - OpenAI API (via `openai`)
|
|
7
|
+
* - Transformers.js (via `@huggingface/transformers`)
|
|
7
8
|
*
|
|
8
9
|
* Spec: https://github.com/webmachinelearning/prompt-api/blob/main/README.md
|
|
9
10
|
*
|
|
@@ -13,6 +14,7 @@
|
|
|
13
14
|
* - For Firebase: Define `window.FIREBASE_CONFIG`.
|
|
14
15
|
* - For Gemini: Define `window.GEMINI_CONFIG`.
|
|
15
16
|
* - For OpenAI: Define `window.OPENAI_CONFIG`.
|
|
17
|
+
* - For Transformers.js: Define `window.TRANSFORMERS_CONFIG`.
|
|
16
18
|
*/
|
|
17
19
|
|
|
18
20
|
import './async-iterator-polyfill.js';
|
|
@@ -67,7 +69,7 @@ export class LanguageModel extends EventTarget {
|
|
|
67
69
|
#model;
|
|
68
70
|
#history;
|
|
69
71
|
#options;
|
|
70
|
-
#
|
|
72
|
+
#sessionParams;
|
|
71
73
|
#destroyed;
|
|
72
74
|
#inputUsage;
|
|
73
75
|
#topK;
|
|
@@ -80,7 +82,7 @@ export class LanguageModel extends EventTarget {
|
|
|
80
82
|
model,
|
|
81
83
|
initialHistory,
|
|
82
84
|
options = {},
|
|
83
|
-
|
|
85
|
+
sessionParams,
|
|
84
86
|
inputUsage = 0,
|
|
85
87
|
win = globalThis
|
|
86
88
|
) {
|
|
@@ -89,7 +91,7 @@ export class LanguageModel extends EventTarget {
|
|
|
89
91
|
this.#model = model;
|
|
90
92
|
this.#history = initialHistory || [];
|
|
91
93
|
this.#options = options;
|
|
92
|
-
this.#
|
|
94
|
+
this.#sessionParams = sessionParams;
|
|
93
95
|
this.#destroyed = false;
|
|
94
96
|
this.#inputUsage = inputUsage;
|
|
95
97
|
this.#onquotaoverflow = {};
|
|
@@ -195,6 +197,10 @@ export class LanguageModel extends EventTarget {
|
|
|
195
197
|
config: 'OPENAI_CONFIG',
|
|
196
198
|
path: './backends/openai.js',
|
|
197
199
|
},
|
|
200
|
+
{
|
|
201
|
+
config: 'TRANSFORMERS_CONFIG',
|
|
202
|
+
path: './backends/transformers.js',
|
|
203
|
+
},
|
|
198
204
|
];
|
|
199
205
|
|
|
200
206
|
static #getBackendInfo(win = globalThis) {
|
|
@@ -205,7 +211,7 @@ export class LanguageModel extends EventTarget {
|
|
|
205
211
|
}
|
|
206
212
|
}
|
|
207
213
|
throw new (win.DOMException || globalThis.DOMException)(
|
|
208
|
-
'Prompt API Polyfill: No backend configuration found. Please set window.FIREBASE_CONFIG, window.GEMINI_CONFIG, or window.
|
|
214
|
+
'Prompt API Polyfill: No backend configuration found. Please set window.FIREBASE_CONFIG, window.GEMINI_CONFIG, window.OPENAI_CONFIG, or window.TRANSFORMERS_CONFIG.',
|
|
209
215
|
'NotSupportedError'
|
|
210
216
|
);
|
|
211
217
|
}
|
|
@@ -430,7 +436,7 @@ export class LanguageModel extends EventTarget {
|
|
|
430
436
|
win
|
|
431
437
|
);
|
|
432
438
|
|
|
433
|
-
const
|
|
439
|
+
const sessionParams = {
|
|
434
440
|
model: backend.modelName,
|
|
435
441
|
generationConfig: {
|
|
436
442
|
temperature: resolvedOptions.temperature,
|
|
@@ -453,8 +459,19 @@ export class LanguageModel extends EventTarget {
|
|
|
453
459
|
);
|
|
454
460
|
|
|
455
461
|
if (systemPrompts.length > 0) {
|
|
456
|
-
|
|
457
|
-
.map((p) =>
|
|
462
|
+
sessionParams.systemInstruction = systemPrompts
|
|
463
|
+
.map((p) => {
|
|
464
|
+
if (typeof p.content === 'string') {
|
|
465
|
+
return p.content;
|
|
466
|
+
}
|
|
467
|
+
if (Array.isArray(p.content)) {
|
|
468
|
+
return p.content
|
|
469
|
+
.filter((part) => part.type === 'text')
|
|
470
|
+
.map((part) => part.value || part.text || '')
|
|
471
|
+
.join('\n');
|
|
472
|
+
}
|
|
473
|
+
return '';
|
|
474
|
+
})
|
|
458
475
|
.join('\n');
|
|
459
476
|
}
|
|
460
477
|
// Await the conversion of history items (in case of images in history)
|
|
@@ -494,7 +511,51 @@ export class LanguageModel extends EventTarget {
|
|
|
494
511
|
}
|
|
495
512
|
}
|
|
496
513
|
|
|
497
|
-
|
|
514
|
+
let monitorTarget = null;
|
|
515
|
+
if (typeof resolvedOptions.monitor === 'function') {
|
|
516
|
+
monitorTarget = new EventTarget();
|
|
517
|
+
try {
|
|
518
|
+
resolvedOptions.monitor(monitorTarget);
|
|
519
|
+
} catch (e) {
|
|
520
|
+
throw e;
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
if (monitorTarget) {
|
|
525
|
+
monitorTarget.__lastProgressLoaded = -1;
|
|
526
|
+
}
|
|
527
|
+
const dispatchProgress = async (loaded) => {
|
|
528
|
+
if (!monitorTarget || options.signal?.aborted) {
|
|
529
|
+
return !options.signal?.aborted;
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// Round to nearest 1/0x10000 (65536) as required by WPT in tests/wpt/resources/util.js
|
|
533
|
+
const precision = 1 / 65536;
|
|
534
|
+
const roundedLoaded = Math.floor(loaded / precision) * precision;
|
|
535
|
+
|
|
536
|
+
// Ensure strict monotonicity
|
|
537
|
+
if (roundedLoaded <= monitorTarget.__lastProgressLoaded) {
|
|
538
|
+
return true;
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
try {
|
|
542
|
+
monitorTarget.dispatchEvent(
|
|
543
|
+
new ProgressEvent('downloadprogress', {
|
|
544
|
+
loaded: roundedLoaded,
|
|
545
|
+
total: 1,
|
|
546
|
+
lengthComputable: true,
|
|
547
|
+
})
|
|
548
|
+
);
|
|
549
|
+
monitorTarget.__lastProgressLoaded = roundedLoaded;
|
|
550
|
+
} catch (e) {
|
|
551
|
+
console.error('Error dispatching downloadprogress events:', e);
|
|
552
|
+
}
|
|
553
|
+
// Yield to the event loop to allow the test/user to abort
|
|
554
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
555
|
+
return !options.signal?.aborted;
|
|
556
|
+
};
|
|
557
|
+
|
|
558
|
+
if (!(await dispatchProgress(0))) {
|
|
498
559
|
throw (
|
|
499
560
|
options.signal.reason ||
|
|
500
561
|
new (win.DOMException || globalThis.DOMException)(
|
|
@@ -504,19 +565,31 @@ export class LanguageModel extends EventTarget {
|
|
|
504
565
|
);
|
|
505
566
|
}
|
|
506
567
|
|
|
507
|
-
const model = backend.createSession(
|
|
568
|
+
const model = await backend.createSession(
|
|
569
|
+
resolvedOptions,
|
|
570
|
+
sessionParams,
|
|
571
|
+
monitorTarget
|
|
572
|
+
);
|
|
573
|
+
|
|
574
|
+
if (!(await dispatchProgress(1))) {
|
|
575
|
+
throw (
|
|
576
|
+
options.signal.reason ||
|
|
577
|
+
new (win.DOMException || globalThis.DOMException)(
|
|
578
|
+
'Aborted',
|
|
579
|
+
'AbortError'
|
|
580
|
+
)
|
|
581
|
+
);
|
|
582
|
+
}
|
|
508
583
|
|
|
509
|
-
// Initialize inputUsage with the tokens from the initial prompts
|
|
584
|
+
// Initialize inputUsage with the tokens from the initial prompts.
|
|
510
585
|
if (resolvedOptions.initialPrompts?.length > 0) {
|
|
511
|
-
// Calculate token usage including system instruction and conversation history
|
|
512
586
|
const fullHistory = [...initialHistory];
|
|
513
|
-
if (
|
|
587
|
+
if (sessionParams.systemInstruction) {
|
|
514
588
|
fullHistory.unshift({
|
|
515
589
|
role: 'system',
|
|
516
|
-
parts: [{ text:
|
|
590
|
+
parts: [{ text: sessionParams.systemInstruction }],
|
|
517
591
|
});
|
|
518
592
|
}
|
|
519
|
-
|
|
520
593
|
inputUsageValue = (await backend.countTokens(fullHistory)) || 0;
|
|
521
594
|
|
|
522
595
|
if (inputUsageValue > 1000000) {
|
|
@@ -536,63 +609,12 @@ export class LanguageModel extends EventTarget {
|
|
|
536
609
|
}
|
|
537
610
|
}
|
|
538
611
|
|
|
539
|
-
// If a monitor callback is provided, simulate simple downloadprogress events
|
|
540
|
-
if (typeof resolvedOptions.monitor === 'function') {
|
|
541
|
-
const monitorTarget = new EventTarget();
|
|
542
|
-
|
|
543
|
-
try {
|
|
544
|
-
resolvedOptions.monitor(monitorTarget);
|
|
545
|
-
} catch (e) {
|
|
546
|
-
// Re-throw if the monitor callback itself throws, as per WPT requirements
|
|
547
|
-
throw e;
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
const dispatchProgress = async (loaded) => {
|
|
551
|
-
if (options.signal?.aborted) {
|
|
552
|
-
return false;
|
|
553
|
-
}
|
|
554
|
-
try {
|
|
555
|
-
const progressEvent = new ProgressEvent('downloadprogress', {
|
|
556
|
-
loaded: loaded,
|
|
557
|
-
total: 1,
|
|
558
|
-
lengthComputable: true,
|
|
559
|
-
});
|
|
560
|
-
monitorTarget.dispatchEvent(progressEvent);
|
|
561
|
-
} catch (e) {
|
|
562
|
-
console.error('Error dispatching downloadprogress events:', e);
|
|
563
|
-
}
|
|
564
|
-
// Yield to the event loop to allow the test/user to abort
|
|
565
|
-
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
566
|
-
return !options.signal?.aborted;
|
|
567
|
-
};
|
|
568
|
-
|
|
569
|
-
if (!(await dispatchProgress(0))) {
|
|
570
|
-
throw (
|
|
571
|
-
options.signal.reason ||
|
|
572
|
-
new (win.DOMException || globalThis.DOMException)(
|
|
573
|
-
'Aborted',
|
|
574
|
-
'AbortError'
|
|
575
|
-
)
|
|
576
|
-
);
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
if (!(await dispatchProgress(1))) {
|
|
580
|
-
throw (
|
|
581
|
-
options.signal.reason ||
|
|
582
|
-
new (win.DOMException || globalThis.DOMException)(
|
|
583
|
-
'Aborted',
|
|
584
|
-
'AbortError'
|
|
585
|
-
)
|
|
586
|
-
);
|
|
587
|
-
}
|
|
588
|
-
}
|
|
589
|
-
|
|
590
612
|
return new this(
|
|
591
613
|
backend,
|
|
592
614
|
model,
|
|
593
615
|
initialHistory,
|
|
594
616
|
resolvedOptions,
|
|
595
|
-
|
|
617
|
+
sessionParams,
|
|
596
618
|
inputUsageValue,
|
|
597
619
|
win
|
|
598
620
|
);
|
|
@@ -620,13 +642,13 @@ export class LanguageModel extends EventTarget {
|
|
|
620
642
|
|
|
621
643
|
const historyCopy = JSON.parse(JSON.stringify(this.#history));
|
|
622
644
|
const mergedOptions = { ...this.#options, ...options };
|
|
623
|
-
const
|
|
645
|
+
const mergedSessionParams = { ...this.#sessionParams };
|
|
624
646
|
|
|
625
647
|
if (options.temperature !== undefined) {
|
|
626
|
-
|
|
648
|
+
mergedSessionParams.generationConfig.temperature = options.temperature;
|
|
627
649
|
}
|
|
628
650
|
if (options.topK !== undefined) {
|
|
629
|
-
|
|
651
|
+
mergedSessionParams.generationConfig.topK = options.topK;
|
|
630
652
|
}
|
|
631
653
|
|
|
632
654
|
// Re-create the backend for the clone since it now holds state (#model)
|
|
@@ -635,7 +657,7 @@ export class LanguageModel extends EventTarget {
|
|
|
635
657
|
const newBackend = new BackendClass(info.configValue);
|
|
636
658
|
const newModel = newBackend.createSession(
|
|
637
659
|
mergedOptions,
|
|
638
|
-
|
|
660
|
+
mergedSessionParams
|
|
639
661
|
);
|
|
640
662
|
|
|
641
663
|
if (options.signal?.aborted) {
|
|
@@ -653,7 +675,7 @@ export class LanguageModel extends EventTarget {
|
|
|
653
675
|
newModel,
|
|
654
676
|
historyCopy,
|
|
655
677
|
mergedOptions,
|
|
656
|
-
|
|
678
|
+
mergedSessionParams,
|
|
657
679
|
this.#inputUsage,
|
|
658
680
|
this.#window
|
|
659
681
|
);
|
|
@@ -683,6 +705,19 @@ export class LanguageModel extends EventTarget {
|
|
|
683
705
|
);
|
|
684
706
|
}
|
|
685
707
|
|
|
708
|
+
if (
|
|
709
|
+
typeof input === 'object' &&
|
|
710
|
+
input !== null &&
|
|
711
|
+
!Array.isArray(input) &&
|
|
712
|
+
Object.keys(input).length === 0
|
|
713
|
+
) {
|
|
714
|
+
// This is done to pass a WPT test and work around a safety feature in
|
|
715
|
+
// Gemma that refuses to follow instructions to respond with
|
|
716
|
+
// "[object Object]". We skip the model and return the expected response
|
|
717
|
+
// directly.
|
|
718
|
+
return '[object Object]';
|
|
719
|
+
}
|
|
720
|
+
|
|
686
721
|
if (options.responseConstraint) {
|
|
687
722
|
LanguageModel.#validateResponseConstraint(
|
|
688
723
|
options.responseConstraint,
|
|
@@ -692,14 +727,14 @@ export class LanguageModel extends EventTarget {
|
|
|
692
727
|
const schema = convertJsonSchemaToVertexSchema(
|
|
693
728
|
options.responseConstraint
|
|
694
729
|
);
|
|
695
|
-
this.#
|
|
730
|
+
this.#sessionParams.generationConfig.responseMimeType =
|
|
696
731
|
'application/json';
|
|
697
|
-
this.#
|
|
732
|
+
this.#sessionParams.generationConfig.responseSchema = schema;
|
|
698
733
|
|
|
699
734
|
// Re-create model with new config/schema (stored in backend)
|
|
700
735
|
this.#model = this.#backend.createSession(
|
|
701
736
|
this.#options,
|
|
702
|
-
this.#
|
|
737
|
+
this.#sessionParams
|
|
703
738
|
);
|
|
704
739
|
}
|
|
705
740
|
|
|
@@ -763,19 +798,37 @@ export class LanguageModel extends EventTarget {
|
|
|
763
798
|
return 'Mock response for quota overflow test.';
|
|
764
799
|
}
|
|
765
800
|
|
|
801
|
+
const fullHistoryWithNewPrompt = [...this.#history, userContent];
|
|
802
|
+
if (this.#sessionParams.systemInstruction) {
|
|
803
|
+
fullHistoryWithNewPrompt.unshift({
|
|
804
|
+
role: 'system',
|
|
805
|
+
parts: [{ text: this.#sessionParams.systemInstruction }],
|
|
806
|
+
});
|
|
807
|
+
}
|
|
808
|
+
|
|
766
809
|
// Estimate usage
|
|
767
|
-
const totalTokens = await this.#backend.countTokens(
|
|
768
|
-
|
|
769
|
-
|
|
810
|
+
const totalTokens = await this.#backend.countTokens(
|
|
811
|
+
fullHistoryWithNewPrompt
|
|
812
|
+
);
|
|
770
813
|
|
|
771
814
|
if (totalTokens > this.inputQuota) {
|
|
772
|
-
|
|
815
|
+
const ErrorClass =
|
|
816
|
+
(this.#window && this.#window.QuotaExceededError) ||
|
|
817
|
+
(this.#window && this.#window.DOMException) ||
|
|
818
|
+
globalThis.QuotaExceededError ||
|
|
819
|
+
globalThis.DOMException;
|
|
820
|
+
const error = new ErrorClass(
|
|
773
821
|
`The prompt is too large (${totalTokens} tokens), it exceeds the quota of ${this.inputQuota} tokens.`,
|
|
774
822
|
'QuotaExceededError'
|
|
775
823
|
);
|
|
824
|
+
// Attach properties expected by WPT tests
|
|
825
|
+
Object.defineProperty(error, 'code', { value: 22, configurable: true });
|
|
826
|
+
error.requested = totalTokens;
|
|
827
|
+
error.quota = this.inputQuota;
|
|
828
|
+
throw error;
|
|
776
829
|
}
|
|
777
830
|
|
|
778
|
-
if (
|
|
831
|
+
if (totalTokens > this.inputQuota) {
|
|
779
832
|
this.dispatchEvent(new Event('quotaoverflow'));
|
|
780
833
|
}
|
|
781
834
|
|
|
@@ -844,6 +897,24 @@ export class LanguageModel extends EventTarget {
|
|
|
844
897
|
);
|
|
845
898
|
}
|
|
846
899
|
|
|
900
|
+
if (
|
|
901
|
+
typeof input === 'object' &&
|
|
902
|
+
input !== null &&
|
|
903
|
+
!Array.isArray(input) &&
|
|
904
|
+
Object.keys(input).length === 0
|
|
905
|
+
) {
|
|
906
|
+
return new ReadableStream({
|
|
907
|
+
start(controller) {
|
|
908
|
+
// This is done to pass a WPT test and work around a safety feature in
|
|
909
|
+
// Gemma that refuses to follow instructions to respond with
|
|
910
|
+
// "[object Object]". We skip the model and return the expected response
|
|
911
|
+
// directly.
|
|
912
|
+
controller.enqueue('[object Object]');
|
|
913
|
+
controller.close();
|
|
914
|
+
},
|
|
915
|
+
});
|
|
916
|
+
}
|
|
917
|
+
|
|
847
918
|
const _this = this; // Capture 'this' to access private fields in callback
|
|
848
919
|
|
|
849
920
|
const signal = options.signal;
|
|
@@ -884,12 +955,12 @@ export class LanguageModel extends EventTarget {
|
|
|
884
955
|
const schema = convertJsonSchemaToVertexSchema(
|
|
885
956
|
options.responseConstraint
|
|
886
957
|
);
|
|
887
|
-
_this.#
|
|
958
|
+
_this.#sessionParams.generationConfig.responseMimeType =
|
|
888
959
|
'application/json';
|
|
889
|
-
_this.#
|
|
960
|
+
_this.#sessionParams.generationConfig.responseSchema = schema;
|
|
890
961
|
_this.#model = _this.#backend.createSession(
|
|
891
962
|
_this.#options,
|
|
892
|
-
_this.#
|
|
963
|
+
_this.#sessionParams
|
|
893
964
|
);
|
|
894
965
|
}
|
|
895
966
|
|
|
@@ -930,18 +1001,39 @@ export class LanguageModel extends EventTarget {
|
|
|
930
1001
|
return;
|
|
931
1002
|
}
|
|
932
1003
|
|
|
933
|
-
const
|
|
934
|
-
|
|
935
|
-
|
|
1004
|
+
const fullHistoryWithNewPrompt = [..._this.#history, userContent];
|
|
1005
|
+
if (_this.#sessionParams.systemInstruction) {
|
|
1006
|
+
fullHistoryWithNewPrompt.unshift({
|
|
1007
|
+
role: 'system',
|
|
1008
|
+
parts: [{ text: _this.#sessionParams.systemInstruction }],
|
|
1009
|
+
});
|
|
1010
|
+
}
|
|
1011
|
+
|
|
1012
|
+
const totalTokens = await _this.#backend.countTokens(
|
|
1013
|
+
fullHistoryWithNewPrompt
|
|
1014
|
+
);
|
|
936
1015
|
|
|
937
1016
|
if (totalTokens > _this.inputQuota) {
|
|
938
|
-
|
|
1017
|
+
const ErrorClass =
|
|
1018
|
+
(_this.#window && _this.#window.QuotaExceededError) ||
|
|
1019
|
+
(_this.#window && _this.#window.DOMException) ||
|
|
1020
|
+
globalThis.QuotaExceededError ||
|
|
1021
|
+
globalThis.DOMException;
|
|
1022
|
+
const error = new ErrorClass(
|
|
939
1023
|
`The prompt is too large (${totalTokens} tokens), it exceeds the quota of ${_this.inputQuota} tokens.`,
|
|
940
1024
|
'QuotaExceededError'
|
|
941
1025
|
);
|
|
1026
|
+
// Attach properties expected by WPT tests
|
|
1027
|
+
Object.defineProperty(error, 'code', {
|
|
1028
|
+
value: 22,
|
|
1029
|
+
configurable: true,
|
|
1030
|
+
});
|
|
1031
|
+
error.requested = totalTokens;
|
|
1032
|
+
error.quota = _this.inputQuota;
|
|
1033
|
+
throw error;
|
|
942
1034
|
}
|
|
943
1035
|
|
|
944
|
-
if (
|
|
1036
|
+
if (totalTokens > _this.inputQuota) {
|
|
945
1037
|
_this.dispatchEvent(new Event('quotaoverflow'));
|
|
946
1038
|
}
|
|
947
1039
|
|
|
@@ -1050,7 +1142,14 @@ export class LanguageModel extends EventTarget {
|
|
|
1050
1142
|
this.#history.push(content);
|
|
1051
1143
|
|
|
1052
1144
|
try {
|
|
1053
|
-
const
|
|
1145
|
+
const fullHistory = [...this.#history];
|
|
1146
|
+
if (this.#sessionParams.systemInstruction) {
|
|
1147
|
+
fullHistory.unshift({
|
|
1148
|
+
role: 'system',
|
|
1149
|
+
parts: [{ text: this.#sessionParams.systemInstruction }],
|
|
1150
|
+
});
|
|
1151
|
+
}
|
|
1152
|
+
const totalTokens = await this.#backend.countTokens(fullHistory);
|
|
1054
1153
|
this.#inputUsage = totalTokens || 0;
|
|
1055
1154
|
} catch {
|
|
1056
1155
|
// Do nothing.
|
|
@@ -1249,12 +1348,7 @@ export class LanguageModel extends EventTarget {
|
|
|
1249
1348
|
'NotSupportedError'
|
|
1250
1349
|
);
|
|
1251
1350
|
}
|
|
1252
|
-
const text =
|
|
1253
|
-
typeof input === 'object' &&
|
|
1254
|
-
input !== null &&
|
|
1255
|
-
Object.keys(input).length === 0
|
|
1256
|
-
? 'Respond with "[object Object]"' // Just for passing a WPT test
|
|
1257
|
-
: JSON.stringify(input);
|
|
1351
|
+
const text = JSON.stringify(input);
|
|
1258
1352
|
return [{ text }];
|
|
1259
1353
|
}
|
|
1260
1354
|
|