prompt-api-polyfill 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -140,17 +140,6 @@ npm install prompt-api-polyfill
140
140
  apiKey: 'dummy', // Required for now by the loader
141
141
  device: 'webgpu', // 'webgpu' or 'cpu'
142
142
  dtype: 'q4f16', // Quantization level
143
- env: {
144
- // Optional: Pass low-level Transformers.js environment overrides
145
- allowRemoteModels: false,
146
- backends: {
147
- onnx: {
148
- wasm: {
149
- wasmPaths: 'https://cdn.example.com/wasm-assets/',
150
- },
151
- },
152
- },
153
- },
154
143
  };
155
144
 
156
145
  if (!('LanguageModel' in window)) {
@@ -179,8 +168,8 @@ including:
179
168
  - `LanguageModel.create()` with options
180
169
  - `prompt()` and `promptStreaming()`
181
170
  - Multimodal inputs (text, image, audio)
182
- - `append()` and `measureInputUsage()`
183
- - Quota handling via `onquotaoverflow`
171
+ - `append()` and `measureContextUsage()`
172
+ - Quota handling via `oncontextwindowoverflow`
184
173
  - `clone()` and `destroy()`
185
174
 
186
175
  A simplified version of how it is wired up:
@@ -236,18 +225,6 @@ This repo ships with a template file:
236
225
  // For Transformers.js:
237
226
  "device": "webgpu",
238
227
  "dtype": "q4f16",
239
-
240
- // Optional library-level overrides:
241
- "env": {
242
- "allowRemoteModels": false,
243
- "backends": {
244
- "onnx": {
245
- "wasm": {
246
- "wasmPaths": "https://cdn.example.com/wasm-assets/"
247
- }
248
- }
249
- }
250
- }
251
228
  }
252
229
  ```
253
230
 
@@ -304,17 +281,7 @@ Then open `.env.json` and fill in the values.
304
281
  "apiKey": "dummy",
305
282
  "modelName": "onnx-community/gemma-3-1b-it-ONNX-GQA",
306
283
  "device": "webgpu",
307
- "dtype": "q4f16",
308
- "env": {
309
- "allowRemoteModels": false,
310
- "backends": {
311
- "onnx": {
312
- "wasm": {
313
- "wasmPaths": "https://cdn.example.com/wasm-assets/"
314
- }
315
- }
316
- }
317
- }
284
+ "dtype": "q4f16"
318
285
  }
319
286
  ```
320
287
 
@@ -337,9 +304,6 @@ Then open `.env.json` and fill in the values.
337
304
 
338
305
  - `device`: **Transformers.js only**. Either `"webgpu"` or `"cpu"`.
339
306
  - `dtype`: **Transformers.js only**. Quantization level (e.g., `"q4f16"`).
340
- - `env` (optional): **Transformers.js only**. A flexible object to override
341
- [Transformers.js environment variables](https://huggingface.co/docs/transformers.js/api/env).
342
- This is useful for specifying local `wasmPaths` or proxy settings.
343
307
 
344
308
  - `modelName` (optional): The model ID to use. If not provided, the polyfill
345
309
  uses the defaults defined in [`backends/defaults.js`](backends/defaults.js).
@@ -460,21 +424,17 @@ export default class CustomBackend extends PolyfillBackend {
460
424
 
461
425
  ### Register your backend
462
426
 
463
- The polyfill uses a "First-Match Priority" strategy based on global
464
- configuration. You need to register your backend in the `prompt-api-polyfill.js`
465
- file by adding it to the static `#backends` array:
427
+ The polyfill uses an automated registration strategy. To register your new
428
+ backend, simply run the registration script:
466
429
 
467
- ```js
468
- // prompt-api-polyfill.js
469
- static #backends = [
470
- // ... existing backends
471
- {
472
- config: 'CUSTOM_CONFIG', // The global object to look for on `window`
473
- path: './backends/custom.js',
474
- },
475
- ];
430
+ ```bash
431
+ npm run generate:registry
476
432
  ```
477
433
 
434
+ This updates the `backends-registry.js` file, which the polyfill imports. The
435
+ registry contains the configuration mapping and a dynamic loader that ensures
436
+ compatibility with bundlers and CDNs.
437
+
478
438
  ### Set a default model
479
439
 
480
440
  Define the fallback model identity in `backends/defaults.js`. This is used when
@@ -490,9 +450,11 @@ export const DEFAULT_MODELS = {
490
450
 
491
451
  ### Enable local development and testing
492
452
 
493
- The project uses a discovery script (`scripts/list-backends.js`) to generate
494
- test matrices. To include your new backend in the test runner, create a
495
- `.env-[name].json` file (for example, `.env-custom.json`) in the root directory:
453
+ The project uses a discovery script (`scripts/backend-discovery.js`) to generate
454
+ test matrices and list active backends based on the presence of
455
+ `.env-[name].json` files. To include your new backend in the test runner, create
456
+ a `.env-[name].json` file (for example, `.env-custom.json`) in the root
457
+ directory:
496
458
 
497
459
  ```json
498
460
  {
@@ -501,6 +463,12 @@ test matrices. To include your new backend in the test runner, create a
501
463
  }
502
464
  ```
503
465
 
466
+ Then run the WPT generation script:
467
+
468
+ ```bash
469
+ npm run generate:wpt
470
+ ```
471
+
504
472
  ### Verify via Web Platform Tests (WPT)
505
473
 
506
474
  The final step is ensuring compliance. Because the polyfill is spec-driven, any
@@ -1,74 +1,68 @@
1
- import { env as j, pipeline as z, TextStreamer as C } from "@huggingface/transformers";
2
- import { P as $, D as y } from "../chunks/defaults-CNQngzSd.js";
3
- class I extends $ {
1
+ import { pipeline as z, TextStreamer as C } from "@huggingface/transformers";
2
+ import { P as j, D as y } from "../chunks/defaults-CNQngzSd.js";
3
+ class D extends j {
4
4
  #e;
5
5
  #t;
6
- #i;
7
- #o;
6
+ #a;
8
7
  #n;
9
- constructor(o = {}) {
10
- if (super(o.modelName || y.transformers.modelName), this.#i = o.device || y.transformers.device, this.#o = o.dtype || y.transformers.dtype, o.env) {
11
- const t = (n, a) => {
12
- for (const [l, e] of Object.entries(a))
13
- e && typeof e == "object" && !Array.isArray(e) && n[l] && typeof n[l] == "object" ? t(n[l], e) : n[l] = e;
14
- };
15
- t(j, o.env);
16
- }
8
+ #o;
9
+ constructor(n = {}) {
10
+ super(n.modelName || y.transformers.modelName), this.#a = n.device || y.transformers.device, this.#n = n.dtype || y.transformers.dtype;
17
11
  }
18
12
  /**
19
13
  * Loaded models can be large, so we initialize them lazily.
20
14
  * @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
21
15
  * @returns {Promise<Object>} The generator.
22
16
  */
23
- async #s(o) {
17
+ async #s(n) {
24
18
  if (!this.#e) {
25
- const t = /* @__PURE__ */ new Map(), n = await M(this.modelName, {
26
- dtype: this.#o
19
+ const t = /* @__PURE__ */ new Map(), a = await $(this.modelName, {
20
+ dtype: this.#n
27
21
  });
28
- for (const { path: e, size: r } of n)
29
- t.set(e, { loaded: 0, total: r });
30
- const a = (e) => {
31
- if (!o)
22
+ for (const { path: e, size: o } of a)
23
+ t.set(e, { loaded: 0, total: o });
24
+ const i = (e) => {
25
+ if (!n)
32
26
  return;
33
- const r = 1 / 65536, c = Math.floor(e / r) * r;
34
- c <= o.__lastProgressLoaded || (o.dispatchEvent(
27
+ const o = 1 / 65536, c = Math.floor(e / o) * o;
28
+ c <= n.__lastProgressLoaded || (n.dispatchEvent(
35
29
  new ProgressEvent("downloadprogress", {
36
30
  loaded: c,
37
31
  total: 1,
38
32
  lengthComputable: !0
39
33
  })
40
- ), o.__lastProgressLoaded = c);
34
+ ), n.__lastProgressLoaded = c);
41
35
  }, l = (e) => {
42
36
  if (e.status === "initiate")
43
37
  if (t.has(e.file)) {
44
- const r = t.get(e.file);
45
- e.total && (r.total = e.total);
38
+ const o = t.get(e.file);
39
+ e.total && (o.total = e.total);
46
40
  } else
47
41
  t.set(e.file, { loaded: 0, total: e.total || 0 });
48
42
  else if (e.status === "progress")
49
43
  t.has(e.file) && (t.get(e.file).loaded = e.loaded);
50
44
  else if (e.status === "done") {
51
45
  if (t.has(e.file)) {
52
- const r = t.get(e.file);
53
- r.loaded = r.total;
46
+ const o = t.get(e.file);
47
+ o.loaded = o.total;
54
48
  }
55
49
  } else if (e.status === "ready") {
56
- a(1);
50
+ i(1);
57
51
  return;
58
52
  }
59
53
  if (e.status === "progress" || e.status === "done") {
60
- let r = 0, c = 0;
54
+ let o = 0, c = 0;
61
55
  for (const { loaded: f, total: d } of t.values())
62
- r += f, c += d;
56
+ o += f, c += d;
63
57
  if (c > 0) {
64
- const f = r / c;
65
- a(Math.min(f, 0.9999));
58
+ const f = o / c;
59
+ i(Math.min(f, 0.9999));
66
60
  }
67
61
  }
68
62
  };
69
- a(0), this.#e = await z("text-generation", this.modelName, {
70
- device: this.#i,
71
- dtype: this.#o,
63
+ i(0), this.#e = await z("text-generation", this.modelName, {
64
+ device: this.#a,
65
+ dtype: this.#n,
72
66
  progress_callback: l
73
67
  }), this.#t = this.#e.tokenizer;
74
68
  }
@@ -79,9 +73,9 @@ class I extends $ {
79
73
  * @param {Object} options - LanguageModel options.
80
74
  * @returns {string} 'available' or 'unavailable'.
81
75
  */
82
- static availability(o) {
83
- if (o?.expectedInputs && Array.isArray(o.expectedInputs)) {
84
- for (const t of o.expectedInputs)
76
+ static availability(n) {
77
+ if (n?.expectedInputs && Array.isArray(n.expectedInputs)) {
78
+ for (const t of n.expectedInputs)
85
79
  if (t.type === "audio" || t.type === "image")
86
80
  return "unavailable";
87
81
  }
@@ -94,55 +88,55 @@ class I extends $ {
94
88
  * @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
95
89
  * @returns {Promise<Object>} The generator.
96
90
  */
97
- async createSession(o, t, n) {
98
- return o.responseConstraint && console.warn(
91
+ async createSession(n, t, a) {
92
+ return n.responseConstraint && console.warn(
99
93
  "The `responseConstraint` flag isn't supported by the Transformers.js backend and was ignored."
100
- ), await this.#s(n), this.generationConfig = {
94
+ ), await this.#s(a), this.generationConfig = {
101
95
  max_new_tokens: 512,
102
96
  // Default limit
103
97
  temperature: t.generationConfig?.temperature ?? 1,
104
98
  top_p: 1,
105
99
  do_sample: t.generationConfig?.temperature !== 0,
106
100
  return_full_text: !1
107
- }, this.#n = t.systemInstruction, this.#e;
101
+ }, this.#o = t.systemInstruction, this.#e;
108
102
  }
109
- async generateContent(o) {
110
- const t = await this.#s(), n = this.#r(o), a = this.#t.apply_chat_template(n, {
103
+ async generateContent(n) {
104
+ const t = await this.#s(), a = this.#r(n), i = this.#t.apply_chat_template(a, {
111
105
  tokenize: !1,
112
106
  add_generation_prompt: !0
113
- }), e = (await t(a, {
107
+ }), e = (await t(i, {
114
108
  ...this.generationConfig,
115
109
  add_special_tokens: !1
116
- }))[0].generated_text, r = await this.countTokens(o);
117
- return { text: e, usage: r };
110
+ }))[0].generated_text, o = await this.countTokens(n);
111
+ return { text: e, usage: o };
118
112
  }
119
- async generateContentStream(o) {
120
- const t = await this.#s(), n = this.#r(o), a = this.#t.apply_chat_template(n, {
113
+ async generateContentStream(n) {
114
+ const t = await this.#s(), a = this.#r(n), i = this.#t.apply_chat_template(a, {
121
115
  tokenize: !1,
122
116
  add_generation_prompt: !0
123
117
  }), l = [];
124
- let e, r = new Promise((i) => e = i), c = !1;
125
- const f = (i) => {
126
- l.push(i), e && (e(), e = null);
118
+ let e, o = new Promise((r) => e = r), c = !1;
119
+ const f = (r) => {
120
+ l.push(r), e && (e(), e = null);
127
121
  }, d = new C(this.#t, {
128
122
  skip_prompt: !0,
129
123
  skip_special_tokens: !0,
130
124
  callback_function: f
131
125
  });
132
- return t(a, {
126
+ return t(i, {
133
127
  ...this.generationConfig,
134
128
  add_special_tokens: !1,
135
129
  streamer: d
136
130
  }).then(() => {
137
131
  c = !0, e && (e(), e = null);
138
- }).catch((i) => {
139
- console.error("[Transformers.js] Generation error:", i), c = !0, e && (e(), e = null);
132
+ }).catch((r) => {
133
+ console.error("[Transformers.js] Generation error:", r), c = !0, e && (e(), e = null);
140
134
  }), (async function* () {
141
135
  for (; ; ) {
142
- for (l.length === 0 && !c && (e || (r = new Promise((i) => e = i)), await r); l.length > 0; ) {
143
- const i = l.shift();
136
+ for (l.length === 0 && !c && (e || (o = new Promise((r) => e = r)), await o); l.length > 0; ) {
137
+ const r = l.shift();
144
138
  yield {
145
- text: () => i,
139
+ text: () => r,
146
140
  usageMetadata: { totalTokenCount: 0 }
147
141
  };
148
142
  }
@@ -151,67 +145,67 @@ class I extends $ {
151
145
  }
152
146
  })();
153
147
  }
154
- async countTokens(o) {
148
+ async countTokens(n) {
155
149
  await this.#s();
156
- const t = this.#r(o);
150
+ const t = this.#r(n);
157
151
  return this.#t.apply_chat_template(t, {
158
152
  tokenize: !0,
159
153
  add_generation_prompt: !1,
160
154
  return_tensor: !1
161
155
  }).length;
162
156
  }
163
- #r(o) {
164
- const t = o.map((n) => {
165
- let a = n.role === "model" ? "assistant" : n.role === "system" ? "system" : "user";
166
- const l = n.parts.map((e) => e.text).join("");
167
- return { role: a, content: l };
157
+ #r(n) {
158
+ const t = n.map((a) => {
159
+ let i = a.role === "model" ? "assistant" : a.role === "system" ? "system" : "user";
160
+ const l = a.parts.map((e) => e.text).join("");
161
+ return { role: i, content: l };
168
162
  });
169
- if (this.#n && !t.some((n) => n.role === "system") && t.unshift({ role: "system", content: this.#n }), this.modelName.toLowerCase().includes("gemma")) {
170
- const n = t.findIndex((a) => a.role === "system");
171
- if (n !== -1) {
172
- const a = t[n], l = t.findIndex(
173
- (e, r) => e.role === "user" && r > n
163
+ if (this.#o && !t.some((a) => a.role === "system") && t.unshift({ role: "system", content: this.#o }), this.modelName.toLowerCase().includes("gemma")) {
164
+ const a = t.findIndex((i) => i.role === "system");
165
+ if (a !== -1) {
166
+ const i = t[a], l = t.findIndex(
167
+ (e, o) => e.role === "user" && o > a
174
168
  );
175
- l !== -1 ? (t[l].content = a.content + `
169
+ l !== -1 ? (t[l].content = i.content + `
176
170
 
177
- ` + t[l].content, t.splice(n, 1)) : (a.content += `
171
+ ` + t[l].content, t.splice(a, 1)) : (i.content += `
178
172
 
179
- `, a.role = "user");
173
+ `, i.role = "user");
180
174
  }
181
175
  }
182
176
  return t;
183
177
  }
184
178
  }
185
- async function M(g, o = {}) {
186
- const { dtype: t = "q8", branch: n = "main" } = o;
187
- let a = null;
188
- const l = `transformers_model_files_${g}_${t}_${n}`;
179
+ async function $(g, n = {}) {
180
+ const { dtype: t = "q8", branch: a = "main" } = n;
181
+ let i = null;
182
+ const l = `transformers_model_files_${g}_${t}_${a}`;
189
183
  try {
190
184
  const s = localStorage.getItem(l);
191
185
  if (s) {
192
- a = JSON.parse(s);
193
- const { timestamp: p, files: u } = a, v = 1440 * 60 * 1e3;
186
+ i = JSON.parse(s);
187
+ const { timestamp: p, files: u } = i, v = 1440 * 60 * 1e3;
194
188
  if (Date.now() - p < v)
195
189
  return u;
196
190
  }
197
191
  } catch (s) {
198
192
  console.warn("Failed to read from localStorage cache:", s);
199
193
  }
200
- const e = `https://huggingface.co/api/models/${g}/tree/${n}?recursive=true`;
201
- let r;
194
+ const e = `https://huggingface.co/api/models/${g}/tree/${a}?recursive=true`;
195
+ let o;
202
196
  try {
203
- if (r = await fetch(e), !r.ok)
204
- throw new Error(`Manifest fetch failed: ${r.status}`);
197
+ if (o = await fetch(e), !o.ok)
198
+ throw new Error(`Manifest fetch failed: ${o.status}`);
205
199
  } catch (s) {
206
- if (a)
200
+ if (i)
207
201
  return console.warn(
208
202
  "Failed to fetch manifest from network, falling back to cached data (expired):",
209
203
  s
210
- ), a.files;
204
+ ), i.files;
211
205
  throw s;
212
206
  }
213
- const c = await r.json(), f = new Map(c.map((s) => [s.path, s.size])), d = [], h = (s) => f.has(s), i = (s) => h(s) ? (d.push({ path: s, size: f.get(s) }), !0) : !1;
214
- i("config.json"), i("generation_config.json"), i("preprocessor_config.json"), h("tokenizer.json") ? (i("tokenizer.json"), i("tokenizer_config.json")) : (i("tokenizer_config.json"), i("special_tokens_map.json"), i("vocab.json"), i("merges.txt"), i("vocab.txt"));
207
+ const c = await o.json(), f = new Map(c.map((s) => [s.path, s.size])), d = [], h = (s) => f.has(s), r = (s) => h(s) ? (d.push({ path: s, size: f.get(s) }), !0) : !1;
208
+ r("config.json"), r("generation_config.json"), r("preprocessor_config.json"), h("tokenizer.json") ? (r("tokenizer.json"), r("tokenizer_config.json")) : (r("tokenizer_config.json"), r("special_tokens_map.json"), r("vocab.json"), r("merges.txt"), r("vocab.txt"));
215
209
  const w = "onnx";
216
210
  let m = [];
217
211
  t === "fp32" ? m = [""] : t === "quantized" ? m = ["_quantized"] : (m = [`_${t}`], t === "q8" && m.push(""));
@@ -234,11 +228,11 @@ async function M(g, o = {}) {
234
228
  (s) => s.includes("decoder_model_merged")
235
229
  ), b = _.filter((s) => !(x && s.includes("decoder_model") && !s.includes("merged")));
236
230
  for (const s of b) {
237
- i(s);
231
+ r(s);
238
232
  const p = `${s}_data`;
239
- if (i(p)) {
233
+ if (r(p)) {
240
234
  let u = 1;
241
- for (; i(`${p}_${u}`); )
235
+ for (; r(`${p}_${u}`); )
242
236
  u++;
243
237
  }
244
238
  }
@@ -256,5 +250,5 @@ async function M(g, o = {}) {
256
250
  return d;
257
251
  }
258
252
  export {
259
- I as default
253
+ D as default
260
254
  };