prompt-api-polyfill 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -1
- package/dist/backends/transformers.js +91 -85
- package/dot_env.json +11 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -140,6 +140,17 @@ npm install prompt-api-polyfill
|
|
|
140
140
|
apiKey: 'dummy', // Required for now by the loader
|
|
141
141
|
device: 'webgpu', // 'webgpu' or 'cpu'
|
|
142
142
|
dtype: 'q4f16', // Quantization level
|
|
143
|
+
env: {
|
|
144
|
+
// Optional: Pass low-level Transformers.js environment overrides
|
|
145
|
+
allowRemoteModels: false,
|
|
146
|
+
backends: {
|
|
147
|
+
onnx: {
|
|
148
|
+
wasm: {
|
|
149
|
+
wasmPaths: 'https://cdn.example.com/wasm-assets/',
|
|
150
|
+
},
|
|
151
|
+
},
|
|
152
|
+
},
|
|
153
|
+
},
|
|
143
154
|
};
|
|
144
155
|
|
|
145
156
|
if (!('LanguageModel' in window)) {
|
|
@@ -225,6 +236,18 @@ This repo ships with a template file:
|
|
|
225
236
|
// For Transformers.js:
|
|
226
237
|
"device": "webgpu",
|
|
227
238
|
"dtype": "q4f16",
|
|
239
|
+
|
|
240
|
+
// Optional library-level overrides:
|
|
241
|
+
"env": {
|
|
242
|
+
"allowRemoteModels": false,
|
|
243
|
+
"backends": {
|
|
244
|
+
"onnx": {
|
|
245
|
+
"wasm": {
|
|
246
|
+
"wasmPaths": "https://cdn.example.com/wasm-assets/"
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
228
251
|
}
|
|
229
252
|
```
|
|
230
253
|
|
|
@@ -281,7 +304,17 @@ Then open `.env.json` and fill in the values.
|
|
|
281
304
|
"apiKey": "dummy",
|
|
282
305
|
"modelName": "onnx-community/gemma-3-1b-it-ONNX-GQA",
|
|
283
306
|
"device": "webgpu",
|
|
284
|
-
"dtype": "q4f16"
|
|
307
|
+
"dtype": "q4f16",
|
|
308
|
+
"env": {
|
|
309
|
+
"allowRemoteModels": false,
|
|
310
|
+
"backends": {
|
|
311
|
+
"onnx": {
|
|
312
|
+
"wasm": {
|
|
313
|
+
"wasmPaths": "https://cdn.example.com/wasm-assets/"
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
285
318
|
}
|
|
286
319
|
```
|
|
287
320
|
|
|
@@ -304,6 +337,9 @@ Then open `.env.json` and fill in the values.
|
|
|
304
337
|
|
|
305
338
|
- `device`: **Transformers.js only**. Either `"webgpu"` or `"cpu"`.
|
|
306
339
|
- `dtype`: **Transformers.js only**. Quantization level (e.g., `"q4f16"`).
|
|
340
|
+
- `env` (optional): **Transformers.js only**. A flexible object to override
|
|
341
|
+
[Transformers.js environment variables](https://huggingface.co/docs/transformers.js/api/env).
|
|
342
|
+
This is useful for specifying local `wasmPaths` or proxy settings.
|
|
307
343
|
|
|
308
344
|
- `modelName` (optional): The model ID to use. If not provided, the polyfill
|
|
309
345
|
uses the defaults defined in [`backends/defaults.js`](backends/defaults.js).
|
|
package/dist/backends/transformers.js
CHANGED
|
@@ -1,68 +1,74 @@
|
|
|
1
|
-
import { pipeline as z, TextStreamer as C } from "@huggingface/transformers";
|
|
2
|
-
import { P as
|
|
3
|
-
class D extends j {
|
|
1
|
+
import { env as j, pipeline as z, TextStreamer as C } from "@huggingface/transformers";
|
|
2
|
+
import { P as $, D as y } from "../chunks/defaults-CNQngzSd.js";
|
|
3
|
+
class I extends $ {
|
|
4
4
|
#e;
|
|
5
5
|
#t;
|
|
6
|
-
#
|
|
7
|
-
#n;
|
|
6
|
+
#i;
|
|
8
7
|
#o;
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
#n;
|
|
9
|
+
constructor(o = {}) {
|
|
10
|
+
if (super(o.modelName || y.transformers.modelName), this.#i = o.device || y.transformers.device, this.#o = o.dtype || y.transformers.dtype, o.env) {
|
|
11
|
+
const t = (n, a) => {
|
|
12
|
+
for (const [l, e] of Object.entries(a))
|
|
13
|
+
e && typeof e == "object" && !Array.isArray(e) && n[l] && typeof n[l] == "object" ? t(n[l], e) : n[l] = e;
|
|
14
|
+
};
|
|
15
|
+
t(j, o.env);
|
|
16
|
+
}
|
|
11
17
|
}
|
|
12
18
|
/**
|
|
13
19
|
* Loaded models can be large, so we initialize them lazily.
|
|
14
20
|
* @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
|
|
15
21
|
* @returns {Promise<Object>} The generator.
|
|
16
22
|
*/
|
|
17
|
-
async #s(
|
|
23
|
+
async #s(o) {
|
|
18
24
|
if (!this.#e) {
|
|
19
|
-
const t = /* @__PURE__ */ new Map(),
|
|
20
|
-
dtype: this.#
|
|
25
|
+
const t = /* @__PURE__ */ new Map(), n = await M(this.modelName, {
|
|
26
|
+
dtype: this.#o
|
|
21
27
|
});
|
|
22
|
-
for (const { path: e, size:
|
|
23
|
-
t.set(e, { loaded: 0, total:
|
|
24
|
-
const
|
|
25
|
-
if (!
|
|
28
|
+
for (const { path: e, size: r } of n)
|
|
29
|
+
t.set(e, { loaded: 0, total: r });
|
|
30
|
+
const a = (e) => {
|
|
31
|
+
if (!o)
|
|
26
32
|
return;
|
|
27
|
-
const
|
|
28
|
-
c <=
|
|
33
|
+
const r = 1 / 65536, c = Math.floor(e / r) * r;
|
|
34
|
+
c <= o.__lastProgressLoaded || (o.dispatchEvent(
|
|
29
35
|
new ProgressEvent("downloadprogress", {
|
|
30
36
|
loaded: c,
|
|
31
37
|
total: 1,
|
|
32
38
|
lengthComputable: !0
|
|
33
39
|
})
|
|
34
|
-
),
|
|
40
|
+
), o.__lastProgressLoaded = c);
|
|
35
41
|
}, l = (e) => {
|
|
36
42
|
if (e.status === "initiate")
|
|
37
43
|
if (t.has(e.file)) {
|
|
38
|
-
const
|
|
39
|
-
e.total && (
|
|
44
|
+
const r = t.get(e.file);
|
|
45
|
+
e.total && (r.total = e.total);
|
|
40
46
|
} else
|
|
41
47
|
t.set(e.file, { loaded: 0, total: e.total || 0 });
|
|
42
48
|
else if (e.status === "progress")
|
|
43
49
|
t.has(e.file) && (t.get(e.file).loaded = e.loaded);
|
|
44
50
|
else if (e.status === "done") {
|
|
45
51
|
if (t.has(e.file)) {
|
|
46
|
-
const
|
|
47
|
-
|
|
52
|
+
const r = t.get(e.file);
|
|
53
|
+
r.loaded = r.total;
|
|
48
54
|
}
|
|
49
55
|
} else if (e.status === "ready") {
|
|
50
|
-
|
|
56
|
+
a(1);
|
|
51
57
|
return;
|
|
52
58
|
}
|
|
53
59
|
if (e.status === "progress" || e.status === "done") {
|
|
54
|
-
let
|
|
60
|
+
let r = 0, c = 0;
|
|
55
61
|
for (const { loaded: f, total: d } of t.values())
|
|
56
|
-
|
|
62
|
+
r += f, c += d;
|
|
57
63
|
if (c > 0) {
|
|
58
|
-
const f =
|
|
59
|
-
|
|
64
|
+
const f = r / c;
|
|
65
|
+
a(Math.min(f, 0.9999));
|
|
60
66
|
}
|
|
61
67
|
}
|
|
62
68
|
};
|
|
63
|
-
|
|
64
|
-
device: this.#
|
|
65
|
-
dtype: this.#
|
|
69
|
+
a(0), this.#e = await z("text-generation", this.modelName, {
|
|
70
|
+
device: this.#i,
|
|
71
|
+
dtype: this.#o,
|
|
66
72
|
progress_callback: l
|
|
67
73
|
}), this.#t = this.#e.tokenizer;
|
|
68
74
|
}
|
|
@@ -73,9 +79,9 @@ class D extends j {
|
|
|
73
79
|
* @param {Object} options - LanguageModel options.
|
|
74
80
|
* @returns {string} 'available' or 'unavailable'.
|
|
75
81
|
*/
|
|
76
|
-
static availability(
|
|
77
|
-
if (
|
|
78
|
-
for (const t of
|
|
82
|
+
static availability(o) {
|
|
83
|
+
if (o?.expectedInputs && Array.isArray(o.expectedInputs)) {
|
|
84
|
+
for (const t of o.expectedInputs)
|
|
79
85
|
if (t.type === "audio" || t.type === "image")
|
|
80
86
|
return "unavailable";
|
|
81
87
|
}
|
|
@@ -88,55 +94,55 @@ class D extends j {
|
|
|
88
94
|
* @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
|
|
89
95
|
* @returns {Promise<Object>} The generator.
|
|
90
96
|
*/
|
|
91
|
-
async createSession(
|
|
92
|
-
return
|
|
97
|
+
async createSession(o, t, n) {
|
|
98
|
+
return o.responseConstraint && console.warn(
|
|
93
99
|
"The `responseConstraint` flag isn't supported by the Transformers.js backend and was ignored."
|
|
94
|
-
), await this.#s(
|
|
100
|
+
), await this.#s(n), this.generationConfig = {
|
|
95
101
|
max_new_tokens: 512,
|
|
96
102
|
// Default limit
|
|
97
103
|
temperature: t.generationConfig?.temperature ?? 1,
|
|
98
104
|
top_p: 1,
|
|
99
105
|
do_sample: t.generationConfig?.temperature !== 0,
|
|
100
106
|
return_full_text: !1
|
|
101
|
-
}, this.#
|
|
107
|
+
}, this.#n = t.systemInstruction, this.#e;
|
|
102
108
|
}
|
|
103
|
-
async generateContent(
|
|
104
|
-
const t = await this.#s(),
|
|
109
|
+
async generateContent(o) {
|
|
110
|
+
const t = await this.#s(), n = this.#r(o), a = this.#t.apply_chat_template(n, {
|
|
105
111
|
tokenize: !1,
|
|
106
112
|
add_generation_prompt: !0
|
|
107
|
-
}), e = (await t(
|
|
113
|
+
}), e = (await t(a, {
|
|
108
114
|
...this.generationConfig,
|
|
109
115
|
add_special_tokens: !1
|
|
110
|
-
}))[0].generated_text,
|
|
111
|
-
return { text: e, usage:
|
|
116
|
+
}))[0].generated_text, r = await this.countTokens(o);
|
|
117
|
+
return { text: e, usage: r };
|
|
112
118
|
}
|
|
113
|
-
async generateContentStream(
|
|
114
|
-
const t = await this.#s(),
|
|
119
|
+
async generateContentStream(o) {
|
|
120
|
+
const t = await this.#s(), n = this.#r(o), a = this.#t.apply_chat_template(n, {
|
|
115
121
|
tokenize: !1,
|
|
116
122
|
add_generation_prompt: !0
|
|
117
123
|
}), l = [];
|
|
118
|
-
let e,
|
|
119
|
-
const f = (
|
|
120
|
-
l.push(
|
|
124
|
+
let e, r = new Promise((i) => e = i), c = !1;
|
|
125
|
+
const f = (i) => {
|
|
126
|
+
l.push(i), e && (e(), e = null);
|
|
121
127
|
}, d = new C(this.#t, {
|
|
122
128
|
skip_prompt: !0,
|
|
123
129
|
skip_special_tokens: !0,
|
|
124
130
|
callback_function: f
|
|
125
131
|
});
|
|
126
|
-
return t(
|
|
132
|
+
return t(a, {
|
|
127
133
|
...this.generationConfig,
|
|
128
134
|
add_special_tokens: !1,
|
|
129
135
|
streamer: d
|
|
130
136
|
}).then(() => {
|
|
131
137
|
c = !0, e && (e(), e = null);
|
|
132
|
-
}).catch((
|
|
133
|
-
console.error("[Transformers.js] Generation error:",
|
|
138
|
+
}).catch((i) => {
|
|
139
|
+
console.error("[Transformers.js] Generation error:", i), c = !0, e && (e(), e = null);
|
|
134
140
|
}), (async function* () {
|
|
135
141
|
for (; ; ) {
|
|
136
|
-
for (l.length === 0 && !c && (e || (
|
|
137
|
-
const
|
|
142
|
+
for (l.length === 0 && !c && (e || (r = new Promise((i) => e = i)), await r); l.length > 0; ) {
|
|
143
|
+
const i = l.shift();
|
|
138
144
|
yield {
|
|
139
|
-
text: () =>
|
|
145
|
+
text: () => i,
|
|
140
146
|
usageMetadata: { totalTokenCount: 0 }
|
|
141
147
|
};
|
|
142
148
|
}
|
|
@@ -145,67 +151,67 @@ class D extends j {
|
|
|
145
151
|
}
|
|
146
152
|
})();
|
|
147
153
|
}
|
|
148
|
-
async countTokens(
|
|
154
|
+
async countTokens(o) {
|
|
149
155
|
await this.#s();
|
|
150
|
-
const t = this.#r(
|
|
156
|
+
const t = this.#r(o);
|
|
151
157
|
return this.#t.apply_chat_template(t, {
|
|
152
158
|
tokenize: !0,
|
|
153
159
|
add_generation_prompt: !1,
|
|
154
160
|
return_tensor: !1
|
|
155
161
|
}).length;
|
|
156
162
|
}
|
|
157
|
-
#r(
|
|
158
|
-
const t =
|
|
159
|
-
let
|
|
160
|
-
const l =
|
|
161
|
-
return { role:
|
|
163
|
+
#r(o) {
|
|
164
|
+
const t = o.map((n) => {
|
|
165
|
+
let a = n.role === "model" ? "assistant" : n.role === "system" ? "system" : "user";
|
|
166
|
+
const l = n.parts.map((e) => e.text).join("");
|
|
167
|
+
return { role: a, content: l };
|
|
162
168
|
});
|
|
163
|
-
if (this.#
|
|
164
|
-
const
|
|
165
|
-
if (
|
|
166
|
-
const
|
|
167
|
-
(e,
|
|
169
|
+
if (this.#n && !t.some((n) => n.role === "system") && t.unshift({ role: "system", content: this.#n }), this.modelName.toLowerCase().includes("gemma")) {
|
|
170
|
+
const n = t.findIndex((a) => a.role === "system");
|
|
171
|
+
if (n !== -1) {
|
|
172
|
+
const a = t[n], l = t.findIndex(
|
|
173
|
+
(e, r) => e.role === "user" && r > n
|
|
168
174
|
);
|
|
169
|
-
l !== -1 ? (t[l].content =
|
|
175
|
+
l !== -1 ? (t[l].content = a.content + `
|
|
170
176
|
|
|
171
|
-
` + t[l].content, t.splice(
|
|
177
|
+
` + t[l].content, t.splice(n, 1)) : (a.content += `
|
|
172
178
|
|
|
173
|
-
`,
|
|
179
|
+
`, a.role = "user");
|
|
174
180
|
}
|
|
175
181
|
}
|
|
176
182
|
return t;
|
|
177
183
|
}
|
|
178
184
|
}
|
|
179
|
-
async function $(g, n = {}) {
|
|
180
|
-
const { dtype: t = "q8", branch:
|
|
181
|
-
let
|
|
182
|
-
const l = `transformers_model_files_${g}_${t}_${
|
|
185
|
+
async function M(g, o = {}) {
|
|
186
|
+
const { dtype: t = "q8", branch: n = "main" } = o;
|
|
187
|
+
let a = null;
|
|
188
|
+
const l = `transformers_model_files_${g}_${t}_${n}`;
|
|
183
189
|
try {
|
|
184
190
|
const s = localStorage.getItem(l);
|
|
185
191
|
if (s) {
|
|
186
|
-
|
|
187
|
-
const { timestamp: p, files: u } =
|
|
192
|
+
a = JSON.parse(s);
|
|
193
|
+
const { timestamp: p, files: u } = a, v = 1440 * 60 * 1e3;
|
|
188
194
|
if (Date.now() - p < v)
|
|
189
195
|
return u;
|
|
190
196
|
}
|
|
191
197
|
} catch (s) {
|
|
192
198
|
console.warn("Failed to read from localStorage cache:", s);
|
|
193
199
|
}
|
|
194
|
-
const e = `https://huggingface.co/api/models/${g}/tree/${
|
|
195
|
-
let
|
|
200
|
+
const e = `https://huggingface.co/api/models/${g}/tree/${n}?recursive=true`;
|
|
201
|
+
let r;
|
|
196
202
|
try {
|
|
197
|
-
if (
|
|
198
|
-
throw new Error(`Manifest fetch failed: ${
|
|
203
|
+
if (r = await fetch(e), !r.ok)
|
|
204
|
+
throw new Error(`Manifest fetch failed: ${r.status}`);
|
|
199
205
|
} catch (s) {
|
|
200
|
-
if (
|
|
206
|
+
if (a)
|
|
201
207
|
return console.warn(
|
|
202
208
|
"Failed to fetch manifest from network, falling back to cached data (expired):",
|
|
203
209
|
s
|
|
204
|
-
),
|
|
210
|
+
), a.files;
|
|
205
211
|
throw s;
|
|
206
212
|
}
|
|
207
|
-
const c = await
|
|
208
|
-
|
|
213
|
+
const c = await r.json(), f = new Map(c.map((s) => [s.path, s.size])), d = [], h = (s) => f.has(s), i = (s) => h(s) ? (d.push({ path: s, size: f.get(s) }), !0) : !1;
|
|
214
|
+
i("config.json"), i("generation_config.json"), i("preprocessor_config.json"), h("tokenizer.json") ? (i("tokenizer.json"), i("tokenizer_config.json")) : (i("tokenizer_config.json"), i("special_tokens_map.json"), i("vocab.json"), i("merges.txt"), i("vocab.txt"));
|
|
209
215
|
const w = "onnx";
|
|
210
216
|
let m = [];
|
|
211
217
|
t === "fp32" ? m = [""] : t === "quantized" ? m = ["_quantized"] : (m = [`_${t}`], t === "q8" && m.push(""));
|
|
@@ -228,11 +234,11 @@ async function $(g, n = {}) {
|
|
|
228
234
|
(s) => s.includes("decoder_model_merged")
|
|
229
235
|
), b = _.filter((s) => !(x && s.includes("decoder_model") && !s.includes("merged")));
|
|
230
236
|
for (const s of b) {
|
|
231
|
-
|
|
237
|
+
i(s);
|
|
232
238
|
const p = `${s}_data`;
|
|
233
|
-
if (
|
|
239
|
+
if (i(p)) {
|
|
234
240
|
let u = 1;
|
|
235
|
-
for (;
|
|
241
|
+
for (; i(`${p}_${u}`); )
|
|
236
242
|
u++;
|
|
237
243
|
}
|
|
238
244
|
}
|
|
@@ -250,5 +256,5 @@ async function $(g, n = {}) {
|
|
|
250
256
|
return d;
|
|
251
257
|
}
|
|
252
258
|
export {
|
|
253
|
-
|
|
259
|
+
I as default
|
|
254
260
|
};
|
package/dot_env.json
CHANGED
|
@@ -7,5 +7,15 @@
|
|
|
7
7
|
"reCaptchaSiteKey": "",
|
|
8
8
|
"useLimitedUseAppCheckTokens": true,
|
|
9
9
|
"device": "webgpu",
|
|
10
|
-
"dtype": "q4f16"
|
|
10
|
+
"dtype": "q4f16",
|
|
11
|
+
"env": {
|
|
12
|
+
"allowRemoteModels": true,
|
|
13
|
+
"backends": {
|
|
14
|
+
"onnx": {
|
|
15
|
+
"wasm": {
|
|
16
|
+
"wasmPaths": ""
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
}
|
|
11
21
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prompt-api-polyfill",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API, OpenAI API, or Transformers.js.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/prompt-api-polyfill.js",
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
"vite": "^7.3.1"
|
|
50
50
|
},
|
|
51
51
|
"dependencies": {
|
|
52
|
-
"@google/genai": "^1.
|
|
52
|
+
"@google/genai": "^1.42.0",
|
|
53
53
|
"@huggingface/transformers": "^3.8.1",
|
|
54
54
|
"firebase": "^12.9.0",
|
|
55
55
|
"openai": "^6.22.0"
|