prompt-api-polyfill 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,451 +0,0 @@
1
- import {
2
- pipeline,
3
- TextStreamer,
4
- } from 'https://esm.run/@huggingface/transformers';
5
- import PolyfillBackend from './base.js';
6
- import { DEFAULT_MODELS } from './defaults.js';
7
-
8
/**
 * Transformers.js (ONNX Runtime) Backend
 *
 * Runs text generation locally via `@huggingface/transformers`, dispatching
 * WPT-compatible `downloadprogress` events on a monitor target while model
 * files are fetched.
 */
export default class TransformersBackend extends PolyfillBackend {
  // Lazily created text-generation pipeline (see #ensureGenerator).
  #generator;
  // Tokenizer taken from the pipeline once it has loaded.
  #tokenizer;
  // Execution device passed to the pipeline, e.g. 'webgpu'.
  #device;
  // Quantization/dtype preset passed to the pipeline, e.g. 'q4f16'.
  #dtype;
  // Optional system instruction captured at session creation.
  #systemInstruction;

  /**
   * @param {Object} [config] - Backend configuration.
   * @param {string} [config.modelName] - Hugging Face model ID.
   * @param {string} [config.device] - Execution device (falls back to defaults, then 'webgpu').
   * @param {string} [config.dtype] - Quantization preset (falls back to defaults, then 'q4f16').
   */
  constructor(config = {}) {
    super(config.modelName || DEFAULT_MODELS.transformers.modelName);
    this.#device =
      config.device || DEFAULT_MODELS.transformers.device || 'webgpu';
    this.#dtype = config.dtype || DEFAULT_MODELS.transformers.dtype || 'q4f16';
  }

  /**
   * Loaded models can be large, so we initialize them lazily.
   * @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
   * @returns {Promise<Object>} The generator.
   */
  async #ensureGenerator(monitorTarget) {
    if (!this.#generator) {
      // Pre-fetch the expected file list so global progress can be computed
      // across all model files, not only the one currently downloading.
      const files = new Map();
      const modelFiles = await resolveModelFiles(this.modelName, {
        dtype: this.#dtype,
      });
      for (const { path, size } of modelFiles) {
        files.set(path, { loaded: 0, total: size });
      }

      // Emits a `downloadprogress` event for a fractional `loaded` in [0, 1].
      const dispatch = (loaded) => {
        if (!monitorTarget) {
          return;
        }
        // Round down to the nearest 1/0x10000 (65536) as required by WPT.
        const precision = 1 / 65536;
        const roundedLoaded = Math.floor(loaded / precision) * precision;

        // Ensure strict monotonicity using the property set by the polyfill.
        // (The first dispatch passes because any comparison with an
        // undefined __lastProgressLoaded is false.)
        if (roundedLoaded <= monitorTarget.__lastProgressLoaded) {
          return;
        }

        monitorTarget.dispatchEvent(
          new ProgressEvent('downloadprogress', {
            loaded: roundedLoaded,
            total: 1,
            lengthComputable: true,
          })
        );
        monitorTarget.__lastProgressLoaded = roundedLoaded;
      };

      // Translates Transformers.js per-file progress callbacks into a single
      // global progress fraction over the pre-fetched file set.
      const progress_callback = (data) => {
        if (data.status === 'initiate') {
          if (files.has(data.file)) {
            const fileData = files.get(data.file);
            // Update with actual size if available, otherwise keep pre-fetched
            if (data.total) {
              fileData.total = data.total;
            }
          } else {
            files.set(data.file, { loaded: 0, total: data.total || 0 });
          }
        } else if (data.status === 'progress') {
          if (files.has(data.file)) {
            files.get(data.file).loaded = data.loaded;
          }
        } else if (data.status === 'done') {
          if (files.has(data.file)) {
            const fileData = files.get(data.file);
            fileData.loaded = fileData.total;
          }
        } else if (data.status === 'ready') {
          dispatch(1);
          return;
        }

        if (data.status === 'progress' || data.status === 'done') {
          let totalLoaded = 0;
          let totalSize = 0;
          for (const { loaded, total } of files.values()) {
            totalLoaded += loaded;
            totalSize += total;
          }

          if (totalSize > 0) {
            const globalProgress = totalLoaded / totalSize;
            // Cap at slightly less than 1.0 until 'ready'
            dispatch(Math.min(globalProgress, 0.9999));
          }
        }
      };

      // Initial 0% progress
      dispatch(0);

      this.#generator = await pipeline('text-generation', this.modelName, {
        device: this.#device,
        dtype: this.#dtype,
        progress_callback,
      });
      this.#tokenizer = this.#generator.tokenizer;
    }
    return this.#generator;
  }

  /**
   * Checks if the backend is available given the options.
   * This backend is text-only, so audio/image expected inputs are rejected.
   * @param {Object} options - LanguageModel options.
   * @returns {string} 'available' or 'unavailable'.
   */
  static availability(options) {
    if (options?.expectedInputs && Array.isArray(options.expectedInputs)) {
      for (const input of options.expectedInputs) {
        if (input.type === 'audio' || input.type === 'image') {
          return 'unavailable';
        }
      }
    }
    return 'available';
  }

  /**
   * Creates a new session.
   * @param {Object} options - LanguageModel options.
   * @param {Object} sessionParams - Session parameters.
   * @param {EventTarget} [monitorTarget] - The event target to dispatch download progress events to.
   * @returns {Promise<Object>} The generator.
   */
  async createSession(options, sessionParams, monitorTarget) {
    if (options.responseConstraint) {
      console.warn(
        "The `responseConstraint` flag isn't supported by the Transformers.js backend and was ignored."
      );
    }
    // Initializing the generator can be slow, so we do it lazily or here.
    // For now, let's trigger the loading.
    await this.#ensureGenerator(monitorTarget);

    // We don't really have "sessions" in the same way Gemini does,
    // but we can store the generation config.
    this.generationConfig = {
      max_new_tokens: 512, // Default limit
      // `??` (not `||`) so an explicit temperature of 0 (greedy decoding)
      // is preserved instead of being silently replaced with 1.0.
      temperature: sessionParams.generationConfig?.temperature ?? 1.0,
      top_p: 1.0,
      do_sample: sessionParams.generationConfig?.temperature > 0,
      return_full_text: false,
    };
    this.#systemInstruction = sessionParams.systemInstruction;

    return this.#generator;
  }

  /**
   * Generates a complete (non-streaming) response for the given contents.
   * @param {Object[]} contents - Gemini-style content objects ({role, parts}).
   * @returns {Promise<{text: string, usage: number}>} Generated text plus
   *   an approximate usage figure (input token count — see countTokens).
   */
  async generateContent(contents) {
    const generator = await this.#ensureGenerator();
    const messages = this.#contentsToMessages(contents);
    const prompt = this.#tokenizer.apply_chat_template(messages, {
      tokenize: false,
      add_generation_prompt: true,
    });
    const output = await generator(prompt, {
      ...this.generationConfig,
      add_special_tokens: false,
    });
    const text = output[0].generated_text;

    // Approximate usage
    const usage = await this.countTokens(contents);

    return { text, usage };
  }

  /**
   * Generates a streaming response for the given contents.
   * @param {Object[]} contents - Gemini-style content objects ({role, parts}).
   * @returns {Promise<AsyncGenerator>} Async generator of chunk objects with
   *   a `text()` accessor, mimicking the Gemini streaming response shape.
   */
  async generateContentStream(contents) {
    const generator = await this.#ensureGenerator();
    const messages = this.#contentsToMessages(contents);
    const prompt = this.#tokenizer.apply_chat_template(messages, {
      tokenize: false,
      add_generation_prompt: true,
    });

    // Producer/consumer handoff: the streamer callback pushes tokens into
    // `queue` and wakes the async generator via `resolveSignal`.
    const queue = [];
    let resolveSignal;
    let promise = new Promise((r) => (resolveSignal = r));
    let isDone = false;

    const on_token_callback = (text) => {
      queue.push(text);
      if (resolveSignal) {
        resolveSignal();
        resolveSignal = null;
      }
    };

    const streamer = new TextStreamer(this.#tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function: on_token_callback,
    });

    const generationPromise = generator(prompt, {
      ...this.generationConfig,
      add_special_tokens: false,
      streamer,
    });

    // Mark completion (or failure) and wake any pending consumer so the
    // generator below can exit its wait loop.
    generationPromise
      .then(() => {
        isDone = true;
        if (resolveSignal) {
          resolveSignal();
          resolveSignal = null;
        }
      })
      .catch((err) => {
        console.error('[Transformers.js] Generation error:', err);
        isDone = true;
        if (resolveSignal) {
          resolveSignal();
          resolveSignal = null;
        }
      });

    return (async function* () {
      while (true) {
        if (queue.length === 0 && !isDone) {
          // Re-arm the wakeup promise if the previous one was consumed.
          if (!resolveSignal) {
            promise = new Promise((r) => (resolveSignal = r));
          }
          await promise;
        }

        while (queue.length > 0) {
          const newText = queue.shift();
          yield {
            text: () => newText,
            usageMetadata: { totalTokenCount: 0 },
          };
        }

        if (isDone) {
          break;
        }
      }
    })();
  }

  /**
   * Counts the input tokens for the given contents using the chat template.
   * @param {Object[]} contents - Gemini-style content objects ({role, parts}).
   * @returns {Promise<number>} Number of input tokens.
   */
  async countTokens(contents) {
    await this.#ensureGenerator();
    const messages = this.#contentsToMessages(contents);
    const input_ids = this.#tokenizer.apply_chat_template(messages, {
      tokenize: true,
      add_generation_prompt: false,
      return_tensor: false,
    });
    return input_ids.length;
  }

  /**
   * Converts Gemini-style contents into chat-template messages, injecting the
   * stored system instruction and working around Gemma's lack of a system role.
   * @param {Object[]} contents - Gemini-style content objects ({role, parts}).
   * @returns {Object[]} Array of {role, content} messages.
   */
  #contentsToMessages(contents) {
    const messages = contents.map((c) => {
      let role =
        c.role === 'model'
          ? 'assistant'
          : c.role === 'system'
            ? 'system'
            : 'user';
      const content = c.parts.map((p) => p.text).join('');
      return { role, content };
    });

    if (this.#systemInstruction && !messages.some((m) => m.role === 'system')) {
      messages.unshift({ role: 'system', content: this.#systemInstruction });
    }

    // Gemma chat templates reject the 'system' role, so fold the system
    // message into the first following user message instead.
    if (this.modelName.toLowerCase().includes('gemma')) {
      const systemIndex = messages.findIndex((m) => m.role === 'system');
      if (systemIndex !== -1) {
        const systemMsg = messages[systemIndex];
        const nextUserIndex = messages.findIndex(
          (m, i) => m.role === 'user' && i > systemIndex
        );
        if (nextUserIndex !== -1) {
          messages[nextUserIndex].content =
            systemMsg.content + '\n\n' + messages[nextUserIndex].content;
          messages.splice(systemIndex, 1);
        } else {
          // If there's no user message after the system message,
          // just convert the system message to a user message.
          systemMsg.content += '\n\n';
          systemMsg.role = 'user';
        }
      }
    }

    return messages;
  }
}
307
-
308
/**
 * Exact replication of Transformers.js file resolution logic using HF Tree API.
 * Results are cached in localStorage for one day; on any network or parse
 * failure, expired cached data is used as a fallback when available.
 * @param {string} modelId - The Hugging Face model ID.
 * @param {object} options - Configuration options.
 * @param {string} [options.dtype='q8'] - Quantization preset used to select ONNX files.
 * @param {string} [options.branch='main'] - Repository branch to resolve against.
 * @returns {Promise<Object[]>} Array of { path, size } objects.
 * @throws {Error} When the manifest cannot be fetched/parsed and no cache exists.
 */
async function resolveModelFiles(modelId, options = {}) {
  const { dtype = 'q8', branch = 'main' } = options;

  let cachedData = null;
  const cacheKey = `transformers_model_files_${modelId}_${dtype}_${branch}`;
  try {
    const cached = localStorage.getItem(cacheKey);
    if (cached) {
      cachedData = JSON.parse(cached);
      const { timestamp, files } = cachedData;
      const oneDay = 24 * 60 * 60 * 1000;
      if (Date.now() - timestamp < oneDay) {
        return files;
      }
    }
  } catch (e) {
    // Best-effort cache: localStorage may be unavailable or contain junk.
    console.warn('Failed to read from localStorage cache:', e);
  }

  const manifestUrl = `https://huggingface.co/api/models/${modelId}/tree/${branch}?recursive=true`;

  // Fetch AND parse inside the same try so a truncated/malformed manifest
  // body also falls back to the (expired) cache instead of throwing.
  let fileTree;
  try {
    const response = await fetch(manifestUrl);
    if (!response.ok) {
      throw new Error(`Manifest fetch failed: ${response.status}`);
    }
    fileTree = await response.json();
  } catch (e) {
    if (cachedData) {
      console.warn(
        `Failed to fetch manifest from network, falling back to cached data (expired):`,
        e
      );
      return cachedData.files;
    }
    throw e;
  }

  const fileMap = new Map(fileTree.map((f) => [f.path, f.size]));
  const finalFiles = [];

  // Helper: check existence and return { path, size }
  const exists = (path) => fileMap.has(path);
  const add = (path) => {
    if (exists(path)) {
      finalFiles.push({ path, size: fileMap.get(path) });
      return true;
    }
    return false;
  };

  // --- 1. Configs (Always Required) ---
  add('config.json');
  add('generation_config.json');
  add('preprocessor_config.json');

  // --- 2. Tokenizer Resolution ---
  if (exists('tokenizer.json')) {
    add('tokenizer.json');
    add('tokenizer_config.json');
  } else {
    // Fallback: Legacy tokenizer files
    add('tokenizer_config.json');
    add('special_tokens_map.json');
    add('vocab.json');
    add('merges.txt');
    add('vocab.txt');
  }

  // --- 3. ONNX Model Resolution ---
  const onnxFolder = 'onnx';

  // Candidate filename suffixes per dtype, in preference order.
  let suffixes = [];
  if (dtype === 'fp32') {
    suffixes = [''];
  } else if (dtype === 'quantized') {
    suffixes = ['_quantized'];
  } else {
    suffixes = [`_${dtype}`];
    if (dtype === 'q8') {
      suffixes.push('');
    }
  }

  let components = [
    'model',
    'encoder_model',
    'decoder_model',
    'decoder_model_merged',
  ];

  // Pick the first existing suffix variant for each component.
  const foundComponents = [];
  for (const c of components) {
    for (const s of suffixes) {
      const filename = `${onnxFolder}/${c}${s}.onnx`;
      if (exists(filename)) {
        foundComponents.push(filename);
        break;
      }
    }
  }

  // Prefer the merged decoder over the plain decoder when both exist.
  const hasMerged = foundComponents.some((f) =>
    f.includes('decoder_model_merged')
  );
  const filteredComponents = foundComponents.filter((f) => {
    if (hasMerged && f.includes('decoder_model') && !f.includes('merged')) {
      return false;
    }
    return true;
  });

  for (const file of filteredComponents) {
    add(file);
    // External weight shards: `<file>_data`, then `<file>_data_1`, `_2`, ...
    const dataFile = `${file}_data`;
    if (add(dataFile)) {
      let i = 1;
      while (add(`${dataFile}_${i}`)) {
        i++;
      }
    }
  }

  try {
    localStorage.setItem(
      cacheKey,
      JSON.stringify({
        timestamp: Date.now(),
        files: finalFiles,
      })
    );
  } catch (e) {
    // Best-effort cache write; quota or availability errors are non-fatal.
    console.warn('Failed to write to localStorage cache:', e);
  }

  return finalFiles;
}
@@ -1,88 +0,0 @@
1
- import { Schema } from 'https://esm.run/firebase/ai';
2
-
3
- /**
4
- * Converts a standard JSON Schema object into a Firebase Vertex AI Schema class instance.
5
- * * @param {Object} jsonSchema - The standard JSON Schema object.
6
- * @returns {Schema} - The Firebase Vertex AI Schema instance.
7
- */
8
- export function convertJsonSchemaToVertexSchema(jsonSchema) {
9
- if (!jsonSchema) {
10
- return undefined;
11
- }
12
-
13
- // Extract common base parameters supported by all Schema types
14
- const baseParams = {
15
- description: jsonSchema.description,
16
- nullable: jsonSchema.nullable || false,
17
- format: jsonSchema.format,
18
- };
19
-
20
- // Handle "type": ["string", "null"] pattern common in JSON Schema
21
- if (Array.isArray(jsonSchema.type) && jsonSchema.type.includes('null')) {
22
- baseParams.nullable = true;
23
- jsonSchema.type = jsonSchema.type.find((t) => t !== 'null');
24
- }
25
-
26
- // SWITCH based on schema type
27
- switch (jsonSchema.type) {
28
- case 'string':
29
- // Check for Enums
30
- if (jsonSchema.enum && Array.isArray(jsonSchema.enum)) {
31
- return Schema.enumString({
32
- ...baseParams,
33
- enum: jsonSchema.enum,
34
- });
35
- }
36
- return Schema.string(baseParams);
37
-
38
- case 'number':
39
- return Schema.number(baseParams);
40
-
41
- case 'integer':
42
- return Schema.integer(baseParams);
43
-
44
- case 'boolean':
45
- return Schema.boolean(baseParams);
46
-
47
- case 'array':
48
- return Schema.array({
49
- ...baseParams,
50
- // Recursively convert the 'items' schema
51
- items: convertJsonSchemaToVertexSchema(jsonSchema.items),
52
- });
53
-
54
- case 'object':
55
- const properties = {};
56
- const allPropertyKeys = jsonSchema.properties
57
- ? Object.keys(jsonSchema.properties)
58
- : [];
59
-
60
- // Recursively convert each property
61
- allPropertyKeys.forEach((key) => {
62
- properties[key] = convertJsonSchemaToVertexSchema(
63
- jsonSchema.properties[key]
64
- );
65
- });
66
-
67
- // Calculate optionalProperties
68
- // JSON Schema uses "required" (allowlist), Vertex SDK uses "optionalProperties" (blocklist)
69
- const required = jsonSchema.required || [];
70
- const optionalProperties = allPropertyKeys.filter(
71
- (key) => !required.includes(key)
72
- );
73
-
74
- return Schema.object({
75
- ...baseParams,
76
- properties: properties,
77
- optionalProperties: optionalProperties,
78
- });
79
-
80
- default:
81
- // Fallback for unknown types or complex types not fully supported (like oneOf)
82
- // defaulting to string usually prevents crashes, but use with caution.
83
- console.warn(
84
- `Unsupported type: ${jsonSchema.type}, defaulting to string.`
85
- );
86
- return Schema.string(baseParams);
87
- }
88
- }