prompt-api-polyfill 1.0.0 → 1.1.0
- package/LICENSE +201 -0
- package/README.md +27 -7
- package/dist/backends/firebase.js +1808 -0
- package/dist/backends/gemini.js +55 -0
- package/dist/backends/openai.js +199 -0
- package/dist/backends/transformers.js +254 -0
- package/{backends/base.js → dist/chunks/defaults-CNQngzSd.js} +29 -24
- package/dist/prompt-api-polyfill.js +1031 -0
- package/dot_env.json +3 -0
- package/package.json +21 -16
- package/async-iterator-polyfill.js +0 -16
- package/backends/defaults.js +0 -13
- package/backends/firebase.js +0 -49
- package/backends/gemini.js +0 -52
- package/backends/openai.js +0 -337
- package/backends/transformers.js +0 -451
- package/json-schema-converter.js +0 -88
- package/multimodal-converter.js +0 -383
- package/prompt-api-polyfill.js +0 -1467
package/dot_env.json
CHANGED
package/package.json
CHANGED
@@ -1,21 +1,17 @@
 {
   "name": "prompt-api-polyfill",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "Polyfill for the Prompt API (`LanguageModel`) backed by Firebase AI Logic, Gemini API, OpenAI API, or Transformers.js.",
   "type": "module",
-  "main": "./prompt-api-polyfill.js",
-  "module": "./prompt-api-polyfill.js",
-  "browser": "./prompt-api-polyfill.js",
+  "main": "./dist/prompt-api-polyfill.js",
+  "module": "./dist/prompt-api-polyfill.js",
+  "browser": "./dist/prompt-api-polyfill.js",
   "exports": {
-    ".": "./prompt-api-polyfill.js"
+    ".": "./dist/prompt-api-polyfill.js"
   },
   "files": [
-    "async-iterator-polyfill.js",
-    "json-schema-converter.js",
-    "multimodal-converter.js",
-    "prompt-api-polyfill.js",
-    "dot_env.json",
-    "backends/"
+    "dist/",
+    "dot_env.json"
   ],
   "sideEffects": true,
   "keywords": [
@@ -36,17 +32,26 @@
   "bugs": {
     "url": "https://github.com/GoogleChromeLabs/web-ai-demos/issues"
   },
-  "homepage": "https://github.com/GoogleChromeLabs/web-ai-demos/tree/main/prompt-api-polyfill
+  "homepage": "https://github.com/GoogleChromeLabs/web-ai-demos/tree/main/prompt-api-polyfill",
   "license": "Apache-2.0",
   "scripts": {
-    "start": "
+    "start": "vite",
+    "build": "vite build",
+    "prepublishOnly": "npm run build",
     "sync:wpt": "node scripts/sync-wpt.js",
     "generate:wpt": "node scripts/list-backends.js && node scripts/generate-wpt-wrappers.js",
-    "test:wpt": "npm run sync:wpt && npm run generate:wpt && npx
+    "test:wpt": "npm run sync:wpt && npm run generate:wpt && npx vite --open /tests/wpt/index.html",
     "fix": "npx prettier --write ."
   },
   "devDependencies": {
-    "
-    "prettier-plugin-curly": "^0.4.1"
+    "prettier": "^3.8.1",
+    "prettier-plugin-curly": "^0.4.1",
+    "vite": "^7.3.1"
+  },
+  "dependencies": {
+    "@google/genai": "^1.41.0",
+    "@huggingface/transformers": "^3.8.1",
+    "firebase": "^12.9.0",
+    "openai": "^6.22.0"
   }
 }
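
Consumer impact of the entry-point move is minimal: "exports" maps the bare specifier in both versions, so existing imports keep resolving and only the published layout moved under dist/. A consumer-side sketch (the availability call is an assumption based on the Prompt API surface named in the package description):

// Resolves to ./prompt-api-polyfill.js in 1.0.0
// and to ./dist/prompt-api-polyfill.js in 1.1.0:
import 'prompt-api-polyfill';

// Assumed Prompt API usage once the polyfill has installed the global:
// const availability = await LanguageModel.availability();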

package/async-iterator-polyfill.js
DELETED

@@ -1,16 +0,0 @@
-if (!ReadableStream.prototype[Symbol.asyncIterator]) {
-  ReadableStream.prototype[Symbol.asyncIterator] = async function* () {
-    const reader = this.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) {
-          return;
-        }
-        yield value;
-      }
-    } finally {
-      reader.releaseLock();
-    }
-  };
-}
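
This removed file backfilled async iteration over ReadableStream for engines that lack it natively; 1.1.0 presumably folds the equivalent into the dist bundle. A minimal standalone sketch of what it enabled:

// Consuming any ReadableStream with for await...of, which relies on
// ReadableStream.prototype[Symbol.asyncIterator] being present:
async function readAll(stream) {
  const decoder = new TextDecoder();
  let text = '';
  for await (const chunk of stream) {
    text += decoder.decode(chunk, { stream: true });
  }
  return text + decoder.decode();
}

await readAll(new Blob(['hello, world']).stream()); // 'hello, world'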
package/backends/defaults.js
DELETED

@@ -1,13 +0,0 @@
-/**
- * Default model versions for each backend.
- */
-export const DEFAULT_MODELS = {
-  firebase: { modelName: 'gemini-2.5-flash-lite' },
-  gemini: { modelName: 'gemini-2.0-flash-lite-preview-02-05' },
-  openai: { modelName: 'gpt-4o' },
-  transformers: {
-    modelName: 'onnx-community/gemma-3-1b-it-ONNX-GQA',
-    device: 'webgpu',
-    dtype: 'q4f16',
-  },
-};
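
Each backend constructor below consumed these via config.modelName || DEFAULT_MODELS.<backend>.modelName, so a default only applied when no model was passed in. A sketch of the override path (API key and override model are hypothetical placeholders):

import GeminiBackend from './backends/gemini.js';

// Falls back to DEFAULT_MODELS.gemini.modelName:
const byDefault = new GeminiBackend({ apiKey: 'YOUR_KEY' });

// An explicit modelName wins over the default:
const overridden = new GeminiBackend({
  apiKey: 'YOUR_KEY',
  modelName: 'gemini-2.5-flash', // hypothetical override
});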
package/backends/firebase.js
DELETED

@@ -1,49 +0,0 @@
-import { initializeApp } from 'https://esm.run/firebase/app';
-import {
-  getAI,
-  getGenerativeModel,
-  GoogleAIBackend,
-  InferenceMode,
-} from 'https://esm.run/firebase/ai';
-import PolyfillBackend from './base.js';
-import { DEFAULT_MODELS } from './defaults.js';
-
-/**
- * Firebase AI Logic Backend
- */
-export default class FirebaseBackend extends PolyfillBackend {
-  #model;
-  #sessionParams;
-
-  constructor(config) {
-    super(config.modelName || DEFAULT_MODELS.firebase.modelName);
-    this.ai = getAI(initializeApp(config), { backend: new GoogleAIBackend() });
-  }
-
-  createSession(_options, sessionParams) {
-    this.#sessionParams = sessionParams;
-    this.#model = getGenerativeModel(this.ai, {
-      mode: InferenceMode.ONLY_IN_CLOUD,
-      inCloudParams: sessionParams,
-    });
-    return this.#model;
-  }
-
-  async generateContent(contents) {
-    const result = await this.#model.generateContent({ contents });
-    const usage = result.response.usageMetadata?.promptTokenCount || 0;
-    return { text: result.response.text(), usage };
-  }
-
-  async generateContentStream(contents) {
-    const result = await this.#model.generateContentStream({ contents });
-    return result.stream;
-  }
-
-  async countTokens(contents) {
-    const { totalTokens } = await this.#model.countTokens({
-      contents,
-    });
-    return totalTokens;
-  }
-}
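
A sketch of how this backend was driven, inferred from the method signatures above; the Firebase web-app config values are hypothetical placeholders, and config doubles as the app config handed to initializeApp():

import FirebaseBackend from './backends/firebase.js';

const backend = new FirebaseBackend({
  apiKey: 'YOUR_FIREBASE_KEY', // hypothetical
  projectId: 'your-project',   // hypothetical
  appId: 'your-app-id',        // hypothetical
});

// sessionParams are forwarded verbatim as inCloudParams:
backend.createSession({}, { generationConfig: { temperature: 1.0 } });

const { text, usage } = await backend.generateContent([
  { role: 'user', parts: [{ text: 'Hello!' }] },
]);
console.log(text, usage); // response text plus promptTokenCount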
package/backends/gemini.js
DELETED

@@ -1,52 +0,0 @@
-import { GoogleGenerativeAI } from 'https://esm.run/@google/generative-ai';
-import PolyfillBackend from './base.js';
-import { DEFAULT_MODELS } from './defaults.js';
-
-/**
- * Google Gemini API Backend
- */
-export default class GeminiBackend extends PolyfillBackend {
-  #model;
-  #sessionParams;
-
-  constructor(config) {
-    super(config.modelName || DEFAULT_MODELS.gemini.modelName);
-    this.genAI = new GoogleGenerativeAI(config.apiKey);
-  }
-
-  createSession(options, sessionParams) {
-    this.#sessionParams = sessionParams;
-    const modelParams = {
-      model: options.modelName || this.modelName,
-      generationConfig: sessionParams.generationConfig,
-      systemInstruction: sessionParams.systemInstruction,
-    };
-    // Clean undefined systemInstruction
-    if (!modelParams.systemInstruction) {
-      delete modelParams.systemInstruction;
-    }
-
-    this.#model = this.genAI.getGenerativeModel(modelParams);
-    return this.#model;
-  }
-
-  async generateContent(contents) {
-    // Gemini SDK expects { role, parts: [...] } which matches our internal structure
-    const result = await this.#model.generateContent({ contents });
-    const response = await result.response;
-    const usage = response.usageMetadata?.promptTokenCount || 0;
-    return { text: response.text(), usage };
-  }
-
-  async generateContentStream(contents) {
-    const result = await this.#model.generateContentStream({ contents });
-    return result.stream;
-  }
-
-  async countTokens(contents) {
-    const { totalTokens } = await this.#model.countTokens({
-      contents,
-    });
-    return totalTokens;
-  }
-}
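
Alongside this deletion, 1.1.0's package.json adds @google/genai, the successor SDK to the @google/generative-ai build imported here from esm.run. For orientation, a minimal sketch of the equivalent call in the newer SDK (not taken from the bundled dist; key and model name are hypothetical):

import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: 'YOUR_KEY' }); // hypothetical key
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash-lite', // hypothetical choice
  contents: 'Hello!',
});
console.log(response.text); // a property in @google/genai, not a method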
package/backends/openai.js
DELETED

@@ -1,337 +0,0 @@
-import OpenAI from 'https://esm.run/openai';
-import PolyfillBackend from './base.js';
-import { DEFAULT_MODELS } from './defaults.js';
-
-/**
- * OpenAI API Backend
- */
-export default class OpenAIBackend extends PolyfillBackend {
-  #model;
-
-  constructor(config) {
-    super(config.modelName || DEFAULT_MODELS.openai.modelName);
-    this.config = config;
-    this.openai = new OpenAI({
-      apiKey: config.apiKey,
-      dangerouslyAllowBrowser: true, // Required for client-side usage
-    });
-  }
-
-  static availability(options = {}) {
-    if (options.expectedInputs) {
-      const hasAudio = options.expectedInputs.some(
-        (input) => input.type === 'audio'
-      );
-      const hasImage = options.expectedInputs.some(
-        (input) => input.type === 'image'
-      );
-      if (hasAudio && hasImage) {
-        return 'unavailable';
-      }
-    }
-    return 'available';
-  }
-
-  createSession(options, sessionParams) {
-    // OpenAI doesn't have a "session" object like Gemini, so we return a context object
-    // tailored for our generate methods.
-    this.#model = {
-      model: options.modelName || this.modelName,
-      temperature: sessionParams.generationConfig?.temperature,
-      top_p: 1.0, // Default to 1.0 as topK is not directly supported the same way
-      systemInstruction: sessionParams.systemInstruction,
-    };
-
-    const config = sessionParams.generationConfig || {};
-    if (config.responseSchema) {
-      const { schema, wrapped } = this.#fixSchemaForOpenAI(
-        config.responseSchema
-      );
-      this.#model.response_format = {
-        type: 'json_schema',
-        json_schema: {
-          name: 'response',
-          strict: true,
-          schema: schema,
-        },
-      };
-      this.#model.response_wrapped = wrapped;
-    } else if (config.responseMimeType === 'application/json') {
-      this.#model.response_format = { type: 'json_object' };
-    }
-
-    return this.#model;
-  }
-
-  /**
-   * OpenAI Structured Outputs require:
-   * 1. All fields in objects to be marked as 'required'.
-   * 2. Objects to have 'additionalProperties: false'.
-   * 3. The root must be an 'object'.
-   */
-  #fixSchemaForOpenAI(schema) {
-    if (typeof schema !== 'object' || schema === null) {
-      return { schema, wrapped: false };
-    }
-
-    const processNode = (node) => {
-      if (node.type === 'object') {
-        if (node.properties) {
-          node.additionalProperties = false;
-          node.required = Object.keys(node.properties);
-          for (const key in node.properties) {
-            processNode(node.properties[key]);
-          }
-        } else {
-          node.additionalProperties = false;
-          node.required = [];
-        }
-      } else if (node.type === 'array' && node.items) {
-        processNode(node.items);
-      }
-      return node;
-    };
-
-    // Deep clone to avoid side effects
-    const cloned = JSON.parse(JSON.stringify(schema));
-
-    if (cloned.type !== 'object') {
-      // Wrap in object as OpenAI requires object root
-      return {
-        wrapped: true,
-        schema: {
-          type: 'object',
-          properties: { value: cloned },
-          required: ['value'],
-          additionalProperties: false,
-        },
-      };
-    }
-
-    return {
-      wrapped: false,
-      schema: processNode(cloned),
-    };
-  }
-
-  #validateContent(messages) {
-    let hasImage = false;
-    let hasAudio = false;
-
-    for (const msg of messages) {
-      if (Array.isArray(msg.content)) {
-        for (const part of msg.content) {
-          if (part.type === 'image_url') {
-            hasImage = true;
-          }
-          if (part.type === 'input_audio') {
-            hasAudio = true;
-          }
-        }
-      }
-    }
-
-    if (hasImage && hasAudio) {
-      throw new Error(
-        'OpenAI backend does not support mixing images and audio in the same session. Please start a new session.'
-      );
-    }
-
-    return { hasImage, hasAudio };
-  }
-
-  #routeModel(hasAudio) {
-    // If the user explicitly provided a model in the session options, respect it.
-    // Otherwise, pick based on content.
-    if (this.#model.model !== this.modelName) {
-      return this.#model.model;
-    }
-
-    return hasAudio ? `${this.modelName}-audio-preview` : this.modelName;
-  }
-
-  async generateContent(contents) {
-    const { messages } = this.#convertContentsToInput(
-      contents,
-      this.#model.systemInstruction
-    );
-    const { hasAudio } = this.#validateContent(messages);
-    const model = this.#routeModel(hasAudio);
-
-    if (
-      model === `${this.modelName}-audio-preview` &&
-      this.#model.response_format
-    ) {
-      throw new DOMException(
-        `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
-        'NotSupportedError'
-      );
-    }
-
-    const options = {
-      model: model,
-      messages: messages,
-    };
-
-    if (this.#model.temperature > 0) {
-      options.temperature = this.#model.temperature;
-    }
-
-    if (this.#model.response_format) {
-      options.response_format = this.#model.response_format;
-    }
-
-    try {
-      const response = await this.openai.chat.completions.create(options);
-
-      const choice = response.choices[0];
-      let text = choice.message.content;
-
-      if (this.#model.response_wrapped && text) {
-        try {
-          const parsed = JSON.parse(text);
-          if (parsed && typeof parsed === 'object' && 'value' in parsed) {
-            text = JSON.stringify(parsed.value);
-          }
-        } catch {
-          // Ignore parsing error, return raw text
-        }
-      }
-
-      const usage = response.usage?.prompt_tokens || 0;
-
-      return { text, usage };
-    } catch (error) {
-      console.error('OpenAI Generate Content Error:', error);
-      throw error;
-    }
-  }
-
-  async generateContentStream(contents) {
-    const { messages } = this.#convertContentsToInput(
-      contents,
-      this.#model.systemInstruction
-    );
-    const { hasAudio } = this.#validateContent(messages);
-    const model = this.#routeModel(hasAudio);
-
-    if (
-      model === `${this.modelName}-audio-preview` &&
-      this.#model.response_format
-    ) {
-      throw new DOMException(
-        `OpenAI audio model ('${model}') does not support structured outputs (responseConstraint).`,
-        'NotSupportedError'
-      );
-    }
-
-    const options = {
-      model: model,
-      messages: messages,
-      stream: true,
-    };
-
-    if (this.#model.temperature > 0) {
-      options.temperature = this.#model.temperature;
-    }
-
-    if (this.#model.response_format) {
-      options.response_format = this.#model.response_format;
-    }
-
-    try {
-      const stream = await this.openai.chat.completions.create(options);
-
-      // Convert OpenAI stream to an AsyncIterable that yields chunks
-      return (async function* () {
-        let firstChunk = true;
-        for await (const chunk of stream) {
-          let text = chunk.choices[0]?.delta?.content;
-          if (text) {
-            // Note: Unwrapping a wrapped object in a stream is complex.
-            // For now, streaming wrapped results will yield the full JSON including the wrapper.
-            yield {
-              text: () => text,
-              usageMetadata: { totalTokenCount: 0 },
-            };
-          }
-        }
-      })();
-    } catch (error) {
-      console.error('OpenAI Generate Content Stream Error:', error);
-      throw error;
-    }
-  }
-
-  async countTokens(contents) {
-    // OpenAI does not provide a public API endpoint for counting tokens before generation.
-    // Implementing countTokens strictly requires a tokenizer like `tiktoken`.
-    // For this initial implementation, we use a character-based approximation (e.g., text.length / 4)
-    // to avoid adding heavy WASM dependencies (`tiktoken`) to the polyfill.
-    let totalText = '';
-
-    if (Array.isArray(contents)) {
-      for (const content of contents) {
-        if (!content.parts) {
-          continue;
-        }
-        for (const part of content.parts) {
-          if (part.text) {
-            totalText += part.text;
-          } else if (part.inlineData) {
-            // Approximate image token cost (e.g., ~1000 chars worth)
-            totalText += ' '.repeat(1000);
-          }
-        }
-      }
-    }
-
-    return Math.ceil(totalText.length / 4);
-  }
-
-  #convertContentsToInput(contents, systemInstruction) {
-    const messages = [];
-
-    // System instructions
-    if (systemInstruction) {
-      messages.push({
-        role: 'system',
-        content: systemInstruction,
-      });
-    }
-
-    for (const content of contents) {
-      const role = content.role === 'model' ? 'assistant' : 'user';
-      const contentParts = [];
-
-      for (const part of content.parts) {
-        if (part.text) {
-          contentParts.push({ type: 'text', text: part.text });
-        } else if (part.inlineData) {
-          const { data, mimeType } = part.inlineData;
-          if (mimeType.startsWith('image/')) {
-            contentParts.push({
-              type: 'image_url',
-              image_url: { url: `data:${mimeType};base64,${data}` },
-            });
-          } else if (mimeType.startsWith('audio/')) {
-            contentParts.push({
-              type: 'input_audio',
-              input_audio: {
-                data: data,
-                format: mimeType.split('/')[1] === 'mpeg' ? 'mp3' : 'wav',
-              },
-            });
-          }
-        }
-      }
-
-      // Simplification: if only one text part, just send string content for better compatibility
-      // but multimodal models usually prefer the array format.
-      // We'll keep the array format for consistency with multimodal inputs.
-      messages.push({ role, content: contentParts });
-    }
-
-    return { messages };
-  }
-}
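
To make the removed #fixSchemaForOpenAI transform concrete, a worked example traced from the rules above (input schemas are hypothetical):

// Non-object root: wrapped so OpenAI sees an object root.
const arrayRoot = { type: 'array', items: { type: 'string' } };
// #fixSchemaForOpenAI(arrayRoot) =>
// {
//   wrapped: true,
//   schema: {
//     type: 'object',
//     properties: { value: { type: 'array', items: { type: 'string' } } },
//     required: ['value'],
//     additionalProperties: false,
//   },
// }

// Object root: every property becomes required and
// additionalProperties is forced to false, recursively.
const objectRoot = {
  type: 'object',
  properties: { city: { type: 'string' }, temp: { type: 'number' } },
};
// #fixSchemaForOpenAI(objectRoot) =>
// {
//   wrapped: false,
//   schema: {
//     type: 'object',
//     additionalProperties: false,
//     required: ['city', 'temp'],
//     properties: { city: { type: 'string' }, temp: { type: 'number' } },
//   },
// }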