viscribe 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +522 -0
- package/assets/black-v.png +0 -0
- package/assets/black-v.svg +5 -0
- package/assets/white-v.png +0 -0
- package/assets/white-v.svg +5 -0
- package/dist/index.cjs +826 -0
- package/dist/index.d.cts +135 -0
- package/dist/index.d.ts +135 -0
- package/dist/index.js +787 -0
- package/package.json +66 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,826 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __export = (target, all) => {
|
|
9
|
+
for (var name in all)
|
|
10
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
+
};
|
|
12
|
+
var __copyProps = (to, from, except, desc) => {
|
|
13
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
+
for (let key of __getOwnPropNames(from))
|
|
15
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
+
}
|
|
18
|
+
return to;
|
|
19
|
+
};
|
|
20
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
+
mod
|
|
27
|
+
));
|
|
28
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
+
|
|
30
|
+
// src/index.ts
|
|
31
|
+
var index_exports = {};
|
|
32
|
+
__export(index_exports, {
|
|
33
|
+
DEFAULT_MODEL: () => DEFAULT_MODEL,
|
|
34
|
+
StructuredOutputError: () => StructuredOutputError,
|
|
35
|
+
StructuredOutputFinishReasonError: () => StructuredOutputFinishReasonError,
|
|
36
|
+
StructuredOutputParseError: () => StructuredOutputParseError,
|
|
37
|
+
StructuredOutputRefusalError: () => StructuredOutputRefusalError,
|
|
38
|
+
StructuredOutputValidationError: () => StructuredOutputValidationError,
|
|
39
|
+
ViscribeAI: () => ViscribeAI,
|
|
40
|
+
buildSchemaFromFields: () => buildSchemaFromFields,
|
|
41
|
+
images: () => images_exports
|
|
42
|
+
});
|
|
43
|
+
module.exports = __toCommonJS(index_exports);
|
|
44
|
+
|
|
45
|
+
// src/images.ts
|
|
46
|
+
var images_exports = {};
|
|
47
|
+
__export(images_exports, {
|
|
48
|
+
ask: () => ask,
|
|
49
|
+
assertImagePathExists: () => assertImagePathExists,
|
|
50
|
+
buildImageSource: () => buildImageSource,
|
|
51
|
+
classify: () => classify,
|
|
52
|
+
compare: () => compare,
|
|
53
|
+
describe: () => describe,
|
|
54
|
+
extract: () => extract
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
// src/client.ts
|
|
58
|
+
var import_openai = __toESM(require("openai"), 1);
|
|
59
|
+
|
|
60
|
+
// src/image-source.ts
|
|
61
|
+
var import_node_fs = require("fs");
|
|
62
|
+
var import_promises = require("fs/promises");
|
|
63
|
+
var import_node_url = require("url");
|
|
64
|
+
/**
 * Resolve an image input into the string placed in an `image_url` message part.
 *
 * Exactly one of `imageUrl`, `imageBase64`, or `imagePath` must be defined.
 * URLs are validated and returned as-is, files are read synchronously and
 * inlined as a data URL, and base64 input is validated and wrapped in a data
 * URL unless it already is one.
 *
 * @param {{ imageUrl?: string, imageBase64?: string, imagePath?: string }} input
 * @returns {string} An http(s) URL or a `data:` URL.
 * @throws {Error} When zero or several sources are given, or a source is invalid.
 */
function buildImageSource(input) {
  const exactlyOneMessage = "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.";
  const { imageUrl, imageBase64, imagePath } = input;

  let suppliedCount = 0;
  for (const candidate of [imageUrl, imageBase64, imagePath]) {
    if (candidate !== undefined) {
      suppliedCount += 1;
    }
  }
  if (suppliedCount !== 1) {
    throw new Error(exactlyOneMessage);
  }

  if (imageUrl !== undefined) {
    validateImageUrl(imageUrl);
    return imageUrl;
  }

  if (imagePath !== undefined) {
    const fileBytes = (0, import_node_fs.readFileSync)(imagePath);
    const fileMimeType = detectImageMimeType(fileBytes);
    return `data:${fileMimeType};base64,${fileBytes.toString("base64")}`;
  }

  // Already a data URL: validate the payload but hand it back untouched.
  if (imageBase64?.startsWith("data:")) {
    validateDataUrl(imageBase64);
    return imageBase64;
  }

  if (imageBase64 !== undefined) {
    // Decoding is only needed to sniff the format; the caller's text is reused.
    const payloadMimeType = detectImageMimeType(decodeBase64(imageBase64));
    return `data:${payloadMimeType};base64,${imageBase64}`;
  }

  throw new Error(exactlyOneMessage);
}
|
|
97
|
+
/**
 * Check that `imagePath` refers to a regular file.
 *
 * @param {string} imagePath Path handed to `fs/promises.stat`.
 * @returns {Promise<void>} Resolves when the path is a file.
 * @throws {Error} When the path exists but is not a file; errors raised by
 *   `stat` itself propagate unchanged.
 */
async function assertImagePathExists(imagePath) {
  const stats = await (0, import_promises.stat)(imagePath);
  if (stats.isFile()) {
    return;
  }
  throw new Error(`imagePath does not point to a file: ${imagePath}`);
}
|
|
103
|
+
/**
 * Ensure `imageUrl` parses as an absolute http:// or https:// URL with a host.
 *
 * @param {string} imageUrl Candidate URL.
 * @returns {void}
 * @throws {Error} When parsing fails (the parser error is attached as `cause`)
 *   or the protocol/host check fails.
 */
function validateImageUrl(imageUrl) {
  const message = "imageUrl must be an absolute http:// or https:// URL.";
  let url;
  try {
    url = new import_node_url.URL(imageUrl);
  } catch (error) {
    throw new Error(message, { cause: error });
  }
  const isHttp = url.protocol === "http:" || url.protocol === "https:";
  if (!(isHttp && url.host)) {
    throw new Error(message);
  }
}
|
|
116
|
+
/**
 * Validate a `data:` URL that is expected to carry a base64 payload.
 *
 * The header is everything before the first comma and must end in `;base64`;
 * the payload is everything after that comma. The entire payload is checked,
 * so trailing data after a second comma cannot slip through unvalidated.
 *
 * @param {string} dataUrl Data URL supplied as `imageBase64`.
 * @returns {void}
 * @throws {Error} When the `;base64,` marker is missing or the payload is not
 *   valid base64.
 */
function validateDataUrl(dataUrl) {
  const commaIndex = dataUrl.indexOf(",");
  const header = commaIndex === -1 ? "" : dataUrl.slice(0, commaIndex);
  if (!header.endsWith(";base64")) {
    throw new Error("imageBase64 data URLs must include ';base64,'.");
  }
  // slice() keeps the full remainder; split(",", 2) would drop everything
  // after a second comma and validate only a prefix of the payload.
  decodeBase64(dataUrl.slice(commaIndex + 1));
}
|
|
123
|
+
/**
 * Decode a strictly formatted base64 string.
 *
 * The payload must be non-empty, have a length that is a multiple of four, and
 * contain only the standard alphabet followed by at most two `=` characters.
 *
 * @param {string} payload Base64 text without a data-URL prefix.
 * @returns {Buffer} The decoded bytes.
 * @throws {Error} When the payload fails any of the checks above.
 */
function decodeBase64(payload) {
  const isWellFormed =
    Boolean(payload) &&
    payload.length % 4 === 0 &&
    /^[A-Za-z0-9+/]+={0,2}$/.test(payload);
  if (!isWellFormed) {
    throw new Error("imageBase64 must be valid base64 data.");
  }
  return Buffer.from(payload, "base64");
}
|
|
132
|
+
/**
 * Identify an image format from the leading bytes of its content.
 *
 * Checks are made in this order: JPEG, PNG, GIF (87a/89a), WebP (RIFF
 * container tagged WEBP), BMP.
 *
 * @param {Buffer} imageBytes Raw image content.
 * @returns {string} One of image/jpeg, image/png, image/gif, image/webp, image/bmp.
 * @throws {Error} When none of the known signatures match.
 */
function detectImageMimeType(imageBytes) {
  const startsWithBytes = (signature) =>
    imageBytes.subarray(0, signature.length).equals(Buffer.from(signature));
  const asciiSlice = (start, end) => imageBytes.subarray(start, end).toString("ascii");

  if (startsWithBytes([255, 216, 255])) {
    return "image/jpeg";
  }
  if (startsWithBytes([137, 80, 78, 71, 13, 10, 26, 10])) {
    return "image/png";
  }

  const gifHeader = asciiSlice(0, 6);
  if (gifHeader === "GIF87a" || gifHeader === "GIF89a") {
    return "image/gif";
  }

  const isWebp =
    imageBytes.length >= 12 && asciiSlice(0, 4) === "RIFF" && asciiSlice(8, 12) === "WEBP";
  if (isWebp) {
    return "image/webp";
  }

  if (asciiSlice(0, 2) === "BM") {
    return "image/bmp";
  }
  throw new Error("Unsupported image format. Use JPEG, PNG, GIF, WebP, or BMP.");
}
|
|
150
|
+
|
|
151
|
+
// src/prompts.ts
|
|
152
|
+
// System prompts, one per image operation. Each is written as an explicit list
// of lines so blank lines and line breaks are visible in the source.

/** System prompt for schema-driven extraction. */
var EXTRACT_SYSTEM_PROMPT = [
  "You are an expert at extracting structured information from images.",
  "",
  "CRITICAL INSTRUCTIONS:",
  "- Respond with valid JSON only.",
  "- Do not include explanatory text, code blocks, or markdown formatting.",
  "- Your response must be a single JSON object that exactly matches the provided schema.",
  "",
  "TASK:",
  "- Analyze the image carefully and extract data according to the provided schema.",
  "- Be precise and accurate.",
  "- If a value is not visible or cannot be inferred from the image, use null",
  "when the schema allows it.",
  "- Ensure all required fields are included.",
].join("\n");

/** System prompt for image description (optionally with tags). */
var DESCRIBE_SYSTEM_PROMPT = [
  "You are an expert image analyst.",
  "",
  "CRITICAL INSTRUCTIONS:",
  "- Respond with valid JSON only.",
  "- Do not include explanatory text, code blocks, or markdown formatting.",
  "- Your response must be a single JSON object that exactly matches the provided schema.",
  "- Follow any user-provided instruction closely.",
  "",
  "TASK:",
  "- Provide a detailed, objective description of the image.",
  "- Focus on main elements, context, and notable details.",
  "- If tags are requested, generate up to 5 relevant tags.",
].join("\n");

/** System prompt for image classification. */
var CLASSIFY_SYSTEM_PROMPT = [
  "You are an expert at image classification.",
  "",
  "CRITICAL INSTRUCTIONS:",
  "- Respond with valid JSON only.",
  "- Do not include explanatory text, code blocks, or markdown formatting.",
  "- Your response must be a single JSON object that exactly matches the provided schema.",
  "",
  "TASK:",
  "- Analyze the image and classify it according to the requested categories or task.",
  "- Return only categories that are supported by the image.",
  "- If the image cannot be classified into a requested category, return an empty list.",
].join("\n");

/** System prompt for answering a question about an image. */
var ASK_SYSTEM_PROMPT = [
  "You are an expert at analyzing images and answering related questions.",
  "",
  "CRITICAL INSTRUCTIONS:",
  "- Respond with valid JSON only.",
  "- Do not include explanatory text, code blocks, or markdown formatting.",
  "- Your response must be a single JSON object that exactly matches the provided schema.",
  "",
  "TASK:",
  "- Carefully examine the provided image.",
  "- Deliver a clear, accurate, and detailed response to the question.",
  "- If the answer cannot be determined from the image, state that the information is not available.",
].join("\n");

/** System prompt for comparing two images. */
var COMPARE_SYSTEM_PROMPT = [
  "You are an expert at comparing images.",
  "",
  "CRITICAL INSTRUCTIONS:",
  "- Respond with valid JSON only.",
  "- Do not include explanatory text, code blocks, or markdown formatting.",
  "- Your response must be a single JSON object that exactly matches the provided schema.",
  "",
  "TASK:",
  "- Analyze two provided images, focusing on their visual elements.",
  "- Identify and describe similarities.",
  "- Highlight differences in color, texture, composition, subject matter, and context.",
  "- Point out notable features unique to each image.",
].join("\n");
|
|
221
|
+
|
|
222
|
+
// src/schema.ts
|
|
223
|
+
/**
 * Build an object JSON schema from a list of simple field definitions.
 *
 * @param {Array<{ name: string, type: string, description?: string }>} fields
 *   Between 1 and 10 field definitions with unique names.
 * @returns {object} Schema titled "ExtractedData" in which every field is
 *   required and additional properties are disallowed.
 * @throws {Error} When the list is empty, longer than 10, contains an invalid
 *   field, or repeats a name.
 */
function buildSchemaFromFields(fields) {
  const fieldCount = fields.length;
  if (fieldCount === 0) {
    throw new Error("At least one extraction field is required.");
  }
  if (fieldCount > 10) {
    throw new Error("A maximum of 10 extraction fields is supported.");
  }

  // Every field is validated before duplicates are reported.
  const names = fields.map((entry) => validateField(entry).name);

  const seen = new Set();
  const repeated = new Set();
  for (const name of names) {
    if (seen.has(name)) {
      repeated.add(name);
    } else {
      seen.add(name);
    }
  }
  if (repeated.size > 0) {
    throw new Error(
      `Field names must be unique. Duplicates: ${[...repeated].join(", ")}.`
    );
  }

  // Object.fromEntries defines own properties, so a name such as "__proto__"
  // becomes a regular key instead of touching the prototype.
  const properties = Object.fromEntries(
    fields.map((entry) => [entry.name, schemaForField(entry)])
  );

  return {
    title: "ExtractedData",
    description: "Structured data extracted from an image.",
    type: "object",
    properties,
    required: names,
    additionalProperties: false,
  };
}
|
|
249
|
+
/**
 * Validate a single extraction field definition.
 *
 * @param {{ name: string, type: string, description?: string }} field
 * @returns {object} The same `field` object, for chaining.
 * @throws {Error} When the field is not an object, the name is empty, not a
 *   string, or not identifier-like, or the type is unsupported.
 */
function validateField(field) {
  if (field === null || typeof field !== "object") {
    throw new Error("Each extraction field must be an object.");
  }
  if (!field.name) {
    throw new Error("Field name cannot be empty.");
  }
  // RegExp#test stringifies its argument, so without this check a value such
  // as ["abc"] would pass the pattern below and leak into the schema.
  if (typeof field.name !== "string") {
    throw new Error("Field name must be a string.");
  }
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(field.name)) {
    throw new Error(
      "Field names must contain letters, numbers, or underscores, and cannot start with a number."
    );
  }
  if (!["text", "number", "array_text", "array_number"].includes(field.type)) {
    throw new Error("Unsupported field type.");
  }
  return field;
}
|
|
263
|
+
/**
 * Produce the nullable JSON-schema fragment for one extraction field.
 *
 * "text" and "number" map to nullable scalars; "array_text" maps to a nullable
 * string array capped at 5 items; any other type falls through to a nullable
 * number array capped at 5 items.
 *
 * @param {{ type: string, description?: string }} field
 * @returns {object} Schema fragment, with `description` added when it is truthy.
 */
function schemaForField(field) {
  let fragment;
  switch (field.type) {
    case "text":
      fragment = { type: ["string", "null"] };
      break;
    case "number":
      fragment = { type: ["number", "null"] };
      break;
    case "array_text":
      fragment = { type: ["array", "null"], items: { type: "string" }, maxItems: 5 };
      break;
    default:
      fragment = { type: ["array", "null"], items: { type: "number" }, maxItems: 5 };
      break;
  }
  if (!field.description) {
    return fragment;
  }
  return { ...fragment, description: field.description };
}
|
|
278
|
+
|
|
279
|
+
// src/client.ts
|
|
280
|
+
/** Model used when the caller does not configure one. */
var DEFAULT_MODEL = "gpt-4o-mini";

/**
 * Model-config keys that are routed to the client constructor rather than to
 * the per-request payload. snake_case and `baseUrl` spellings are listed next
 * to their canonical camelCase names.
 */
var CLIENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
  // Credentials and account scoping.
  "apiKey", "api_key",
  "organization",
  "project",
  // Endpoint.
  "baseURL", "baseUrl", "base_url",
  // Transport behaviour.
  "timeout",
  "maxRetries", "max_retries",
  "defaultHeaders",
  "defaultQuery",
  "fetch",
  "httpAgent",
]);
|
|
297
|
+
// Built-in response schemas. Each is a closed object schema
// (additionalProperties: false) whose properties are all required.

/** Description plus up to five tags. */
var DESCRIBE_SCHEMA = {
  title: "ImageDescription",
  description: "Structured description of an image.",
  type: "object",
  properties: {
    image_description: { type: "string", description: "Objective description of the image." },
    tags: {
      type: "array",
      items: { type: "string" },
      maxItems: 5,
      description: "Relevant tags describing the image.",
    },
  },
  required: ["image_description", "tags"],
  additionalProperties: false,
};

/** Description only, used when tags are not requested. */
var DESCRIBE_NO_TAGS_SCHEMA = {
  title: "ImageDescription",
  description: "Structured description of an image.",
  type: "object",
  properties: {
    image_description: { type: "string", description: "Objective description of the image." },
  },
  required: ["image_description"],
  additionalProperties: false,
};

/** List of predicted class names. */
var CLASSIFY_SCHEMA = {
  title: "ImageClassification",
  description: "Structured classification of an image.",
  type: "object",
  properties: {
    classification: {
      type: "array",
      items: { type: "string" },
      description: "Predicted class names.",
    },
  },
  required: ["classification"],
  additionalProperties: false,
};

/** Single textual answer. */
var ASK_SCHEMA = {
  title: "ImageQuestionAnswer",
  description: "Answer to a question about an image.",
  type: "object",
  properties: {
    answer: { type: "string", description: "Answer to the question based on the image." },
  },
  required: ["answer"],
  additionalProperties: false,
};

/** Single textual comparison. */
var COMPARE_SCHEMA = {
  title: "ImageComparison",
  description: "Structured comparison of two images.",
  type: "object",
  properties: {
    comparison_result: { type: "string", description: "Textual comparison of the two images." },
  },
  required: ["comparison_result"],
  additionalProperties: false,
};
|
|
369
|
+
// Error hierarchy for structured-output failures. Each class sets `name`
// explicitly so logs, `String(error)` and `error.name` checks identify the
// failure kind instead of reporting a generic "Error". Constructors are
// inherited, so `(message, { cause })` is forwarded to Error unchanged.

/** Base class for every structured-output failure. */
var StructuredOutputError = class extends Error {
  name = "StructuredOutputError";
};

/** The model returned a refusal instead of structured content. */
var StructuredOutputRefusalError = class extends StructuredOutputError {
  name = "StructuredOutputRefusalError";
};

/** The response could not be read or parsed into a JSON object. */
var StructuredOutputParseError = class extends StructuredOutputError {
  name = "StructuredOutputParseError";
};

/** Structured-output validation failure (not thrown within this file). */
var StructuredOutputValidationError = class extends StructuredOutputError {
  name = "StructuredOutputValidationError";
};

/** The model stopped with a finish reason that makes the output unusable. */
var StructuredOutputFinishReasonError = class extends StructuredOutputError {
  name = "StructuredOutputFinishReasonError";
};
|
|
379
|
+
/**
 * Client that sends image tasks to the chat completions API and returns
 * structured (JSON-schema constrained) results.
 *
 * Public operations are exposed through `images`: extract, describe, classify,
 * ask and compare. The underlying API client is taken from `options.client`
 * when given, otherwise created on first use from the resolved client config.
 */
var ViscribeAI = class {
  model;
  strict;
  images;
  modelConfig;
  client;

  /**
   * @param {object} [options] Accepts `modelConfig` (model name or config
   *   object), `apiKey`, `temperature`, `strict` (defaults to true) and a
   *   pre-built `client`.
   */
  constructor(options = {}) {
    const { model, client, request } = resolveModelConfig(options);
    this.model = model;
    this.modelConfig = { client, request };
    this.strict = options.strict ?? true;
    this.client = options.client;
    this.images = {
      extract: (extractOptions) => this.extractImage(extractOptions),
      describe: (describeOptions) => this.describeImage(describeOptions),
      classify: (classifyOptions) => this.classifyImage(classifyOptions),
      ask: (askOptions) => this.askImage(askOptions),
      compare: (compareOptions) => this.compareImages(compareOptions),
    };
  }

  /** Extract data matching `options.outputSchema` from one image. */
  async extractImage(options) {
    // The schema is resolved first so schema errors surface before the image is read.
    const schema = resolveOutputSchema(options);
    const messages = singleImageMessages({
      systemPrompt: EXTRACT_SYSTEM_PROMPT,
      userPrompt:
        options.instruction ?? "Extract information from this image according to the provided schema.",
      imageSource: options,
    });
    return this.structuredImageRequest({ messages, schema });
  }

  /** Describe one image; tags are included unless `generateTags` is false. */
  async describeImage(options) {
    const messages = singleImageMessages({
      systemPrompt: DESCRIBE_SYSTEM_PROMPT,
      userPrompt: options.instruction ?? "Describe this image.",
      imageSource: options,
    });
    const includeTags = options.generateTags ?? true;
    return this.structuredImageRequest({
      messages,
      schema: includeTags ? DESCRIBE_SCHEMA : DESCRIBE_NO_TAGS_SCHEMA,
    });
  }

  /** Classify one image, optionally restricted to `options.classes`. */
  async classifyImage(options) {
    const classes = validateClassificationOptions(options);
    const userPrompt = classificationPrompt({ ...options, classes });
    const messages = singleImageMessages({
      systemPrompt: CLASSIFY_SYSTEM_PROMPT,
      userPrompt,
      imageSource: options,
    });
    const schema = classificationSchema({ multiLabel: options.multiLabel ?? false });
    return this.structuredImageRequest({
      messages,
      schema,
      transform: (data) => normalizeClassification(data, { classes }),
    });
  }

  /** Answer `options.question` about one image. */
  async askImage(options) {
    const question = validateQuestion(options.question);
    const messages = singleImageMessages({
      systemPrompt: ASK_SYSTEM_PROMPT,
      userPrompt: question,
      imageSource: options,
    });
    return this.structuredImageRequest({ messages, schema: ASK_SCHEMA });
  }

  /** Compare two images. */
  async compareImages(options) {
    const messages = compareMessages(options);
    return this.structuredImageRequest({ messages, schema: COMPARE_SCHEMA });
  }

  /**
   * Send one chat completion request and convert the response into a result.
   *
   * @param {{ messages: object[], schema: object, transform?: Function }} options
   * @returns {Promise<object>} Whatever `resultFromResponse` builds.
   */
  async structuredImageRequest(options) {
    // The client is obtained before the payload is built, so client
    // construction errors take precedence over schema conversion errors.
    const completions = this.getClient().chat.completions;
    const response = await completions.create({
      messages: options.messages,
      model: this.model,
      response_format: responseFormatForSchema(options.schema, this.strict),
      // Spread last: request-level model config can override the keys above.
      ...this.modelConfig.request,
    });
    const { schema: rawSchema, transform } = options;
    return resultFromResponse(response, { rawSchema, transform });
  }

  /** Return the API client, creating it on first use. */
  getClient() {
    if (!this.client) {
      this.client = new import_openai.default(this.modelConfig.client);
    }
    return this.client;
  }
};
|
|
473
|
+
/**
 * Split caller options into a model name, client constructor options and
 * per-request options.
 *
 * `options.modelConfig` may be a model name or a config object (copied, never
 * mutated). Missing `model` and `temperature` default to DEFAULT_MODEL and 0;
 * a truthy top-level `apiKey` and a defined top-level `temperature` override
 * the config. Keys listed in CLIENT_CONFIG_KEYS (after normalisation) go to
 * `client` under their canonical name; everything else goes to `request`.
 *
 * @param {object} options Constructor options.
 * @returns {{ model: string, client: object, request: object }}
 */
function resolveModelConfig(options) {
  const { modelConfig } = options;
  const settings =
    typeof modelConfig === "string" ? { model: modelConfig } : { ...(modelConfig ?? {}) };

  if (settings.model === null || settings.model === undefined) {
    settings.model = DEFAULT_MODEL;
  }
  if (settings.temperature === null || settings.temperature === undefined) {
    settings.temperature = 0;
  }
  if (options.apiKey) {
    settings.apiKey = options.apiKey;
  }
  if (options.temperature !== undefined) {
    settings.temperature = options.temperature;
  }

  // Rest destructuring keeps the remaining keys in their original order.
  const { model: configuredModel, ...remaining } = settings;
  const model = String(configuredModel);

  const client = {};
  const request = {};
  for (const key of Object.keys(remaining)) {
    const canonicalKey = normalizeClientKey(key);
    const isClientKey = CLIENT_CONFIG_KEYS.has(key) || CLIENT_CONFIG_KEYS.has(canonicalKey);
    if (isClientKey) {
      client[canonicalKey] = remaining[key];
    } else {
      request[key] = remaining[key];
    }
  }
  return { model, client, request };
}
|
|
497
|
+
/**
 * Map alternative spellings of client option names to their canonical form.
 *
 * @param {string} key Option name as written by the caller.
 * @returns {string} `apiKey`, `baseURL` or `maxRetries` for the known aliases;
 *   otherwise `key` unchanged.
 */
function normalizeClientKey(key) {
  switch (key) {
    case "api_key":
      return "apiKey";
    case "base_url":
    case "baseUrl":
      return "baseURL";
    case "max_retries":
      return "maxRetries";
    default:
      return key;
  }
}
|
|
500
|
+
/**
 * Resolve `options.outputSchema` into a JSON schema object.
 *
 * @param {{ outputSchema?: object | object[] }} options
 * @returns {object} The schema as given, or one built from a field list when
 *   an array is supplied.
 * @throws {Error} When `outputSchema` is missing.
 */
function resolveOutputSchema(options) {
  const { outputSchema } = options;
  if (!outputSchema) {
    throw new Error("'outputSchema' must be provided.");
  }
  if (Array.isArray(outputSchema)) {
    return buildSchemaFromFields(outputSchema);
  }
  return outputSchema;
}
|
|
506
|
+
/**
 * Build the system + user message pair for a single-image request.
 *
 * @param {{ systemPrompt: string, userPrompt: string, imageSource: object }} options
 *   `imageSource` is passed to `buildImageSource` to obtain the image URL.
 * @returns {object[]} Two chat messages; the user message carries the prompt
 *   text followed by the image.
 */
function singleImageMessages(options) {
  const url = buildImageSource(options.imageSource);
  const systemMessage = { role: "system", content: options.systemPrompt };
  const userMessage = {
    role: "user",
    content: [
      { type: "text", text: options.userPrompt },
      { type: "image_url", image_url: { url } },
    ],
  };
  return [systemMessage, userMessage];
}
|
|
522
|
+
/**
 * Build the system + user message pair for a two-image comparison.
 *
 * Each image is read from `image1Url` / `image1Base64` / `image1Path` and the
 * `image2…` equivalents, in that order.
 *
 * @param {object} options Comparison options, optionally with `instruction`.
 * @returns {object[]} Two chat messages; the user message carries the
 *   instruction followed by both images.
 */
function compareMessages(options) {
  const [firstUrl, secondUrl] = ["image1", "image2"].map((label) =>
    buildNumberedImageSource(label, {
      imageUrl: options[`${label}Url`],
      imageBase64: options[`${label}Base64`],
      imagePath: options[`${label}Path`],
    })
  );
  const instruction =
    options.instruction ?? "Describe the similarities and differences between these two images.";
  return [
    { role: "system", content: COMPARE_SYSTEM_PROMPT },
    {
      role: "user",
      content: [
        { type: "text", text: instruction },
        { type: "image_url", image_url: { url: firstUrl } },
        { type: "image_url", image_url: { url: secondUrl } },
      ],
    },
  ];
}
|
|
548
|
+
/**
 * Resolve one image of a comparison, reporting errors with the image's label.
 *
 * @param {string} label "image1" or "image2"; used only in the error message.
 * @param {{ imageUrl?: string, imageBase64?: string, imagePath?: string }} input
 * @returns {string} The value returned by `buildImageSource(input)`.
 * @throws {Error} When zero or several sources are defined for this image.
 */
function buildNumberedImageSource(label, input) {
  const candidates = [input.imageUrl, input.imageBase64, input.imagePath];
  const suppliedCount = candidates.filter((candidate) => candidate !== undefined).length;
  if (suppliedCount !== 1) {
    throw new Error(
      `Provide exactly one source for '${label}': '${label}Url', '${label}Base64', or '${label}Path'.`
    );
  }
  return buildImageSource(input);
}
|
|
561
|
+
/**
 * Return a fresh copy of CLASSIFY_SCHEMA, limited to one result for
 * single-label classification.
 *
 * @param {{ multiLabel: boolean }} options
 * @returns {object} Deep copy of the schema; `classification.maxItems` is set
 *   to 1 unless `multiLabel` is truthy.
 */
function classificationSchema(options) {
  const schema = structuredClone(CLASSIFY_SCHEMA);
  if (options.multiLabel) {
    return schema;
  }
  const classification = isPlainObject(schema.properties)
    ? schema.properties.classification
    : undefined;
  if (isPlainObject(classification)) {
    classification.maxItems = 1;
  }
  return schema;
}
|
|
572
|
+
/**
 * Compose the user prompt for a classification request.
 *
 * Sections, one per line group: the instruction (or a default), the allowed
 * classes (or a note that none were given), optional per-class descriptions,
 * and the single- vs multi-label rule.
 *
 * @param {{ instruction?: string, classes?: string[], classDescriptions?: object, multiLabel?: boolean }} options
 * @returns {string} Sections joined with newlines.
 */
function classificationPrompt(options) {
  const { instruction, classes, classDescriptions, multiLabel } = options;
  const sections = [instruction ?? "Classify this image."];

  if (classes) {
    sections.push(
      classes.length > 0
        ? `Allowed classes: ${classes.join(", ")}.`
        : "No allowed classes were provided; return an empty list."
    );
  }

  if (classDescriptions) {
    const bulletLines = [];
    for (const [className, description] of Object.entries(classDescriptions)) {
      bulletLines.push(`- ${className}: ${description}`);
    }
    sections.push(`Class descriptions:\n${bulletLines.join("\n")}`);
  }

  if (multiLabel) {
    sections.push("Return every relevant class in the classification list.");
  } else {
    sections.push("Return at most one class in the classification list.");
  }
  return sections.join("\n");
}
|
|
589
|
+
/**
 * Validate classification options and return a copy of the class list.
 *
 * @param {{ classes?: string[], classDescriptions?: object }} options
 * @returns {string[] | undefined} Copy of `classes`, or undefined when no
 *   classes were given.
 * @throws {Error} When `classDescriptions` is given without a non-empty
 *   `classes` list, or describes a class that is not in `classes`.
 */
function validateClassificationOptions(options) {
  const classes = options.classes ? [...options.classes] : undefined;
  const { classDescriptions } = options;
  if (!classDescriptions) {
    return classes;
  }

  if (!classes || classes.length === 0) {
    throw new Error(
      "'classDescriptions' can only be provided when 'classes' is provided."
    );
  }

  const knownClasses = new Set(classes);
  const unknownClasses = [];
  for (const className of Object.keys(classDescriptions)) {
    if (!knownClasses.has(className)) {
      unknownClasses.push(className);
    }
  }
  if (unknownClasses.length > 0) {
    throw new Error(
      `'classDescriptions' contains entries not present in 'classes': ${unknownClasses.sort().join(", ")}.`
    );
  }
  return classes;
}
|
|
608
|
+
/**
 * Coerce the model's `classification` value into a list of strings and drop
 * anything outside the allowed classes.
 *
 * @param {{ classification?: unknown }} data Parsed model output.
 * @param {{ classes?: string[] }} options Allowed classes, if restricted.
 * @returns {{ classification: string[] }} A non-empty string becomes a
 *   one-item list, an array keeps only its string items, anything else
 *   becomes an empty list.
 */
function normalizeClassification(data, options) {
  const raw = data.classification;
  let labels = [];
  if (typeof raw === "string") {
    if (raw) {
      labels = [raw];
    }
  } else if (Array.isArray(raw)) {
    labels = raw.filter((item) => typeof item === "string");
  }

  if (options.classes) {
    const allowed = new Set(options.classes);
    labels = labels.filter((label) => allowed.has(label));
  }
  return { classification: labels };
}
|
|
626
|
+
/**
 * Require a non-blank question string.
 *
 * @param {unknown} question Value supplied by the caller.
 * @returns {string} The question with surrounding whitespace removed.
 * @throws {Error} When `question` is not a string or is blank.
 */
function validateQuestion(question) {
  const trimmed = typeof question === "string" ? question.trim() : "";
  if (trimmed.length === 0) {
    throw new Error("'question' must be provided.");
  }
  return trimmed;
}
|
|
632
|
+
/**
 * Convert a schema into a chat-completions `response_format` value.
 *
 * A value already shaped as `{ type: "json_schema", json_schema: {...} }` is
 * copied and only receives a default for `strict`. A bare JSON schema is
 * wrapped: `title` and `description` are lifted out of the schema into the
 * wrapper, and in strict mode the schema is deep-copied and tightened.
 *
 * The wrapper name is derived from `title`. The API documents the name as
 * limited to letters, digits, underscores and dashes with a maximum length of
 * 64, so other characters are replaced with `_` and the result is truncated;
 * an empty result falls back to "ExtractedData".
 *
 * @param {object} rawSchema Bare JSON schema or a ready-made response format.
 * @param {boolean} strict Whether strict structured output is requested.
 * @returns {{ type: "json_schema", json_schema: object }}
 */
function responseFormatForSchema(rawSchema, strict) {
  if (rawSchema.type === "json_schema" && typeof rawSchema.json_schema === "object" && rawSchema.json_schema !== null) {
    const jsonSchema = { ...rawSchema.json_schema };
    jsonSchema.strict ??= strict;
    return { type: "json_schema", json_schema: jsonSchema };
  }

  // Both branches yield a copy, so removing keys below never touches the caller's schema.
  const schema = strict ? strictJsonSchema(rawSchema) : { ...rawSchema };
  const requestedName = String(schema.title ?? "ExtractedData");
  const name = requestedName.replace(/[^A-Za-z0-9_-]/g, "_").slice(0, 64) || "ExtractedData";
  const description = typeof schema.description === "string" ? schema.description : undefined;
  delete schema.title;
  delete schema.description;

  return {
    type: "json_schema",
    json_schema: {
      name,
      ...(description ? { description } : {}),
      strict,
      schema,
    },
  };
}
|
|
653
|
+
/**
 * Return a deep copy of `schema` adjusted for strict structured output.
 *
 * @param {object} schema JSON schema; left unmodified.
 * @returns {object} Independent copy processed by `ensureStrictJsonSchema`.
 */
function strictJsonSchema(schema) {
  const strictCopy = structuredClone(schema);
  ensureStrictJsonSchema(strictCopy);
  return strictCopy;
}
|
|
658
|
+
/**
 * Tighten a JSON schema in place for strict structured output.
 *
 * For this node and every nested schema reachable through `$defs`,
 * `definitions`, `properties`, `items`, `anyOf` and `allOf`:
 * - object schemas without an explicit `additionalProperties` get `false`,
 *   including nullable objects declared as `type: ["object", "null"]`;
 * - every declared property becomes required;
 * - a `default` of `null` is removed.
 *
 * @param {object} schema Schema node to mutate.
 * @returns {void}
 */
function ensureStrictJsonSchema(schema) {
  const visit = (node) => {
    if (isPlainObject(node)) {
      ensureStrictJsonSchema(node);
    }
  };

  for (const defsKey of ["$defs", "definitions"]) {
    const defs = schema[defsKey];
    if (isPlainObject(defs)) {
      Object.values(defs).forEach((definition) => visit(definition));
    }
  }

  // A type list such as ["object", "null"] still describes an object, so it
  // needs the same closing-off as the plain "object" form.
  const describesObject =
    schema.type === "object" ||
    (Array.isArray(schema.type) && schema.type.includes("object"));
  if (describesObject && schema.additionalProperties === undefined) {
    schema.additionalProperties = false;
  }

  if (isPlainObject(schema.properties)) {
    schema.required = Object.keys(schema.properties);
    Object.values(schema.properties).forEach((property) => visit(property));
  }

  visit(schema.items);

  for (const key of ["anyOf", "allOf"]) {
    const variants = schema[key];
    if (Array.isArray(variants)) {
      variants.forEach((variant) => visit(variant));
    }
  }

  if (schema.default === null) {
    delete schema.default;
  }
}
|
|
691
|
+
/**
 * Build the result object returned to callers from a raw API response.
 *
 * @param {object} response Chat completion response.
 * @param {{ rawSchema: object, transform?: (data: object) => object }} context
 *   `transform`, when present, post-processes the parsed data.
 * @returns {{ data: object, raw: object, usageMetadata: object }}
 */
function resultFromResponse(response, context) {
  const parsed = parseStructuredResponse(response);
  const { transform } = context;
  const data = transform ? context.transform(parsed) : parsed;
  return {
    data,
    raw: response,
    usageMetadata: usageMetadata(response),
  };
}
|
|
699
|
+
/**
 * Extract the structured JSON object from a chat completion response.
 *
 * A pre-parsed `message.parsed` object is preferred; otherwise the message
 * text is parsed as JSON.
 *
 * @param {object} response Chat completion response.
 * @returns {object} The structured output.
 * @throws {StructuredOutputFinishReasonError} When the first choice finished
 *   with "length" or "content_filter".
 * @throws {StructuredOutputParseError} When the choice has no message object
 *   (helper functions may throw it for other malformed responses).
 * @throws {StructuredOutputRefusalError} When the message has a non-empty refusal.
 */
function parseStructuredResponse(response) {
  const choice = firstChoice(response);
  const { finish_reason: finishReason } = choice;
  if (["length", "content_filter"].includes(finishReason)) {
    throw new StructuredOutputFinishReasonError(
      `Model finished with reason '${finishReason}'.`
    );
  }

  const { message } = choice;
  if (!isPlainObject(message)) {
    throw new StructuredOutputParseError("Response does not contain a message.");
  }

  const { refusal, parsed } = message;
  if (typeof refusal === "string" && refusal) {
    throw new StructuredOutputRefusalError(refusal);
  }

  return isPlainObject(parsed) ? parsed : loadsJsonObject(messageContentText(message));
}
|
|
721
|
+
/**
 * Return the first entry of `response.choices`.
 *
 * @param {object} response Chat completion response.
 * @returns {object} The first choice.
 * @throws {StructuredOutputParseError} When the response is not an object,
 *   `choices` is not an array, or its first entry is not an object.
 */
function firstChoice(response) {
  const choices = isPlainObject(response) ? response.choices : undefined;
  const first = Array.isArray(choices) ? choices[0] : undefined;
  if (!isPlainObject(first)) {
    throw new StructuredOutputParseError("Response does not contain choices.");
  }
  return first;
}
|
|
731
|
+
/**
 * Read the text of a response message.
 *
 * String content is returned as-is (even when empty). Array content is
 * reduced to the concatenated `text` of its "text" / "output_text" blocks.
 *
 * @param {{ content?: unknown }} message Response message.
 * @returns {string} The message text.
 * @throws {StructuredOutputParseError} When no text can be found.
 */
function messageContentText(message) {
  const { content } = message;
  if (typeof content === "string") {
    return content;
  }

  if (Array.isArray(content)) {
    let combined = "";
    for (const block of content) {
      if (!isPlainObject(block)) {
        continue;
      }
      const isTextBlock = block.type === "text" || block.type === "output_text";
      if (isTextBlock && typeof block.text === "string") {
        combined += block.text;
      }
    }
    if (combined) {
      return combined;
    }
  }

  throw new StructuredOutputParseError(
    "Response message does not contain text content."
  );
}
|
|
745
|
+
/**
 * Parse model output into a JSON object, tolerating common wrappers.
 *
 * Candidates are tried in order: the trimmed text, the inside of a
 * surrounding ``` fence, and the span from the first `{` to the last `}`.
 * The first candidate that parses to a non-array object wins.
 *
 * @param {string} content Raw message text.
 * @returns {object} The parsed object.
 * @throws {StructuredOutputParseError} When no candidate yields an object;
 *   the most recent failure is attached as `cause`.
 */
function loadsJsonObject(content) {
  const text = content.trim();
  const candidates = [text];

  if (text.startsWith("```")) {
    const lines = text.split(/\r?\n/);
    const endsWithFence = lines.length >= 3 && lines[lines.length - 1].trim() === "```";
    if (endsWithFence) {
      // Drop the opening fence line (with any language tag) and the closing fence.
      candidates.push(lines.slice(1, -1).join("\n").trim());
    }
  }

  const openIndex = text.indexOf("{");
  const closeIndex = text.lastIndexOf("}");
  if (openIndex !== -1 && closeIndex > openIndex) {
    candidates.push(text.slice(openIndex, closeIndex + 1));
  }

  let failure;
  for (const candidate of candidates) {
    let value;
    try {
      value = JSON.parse(candidate);
    } catch (error) {
      failure = error;
      continue;
    }
    if (isPlainObject(value)) {
      return value;
    }
    // Valid JSON of the wrong shape is recorded like a parse failure so the
    // remaining candidates still get a chance.
    failure = new StructuredOutputParseError(
      "Structured output must be a JSON object."
    );
  }

  throw new StructuredOutputParseError("Could not parse structured output JSON.", {
    cause: failure
  });
}
|
|
777
|
+
/**
 * Copy token usage from a response and add `input_tokens` / `output_tokens`
 * aliases for numeric `prompt_tokens` / `completion_tokens`.
 *
 * @param {object} response Chat completion response.
 * @returns {object} Shallow copy of `response.usage` plus aliases, or an
 *   empty object when no usage object is present.
 */
function usageMetadata(response) {
  const usage = isPlainObject(response) ? response.usage : undefined;
  if (!isPlainObject(usage)) {
    return {};
  }

  const metadata = { ...usage };
  const aliases = [
    ["prompt_tokens", "input_tokens"],
    ["completion_tokens", "output_tokens"],
  ];
  for (const [sourceKey, aliasKey] of aliases) {
    if (typeof metadata[sourceKey] === "number") {
      metadata[aliasKey] = metadata[sourceKey];
    }
  }
  return metadata;
}
|
|
790
|
+
/**
 * Tell whether `value` is a non-null, non-array object.
 *
 * The prototype is not inspected, so class instances such as Date also pass.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isPlainObject(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
793
|
+
|
|
794
|
+
// src/images.ts
|
|
795
|
+
// One-shot helpers: each builds a throwaway ViscribeAI from `options` and runs
// a single operation with the same `options` object. Being async, they report
// constructor errors as rejected promises.

/** Extract structured data from an image. */
async function extract(options) {
  return new ViscribeAI(options).images.extract(options);
}

/** Describe an image. */
async function describe(options) {
  return new ViscribeAI(options).images.describe(options);
}

/** Classify an image. */
async function classify(options) {
  return new ViscribeAI(options).images.classify(options);
}

/** Answer a question about an image. */
async function ask(options) {
  return new ViscribeAI(options).images.ask(options);
}

/** Compare two images. */
async function compare(options) {
  return new ViscribeAI(options).images.compare(options);
}
|
|
815
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
816
|
+
0 && (module.exports = {
|
|
817
|
+
DEFAULT_MODEL,
|
|
818
|
+
StructuredOutputError,
|
|
819
|
+
StructuredOutputFinishReasonError,
|
|
820
|
+
StructuredOutputParseError,
|
|
821
|
+
StructuredOutputRefusalError,
|
|
822
|
+
StructuredOutputValidationError,
|
|
823
|
+
ViscribeAI,
|
|
824
|
+
buildSchemaFromFields,
|
|
825
|
+
images
|
|
826
|
+
});
|