viscribe 1.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -466
- package/dist/index.cjs +5 -305
- package/dist/index.d.cts +2 -53
- package/dist/index.d.ts +2 -53
- package/dist/index.js +5 -305
- package/package.json +1 -5
package/dist/index.js
CHANGED
|
@@ -7,12 +7,8 @@ var __export = (target, all) => {
|
|
|
7
7
|
// src/images.ts
|
|
8
8
|
var images_exports = {};
|
|
9
9
|
__export(images_exports, {
|
|
10
|
-
ask: () => ask,
|
|
11
10
|
assertImagePathExists: () => assertImagePathExists,
|
|
12
11
|
buildImageSource: () => buildImageSource,
|
|
13
|
-
classify: () => classify,
|
|
14
|
-
compare: () => compare,
|
|
15
|
-
describe: () => describe,
|
|
16
12
|
extract: () => extract
|
|
17
13
|
});
|
|
18
14
|
|
|
@@ -115,67 +111,13 @@ CRITICAL INSTRUCTIONS:
|
|
|
115
111
|
- Do not include explanatory text, code blocks, or markdown formatting.
|
|
116
112
|
- Your response must be a single JSON object that exactly matches the provided schema.
|
|
117
113
|
|
|
118
|
-
|
|
114
|
+
GOAL:
|
|
119
115
|
- Analyze the image carefully and extract data according to the provided schema.
|
|
120
116
|
- Be precise and accurate.
|
|
121
117
|
- If a value is not visible or cannot be inferred from the image, use null
|
|
122
118
|
when the schema allows it.
|
|
123
119
|
- Ensure all required fields are included.
|
|
124
120
|
`.trim();
|
|
125
|
-
var DESCRIBE_SYSTEM_PROMPT = `
|
|
126
|
-
You are an expert image analyst.
|
|
127
|
-
|
|
128
|
-
CRITICAL INSTRUCTIONS:
|
|
129
|
-
- Respond with valid JSON only.
|
|
130
|
-
- Do not include explanatory text, code blocks, or markdown formatting.
|
|
131
|
-
- Your response must be a single JSON object that exactly matches the provided schema.
|
|
132
|
-
- Follow any user-provided instruction closely.
|
|
133
|
-
|
|
134
|
-
TASK:
|
|
135
|
-
- Provide a detailed, objective description of the image.
|
|
136
|
-
- Focus on main elements, context, and notable details.
|
|
137
|
-
- If tags are requested, generate up to 5 relevant tags.
|
|
138
|
-
`.trim();
|
|
139
|
-
var CLASSIFY_SYSTEM_PROMPT = `
|
|
140
|
-
You are an expert at image classification.
|
|
141
|
-
|
|
142
|
-
CRITICAL INSTRUCTIONS:
|
|
143
|
-
- Respond with valid JSON only.
|
|
144
|
-
- Do not include explanatory text, code blocks, or markdown formatting.
|
|
145
|
-
- Your response must be a single JSON object that exactly matches the provided schema.
|
|
146
|
-
|
|
147
|
-
TASK:
|
|
148
|
-
- Analyze the image and classify it according to the requested categories or task.
|
|
149
|
-
- Return only categories that are supported by the image.
|
|
150
|
-
- If the image cannot be classified into a requested category, return an empty list.
|
|
151
|
-
`.trim();
|
|
152
|
-
var ASK_SYSTEM_PROMPT = `
|
|
153
|
-
You are an expert at analyzing images and answering related questions.
|
|
154
|
-
|
|
155
|
-
CRITICAL INSTRUCTIONS:
|
|
156
|
-
- Respond with valid JSON only.
|
|
157
|
-
- Do not include explanatory text, code blocks, or markdown formatting.
|
|
158
|
-
- Your response must be a single JSON object that exactly matches the provided schema.
|
|
159
|
-
|
|
160
|
-
TASK:
|
|
161
|
-
- Carefully examine the provided image.
|
|
162
|
-
- Deliver a clear, accurate, and detailed response to the question.
|
|
163
|
-
- If the answer cannot be determined from the image, state that the information is not available.
|
|
164
|
-
`.trim();
|
|
165
|
-
var COMPARE_SYSTEM_PROMPT = `
|
|
166
|
-
You are an expert at comparing images.
|
|
167
|
-
|
|
168
|
-
CRITICAL INSTRUCTIONS:
|
|
169
|
-
- Respond with valid JSON only.
|
|
170
|
-
- Do not include explanatory text, code blocks, or markdown formatting.
|
|
171
|
-
- Your response must be a single JSON object that exactly matches the provided schema.
|
|
172
|
-
|
|
173
|
-
TASK:
|
|
174
|
-
- Analyze two provided images, focusing on their visual elements.
|
|
175
|
-
- Identify and describe similarities.
|
|
176
|
-
- Highlight differences in color, texture, composition, subject matter, and context.
|
|
177
|
-
- Point out notable features unique to each image.
|
|
178
|
-
`.trim();
|
|
179
121
|
|
|
180
122
|
// src/schema.ts
|
|
181
123
|
function buildSchemaFromFields(fields) {
|
|
@@ -252,78 +194,6 @@ var CLIENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
|
|
|
252
194
|
"fetch",
|
|
253
195
|
"httpAgent"
|
|
254
196
|
]);
|
|
255
|
-
var DESCRIBE_SCHEMA = {
|
|
256
|
-
title: "ImageDescription",
|
|
257
|
-
description: "Structured description of an image.",
|
|
258
|
-
type: "object",
|
|
259
|
-
properties: {
|
|
260
|
-
image_description: {
|
|
261
|
-
type: "string",
|
|
262
|
-
description: "Objective description of the image."
|
|
263
|
-
},
|
|
264
|
-
tags: {
|
|
265
|
-
type: "array",
|
|
266
|
-
items: { type: "string" },
|
|
267
|
-
maxItems: 5,
|
|
268
|
-
description: "Relevant tags describing the image."
|
|
269
|
-
}
|
|
270
|
-
},
|
|
271
|
-
required: ["image_description", "tags"],
|
|
272
|
-
additionalProperties: false
|
|
273
|
-
};
|
|
274
|
-
var DESCRIBE_NO_TAGS_SCHEMA = {
|
|
275
|
-
title: "ImageDescription",
|
|
276
|
-
description: "Structured description of an image.",
|
|
277
|
-
type: "object",
|
|
278
|
-
properties: {
|
|
279
|
-
image_description: {
|
|
280
|
-
type: "string",
|
|
281
|
-
description: "Objective description of the image."
|
|
282
|
-
}
|
|
283
|
-
},
|
|
284
|
-
required: ["image_description"],
|
|
285
|
-
additionalProperties: false
|
|
286
|
-
};
|
|
287
|
-
var CLASSIFY_SCHEMA = {
|
|
288
|
-
title: "ImageClassification",
|
|
289
|
-
description: "Structured classification of an image.",
|
|
290
|
-
type: "object",
|
|
291
|
-
properties: {
|
|
292
|
-
classification: {
|
|
293
|
-
type: "array",
|
|
294
|
-
items: { type: "string" },
|
|
295
|
-
description: "Predicted class names."
|
|
296
|
-
}
|
|
297
|
-
},
|
|
298
|
-
required: ["classification"],
|
|
299
|
-
additionalProperties: false
|
|
300
|
-
};
|
|
301
|
-
var ASK_SCHEMA = {
|
|
302
|
-
title: "ImageQuestionAnswer",
|
|
303
|
-
description: "Answer to a question about an image.",
|
|
304
|
-
type: "object",
|
|
305
|
-
properties: {
|
|
306
|
-
answer: {
|
|
307
|
-
type: "string",
|
|
308
|
-
description: "Answer to the question based on the image."
|
|
309
|
-
}
|
|
310
|
-
},
|
|
311
|
-
required: ["answer"],
|
|
312
|
-
additionalProperties: false
|
|
313
|
-
};
|
|
314
|
-
var COMPARE_SCHEMA = {
|
|
315
|
-
title: "ImageComparison",
|
|
316
|
-
description: "Structured comparison of two images.",
|
|
317
|
-
type: "object",
|
|
318
|
-
properties: {
|
|
319
|
-
comparison_result: {
|
|
320
|
-
type: "string",
|
|
321
|
-
description: "Textual comparison of the two images."
|
|
322
|
-
}
|
|
323
|
-
},
|
|
324
|
-
required: ["comparison_result"],
|
|
325
|
-
additionalProperties: false
|
|
326
|
-
};
|
|
327
197
|
var StructuredOutputError = class extends Error {
|
|
328
198
|
};
|
|
329
199
|
var StructuredOutputRefusalError = class extends StructuredOutputError {
|
|
@@ -350,11 +220,7 @@ var ViscribeAI = class {
|
|
|
350
220
|
this.strict = options.strict ?? true;
|
|
351
221
|
this.client = options.client;
|
|
352
222
|
this.images = {
|
|
353
|
-
extract: (extractOptions) => this.extractImage(extractOptions),
|
|
354
|
-
describe: (describeOptions) => this.describeImage(describeOptions),
|
|
355
|
-
classify: (classifyOptions) => this.classifyImage(classifyOptions),
|
|
356
|
-
ask: (askOptions) => this.askImage(askOptions),
|
|
357
|
-
compare: (compareOptions) => this.compareImages(compareOptions)
|
|
223
|
+
extract: (extractOptions) => this.extractImage(extractOptions)
|
|
358
224
|
};
|
|
359
225
|
}
|
|
360
226
|
async extractImage(options) {
|
|
@@ -368,45 +234,6 @@ var ViscribeAI = class {
|
|
|
368
234
|
schema
|
|
369
235
|
});
|
|
370
236
|
}
|
|
371
|
-
async describeImage(options) {
|
|
372
|
-
return this.structuredImageRequest({
|
|
373
|
-
messages: singleImageMessages({
|
|
374
|
-
systemPrompt: DESCRIBE_SYSTEM_PROMPT,
|
|
375
|
-
userPrompt: options.instruction ?? "Describe this image.",
|
|
376
|
-
imageSource: options
|
|
377
|
-
}),
|
|
378
|
-
schema: options.generateTags ?? true ? DESCRIBE_SCHEMA : DESCRIBE_NO_TAGS_SCHEMA
|
|
379
|
-
});
|
|
380
|
-
}
|
|
381
|
-
async classifyImage(options) {
|
|
382
|
-
const classes = validateClassificationOptions(options);
|
|
383
|
-
return this.structuredImageRequest({
|
|
384
|
-
messages: singleImageMessages({
|
|
385
|
-
systemPrompt: CLASSIFY_SYSTEM_PROMPT,
|
|
386
|
-
userPrompt: classificationPrompt({ ...options, classes }),
|
|
387
|
-
imageSource: options
|
|
388
|
-
}),
|
|
389
|
-
schema: classificationSchema({ multiLabel: options.multiLabel ?? false }),
|
|
390
|
-
transform: (data) => normalizeClassification(data, { classes })
|
|
391
|
-
});
|
|
392
|
-
}
|
|
393
|
-
async askImage(options) {
|
|
394
|
-
const question = validateQuestion(options.question);
|
|
395
|
-
return this.structuredImageRequest({
|
|
396
|
-
messages: singleImageMessages({
|
|
397
|
-
systemPrompt: ASK_SYSTEM_PROMPT,
|
|
398
|
-
userPrompt: question,
|
|
399
|
-
imageSource: options
|
|
400
|
-
}),
|
|
401
|
-
schema: ASK_SCHEMA
|
|
402
|
-
});
|
|
403
|
-
}
|
|
404
|
-
async compareImages(options) {
|
|
405
|
-
return this.structuredImageRequest({
|
|
406
|
-
messages: compareMessages(options),
|
|
407
|
-
schema: COMPARE_SCHEMA
|
|
408
|
-
});
|
|
409
|
-
}
|
|
410
237
|
async structuredImageRequest(options) {
|
|
411
238
|
const response = await this.getClient().chat.completions.create({
|
|
412
239
|
messages: options.messages,
|
|
@@ -414,10 +241,7 @@ var ViscribeAI = class {
|
|
|
414
241
|
response_format: responseFormatForSchema(options.schema, this.strict),
|
|
415
242
|
...this.modelConfig.request
|
|
416
243
|
});
|
|
417
|
-
return resultFromResponse(response, {
|
|
418
|
-
rawSchema: options.schema,
|
|
419
|
-
transform: options.transform
|
|
420
|
-
});
|
|
244
|
+
return resultFromResponse(response);
|
|
421
245
|
}
|
|
422
246
|
getClient() {
|
|
423
247
|
if (!this.client) {
|
|
@@ -477,114 +301,6 @@ function singleImageMessages(options) {
|
|
|
477
301
|
}
|
|
478
302
|
];
|
|
479
303
|
}
|
|
480
|
-
function compareMessages(options) {
|
|
481
|
-
const image1Source = buildNumberedImageSource("image1", {
|
|
482
|
-
imageUrl: options.image1Url,
|
|
483
|
-
imageBase64: options.image1Base64,
|
|
484
|
-
imagePath: options.image1Path
|
|
485
|
-
});
|
|
486
|
-
const image2Source = buildNumberedImageSource("image2", {
|
|
487
|
-
imageUrl: options.image2Url,
|
|
488
|
-
imageBase64: options.image2Base64,
|
|
489
|
-
imagePath: options.image2Path
|
|
490
|
-
});
|
|
491
|
-
return [
|
|
492
|
-
{ role: "system", content: COMPARE_SYSTEM_PROMPT },
|
|
493
|
-
{
|
|
494
|
-
role: "user",
|
|
495
|
-
content: [
|
|
496
|
-
{
|
|
497
|
-
type: "text",
|
|
498
|
-
text: options.instruction ?? "Describe the similarities and differences between these two images."
|
|
499
|
-
},
|
|
500
|
-
{ type: "image_url", image_url: { url: image1Source } },
|
|
501
|
-
{ type: "image_url", image_url: { url: image2Source } }
|
|
502
|
-
]
|
|
503
|
-
}
|
|
504
|
-
];
|
|
505
|
-
}
|
|
506
|
-
function buildNumberedImageSource(label, input) {
|
|
507
|
-
const provided = [
|
|
508
|
-
input.imageUrl !== void 0,
|
|
509
|
-
input.imageBase64 !== void 0,
|
|
510
|
-
input.imagePath !== void 0
|
|
511
|
-
].filter(Boolean).length;
|
|
512
|
-
if (provided !== 1) {
|
|
513
|
-
throw new Error(
|
|
514
|
-
`Provide exactly one source for '${label}': '${label}Url', '${label}Base64', or '${label}Path'.`
|
|
515
|
-
);
|
|
516
|
-
}
|
|
517
|
-
return buildImageSource(input);
|
|
518
|
-
}
|
|
519
|
-
function classificationSchema(options) {
|
|
520
|
-
const schema = structuredClone(CLASSIFY_SCHEMA);
|
|
521
|
-
const properties = schema.properties;
|
|
522
|
-
if (!options.multiLabel && isPlainObject(properties)) {
|
|
523
|
-
const classification = properties.classification;
|
|
524
|
-
if (isPlainObject(classification)) {
|
|
525
|
-
classification.maxItems = 1;
|
|
526
|
-
}
|
|
527
|
-
}
|
|
528
|
-
return schema;
|
|
529
|
-
}
|
|
530
|
-
function classificationPrompt(options) {
|
|
531
|
-
const parts = [options.instruction ?? "Classify this image."];
|
|
532
|
-
if (options.classes && options.classes.length > 0) {
|
|
533
|
-
parts.push(`Allowed classes: ${options.classes.join(", ")}.`);
|
|
534
|
-
} else if (options.classes) {
|
|
535
|
-
parts.push("No allowed classes were provided; return an empty list.");
|
|
536
|
-
}
|
|
537
|
-
if (options.classDescriptions) {
|
|
538
|
-
const descriptions = Object.entries(options.classDescriptions).map(([className, description]) => `- ${className}: ${description}`).join("\n");
|
|
539
|
-
parts.push(`Class descriptions:
|
|
540
|
-
${descriptions}`);
|
|
541
|
-
}
|
|
542
|
-
parts.push(
|
|
543
|
-
options.multiLabel ? "Return every relevant class in the classification list." : "Return at most one class in the classification list."
|
|
544
|
-
);
|
|
545
|
-
return parts.join("\n");
|
|
546
|
-
}
|
|
547
|
-
function validateClassificationOptions(options) {
|
|
548
|
-
const classes = options.classes ? [...options.classes] : void 0;
|
|
549
|
-
if (options.classDescriptions) {
|
|
550
|
-
if (!classes || classes.length === 0) {
|
|
551
|
-
throw new Error(
|
|
552
|
-
"'classDescriptions' can only be provided when 'classes' is provided."
|
|
553
|
-
);
|
|
554
|
-
}
|
|
555
|
-
const invalidClasses = Object.keys(options.classDescriptions).filter(
|
|
556
|
-
(className) => !classes.includes(className)
|
|
557
|
-
);
|
|
558
|
-
if (invalidClasses.length > 0) {
|
|
559
|
-
throw new Error(
|
|
560
|
-
`'classDescriptions' contains entries not present in 'classes': ${invalidClasses.sort().join(", ")}.`
|
|
561
|
-
);
|
|
562
|
-
}
|
|
563
|
-
}
|
|
564
|
-
return classes;
|
|
565
|
-
}
|
|
566
|
-
function normalizeClassification(data, options) {
|
|
567
|
-
const rawClassification = data.classification;
|
|
568
|
-
let classification;
|
|
569
|
-
if (typeof rawClassification === "string") {
|
|
570
|
-
classification = rawClassification ? [rawClassification] : [];
|
|
571
|
-
} else if (Array.isArray(rawClassification)) {
|
|
572
|
-
classification = rawClassification.filter((value) => typeof value === "string").map((value) => value);
|
|
573
|
-
} else {
|
|
574
|
-
classification = [];
|
|
575
|
-
}
|
|
576
|
-
if (options.classes) {
|
|
577
|
-
const allowedClasses = new Set(options.classes);
|
|
578
|
-
classification = classification.filter((className) => allowedClasses.has(className));
|
|
579
|
-
}
|
|
580
|
-
return { classification };
|
|
581
|
-
}
|
|
582
|
-
function validateQuestion(question) {
|
|
583
|
-
if (typeof question !== "string" || question.trim().length === 0) {
|
|
584
|
-
throw new Error("'question' must be provided.");
|
|
585
|
-
}
|
|
586
|
-
return question.trim();
|
|
587
|
-
}
|
|
588
304
|
function responseFormatForSchema(rawSchema, strict) {
|
|
589
305
|
if (rawSchema.type === "json_schema" && typeof rawSchema.json_schema === "object" && rawSchema.json_schema !== null) {
|
|
590
306
|
const jsonSchema = { ...rawSchema.json_schema };
|
|
@@ -644,10 +360,10 @@ function ensureStrictJsonSchema(schema) {
|
|
|
644
360
|
delete schema.default;
|
|
645
361
|
}
|
|
646
362
|
}
|
|
647
|
-
function resultFromResponse(response
|
|
363
|
+
function resultFromResponse(response) {
|
|
648
364
|
const parsed = parseStructuredResponse(response);
|
|
649
365
|
return {
|
|
650
|
-
data:
|
|
366
|
+
data: parsed,
|
|
651
367
|
raw: response,
|
|
652
368
|
usageMetadata: usageMetadata(response)
|
|
653
369
|
};
|
|
@@ -748,22 +464,6 @@ async function extract(options) {
|
|
|
748
464
|
const client = new ViscribeAI(options);
|
|
749
465
|
return client.images.extract(options);
|
|
750
466
|
}
|
|
751
|
-
async function describe(options) {
|
|
752
|
-
const client = new ViscribeAI(options);
|
|
753
|
-
return client.images.describe(options);
|
|
754
|
-
}
|
|
755
|
-
async function classify(options) {
|
|
756
|
-
const client = new ViscribeAI(options);
|
|
757
|
-
return client.images.classify(options);
|
|
758
|
-
}
|
|
759
|
-
async function ask(options) {
|
|
760
|
-
const client = new ViscribeAI(options);
|
|
761
|
-
return client.images.ask(options);
|
|
762
|
-
}
|
|
763
|
-
async function compare(options) {
|
|
764
|
-
const client = new ViscribeAI(options);
|
|
765
|
-
return client.images.compare(options);
|
|
766
|
-
}
|
|
767
467
|
export {
|
|
768
468
|
DEFAULT_MODEL,
|
|
769
469
|
StructuredOutputError,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "viscribe",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Extract structured data from images using AI models.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -24,10 +24,6 @@
|
|
|
24
24
|
"typecheck": "tsc --noEmit",
|
|
25
25
|
"test": "vitest run",
|
|
26
26
|
"example": "tsx examples/extract.ts",
|
|
27
|
-
"example:describe": "tsx examples/describe.ts",
|
|
28
|
-
"example:classify": "tsx examples/classify.ts",
|
|
29
|
-
"example:ask": "tsx examples/ask.ts",
|
|
30
|
-
"example:compare": "tsx examples/compare.ts",
|
|
31
27
|
"example:client": "tsx examples/client.ts"
|
|
32
28
|
},
|
|
33
29
|
"keywords": [
|