viscribe 0.1.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,12 +7,8 @@ var __export = (target, all) => {
7
7
  // src/images.ts
8
8
  var images_exports = {};
9
9
  __export(images_exports, {
10
- ask: () => ask,
11
10
  assertImagePathExists: () => assertImagePathExists,
12
11
  buildImageSource: () => buildImageSource,
13
- classify: () => classify,
14
- compare: () => compare,
15
- describe: () => describe,
16
12
  extract: () => extract
17
13
  });
18
14
 
@@ -30,9 +26,7 @@ function buildImageSource(input) {
30
26
  input.imagePath !== void 0
31
27
  ].filter(Boolean).length;
32
28
  if (provided !== 1) {
33
- throw new Error(
34
- "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
35
- );
29
+ throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
36
30
  }
37
31
  if (input.imageUrl !== void 0) {
38
32
  validateImageUrl(input.imageUrl);
@@ -52,9 +46,7 @@ function buildImageSource(input) {
52
46
  const mimeType = detectImageMimeType(imageBytes);
53
47
  return `data:${mimeType};base64,${input.imageBase64}`;
54
48
  }
55
- throw new Error(
56
- "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
57
- );
49
+ throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
58
50
  }
59
51
  async function assertImagePathExists(imagePath) {
60
52
  const result = await stat(imagePath);
@@ -119,67 +111,13 @@ CRITICAL INSTRUCTIONS:
119
111
  - Do not include explanatory text, code blocks, or markdown formatting.
120
112
  - Your response must be a single JSON object that exactly matches the provided schema.
121
113
 
122
- TASK:
114
+ GOAL:
123
115
  - Analyze the image carefully and extract data according to the provided schema.
124
116
  - Be precise and accurate.
125
117
  - If a value is not visible or cannot be inferred from the image, use null
126
118
  when the schema allows it.
127
119
  - Ensure all required fields are included.
128
120
  `.trim();
129
- var DESCRIBE_SYSTEM_PROMPT = `
130
- You are an expert image analyst.
131
-
132
- CRITICAL INSTRUCTIONS:
133
- - Respond with valid JSON only.
134
- - Do not include explanatory text, code blocks, or markdown formatting.
135
- - Your response must be a single JSON object that exactly matches the provided schema.
136
- - Follow any user-provided instruction closely.
137
-
138
- TASK:
139
- - Provide a detailed, objective description of the image.
140
- - Focus on main elements, context, and notable details.
141
- - If tags are requested, generate up to 5 relevant tags.
142
- `.trim();
143
- var CLASSIFY_SYSTEM_PROMPT = `
144
- You are an expert at image classification.
145
-
146
- CRITICAL INSTRUCTIONS:
147
- - Respond with valid JSON only.
148
- - Do not include explanatory text, code blocks, or markdown formatting.
149
- - Your response must be a single JSON object that exactly matches the provided schema.
150
-
151
- TASK:
152
- - Analyze the image and classify it according to the requested categories or task.
153
- - Return only categories that are supported by the image.
154
- - If the image cannot be classified into a requested category, return an empty list.
155
- `.trim();
156
- var ASK_SYSTEM_PROMPT = `
157
- You are an expert at analyzing images and answering related questions.
158
-
159
- CRITICAL INSTRUCTIONS:
160
- - Respond with valid JSON only.
161
- - Do not include explanatory text, code blocks, or markdown formatting.
162
- - Your response must be a single JSON object that exactly matches the provided schema.
163
-
164
- TASK:
165
- - Carefully examine the provided image.
166
- - Deliver a clear, accurate, and detailed response to the question.
167
- - If the answer cannot be determined from the image, state that the information is not available.
168
- `.trim();
169
- var COMPARE_SYSTEM_PROMPT = `
170
- You are an expert at comparing images.
171
-
172
- CRITICAL INSTRUCTIONS:
173
- - Respond with valid JSON only.
174
- - Do not include explanatory text, code blocks, or markdown formatting.
175
- - Your response must be a single JSON object that exactly matches the provided schema.
176
-
177
- TASK:
178
- - Analyze two provided images, focusing on their visual elements.
179
- - Identify and describe similarities.
180
- - Highlight differences in color, texture, composition, subject matter, and context.
181
- - Point out notable features unique to each image.
182
- `.trim();
183
121
 
184
122
  // src/schema.ts
185
123
  function buildSchemaFromFields(fields) {
@@ -256,78 +194,6 @@ var CLIENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
256
194
  "fetch",
257
195
  "httpAgent"
258
196
  ]);
259
- var DESCRIBE_SCHEMA = {
260
- title: "ImageDescription",
261
- description: "Structured description of an image.",
262
- type: "object",
263
- properties: {
264
- image_description: {
265
- type: "string",
266
- description: "Objective description of the image."
267
- },
268
- tags: {
269
- type: "array",
270
- items: { type: "string" },
271
- maxItems: 5,
272
- description: "Relevant tags describing the image."
273
- }
274
- },
275
- required: ["image_description", "tags"],
276
- additionalProperties: false
277
- };
278
- var DESCRIBE_NO_TAGS_SCHEMA = {
279
- title: "ImageDescription",
280
- description: "Structured description of an image.",
281
- type: "object",
282
- properties: {
283
- image_description: {
284
- type: "string",
285
- description: "Objective description of the image."
286
- }
287
- },
288
- required: ["image_description"],
289
- additionalProperties: false
290
- };
291
- var CLASSIFY_SCHEMA = {
292
- title: "ImageClassification",
293
- description: "Structured classification of an image.",
294
- type: "object",
295
- properties: {
296
- classification: {
297
- type: "array",
298
- items: { type: "string" },
299
- description: "Predicted class names."
300
- }
301
- },
302
- required: ["classification"],
303
- additionalProperties: false
304
- };
305
- var ASK_SCHEMA = {
306
- title: "ImageQuestionAnswer",
307
- description: "Answer to a question about an image.",
308
- type: "object",
309
- properties: {
310
- answer: {
311
- type: "string",
312
- description: "Answer to the question based on the image."
313
- }
314
- },
315
- required: ["answer"],
316
- additionalProperties: false
317
- };
318
- var COMPARE_SCHEMA = {
319
- title: "ImageComparison",
320
- description: "Structured comparison of two images.",
321
- type: "object",
322
- properties: {
323
- comparison_result: {
324
- type: "string",
325
- description: "Textual comparison of the two images."
326
- }
327
- },
328
- required: ["comparison_result"],
329
- additionalProperties: false
330
- };
331
197
  var StructuredOutputError = class extends Error {
332
198
  };
333
199
  var StructuredOutputRefusalError = class extends StructuredOutputError {
@@ -354,11 +220,7 @@ var ViscribeAI = class {
354
220
  this.strict = options.strict ?? true;
355
221
  this.client = options.client;
356
222
  this.images = {
357
- extract: (extractOptions) => this.extractImage(extractOptions),
358
- describe: (describeOptions) => this.describeImage(describeOptions),
359
- classify: (classifyOptions) => this.classifyImage(classifyOptions),
360
- ask: (askOptions) => this.askImage(askOptions),
361
- compare: (compareOptions) => this.compareImages(compareOptions)
223
+ extract: (extractOptions) => this.extractImage(extractOptions)
362
224
  };
363
225
  }
364
226
  async extractImage(options) {
@@ -372,45 +234,6 @@ var ViscribeAI = class {
372
234
  schema
373
235
  });
374
236
  }
375
- async describeImage(options) {
376
- return this.structuredImageRequest({
377
- messages: singleImageMessages({
378
- systemPrompt: DESCRIBE_SYSTEM_PROMPT,
379
- userPrompt: options.instruction ?? "Describe this image.",
380
- imageSource: options
381
- }),
382
- schema: options.generateTags ?? true ? DESCRIBE_SCHEMA : DESCRIBE_NO_TAGS_SCHEMA
383
- });
384
- }
385
- async classifyImage(options) {
386
- const classes = validateClassificationOptions(options);
387
- return this.structuredImageRequest({
388
- messages: singleImageMessages({
389
- systemPrompt: CLASSIFY_SYSTEM_PROMPT,
390
- userPrompt: classificationPrompt({ ...options, classes }),
391
- imageSource: options
392
- }),
393
- schema: classificationSchema({ multiLabel: options.multiLabel ?? false }),
394
- transform: (data) => normalizeClassification(data, { classes })
395
- });
396
- }
397
- async askImage(options) {
398
- const question = validateQuestion(options.question);
399
- return this.structuredImageRequest({
400
- messages: singleImageMessages({
401
- systemPrompt: ASK_SYSTEM_PROMPT,
402
- userPrompt: question,
403
- imageSource: options
404
- }),
405
- schema: ASK_SCHEMA
406
- });
407
- }
408
- async compareImages(options) {
409
- return this.structuredImageRequest({
410
- messages: compareMessages(options),
411
- schema: COMPARE_SCHEMA
412
- });
413
- }
414
237
  async structuredImageRequest(options) {
415
238
  const response = await this.getClient().chat.completions.create({
416
239
  messages: options.messages,
@@ -418,10 +241,7 @@ var ViscribeAI = class {
418
241
  response_format: responseFormatForSchema(options.schema, this.strict),
419
242
  ...this.modelConfig.request
420
243
  });
421
- return resultFromResponse(response, {
422
- rawSchema: options.schema,
423
- transform: options.transform
424
- });
244
+ return resultFromResponse(response);
425
245
  }
426
246
  getClient() {
427
247
  if (!this.client) {
@@ -481,116 +301,6 @@ function singleImageMessages(options) {
481
301
  }
482
302
  ];
483
303
  }
484
- function compareMessages(options) {
485
- const image1Source = buildNumberedImageSource("image1", {
486
- imageUrl: options.image1Url,
487
- imageBase64: options.image1Base64,
488
- imagePath: options.image1Path
489
- });
490
- const image2Source = buildNumberedImageSource("image2", {
491
- imageUrl: options.image2Url,
492
- imageBase64: options.image2Base64,
493
- imagePath: options.image2Path
494
- });
495
- return [
496
- { role: "system", content: COMPARE_SYSTEM_PROMPT },
497
- {
498
- role: "user",
499
- content: [
500
- {
501
- type: "text",
502
- text: options.instruction ?? "Describe the similarities and differences between these two images."
503
- },
504
- { type: "image_url", image_url: { url: image1Source } },
505
- { type: "image_url", image_url: { url: image2Source } }
506
- ]
507
- }
508
- ];
509
- }
510
- function buildNumberedImageSource(label, input) {
511
- const provided = [
512
- input.imageUrl !== void 0,
513
- input.imageBase64 !== void 0,
514
- input.imagePath !== void 0
515
- ].filter(Boolean).length;
516
- if (provided !== 1) {
517
- throw new Error(
518
- `Provide exactly one source for '${label}': '${label}Url', '${label}Base64', or '${label}Path'.`
519
- );
520
- }
521
- return buildImageSource(input);
522
- }
523
- function classificationSchema(options) {
524
- const schema = structuredClone(CLASSIFY_SCHEMA);
525
- const properties = schema.properties;
526
- if (!options.multiLabel && isPlainObject(properties)) {
527
- const classification = properties.classification;
528
- if (isPlainObject(classification)) {
529
- classification.maxItems = 1;
530
- }
531
- }
532
- return schema;
533
- }
534
- function classificationPrompt(options) {
535
- const parts = [options.instruction ?? "Classify this image."];
536
- if (options.classes && options.classes.length > 0) {
537
- parts.push(`Allowed classes: ${options.classes.join(", ")}.`);
538
- } else if (options.classes) {
539
- parts.push("No allowed classes were provided; return an empty list.");
540
- }
541
- if (options.classDescriptions) {
542
- const descriptions = Object.entries(options.classDescriptions).map(([className, description]) => `- ${className}: ${description}`).join("\n");
543
- parts.push(`Class descriptions:
544
- ${descriptions}`);
545
- }
546
- parts.push(
547
- options.multiLabel ? "Return every relevant class in the classification list." : "Return at most one class in the classification list."
548
- );
549
- return parts.join("\n");
550
- }
551
- function validateClassificationOptions(options) {
552
- const classes = options.classes ? [...options.classes] : void 0;
553
- if (options.classDescriptions) {
554
- if (!classes || classes.length === 0) {
555
- throw new Error(
556
- "'classDescriptions' can only be provided when 'classes' is provided."
557
- );
558
- }
559
- const invalidClasses = Object.keys(options.classDescriptions).filter(
560
- (className) => !classes.includes(className)
561
- );
562
- if (invalidClasses.length > 0) {
563
- throw new Error(
564
- `'classDescriptions' contains entries not present in 'classes': ${invalidClasses.sort().join(", ")}.`
565
- );
566
- }
567
- }
568
- return classes;
569
- }
570
- function normalizeClassification(data, options) {
571
- const rawClassification = data.classification;
572
- let classification;
573
- if (typeof rawClassification === "string") {
574
- classification = rawClassification ? [rawClassification] : [];
575
- } else if (Array.isArray(rawClassification)) {
576
- classification = rawClassification.filter((value) => typeof value === "string").map((value) => value);
577
- } else {
578
- classification = [];
579
- }
580
- if (options.classes) {
581
- const allowedClasses = new Set(options.classes);
582
- classification = classification.filter(
583
- (className) => allowedClasses.has(className)
584
- );
585
- }
586
- return { classification };
587
- }
588
- function validateQuestion(question) {
589
- if (typeof question !== "string" || question.trim().length === 0) {
590
- throw new Error("'question' must be provided.");
591
- }
592
- return question.trim();
593
- }
594
304
  function responseFormatForSchema(rawSchema, strict) {
595
305
  if (rawSchema.type === "json_schema" && typeof rawSchema.json_schema === "object" && rawSchema.json_schema !== null) {
596
306
  const jsonSchema = { ...rawSchema.json_schema };
@@ -650,10 +360,10 @@ function ensureStrictJsonSchema(schema) {
650
360
  delete schema.default;
651
361
  }
652
362
  }
653
- function resultFromResponse(response, context) {
363
+ function resultFromResponse(response) {
654
364
  const parsed = parseStructuredResponse(response);
655
365
  return {
656
- data: context.transform ? context.transform(parsed) : parsed,
366
+ data: parsed,
657
367
  raw: response,
658
368
  usageMetadata: usageMetadata(response)
659
369
  };
@@ -700,9 +410,7 @@ function messageContentText(message) {
700
410
  ).map((block) => block.text).join("");
701
411
  if (text) return text;
702
412
  }
703
- throw new StructuredOutputParseError(
704
- "Response message does not contain text content."
705
- );
413
+ throw new StructuredOutputParseError("Response message does not contain text content.");
706
414
  }
707
415
  function loadsJsonObject(content) {
708
416
  const trimmed = content.trim();
@@ -723,9 +431,7 @@ function loadsJsonObject(content) {
723
431
  try {
724
432
  const data = JSON.parse(attempt);
725
433
  if (!isPlainObject(data)) {
726
- throw new StructuredOutputParseError(
727
- "Structured output must be a JSON object."
728
- );
434
+ throw new StructuredOutputParseError("Structured output must be a JSON object.");
729
435
  }
730
436
  return data;
731
437
  } catch (error) {
@@ -758,22 +464,6 @@ async function extract(options) {
758
464
  const client = new ViscribeAI(options);
759
465
  return client.images.extract(options);
760
466
  }
761
- async function describe(options) {
762
- const client = new ViscribeAI(options);
763
- return client.images.describe(options);
764
- }
765
- async function classify(options) {
766
- const client = new ViscribeAI(options);
767
- return client.images.classify(options);
768
- }
769
- async function ask(options) {
770
- const client = new ViscribeAI(options);
771
- return client.images.ask(options);
772
- }
773
- async function compare(options) {
774
- const client = new ViscribeAI(options);
775
- return client.images.compare(options);
776
- }
777
467
  export {
778
468
  DEFAULT_MODEL,
779
469
  StructuredOutputError,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "viscribe",
3
- "version": "0.1.0",
4
- "description": "OpenAI-compatible image understanding SDK for structured extraction, description, classification, VQA, and comparison.",
3
+ "version": "1.1.0",
4
+ "description": "Extract structured data from images using AI models.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
7
7
  "module": "./dist/index.js",
@@ -24,10 +24,6 @@
24
24
  "typecheck": "tsc --noEmit",
25
25
  "test": "vitest run",
26
26
  "example": "tsx examples/extract.ts",
27
- "example:describe": "tsx examples/describe.ts",
28
- "example:classify": "tsx examples/classify.ts",
29
- "example:ask": "tsx examples/ask.ts",
30
- "example:compare": "tsx examples/compare.ts",
31
27
  "example:client": "tsx examples/client.ts"
32
28
  },
33
29
  "keywords": [