viscribe 0.1.0 → 1.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,26 +1,25 @@
1
1
  <div align="center">
2
- <picture>
3
- <source media="(prefers-color-scheme: dark)" srcset="./assets/white-v.png">
4
- <source media="(prefers-color-scheme: light)" srcset="./assets/black-v.png">
5
- <img src="./assets/black-v.png" alt="Viscribe" width="160">
6
- </picture>
2
+ <img src="./assets/viscribe-hero.png" alt="ViscribeAI" width="860">
7
3
 
8
4
  <h1>ViscribeAI</h1>
9
5
 
10
6
  <p>Extract <strong>structured data</strong> from <strong>images</strong> using <strong>AI models</strong>.</p>
11
7
 
12
8
  <p>
9
+ <a href="https://x.com/itsperini"><img alt="X @itsperini" src="https://img.shields.io/badge/X-@itsperini-000000?logo=x&logoColor=white"></a>
10
+ <a href="https://discord.gg/GVgJ9ujT"><img alt="Discord" src="https://img.shields.io/badge/discord-join-5865F2?logo=discord&logoColor=white"></a>
11
+ <a href="https://docs.viscribe.ai"><img alt="Docs docs.viscribe.ai" src="https://img.shields.io/badge/-docs.viscribe.ai-2563EB?logo=bookstack&logoColor=white"></a>
13
12
  <img alt="Python 3.10+" src="https://img.shields.io/badge/python-3.10%2B-3776AB">
14
13
  <img alt="Node.js 20+" src="https://img.shields.io/badge/node.js-20%2B-339933">
15
14
  <img alt="License MIT" src="https://img.shields.io/badge/license-MIT-blue">
16
- <a href="https://x.com/itsperini"><img alt="X @itsperini" src="https://img.shields.io/badge/X-@itsperini-000000?logo=x&logoColor=white"></a>
17
- <img alt="Discord coming soon" src="https://img.shields.io/badge/discord-coming_soon-5865F2?logo=discord&logoColor=white">
18
- <a href="https://docs.viscribe.ai"><img alt="Docs docs.viscribe.ai" src="https://img.shields.io/badge/docs-docs.viscribe.ai-2563EB"></a>
19
15
  </p>
20
16
  </div>
21
17
 
22
18
  > Define the output schema, pass the image, pick the AI model, and get parsed
23
- structured output back instead of free-form text.
19
+ > structured output back instead of free-form text.
20
+
21
+ ⭐ If Viscribe helps your project, please leave a
22
+ [star](https://github.com/itsperini/viscribe). ⭐
24
23
 
25
24
  ## 📦 Installation
26
25
 
@@ -54,9 +53,9 @@ result = describe(
54
53
  # image_base64="...",
55
54
  generate_tags=True,
56
55
  model_config={
57
- "model": "gpt-4o-mini",
56
+ "model": "gpt-5-mini",
58
57
  "api_key": "sk-...",
59
- "temperature": 0,
58
+ "temperature": 1,
60
59
  },
61
60
  )
62
61
 
@@ -82,9 +81,9 @@ const result = await images.describe({
82
81
  imagePath: "examples/venice.png",
83
82
  generateTags: true,
84
83
  modelConfig: {
85
- model: "gpt-4o-mini",
84
+ model: "gpt-5-mini",
86
85
  apiKey: "sk-...",
87
- temperature: 0,
86
+ temperature: 1,
88
87
  },
89
88
  });
90
89
 
@@ -98,6 +97,14 @@ console.log(result);
98
97
 
99
98
  ## 📚 Image Endpoints
100
99
 
100
+ | Method | Description |
101
+ | ---------- | ------------------------------------------------------------------------------------------------------ |
102
+ | `describe` | Generate an objective image description with optional tags. |
103
+ | `classify` | Classify an image into one or more allowed or free-form categories. |
104
+ | `ask` | Ask a visual question and get an answer grounded in the image. |
105
+ | `extract` | Extract structured data from an image using simple fields, JSON Schema, or a Pydantic model in Python. |
106
+ | `compare` | Compare two images and describe their similarities and differences. |
107
+
101
108
  ### 1. Describe Image
102
109
 
103
110
  Generate a natural language description of an image, optionally with tags.
@@ -109,9 +116,9 @@ result = describe(
109
116
  image_path="examples/venice.png",
110
117
  generate_tags=True,
111
118
  model_config={
112
- "model": "gpt-4o-mini",
119
+ "model": "gpt-5-mini",
113
120
  "api_key": "sk-...",
114
- "temperature": 0,
121
+ "temperature": 1,
115
122
  },
116
123
  )
117
124
 
@@ -128,9 +135,9 @@ const result = await images.describe({
128
135
  imagePath: "examples/venice.png",
129
136
  generateTags: true,
130
137
  modelConfig: {
131
- model: "gpt-4o-mini",
138
+ model: "gpt-5-mini",
132
139
  apiKey: "sk-...",
133
- temperature: 0,
140
+ temperature: 1,
134
141
  },
135
142
  });
136
143
 
@@ -151,9 +158,9 @@ result = classify(
151
158
  classes=["canal", "city", "landmark", "interior"],
152
159
  multi_label=True,
153
160
  model_config={
154
- "model": "gpt-4o-mini",
161
+ "model": "gpt-5-mini",
155
162
  "api_key": "sk-...",
156
- "temperature": 0,
163
+ "temperature": 1,
157
164
  },
158
165
  )
159
166
 
@@ -171,9 +178,9 @@ const result = await images.classify({
171
178
  classes: ["canal", "city", "landmark", "interior"],
172
179
  multiLabel: true,
173
180
  modelConfig: {
174
- model: "gpt-4o-mini",
181
+ model: "gpt-5-mini",
175
182
  apiKey: "sk-...",
176
- temperature: 0,
183
+ temperature: 1,
177
184
  },
178
185
  });
179
186
 
@@ -193,9 +200,9 @@ result = ask(
193
200
  image_path="examples/venice.png",
194
201
  question="What kind of place is shown in this image?",
195
202
  model_config={
196
- "model": "gpt-4o-mini",
203
+ "model": "gpt-5-mini",
197
204
  "api_key": "sk-...",
198
- "temperature": 0,
205
+ "temperature": 1,
199
206
  },
200
207
  )
201
208
 
@@ -212,9 +219,9 @@ const result = await images.ask({
212
219
  imagePath: "examples/venice.png",
213
220
  question: "What kind of place is shown in this image?",
214
221
  modelConfig: {
215
- model: "gpt-4o-mini",
222
+ model: "gpt-5-mini",
216
223
  apiKey: "sk-...",
217
- temperature: 0,
224
+ temperature: 1,
218
225
  },
219
226
  });
220
227
 
@@ -242,9 +249,9 @@ result = extract(
242
249
  {"name": "colors", "type": "array_text", "description": "Dominant colors"},
243
250
  ],
244
251
  model_config={
245
- "model": "gpt-4o-mini",
252
+ "model": "gpt-5-mini",
246
253
  "api_key": "sk-...",
247
- "temperature": 0,
254
+ "temperature": 1,
248
255
  },
249
256
  )
250
257
 
@@ -269,9 +276,9 @@ const result = await images.extract({
269
276
  { name: "colors", type: "array_text", description: "Dominant colors" },
270
277
  ],
271
278
  modelConfig: {
272
- model: "gpt-4o-mini",
279
+ model: "gpt-5-mini",
273
280
  apiKey: "sk-...",
274
- temperature: 0,
281
+ temperature: 1,
275
282
  },
276
283
  });
277
284
 
@@ -306,9 +313,9 @@ result = extract(
306
313
  image_path="examples/venice.png",
307
314
  output_schema=Scene,
308
315
  model_config={
309
- "model": "gpt-4o-mini",
316
+ "model": "gpt-5-mini",
310
317
  "api_key": "sk-...",
311
- "temperature": 0,
318
+ "temperature": 1,
312
319
  },
313
320
  )
314
321
 
@@ -338,9 +345,9 @@ const result = await images.extract({
338
345
  additionalProperties: false,
339
346
  },
340
347
  modelConfig: {
341
- model: "gpt-4o-mini",
348
+ model: "gpt-5-mini",
342
349
  apiKey: "sk-...",
343
- temperature: 0,
350
+ temperature: 1,
344
351
  },
345
352
  });
346
353
 
@@ -362,9 +369,9 @@ result = compare(
362
369
  image1_path="examples/venice.png",
363
370
  image2_path="examples/venice.png",
364
371
  model_config={
365
- "model": "gpt-4o-mini",
372
+ "model": "gpt-5-mini",
366
373
  "api_key": "sk-...",
367
- "temperature": 0,
374
+ "temperature": 1,
368
375
  },
369
376
  )
370
377
 
@@ -381,9 +388,9 @@ const result = await images.compare({
381
388
  image1Path: "examples/venice.png",
382
389
  image2Path: "examples/venice.png",
383
390
  modelConfig: {
384
- model: "gpt-4o-mini",
391
+ model: "gpt-5-mini",
385
392
  apiKey: "sk-...",
386
- temperature: 0,
393
+ temperature: 1,
387
394
  },
388
395
  });
389
396
 
@@ -406,9 +413,9 @@ async def main() -> None:
406
413
  image_path="examples/venice.png",
407
414
  generate_tags=True,
408
415
  model_config={
409
- "model": "gpt-4o-mini",
416
+ "model": "gpt-5-mini",
410
417
  "api_key": "sk-...",
411
- "temperature": 0,
418
+ "temperature": 1,
412
419
  },
413
420
  )
414
421
 
@@ -428,9 +435,9 @@ from viscribe import ViscribeAI
428
435
  async def main() -> None:
429
436
  client = ViscribeAI(
430
437
  model_config={
431
- "model": "gpt-4o-mini",
438
+ "model": "gpt-5-mini",
432
439
  "api_key": "sk-...",
433
- "temperature": 0,
440
+ "temperature": 1,
434
441
  }
435
442
  )
436
443
 
@@ -457,9 +464,9 @@ const result = await images.describe({
457
464
  imagePath: "examples/venice.png",
458
465
  generateTags: true,
459
466
  modelConfig: {
460
- model: "gpt-4o-mini",
467
+ model: "gpt-5-mini",
461
468
  apiKey: "sk-...",
462
- temperature: 0,
469
+ temperature: 1,
463
470
  },
464
471
  });
465
472
 
@@ -467,9 +474,9 @@ console.log(result.data);
467
474
 
468
475
  const client = new ViscribeAI({
469
476
  modelConfig: {
470
- model: "gpt-4o-mini",
477
+ model: "gpt-5-mini",
471
478
  apiKey: "sk-...",
472
- temperature: 0,
479
+ temperature: 1,
473
480
  },
474
481
  });
475
482
 
@@ -503,7 +510,7 @@ Feel free to contribute and join our Discord server to discuss with us improveme
503
510
 
504
511
  Please see the [contributing guidelines](./CONTRIBUTING.md).
505
512
 
506
- [![My Skills](https://skillicons.dev/icons?i=discord)](https://discord.gg/uJN7TYcp)
513
+ [![My Skills](https://skillicons.dev/icons?i=discord)](https://discord.gg/GVgJ9ujT)
507
514
  [![My Skills](https://skillicons.dev/icons?i=linkedin)](https://www.linkedin.com/in/itsperini)
508
515
  [![My Skills](https://skillicons.dev/icons?i=twitter)](https://twitter.com/itsperini)
509
516
 
@@ -517,6 +524,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
517
524
  - [Documentation](https://docs.viscribe.ai)
518
525
  - [GitHub](https://github.com/itsperini/viscribe)
519
526
 
527
+ ⭐ If Viscribe helps your project, please leave a
528
+ [star](https://github.com/itsperini/viscribe). ⭐
529
+
520
530
  ---
521
531
 
522
532
  Made with ❤️ by [ViscribeAI](https://viscribe.ai)
Binary file
package/dist/index.cjs CHANGED
@@ -68,9 +68,7 @@ function buildImageSource(input) {
68
68
  input.imagePath !== void 0
69
69
  ].filter(Boolean).length;
70
70
  if (provided !== 1) {
71
- throw new Error(
72
- "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
73
- );
71
+ throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
74
72
  }
75
73
  if (input.imageUrl !== void 0) {
76
74
  validateImageUrl(input.imageUrl);
@@ -90,9 +88,7 @@ function buildImageSource(input) {
90
88
  const mimeType = detectImageMimeType(imageBytes);
91
89
  return `data:${mimeType};base64,${input.imageBase64}`;
92
90
  }
93
- throw new Error(
94
- "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
95
- );
91
+ throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
96
92
  }
97
93
  async function assertImagePathExists(imagePath) {
98
94
  const result = await (0, import_promises.stat)(imagePath);
@@ -617,9 +613,7 @@ function normalizeClassification(data, options) {
617
613
  }
618
614
  if (options.classes) {
619
615
  const allowedClasses = new Set(options.classes);
620
- classification = classification.filter(
621
- (className) => allowedClasses.has(className)
622
- );
616
+ classification = classification.filter((className) => allowedClasses.has(className));
623
617
  }
624
618
  return { classification };
625
619
  }
@@ -738,9 +732,7 @@ function messageContentText(message) {
738
732
  ).map((block) => block.text).join("");
739
733
  if (text) return text;
740
734
  }
741
- throw new StructuredOutputParseError(
742
- "Response message does not contain text content."
743
- );
735
+ throw new StructuredOutputParseError("Response message does not contain text content.");
744
736
  }
745
737
  function loadsJsonObject(content) {
746
738
  const trimmed = content.trim();
@@ -761,9 +753,7 @@ function loadsJsonObject(content) {
761
753
  try {
762
754
  const data = JSON.parse(attempt);
763
755
  if (!isPlainObject(data)) {
764
- throw new StructuredOutputParseError(
765
- "Structured output must be a JSON object."
766
- );
756
+ throw new StructuredOutputParseError("Structured output must be a JSON object.");
767
757
  }
768
758
  return data;
769
759
  } catch (error) {
package/dist/index.js CHANGED
@@ -30,9 +30,7 @@ function buildImageSource(input) {
30
30
  input.imagePath !== void 0
31
31
  ].filter(Boolean).length;
32
32
  if (provided !== 1) {
33
- throw new Error(
34
- "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
35
- );
33
+ throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
36
34
  }
37
35
  if (input.imageUrl !== void 0) {
38
36
  validateImageUrl(input.imageUrl);
@@ -52,9 +50,7 @@ function buildImageSource(input) {
52
50
  const mimeType = detectImageMimeType(imageBytes);
53
51
  return `data:${mimeType};base64,${input.imageBase64}`;
54
52
  }
55
- throw new Error(
56
- "Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
57
- );
53
+ throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
58
54
  }
59
55
  async function assertImagePathExists(imagePath) {
60
56
  const result = await stat(imagePath);
@@ -579,9 +575,7 @@ function normalizeClassification(data, options) {
579
575
  }
580
576
  if (options.classes) {
581
577
  const allowedClasses = new Set(options.classes);
582
- classification = classification.filter(
583
- (className) => allowedClasses.has(className)
584
- );
578
+ classification = classification.filter((className) => allowedClasses.has(className));
585
579
  }
586
580
  return { classification };
587
581
  }
@@ -700,9 +694,7 @@ function messageContentText(message) {
700
694
  ).map((block) => block.text).join("");
701
695
  if (text) return text;
702
696
  }
703
- throw new StructuredOutputParseError(
704
- "Response message does not contain text content."
705
- );
697
+ throw new StructuredOutputParseError("Response message does not contain text content.");
706
698
  }
707
699
  function loadsJsonObject(content) {
708
700
  const trimmed = content.trim();
@@ -723,9 +715,7 @@ function loadsJsonObject(content) {
723
715
  try {
724
716
  const data = JSON.parse(attempt);
725
717
  if (!isPlainObject(data)) {
726
- throw new StructuredOutputParseError(
727
- "Structured output must be a JSON object."
728
- );
718
+ throw new StructuredOutputParseError("Structured output must be a JSON object.");
729
719
  }
730
720
  return data;
731
721
  } catch (error) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "viscribe",
3
- "version": "0.1.0",
4
- "description": "OpenAI-compatible image understanding SDK for structured extraction, description, classification, VQA, and comparison.",
3
+ "version": "1.0.5",
4
+ "description": "Extract structured data from images using AI models.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
7
7
  "module": "./dist/index.js",