viscribe 0.1.0 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -46
- package/assets/viscribe-hero.png +0 -0
- package/dist/index.cjs +5 -15
- package/dist/index.js +5 -15
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,26 +1,25 @@
|
|
|
1
1
|
<div align="center">
|
|
2
|
-
<picture>
|
|
3
|
-
<source media="(prefers-color-scheme: dark)" srcset="./assets/white-v.png">
|
|
4
|
-
<source media="(prefers-color-scheme: light)" srcset="./assets/black-v.png">
|
|
5
|
-
<img src="./assets/black-v.png" alt="Viscribe" width="160">
|
|
6
|
-
</picture>
|
|
2
|
+
<img src="./assets/viscribe-hero.png" alt="ViscribeAI" width="860">
|
|
7
3
|
|
|
8
4
|
<h1>ViscribeAI</h1>
|
|
9
5
|
|
|
10
6
|
<p>Extract <strong>structured data</strong> from <strong>images</strong> using <strong>AI models</strong>.</p>
|
|
11
7
|
|
|
12
8
|
<p>
|
|
9
|
+
<a href="https://x.com/itsperini"><img alt="X @itsperini" src="https://img.shields.io/badge/X-@itsperini-000000?logo=x&logoColor=white"></a>
|
|
10
|
+
<a href="https://discord.gg/GVgJ9ujT"><img alt="Discord" src="https://img.shields.io/badge/discord-join-5865F2?logo=discord&logoColor=white"></a>
|
|
11
|
+
<a href="https://docs.viscribe.ai"><img alt="Docs docs.viscribe.ai" src="https://img.shields.io/badge/-docs.viscribe.ai-2563EB?logo=bookstack&logoColor=white"></a>
|
|
13
12
|
<img alt="Python 3.10+" src="https://img.shields.io/badge/python-3.10%2B-3776AB">
|
|
14
13
|
<img alt="Node.js 20+" src="https://img.shields.io/badge/node.js-20%2B-339933">
|
|
15
14
|
<img alt="License MIT" src="https://img.shields.io/badge/license-MIT-blue">
|
|
16
|
-
<a href="https://x.com/itsperini"><img alt="X @itsperini" src="https://img.shields.io/badge/X-@itsperini-000000?logo=x&logoColor=white"></a>
|
|
17
|
-
<img alt="Discord coming soon" src="https://img.shields.io/badge/discord-coming_soon-5865F2?logo=discord&logoColor=white">
|
|
18
|
-
<a href="https://docs.viscribe.ai"><img alt="Docs docs.viscribe.ai" src="https://img.shields.io/badge/docs-docs.viscribe.ai-2563EB"></a>
|
|
19
15
|
</p>
|
|
20
16
|
</div>
|
|
21
17
|
|
|
22
18
|
> Define the output schema, pass the image, pick the AI model, and get parsed
|
|
23
|
-
structured output back instead of free-form text.
|
|
19
|
+
> structured output back instead of free-form text.
|
|
20
|
+
|
|
21
|
+
⭐ If Viscribe helps your project, please leave a
|
|
22
|
+
[star](https://github.com/itsperini/viscribe). ⭐
|
|
24
23
|
|
|
25
24
|
## 📦 Installation
|
|
26
25
|
|
|
@@ -54,9 +53,9 @@ result = describe(
|
|
|
54
53
|
# image_base64="...",
|
|
55
54
|
generate_tags=True,
|
|
56
55
|
model_config={
|
|
57
|
-
"model": "gpt-
|
|
56
|
+
"model": "gpt-5-mini",
|
|
58
57
|
"api_key": "sk-...",
|
|
59
|
-
"temperature":
|
|
58
|
+
"temperature": 1,
|
|
60
59
|
},
|
|
61
60
|
)
|
|
62
61
|
|
|
@@ -82,9 +81,9 @@ const result = await images.describe({
|
|
|
82
81
|
imagePath: "examples/venice.png",
|
|
83
82
|
generateTags: true,
|
|
84
83
|
modelConfig: {
|
|
85
|
-
model: "gpt-
|
|
84
|
+
model: "gpt-5-mini",
|
|
86
85
|
apiKey: "sk-...",
|
|
87
|
-
temperature:
|
|
86
|
+
temperature: 1,
|
|
88
87
|
},
|
|
89
88
|
});
|
|
90
89
|
|
|
@@ -98,6 +97,14 @@ console.log(result);
|
|
|
98
97
|
|
|
99
98
|
## 📚 Image Endpoints
|
|
100
99
|
|
|
100
|
+
| Method | Description |
|
|
101
|
+
| ---------- | ------------------------------------------------------------------------------------------------------ |
|
|
102
|
+
| `describe` | Generate an objective image description with optional tags. |
|
|
103
|
+
| `classify` | Classify an image into one or more allowed or free-form categories. |
|
|
104
|
+
| `ask` | Ask a visual question and get an answer grounded in the image. |
|
|
105
|
+
| `extract` | Extract structured data from an image using simple fields, JSON Schema, or a Pydantic model in Python. |
|
|
106
|
+
| `compare` | Compare two images and describe their similarities and differences. |
|
|
107
|
+
|
|
101
108
|
### 1. Describe Image
|
|
102
109
|
|
|
103
110
|
Generate a natural language description of an image, optionally with tags.
|
|
@@ -109,9 +116,9 @@ result = describe(
|
|
|
109
116
|
image_path="examples/venice.png",
|
|
110
117
|
generate_tags=True,
|
|
111
118
|
model_config={
|
|
112
|
-
"model": "gpt-
|
|
119
|
+
"model": "gpt-5-mini",
|
|
113
120
|
"api_key": "sk-...",
|
|
114
|
-
"temperature":
|
|
121
|
+
"temperature": 1,
|
|
115
122
|
},
|
|
116
123
|
)
|
|
117
124
|
|
|
@@ -128,9 +135,9 @@ const result = await images.describe({
|
|
|
128
135
|
imagePath: "examples/venice.png",
|
|
129
136
|
generateTags: true,
|
|
130
137
|
modelConfig: {
|
|
131
|
-
model: "gpt-
|
|
138
|
+
model: "gpt-5-mini",
|
|
132
139
|
apiKey: "sk-...",
|
|
133
|
-
temperature:
|
|
140
|
+
temperature: 1,
|
|
134
141
|
},
|
|
135
142
|
});
|
|
136
143
|
|
|
@@ -151,9 +158,9 @@ result = classify(
|
|
|
151
158
|
classes=["canal", "city", "landmark", "interior"],
|
|
152
159
|
multi_label=True,
|
|
153
160
|
model_config={
|
|
154
|
-
"model": "gpt-
|
|
161
|
+
"model": "gpt-5-mini",
|
|
155
162
|
"api_key": "sk-...",
|
|
156
|
-
"temperature":
|
|
163
|
+
"temperature": 1,
|
|
157
164
|
},
|
|
158
165
|
)
|
|
159
166
|
|
|
@@ -171,9 +178,9 @@ const result = await images.classify({
|
|
|
171
178
|
classes: ["canal", "city", "landmark", "interior"],
|
|
172
179
|
multiLabel: true,
|
|
173
180
|
modelConfig: {
|
|
174
|
-
model: "gpt-
|
|
181
|
+
model: "gpt-5-mini",
|
|
175
182
|
apiKey: "sk-...",
|
|
176
|
-
temperature:
|
|
183
|
+
temperature: 1,
|
|
177
184
|
},
|
|
178
185
|
});
|
|
179
186
|
|
|
@@ -193,9 +200,9 @@ result = ask(
|
|
|
193
200
|
image_path="examples/venice.png",
|
|
194
201
|
question="What kind of place is shown in this image?",
|
|
195
202
|
model_config={
|
|
196
|
-
"model": "gpt-
|
|
203
|
+
"model": "gpt-5-mini",
|
|
197
204
|
"api_key": "sk-...",
|
|
198
|
-
"temperature":
|
|
205
|
+
"temperature": 1,
|
|
199
206
|
},
|
|
200
207
|
)
|
|
201
208
|
|
|
@@ -212,9 +219,9 @@ const result = await images.ask({
|
|
|
212
219
|
imagePath: "examples/venice.png",
|
|
213
220
|
question: "What kind of place is shown in this image?",
|
|
214
221
|
modelConfig: {
|
|
215
|
-
model: "gpt-
|
|
222
|
+
model: "gpt-5-mini",
|
|
216
223
|
apiKey: "sk-...",
|
|
217
|
-
temperature:
|
|
224
|
+
temperature: 1,
|
|
218
225
|
},
|
|
219
226
|
});
|
|
220
227
|
|
|
@@ -242,9 +249,9 @@ result = extract(
|
|
|
242
249
|
{"name": "colors", "type": "array_text", "description": "Dominant colors"},
|
|
243
250
|
],
|
|
244
251
|
model_config={
|
|
245
|
-
"model": "gpt-
|
|
252
|
+
"model": "gpt-5-mini",
|
|
246
253
|
"api_key": "sk-...",
|
|
247
|
-
"temperature":
|
|
254
|
+
"temperature": 1,
|
|
248
255
|
},
|
|
249
256
|
)
|
|
250
257
|
|
|
@@ -269,9 +276,9 @@ const result = await images.extract({
|
|
|
269
276
|
{ name: "colors", type: "array_text", description: "Dominant colors" },
|
|
270
277
|
],
|
|
271
278
|
modelConfig: {
|
|
272
|
-
model: "gpt-
|
|
279
|
+
model: "gpt-5-mini",
|
|
273
280
|
apiKey: "sk-...",
|
|
274
|
-
temperature:
|
|
281
|
+
temperature: 1,
|
|
275
282
|
},
|
|
276
283
|
});
|
|
277
284
|
|
|
@@ -306,9 +313,9 @@ result = extract(
|
|
|
306
313
|
image_path="examples/venice.png",
|
|
307
314
|
output_schema=Scene,
|
|
308
315
|
model_config={
|
|
309
|
-
"model": "gpt-
|
|
316
|
+
"model": "gpt-5-mini",
|
|
310
317
|
"api_key": "sk-...",
|
|
311
|
-
"temperature":
|
|
318
|
+
"temperature": 1,
|
|
312
319
|
},
|
|
313
320
|
)
|
|
314
321
|
|
|
@@ -338,9 +345,9 @@ const result = await images.extract({
|
|
|
338
345
|
additionalProperties: false,
|
|
339
346
|
},
|
|
340
347
|
modelConfig: {
|
|
341
|
-
model: "gpt-
|
|
348
|
+
model: "gpt-5-mini",
|
|
342
349
|
apiKey: "sk-...",
|
|
343
|
-
temperature:
|
|
350
|
+
temperature: 1,
|
|
344
351
|
},
|
|
345
352
|
});
|
|
346
353
|
|
|
@@ -362,9 +369,9 @@ result = compare(
|
|
|
362
369
|
image1_path="examples/venice.png",
|
|
363
370
|
image2_path="examples/venice.png",
|
|
364
371
|
model_config={
|
|
365
|
-
"model": "gpt-
|
|
372
|
+
"model": "gpt-5-mini",
|
|
366
373
|
"api_key": "sk-...",
|
|
367
|
-
"temperature":
|
|
374
|
+
"temperature": 1,
|
|
368
375
|
},
|
|
369
376
|
)
|
|
370
377
|
|
|
@@ -381,9 +388,9 @@ const result = await images.compare({
|
|
|
381
388
|
image1Path: "examples/venice.png",
|
|
382
389
|
image2Path: "examples/venice.png",
|
|
383
390
|
modelConfig: {
|
|
384
|
-
model: "gpt-
|
|
391
|
+
model: "gpt-5-mini",
|
|
385
392
|
apiKey: "sk-...",
|
|
386
|
-
temperature:
|
|
393
|
+
temperature: 1,
|
|
387
394
|
},
|
|
388
395
|
});
|
|
389
396
|
|
|
@@ -406,9 +413,9 @@ async def main() -> None:
|
|
|
406
413
|
image_path="examples/venice.png",
|
|
407
414
|
generate_tags=True,
|
|
408
415
|
model_config={
|
|
409
|
-
"model": "gpt-
|
|
416
|
+
"model": "gpt-5-mini",
|
|
410
417
|
"api_key": "sk-...",
|
|
411
|
-
"temperature":
|
|
418
|
+
"temperature": 1,
|
|
412
419
|
},
|
|
413
420
|
)
|
|
414
421
|
|
|
@@ -428,9 +435,9 @@ from viscribe import ViscribeAI
|
|
|
428
435
|
async def main() -> None:
|
|
429
436
|
client = ViscribeAI(
|
|
430
437
|
model_config={
|
|
431
|
-
"model": "gpt-
|
|
438
|
+
"model": "gpt-5-mini",
|
|
432
439
|
"api_key": "sk-...",
|
|
433
|
-
"temperature":
|
|
440
|
+
"temperature": 1,
|
|
434
441
|
}
|
|
435
442
|
)
|
|
436
443
|
|
|
@@ -457,9 +464,9 @@ const result = await images.describe({
|
|
|
457
464
|
imagePath: "examples/venice.png",
|
|
458
465
|
generateTags: true,
|
|
459
466
|
modelConfig: {
|
|
460
|
-
model: "gpt-
|
|
467
|
+
model: "gpt-5-mini",
|
|
461
468
|
apiKey: "sk-...",
|
|
462
|
-
temperature:
|
|
469
|
+
temperature: 1,
|
|
463
470
|
},
|
|
464
471
|
});
|
|
465
472
|
|
|
@@ -467,9 +474,9 @@ console.log(result.data);
|
|
|
467
474
|
|
|
468
475
|
const client = new ViscribeAI({
|
|
469
476
|
modelConfig: {
|
|
470
|
-
model: "gpt-
|
|
477
|
+
model: "gpt-5-mini",
|
|
471
478
|
apiKey: "sk-...",
|
|
472
|
-
temperature:
|
|
479
|
+
temperature: 1,
|
|
473
480
|
},
|
|
474
481
|
});
|
|
475
482
|
|
|
@@ -503,7 +510,7 @@ Feel free to contribute and join our Discord server to discuss with us improveme
|
|
|
503
510
|
|
|
504
511
|
Please see the [contributing guidelines](./CONTRIBUTING.md).
|
|
505
512
|
|
|
506
|
-
[](https://discord.gg/
|
|
513
|
+
[](https://discord.gg/GVgJ9ujT)
|
|
507
514
|
[](https://www.linkedin.com/in/itsperini)
|
|
508
515
|
[](https://twitter.com/itsperini)
|
|
509
516
|
|
|
@@ -517,6 +524,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
|
517
524
|
- [Documentation](https://docs.viscribe.ai)
|
|
518
525
|
- [GitHub](https://github.com/itsperini/viscribe)
|
|
519
526
|
|
|
527
|
+
⭐ If Viscribe helps your project, please leave a
|
|
528
|
+
[star](https://github.com/itsperini/viscribe). ⭐
|
|
529
|
+
|
|
520
530
|
---
|
|
521
531
|
|
|
522
532
|
Made with ❤️ by [ViscribeAI](https://viscribe.ai)
|
|
Binary file
|
package/dist/index.cjs
CHANGED
|
@@ -68,9 +68,7 @@ function buildImageSource(input) {
|
|
|
68
68
|
input.imagePath !== void 0
|
|
69
69
|
].filter(Boolean).length;
|
|
70
70
|
if (provided !== 1) {
|
|
71
|
-
throw new Error(
|
|
72
|
-
"Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
|
|
73
|
-
);
|
|
71
|
+
throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
|
|
74
72
|
}
|
|
75
73
|
if (input.imageUrl !== void 0) {
|
|
76
74
|
validateImageUrl(input.imageUrl);
|
|
@@ -90,9 +88,7 @@ function buildImageSource(input) {
|
|
|
90
88
|
const mimeType = detectImageMimeType(imageBytes);
|
|
91
89
|
return `data:${mimeType};base64,${input.imageBase64}`;
|
|
92
90
|
}
|
|
93
|
-
throw new Error(
|
|
94
|
-
"Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
|
|
95
|
-
);
|
|
91
|
+
throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
|
|
96
92
|
}
|
|
97
93
|
async function assertImagePathExists(imagePath) {
|
|
98
94
|
const result = await (0, import_promises.stat)(imagePath);
|
|
@@ -617,9 +613,7 @@ function normalizeClassification(data, options) {
|
|
|
617
613
|
}
|
|
618
614
|
if (options.classes) {
|
|
619
615
|
const allowedClasses = new Set(options.classes);
|
|
620
|
-
classification = classification.filter(
|
|
621
|
-
(className) => allowedClasses.has(className)
|
|
622
|
-
);
|
|
616
|
+
classification = classification.filter((className) => allowedClasses.has(className));
|
|
623
617
|
}
|
|
624
618
|
return { classification };
|
|
625
619
|
}
|
|
@@ -738,9 +732,7 @@ function messageContentText(message) {
|
|
|
738
732
|
).map((block) => block.text).join("");
|
|
739
733
|
if (text) return text;
|
|
740
734
|
}
|
|
741
|
-
throw new StructuredOutputParseError(
|
|
742
|
-
"Response message does not contain text content."
|
|
743
|
-
);
|
|
735
|
+
throw new StructuredOutputParseError("Response message does not contain text content.");
|
|
744
736
|
}
|
|
745
737
|
function loadsJsonObject(content) {
|
|
746
738
|
const trimmed = content.trim();
|
|
@@ -761,9 +753,7 @@ function loadsJsonObject(content) {
|
|
|
761
753
|
try {
|
|
762
754
|
const data = JSON.parse(attempt);
|
|
763
755
|
if (!isPlainObject(data)) {
|
|
764
|
-
throw new StructuredOutputParseError(
|
|
765
|
-
"Structured output must be a JSON object."
|
|
766
|
-
);
|
|
756
|
+
throw new StructuredOutputParseError("Structured output must be a JSON object.");
|
|
767
757
|
}
|
|
768
758
|
return data;
|
|
769
759
|
} catch (error) {
|
package/dist/index.js
CHANGED
|
@@ -30,9 +30,7 @@ function buildImageSource(input) {
|
|
|
30
30
|
input.imagePath !== void 0
|
|
31
31
|
].filter(Boolean).length;
|
|
32
32
|
if (provided !== 1) {
|
|
33
|
-
throw new Error(
|
|
34
|
-
"Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
|
|
35
|
-
);
|
|
33
|
+
throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
|
|
36
34
|
}
|
|
37
35
|
if (input.imageUrl !== void 0) {
|
|
38
36
|
validateImageUrl(input.imageUrl);
|
|
@@ -52,9 +50,7 @@ function buildImageSource(input) {
|
|
|
52
50
|
const mimeType = detectImageMimeType(imageBytes);
|
|
53
51
|
return `data:${mimeType};base64,${input.imageBase64}`;
|
|
54
52
|
}
|
|
55
|
-
throw new Error(
|
|
56
|
-
"Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'."
|
|
57
|
-
);
|
|
53
|
+
throw new Error("Provide exactly one of 'imageUrl', 'imageBase64', or 'imagePath'.");
|
|
58
54
|
}
|
|
59
55
|
async function assertImagePathExists(imagePath) {
|
|
60
56
|
const result = await stat(imagePath);
|
|
@@ -579,9 +575,7 @@ function normalizeClassification(data, options) {
|
|
|
579
575
|
}
|
|
580
576
|
if (options.classes) {
|
|
581
577
|
const allowedClasses = new Set(options.classes);
|
|
582
|
-
classification = classification.filter(
|
|
583
|
-
(className) => allowedClasses.has(className)
|
|
584
|
-
);
|
|
578
|
+
classification = classification.filter((className) => allowedClasses.has(className));
|
|
585
579
|
}
|
|
586
580
|
return { classification };
|
|
587
581
|
}
|
|
@@ -700,9 +694,7 @@ function messageContentText(message) {
|
|
|
700
694
|
).map((block) => block.text).join("");
|
|
701
695
|
if (text) return text;
|
|
702
696
|
}
|
|
703
|
-
throw new StructuredOutputParseError(
|
|
704
|
-
"Response message does not contain text content."
|
|
705
|
-
);
|
|
697
|
+
throw new StructuredOutputParseError("Response message does not contain text content.");
|
|
706
698
|
}
|
|
707
699
|
function loadsJsonObject(content) {
|
|
708
700
|
const trimmed = content.trim();
|
|
@@ -723,9 +715,7 @@ function loadsJsonObject(content) {
|
|
|
723
715
|
try {
|
|
724
716
|
const data = JSON.parse(attempt);
|
|
725
717
|
if (!isPlainObject(data)) {
|
|
726
|
-
throw new StructuredOutputParseError(
|
|
727
|
-
"Structured output must be a JSON object."
|
|
728
|
-
);
|
|
718
|
+
throw new StructuredOutputParseError("Structured output must be a JSON object.");
|
|
729
719
|
}
|
|
730
720
|
return data;
|
|
731
721
|
} catch (error) {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "viscribe",
|
|
3
|
-
"version": "0.1.0",
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "1.0.5",
|
|
4
|
+
"description": "Extract structured data from images using AI models.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
7
7
|
"module": "./dist/index.js",
|