@huggingface/inference 2.2.2 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -1
- package/dist/index.d.ts +1260 -0
- package/dist/index.js +59 -21
- package/dist/index.mjs +57 -21
- package/package.json +7 -5
- package/src/index.ts +1 -0
- package/src/tasks/cv/imageToImage.ts +83 -0
- package/src/tasks/index.ts +1 -0
- package/src/tasks/multimodal/documentQuestionAnswering.ts +7 -3
- package/src/tasks/multimodal/visualQuestionAnswering.ts +7 -3
- package/src/utils/pick.ts +0 -3
package/dist/index.js
CHANGED

@@ -22,6 +22,7 @@ var src_exports = {};
 __export(src_exports, {
   HfInference: () => HfInference,
   HfInferenceEndpoint: () => HfInferenceEndpoint,
+  InferenceOutputError: () => InferenceOutputError,
   audioClassification: () => audioClassification,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
   conversational: () => conversational,
@@ -30,6 +31,7 @@ __export(src_exports, {
   fillMask: () => fillMask,
   imageClassification: () => imageClassification,
   imageSegmentation: () => imageSegmentation,
+  imageToImage: () => imageToImage,
   imageToText: () => imageToText,
   objectDetection: () => objectDetection,
   questionAnswering: () => questionAnswering,
@@ -62,6 +64,7 @@ __export(tasks_exports, {
   fillMask: () => fillMask,
   imageClassification: () => imageClassification,
   imageSegmentation: () => imageSegmentation,
+  imageToImage: () => imageToImage,
   imageToText: () => imageToText,
   objectDetection: () => objectDetection,
   questionAnswering: () => questionAnswering,
@@ -397,6 +400,48 @@ async function textToImage(args, options) {
   return res;
 }
 
+// ../shared/src/base64FromBytes.ts
+function base64FromBytes(arr) {
+  if (globalThis.Buffer) {
+    return globalThis.Buffer.from(arr).toString("base64");
+  } else {
+    const bin = [];
+    arr.forEach((byte) => {
+      bin.push(String.fromCharCode(byte));
+    });
+    return globalThis.btoa(bin.join(""));
+  }
+}
+
+// ../shared/src/isBackend.ts
+var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
+var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
+
+// src/tasks/cv/imageToImage.ts
+async function imageToImage(args, options) {
+  let reqArgs;
+  if (!args.parameters) {
+    reqArgs = {
+      accessToken: args.accessToken,
+      model: args.model,
+      data: args.inputs
+    };
+  } else {
+    reqArgs = {
+      ...args,
+      inputs: base64FromBytes(
+        new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await args.inputs.arrayBuffer())
+      )
+    };
+  }
+  const res = await request(reqArgs, options);
+  const isValidOutput = res && res instanceof Blob;
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Blob");
+  }
+  return res;
+}
+
 // src/tasks/nlp/conversational.ts
 async function conversational(args, options) {
   const res = await request(args, options);
@@ -561,31 +606,18 @@ async function zeroShotClassification(args, options) {
   return res;
 }
 
-// ../shared/src/base64FromBytes.ts
-function base64FromBytes(arr) {
-  if (globalThis.Buffer) {
-    return globalThis.Buffer.from(arr).toString("base64");
-  } else {
-    const bin = [];
-    arr.forEach((byte) => {
-      bin.push(String.fromCharCode(byte));
-    });
-    return globalThis.btoa(bin.join(""));
-  }
-}
-
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
     ...args,
     inputs: {
       question: args.inputs.question,
-      // convert Blob to base64
-      image: base64FromBytes(
+      // convert Blob or ArrayBuffer to base64
+      image: base64FromBytes(
+        new Uint8Array(
+          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+        )
+      )
     }
   };
   const res = toArray(
@@ -604,8 +636,12 @@ async function visualQuestionAnswering(args, options) {
     ...args,
     inputs: {
       question: args.inputs.question,
-      // convert Blob to base64
-      image: base64FromBytes(
+      // convert Blob or ArrayBuffer to base64
+      image: base64FromBytes(
+        new Uint8Array(
+          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+        )
+      )
     }
   };
   const res = (await request(reqArgs, options))?.[0];
@@ -669,6 +705,7 @@ var HfInferenceEndpoint = class {
 0 && (module.exports = {
   HfInference,
   HfInferenceEndpoint,
+  InferenceOutputError,
   audioClassification,
   automaticSpeechRecognition,
   conversational,
@@ -677,6 +714,7 @@ var HfInferenceEndpoint = class {
   fillMask,
   imageClassification,
   imageSegmentation,
+  imageToImage,
   imageToText,
   objectDetection,
   questionAnswering,
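Note: both bundles now also export InferenceOutputError, which the tasks throw when the API response has an unexpected shape. A minimal sketch of catching it (the access token, model id, and prompt are illustrative placeholders, not taken from this diff):

import { HfInference, InferenceOutputError } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

try {
  const result = await hf.fillMask({
    model: "bert-base-uncased", // assumed model id for illustration
    inputs: "The goal of life is [MASK].",
  });
  console.log(result);
} catch (err) {
  // thrown by the library when the response body does not match the expected output type
  if (err instanceof InferenceOutputError) {
    console.error("Malformed inference output:", err.message);
  } else {
    throw err;
  }
}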
package/dist/index.mjs
CHANGED

@@ -15,6 +15,7 @@ __export(tasks_exports, {
   fillMask: () => fillMask,
   imageClassification: () => imageClassification,
   imageSegmentation: () => imageSegmentation,
+  imageToImage: () => imageToImage,
   imageToText: () => imageToText,
   objectDetection: () => objectDetection,
   questionAnswering: () => questionAnswering,
@@ -350,6 +351,48 @@ async function textToImage(args, options) {
   return res;
 }
 
+// ../shared/src/base64FromBytes.ts
+function base64FromBytes(arr) {
+  if (globalThis.Buffer) {
+    return globalThis.Buffer.from(arr).toString("base64");
+  } else {
+    const bin = [];
+    arr.forEach((byte) => {
+      bin.push(String.fromCharCode(byte));
+    });
+    return globalThis.btoa(bin.join(""));
+  }
+}
+
+// ../shared/src/isBackend.ts
+var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
+var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
+
+// src/tasks/cv/imageToImage.ts
+async function imageToImage(args, options) {
+  let reqArgs;
+  if (!args.parameters) {
+    reqArgs = {
+      accessToken: args.accessToken,
+      model: args.model,
+      data: args.inputs
+    };
+  } else {
+    reqArgs = {
+      ...args,
+      inputs: base64FromBytes(
+        new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await args.inputs.arrayBuffer())
+      )
+    };
+  }
+  const res = await request(reqArgs, options);
+  const isValidOutput = res && res instanceof Blob;
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Blob");
+  }
+  return res;
+}
+
 // src/tasks/nlp/conversational.ts
 async function conversational(args, options) {
   const res = await request(args, options);
@@ -514,31 +557,18 @@ async function zeroShotClassification(args, options) {
   return res;
 }
 
-// ../shared/src/base64FromBytes.ts
-function base64FromBytes(arr) {
-  if (globalThis.Buffer) {
-    return globalThis.Buffer.from(arr).toString("base64");
-  } else {
-    const bin = [];
-    arr.forEach((byte) => {
-      bin.push(String.fromCharCode(byte));
-    });
-    return globalThis.btoa(bin.join(""));
-  }
-}
-
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
     ...args,
     inputs: {
       question: args.inputs.question,
-      // convert Blob to base64
-      image: base64FromBytes(
+      // convert Blob or ArrayBuffer to base64
+      image: base64FromBytes(
+        new Uint8Array(
+          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+        )
+      )
     }
   };
   const res = toArray(
@@ -557,8 +587,12 @@ async function visualQuestionAnswering(args, options) {
     ...args,
     inputs: {
       question: args.inputs.question,
-      // convert Blob to base64
-      image: base64FromBytes(
+      // convert Blob or ArrayBuffer to base64
+      image: base64FromBytes(
+        new Uint8Array(
+          args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+        )
+      )
     }
   };
   const res = (await request(reqArgs, options))?.[0];
@@ -621,6 +655,7 @@ var HfInferenceEndpoint = class {
 export {
   HfInference,
   HfInferenceEndpoint,
+  InferenceOutputError,
   audioClassification,
   automaticSpeechRecognition,
   conversational,
@@ -629,6 +664,7 @@ export {
   fillMask,
   imageClassification,
   imageSegmentation,
+  imageToImage,
   imageToText,
   objectDetection,
   questionAnswering,
package/package.json
CHANGED

@@ -1,6 +1,7 @@
 {
   "name": "@huggingface/inference",
-  "version": "2.2.2",
+  "version": "2.3.1",
+  "packageManager": "pnpm@8.3.1",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
   "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -28,16 +29,17 @@
     "src"
   ],
   "source": "src/index.ts",
-  "types": "
+  "types": "./dist/index.d.ts",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",
   "exports": {
-    "types": "./
+    "types": "./dist/index.d.ts",
     "require": "./dist/index.js",
     "import": "./dist/index.mjs"
   },
   "devDependencies": {
     "@types/node": "18.13.0",
+    "ts-node": "^10.9.1",
     "typescript": "^5.0.4",
     "vite": "^4.1.4",
     "vitest": "^0.29.8",
@@ -48,12 +50,12 @@
   ],
   "resolutions": {},
   "scripts": {
-    "build": "tsup src/index.ts --format cjs,esm --clean",
+    "build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts",
+    "dts": "ts-node scripts/generate-dts.ts",
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
     "lint:check": "eslint --ext .cjs,.ts .",
     "format": "prettier --write .",
     "format:check": "prettier --check .",
-    "preversion": "pnpm --filter doc-internal run fix-cdn-versions && git add ../../README.md",
     "test": "vitest run --config vitest.config.ts",
     "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.ts",
     "type-check": "tsc"
package/src/tasks/cv/imageToImage.ts
ADDED

@@ -0,0 +1,83 @@
+import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import type { BaseArgs, Options, RequestArgs } from "../../types";
+import { request } from "../custom/request";
+import { base64FromBytes } from "@huggingface/shared";
+
+export type ImageToImageArgs = BaseArgs & {
+	/**
+	 * The initial image condition
+	 *
+	 **/
+	inputs: Blob | ArrayBuffer;
+
+	parameters?: {
+		/**
+		 * The text prompt to guide the image generation.
+		 */
+		prompt?: string;
+		/**
+		 * strengh param only works for SD img2img and alt diffusion img2img models
+		 * Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
+		 * will be used as a starting point, adding more noise to it the larger the `strength`. The number of
+		 * denoising steps depends on the amount of noise initially added. When `strength` is 1, added noise will
+		 * be maximum and the denoising process will run for the full number of iterations specified in
+		 * `num_inference_steps`. A value of 1, therefore, essentially ignores `image`.
+		 **/
+		strength?: number;
+		/**
+		 * An optional negative prompt for the image generation
+		 */
+		negative_prompt?: string;
+		/**
+		 * The height in pixels of the generated image
+		 */
+		height?: number;
+		/**
+		 * The width in pixels of the generated image
+		 */
+		width?: number;
+		/**
+		 * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+		 */
+		num_inference_steps?: number;
+		/**
+		 * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+		 */
+		guidance_scale?: number;
+		/**
+		 * guess_mode only works for ControlNet models, defaults to False In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
+		 * you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+		 */
+		guess_mode?: boolean;
+	};
+};
+
+export type ImageToImageOutput = Blob;
+
+/**
+ * This task reads some text input and outputs an image.
+ * Recommended model: lllyasviel/sd-controlnet-depth
+ */
+export async function imageToImage(args: ImageToImageArgs, options?: Options): Promise<ImageToImageOutput> {
+	let reqArgs: RequestArgs;
+	if (!args.parameters) {
+		reqArgs = {
+			accessToken: args.accessToken,
+			model: args.model,
+			data: args.inputs,
+		};
+	} else {
+		reqArgs = {
+			...args,
+			inputs: base64FromBytes(
+				new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await args.inputs.arrayBuffer())
+			),
+		};
+	}
+	const res = await request<ImageToImageOutput>(reqArgs, options);
+	const isValidOutput = res && res instanceof Blob;
+	if (!isValidOutput) {
+		throw new InferenceOutputError("Expected Blob");
+	}
+	return res;
+}
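For reference, a minimal usage sketch of the new task through the HfInference client; the access token, image URL, and prompt are illustrative placeholders, not taken from this diff. As in the implementation above, omitting parameters sends the raw image bytes as the request body, while supplying parameters base64-encodes the image alongside them.

import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

const edited: Blob = await hf.imageToImage({
  model: "lllyasviel/sd-controlnet-depth", // recommended model from the doc comment above
  inputs: await (await fetch("https://example.com/depth-map.png")).blob(), // placeholder URL
  parameters: {
    prompt: "a modern living room, soft daylight",
    strength: 0.7,
  },
});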
package/src/tasks/index.ts
CHANGED

@@ -13,6 +13,7 @@ export * from "./cv/imageSegmentation";
 export * from "./cv/imageToText";
 export * from "./cv/objectDetection";
 export * from "./cv/textToImage";
+export * from "./cv/imageToImage";
 
 // Natural Language Processing tasks
 export * from "./nlp/conversational";

package/src/tasks/multimodal/documentQuestionAnswering.ts
CHANGED

@@ -12,7 +12,7 @@ export type DocumentQuestionAnsweringArgs = BaseArgs & {
	 *
	 * You can use native `File` in browsers, or `new Blob([buffer])` in node, or for a base64 image `new Blob([btoa(base64String)])`, or even `await (await fetch('...)).blob()`
	 **/
-		image: Blob;
+		image: Blob | ArrayBuffer;
 		question: string;
 	};
 };
@@ -47,8 +47,12 @@ export async function documentQuestionAnswering(
 		...args,
 		inputs: {
 			question: args.inputs.question,
-			// convert Blob to base64
-			image: base64FromBytes(
+			// convert Blob or ArrayBuffer to base64
+			image: base64FromBytes(
+				new Uint8Array(
+					args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+				)
+			),
 		},
 	} as RequestArgs;
 	const res = toArray(
package/src/tasks/multimodal/visualQuestionAnswering.ts
CHANGED

@@ -10,7 +10,7 @@ export type VisualQuestionAnsweringArgs = BaseArgs & {
	 *
	 * You can use native `File` in browsers, or `new Blob([buffer])` in node, or for a base64 image `new Blob([btoa(base64String)])`, or even `await (await fetch('...)).blob()`
	 **/
-		image: Blob;
+		image: Blob | ArrayBuffer;
 		question: string;
 	};
 };
@@ -37,8 +37,12 @@ export async function visualQuestionAnswering(
 		...args,
 		inputs: {
 			question: args.inputs.question,
-			// convert Blob to base64
-			image: base64FromBytes(
+			// convert Blob or ArrayBuffer to base64
+			image: base64FromBytes(
+				new Uint8Array(
+					args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
+				)
+			),
 		},
 	} as RequestArgs;
 	const res = (await request<[VisualQuestionAnsweringOutput]>(reqArgs, options))?.[0];
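Because `image` is now typed `Blob | ArrayBuffer`, Node callers can pass raw bytes without wrapping them in a Blob first. A minimal sketch, assuming a local file and an illustrative model id (both placeholders):

import { readFile } from "node:fs/promises";
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

// readFile returns a Buffer; slice out its exact backing bytes as an ArrayBuffer
const buf = await readFile("invoice.png"); // placeholder path
const image: ArrayBuffer = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);

const answer = await hf.documentQuestionAnswering({
  model: "impira/layoutlm-document-qa", // assumed model id for illustration
  inputs: { image, question: "What is the invoice total?" },
});
console.log(answer.answer);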
package/src/utils/pick.ts
CHANGED

@@ -1,10 +1,7 @@
 /**
  * Return copy of object, only keeping allowlisted properties.
- *
- * This doesn't add {p: undefined} anymore, for props not in the o object.
  */
 export function pick<T, K extends keyof T>(o: T, props: K[] | ReadonlyArray<K>): Pick<T, K> {
-	// inspired by stackoverflow.com/questions/25553910/one-liner-to-take-some-properties-from-object-in-es-6
 	return Object.assign(
 		{},
 		...props.map((prop) => {