@huggingface/inference 2.1.3-test2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +19 -6
- package/dist/index.mjs +18 -6
- package/package.json +2 -6
- package/src/tasks/audio/textToSpeech.ts +25 -0
- package/src/tasks/custom/streamingRequest.ts +5 -1
- package/src/tasks/index.ts +1 -0
- package/src/tasks/multimodal/documentQuestionAnswering.ts +1 -1
- package/src/tasks/multimodal/visualQuestionAnswering.ts +1 -1
- package/src/tasks/nlp/textGeneration.ts +4 -0
- package/node_modules/@huggingface/shared/package.json +0 -8
- package/node_modules/@huggingface/shared/src/index.ts +0 -3
- package/node_modules/@huggingface/shared/tsconfig.json +0 -14
- /package/{node_modules/@huggingface/shared/src → src/utils}/base64FromBytes.ts +0 -0
- /package/{node_modules/@huggingface/shared/src → src/utils}/isBackend.ts +0 -0
- /package/{node_modules/@huggingface/shared/src → src/utils}/isFrontend.ts +0 -0
package/dist/index.js
CHANGED

@@ -42,6 +42,7 @@ __export(src_exports, {
   textGeneration: () => textGeneration,
   textGenerationStream: () => textGenerationStream,
   textToImage: () => textToImage,
+  textToSpeech: () => textToSpeech,
   tokenClassification: () => tokenClassification,
   translation: () => translation,
   visualQuestionAnswering: () => visualQuestionAnswering,
@@ -72,6 +73,7 @@ __export(tasks_exports, {
   textGeneration: () => textGeneration,
   textGenerationStream: () => textGenerationStream,
   textToImage: () => textToImage,
+  textToSpeech: () => textToSpeech,
   tokenClassification: () => tokenClassification,
   translation: () => translation,
   visualQuestionAnswering: () => visualQuestionAnswering,
@@ -286,7 +288,11 @@ async function* streamingRequest(args, options) {
         onChunk(value);
         for (const event of events) {
           if (event.data.length > 0) {
-            yield JSON.parse(event.data);
+            const data = JSON.parse(event.data);
+            if (typeof data === "object" && data !== null && "error" in data) {
+              throw new Error(data.error);
+            }
+            yield data;
           }
         }
         events = [];
@@ -326,6 +332,16 @@ async function automaticSpeechRecognition(args, options) {
   return res;
 }
 
+// src/tasks/audio/textToSpeech.ts
+async function textToSpeech(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = res && res instanceof Blob;
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Blob");
+  }
+  return res;
+}
+
 // src/tasks/cv/imageClassification.ts
 async function imageClassification(args, options) {
   const res = await request(args, options);
@@ -543,7 +559,7 @@ async function zeroShotClassification(args, options) {
   return res;
 }
 
-// ../shared/src/base64FromBytes.ts
+// src/utils/base64FromBytes.ts
 function base64FromBytes(arr) {
   if (globalThis.Buffer) {
     return globalThis.Buffer.from(arr).toString("base64");
@@ -556,10 +572,6 @@ function base64FromBytes(arr) {
   }
 }
 
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
@@ -661,6 +673,7 @@ var HfInferenceEndpoint = class {
   textGeneration,
   textGenerationStream,
   textToImage,
+  textToSpeech,
   tokenClassification,
   translation,
   visualQuestionAnswering,

package/dist/index.mjs
CHANGED

@@ -27,6 +27,7 @@ __export(tasks_exports, {
   textGeneration: () => textGeneration,
   textGenerationStream: () => textGenerationStream,
   textToImage: () => textToImage,
+  textToSpeech: () => textToSpeech,
   tokenClassification: () => tokenClassification,
   translation: () => translation,
   visualQuestionAnswering: () => visualQuestionAnswering,
@@ -241,7 +242,11 @@ async function* streamingRequest(args, options) {
         onChunk(value);
         for (const event of events) {
           if (event.data.length > 0) {
-            yield JSON.parse(event.data);
+            const data = JSON.parse(event.data);
+            if (typeof data === "object" && data !== null && "error" in data) {
+              throw new Error(data.error);
+            }
+            yield data;
           }
         }
         events = [];
@@ -281,6 +286,16 @@ async function automaticSpeechRecognition(args, options) {
   return res;
 }
 
+// src/tasks/audio/textToSpeech.ts
+async function textToSpeech(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = res && res instanceof Blob;
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Blob");
+  }
+  return res;
+}
+
 // src/tasks/cv/imageClassification.ts
 async function imageClassification(args, options) {
   const res = await request(args, options);
@@ -498,7 +513,7 @@ async function zeroShotClassification(args, options) {
   return res;
 }
 
-// ../shared/src/base64FromBytes.ts
+// src/utils/base64FromBytes.ts
 function base64FromBytes(arr) {
   if (globalThis.Buffer) {
     return globalThis.Buffer.from(arr).toString("base64");
@@ -511,10 +526,6 @@ function base64FromBytes(arr) {
   }
 }
 
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
@@ -615,6 +626,7 @@ export {
   textGeneration,
   textGenerationStream,
   textToImage,
+  textToSpeech,
   tokenClassification,
   translation,
   visualQuestionAnswering,

package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "2.1.3-test2",
+  "version": "2.2.0",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
   "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -40,12 +40,8 @@
     "@types/node": "18.13.0",
     "typescript": "^5.0.4",
     "vite": "^4.1.4",
-    "vitest": "^0.29.8",
-    "@huggingface/shared": "1.0.0"
+    "vitest": "^0.29.8"
   },
-  "bundledDependencies": [
-    "@huggingface/shared"
-  ],
   "resolutions": {},
   "scripts": {
     "build": "tsup src/index.ts --format cjs,esm --clean",

package/src/tasks/audio/textToSpeech.ts
ADDED

@@ -0,0 +1,25 @@
+import { InferenceOutputError } from "../../lib/InferenceOutputError";
+import type { BaseArgs, Options } from "../../types";
+import { request } from "../custom/request";
+
+export type TextToSpeechArgs = BaseArgs & {
+	/**
+	 * The text to generate an audio from
+	 */
+	inputs: string;
+};
+
+export type TextToSpeechOutput = Blob;
+
+/**
+ * This task synthesize an audio of a voice pronouncing a given text.
+ * Recommended model: espnet/kan-bayashi_ljspeech_vits
+ */
+export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
+	const res = await request<TextToSpeechOutput>(args, options);
+	const isValidOutput = res && res instanceof Blob;
+	if (!isValidOutput) {
+		throw new InferenceOutputError("Expected Blob");
+	}
+	return res;
+}

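For orientation, here is a minimal consumer-side sketch of the new task (not itself part of the diff). The access token is a placeholder, and the model is simply the one recommended in the doc comment above; the `HfInferenceEndpoint` hunk in dist/index.js shows that `textToSpeech` is also bound as a client method.

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

async function speak(text: string): Promise<void> {
	// Per TextToSpeechOutput above, the task resolves with a Blob of audio bytes
	const audio = await hf.textToSpeech({
		model: "espnet/kan-bayashi_ljspeech_vits",
		inputs: text,
	});
	// In a browser, the Blob can be played directly
	new Audio(URL.createObjectURL(audio)).play();
}
```
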
package/src/tasks/custom/streamingRequest.ts
CHANGED

@@ -65,7 +65,11 @@ export async function* streamingRequest<T>(
 			onChunk(value);
 			for (const event of events) {
 				if (event.data.length > 0) {
-					yield JSON.parse(event.data) as T;
+					const data = JSON.parse(event.data);
+					if (typeof data === "object" && data !== null && "error" in data) {
+						throw new Error(data.error);
+					}
+					yield data as T;
 				}
 			}
 			events = [];

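The visible effect of this hunk: when the server pushes a server-sent event whose payload is shaped like `{"error": "..."}`, the generator now throws instead of yielding it as if it were a regular chunk. A consumer-side sketch of the new behavior (model and token are placeholders):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

try {
	for await (const output of hf.textGenerationStream({
		model: "google/flan-t5-xxl", // placeholder model
		inputs: "one two three four",
	})) {
		process.stdout.write(output.token.text);
	}
} catch (err) {
	// As of this release, an {"error": "..."} event surfaces here as a
	// thrown Error rather than being yielded as a malformed stream chunk
	console.error(err);
}
```
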
package/src/tasks/index.ts
CHANGED

@@ -5,6 +5,7 @@ export * from "./custom/streamingRequest";
 // Audio tasks
 export * from "./audio/audioClassification";
 export * from "./audio/automaticSpeechRecognition";
+export * from "./audio/textToSpeech";
 
 // Commputer Vision tasks
 export * from "./cv/imageClassification";

package/src/tasks/multimodal/documentQuestionAnswering.ts
CHANGED

@@ -2,7 +2,7 @@ import { InferenceOutputError } from "../../lib/InferenceOutputError";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";
 import type { RequestArgs } from "../../types";
-import { base64FromBytes } from "@huggingface/shared";
+import { base64FromBytes } from "../../utils/base64FromBytes";
 import { toArray } from "../../utils/toArray";
 
 export type DocumentQuestionAnsweringArgs = BaseArgs & {

package/src/tasks/multimodal/visualQuestionAnswering.ts
CHANGED

@@ -1,7 +1,7 @@
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
 import type { BaseArgs, Options, RequestArgs } from "../../types";
 import { request } from "../custom/request";
-import { base64FromBytes } from "@huggingface/shared";
+import { base64FromBytes } from "../../utils/base64FromBytes";
 
 export type VisualQuestionAnsweringArgs = BaseArgs & {
 	inputs: {

package/src/tasks/nlp/textGeneration.ts
CHANGED

@@ -44,6 +44,10 @@ export type TextGenerationArgs = BaseArgs & {
 		 * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
 		 */
 		top_p?: number;
+		/**
+		 * (Default: None). Integer. The maximum number of tokens from the input.
+		 */
+		truncate?: number;
 	};
 };
 

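A hedged sketch of the new parameter in use. The diff only adds the type, so the call below is illustrative: token, model, and prompt are placeholders, and `truncate` caps how many input tokens the model receives, per the doc comment above.

```ts
import { textGeneration } from "@huggingface/inference";

const longPrompt = "Summarize the following report: ..."; // placeholder prompt

const out = await textGeneration({
	accessToken: "hf_...", // placeholder
	model: "bigscience/bloom", // placeholder model
	inputs: longPrompt,
	parameters: {
		truncate: 1000, // keep at most 1000 tokens from the input
		max_new_tokens: 250,
	},
});
console.log(out.generated_text);
```
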
package/node_modules/@huggingface/shared/tsconfig.json
DELETED

@@ -1,14 +0,0 @@
-{
-	"compilerOptions": {
-		"esModuleInterop": true,
-		"noEmit": false,
-		"module": "ESNext",
-		"target": "ESNext",
-		"moduleResolution": "Node",
-		"noImplicitAny": true,
-		"strict": true,
-		"strictNullChecks": true,
-		"skipLibCheck": true,
-		"composite": true
-	}
-}

package/src/utils/base64FromBytes.ts
RENAMED
File without changes

package/src/utils/isBackend.ts
RENAMED
File without changes

package/src/utils/isFrontend.ts
RENAMED
File without changes