@huggingface/inference 2.1.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -42,6 +42,7 @@ __export(src_exports, {
|
|
|
42
42
|
textGeneration: () => textGeneration,
|
|
43
43
|
textGenerationStream: () => textGenerationStream,
|
|
44
44
|
textToImage: () => textToImage,
|
|
45
|
+
textToSpeech: () => textToSpeech,
|
|
45
46
|
tokenClassification: () => tokenClassification,
|
|
46
47
|
translation: () => translation,
|
|
47
48
|
visualQuestionAnswering: () => visualQuestionAnswering,
|
|
@@ -72,6 +73,7 @@ __export(tasks_exports, {
|
|
|
72
73
|
textGeneration: () => textGeneration,
|
|
73
74
|
textGenerationStream: () => textGenerationStream,
|
|
74
75
|
textToImage: () => textToImage,
|
|
76
|
+
textToSpeech: () => textToSpeech,
|
|
75
77
|
tokenClassification: () => tokenClassification,
|
|
76
78
|
translation: () => translation,
|
|
77
79
|
visualQuestionAnswering: () => visualQuestionAnswering,
|
|
@@ -286,7 +288,11 @@ async function* streamingRequest(args, options) {
|
|
|
286
288
|
onChunk(value);
|
|
287
289
|
for (const event of events) {
|
|
288
290
|
if (event.data.length > 0) {
|
|
289
|
-
|
|
291
|
+
const data = JSON.parse(event.data);
|
|
292
|
+
if (typeof data === "object" && data !== null && "error" in data) {
|
|
293
|
+
throw new Error(data.error);
|
|
294
|
+
}
|
|
295
|
+
yield data;
|
|
290
296
|
}
|
|
291
297
|
}
|
|
292
298
|
events = [];
|
|
@@ -326,6 +332,16 @@ async function automaticSpeechRecognition(args, options) {
|
|
|
326
332
|
return res;
|
|
327
333
|
}
|
|
328
334
|
|
|
335
|
+
// src/tasks/audio/textToSpeech.ts
|
|
336
|
+
async function textToSpeech(args, options) {
|
|
337
|
+
const res = await request(args, options);
|
|
338
|
+
const isValidOutput = res && res instanceof Blob;
|
|
339
|
+
if (!isValidOutput) {
|
|
340
|
+
throw new InferenceOutputError("Expected Blob");
|
|
341
|
+
}
|
|
342
|
+
return res;
|
|
343
|
+
}
|
|
344
|
+
|
|
329
345
|
// src/tasks/cv/imageClassification.ts
|
|
330
346
|
async function imageClassification(args, options) {
|
|
331
347
|
const res = await request(args, options);
|
|
@@ -657,6 +673,7 @@ var HfInferenceEndpoint = class {
|
|
|
657
673
|
textGeneration,
|
|
658
674
|
textGenerationStream,
|
|
659
675
|
textToImage,
|
|
676
|
+
textToSpeech,
|
|
660
677
|
tokenClassification,
|
|
661
678
|
translation,
|
|
662
679
|
visualQuestionAnswering,
|
package/dist/index.mjs
CHANGED
|
@@ -27,6 +27,7 @@ __export(tasks_exports, {
|
|
|
27
27
|
textGeneration: () => textGeneration,
|
|
28
28
|
textGenerationStream: () => textGenerationStream,
|
|
29
29
|
textToImage: () => textToImage,
|
|
30
|
+
textToSpeech: () => textToSpeech,
|
|
30
31
|
tokenClassification: () => tokenClassification,
|
|
31
32
|
translation: () => translation,
|
|
32
33
|
visualQuestionAnswering: () => visualQuestionAnswering,
|
|
@@ -241,7 +242,11 @@ async function* streamingRequest(args, options) {
|
|
|
241
242
|
onChunk(value);
|
|
242
243
|
for (const event of events) {
|
|
243
244
|
if (event.data.length > 0) {
|
|
244
|
-
|
|
245
|
+
const data = JSON.parse(event.data);
|
|
246
|
+
if (typeof data === "object" && data !== null && "error" in data) {
|
|
247
|
+
throw new Error(data.error);
|
|
248
|
+
}
|
|
249
|
+
yield data;
|
|
245
250
|
}
|
|
246
251
|
}
|
|
247
252
|
events = [];
|
|
@@ -281,6 +286,16 @@ async function automaticSpeechRecognition(args, options) {
|
|
|
281
286
|
return res;
|
|
282
287
|
}
|
|
283
288
|
|
|
289
|
+
// src/tasks/audio/textToSpeech.ts
|
|
290
|
+
async function textToSpeech(args, options) {
|
|
291
|
+
const res = await request(args, options);
|
|
292
|
+
const isValidOutput = res && res instanceof Blob;
|
|
293
|
+
if (!isValidOutput) {
|
|
294
|
+
throw new InferenceOutputError("Expected Blob");
|
|
295
|
+
}
|
|
296
|
+
return res;
|
|
297
|
+
}
|
|
298
|
+
|
|
284
299
|
// src/tasks/cv/imageClassification.ts
|
|
285
300
|
async function imageClassification(args, options) {
|
|
286
301
|
const res = await request(args, options);
|
|
@@ -611,6 +626,7 @@ export {
|
|
|
611
626
|
textGeneration,
|
|
612
627
|
textGenerationStream,
|
|
613
628
|
textToImage,
|
|
629
|
+
textToSpeech,
|
|
614
630
|
tokenClassification,
|
|
615
631
|
translation,
|
|
616
632
|
visualQuestionAnswering,
|
package/src/tasks/audio/textToSpeech.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { InferenceOutputError } from "../../lib/InferenceOutputError";
|
|
2
|
+
import type { BaseArgs, Options } from "../../types";
|
|
3
|
+
import { request } from "../custom/request";
|
|
4
|
+
|
|
5
|
+
export type TextToSpeechArgs = BaseArgs & {
|
|
6
|
+
/**
|
|
7
|
+
* The text to generate an audio from
|
|
8
|
+
*/
|
|
9
|
+
inputs: string;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
export type TextToSpeechOutput = Blob;
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
 * This task synthesizes an audio of a voice pronouncing a given text.
|
|
16
|
+
* Recommended model: espnet/kan-bayashi_ljspeech_vits
|
|
17
|
+
*/
|
|
18
|
+
export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
|
|
19
|
+
const res = await request<TextToSpeechOutput>(args, options);
|
|
20
|
+
const isValidOutput = res && res instanceof Blob;
|
|
21
|
+
if (!isValidOutput) {
|
|
22
|
+
throw new InferenceOutputError("Expected Blob");
|
|
23
|
+
}
|
|
24
|
+
return res;
|
|
25
|
+
}
|
|
@@ -65,7 +65,11 @@ export async function* streamingRequest<T>(
|
|
|
65
65
|
onChunk(value);
|
|
66
66
|
for (const event of events) {
|
|
67
67
|
if (event.data.length > 0) {
|
|
68
|
-
|
|
68
|
+
const data = JSON.parse(event.data);
|
|
69
|
+
if (typeof data === "object" && data !== null && "error" in data) {
|
|
70
|
+
throw new Error(data.error);
|
|
71
|
+
}
|
|
72
|
+
yield data as T;
|
|
69
73
|
}
|
|
70
74
|
}
|
|
71
75
|
events = [];
|
package/src/tasks/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ export * from "./custom/streamingRequest";
|
|
|
5
5
|
// Audio tasks
|
|
6
6
|
export * from "./audio/audioClassification";
|
|
7
7
|
export * from "./audio/automaticSpeechRecognition";
|
|
8
|
+
export * from "./audio/textToSpeech";
|
|
8
9
|
|
|
9
10
|
// Computer Vision tasks
|
|
10
11
|
export * from "./cv/imageClassification";
|
|
@@ -44,6 +44,10 @@ export type TextGenerationArgs = BaseArgs & {
|
|
|
44
44
|
* (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
|
|
45
45
|
*/
|
|
46
46
|
top_p?: number;
|
|
47
|
+
/**
|
|
48
|
+
* (Default: None). Integer. The maximum number of tokens from the input.
|
|
49
|
+
*/
|
|
50
|
+
truncate?: number;
|
|
47
51
|
};
|
|
48
52
|
};
|
|
49
53
|
|