@huggingface/inference 2.1.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -42,6 +42,7 @@ __export(src_exports, {
42
42
  textGeneration: () => textGeneration,
43
43
  textGenerationStream: () => textGenerationStream,
44
44
  textToImage: () => textToImage,
45
+ textToSpeech: () => textToSpeech,
45
46
  tokenClassification: () => tokenClassification,
46
47
  translation: () => translation,
47
48
  visualQuestionAnswering: () => visualQuestionAnswering,
@@ -72,6 +73,7 @@ __export(tasks_exports, {
72
73
  textGeneration: () => textGeneration,
73
74
  textGenerationStream: () => textGenerationStream,
74
75
  textToImage: () => textToImage,
76
+ textToSpeech: () => textToSpeech,
75
77
  tokenClassification: () => tokenClassification,
76
78
  translation: () => translation,
77
79
  visualQuestionAnswering: () => visualQuestionAnswering,
@@ -286,7 +288,11 @@ async function* streamingRequest(args, options) {
286
288
  onChunk(value);
287
289
  for (const event of events) {
288
290
  if (event.data.length > 0) {
289
- yield JSON.parse(event.data);
291
+ const data = JSON.parse(event.data);
292
+ if (typeof data === "object" && data !== null && "error" in data) {
293
+ throw new Error(data.error);
294
+ }
295
+ yield data;
290
296
  }
291
297
  }
292
298
  events = [];
@@ -326,6 +332,16 @@ async function automaticSpeechRecognition(args, options) {
326
332
  return res;
327
333
  }
328
334
 
335
+ // src/tasks/audio/textToSpeech.ts
336
+ async function textToSpeech(args, options) {
337
+ const res = await request(args, options);
338
+ const isValidOutput = res && res instanceof Blob;
339
+ if (!isValidOutput) {
340
+ throw new InferenceOutputError("Expected Blob");
341
+ }
342
+ return res;
343
+ }
344
+
329
345
  // src/tasks/cv/imageClassification.ts
330
346
  async function imageClassification(args, options) {
331
347
  const res = await request(args, options);
@@ -657,6 +673,7 @@ var HfInferenceEndpoint = class {
657
673
  textGeneration,
658
674
  textGenerationStream,
659
675
  textToImage,
676
+ textToSpeech,
660
677
  tokenClassification,
661
678
  translation,
662
679
  visualQuestionAnswering,
package/dist/index.mjs CHANGED
@@ -27,6 +27,7 @@ __export(tasks_exports, {
27
27
  textGeneration: () => textGeneration,
28
28
  textGenerationStream: () => textGenerationStream,
29
29
  textToImage: () => textToImage,
30
+ textToSpeech: () => textToSpeech,
30
31
  tokenClassification: () => tokenClassification,
31
32
  translation: () => translation,
32
33
  visualQuestionAnswering: () => visualQuestionAnswering,
@@ -241,7 +242,11 @@ async function* streamingRequest(args, options) {
241
242
  onChunk(value);
242
243
  for (const event of events) {
243
244
  if (event.data.length > 0) {
244
- yield JSON.parse(event.data);
245
+ const data = JSON.parse(event.data);
246
+ if (typeof data === "object" && data !== null && "error" in data) {
247
+ throw new Error(data.error);
248
+ }
249
+ yield data;
245
250
  }
246
251
  }
247
252
  events = [];
@@ -281,6 +286,16 @@ async function automaticSpeechRecognition(args, options) {
281
286
  return res;
282
287
  }
283
288
 
289
+ // src/tasks/audio/textToSpeech.ts
290
+ async function textToSpeech(args, options) {
291
+ const res = await request(args, options);
292
+ const isValidOutput = res && res instanceof Blob;
293
+ if (!isValidOutput) {
294
+ throw new InferenceOutputError("Expected Blob");
295
+ }
296
+ return res;
297
+ }
298
+
284
299
  // src/tasks/cv/imageClassification.ts
285
300
  async function imageClassification(args, options) {
286
301
  const res = await request(args, options);
@@ -611,6 +626,7 @@ export {
611
626
  textGeneration,
612
627
  textGenerationStream,
613
628
  textToImage,
629
+ textToSpeech,
614
630
  tokenClassification,
615
631
  translation,
616
632
  visualQuestionAnswering,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/inference",
3
- "version": "2.1.3",
3
+ "version": "2.2.0",
4
4
  "license": "MIT",
5
5
  "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
6
6
  "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -0,0 +1,25 @@
1
+ import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
+ import type { BaseArgs, Options } from "../../types";
3
+ import { request } from "../custom/request";
4
+
5
+ export type TextToSpeechArgs = BaseArgs & {
6
+ /**
7
+ * The text to generate an audio from
8
+ */
9
+ inputs: string;
10
+ };
11
+
12
+ export type TextToSpeechOutput = Blob;
13
+
14
+ /**
15
+ * This task synthesizes audio of a voice pronouncing a given text.
16
+ * Recommended model: espnet/kan-bayashi_ljspeech_vits
17
+ */
18
+ export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
19
+ const res = await request<TextToSpeechOutput>(args, options);
20
+ const isValidOutput = res && res instanceof Blob;
21
+ if (!isValidOutput) {
22
+ throw new InferenceOutputError("Expected Blob");
23
+ }
24
+ return res;
25
+ }
@@ -65,7 +65,11 @@ export async function* streamingRequest<T>(
65
65
  onChunk(value);
66
66
  for (const event of events) {
67
67
  if (event.data.length > 0) {
68
- yield JSON.parse(event.data) as T;
68
+ const data = JSON.parse(event.data);
69
+ if (typeof data === "object" && data !== null && "error" in data) {
70
+ throw new Error(data.error);
71
+ }
72
+ yield data as T;
69
73
  }
70
74
  }
71
75
  events = [];
@@ -5,6 +5,7 @@ export * from "./custom/streamingRequest";
5
5
  // Audio tasks
6
6
  export * from "./audio/audioClassification";
7
7
  export * from "./audio/automaticSpeechRecognition";
8
+ export * from "./audio/textToSpeech";
8
9
 
9
10
  // Computer Vision tasks
10
11
  export * from "./cv/imageClassification";
@@ -44,6 +44,10 @@ export type TextGenerationArgs = BaseArgs & {
44
44
  * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
45
45
  */
46
46
  top_p?: number;
47
+ /**
48
+ * (Default: None). Integer. The maximum number of tokens from the input.
49
+ */
50
+ truncate?: number;
47
51
  };
48
52
  };
49
53