@huggingface/inference 2.1.3-test2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -42,6 +42,7 @@ __export(src_exports, {
42
42
  textGeneration: () => textGeneration,
43
43
  textGenerationStream: () => textGenerationStream,
44
44
  textToImage: () => textToImage,
45
+ textToSpeech: () => textToSpeech,
45
46
  tokenClassification: () => tokenClassification,
46
47
  translation: () => translation,
47
48
  visualQuestionAnswering: () => visualQuestionAnswering,
@@ -72,6 +73,7 @@ __export(tasks_exports, {
72
73
  textGeneration: () => textGeneration,
73
74
  textGenerationStream: () => textGenerationStream,
74
75
  textToImage: () => textToImage,
76
+ textToSpeech: () => textToSpeech,
75
77
  tokenClassification: () => tokenClassification,
76
78
  translation: () => translation,
77
79
  visualQuestionAnswering: () => visualQuestionAnswering,
@@ -286,7 +288,11 @@ async function* streamingRequest(args, options) {
286
288
  onChunk(value);
287
289
  for (const event of events) {
288
290
  if (event.data.length > 0) {
289
- yield JSON.parse(event.data);
291
+ const data = JSON.parse(event.data);
292
+ if (typeof data === "object" && data !== null && "error" in data) {
293
+ throw new Error(data.error);
294
+ }
295
+ yield data;
290
296
  }
291
297
  }
292
298
  events = [];
@@ -326,6 +332,16 @@ async function automaticSpeechRecognition(args, options) {
326
332
  return res;
327
333
  }
328
334
 
335
+ // src/tasks/audio/textToSpeech.ts
336
+ async function textToSpeech(args, options) {
337
+ const res = await request(args, options);
338
+ const isValidOutput = res && res instanceof Blob;
339
+ if (!isValidOutput) {
340
+ throw new InferenceOutputError("Expected Blob");
341
+ }
342
+ return res;
343
+ }
344
+
329
345
  // src/tasks/cv/imageClassification.ts
330
346
  async function imageClassification(args, options) {
331
347
  const res = await request(args, options);
@@ -543,7 +559,7 @@ async function zeroShotClassification(args, options) {
543
559
  return res;
544
560
  }
545
561
 
546
- // ../shared/src/base64FromBytes.ts
562
+ // src/utils/base64FromBytes.ts
547
563
  function base64FromBytes(arr) {
548
564
  if (globalThis.Buffer) {
549
565
  return globalThis.Buffer.from(arr).toString("base64");
@@ -556,10 +572,6 @@ function base64FromBytes(arr) {
556
572
  }
557
573
  }
558
574
 
559
- // ../shared/src/isBackend.ts
560
- var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
561
- var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
562
-
563
575
  // src/tasks/multimodal/documentQuestionAnswering.ts
564
576
  async function documentQuestionAnswering(args, options) {
565
577
  const reqArgs = {
@@ -661,6 +673,7 @@ var HfInferenceEndpoint = class {
661
673
  textGeneration,
662
674
  textGenerationStream,
663
675
  textToImage,
676
+ textToSpeech,
664
677
  tokenClassification,
665
678
  translation,
666
679
  visualQuestionAnswering,
package/dist/index.mjs CHANGED
@@ -27,6 +27,7 @@ __export(tasks_exports, {
27
27
  textGeneration: () => textGeneration,
28
28
  textGenerationStream: () => textGenerationStream,
29
29
  textToImage: () => textToImage,
30
+ textToSpeech: () => textToSpeech,
30
31
  tokenClassification: () => tokenClassification,
31
32
  translation: () => translation,
32
33
  visualQuestionAnswering: () => visualQuestionAnswering,
@@ -241,7 +242,11 @@ async function* streamingRequest(args, options) {
241
242
  onChunk(value);
242
243
  for (const event of events) {
243
244
  if (event.data.length > 0) {
244
- yield JSON.parse(event.data);
245
+ const data = JSON.parse(event.data);
246
+ if (typeof data === "object" && data !== null && "error" in data) {
247
+ throw new Error(data.error);
248
+ }
249
+ yield data;
245
250
  }
246
251
  }
247
252
  events = [];
@@ -281,6 +286,16 @@ async function automaticSpeechRecognition(args, options) {
281
286
  return res;
282
287
  }
283
288
 
289
+ // src/tasks/audio/textToSpeech.ts
290
+ async function textToSpeech(args, options) {
291
+ const res = await request(args, options);
292
+ const isValidOutput = res && res instanceof Blob;
293
+ if (!isValidOutput) {
294
+ throw new InferenceOutputError("Expected Blob");
295
+ }
296
+ return res;
297
+ }
298
+
284
299
  // src/tasks/cv/imageClassification.ts
285
300
  async function imageClassification(args, options) {
286
301
  const res = await request(args, options);
@@ -498,7 +513,7 @@ async function zeroShotClassification(args, options) {
498
513
  return res;
499
514
  }
500
515
 
501
- // ../shared/src/base64FromBytes.ts
516
+ // src/utils/base64FromBytes.ts
502
517
  function base64FromBytes(arr) {
503
518
  if (globalThis.Buffer) {
504
519
  return globalThis.Buffer.from(arr).toString("base64");
@@ -511,10 +526,6 @@ function base64FromBytes(arr) {
511
526
  }
512
527
  }
513
528
 
514
- // ../shared/src/isBackend.ts
515
- var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
516
- var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
517
-
518
529
  // src/tasks/multimodal/documentQuestionAnswering.ts
519
530
  async function documentQuestionAnswering(args, options) {
520
531
  const reqArgs = {
@@ -615,6 +626,7 @@ export {
615
626
  textGeneration,
616
627
  textGenerationStream,
617
628
  textToImage,
629
+ textToSpeech,
618
630
  tokenClassification,
619
631
  translation,
620
632
  visualQuestionAnswering,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/inference",
3
- "version": "2.1.3-test2",
3
+ "version": "2.2.0",
4
4
  "license": "MIT",
5
5
  "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
6
6
  "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -40,12 +40,8 @@
40
40
  "@types/node": "18.13.0",
41
41
  "typescript": "^5.0.4",
42
42
  "vite": "^4.1.4",
43
- "vitest": "^0.29.8",
44
- "@huggingface/shared": "1.0.0"
43
+ "vitest": "^0.29.8"
45
44
  },
46
- "bundledDependencies": [
47
- "@huggingface/shared"
48
- ],
49
45
  "resolutions": {},
50
46
  "scripts": {
51
47
  "build": "tsup src/index.ts --format cjs,esm --clean",
@@ -0,0 +1,25 @@
1
+ import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
+ import type { BaseArgs, Options } from "../../types";
3
+ import { request } from "../custom/request";
4
+
5
+ export type TextToSpeechArgs = BaseArgs & {
6
+ /**
7
+ * The text to generate an audio from
8
+ */
9
+ inputs: string;
10
+ };
11
+
12
+ export type TextToSpeechOutput = Blob;
13
+
14
+ /**
15
+ * This task synthesizes audio of a voice pronouncing a given text.
16
+ * Recommended model: espnet/kan-bayashi_ljspeech_vits
17
+ */
18
+ export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
19
+ const res = await request<TextToSpeechOutput>(args, options);
20
+ const isValidOutput = res && res instanceof Blob;
21
+ if (!isValidOutput) {
22
+ throw new InferenceOutputError("Expected Blob");
23
+ }
24
+ return res;
25
+ }
@@ -65,7 +65,11 @@ export async function* streamingRequest<T>(
65
65
  onChunk(value);
66
66
  for (const event of events) {
67
67
  if (event.data.length > 0) {
68
- yield JSON.parse(event.data) as T;
68
+ const data = JSON.parse(event.data);
69
+ if (typeof data === "object" && data !== null && "error" in data) {
70
+ throw new Error(data.error);
71
+ }
72
+ yield data as T;
69
73
  }
70
74
  }
71
75
  events = [];
@@ -5,6 +5,7 @@ export * from "./custom/streamingRequest";
5
5
  // Audio tasks
6
6
  export * from "./audio/audioClassification";
7
7
  export * from "./audio/automaticSpeechRecognition";
8
+ export * from "./audio/textToSpeech";
8
9
 
9
10
  // Computer Vision tasks
10
11
  export * from "./cv/imageClassification";
@@ -2,7 +2,7 @@ import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
2
  import type { BaseArgs, Options } from "../../types";
3
3
  import { request } from "../custom/request";
4
4
  import type { RequestArgs } from "../../types";
5
- import { base64FromBytes } from "@huggingface/shared";
5
+ import { base64FromBytes } from "../../utils/base64FromBytes";
6
6
  import { toArray } from "../../utils/toArray";
7
7
 
8
8
  export type DocumentQuestionAnsweringArgs = BaseArgs & {
@@ -1,7 +1,7 @@
1
1
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
2
  import type { BaseArgs, Options, RequestArgs } from "../../types";
3
3
  import { request } from "../custom/request";
4
- import { base64FromBytes } from "@huggingface/shared";
4
+ import { base64FromBytes } from "../../utils/base64FromBytes";
5
5
 
6
6
  export type VisualQuestionAnsweringArgs = BaseArgs & {
7
7
  inputs: {
@@ -44,6 +44,10 @@ export type TextGenerationArgs = BaseArgs & {
44
44
  * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
45
45
  */
46
46
  top_p?: number;
47
+ /**
48
+ * (Default: None). Integer. The maximum number of tokens from the input.
49
+ */
50
+ truncate?: number;
47
51
  };
48
52
  };
49
53
 
@@ -1,8 +0,0 @@
1
- {
2
- "name": "@huggingface/shared",
3
- "version": "1.0.0",
4
- "private": true,
5
- "main": "src/index.ts",
6
- "source": "src/index.ts",
7
- "types": "./src/index.ts"
8
- }
@@ -1,3 +0,0 @@
1
- export * from './base64FromBytes';
2
- export * from './isBackend';
3
- export * from './isFrontend';
@@ -1,14 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "esModuleInterop": true,
4
- "noEmit": false,
5
- "module": "ESNext",
6
- "target": "ESNext",
7
- "moduleResolution": "Node",
8
- "noImplicitAny": true,
9
- "strict": true,
10
- "strictNullChecks": true,
11
- "skipLibCheck": true,
12
- "composite": true
13
- }
14
- }