@huggingface/inference 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/dist/index.cjs +162 -69
  2. package/dist/index.js +162 -69
  3. package/dist/src/providers/fal-ai.d.ts.map +1 -1
  4. package/dist/src/providers/replicate.d.ts.map +1 -1
  5. package/dist/src/tasks/audio/audioClassification.d.ts +4 -18
  6. package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -1
  7. package/dist/src/tasks/audio/audioToAudio.d.ts +10 -9
  8. package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -1
  9. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +3 -12
  10. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -1
  11. package/dist/src/tasks/audio/textToSpeech.d.ts +4 -8
  12. package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -1
  13. package/dist/src/tasks/audio/utils.d.ts +11 -0
  14. package/dist/src/tasks/audio/utils.d.ts.map +1 -0
  15. package/dist/src/tasks/cv/imageClassification.d.ts +3 -17
  16. package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -1
  17. package/dist/src/tasks/cv/imageSegmentation.d.ts +3 -21
  18. package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -1
  19. package/dist/src/tasks/cv/imageToImage.d.ts +3 -49
  20. package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -1
  21. package/dist/src/tasks/cv/imageToText.d.ts +3 -12
  22. package/dist/src/tasks/cv/imageToText.d.ts.map +1 -1
  23. package/dist/src/tasks/cv/objectDetection.d.ts +3 -26
  24. package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -1
  25. package/dist/src/tasks/cv/textToImage.d.ts +3 -38
  26. package/dist/src/tasks/cv/textToImage.d.ts.map +1 -1
  27. package/dist/src/tasks/cv/textToVideo.d.ts +6 -0
  28. package/dist/src/tasks/cv/textToVideo.d.ts.map +1 -0
  29. package/dist/src/tasks/cv/utils.d.ts +11 -0
  30. package/dist/src/tasks/cv/utils.d.ts.map +1 -0
  31. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +7 -15
  32. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -1
  33. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +5 -28
  34. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -1
  35. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +5 -20
  36. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -1
  37. package/dist/src/tasks/nlp/fillMask.d.ts +2 -21
  38. package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -1
  39. package/dist/src/tasks/nlp/questionAnswering.d.ts +3 -25
  40. package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -1
  41. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +2 -13
  42. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -1
  43. package/dist/src/tasks/nlp/summarization.d.ts +2 -42
  44. package/dist/src/tasks/nlp/summarization.d.ts.map +1 -1
  45. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +3 -31
  46. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -1
  47. package/dist/src/tasks/nlp/textClassification.d.ts +2 -16
  48. package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -1
  49. package/dist/src/tasks/nlp/tokenClassification.d.ts +2 -45
  50. package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -1
  51. package/dist/src/tasks/nlp/translation.d.ts +2 -13
  52. package/dist/src/tasks/nlp/translation.d.ts.map +1 -1
  53. package/dist/src/tasks/nlp/zeroShotClassification.d.ts +2 -22
  54. package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -1
  55. package/dist/src/types.d.ts +4 -0
  56. package/dist/src/types.d.ts.map +1 -1
  57. package/package.json +2 -2
  58. package/src/providers/fal-ai.ts +4 -0
  59. package/src/providers/replicate.ts +3 -0
  60. package/src/tasks/audio/audioClassification.ts +7 -22
  61. package/src/tasks/audio/audioToAudio.ts +43 -23
  62. package/src/tasks/audio/automaticSpeechRecognition.ts +35 -23
  63. package/src/tasks/audio/textToSpeech.ts +8 -14
  64. package/src/tasks/audio/utils.ts +18 -0
  65. package/src/tasks/cv/imageClassification.ts +5 -20
  66. package/src/tasks/cv/imageSegmentation.ts +5 -24
  67. package/src/tasks/cv/imageToImage.ts +4 -52
  68. package/src/tasks/cv/imageToText.ts +6 -15
  69. package/src/tasks/cv/objectDetection.ts +5 -30
  70. package/src/tasks/cv/textToImage.ts +14 -50
  71. package/src/tasks/cv/textToVideo.ts +67 -0
  72. package/src/tasks/cv/utils.ts +13 -0
  73. package/src/tasks/cv/zeroShotImageClassification.ts +32 -31
  74. package/src/tasks/multimodal/documentQuestionAnswering.ts +25 -43
  75. package/src/tasks/multimodal/visualQuestionAnswering.ts +20 -36
  76. package/src/tasks/nlp/fillMask.ts +2 -22
  77. package/src/tasks/nlp/questionAnswering.ts +22 -36
  78. package/src/tasks/nlp/sentenceSimilarity.ts +12 -15
  79. package/src/tasks/nlp/summarization.ts +2 -43
  80. package/src/tasks/nlp/tableQuestionAnswering.ts +25 -41
  81. package/src/tasks/nlp/textClassification.ts +3 -18
  82. package/src/tasks/nlp/tokenClassification.ts +2 -47
  83. package/src/tasks/nlp/translation.ts +3 -17
  84. package/src/tasks/nlp/zeroShotClassification.ts +2 -24
  85. package/src/types.ts +7 -1
@@ -1,38 +1,9 @@
1
+ import type { TableQuestionAnsweringInput, TableQuestionAnsweringOutput } from "@huggingface/tasks";
1
2
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
3
  import type { BaseArgs, Options } from "../../types";
3
4
  import { request } from "../custom/request";
4
5
 
5
- export type TableQuestionAnsweringArgs = BaseArgs & {
6
- inputs: {
7
- /**
8
- * The query in plain text that you want to ask the table
9
- */
10
- query: string;
11
- /**
12
- * A table of data represented as a dict of list where entries are headers and the lists are all the values, all lists must have the same size.
13
- */
14
- table: Record<string, string[]>;
15
- };
16
- };
17
-
18
- export interface TableQuestionAnsweringOutput {
19
- /**
20
- * The aggregator used to get the answer
21
- */
22
- aggregator: string;
23
- /**
24
- * The plaintext answer
25
- */
26
- answer: string;
27
- /**
28
- * A list of coordinates of the cells contents
29
- */
30
- cells: string[];
31
- /**
32
- * a list of coordinates of the cells referenced in the answer
33
- */
34
- coordinates: number[][];
35
- }
6
+ export type TableQuestionAnsweringArgs = BaseArgs & TableQuestionAnsweringInput;
36
7
 
37
8
  /**
38
9
  * Don’t know SQL? Don’t want to dive into a large spreadsheet? Ask questions in plain english! Recommended model: google/tapas-base-finetuned-wtq.
@@ -40,22 +11,35 @@ export interface TableQuestionAnsweringOutput {
40
11
  export async function tableQuestionAnswering(
41
12
  args: TableQuestionAnsweringArgs,
42
13
  options?: Options
43
- ): Promise<TableQuestionAnsweringOutput> {
44
- const res = await request<TableQuestionAnsweringOutput>(args, {
14
+ ): Promise<TableQuestionAnsweringOutput[number]> {
15
+ const res = await request<TableQuestionAnsweringOutput | TableQuestionAnsweringOutput[number]>(args, {
45
16
  ...options,
46
17
  taskHint: "table-question-answering",
47
18
  });
48
- const isValidOutput =
49
- typeof res?.aggregator === "string" &&
50
- typeof res.answer === "string" &&
51
- Array.isArray(res.cells) &&
52
- res.cells.every((x) => typeof x === "string") &&
53
- Array.isArray(res.coordinates) &&
54
- res.coordinates.every((coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number"));
19
+ const isValidOutput = Array.isArray(res) ? res.every((elem) => validate(elem)) : validate(res);
55
20
  if (!isValidOutput) {
56
21
  throw new InferenceOutputError(
57
22
  "Expected {aggregator: string, answer: string, cells: string[], coordinates: number[][]}"
58
23
  );
59
24
  }
60
- return res;
25
+ return Array.isArray(res) ? res[0] : res;
26
+ }
27
+
28
+ function validate(elem: unknown): elem is TableQuestionAnsweringOutput[number] {
29
+ return (
30
+ typeof elem === "object" &&
31
+ !!elem &&
32
+ "aggregator" in elem &&
33
+ typeof elem.aggregator === "string" &&
34
+ "answer" in elem &&
35
+ typeof elem.answer === "string" &&
36
+ "cells" in elem &&
37
+ Array.isArray(elem.cells) &&
38
+ elem.cells.every((x: unknown): x is string => typeof x === "string") &&
39
+ "coordinates" in elem &&
40
+ Array.isArray(elem.coordinates) &&
41
+ elem.coordinates.every(
42
+ (coord: unknown): coord is number[] => Array.isArray(coord) && coord.every((x) => typeof x === "number")
43
+ )
44
+ );
61
45
  }
@@ -1,24 +1,9 @@
1
+ import type { TextClassificationInput, TextClassificationOutput } from "@huggingface/tasks";
1
2
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
3
  import type { BaseArgs, Options } from "../../types";
3
4
  import { request } from "../custom/request";
4
5
 
5
- export type TextClassificationArgs = BaseArgs & {
6
- /**
7
- * A string to be classified
8
- */
9
- inputs: string;
10
- };
11
-
12
- export type TextClassificationOutput = {
13
- /**
14
- * The label for the class (model specific)
15
- */
16
- label: string;
17
- /**
18
- * A floats that represents how likely is that the text belongs to this class.
19
- */
20
- score: number;
21
- }[];
6
+ export type TextClassificationArgs = BaseArgs & TextClassificationInput;
22
7
 
23
8
  /**
24
9
  * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
@@ -28,7 +13,7 @@ export async function textClassification(
28
13
  options?: Options
29
14
  ): Promise<TextClassificationOutput> {
30
15
  const res = (
31
- await request<TextClassificationOutput[]>(args, {
16
+ await request<TextClassificationOutput>(args, {
32
17
  ...options,
33
18
  taskHint: "text-classification",
34
19
  })
@@ -1,55 +1,10 @@
1
+ import type { TokenClassificationInput, TokenClassificationOutput } from "@huggingface/tasks";
1
2
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
3
  import type { BaseArgs, Options } from "../../types";
3
4
  import { toArray } from "../../utils/toArray";
4
5
  import { request } from "../custom/request";
5
6
 
6
- export type TokenClassificationArgs = BaseArgs & {
7
- /**
8
- * A string to be classified
9
- */
10
- inputs: string;
11
- parameters?: {
12
- /**
13
- * (Default: simple). There are several aggregation strategies:
14
- *
15
- * none: Every token gets classified without further aggregation.
16
- *
17
- * simple: Entities are grouped according to the default schema (B-, I- tags get merged when the tag is similar).
18
- *
19
- * first: Same as the simple strategy except words cannot end up with different tags. Words will use the tag of the first token when there is ambiguity.
20
- *
21
- * average: Same as the simple strategy except words cannot end up with different tags. Scores are averaged across tokens and then the maximum label is applied.
22
- *
23
- * max: Same as the simple strategy except words cannot end up with different tags. Word entity will be the token with the maximum score.
24
- */
25
- aggregation_strategy?: "none" | "simple" | "first" | "average" | "max";
26
- };
27
- };
28
-
29
- export interface TokenClassificationOutputValue {
30
- /**
31
- * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
32
- */
33
- end: number;
34
- /**
35
- * The type for the entity being recognized (model specific).
36
- */
37
- entity_group: string;
38
- /**
39
- * How likely the entity was recognized.
40
- */
41
- score: number;
42
- /**
43
- * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
44
- */
45
- start: number;
46
- /**
47
- * The string that was captured
48
- */
49
- word: string;
50
- }
51
-
52
- export type TokenClassificationOutput = TokenClassificationOutputValue[];
7
+ export type TokenClassificationArgs = BaseArgs & TokenClassificationInput;
53
8
 
54
9
  /**
55
10
  * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
@@ -1,28 +1,14 @@
1
+ import type { TranslationInput, TranslationOutput } from "@huggingface/tasks";
1
2
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
3
  import type { BaseArgs, Options } from "../../types";
3
4
  import { request } from "../custom/request";
4
5
 
5
- export type TranslationArgs = BaseArgs & {
6
- /**
7
- * A string to be translated
8
- */
9
- inputs: string | string[];
10
- };
11
-
12
- export interface TranslationOutputValue {
13
- /**
14
- * The string after translation
15
- */
16
- translation_text: string;
17
- }
18
-
19
- export type TranslationOutput = TranslationOutputValue | TranslationOutputValue[];
20
-
6
+ export type TranslationArgs = BaseArgs & TranslationInput;
21
7
  /**
22
8
  * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
23
9
  */
24
10
  export async function translation(args: TranslationArgs, options?: Options): Promise<TranslationOutput> {
25
- const res = await request<TranslationOutputValue[]>(args, {
11
+ const res = await request<TranslationOutput>(args, {
26
12
  ...options,
27
13
  taskHint: "translation",
28
14
  });
@@ -1,32 +1,10 @@
1
+ import type { ZeroShotClassificationInput, ZeroShotClassificationOutput } from "@huggingface/tasks";
1
2
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
3
  import type { BaseArgs, Options } from "../../types";
3
4
  import { toArray } from "../../utils/toArray";
4
5
  import { request } from "../custom/request";
5
6
 
6
- export type ZeroShotClassificationArgs = BaseArgs & {
7
- /**
8
- * a string or list of strings
9
- */
10
- inputs: string | string[];
11
- parameters: {
12
- /**
13
- * a list of strings that are potential classes for inputs. (max 10 candidate_labels, for more, simply run multiple requests, results are going to be misleading if using too many candidate_labels anyway. If you want to keep the exact same, you can simply run multi_label=True and do the scaling on your end.
14
- */
15
- candidate_labels: string[];
16
- /**
17
- * (Default: false) Boolean that is set to True if classes can overlap
18
- */
19
- multi_label?: boolean;
20
- };
21
- };
22
-
23
- export interface ZeroShotClassificationOutputValue {
24
- labels: string[];
25
- scores: number[];
26
- sequence: string;
27
- }
28
-
29
- export type ZeroShotClassificationOutput = ZeroShotClassificationOutputValue[];
7
+ export type ZeroShotClassificationArgs = BaseArgs & ZeroShotClassificationInput;
30
8
 
31
9
  /**
32
10
  * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
package/src/types.ts CHANGED
@@ -84,7 +84,13 @@ export interface BaseArgs {
84
84
  }
85
85
 
86
86
  export type RequestArgs = BaseArgs &
87
- ({ data: Blob | ArrayBuffer } | { inputs: unknown } | ChatCompletionInput) & {
87
+ (
88
+ | { data: Blob | ArrayBuffer }
89
+ | { inputs: unknown }
90
+ | { prompt: string }
91
+ | { audio_url: string }
92
+ | ChatCompletionInput
93
+ ) & {
88
94
  parameters?: Record<string, unknown>;
89
95
  accessToken?: string;
90
96
  };