@huggingface/tasks 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +1 -1
  2. package/dist/index.cjs +3144 -3085
  3. package/dist/index.d.ts +441 -74
  4. package/dist/index.js +3143 -3084
  5. package/package.json +1 -1
  6. package/src/index.ts +2 -5
  7. package/src/library-to-tasks.ts +1 -1
  8. package/src/model-libraries-downloads.ts +20 -0
  9. package/src/{library-ui-elements.ts → model-libraries-snippets.ts} +46 -292
  10. package/src/model-libraries.ts +375 -44
  11. package/src/pipelines.ts +14 -8
  12. package/src/tasks/audio-classification/inference.ts +4 -4
  13. package/src/tasks/audio-classification/spec/input.json +4 -4
  14. package/src/tasks/audio-classification/spec/output.json +1 -12
  15. package/src/tasks/automatic-speech-recognition/inference.ts +35 -30
  16. package/src/tasks/automatic-speech-recognition/spec/input.json +3 -3
  17. package/src/tasks/automatic-speech-recognition/spec/output.json +30 -28
  18. package/src/tasks/common-definitions.json +25 -17
  19. package/src/tasks/depth-estimation/inference.ts +10 -10
  20. package/src/tasks/depth-estimation/spec/input.json +3 -8
  21. package/src/tasks/depth-estimation/spec/output.json +9 -3
  22. package/src/tasks/document-question-answering/inference.ts +16 -8
  23. package/src/tasks/document-question-answering/spec/input.json +9 -9
  24. package/src/tasks/document-question-answering/spec/output.json +2 -2
  25. package/src/tasks/feature-extraction/inference.ts +1 -1
  26. package/src/tasks/feature-extraction/spec/input.json +2 -2
  27. package/src/tasks/fill-mask/inference.ts +4 -3
  28. package/src/tasks/fill-mask/spec/input.json +3 -3
  29. package/src/tasks/fill-mask/spec/output.json +1 -1
  30. package/src/tasks/image-classification/inference.ts +3 -3
  31. package/src/tasks/image-classification/spec/input.json +4 -4
  32. package/src/tasks/image-segmentation/inference.ts +3 -3
  33. package/src/tasks/image-segmentation/spec/input.json +4 -4
  34. package/src/tasks/image-to-image/inference.ts +5 -5
  35. package/src/tasks/image-to-image/spec/input.json +9 -7
  36. package/src/tasks/image-to-text/inference.ts +25 -20
  37. package/src/tasks/image-to-text/spec/input.json +3 -3
  38. package/src/tasks/image-to-text/spec/output.json +8 -11
  39. package/src/tasks/index.ts +2 -0
  40. package/src/tasks/object-detection/inference.ts +1 -1
  41. package/src/tasks/object-detection/spec/input.json +2 -2
  42. package/src/tasks/placeholder/spec/input.json +4 -4
  43. package/src/tasks/placeholder/spec/output.json +1 -1
  44. package/src/tasks/question-answering/inference.ts +8 -8
  45. package/src/tasks/question-answering/spec/input.json +9 -9
  46. package/src/tasks/sentence-similarity/inference.ts +1 -1
  47. package/src/tasks/sentence-similarity/spec/input.json +2 -2
  48. package/src/tasks/summarization/inference.ts +5 -4
  49. package/src/tasks/table-question-answering/inference.ts +1 -1
  50. package/src/tasks/table-question-answering/spec/input.json +8 -3
  51. package/src/tasks/text-classification/inference.ts +3 -3
  52. package/src/tasks/text-classification/spec/input.json +4 -4
  53. package/src/tasks/text-generation/inference.ts +123 -14
  54. package/src/tasks/text-generation/spec/input.json +28 -12
  55. package/src/tasks/text-generation/spec/output.json +112 -9
  56. package/src/tasks/text-to-audio/inference.ts +24 -19
  57. package/src/tasks/text-to-audio/spec/input.json +2 -2
  58. package/src/tasks/text-to-audio/spec/output.json +10 -13
  59. package/src/tasks/text-to-image/inference.ts +6 -8
  60. package/src/tasks/text-to-image/spec/input.json +9 -7
  61. package/src/tasks/text-to-image/spec/output.json +7 -9
  62. package/src/tasks/text-to-speech/inference.ts +18 -17
  63. package/src/tasks/text2text-generation/inference.ts +10 -8
  64. package/src/tasks/text2text-generation/spec/input.json +4 -4
  65. package/src/tasks/text2text-generation/spec/output.json +8 -11
  66. package/src/tasks/token-classification/inference.ts +4 -4
  67. package/src/tasks/token-classification/spec/input.json +4 -4
  68. package/src/tasks/token-classification/spec/output.json +1 -1
  69. package/src/tasks/translation/inference.ts +5 -4
  70. package/src/tasks/video-classification/inference.ts +5 -5
  71. package/src/tasks/video-classification/spec/input.json +6 -6
  72. package/src/tasks/visual-question-answering/inference.ts +2 -2
  73. package/src/tasks/visual-question-answering/spec/input.json +3 -3
  74. package/src/tasks/zero-shot-classification/inference.ts +3 -3
  75. package/src/tasks/zero-shot-classification/spec/input.json +4 -4
  76. package/src/tasks/zero-shot-image-classification/inference.ts +2 -2
  77. package/src/tasks/zero-shot-image-classification/spec/input.json +3 -3
  78. package/src/tasks/zero-shot-object-detection/inference.ts +1 -1
  79. package/src/tasks/zero-shot-object-detection/spec/input.json +2 -2
@@ -3,18 +3,15 @@
3
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
4
  "description": "Outputs of inference for the Text To Audio task",
5
5
  "title": "TextToAudioOutput",
6
- "type": "array",
7
- "items": {
8
- "type": "object",
9
- "properties": {
10
- "audio": {
11
- "description": "The generated audio waveform."
12
- },
13
- "samplingRate": {
14
- "type": "number",
15
- "description": "The sampling rate of the generated audio waveform."
16
- }
6
+ "type": "object",
7
+ "properties": {
8
+ "audio": {
9
+ "description": "The generated audio waveform."
17
10
  },
18
- "required": ["audio", "samplingRate"]
19
- }
11
+ "sampling_rate": {
12
+ "type": "number",
13
+ "description": "The sampling rate of the generated audio waveform."
14
+ }
15
+ },
16
+ "required": ["audio", "samplingRate"]
20
17
  }
@@ -11,7 +11,7 @@ export interface TextToImageInput {
11
11
  /**
12
12
  * The input text data (sometimes called "prompt"
13
13
  */
14
- data: string;
14
+ inputs: string;
15
15
  /**
16
16
  * Additional inference parameters
17
17
  */
@@ -29,16 +29,16 @@ export interface TextToImageParameters {
29
29
  * For diffusion models. A higher guidance scale value encourages the model to generate
30
30
  * images closely linked to the text prompt at the expense of lower image quality.
31
31
  */
32
- guidanceScale?: number;
32
+ guidance_scale?: number;
33
33
  /**
34
34
  * One or several prompt to guide what NOT to include in image generation.
35
35
  */
36
- negativePrompt?: string[];
36
+ negative_prompt?: string[];
37
37
  /**
38
38
  * For diffusion models. The number of denoising steps. More denoising steps usually lead to
39
39
  * a higher quality image at the expense of slower inference.
40
40
  */
41
- numInferenceSteps?: number;
41
+ num_inference_steps?: number;
42
42
  /**
43
43
  * For diffusion models. Override the scheduler with a compatible one
44
44
  */
@@ -46,7 +46,7 @@ export interface TextToImageParameters {
46
46
  /**
47
47
  * The size in pixel of the output image
48
48
  */
49
- targetSize?: TargetSize;
49
+ target_size?: TargetSize;
50
50
  [property: string]: unknown;
51
51
  }
52
52
 
@@ -62,9 +62,7 @@ export interface TargetSize {
62
62
  /**
63
63
  * Outputs of inference for the Text To Image task
64
64
  */
65
- export type TextToImageOutput = unknown[] | boolean | number | number | null | TextToImageOutputObject | string;
66
-
67
- export interface TextToImageOutputObject {
65
+ export interface TextToImageOutput {
68
66
  /**
69
67
  * The generated image
70
68
  */
@@ -5,7 +5,7 @@
5
5
  "title": "TextToImageInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "The input text data (sometimes called \"prompt\"",
10
10
  "type": "string"
11
11
  },
@@ -20,20 +20,22 @@
20
20
  "description": "Additional inference parameters for Text To Image",
21
21
  "type": "object",
22
22
  "properties": {
23
- "guidanceScale": {
23
+ "guidance_scale": {
24
24
  "type": "number",
25
25
  "description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
26
26
  },
27
- "negativePrompt": {
27
+ "negative_prompt": {
28
28
  "type": "array",
29
- "items": { "type": "string" },
29
+ "items": {
30
+ "type": "string"
31
+ },
30
32
  "description": "One or several prompt to guide what NOT to include in image generation."
31
33
  },
32
- "numInferenceSteps": {
34
+ "num_inference_steps": {
33
35
  "type": "integer",
34
36
  "description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
35
37
  },
36
- "targetSize": {
38
+ "target_size": {
37
39
  "type": "object",
38
40
  "description": "The size in pixel of the output image",
39
41
  "properties": {
@@ -53,5 +55,5 @@
53
55
  }
54
56
  }
55
57
  },
56
- "required": ["data"]
58
+ "required": ["inputs"]
57
59
  }
@@ -3,13 +3,11 @@
3
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
4
  "description": "Outputs of inference for the Text To Image task",
5
5
  "title": "TextToImageOutput",
6
- "type": "array",
7
- "items": {
8
- "properties": {
9
- "image": {
10
- "description": "The generated image"
11
- }
12
- },
13
- "required": ["image"]
14
- }
6
+ "type": "object",
7
+ "properties": {
8
+ "image": {
9
+ "description": "The generated image"
10
+ }
11
+ },
12
+ "required": ["image"]
15
13
  }
@@ -13,7 +13,7 @@ export interface TextToSpeechInput {
13
13
  /**
14
14
  * The input text data
15
15
  */
16
- data: string;
16
+ inputs: string;
17
17
  /**
18
18
  * Additional inference parameters
19
19
  */
@@ -43,18 +43,18 @@ export interface GenerationParameters {
43
43
  /**
44
44
  * Whether to use sampling instead of greedy decoding when generating new tokens.
45
45
  */
46
- doSample?: boolean;
46
+ do_sample?: boolean;
47
47
  /**
48
48
  * Controls the stopping condition for beam-based methods.
49
49
  */
50
- earlyStopping?: EarlyStoppingUnion;
50
+ early_stopping?: EarlyStoppingUnion;
51
51
  /**
52
52
  * If set to float strictly between 0 and 1, only tokens with a conditional probability
53
53
  * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
54
54
  * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
55
55
  * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
56
56
  */
57
- epsilonCutoff?: number;
57
+ epsilon_cutoff?: number;
58
58
  /**
59
59
  * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
60
60
  * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -64,37 +64,37 @@ export interface GenerationParameters {
64
64
  * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
65
65
  * for more details.
66
66
  */
67
- etaCutoff?: number;
67
+ eta_cutoff?: number;
68
68
  /**
69
69
  * The maximum length (in tokens) of the generated text, including the input.
70
70
  */
71
- maxLength?: number;
71
+ max_length?: number;
72
72
  /**
73
73
  * The maximum number of tokens to generate. Takes precedence over maxLength.
74
74
  */
75
- maxNewTokens?: number;
75
+ max_new_tokens?: number;
76
76
  /**
77
77
  * The minimum length (in tokens) of the generated text, including the input.
78
78
  */
79
- minLength?: number;
79
+ min_length?: number;
80
80
  /**
81
81
  * The minimum number of tokens to generate. Takes precedence over maxLength.
82
82
  */
83
- minNewTokens?: number;
83
+ min_new_tokens?: number;
84
84
  /**
85
85
  * Number of groups to divide num_beams into in order to ensure diversity among different
86
86
  * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
87
87
  */
88
- numBeamGroups?: number;
88
+ num_beam_groups?: number;
89
89
  /**
90
90
  * Number of beams to use for beam search.
91
91
  */
92
- numBeams?: number;
92
+ num_beams?: number;
93
93
  /**
94
94
  * The value balances the model confidence and the degeneration penalty in contrastive
95
95
  * search decoding.
96
96
  */
97
- penaltyAlpha?: number;
97
+ penalty_alpha?: number;
98
98
  /**
99
99
  * The value used to modulate the next token probabilities.
100
100
  */
@@ -102,12 +102,12 @@ export interface GenerationParameters {
102
102
  /**
103
103
  * The number of highest probability vocabulary tokens to keep for top-k-filtering.
104
104
  */
105
- topK?: number;
105
+ top_k?: number;
106
106
  /**
107
107
  * If set to float < 1, only the smallest set of most probable tokens with probabilities
108
108
  * that add up to top_p or higher are kept for generation.
109
109
  */
110
- topP?: number;
110
+ top_p?: number;
111
111
  /**
112
112
  * Local typicality measures how similar the conditional probability of predicting a target
113
113
  * token next is to the expected conditional probability of predicting a random token next,
@@ -115,11 +115,11 @@ export interface GenerationParameters {
115
115
  * most locally typical tokens with probabilities that add up to typical_p or higher are
116
116
  * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
117
117
  */
118
- typicalP?: number;
118
+ typical_p?: number;
119
119
  /**
120
120
  * Whether the model should use the past last key/values attentions to speed up decoding
121
121
  */
122
- useCache?: boolean;
122
+ use_cache?: boolean;
123
123
  [property: string]: unknown;
124
124
  }
125
125
 
@@ -138,9 +138,10 @@ export interface TextToSpeechOutput {
138
138
  * The generated audio waveform.
139
139
  */
140
140
  audio: unknown;
141
+ samplingRate: unknown;
141
142
  /**
142
143
  * The sampling rate of the generated audio waveform.
143
144
  */
144
- samplingRate: number;
145
+ sampling_rate?: number;
145
146
  [property: string]: unknown;
146
147
  }
@@ -3,6 +3,7 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
+
6
7
  /**
7
8
  * Inputs for Text2text Generation inference
8
9
  */
@@ -10,13 +11,14 @@ export interface Text2TextGenerationInput {
10
11
  /**
11
12
  * The input text data
12
13
  */
13
- data: string;
14
+ inputs: string;
14
15
  /**
15
16
  * Additional inference parameters
16
17
  */
17
18
  parameters?: Text2TextGenerationParameters;
18
19
  [property: string]: unknown;
19
20
  }
21
+
20
22
  /**
21
23
  * Additional inference parameters
22
24
  *
@@ -26,28 +28,28 @@ export interface Text2TextGenerationParameters {
26
28
  /**
27
29
  * Whether to clean up the potential extra spaces in the text output.
28
30
  */
29
- cleanUpTokenizationSpaces?: boolean;
31
+ clean_up_tokenization_spaces?: boolean;
30
32
  /**
31
33
  * Additional parametrization of the text generation algorithm
32
34
  */
33
- generateParameters?: {
34
- [key: string]: unknown;
35
- };
35
+ generate_parameters?: { [key: string]: unknown };
36
36
  /**
37
37
  * The truncation strategy to use
38
38
  */
39
39
  truncation?: Text2TextGenerationTruncationStrategy;
40
40
  [property: string]: unknown;
41
41
  }
42
+
42
43
  export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
43
- export type Text2TextGenerationOutput = Text2TextGenerationOutputElement[];
44
+
44
45
  /**
45
46
  * Outputs of inference for the Text2text Generation task
46
47
  */
47
- export interface Text2TextGenerationOutputElement {
48
+ export interface Text2TextGenerationOutput {
49
+ generatedText: unknown;
48
50
  /**
49
51
  * The generated text.
50
52
  */
51
- generatedText: string;
53
+ generated_text?: string;
52
54
  [property: string]: unknown;
53
55
  }
@@ -5,7 +5,7 @@
5
5
  "title": "Text2TextGenerationInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "The input text data",
10
10
  "type": "string"
11
11
  },
@@ -20,7 +20,7 @@
20
20
  "description": "Additional inference parameters for Text2text Generation",
21
21
  "type": "object",
22
22
  "properties": {
23
- "cleanUpTokenizationSpaces": {
23
+ "clean_up_tokenization_spaces": {
24
24
  "type": "boolean",
25
25
  "description": "Whether to clean up the potential extra spaces in the text output."
26
26
  },
@@ -43,7 +43,7 @@
43
43
  }
44
44
  ]
45
45
  },
46
- "generateParameters": {
46
+ "generate_parameters": {
47
47
  "title": "generateParameters",
48
48
  "type": "object",
49
49
  "description": "Additional parametrization of the text generation algorithm"
@@ -51,5 +51,5 @@
51
51
  }
52
52
  }
53
53
  },
54
- "required": ["data"]
54
+ "required": ["inputs"]
55
55
  }
@@ -3,15 +3,12 @@
3
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
4
  "description": "Outputs of inference for the Text2text Generation task",
5
5
  "title": "Text2TextGenerationOutput",
6
- "type": "array",
7
- "items": {
8
- "type": "object",
9
- "properties": {
10
- "generatedText": {
11
- "type": "string",
12
- "description": "The generated text."
13
- }
14
- },
15
- "required": ["generatedText"]
16
- }
6
+ "type": "object",
7
+ "properties": {
8
+ "generated_text": {
9
+ "type": "string",
10
+ "description": "The generated text."
11
+ }
12
+ },
13
+ "required": ["generatedText"]
17
14
  }
@@ -10,7 +10,7 @@ export interface TokenClassificationInput {
10
10
  /**
11
11
  * The input text data
12
12
  */
13
- data: string;
13
+ inputs: string;
14
14
  /**
15
15
  * Additional inference parameters
16
16
  */
@@ -26,11 +26,11 @@ export interface TokenClassificationParameters {
26
26
  /**
27
27
  * The strategy used to fuse tokens based on model predictions
28
28
  */
29
- aggregationStrategy?: TokenClassificationAggregationStrategy;
29
+ aggregation_strategy?: TokenClassificationAggregationStrategy;
30
30
  /**
31
31
  * A list of labels to ignore
32
32
  */
33
- ignoreLabels?: string[];
33
+ ignore_labels?: string[];
34
34
  /**
35
35
  * The number of overlapping tokens between chunks when splitting the input text.
36
36
  */
@@ -64,7 +64,7 @@ export interface TokenClassificationOutputElement {
64
64
  /**
65
65
  * The predicted label for that group of tokens
66
66
  */
67
- entityGroup?: string;
67
+ entity_group?: string;
68
68
  label: unknown;
69
69
  /**
70
70
  * The associated score / probability
@@ -5,7 +5,7 @@
5
5
  "title": "TokenClassificationInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "The input text data",
10
10
  "type": "string"
11
11
  },
@@ -20,7 +20,7 @@
20
20
  "description": "Additional inference parameters for Token Classification",
21
21
  "type": "object",
22
22
  "properties": {
23
- "ignoreLabels": {
23
+ "ignore_labels": {
24
24
  "type": "array",
25
25
  "items": {
26
26
  "type": "string"
@@ -31,7 +31,7 @@
31
31
  "type": "integer",
32
32
  "description": "The number of overlapping tokens between chunks when splitting the input text."
33
33
  },
34
- "aggregationStrategy": {
34
+ "aggregation_strategy": {
35
35
  "title": "TokenClassificationAggregationStrategy",
36
36
  "type": "string",
37
37
  "description": "The strategy used to fuse tokens based on model predictions",
@@ -61,5 +61,5 @@
61
61
  }
62
62
  }
63
63
  },
64
- "required": ["data"]
64
+ "required": ["inputs"]
65
65
  }
@@ -7,7 +7,7 @@
7
7
  "items": {
8
8
  "type": "object",
9
9
  "properties": {
10
- "entityGroup": {
10
+ "entity_group": {
11
11
  "type": "string",
12
12
  "description": "The predicted label for that group of tokens"
13
13
  },
@@ -13,7 +13,7 @@ export interface TranslationInput {
13
13
  /**
14
14
  * The input text data
15
15
  */
16
- data: string;
16
+ inputs: string;
17
17
  /**
18
18
  * Additional inference parameters
19
19
  */
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
30
30
  /**
31
31
  * Whether to clean up the potential extra spaces in the text output.
32
32
  */
33
- cleanUpTokenizationSpaces?: boolean;
33
+ clean_up_tokenization_spaces?: boolean;
34
34
  /**
35
35
  * Additional parametrization of the text generation algorithm
36
36
  */
37
- generateParameters?: { [key: string]: unknown };
37
+ generate_parameters?: { [key: string]: unknown };
38
38
  /**
39
39
  * The truncation strategy to use
40
40
  */
@@ -50,9 +50,10 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
50
50
  * Outputs of inference for the Text2text Generation task
51
51
  */
52
52
  export interface TranslationOutput {
53
+ generatedText: unknown;
53
54
  /**
54
55
  * The generated text.
55
56
  */
56
- generatedText: string;
57
+ generated_text?: string;
57
58
  [property: string]: unknown;
58
59
  }
@@ -10,7 +10,7 @@ export interface VideoClassificationInput {
10
10
  /**
11
11
  * The input video data
12
12
  */
13
- data: unknown;
13
+ inputs: unknown;
14
14
  /**
15
15
  * Additional inference parameters
16
16
  */
@@ -26,16 +26,16 @@ export interface VideoClassificationParameters {
26
26
  /**
27
27
  * The sampling rate used to select frames from the video.
28
28
  */
29
- frameSamplingRate?: number;
30
- functionToApply?: ClassificationOutputTransform;
29
+ frame_sampling_rate?: number;
30
+ function_to_apply?: ClassificationOutputTransform;
31
31
  /**
32
32
  * The number of sampled frames to consider for classification.
33
33
  */
34
- numFrames?: number;
34
+ num_frames?: number;
35
35
  /**
36
36
  * When specified, limits the output to the top K most probable classes.
37
37
  */
38
- topK?: number;
38
+ top_k?: number;
39
39
  [property: string]: unknown;
40
40
  }
41
41
  /**
@@ -5,7 +5,7 @@
5
5
  "title": "VideoClassificationInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "The input video data"
10
10
  },
11
11
  "parameters": {
@@ -19,24 +19,24 @@
19
19
  "description": "Additional inference parameters for Video Classification",
20
20
  "type": "object",
21
21
  "properties": {
22
- "functionToApply": {
22
+ "function_to_apply": {
23
23
  "title": "TextClassificationOutputTransform",
24
24
  "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
25
25
  },
26
- "numFrames": {
26
+ "num_frames": {
27
27
  "type": "integer",
28
28
  "description": "The number of sampled frames to consider for classification."
29
29
  },
30
- "frameSamplingRate": {
30
+ "frame_sampling_rate": {
31
31
  "type": "integer",
32
32
  "description": "The sampling rate used to select frames from the video."
33
33
  },
34
- "topK": {
34
+ "top_k": {
35
35
  "type": "integer",
36
36
  "description": "When specified, limits the output to the top K most probable classes."
37
37
  }
38
38
  }
39
39
  }
40
40
  },
41
- "required": ["data"]
41
+ "required": ["inputs"]
42
42
  }
@@ -10,7 +10,7 @@ export interface VisualQuestionAnsweringInput {
10
10
  /**
11
11
  * One (image, question) pair to answer
12
12
  */
13
- data: VisualQuestionAnsweringInputData;
13
+ inputs: VisualQuestionAnsweringInputData;
14
14
  /**
15
15
  * Additional inference parameters
16
16
  */
@@ -42,7 +42,7 @@ export interface VisualQuestionAnsweringParameters {
42
42
  * return less than topk answers if there are not enough options available within the
43
43
  * context.
44
44
  */
45
- topK?: number;
45
+ top_k?: number;
46
46
  [property: string]: unknown;
47
47
  }
48
48
  export type VisualQuestionAnsweringOutput = VisualQuestionAnsweringOutputElement[];
@@ -5,7 +5,7 @@
5
5
  "title": "VisualQuestionAnsweringInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "One (image, question) pair to answer",
10
10
  "type": "object",
11
11
  "title": "VisualQuestionAnsweringInputData",
@@ -30,12 +30,12 @@
30
30
  "description": "Additional inference parameters for Visual Question Answering",
31
31
  "type": "object",
32
32
  "properties": {
33
- "topK": {
33
+ "top_k": {
34
34
  "type": "integer",
35
35
  "description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
36
36
  }
37
37
  }
38
38
  }
39
39
  },
40
- "required": ["data"]
40
+ "required": ["inputs"]
41
41
  }
@@ -10,7 +10,7 @@ export interface ZeroShotClassificationInput {
10
10
  /**
11
11
  * The input text data, with candidate labels
12
12
  */
13
- data: ZeroShotClassificationInputData;
13
+ inputs: ZeroShotClassificationInputData;
14
14
  /**
15
15
  * Additional inference parameters
16
16
  */
@@ -41,13 +41,13 @@ export interface ZeroShotClassificationParameters {
41
41
  * The sentence used in conjunction with candidateLabels to attempt the text classification
42
42
  * by replacing the placeholder with the candidate labels.
43
43
  */
44
- hypothesisTemplate?: string;
44
+ hypothesis_template?: string;
45
45
  /**
46
46
  * Whether multiple candidate labels can be true. If false, the scores are normalized such
47
47
  * that the sum of the label likelihoods for each sequence is 1. If true, the labels are
48
48
  * considered independent and probabilities are normalized for each candidate.
49
49
  */
50
- multiLabel?: boolean;
50
+ multi_label?: boolean;
51
51
  [property: string]: unknown;
52
52
  }
53
53
  export type ZeroShotClassificationOutput = ZeroShotClassificationOutputElement[];
@@ -5,7 +5,7 @@
5
5
  "title": "ZeroShotClassificationInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "The input text data, with candidate labels",
10
10
  "type": "object",
11
11
  "title": "ZeroShotClassificationInputData",
@@ -35,16 +35,16 @@
35
35
  "description": "Additional inference parameters for Zero Shot Classification",
36
36
  "type": "object",
37
37
  "properties": {
38
- "hypothesisTemplate": {
38
+ "hypothesis_template": {
39
39
  "type": "string",
40
40
  "description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
41
41
  },
42
- "multiLabel": {
42
+ "multi_label": {
43
43
  "type": "boolean",
44
44
  "description": "Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. If true, the labels are considered independent and probabilities are normalized for each candidate."
45
45
  }
46
46
  }
47
47
  }
48
48
  },
49
- "required": ["data"]
49
+ "required": ["inputs"]
50
50
  }