@huggingface/tasks 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -549,7 +549,7 @@ interface ModelData {
  base_model_name?: string;
  task_type?: string;
  };
- tokenizer?: TokenizerConfig;
+ tokenizer_config?: TokenizerConfig;
  };
  /**
  * all the model tags
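Downstream code that still reads the old field can bridge the rename with a tiny helper. A minimal sketch, assuming TokenizerConfig is re-exported from the package root; the helper itself is illustrative, not part of the package:

import type { TokenizerConfig } from "@huggingface/tasks";

// Illustrative: prefer the renamed field, fall back to the pre-rename one.
function getTokenizerConfig(config: {
  tokenizer?: TokenizerConfig;
  tokenizer_config?: TokenizerConfig;
}): TokenizerConfig | undefined {
  return config.tokenizer_config ?? config.tokenizer;
}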
@@ -575,7 +575,7 @@ interface ModelData {
  */
  widgetData?: WidgetExample[] | undefined;
  /**
- * Parameters that will be used by the widget when calling Inference Endpoints (serverless)
+ * Parameters that will be used by the widget when calling Inference API (serverless)
  * https://huggingface.co/docs/api-inference/detailed_parameters
  *
  * can be set in the model card metadata (under `inference/parameters`)
@@ -732,6 +732,13 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
  };
  };
  };
+ audiocraft: {
+ prettyLabel: string;
+ repoName: string;
+ repoUrl: string;
+ snippets: (model: ModelData) => string[];
+ filter: false;
+ };
  bertopic: {
  prettyLabel: string;
  repoName: string;
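The new audiocraft entry follows the same shape as the other library descriptors: a display label, repo metadata, and a snippets function that maps a ModelData to copy-pasteable code strings. A hedged usage sketch, assuming MODEL_LIBRARIES_UI_ELEMENTS and ModelData are exported at runtime as declared above:

import { MODEL_LIBRARIES_UI_ELEMENTS, type ModelData } from "@huggingface/tasks";

// Look up the new descriptor and render its code snippets for a given model;
// the snippet contents come from the package, not from this example.
function audiocraftSnippets(model: ModelData): string[] {
  return MODEL_LIBRARIES_UI_ELEMENTS.audiocraft.snippets(model);
}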
@@ -1051,8 +1058,8 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
  };
  };
  type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
- declare const ALL_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis")[];
- declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis")[];
+ declare const ALL_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis")[];
+ declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis")[];
 
  /**
  * Mapping from library name (excluding Transformers) to its supported tasks.
@@ -1066,6 +1073,1987 @@ declare const LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: Partial<Record<ModelL
  type PerLanguageMapping = Map<WidgetType, string[] | WidgetExample[]>;
  declare const MAPPING_DEFAULT_WIDGET: Map<string, PerLanguageMapping>;
 
1076
+ /**
1077
+ * Inference code generated from the JSON schema spec in ./spec
1078
+ *
1079
+ * Using src/scripts/inference-codegen
1080
+ */
1081
+ /**
1082
+ * Inputs for Audio Classification inference
1083
+ */
1084
+ interface AudioClassificationInput {
1085
+ /**
1086
+ * The input audio data
1087
+ */
1088
+ inputs: unknown;
1089
+ /**
1090
+ * Additional inference parameters
1091
+ */
1092
+ parameters?: AudioClassificationParameters;
1093
+ [property: string]: unknown;
1094
+ }
1095
+ /**
1096
+ * Additional inference parameters
1097
+ *
1098
+ * Additional inference parameters for Audio Classification
1099
+ */
1100
+ interface AudioClassificationParameters {
1101
+ function_to_apply?: ClassificationOutputTransform$3;
1102
+ /**
1103
+ * When specified, limits the output to the top K most probable classes.
1104
+ */
1105
+ top_k?: number;
1106
+ [property: string]: unknown;
1107
+ }
1108
+ /**
1109
+ * The function to apply to the model outputs in order to retrieve the scores.
1110
+ */
1111
+ type ClassificationOutputTransform$3 = "sigmoid" | "softmax" | "none";
1112
+ type AudioClassificationOutput = AudioClassificationOutputElement[];
1113
+ /**
1114
+ * Outputs for Audio Classification inference
1115
+ */
1116
+ interface AudioClassificationOutputElement {
1117
+ /**
1118
+ * The predicted class label.
1119
+ */
1120
+ label: string;
1121
+ /**
1122
+ * The corresponding probability.
1123
+ */
1124
+ score: number;
1125
+ [property: string]: unknown;
1126
+ }
1127
+
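As a usage sketch, these declarations let a caller type-check the request payload and rank the response. The import path assumes the types are re-exported from the package root:

import type { AudioClassificationInput, AudioClassificationOutput } from "@huggingface/tasks";

const request: AudioClassificationInput = {
  inputs: "<binary audio or base64 string>", // typed as unknown in the schema
  parameters: { top_k: 3, function_to_apply: "softmax" },
};

// Return the most probable label from a typed response.
function bestLabel(output: AudioClassificationOutput): string | undefined {
  return [...output].sort((a, b) => b.score - a.score)[0]?.label;
}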
1128
+ /**
1129
+ * Inference code generated from the JSON schema spec in ./spec
1130
+ *
1131
+ * Using src/scripts/inference-codegen
1132
+ */
1133
+ /**
1134
+ * Inputs for Automatic Speech Recognition inference
1135
+ */
1136
+ interface AutomaticSpeechRecognitionInput {
1137
+ /**
1138
+ * The input audio data
1139
+ */
1140
+ inputs: unknown;
1141
+ /**
1142
+ * Additional inference parameters
1143
+ */
1144
+ parameters?: AutomaticSpeechRecognitionParameters;
1145
+ [property: string]: unknown;
1146
+ }
1147
+ /**
1148
+ * Additional inference parameters
1149
+ *
1150
+ * Additional inference parameters for Automatic Speech Recognition
1151
+ */
1152
+ interface AutomaticSpeechRecognitionParameters {
1153
+ /**
1154
+ * Parametrization of the text generation process
1155
+ */
1156
+ generate?: GenerationParameters$2;
1157
+ /**
1158
+ * Whether to output corresponding timestamps with the generated text
1159
+ */
1160
+ return_timestamps?: boolean;
1161
+ [property: string]: unknown;
1162
+ }
1163
+ /**
1164
+ * Parametrization of the text generation process
1165
+ *
1166
+ * Ad-hoc parametrization of the text generation process
1167
+ */
1168
+ interface GenerationParameters$2 {
1169
+ /**
1170
+ * Whether to use sampling instead of greedy decoding when generating new tokens.
1171
+ */
1172
+ do_sample?: boolean;
1173
+ /**
1174
+ * Controls the stopping condition for beam-based methods.
1175
+ */
1176
+ early_stopping?: EarlyStoppingUnion$2;
1177
+ /**
1178
+ * If set to float strictly between 0 and 1, only tokens with a conditional probability
1179
+ * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
1180
+ * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
1181
+ * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
1182
+ */
1183
+ epsilon_cutoff?: number;
1184
+ /**
1185
+ * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
1186
+ * float strictly between 0 and 1, a token is only considered if it is greater than either
1187
+ * eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
1188
+ * term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
1189
+ * the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
1190
+ * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
1191
+ * for more details.
1192
+ */
1193
+ eta_cutoff?: number;
1194
+ /**
1195
+ * The maximum length (in tokens) of the generated text, including the input.
1196
+ */
1197
+ max_length?: number;
1198
+ /**
1199
+ * The maximum number of tokens to generate. Takes precedence over maxLength.
1200
+ */
1201
+ max_new_tokens?: number;
1202
+ /**
1203
+ * The minimum length (in tokens) of the generated text, including the input.
1204
+ */
1205
+ min_length?: number;
1206
+ /**
1207
+ * The minimum number of tokens to generate. Takes precedence over minLength.
1208
+ */
1209
+ min_new_tokens?: number;
1210
+ /**
1211
+ * Number of groups to divide num_beams into in order to ensure diversity among different
1212
+ * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
1213
+ */
1214
+ num_beam_groups?: number;
1215
+ /**
1216
+ * Number of beams to use for beam search.
1217
+ */
1218
+ num_beams?: number;
1219
+ /**
1220
+ * The value balances the model confidence and the degeneration penalty in contrastive
1221
+ * search decoding.
1222
+ */
1223
+ penalty_alpha?: number;
1224
+ /**
1225
+ * The value used to modulate the next token probabilities.
1226
+ */
1227
+ temperature?: number;
1228
+ /**
1229
+ * The number of highest probability vocabulary tokens to keep for top-k-filtering.
1230
+ */
1231
+ top_k?: number;
1232
+ /**
1233
+ * If set to float < 1, only the smallest set of most probable tokens with probabilities
1234
+ * that add up to top_p or higher are kept for generation.
1235
+ */
1236
+ top_p?: number;
1237
+ /**
1238
+ * Local typicality measures how similar the conditional probability of predicting a target
1239
+ * token next is to the expected conditional probability of predicting a random token next,
1240
+ * given the partial text already generated. If set to float < 1, the smallest set of the
1241
+ * most locally typical tokens with probabilities that add up to typical_p or higher are
1242
+ * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
1243
+ */
1244
+ typical_p?: number;
1245
+ /**
1246
+ * Whether the model should use the past last key/values attentions to speed up decoding
1247
+ */
1248
+ use_cache?: boolean;
1249
+ [property: string]: unknown;
1250
+ }
1251
+ /**
1252
+ * Controls the stopping condition for beam-based methods.
1253
+ */
1254
+ type EarlyStoppingUnion$2 = boolean | "never";
1255
+ /**
1256
+ * Outputs of inference for the Automatic Speech Recognition task
1257
+ */
1258
+ interface AutomaticSpeechRecognitionOutput {
1259
+ /**
1260
+ * When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
1261
+ * the model.
1262
+ */
1263
+ chunks?: AutomaticSpeechRecognitionOutputChunk[];
1264
+ /**
1265
+ * The recognized text.
1266
+ */
1267
+ text: string;
1268
+ [property: string]: unknown;
1269
+ }
1270
+ interface AutomaticSpeechRecognitionOutputChunk {
1271
+ /**
1272
+ * A chunk of text identified by the model
1273
+ */
1274
+ text: string;
1275
+ /**
1276
+ * The start and end timestamps corresponding with the text
1277
+ */
1278
+ timestamps: number[];
1279
+ [property: string]: unknown;
1280
+ }
1281
+
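A small consumer sketch for the output shape: chunks is only populated when return_timestamps was requested (import path assumed):

import type { AutomaticSpeechRecognitionOutput } from "@huggingface/tasks";

// Render "[start - end] text" lines from timestamped chunks, or fall back to the flat text.
function formatTranscript(output: AutomaticSpeechRecognitionOutput): string {
  if (!output.chunks) return output.text;
  return output.chunks
    .map((c) => `[${c.timestamps[0]}s - ${c.timestamps[1]}s] ${c.text}`)
    .join("\n");
}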
1282
+ /**
1283
+ * Inference code generated from the JSON schema spec in ./spec
1284
+ *
1285
+ * Using src/scripts/inference-codegen
1286
+ */
1287
+ /**
1288
+ * Inputs for Document Question Answering inference
1289
+ */
1290
+ interface DocumentQuestionAnsweringInput {
1291
+ /**
1292
+ * One (document, question) pair to answer
1293
+ */
1294
+ inputs: DocumentQuestionAnsweringInputData;
1295
+ /**
1296
+ * Additional inference parameters
1297
+ */
1298
+ parameters?: DocumentQuestionAnsweringParameters;
1299
+ [property: string]: unknown;
1300
+ }
1301
+ /**
1302
+ * One (document, question) pair to answer
1303
+ */
1304
+ interface DocumentQuestionAnsweringInputData {
1305
+ /**
1306
+ * The image on which the question is asked
1307
+ */
1308
+ image: unknown;
1309
+ /**
1310
+ * A question to ask of the document
1311
+ */
1312
+ question: string;
1313
+ [property: string]: unknown;
1314
+ }
1315
+ /**
1316
+ * Additional inference parameters
1317
+ *
1318
+ * Additional inference parameters for Document Question Answering
1319
+ */
1320
+ interface DocumentQuestionAnsweringParameters {
1321
+ /**
1322
+ * If the words in the document are too long to fit with the question for the model, it will
1323
+ * be split in several chunks with some overlap. This argument controls the size of that
1324
+ * overlap.
1325
+ */
1326
+ doc_stride?: number;
1327
+ /**
1328
+ * Whether to accept impossible as an answer
1329
+ */
1330
+ handle_impossible_answer?: boolean;
1331
+ /**
1332
+ * Language to use while running OCR. Defaults to English.
1333
+ */
1334
+ lang?: string;
1335
+ /**
1336
+ * The maximum length of predicted answers (e.g., only answers with a shorter length are
1337
+ * considered).
1338
+ */
1339
+ max_answer_len?: number;
1340
+ /**
1341
+ * The maximum length of the question after tokenization. It will be truncated if needed.
1342
+ */
1343
+ max_question_len?: number;
1344
+ /**
1345
+ * The maximum length of the total sentence (context + question) in tokens of each chunk
1346
+ * passed to the model. The context will be split in several chunks (using doc_stride as
1347
+ * overlap) if needed.
1348
+ */
1349
+ max_seq_len?: number;
1350
+ /**
1351
+ * The number of answers to return (will be chosen by order of likelihood). Can return less
1352
+ * than top_k answers if there are not enough options available within the context.
1353
+ */
1354
+ top_k?: number;
1355
+ /**
1356
+ * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
1357
+ * skip the OCR step and use the provided bounding boxes instead.
1358
+ */
1359
+ word_boxes?: WordBox[];
1360
+ [property: string]: unknown;
1361
+ }
1362
+ type WordBox = number[] | string;
1363
+ type DocumentQuestionAnsweringOutput = DocumentQuestionAnsweringOutputElement[];
1364
+ /**
1365
+ * Outputs of inference for the Document Question Answering task
1366
+ */
1367
+ interface DocumentQuestionAnsweringOutputElement {
1368
+ /**
1369
+ * The answer to the question.
1370
+ */
1371
+ answer: string;
1372
+ /**
1373
+ * The end word index of the answer (in the OCR’d version of the input or provided word
1374
+ * boxes).
1375
+ */
1376
+ end: number;
1377
+ /**
1378
+ * The probability associated with the answer.
1379
+ */
1380
+ score: number;
1381
+ /**
1382
+ * The start word index of the answer (in the OCR’d version of the input or provided word
1383
+ * boxes).
1384
+ */
1385
+ start: number;
1386
+ /**
1387
+ * The index of each word/box pair that is in the answer
1388
+ */
1389
+ words: number[];
1390
+ [property: string]: unknown;
1391
+ }
1392
+
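A request sketch; word_boxes mixes plain words and normalized number[] boxes per the WordBox union, and providing it skips the OCR step. Values are illustrative:

import type { DocumentQuestionAnsweringInput } from "@huggingface/tasks";

const request: DocumentQuestionAnsweringInput = {
  inputs: { image: "<image bytes or URL>", question: "What is the invoice total?" },
  parameters: {
    lang: "en",
    top_k: 1,
    word_boxes: ["total", [612, 340, 690, 362]], // boxes are normalized to 0-1000
  },
};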
1393
+ /**
1394
+ * Inference code generated from the JSON schema spec in ./spec
1395
+ *
1396
+ * Using src/scripts/inference-codegen
1397
+ */
1398
+ type FeatureExtractionOutput = unknown[];
1399
+ /**
1400
+ * Inputs for Text Embedding inference
1401
+ */
1402
+ interface FeatureExtractionInput {
1403
+ /**
1404
+ * The text to get the embeddings of
1405
+ */
1406
+ inputs: string;
1407
+ /**
1408
+ * Additional inference parameters
1409
+ */
1410
+ parameters?: {
1411
+ [key: string]: unknown;
1412
+ };
1413
+ [property: string]: unknown;
1414
+ }
1415
+
1416
+ /**
1417
+ * Inference code generated from the JSON schema spec in ./spec
1418
+ *
1419
+ * Using src/scripts/inference-codegen
1420
+ */
1421
+ /**
1422
+ * Inputs for Fill Mask inference
1423
+ */
1424
+ interface FillMaskInput {
1425
+ /**
1426
+ * The text with masked tokens
1427
+ */
1428
+ inputs: string;
1429
+ /**
1430
+ * Additional inference parameters
1431
+ */
1432
+ parameters?: FillMaskParameters;
1433
+ [property: string]: unknown;
1434
+ }
1435
+ /**
1436
+ * Additional inference parameters
1437
+ *
1438
+ * Additional inference parameters for Fill Mask
1439
+ */
1440
+ interface FillMaskParameters {
1441
+ /**
1442
+ * When passed, the model will limit the scores to the passed targets instead of looking up
1443
+ * in the whole vocabulary. If the provided targets are not in the model vocab, they will be
1444
+ * tokenized and the first resulting token will be used (with a warning, and that might be
1445
+ * slower).
1446
+ */
1447
+ targets?: string[];
1448
+ /**
1449
+ * When passed, overrides the number of predictions to return.
1450
+ */
1451
+ top_k?: number;
1452
+ [property: string]: unknown;
1453
+ }
1454
+ type FillMaskOutput = FillMaskOutputElement[];
1455
+ /**
1456
+ * Outputs of inference for the Fill Mask task
1457
+ */
1458
+ interface FillMaskOutputElement {
1459
+ /**
1460
+ * The corresponding probability
1461
+ */
1462
+ score: number;
1463
+ /**
1464
+ * The corresponding input with the mask token prediction.
1465
+ */
1466
+ sequence: string;
1467
+ /**
1468
+ * The predicted token id (to replace the masked one).
1469
+ */
1470
+ token: number;
1471
+ tokenStr: unknown;
1472
+ /**
1473
+ * The predicted token (to replace the masked one).
1474
+ */
1475
+ token_str?: string;
1476
+ [property: string]: unknown;
1477
+ }
1478
+
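A sketch of building a fill-mask request and reading back the best filled sequence; the mask token depends on the chosen model and is an assumption here:

import type { FillMaskInput, FillMaskOutput } from "@huggingface/tasks";

const request: FillMaskInput = {
  inputs: "The capital of France is [MASK].",
  parameters: { top_k: 5, targets: ["paris", "london"] },
};

// Each element pairs a candidate token with its score and the fully filled sequence.
function topSequence(output: FillMaskOutput): string | undefined {
  return output[0]?.sequence;
}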
1479
+ /**
1480
+ * Inference code generated from the JSON schema spec in ./spec
1481
+ *
1482
+ * Using src/scripts/inference-codegen
1483
+ */
1484
+ /**
1485
+ * Inputs for Image Classification inference
1486
+ */
1487
+ interface ImageClassificationInput {
1488
+ /**
1489
+ * The input image data
1490
+ */
1491
+ inputs: unknown;
1492
+ /**
1493
+ * Additional inference parameters
1494
+ */
1495
+ parameters?: ImageClassificationParameters;
1496
+ [property: string]: unknown;
1497
+ }
1498
+ /**
1499
+ * Additional inference parameters
1500
+ *
1501
+ * Additional inference parameters for Image Classification
1502
+ */
1503
+ interface ImageClassificationParameters {
1504
+ function_to_apply?: ClassificationOutputTransform$2;
1505
+ /**
1506
+ * When specified, limits the output to the top K most probable classes.
1507
+ */
1508
+ top_k?: number;
1509
+ [property: string]: unknown;
1510
+ }
1511
+ /**
1512
+ * The function to apply to the model outputs in order to retrieve the scores.
1513
+ */
1514
+ type ClassificationOutputTransform$2 = "sigmoid" | "softmax" | "none";
1515
+ type ImageClassificationOutput = ImageClassificationOutputElement[];
1516
+ /**
1517
+ * Outputs of inference for the Image Classification task
1518
+ */
1519
+ interface ImageClassificationOutputElement {
1520
+ /**
1521
+ * The predicted class label.
1522
+ */
1523
+ label: string;
1524
+ /**
1525
+ * The corresponding probability.
1526
+ */
1527
+ score: number;
1528
+ [property: string]: unknown;
1529
+ }
1530
+
1531
+ /**
1532
+ * Inference code generated from the JSON schema spec in ./spec
1533
+ *
1534
+ * Using src/scripts/inference-codegen
1535
+ */
1536
+ /**
1537
+ * Inputs for Image To Image inference
1538
+ */
1539
+ interface ImageToImageInput {
1540
+ /**
1541
+ * The input image data
1542
+ */
1543
+ inputs: unknown;
1544
+ /**
1545
+ * Additional inference parameters
1546
+ */
1547
+ parameters?: ImageToImageParameters;
1548
+ [property: string]: unknown;
1549
+ }
1550
+ /**
1551
+ * Additional inference parameters
1552
+ *
1553
+ * Additional inference parameters for Image To Image
1554
+ */
1555
+ interface ImageToImageParameters {
1556
+ /**
1557
+ * For diffusion models. A higher guidance scale value encourages the model to generate
1558
+ * images closely linked to the text prompt at the expense of lower image quality.
1559
+ */
1560
+ guidance_scale?: number;
1561
+ /**
1562
+ * One or several prompts to guide what NOT to include in image generation.
1563
+ */
1564
+ negative_prompt?: string[];
1565
+ /**
1566
+ * For diffusion models. The number of denoising steps. More denoising steps usually lead to
1567
+ * a higher quality image at the expense of slower inference.
1568
+ */
1569
+ num_inference_steps?: number;
1570
+ /**
1571
+ * The size in pixels of the output image
1572
+ */
1573
+ target_size?: TargetSize$1;
1574
+ [property: string]: unknown;
1575
+ }
1576
+ /**
1577
+ * The size in pixels of the output image
1578
+ */
1579
+ interface TargetSize$1 {
1580
+ height: number;
1581
+ width: number;
1582
+ [property: string]: unknown;
1583
+ }
1584
+ /**
1585
+ * Outputs of inference for the Image To Image task
1586
+ */
1587
+ interface ImageToImageOutput {
1588
+ /**
1589
+ * The output image
1590
+ */
1591
+ image?: unknown;
1592
+ [property: string]: unknown;
1593
+ }
1594
+
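A parameters-only sketch with typical diffusion-style settings; every field is optional in the schema and the numbers are illustrative:

import type { ImageToImageParameters } from "@huggingface/tasks";

const parameters: ImageToImageParameters = {
  guidance_scale: 7.5,
  num_inference_steps: 30,
  negative_prompt: ["blurry", "low resolution"],
  target_size: { width: 768, height: 768 },
};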
1595
+ /**
1596
+ * Inference code generated from the JSON schema spec in ./spec
1597
+ *
1598
+ * Using src/scripts/inference-codegen
1599
+ */
1600
+ /**
1601
+ * Inputs for Image To Text inference
1602
+ */
1603
+ interface ImageToTextInput {
1604
+ /**
1605
+ * The input image data
1606
+ */
1607
+ inputs: unknown;
1608
+ /**
1609
+ * Additional inference parameters
1610
+ */
1611
+ parameters?: ImageToTextParameters;
1612
+ [property: string]: unknown;
1613
+ }
1614
+ /**
1615
+ * Additional inference parameters
1616
+ *
1617
+ * Additional inference parameters for Image To Text
1618
+ */
1619
+ interface ImageToTextParameters {
1620
+ /**
1621
+ * Parametrization of the text generation process
1622
+ */
1623
+ generate?: GenerationParameters$1;
1624
+ /**
1625
+ * The maximum number of tokens to generate.
1626
+ */
1627
+ max_new_tokens?: number;
1628
+ [property: string]: unknown;
1629
+ }
1630
+ /**
1631
+ * Parametrization of the text generation process
1632
+ *
1633
+ * Ad-hoc parametrization of the text generation process
1634
+ */
1635
+ interface GenerationParameters$1 {
1636
+ /**
1637
+ * Whether to use sampling instead of greedy decoding when generating new tokens.
1638
+ */
1639
+ do_sample?: boolean;
1640
+ /**
1641
+ * Controls the stopping condition for beam-based methods.
1642
+ */
1643
+ early_stopping?: EarlyStoppingUnion$1;
1644
+ /**
1645
+ * If set to float strictly between 0 and 1, only tokens with a conditional probability
1646
+ * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
1647
+ * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
1648
+ * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
1649
+ */
1650
+ epsilon_cutoff?: number;
1651
+ /**
1652
+ * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
1653
+ * float strictly between 0 and 1, a token is only considered if it is greater than either
1654
+ * eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
1655
+ * term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
1656
+ * the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
1657
+ * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
1658
+ * for more details.
1659
+ */
1660
+ eta_cutoff?: number;
1661
+ /**
1662
+ * The maximum length (in tokens) of the generated text, including the input.
1663
+ */
1664
+ max_length?: number;
1665
+ /**
1666
+ * The maximum number of tokens to generate. Takes precedence over maxLength.
1667
+ */
1668
+ max_new_tokens?: number;
1669
+ /**
1670
+ * The minimum length (in tokens) of the generated text, including the input.
1671
+ */
1672
+ min_length?: number;
1673
+ /**
1674
+ * The minimum number of tokens to generate. Takes precedence over minLength.
1675
+ */
1676
+ min_new_tokens?: number;
1677
+ /**
1678
+ * Number of groups to divide num_beams into in order to ensure diversity among different
1679
+ * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
1680
+ */
1681
+ num_beam_groups?: number;
1682
+ /**
1683
+ * Number of beams to use for beam search.
1684
+ */
1685
+ num_beams?: number;
1686
+ /**
1687
+ * The value balances the model confidence and the degeneration penalty in contrastive
1688
+ * search decoding.
1689
+ */
1690
+ penalty_alpha?: number;
1691
+ /**
1692
+ * The value used to modulate the next token probabilities.
1693
+ */
1694
+ temperature?: number;
1695
+ /**
1696
+ * The number of highest probability vocabulary tokens to keep for top-k-filtering.
1697
+ */
1698
+ top_k?: number;
1699
+ /**
1700
+ * If set to float < 1, only the smallest set of most probable tokens with probabilities
1701
+ * that add up to top_p or higher are kept for generation.
1702
+ */
1703
+ top_p?: number;
1704
+ /**
1705
+ * Local typicality measures how similar the conditional probability of predicting a target
1706
+ * token next is to the expected conditional probability of predicting a random token next,
1707
+ * given the partial text already generated. If set to float < 1, the smallest set of the
1708
+ * most locally typical tokens with probabilities that add up to typical_p or higher are
1709
+ * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
1710
+ */
1711
+ typical_p?: number;
1712
+ /**
1713
+ * Whether the model should use the past last key/values attentions to speed up decoding
1714
+ */
1715
+ use_cache?: boolean;
1716
+ [property: string]: unknown;
1717
+ }
1718
+ /**
1719
+ * Controls the stopping condition for beam-based methods.
1720
+ */
1721
+ type EarlyStoppingUnion$1 = boolean | "never";
1722
+ /**
1723
+ * Outputs of inference for the Image To Text task
1724
+ */
1725
+ interface ImageToTextOutput {
1726
+ generatedText: unknown;
1727
+ /**
1728
+ * The generated text.
1729
+ */
1730
+ generated_text?: string;
1731
+ [property: string]: unknown;
1732
+ }
1733
+
1734
+ /**
1735
+ * Inference code generated from the JSON schema spec in ./spec
1736
+ *
1737
+ * Using src/scripts/inference-codegen
1738
+ */
1739
+ /**
1740
+ * Inputs for Image Segmentation inference
1741
+ */
1742
+ interface ImageSegmentationInput {
1743
+ /**
1744
+ * The input image data
1745
+ */
1746
+ inputs: unknown;
1747
+ /**
1748
+ * Additional inference parameters
1749
+ */
1750
+ parameters?: ImageSegmentationParameters;
1751
+ [property: string]: unknown;
1752
+ }
1753
+ /**
1754
+ * Additional inference parameters
1755
+ *
1756
+ * Additional inference parameters for Image Segmentation
1757
+ */
1758
+ interface ImageSegmentationParameters {
1759
+ /**
1760
+ * Threshold to use when turning the predicted masks into binary values.
1761
+ */
1762
+ mask_threshold?: number;
1763
+ /**
1764
+ * Mask overlap threshold to eliminate small, disconnected segments.
1765
+ */
1766
+ overlap_mask_area_threshold?: number;
1767
+ /**
1768
+ * Segmentation task to be performed, depending on model capabilities.
1769
+ */
1770
+ subtask?: ImageSegmentationSubtask;
1771
+ /**
1772
+ * Probability threshold to filter out predicted masks.
1773
+ */
1774
+ threshold?: number;
1775
+ [property: string]: unknown;
1776
+ }
1777
+ type ImageSegmentationSubtask = "instance" | "panoptic" | "semantic";
1778
+ type ImageSegmentationOutput = ImageSegmentationOutputElement[];
1779
+ /**
1780
+ * Outputs of inference for the Image Segmentation task
1781
+ *
1782
+ * A predicted mask / segment
1783
+ */
1784
+ interface ImageSegmentationOutputElement {
1785
+ /**
1786
+ * The label of the predicted segment
1787
+ */
1788
+ label: string;
1789
+ /**
1790
+ * The corresponding mask as a black-and-white image
1791
+ */
1792
+ mask: unknown;
1793
+ /**
1794
+ * The score or confidence degree the model has
1795
+ */
1796
+ score?: number;
1797
+ [property: string]: unknown;
1798
+ }
1799
+
1800
+ /**
1801
+ * Inference code generated from the JSON schema spec in ./spec
1802
+ *
1803
+ * Using src/scripts/inference-codegen
1804
+ */
1805
+ /**
1806
+ * Inputs for Object Detection inference
1807
+ */
1808
+ interface ObjectDetectionInput {
1809
+ /**
1810
+ * The input image data
1811
+ */
1812
+ inputs: unknown;
1813
+ /**
1814
+ * Additional inference parameters
1815
+ */
1816
+ parameters?: ObjectDetectionParameters;
1817
+ [property: string]: unknown;
1818
+ }
1819
+ /**
1820
+ * Additional inference parameters
1821
+ *
1822
+ * Additional inference parameters for Object Detection
1823
+ */
1824
+ interface ObjectDetectionParameters {
1825
+ /**
1826
+ * The probability necessary to make a prediction.
1827
+ */
1828
+ threshold?: number;
1829
+ [property: string]: unknown;
1830
+ }
1831
+ /**
1832
+ * The predicted bounding box. Coordinates are relative to the top left corner of the input
1833
+ * image.
1834
+ */
1835
+ interface BoundingBox$1 {
1836
+ xmax: number;
1837
+ xmin: number;
1838
+ ymax: number;
1839
+ ymin: number;
1840
+ [property: string]: unknown;
1841
+ }
1842
+ type ObjectDetectionOutput = ObjectDetectionOutputElement[];
1843
+ /**
1844
+ * Outputs of inference for the Object Detection task
1845
+ */
1846
+ interface ObjectDetectionOutputElement {
1847
+ /**
1848
+ * The predicted bounding box. Coordinates are relative to the top left corner of the input
1849
+ * image.
1850
+ */
1851
+ box: BoundingBox$1;
1852
+ /**
1853
+ * The predicted label for the bounding box
1854
+ */
1855
+ label: string;
1856
+ /**
1857
+ * The associated score / probability
1858
+ */
1859
+ score: number;
1860
+ [property: string]: unknown;
1861
+ }
1862
+
1863
+ /**
1864
+ * Inference code generated from the JSON schema spec in ./spec
1865
+ *
1866
+ * Using src/scripts/inference-codegen
1867
+ */
1868
+ /**
1869
+ * Inputs for Depth Estimation inference
1870
+ */
1871
+ interface DepthEstimationInput {
1872
+ /**
1873
+ * The input image data
1874
+ */
1875
+ inputs: unknown;
1876
+ /**
1877
+ * Additional inference parameters
1878
+ */
1879
+ parameters?: {
1880
+ [key: string]: unknown;
1881
+ };
1882
+ [property: string]: unknown;
1883
+ }
1884
+ /**
1885
+ * Outputs of inference for the Depth Estimation task
1886
+ */
1887
+ interface DepthEstimationOutput {
1888
+ /**
1889
+ * The predicted depth as an image
1890
+ */
1891
+ depth?: unknown;
1892
+ /**
1893
+ * The predicted depth as a tensor
1894
+ */
1895
+ predicted_depth?: unknown;
1896
+ [property: string]: unknown;
1897
+ }
1898
+
1899
+ /**
1900
+ * Inference code generated from the JSON schema spec in ./spec
1901
+ *
1902
+ * Using src/scripts/inference-codegen
1903
+ */
1904
+ /**
1905
+ * Inputs for Question Answering inference
1906
+ */
1907
+ interface QuestionAnsweringInput {
1908
+ /**
1909
+ * One (context, question) pair to answer
1910
+ */
1911
+ inputs: QuestionAnsweringInputData;
1912
+ /**
1913
+ * Additional inference parameters
1914
+ */
1915
+ parameters?: QuestionAnsweringParameters;
1916
+ [property: string]: unknown;
1917
+ }
1918
+ /**
1919
+ * One (context, question) pair to answer
1920
+ */
1921
+ interface QuestionAnsweringInputData {
1922
+ /**
1923
+ * The context to be used for answering the question
1924
+ */
1925
+ context: string;
1926
+ /**
1927
+ * The question to be answered
1928
+ */
1929
+ question: string;
1930
+ [property: string]: unknown;
1931
+ }
1932
+ /**
1933
+ * Additional inference parameters
1934
+ *
1935
+ * Additional inference parameters for Question Answering
1936
+ */
1937
+ interface QuestionAnsweringParameters {
1938
+ /**
1939
+ * Attempts to align the answer to real words. Improves quality on space separated
1940
+ * languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
1941
+ */
1942
+ align_to_words?: boolean;
1943
+ /**
1944
+ * If the context is too long to fit with the question for the model, it will be split in
1945
+ * several chunks with some overlap. This argument controls the size of that overlap.
1946
+ */
1947
+ doc_stride?: number;
1948
+ /**
1949
+ * Whether to accept impossible as an answer.
1950
+ */
1951
+ handle_impossible_answer?: boolean;
1952
+ /**
1953
+ * The maximum length of predicted answers (e.g., only answers with a shorter length are
1954
+ * considered).
1955
+ */
1956
+ max_answer_len?: number;
1957
+ /**
1958
+ * The maximum length of the question after tokenization. It will be truncated if needed.
1959
+ */
1960
+ max_question_len?: number;
1961
+ /**
1962
+ * The maximum length of the total sentence (context + question) in tokens of each chunk
1963
+ * passed to the model. The context will be split in several chunks (using docStride as
1964
+ * overlap) if needed.
1965
+ */
1966
+ max_seq_len?: number;
1967
+ /**
1968
+ * The number of answers to return (will be chosen by order of likelihood). Note that we
1969
+ * return less than topk answers if there are not enough options available within the
1970
+ * context.
1971
+ */
1972
+ top_k?: number;
1973
+ [property: string]: unknown;
1974
+ }
1975
+ type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
1976
+ /**
1977
+ * Outputs of inference for the Question Answering task
1978
+ */
1979
+ interface QuestionAnsweringOutputElement {
1980
+ /**
1981
+ * The answer to the question.
1982
+ */
1983
+ answer: string;
1984
+ /**
1985
+ * The character position in the input where the answer ends.
1986
+ */
1987
+ end: number;
1988
+ /**
1989
+ * The probability associated with the answer.
1990
+ */
1991
+ score: number;
1992
+ /**
1993
+ * The character position in the input where the answer begins.
1994
+ */
1995
+ start: number;
1996
+ [property: string]: unknown;
1997
+ }
1998
+
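A sketch showing how the start/end offsets index into the original context string (import path assumed):

import type { QuestionAnsweringInput, QuestionAnsweringOutput } from "@huggingface/tasks";

const context = "The @huggingface/tasks package describes every task supported on the Hub.";

const request: QuestionAnsweringInput = {
  inputs: { context, question: "What does the package describe?" },
  parameters: { top_k: 1, align_to_words: true },
};

// start/end are character offsets into the context, so the answer can be highlighted in place.
function highlight(answer: QuestionAnsweringOutput[number]): string {
  return context.slice(0, answer.start) + `[${answer.answer}]` + context.slice(answer.end);
}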
1999
+ /**
2000
+ * Inference code generated from the JSON schema spec in ./spec
2001
+ *
2002
+ * Using src/scripts/inference-codegen
2003
+ */
2004
+ type SentenceSimilarityOutput = number[];
2005
+ /**
2006
+ * Inputs for Sentence similarity inference
2007
+ */
2008
+ interface SentenceSimilarityInput {
2009
+ inputs: SentenceSimilarityInputData;
2010
+ /**
2011
+ * Additional inference parameters
2012
+ */
2013
+ parameters?: {
2014
+ [key: string]: unknown;
2015
+ };
2016
+ [property: string]: unknown;
2017
+ }
2018
+ interface SentenceSimilarityInputData {
2019
+ /**
2020
+ * A list of strings which will be compared against the source_sentence.
2021
+ */
2022
+ sentences: string[];
2023
+ /**
2024
+ * The string that you wish to compare the other strings with. This can be a phrase,
2025
+ * sentence, or longer passage, depending on the model being used.
2026
+ */
2027
+ sourceSentence: string;
2028
+ [property: string]: unknown;
2029
+ }
2030
+
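Note that the declared input uses camelCase sourceSentence even though the comment above it refers to source_sentence; the sketch below follows the declaration as written (import path assumed):

import type { SentenceSimilarityInput, SentenceSimilarityOutput } from "@huggingface/tasks";

const candidates = ["That is a happy dog", "That is a very happy person", "Today is a sunny day"];

const request: SentenceSimilarityInput = {
  inputs: { sourceSentence: "That is a happy person", sentences: candidates },
};

// The output is one score per candidate, in the same order as `sentences`.
function mostSimilar(scores: SentenceSimilarityOutput): string {
  return candidates[scores.indexOf(Math.max(...scores))];
}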
2031
+ /**
2032
+ * Inference code generated from the JSON schema spec in ./spec
2033
+ *
2034
+ * Using src/scripts/inference-codegen
2035
+ */
2036
+ /**
2037
+ * Inputs for Summarization inference
2038
+ *
2039
+ * Inputs for Text2text Generation inference
2040
+ */
2041
+ interface SummarizationInput {
2042
+ /**
2043
+ * The input text data
2044
+ */
2045
+ inputs: string;
2046
+ /**
2047
+ * Additional inference parameters
2048
+ */
2049
+ parameters?: Text2TextGenerationParameters$1;
2050
+ [property: string]: unknown;
2051
+ }
2052
+ /**
2053
+ * Additional inference parameters
2054
+ *
2055
+ * Additional inference parameters for Text2text Generation
2056
+ */
2057
+ interface Text2TextGenerationParameters$1 {
2058
+ /**
2059
+ * Whether to clean up the potential extra spaces in the text output.
2060
+ */
2061
+ clean_up_tokenization_spaces?: boolean;
2062
+ /**
2063
+ * Additional parametrization of the text generation algorithm
2064
+ */
2065
+ generate_parameters?: {
2066
+ [key: string]: unknown;
2067
+ };
2068
+ /**
2069
+ * The truncation strategy to use
2070
+ */
2071
+ truncation?: Text2TextGenerationTruncationStrategy$1;
2072
+ [property: string]: unknown;
2073
+ }
2074
+ type Text2TextGenerationTruncationStrategy$1 = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
2075
+ /**
2076
+ * Outputs of inference for the Summarization task
2077
+ */
2078
+ interface SummarizationOutput {
2079
+ /**
2080
+ * The summarized text.
2081
+ */
2082
+ summary_text: string;
2083
+ [property: string]: unknown;
2084
+ }
2085
+
2086
+ /**
2087
+ * Inference code generated from the JSON schema spec in ./spec
2088
+ *
2089
+ * Using src/scripts/inference-codegen
2090
+ */
2091
+ /**
2092
+ * Inputs for Table Question Answering inference
2093
+ */
2094
+ interface TableQuestionAnsweringInput {
2095
+ /**
2096
+ * One (table, question) pair to answer
2097
+ */
2098
+ inputs: TableQuestionAnsweringInputData;
2099
+ /**
2100
+ * Additional inference parameters
2101
+ */
2102
+ parameters?: {
2103
+ [key: string]: unknown;
2104
+ };
2105
+ [property: string]: unknown;
2106
+ }
2107
+ /**
2108
+ * One (table, question) pair to answer
2109
+ */
2110
+ interface TableQuestionAnsweringInputData {
2111
+ /**
2112
+ * The question to be answered about the table
2113
+ */
2114
+ question: string;
2115
+ /**
2116
+ * The table to serve as context for the questions
2117
+ */
2118
+ table: {
2119
+ [key: string]: string[];
2120
+ };
2121
+ [property: string]: unknown;
2122
+ }
2123
+ type TableQuestionAnsweringOutput = TableQuestionAnsweringOutputElement[];
2124
+ /**
2125
+ * Outputs of inference for the Table Question Answering task
2126
+ */
2127
+ interface TableQuestionAnsweringOutputElement {
2128
+ /**
2129
+ * If the model has an aggregator, this returns the aggregator.
2130
+ */
2131
+ aggregator?: string;
2132
+ /**
2133
+ * The answer of the question given the table. If there is an aggregator, the answer will be
2134
+ * preceded by `AGGREGATOR >`.
2135
+ */
2136
+ answer: string;
2137
+ /**
2138
+ * List of strings made up of the answer cell values.
2139
+ */
2140
+ cells: string[];
2141
+ /**
2142
+ * Coordinates of the cells of the answers.
2143
+ */
2144
+ coordinates: Array<number[]>;
2145
+ [property: string]: unknown;
2146
+ }
2147
+
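A request sketch showing the column-oriented table layout: each key is a column header mapped to its cell values (data is illustrative):

import type { TableQuestionAnsweringInput } from "@huggingface/tasks";

const request: TableQuestionAnsweringInput = {
  inputs: {
    table: {
      Repository: ["transformers", "datasets", "tokenizers"],
      Stars: ["120000", "18000", "8000"],
    },
    question: "Which repository has the most stars?",
  },
};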
2148
+ /**
2149
+ * Inference code generated from the JSON schema spec in ./spec
2150
+ *
2151
+ * Using src/scripts/inference-codegen
2152
+ */
2153
+ /**
2154
+ * Inputs for Text To Image inference
2155
+ */
2156
+ interface TextToImageInput {
2157
+ /**
2158
+ * The input text data (sometimes called "prompt")
2159
+ */
2160
+ inputs: string;
2161
+ /**
2162
+ * Additional inference parameters
2163
+ */
2164
+ parameters?: TextToImageParameters;
2165
+ [property: string]: unknown;
2166
+ }
2167
+ /**
2168
+ * Additional inference parameters
2169
+ *
2170
+ * Additional inference parameters for Text To Image
2171
+ */
2172
+ interface TextToImageParameters {
2173
+ /**
2174
+ * For diffusion models. A higher guidance scale value encourages the model to generate
2175
+ * images closely linked to the text prompt at the expense of lower image quality.
2176
+ */
2177
+ guidance_scale?: number;
2178
+ /**
2179
+ * One or several prompts to guide what NOT to include in image generation.
2180
+ */
2181
+ negative_prompt?: string[];
2182
+ /**
2183
+ * For diffusion models. The number of denoising steps. More denoising steps usually lead to
2184
+ * a higher quality image at the expense of slower inference.
2185
+ */
2186
+ num_inference_steps?: number;
2187
+ /**
2188
+ * For diffusion models. Override the scheduler with a compatible one
2189
+ */
2190
+ scheduler?: string;
2191
+ /**
2192
+ * The size in pixels of the output image
2193
+ */
2194
+ target_size?: TargetSize;
2195
+ [property: string]: unknown;
2196
+ }
2197
+ /**
2198
+ * The size in pixels of the output image
2199
+ */
2200
+ interface TargetSize {
2201
+ height: number;
2202
+ width: number;
2203
+ [property: string]: unknown;
2204
+ }
2205
+ /**
2206
+ * Outputs of inference for the Text To Image task
2207
+ */
2208
+ interface TextToImageOutput {
2209
+ /**
2210
+ * The generated image
2211
+ */
2212
+ image: unknown;
2213
+ [property: string]: unknown;
2214
+ }
2215
+
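A hedged end-to-end sketch against the serverless Inference API documented at the URL referenced earlier in this file; the endpoint path, model id, and binary response handling are assumptions, only the payload type comes from these declarations:

import type { TextToImageInput } from "@huggingface/tasks";

const payload: TextToImageInput = {
  inputs: "An astronaut riding a horse on the moon",
  parameters: { guidance_scale: 7, num_inference_steps: 25, negative_prompt: ["watermark"] },
};

async function generateImage(modelId: string, token: string): Promise<Blob> {
  const res = await fetch(`https://api-inference.huggingface.co/models/${modelId}`, {
    method: "POST",
    headers: { Authorization: `Bearer ${token}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  return res.blob(); // assumed: the generated image comes back as binary data
}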
2216
+ /**
2217
+ * Inference code generated from the JSON schema spec in ./spec
2218
+ *
2219
+ * Using src/scripts/inference-codegen
2220
+ */
2221
+ /**
2222
+ * Inputs for Text to Speech inference
2223
+ *
2224
+ * Inputs for Text To Audio inference
2225
+ */
2226
+ interface TextToSpeechInput {
2227
+ /**
2228
+ * The input text data
2229
+ */
2230
+ inputs: string;
2231
+ /**
2232
+ * Additional inference parameters
2233
+ */
2234
+ parameters?: TextToAudioParameters;
2235
+ [property: string]: unknown;
2236
+ }
2237
+ /**
2238
+ * Additional inference parameters
2239
+ *
2240
+ * Additional inference parameters for Text To Audio
2241
+ */
2242
+ interface TextToAudioParameters {
2243
+ /**
2244
+ * Parametrization of the text generation process
2245
+ */
2246
+ generate?: GenerationParameters;
2247
+ [property: string]: unknown;
2248
+ }
2249
+ /**
2250
+ * Parametrization of the text generation process
2251
+ *
2252
+ * Ad-hoc parametrization of the text generation process
2253
+ */
2254
+ interface GenerationParameters {
2255
+ /**
2256
+ * Whether to use sampling instead of greedy decoding when generating new tokens.
2257
+ */
2258
+ do_sample?: boolean;
2259
+ /**
2260
+ * Controls the stopping condition for beam-based methods.
2261
+ */
2262
+ early_stopping?: EarlyStoppingUnion;
2263
+ /**
2264
+ * If set to float strictly between 0 and 1, only tokens with a conditional probability
2265
+ * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
2266
+ * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
2267
+ * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
2268
+ */
2269
+ epsilon_cutoff?: number;
2270
+ /**
2271
+ * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
2272
+ * float strictly between 0 and 1, a token is only considered if it is greater than either
2273
+ * eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
2274
+ * term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
2275
+ * the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
2276
+ * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
2277
+ * for more details.
2278
+ */
2279
+ eta_cutoff?: number;
2280
+ /**
2281
+ * The maximum length (in tokens) of the generated text, including the input.
2282
+ */
2283
+ max_length?: number;
2284
+ /**
2285
+ * The maximum number of tokens to generate. Takes precedence over maxLength.
2286
+ */
2287
+ max_new_tokens?: number;
2288
+ /**
2289
+ * The minimum length (in tokens) of the generated text, including the input.
2290
+ */
2291
+ min_length?: number;
2292
+ /**
2293
+ * The minimum number of tokens to generate. Takes precedence over minLength.
2294
+ */
2295
+ min_new_tokens?: number;
2296
+ /**
2297
+ * Number of groups to divide num_beams into in order to ensure diversity among different
2298
+ * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
2299
+ */
2300
+ num_beam_groups?: number;
2301
+ /**
2302
+ * Number of beams to use for beam search.
2303
+ */
2304
+ num_beams?: number;
2305
+ /**
2306
+ * The value balances the model confidence and the degeneration penalty in contrastive
2307
+ * search decoding.
2308
+ */
2309
+ penalty_alpha?: number;
2310
+ /**
2311
+ * The value used to modulate the next token probabilities.
2312
+ */
2313
+ temperature?: number;
2314
+ /**
2315
+ * The number of highest probability vocabulary tokens to keep for top-k-filtering.
2316
+ */
2317
+ top_k?: number;
2318
+ /**
2319
+ * If set to float < 1, only the smallest set of most probable tokens with probabilities
2320
+ * that add up to top_p or higher are kept for generation.
2321
+ */
2322
+ top_p?: number;
2323
+ /**
2324
+ * Local typicality measures how similar the conditional probability of predicting a target
2325
+ * token next is to the expected conditional probability of predicting a random token next,
2326
+ * given the partial text already generated. If set to float < 1, the smallest set of the
2327
+ * most locally typical tokens with probabilities that add up to typical_p or higher are
2328
+ * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
2329
+ */
2330
+ typical_p?: number;
2331
+ /**
2332
+ * Whether the model should use the past last key/values attentions to speed up decoding
2333
+ */
2334
+ use_cache?: boolean;
2335
+ [property: string]: unknown;
2336
+ }
2337
+ /**
2338
+ * Controls the stopping condition for beam-based methods.
2339
+ */
2340
+ type EarlyStoppingUnion = boolean | "never";
2341
+ /**
2342
+ * Outputs for Text to Speech inference
2343
+ *
2344
+ * Outputs of inference for the Text To Audio task
2345
+ */
2346
+ interface TextToSpeechOutput {
2347
+ /**
2348
+ * The generated audio waveform.
2349
+ */
2350
+ audio: unknown;
2351
+ samplingRate: unknown;
2352
+ /**
2353
+ * The sampling rate of the generated audio waveform.
2354
+ */
2355
+ sampling_rate?: number;
2356
+ [property: string]: unknown;
2357
+ }
2358
+
2359
+ /**
2360
+ * Inference code generated from the JSON schema spec in ./spec
2361
+ *
2362
+ * Using src/scripts/inference-codegen
2363
+ */
2364
+ /**
2365
+ * Inputs for Token Classification inference
2366
+ */
2367
+ interface TokenClassificationInput {
2368
+ /**
2369
+ * The input text data
2370
+ */
2371
+ inputs: string;
2372
+ /**
2373
+ * Additional inference parameters
2374
+ */
2375
+ parameters?: TokenClassificationParameters;
2376
+ [property: string]: unknown;
2377
+ }
2378
+ /**
2379
+ * Additional inference parameters
2380
+ *
2381
+ * Additional inference parameters for Token Classification
2382
+ */
2383
+ interface TokenClassificationParameters {
2384
+ /**
2385
+ * The strategy used to fuse tokens based on model predictions
2386
+ */
2387
+ aggregation_strategy?: TokenClassificationAggregationStrategy;
2388
+ /**
2389
+ * A list of labels to ignore
2390
+ */
2391
+ ignore_labels?: string[];
2392
+ /**
2393
+ * The number of overlapping tokens between chunks when splitting the input text.
2394
+ */
2395
+ stride?: number;
2396
+ [property: string]: unknown;
2397
+ }
2398
+ /**
2399
+ * Do not aggregate tokens
2400
+ *
2401
+ * Group consecutive tokens with the same label in a single entity.
2402
+ *
2403
+ * Similar to "simple", also preserves word integrity (use the label predicted for the first
2404
+ * token in a word).
2405
+ *
2406
+ * Similar to "simple", also preserves word integrity (uses the label with the highest
2407
+ * score, averaged across the word's tokens).
2408
+ *
2409
+ * Similar to "simple", also preserves word integrity (uses the label with the highest score
2410
+ * across the word's tokens).
2411
+ */
2412
+ type TokenClassificationAggregationStrategy = "none" | "simple" | "first" | "average" | "max";
2413
+ type TokenClassificationOutput = TokenClassificationOutputElement[];
2414
+ /**
2415
+ * Outputs of inference for the Token Classification task
2416
+ */
2417
+ interface TokenClassificationOutputElement {
2418
+ /**
2419
+ * The character position in the input where this group ends.
2420
+ */
2421
+ end?: number;
2422
+ /**
2423
+ * The predicted label for that group of tokens
2424
+ */
2425
+ entity_group?: string;
2426
+ label: unknown;
2427
+ /**
2428
+ * The associated score / probability
2429
+ */
2430
+ score: number;
2431
+ /**
2432
+ * The character position in the input where this group begins.
2433
+ */
2434
+ start?: number;
2435
+ /**
2436
+ * The corresponding text
2437
+ */
2438
+ word?: string;
2439
+ [property: string]: unknown;
2440
+ }
2441
+
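A sketch of requesting grouped entities and formatting them; entity_group and word are only populated when the aggregation strategy merges tokens:

import type { TokenClassificationParameters, TokenClassificationOutput } from "@huggingface/tasks";

const parameters: TokenClassificationParameters = {
  aggregation_strategy: "simple",
  ignore_labels: ["O"],
};

// Format grouped entity spans as "LABEL: text (score)".
function listEntities(output: TokenClassificationOutput): string[] {
  const lines: string[] = [];
  for (const e of output) {
    if (e.entity_group && e.word) {
      lines.push(`${e.entity_group}: ${e.word} (${e.score.toFixed(2)})`);
    }
  }
  return lines;
}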
2442
+ /**
2443
+ * Inference code generated from the JSON schema spec in ./spec
2444
+ *
2445
+ * Using src/scripts/inference-codegen
2446
+ */
2447
+ /**
2448
+ * Inputs for Translation inference
2449
+ *
2450
+ * Inputs for Text2text Generation inference
2451
+ */
2452
+ interface TranslationInput {
2453
+ /**
2454
+ * The input text data
2455
+ */
2456
+ inputs: string;
2457
+ /**
2458
+ * Additional inference parameters
2459
+ */
2460
+ parameters?: Text2TextGenerationParameters;
2461
+ [property: string]: unknown;
2462
+ }
2463
+ /**
2464
+ * Additional inference parameters
2465
+ *
2466
+ * Additional inference parameters for Text2text Generation
2467
+ */
2468
+ interface Text2TextGenerationParameters {
2469
+ /**
2470
+ * Whether to clean up the potential extra spaces in the text output.
2471
+ */
2472
+ clean_up_tokenization_spaces?: boolean;
2473
+ /**
2474
+ * Additional parametrization of the text generation algorithm
2475
+ */
2476
+ generate_parameters?: {
2477
+ [key: string]: unknown;
2478
+ };
2479
+ /**
2480
+ * The truncation strategy to use
2481
+ */
2482
+ truncation?: Text2TextGenerationTruncationStrategy;
2483
+ [property: string]: unknown;
2484
+ }
2485
+ type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
2486
+ /**
2487
+ * Outputs of inference for the Translation task
2488
+ */
2489
+ interface TranslationOutput {
2490
+ /**
2491
+ * The translated text.
2492
+ */
2493
+ translation_text: string;
2494
+ [property: string]: unknown;
2495
+ }
2496
+
2497
+ /**
2498
+ * Inference code generated from the JSON schema spec in ./spec
2499
+ *
2500
+ * Using src/scripts/inference-codegen
2501
+ */
2502
+ /**
2503
+ * Inputs for Text Classification inference
2504
+ */
2505
+ interface TextClassificationInput {
2506
+ /**
2507
+ * The text to classify
2508
+ */
2509
+ inputs: string;
2510
+ /**
2511
+ * Additional inference parameters
2512
+ */
2513
+ parameters?: TextClassificationParameters;
2514
+ [property: string]: unknown;
2515
+ }
2516
+ /**
2517
+ * Additional inference parameters
2518
+ *
2519
+ * Additional inference parameters for Text Classification
2520
+ */
2521
+ interface TextClassificationParameters {
2522
+ function_to_apply?: ClassificationOutputTransform$1;
2523
+ /**
2524
+ * When specified, limits the output to the top K most probable classes.
2525
+ */
2526
+ top_k?: number;
2527
+ [property: string]: unknown;
2528
+ }
2529
+ /**
2530
+ * The function to apply to the model outputs in order to retrieve the scores.
2531
+ */
2532
+ type ClassificationOutputTransform$1 = "sigmoid" | "softmax" | "none";
2533
+ type TextClassificationOutput = TextClassificationOutputElement[];
2534
+ /**
2535
+ * Outputs of inference for the Text Classification task
2536
+ */
2537
+ interface TextClassificationOutputElement {
2538
+ /**
2539
+ * The predicted class label.
2540
+ */
2541
+ label: string;
2542
+ /**
2543
+ * The corresponding probability.
2544
+ */
2545
+ score: number;
2546
+ [property: string]: unknown;
2547
+ }
2548
+
2549
+ /**
2550
+ * Inference code generated from the JSON schema spec in ./spec
2551
+ *
2552
+ * Using src/scripts/inference-codegen
2553
+ */
2554
+ /**
2555
+ * Inputs for Text Generation inference
2556
+ */
2557
+ interface TextGenerationInput {
2558
+ /**
2559
+ * The text to initialize generation with
2560
+ */
2561
+ inputs: string;
2562
+ /**
2563
+ * Additional inference parameters
2564
+ */
2565
+ parameters?: TextGenerationParameters;
2566
+ [property: string]: unknown;
2567
+ }
2568
+ /**
2569
+ * Additional inference parameters
2570
+ *
2571
+ * Additional inference parameters for Text Generation
2572
+ */
2573
+ interface TextGenerationParameters {
2574
+ /**
2575
+ * The number of sampling queries to run. Only the best one (in terms of total logprob) will
2576
+ * be returned.
2577
+ */
2578
+ best_of?: number;
2579
+ /**
2580
+ * Whether or not to output decoder input details
2581
+ */
2582
+ decoder_input_details?: boolean;
2583
+ /**
2584
+ * Whether or not to output details
2585
+ */
2586
+ details?: boolean;
2587
+ /**
2588
+ * Whether to use logits sampling instead of greedy decoding when generating new tokens.
2589
+ */
2590
+ do_sample?: boolean;
2591
+ /**
2592
+ * The maximum number of tokens to generate.
2593
+ */
2594
+ max_new_tokens?: number;
2595
+ /**
2596
+ * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
2597
+ * paper](https://hf.co/papers/1909.05858) for more details.
2598
+ */
2599
+ repetition_penalty?: number;
2600
+ /**
2601
+ * Whether to prepend the prompt to the generated text.
2602
+ */
2603
+ return_full_text?: boolean;
2604
+ /**
2605
+ * The random sampling seed.
2606
+ */
2607
+ seed?: number;
2608
+ /**
2609
+ * Stop generating tokens if a member of `stop_sequences` is generated.
2610
+ */
2611
+ stop_sequences?: string[];
2612
+ /**
2613
+ * The value used to modulate the logits distribution.
2614
+ */
2615
+ temperature?: number;
2616
+ /**
2617
+ * The number of highest probability vocabulary tokens to keep for top-k-filtering.
2618
+ */
2619
+ top_k?: number;
2620
+ /**
2621
+ * If set to < 1, only the smallest set of most probable tokens with probabilities that add
2622
+ * up to `top_p` or higher are kept for generation.
2623
+ */
2624
+ top_p?: number;
2625
+ /**
2626
+ * Truncate input tokens to the given size.
2627
+ */
2628
+ truncate?: number;
2629
+ /**
2630
+ * Typical Decoding mass. See [Typical Decoding for Natural Language
2631
+ * Generation](https://hf.co/papers/2202.00666) for more information
2632
+ */
2633
+ typical_p?: number;
2634
+ /**
2635
+ * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
2636
+ */
2637
+ watermark?: boolean;
2638
+ [property: string]: unknown;
2639
+ }
2640
+ /**
2641
+ * Outputs for Text Generation inference
2642
+ */
2643
+ interface TextGenerationOutput {
2644
+ /**
2645
+ * When enabled, details about the generation
2646
+ */
2647
+ details?: TextGenerationOutputDetails;
2648
+ /**
2649
+ * The generated text
2650
+ */
2651
+ generated_text: string;
2652
+ [property: string]: unknown;
2653
+ }
2654
+ /**
2655
+ * When enabled, details about the generation
2656
+ */
2657
+ interface TextGenerationOutputDetails {
2658
+ /**
2659
+ * Details about additional sequences when best_of is provided
2660
+ */
2661
+ best_of_sequences?: TextGenerationSequenceDetails[];
2662
+ /**
2663
+ * The reason why the generation was stopped.
2664
+ */
2665
+ finish_reason: FinishReason;
2666
+ /**
2667
+ * The number of generated tokens
2668
+ */
2669
+ generated_tokens: number;
2670
+ prefill: PrefillToken[];
2671
+ /**
2672
+ * The random seed used for generation
2673
+ */
2674
+ seed?: number;
2675
+ /**
2676
+ * The generated tokens and associated details
2677
+ */
2678
+ tokens: Token[];
2679
+ [property: string]: unknown;
2680
+ }
2681
+ interface TextGenerationSequenceDetails {
2682
+ /**
2683
+ * The reason why the generation was stopped.
2684
+ */
2685
+ finish_reason: FinishReason;
2686
+ /**
2687
+ * The generated text
2688
+ */
2689
+ generated_text: number;
2690
+ /**
2691
+ * The number of generated tokens
2692
+ */
2693
+ generated_tokens: number;
2694
+ prefill: PrefillToken[];
2695
+ /**
2696
+ * The random seed used for generation
2697
+ */
2698
+ seed?: number;
2699
+ /**
2700
+ * The generated tokens and associated details
2701
+ */
2702
+ tokens: Token[];
2703
+ [property: string]: unknown;
2704
+ }
2705
+ /**
2706
+ * The generated sequence reached the maximum allowed length
2707
+ *
2708
+ * The model generated an end-of-sentence (EOS) token
2709
+ *
2710
+ * One of the sequence in stop_sequences was generated
2711
+ */
2712
+ type FinishReason = "length" | "eos_token" | "stop_sequence";
2713
+ interface PrefillToken {
2714
+ id: number;
2715
+ logprob: number;
2716
+ /**
2717
+ * The text associated with that token
2718
+ */
2719
+ text: string;
2720
+ [property: string]: unknown;
2721
+ }
2722
+ interface Token {
2723
+ id: number;
2724
+ logprob: number;
2725
+ /**
2726
+ * Whether or not that token is a special one
2727
+ */
2728
+ special: boolean;
2729
+ /**
2730
+ * The text associated with that token
2731
+ */
2732
+ text: string;
2733
+ [property: string]: unknown;
2734
+ }
2735
+
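A sketch pairing generation parameters with the detailed output; details (and therefore per-token log-probabilities) is only present when it was requested:

import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";

const request: TextGenerationInput = {
  inputs: "Once upon a time",
  parameters: { max_new_tokens: 64, temperature: 0.7, details: true, stop_sequences: ["\n\n"] },
};

// When details are requested, each generated token carries its log-probability.
function tokenLogprobs(output: TextGenerationOutput): Array<[string, number]> {
  return (output.details?.tokens ?? []).map((t): [string, number] => [t.text, t.logprob]);
}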
+ /**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+ /**
+ * Inputs for Video Classification inference
+ */
+ interface VideoClassificationInput {
+ /**
+ * The input video data
+ */
+ inputs: unknown;
+ /**
+ * Additional inference parameters
+ */
+ parameters?: VideoClassificationParameters;
+ [property: string]: unknown;
+ }
+ /**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Video Classification
+ */
+ interface VideoClassificationParameters {
+ /**
+ * The sampling rate used to select frames from the video.
+ */
+ frame_sampling_rate?: number;
+ function_to_apply?: ClassificationOutputTransform;
+ /**
+ * The number of sampled frames to consider for classification.
+ */
+ num_frames?: number;
+ /**
+ * When specified, limits the output to the top K most probable classes.
+ */
+ top_k?: number;
+ [property: string]: unknown;
+ }
+ /**
+ * The function to apply to the model outputs in order to retrieve the scores.
+ */
+ type ClassificationOutputTransform = "sigmoid" | "softmax" | "none";
+ type VideoClassificationOutput = VideoClassificationOutputElement[];
+ /**
+ * Outputs of inference for the Video Classification task
+ */
+ interface VideoClassificationOutputElement {
+ /**
+ * The predicted class label.
+ */
+ label: string;
+ /**
+ * The corresponding probability.
+ */
+ score: number;
+ [property: string]: unknown;
+ }
+
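A minimal sketch of a request/response pair using the video-classification declarations above (illustrative only; `inputs` is typed as `unknown`, so the URL below is just one plausible payload):

import type { VideoClassificationInput, VideoClassificationOutput } from "@huggingface/tasks";

const request: VideoClassificationInput = {
  inputs: "https://example.com/clip.mp4",
  parameters: { frame_sampling_rate: 4, num_frames: 16, function_to_apply: "softmax", top_k: 3 },
};

// Pick the highest-scoring label from a response array.
function topLabel(output: VideoClassificationOutput): string | undefined {
  return [...output].sort((a, b) => b.score - a.score)[0]?.label;
}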
+ /**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+ /**
+ * Inputs for Visual Question Answering inference
+ */
+ interface VisualQuestionAnsweringInput {
+ /**
+ * One (image, question) pair to answer
+ */
+ inputs: VisualQuestionAnsweringInputData;
+ /**
+ * Additional inference parameters
+ */
+ parameters?: VisualQuestionAnsweringParameters;
+ [property: string]: unknown;
+ }
+ /**
+ * One (image, question) pair to answer
+ */
+ interface VisualQuestionAnsweringInputData {
+ /**
+ * The image.
+ */
+ image: unknown;
+ /**
+ * The question to answer based on the image.
+ */
+ question: unknown;
+ [property: string]: unknown;
+ }
+ /**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Visual Question Answering
+ */
+ interface VisualQuestionAnsweringParameters {
+ /**
+ * The number of answers to return (will be chosen by order of likelihood). Note that we
+ * return fewer than top_k answers if there are not enough options available within the
+ * context.
+ */
+ top_k?: number;
+ [property: string]: unknown;
+ }
+ type VisualQuestionAnsweringOutput = VisualQuestionAnsweringOutputElement[];
+ /**
+ * Outputs of inference for the Visual Question Answering task
+ */
+ interface VisualQuestionAnsweringOutputElement {
+ /**
+ * The answer to the question
+ */
+ answer?: string;
+ label: unknown;
+ /**
+ * The associated score / probability
+ */
+ score: number;
+ [property: string]: unknown;
+ }
+
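Likewise, a hypothetical visual-question-answering payload built from these declarations (image and question are both `unknown` in the generated type, so any serializable value type-checks):

import type { VisualQuestionAnsweringInput, VisualQuestionAnsweringOutput } from "@huggingface/tasks";

const vqaRequest: VisualQuestionAnsweringInput = {
  inputs: { image: "cats.png", question: "How many cats are visible?" },
  parameters: { top_k: 3 },
};

// `answer` is optional on each output element, hence the possible undefined.
function bestAnswer(output: VisualQuestionAnsweringOutput): string | undefined {
  return output[0]?.answer;
}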
+ /**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+ /**
+ * Inputs for Zero Shot Classification inference
+ */
+ interface ZeroShotClassificationInput {
+ /**
+ * The input text data, with candidate labels
+ */
+ inputs: ZeroShotClassificationInputData;
+ /**
+ * Additional inference parameters
+ */
+ parameters?: ZeroShotClassificationParameters;
+ [property: string]: unknown;
+ }
+ /**
+ * The input text data, with candidate labels
+ */
+ interface ZeroShotClassificationInputData {
+ /**
+ * The set of possible class labels to classify the text into.
+ */
+ candidateLabels: string[];
+ /**
+ * The text to classify
+ */
+ text: string;
+ [property: string]: unknown;
+ }
+ /**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Zero Shot Classification
+ */
+ interface ZeroShotClassificationParameters {
+ /**
+ * The sentence used in conjunction with candidateLabels to attempt the text classification
+ * by replacing the placeholder with the candidate labels.
+ */
+ hypothesis_template?: string;
+ /**
+ * Whether multiple candidate labels can be true. If false, the scores are normalized such
+ * that the sum of the label likelihoods for each sequence is 1. If true, the labels are
+ * considered independent and probabilities are normalized for each candidate.
+ */
+ multi_label?: boolean;
+ [property: string]: unknown;
+ }
+ type ZeroShotClassificationOutput = ZeroShotClassificationOutputElement[];
+ /**
+ * Outputs of inference for the Zero Shot Classification task
+ */
+ interface ZeroShotClassificationOutputElement {
+ /**
+ * The predicted class label.
+ */
+ label: string;
+ /**
+ * The corresponding probability.
+ */
+ score: number;
+ [property: string]: unknown;
+ }
+
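A small, invented example of the zero-shot classification input/output shapes; note the camelCase `candidateLabels` key next to the snake_case parameter names, exactly as in the generated declarations:

import type { ZeroShotClassificationInput, ZeroShotClassificationOutput } from "@huggingface/tasks";

const zeroShotRequest: ZeroShotClassificationInput = {
  inputs: {
    text: "I just bought tickets for the concert next week.",
    candidateLabels: ["music", "sports", "politics"],
  },
  parameters: { multi_label: false, hypothesis_template: "This text is about {}." },
};

// With multi_label: false the scores sum to 1, so a simple threshold filter is reasonable.
function labelsAbove(output: ZeroShotClassificationOutput, threshold: number): string[] {
  return output.filter((el) => el.score >= threshold).map((el) => el.label);
}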
+ /**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+ /**
+ * Inputs for Zero Shot Image Classification inference
+ */
+ interface ZeroShotImageClassificationInput {
+ /**
+ * The input image data, with candidate labels
+ */
+ inputs: ZeroShotImageClassificationInputData;
+ /**
+ * Additional inference parameters
+ */
+ parameters?: ZeroShotImageClassificationParameters;
+ [property: string]: unknown;
+ }
+ /**
+ * The input image data, with candidate labels
+ */
+ interface ZeroShotImageClassificationInputData {
+ /**
+ * The candidate labels for this image
+ */
+ candidateLabels: string[];
+ /**
+ * The image data to classify
+ */
+ image: unknown;
+ [property: string]: unknown;
+ }
+ /**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Zero Shot Image Classification
+ */
+ interface ZeroShotImageClassificationParameters {
+ /**
+ * The sentence used in conjunction with candidateLabels to attempt the text classification
+ * by replacing the placeholder with the candidate labels.
+ */
+ hypothesis_template?: string;
+ [property: string]: unknown;
+ }
+ type ZeroShotImageClassificationOutput = ZeroShotImageClassificationOutputElement[];
+ /**
+ * Outputs of inference for the Zero Shot Image Classification task
+ */
+ interface ZeroShotImageClassificationOutputElement {
+ /**
+ * The predicted class label.
+ */
+ label: string;
+ /**
+ * The corresponding probability.
+ */
+ score: number;
+ [property: string]: unknown;
+ }
+
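And the analogous (hypothetical) zero-shot image classification request, where the hypothesis template is filled with each candidate label:

import type { ZeroShotImageClassificationInput } from "@huggingface/tasks";

const zsImageRequest: ZeroShotImageClassificationInput = {
  inputs: {
    image: "https://example.com/photo.jpg", // `image` is typed as unknown; raw bytes would also fit
    candidateLabels: ["a photo of a dog", "a photo of a cat"],
  },
  parameters: { hypothesis_template: "a photo of a {}" },
};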
+ /**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+ /**
+ * Inputs for Zero Shot Object Detection inference
+ */
+ interface ZeroShotObjectDetectionInput {
+ /**
+ * The input image data, with candidate labels
+ */
+ inputs: ZeroShotObjectDetectionInputData;
+ /**
+ * Additional inference parameters
+ */
+ parameters?: {
+ [key: string]: unknown;
+ };
+ [property: string]: unknown;
+ }
+ /**
+ * The input image data, with candidate labels
+ */
+ interface ZeroShotObjectDetectionInputData {
+ /**
+ * The candidate labels for this image
+ */
+ candidateLabels: string[];
+ /**
+ * The image data to generate bounding boxes from
+ */
+ image: unknown;
+ [property: string]: unknown;
+ }
+ /**
+ * The predicted bounding box. Coordinates are relative to the top left corner of the input
+ * image.
+ */
+ interface BoundingBox {
+ xmax: number;
+ xmin: number;
+ ymax: number;
+ ymin: number;
+ [property: string]: unknown;
+ }
+ type ZeroShotObjectDetectionOutput = ZeroShotObjectDetectionOutputElement[];
+ /**
+ * Outputs of inference for the Zero Shot Object Detection task
+ */
+ interface ZeroShotObjectDetectionOutputElement {
+ /**
+ * The predicted bounding box. Coordinates are relative to the top left corner of the input
+ * image.
+ */
+ box: BoundingBox;
+ /**
+ * A candidate label
+ */
+ label: string;
+ /**
+ * The associated score / probability
+ */
+ score: number;
+ [property: string]: unknown;
+ }
+
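Finally, a sketch of consuming the zero-shot object-detection output via the `BoundingBox` coordinates (relative to the top-left corner of the input image, per the doc comment above); the helper names are invented:

import type { BoundingBox, ZeroShotObjectDetectionOutput } from "@huggingface/tasks";

function boxArea(box: BoundingBox): number {
  return Math.max(0, box.xmax - box.xmin) * Math.max(0, box.ymax - box.ymin);
}

// Return the detection covering the largest area, if any.
function largestDetection(output: ZeroShotObjectDetectionOutput) {
  return [...output].sort((a, b) => boxArea(b.box) - boxArea(a.box))[0];
}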
  /**
  * Model libraries compatible with each ML task
  */
@@ -1231,4 +3219,4 @@ declare namespace index {
  };
  }
 
- export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, ExampleRepo, InferenceDisplayability, LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS, LibraryUiElement, MAPPING_DEFAULT_WIDGET, MODALITIES, MODALITY_LABELS, MODEL_LIBRARIES_UI_ELEMENTS, Modality, ModelData, ModelLibraryKey, PIPELINE_DATA, PIPELINE_TYPES, PIPELINE_TYPES_SET, PipelineData, PipelineType, SPECIAL_TOKENS_ATTRIBUTES, SUBTASK_TYPES, SpecialTokensMap, TASKS_DATA, TASKS_MODEL_LIBRARIES, TaskData, TaskDataCustom, TaskDemo, TaskDemoEntry, TokenizerConfig, TransformersInfo, WidgetExample, WidgetExampleAssetAndPromptInput, WidgetExampleAssetAndTextInput, WidgetExampleAssetAndZeroShotInput, WidgetExampleAssetInput, WidgetExampleAttribute, WidgetExampleOutput, WidgetExampleOutputAnswerScore, WidgetExampleOutputLabels, WidgetExampleOutputText, WidgetExampleOutputUrl, WidgetExampleSentenceSimilarityInput, WidgetExampleStructuredDataInput, WidgetExampleTableDataInput, WidgetExampleTextAndContextInput, WidgetExampleTextAndTableInput, WidgetExampleTextInput, WidgetExampleZeroShotTextInput, WidgetType, index as snippets };
+ export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, AudioClassificationInput, AudioClassificationOutput, AudioClassificationOutputElement, AudioClassificationParameters, AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput, AutomaticSpeechRecognitionOutputChunk, AutomaticSpeechRecognitionParameters, BoundingBox, ClassificationOutputTransform$1 as ClassificationOutputTransform, DepthEstimationInput, DepthEstimationOutput, DocumentQuestionAnsweringInput, DocumentQuestionAnsweringInputData, DocumentQuestionAnsweringOutput, DocumentQuestionAnsweringOutputElement, DocumentQuestionAnsweringParameters, EarlyStoppingUnion$2 as EarlyStoppingUnion, ExampleRepo, FeatureExtractionInput, FeatureExtractionOutput, FillMaskInput, FillMaskOutput, FillMaskOutputElement, FillMaskParameters, FinishReason, GenerationParameters$2 as GenerationParameters, ImageClassificationInput, ImageClassificationOutput, ImageClassificationOutputElement, ImageClassificationParameters, ImageSegmentationInput, ImageSegmentationOutput, ImageSegmentationOutputElement, ImageSegmentationParameters, ImageSegmentationSubtask, ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToTextInput, ImageToTextOutput, ImageToTextParameters, InferenceDisplayability, LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS, LibraryUiElement, MAPPING_DEFAULT_WIDGET, MODALITIES, MODALITY_LABELS, MODEL_LIBRARIES_UI_ELEMENTS, Modality, ModelData, ModelLibraryKey, ObjectDetectionInput, ObjectDetectionOutput, ObjectDetectionOutputElement, ObjectDetectionParameters, PIPELINE_DATA, PIPELINE_TYPES, PIPELINE_TYPES_SET, PipelineData, PipelineType, PrefillToken, QuestionAnsweringInput, QuestionAnsweringInputData, QuestionAnsweringOutput, QuestionAnsweringOutputElement, QuestionAnsweringParameters, SPECIAL_TOKENS_ATTRIBUTES, SUBTASK_TYPES, SentenceSimilarityInput, SentenceSimilarityInputData, SentenceSimilarityOutput, SpecialTokensMap, SummarizationInput, SummarizationOutput, TASKS_DATA, TASKS_MODEL_LIBRARIES, TableQuestionAnsweringInput, TableQuestionAnsweringInputData, TableQuestionAnsweringOutput, TableQuestionAnsweringOutputElement, TargetSize$1 as TargetSize, TaskData, TaskDataCustom, TaskDemo, TaskDemoEntry, Text2TextGenerationParameters, Text2TextGenerationTruncationStrategy, TextClassificationInput, TextClassificationOutput, TextClassificationOutputElement, TextClassificationParameters, TextGenerationInput, TextGenerationOutput, TextGenerationOutputDetails, TextGenerationParameters, TextGenerationSequenceDetails, TextToAudioParameters, TextToImageInput, TextToImageOutput, TextToImageParameters, TextToSpeechInput, TextToSpeechOutput, Token, TokenClassificationAggregationStrategy, TokenClassificationInput, TokenClassificationOutput, TokenClassificationOutputElement, TokenClassificationParameters, TokenizerConfig, TransformersInfo, TranslationInput, TranslationOutput, VideoClassificationInput, VideoClassificationOutput, VideoClassificationOutputElement, VideoClassificationParameters, VisualQuestionAnsweringInput, VisualQuestionAnsweringInputData, VisualQuestionAnsweringOutput, VisualQuestionAnsweringOutputElement, VisualQuestionAnsweringParameters, WidgetExample, WidgetExampleAssetAndPromptInput, WidgetExampleAssetAndTextInput, WidgetExampleAssetAndZeroShotInput, WidgetExampleAssetInput, WidgetExampleAttribute, WidgetExampleOutput, WidgetExampleOutputAnswerScore, WidgetExampleOutputLabels, WidgetExampleOutputText, WidgetExampleOutputUrl, WidgetExampleSentenceSimilarityInput, WidgetExampleStructuredDataInput, 
WidgetExampleTableDataInput, WidgetExampleTextAndContextInput, WidgetExampleTextAndTableInput, WidgetExampleTextInput, WidgetExampleZeroShotTextInput, WidgetType, WordBox, ZeroShotClassificationInput, ZeroShotClassificationInputData, ZeroShotClassificationOutput, ZeroShotClassificationOutputElement, ZeroShotClassificationParameters, ZeroShotImageClassificationInput, ZeroShotImageClassificationInputData, ZeroShotImageClassificationOutput, ZeroShotImageClassificationOutputElement, ZeroShotImageClassificationParameters, ZeroShotObjectDetectionInput, ZeroShotObjectDetectionInputData, ZeroShotObjectDetectionOutput, ZeroShotObjectDetectionOutputElement, index as snippets };
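The net effect of the widened export statement is that the task I/O declarations added in this release are importable from the package root. Since they are types only (erased at compile time), `import type` is the natural way to consume them; the grouping below is illustrative, not exhaustive:

import type {
  TextGenerationOutput,
  VideoClassificationOutput,
  VisualQuestionAnsweringInput,
  ZeroShotObjectDetectionOutputElement,
} from "@huggingface/tasks";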