@huggingface/tasks 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -23,7 +23,7 @@ __export(src_exports, {
  ALL_DISPLAY_MODEL_LIBRARY_KEYS: () => ALL_DISPLAY_MODEL_LIBRARY_KEYS,
  ALL_MODEL_LIBRARY_KEYS: () => ALL_MODEL_LIBRARY_KEYS,
  InferenceDisplayability: () => InferenceDisplayability,
- LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: () => LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
+ LIBRARY_TASK_MAPPING: () => LIBRARY_TASK_MAPPING,
  MAPPING_DEFAULT_WIDGET: () => MAPPING_DEFAULT_WIDGET,
  MODALITIES: () => MODALITIES,
  MODALITY_LABELS: () => MODALITY_LABELS,
@@ -40,7 +40,7 @@ __export(src_exports, {
 module.exports = __toCommonJS(src_exports);
 
 // src/library-to-tasks.ts
-var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
+var LIBRARY_TASK_MAPPING = {
   "adapter-transformers": ["question-answering", "text-classification", "token-classification"],
   allennlp: ["question-answering"],
   asteroid: [
@@ -76,6 +76,24 @@ var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
   ],
   stanza: ["token-classification"],
   timm: ["image-classification"],
+  transformers: [
+    "audio-classification",
+    "automatic-speech-recognition",
+    "depth-estimation",
+    "document-question-answering",
+    "fill-mask",
+    "image-classification",
+    "image-segmentation",
+    "image-to-text",
+    "image-to-image",
+    "object-detection",
+    "question-answering",
+    "text-generation",
+    "text2text-generation",
+    "visual-question-answering",
+    "zero-shot-classification",
+    "zero-shot-image-classification"
+  ],
   mindspore: ["image-classification"]
 };
 
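With transformers folded into the renamed LIBRARY_TASK_MAPPING, consumers no longer need the "assume transformers supports every task" special case that the old LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS name implied. A minimal sketch of querying the new mapping; the `isServerlessCandidate` helper is our own illustration, not part of the package:

```ts
import { LIBRARY_TASK_MAPPING } from "@huggingface/tasks";
import type { ModelLibraryKey, PipelineType } from "@huggingface/tasks";

// One lookup now answers "may this (library, task) pair run on the
// serverless Inference API?" for transformers and everything else alike.
function isServerlessCandidate(library: ModelLibraryKey, task: PipelineType): boolean {
  return LIBRARY_TASK_MAPPING[library]?.includes(task) ?? false;
}

isServerlessCandidate("transformers", "text-generation"); // true
isServerlessCandidate("timm", "text-generation"); // false: timm maps only to image-classification
```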
@@ -3847,7 +3865,7 @@ var data_default34 = taskData34;
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
   "audio-classification": ["speechbrain", "transformers", "transformers.js"],
-  "audio-to-audio": ["asteroid", "speechbrain"],
+  "audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   "depth-estimation": ["transformers", "transformers.js"],
   "document-question-answering": ["transformers", "transformers.js"],
@@ -4084,7 +4102,7 @@ var flair = (model) => [
 tagger = SequenceTagger.load("${model.id}")`
 ];
 var gliner = (model) => [
-  `from model import GLiNER
+  `from gliner import GLiNER
 
 model = GLiNER.from_pretrained("${model.id}")`
 ];
@@ -4434,6 +4452,11 @@ var mlx = (model) => [
 export HF_HUB_ENABLE_HF_TRANSFER=1
 huggingface-cli download --local-dir ${nameWithoutNamespace(model.id)} ${model.id}`
 ];
+var mlxim = (model) => [
+  `from mlxim.model import create_model
+
+model = create_model(${model.id})`
+];
 var nemo = (model) => {
   let command = void 0;
   if (model.tags?.includes("automatic-speech-recognition")) {
@@ -4593,6 +4616,15 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
       term: { path: "gliner_config.json" }
     }
   },
+  grok: {
+    prettyLabel: "Grok",
+    repoName: "Grok",
+    repoUrl: "https://github.com/xai-org/grok-1",
+    filter: false,
+    countDownloads: {
+      terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] }
+    }
+  },
   keras: {
     prettyLabel: "Keras",
     repoName: "Keras",
@@ -4628,6 +4660,15 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     snippets: mlx,
     filter: true
   },
+  "mlx-image": {
+    prettyLabel: "mlx-image",
+    repoName: "mlx-image",
+    repoUrl: "https://github.com/riccardomusmeci/mlx-image",
+    docsUrl: "https://huggingface.co/docs/hub/mlx-image",
+    snippets: mlxim,
+    filter: false,
+    countDownloads: { term: { path: "model.safetensors" } }
+  },
   nemo: {
     prettyLabel: "NeMo",
     repoName: "NeMo",
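The two new registry entries exercise both `countDownloads` shapes: grok uses `terms` (a download counts when any listed checkpoint shard is fetched), while mlx-image uses `term` (one exact file). A sketch of how a consumer might evaluate these filters, assuming they behave as Elasticsearch-style exact path matches; the type and helper below are illustrative, not part of the package:

```ts
// Illustrative union mirroring the two shapes used above.
type CountDownloadsFilter =
  | { term: { path: string } }
  | { terms: { path: string[] } };

function countsAsDownload(filePath: string, filter: CountDownloadsFilter): boolean {
  return "term" in filter
    ? filter.term.path === filePath // mlx-image: single exact file
    : filter.terms.path.includes(filePath); // grok: any listed checkpoint path
}

countsAsDownload("model.safetensors", { term: { path: "model.safetensors" } }); // true
countsAsDownload("ckpt-0/tensor00000_000", {
  terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] },
}); // true
```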
@@ -5301,7 +5342,7 @@ function hasJsInferenceSnippet(model) {
   ALL_DISPLAY_MODEL_LIBRARY_KEYS,
   ALL_MODEL_LIBRARY_KEYS,
   InferenceDisplayability,
-  LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
+  LIBRARY_TASK_MAPPING,
   MAPPING_DEFAULT_WIDGET,
   MODALITIES,
   MODALITY_LABELS,
package/dist/index.d.ts CHANGED
@@ -851,6 +851,17 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
       };
     };
   };
+  grok: {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    filter: false;
+    countDownloads: {
+      terms: {
+        path: string[];
+      };
+    };
+  };
   keras: {
     prettyLabel: string;
     repoName: string;
@@ -894,6 +905,19 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     snippets: (model: ModelData) => string[];
     filter: true;
   };
+  "mlx-image": {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    docsUrl: string;
+    snippets: (model: ModelData) => string[];
+    filter: false;
+    countDownloads: {
+      term: {
+        path: string;
+      };
+    };
+  };
   nemo: {
     prettyLabel: string;
     repoName: string;
@@ -1117,17 +1141,17 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
   };
 };
 type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
-declare const ALL_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "whisperkit")[];
-declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "whisperkit")[];
+declare const ALL_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "grok" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "mlx-image" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "whisperkit")[];
+declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "grok" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "mlx-image" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "whisperkit")[];
 
 /**
- * Mapping from library name (excluding Transformers) to its supported tasks.
+ * Mapping from library name to its supported tasks.
  * Inference API (serverless) should be disabled for all other (library, task) pairs beyond this mapping.
- * As an exception, we assume Transformers supports all inference tasks.
- * This mapping is generated automatically by "python-api-export-tasks" action in huggingface/api-inference-community repo upon merge.
- * Ref: https://github.com/huggingface/api-inference-community/pull/158
+ * This mapping is partially generated automatically by "python-api-export-tasks" action in
+ * huggingface/api-inference-community repo upon merge. For transformers, the mapping is manually
+ * based on api-inference.
  */
-declare const LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: Partial<Record<ModelLibraryKey, PipelineType[]>>;
+declare const LIBRARY_TASK_MAPPING: Partial<Record<ModelLibraryKey, PipelineType[]>>;
 
 type PerLanguageMapping = Map<WidgetType, string[] | WidgetExample[]>;
 declare const MAPPING_DEFAULT_WIDGET: Map<string, PerLanguageMapping>;
@@ -1338,6 +1362,154 @@ interface AutomaticSpeechRecognitionOutputChunk {
   [property: string]: unknown;
 }
 
+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for ChatCompletion inference
+ */
+interface ChatCompletionInput {
+  /**
+   * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+   * frequency in the text so far, decreasing the model's likelihood to repeat the same line
+   * verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * The maximum number of tokens that can be generated in the chat completion.
+   */
+  max_tokens?: number;
+  messages: ChatCompletionInputMessage[];
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a stop token is generated.
+   */
+  stop?: ChatCompletionInputStopReason;
+  /**
+   * If set, partial message deltas will be sent.
+   */
+  stream?: boolean;
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  [property: string]: unknown;
+}
+interface ChatCompletionInputMessage {
+  /**
+   * The content of the message.
+   */
+  content: string;
+  role: ChatCompletionMessageRole;
+  [property: string]: unknown;
+}
+/**
+ * The role of the message author.
+ */
+type ChatCompletionMessageRole = "assistant" | "system" | "user";
+/**
+ * Stop generating tokens if a stop token is generated.
+ */
+type ChatCompletionInputStopReason = string[] | string;
+/**
+ * Outputs for Chat Completion inference
+ */
+interface ChatCompletionOutput {
+  /**
+   * A list of chat completion choices.
+   */
+  choices: ChatCompletionOutputChoice[];
+  /**
+   * The Unix timestamp (in seconds) of when the chat completion was created.
+   */
+  created: number;
+  [property: string]: unknown;
+}
+interface ChatCompletionOutputChoice {
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: ChatCompletionFinishReason;
+  /**
+   * The index of the choice in the list of choices.
+   */
+  index: number;
+  message: ChatCompletionOutputChoiceMessage;
+  [property: string]: unknown;
+}
+/**
+ * The reason why the generation was stopped.
+ *
+ * The generated sequence reached the maximum allowed length
+ *
+ * The model generated an end-of-sentence (EOS) token
+ *
+ * One of the sequence in stop_sequences was generated
+ */
+type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
+interface ChatCompletionOutputChoiceMessage {
+  /**
+   * The content of the chat completion message.
+   */
+  content: string;
+  role: ChatCompletionMessageRole;
+  [property: string]: unknown;
+}
+/**
+ * Chat Completion Stream Output
+ */
+interface ChatCompletionStreamOutput {
+  /**
+   * A list of chat completion choices.
+   */
+  choices: ChatCompletionStreamOutputChoice[];
+  /**
+   * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
+   * the same timestamp.
+   */
+  created: number;
+  [property: string]: unknown;
+}
+interface ChatCompletionStreamOutputChoice {
+  /**
+   * A chat completion delta generated by streamed model responses.
+   */
+  delta: ChatCompletionStreamOutputDelta;
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason?: ChatCompletionFinishReason;
+  /**
+   * The index of the choice in the list of choices.
+   */
+  index: number;
+  [property: string]: unknown;
+}
+/**
+ * A chat completion delta generated by streamed model responses.
+ */
+interface ChatCompletionStreamOutputDelta {
+  /**
+   * The contents of the chunk message.
+   */
+  content?: string;
+  /**
+   * The role of the author of this message.
+   */
+  role?: string;
+  [property: string]: unknown;
+}
+
 /**
  * Inference code generated from the JSON schema spec in ./spec
  *
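The new ChatCompletion* declarations follow the familiar messages-in, choices-out shape. A minimal sketch built only from the fields declared above; how the request reaches a server is out of scope here, so no transport is shown:

```ts
const request: ChatCompletionInput = {
  messages: [
    { role: "system", content: "You are a concise assistant." },
    { role: "user", content: "Summarize this changelog in one sentence." },
  ],
  max_tokens: 128,
  temperature: 0.7,
  stream: true,
};

// With stream: true, each chunk's delta carries an optional content
// fragment; concatenating the fragments rebuilds the assistant message.
function collectStream(chunks: ChatCompletionStreamOutput[]): string {
  let text = "";
  for (const chunk of chunks) {
    for (const choice of chunk.choices) {
      text += choice.delta.content ?? "";
    }
  }
  return text;
}
```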
@@ -2622,6 +2794,10 @@ interface TextGenerationInput {
    * Additional inference parameters
    */
   parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
   [property: string]: unknown;
 }
 /**
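TextGenerationInput gains the same opt-in switch. A one-field sketch; `inputs` is the prompt field declared on this interface, and `max_new_tokens` is assumed to be among TextGenerationParameters (check the parameter declarations before relying on it):

```ts
const input: TextGenerationInput = {
  inputs: "Once upon a time",
  parameters: { max_new_tokens: 50 }, // assumed TextGenerationParameters field
  stream: true, // new in 0.7.0: request token chunks instead of one final response
};
```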
@@ -2717,16 +2893,16 @@ interface TextGenerationOutputDetails {
   /**
    * Details about additional sequences when best_of is provided
    */
-  best_of_sequences?: TextGenerationSequenceDetails[];
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
   /**
    * The reason why the generation was stopped.
    */
-  finish_reason: FinishReason;
+  finish_reason: TextGenerationFinishReason;
   /**
    * The number of generated tokens
    */
   generated_tokens: number;
-  prefill: PrefillToken[];
+  prefill: TextGenerationPrefillToken[];
   /**
    * The random seed used for generation
    */
@@ -2734,23 +2910,24 @@ interface TextGenerationOutputDetails {
   /**
    * The generated tokens and associated details
    */
-  tokens: Token[];
-  [property: string]: unknown;
-}
-interface TextGenerationSequenceDetails {
+  tokens: TextGenerationOutputToken[];
   /**
-   * The reason why the generation was stopped.
+   * Most likely tokens
    */
-  finish_reason: FinishReason;
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
   /**
    * The generated text
    */
-  generated_text: number;
+  generated_text: string;
   /**
    * The number of generated tokens
    */
   generated_tokens: number;
-  prefill: PrefillToken[];
+  prefill: TextGenerationPrefillToken[];
   /**
    * The random seed used for generation
    */
@@ -2758,18 +2935,24 @@ interface TextGenerationSequenceDetails {
   /**
    * The generated tokens and associated details
    */
-  tokens: Token[];
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
   [property: string]: unknown;
 }
 /**
- * The generated sequence reached the maximum allowed length
+ * The reason why the generation was stopped.
  *
- * The model generated an end-of-sentence (EOS) token
+ * length: The generated sequence reached the maximum allowed length
  *
- * One of the sequence in stop_sequences was generated
+ * eos_token: The model generated an end-of-sentence (EOS) token
+ *
+ * stop_sequence: One of the sequence in stop_sequences was generated
  */
-type FinishReason = "length" | "eos_token" | "stop_sequence";
-interface PrefillToken {
+type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+interface TextGenerationPrefillToken {
   id: number;
   logprob: number;
   /**
@@ -2778,9 +2961,12 @@ interface PrefillToken {
   text: string;
   [property: string]: unknown;
 }
-interface Token {
+/**
+ * Generated token.
+ */
+interface TextGenerationOutputToken {
   id: number;
-  logprob: number;
+  logprob?: number;
   /**
    * Whether or not that token is a special one
    */
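Two consequences of the renames above deserve care in consuming code: `logprob` is now optional on generated tokens, and `top_tokens` (when present) holds one candidate list per generated position. A defensive-consumption sketch; both helpers are our own, and the token's `text` field is assumed from the full declaration:

```ts
// Guard the now-optional logprob before doing arithmetic with it.
function sumLogprobs(tokens: TextGenerationOutputToken[]): number {
  return tokens.reduce((acc, t) => acc + (t.logprob ?? 0), 0);
}

// top_tokens is Array<TextGenerationOutputToken[]>: the outer index is the
// generated position, the inner list is ranked candidates for that position.
function topAlternatives(details: TextGenerationOutputDetails): string[] {
  return (details.top_tokens ?? []).map((candidates) => candidates[0]?.text ?? "");
}
```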
@@ -3285,4 +3471,4 @@ declare namespace index {
   };
 }
 
-export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, AddedToken, AudioClassificationInput, AudioClassificationOutput, AudioClassificationOutputElement, AudioClassificationParameters, AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput, AutomaticSpeechRecognitionOutputChunk, AutomaticSpeechRecognitionParameters, BoundingBox, ChatMessage, ClassificationOutputTransform$1 as ClassificationOutputTransform, DepthEstimationInput, DepthEstimationOutput, DocumentQuestionAnsweringInput, DocumentQuestionAnsweringInputData, DocumentQuestionAnsweringOutput, DocumentQuestionAnsweringOutputElement, DocumentQuestionAnsweringParameters, EarlyStoppingUnion$2 as EarlyStoppingUnion, ExampleRepo, FeatureExtractionInput, FeatureExtractionOutput, FillMaskInput, FillMaskOutput, FillMaskOutputElement, FillMaskParameters, FinishReason, GenerationParameters$2 as GenerationParameters, ImageClassificationInput, ImageClassificationOutput, ImageClassificationOutputElement, ImageClassificationParameters, ImageSegmentationInput, ImageSegmentationOutput, ImageSegmentationOutputElement, ImageSegmentationParameters, ImageSegmentationSubtask, ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToTextInput, ImageToTextOutput, ImageToTextParameters, InferenceDisplayability, LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS, LibraryUiElement, MAPPING_DEFAULT_WIDGET, MODALITIES, MODALITY_LABELS, MODEL_LIBRARIES_UI_ELEMENTS, Modality, ModelData, ModelLibraryKey, ObjectDetectionInput, ObjectDetectionOutput, ObjectDetectionOutputElement, ObjectDetectionParameters, PIPELINE_DATA, PIPELINE_TYPES, PIPELINE_TYPES_SET, PipelineData, PipelineType, PrefillToken, QuestionAnsweringInput, QuestionAnsweringInputData, QuestionAnsweringOutput, QuestionAnsweringOutputElement, QuestionAnsweringParameters, SPECIAL_TOKENS_ATTRIBUTES, SUBTASK_TYPES, SentenceSimilarityInput, SentenceSimilarityInputData, SentenceSimilarityOutput, SpecialTokensMap, SummarizationInput, SummarizationOutput, TASKS_DATA, TASKS_MODEL_LIBRARIES, TableQuestionAnsweringInput, TableQuestionAnsweringInputData, TableQuestionAnsweringOutput, TableQuestionAnsweringOutputElement, TargetSize$1 as TargetSize, TaskData, TaskDataCustom, TaskDemo, TaskDemoEntry, Text2TextGenerationParameters, Text2TextGenerationTruncationStrategy, TextClassificationInput, TextClassificationOutput, TextClassificationOutputElement, TextClassificationParameters, TextGenerationInput, TextGenerationOutput, TextGenerationOutputDetails, TextGenerationParameters, TextGenerationSequenceDetails, TextToAudioParameters, TextToImageInput, TextToImageOutput, TextToImageParameters, TextToSpeechInput, TextToSpeechOutput, Token, TokenClassificationAggregationStrategy, TokenClassificationInput, TokenClassificationOutput, TokenClassificationOutputElement, TokenClassificationParameters, TokenizerConfig, TransformersInfo, TranslationInput, TranslationOutput, VideoClassificationInput, VideoClassificationOutput, VideoClassificationOutputElement, VideoClassificationParameters, VisualQuestionAnsweringInput, VisualQuestionAnsweringInputData, VisualQuestionAnsweringOutput, VisualQuestionAnsweringOutputElement, VisualQuestionAnsweringParameters, WidgetExample, WidgetExampleAssetAndPromptInput, WidgetExampleAssetAndTextInput, WidgetExampleAssetAndZeroShotInput, WidgetExampleAssetInput, WidgetExampleAttribute, WidgetExampleChatInput, WidgetExampleOutput, WidgetExampleOutputAnswerScore, WidgetExampleOutputLabels, WidgetExampleOutputText, WidgetExampleOutputUrl, WidgetExampleSentenceSimilarityInput, WidgetExampleStructuredDataInput, WidgetExampleTableDataInput, WidgetExampleTextAndContextInput, WidgetExampleTextAndTableInput, WidgetExampleTextInput, WidgetExampleZeroShotTextInput, WidgetType, WordBox, ZeroShotClassificationInput, ZeroShotClassificationInputData, ZeroShotClassificationOutput, ZeroShotClassificationOutputElement, ZeroShotClassificationParameters, ZeroShotImageClassificationInput, ZeroShotImageClassificationInputData, ZeroShotImageClassificationOutput, ZeroShotImageClassificationOutputElement, ZeroShotImageClassificationParameters, ZeroShotObjectDetectionInput, ZeroShotObjectDetectionInputData, ZeroShotObjectDetectionOutput, ZeroShotObjectDetectionOutputElement, index as snippets };
+export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, AddedToken, AudioClassificationInput, AudioClassificationOutput, AudioClassificationOutputElement, AudioClassificationParameters, AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput, AutomaticSpeechRecognitionOutputChunk, AutomaticSpeechRecognitionParameters, BoundingBox, ChatCompletionFinishReason, ChatCompletionInput, ChatCompletionInputMessage, ChatCompletionOutput, ChatCompletionOutputChoice, ChatCompletionOutputChoiceMessage, ChatCompletionStreamOutput, ChatCompletionStreamOutputChoice, ChatCompletionStreamOutputDelta, ChatMessage, ClassificationOutputTransform$1 as ClassificationOutputTransform, DepthEstimationInput, DepthEstimationOutput, DocumentQuestionAnsweringInput, DocumentQuestionAnsweringInputData, DocumentQuestionAnsweringOutput, DocumentQuestionAnsweringOutputElement, DocumentQuestionAnsweringParameters, EarlyStoppingUnion$2 as EarlyStoppingUnion, ExampleRepo, FeatureExtractionInput, FeatureExtractionOutput, FillMaskInput, FillMaskOutput, FillMaskOutputElement, FillMaskParameters, GenerationParameters$2 as GenerationParameters, ImageClassificationInput, ImageClassificationOutput, ImageClassificationOutputElement, ImageClassificationParameters, ImageSegmentationInput, ImageSegmentationOutput, ImageSegmentationOutputElement, ImageSegmentationParameters, ImageSegmentationSubtask, ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToTextInput, ImageToTextOutput, ImageToTextParameters, InferenceDisplayability, LIBRARY_TASK_MAPPING, LibraryUiElement, MAPPING_DEFAULT_WIDGET, MODALITIES, MODALITY_LABELS, MODEL_LIBRARIES_UI_ELEMENTS, Modality, ModelData, ModelLibraryKey, ObjectDetectionInput, ObjectDetectionOutput, ObjectDetectionOutputElement, ObjectDetectionParameters, PIPELINE_DATA, PIPELINE_TYPES, PIPELINE_TYPES_SET, PipelineData, PipelineType, QuestionAnsweringInput, QuestionAnsweringInputData, QuestionAnsweringOutput, QuestionAnsweringOutputElement, QuestionAnsweringParameters, SPECIAL_TOKENS_ATTRIBUTES, SUBTASK_TYPES, SentenceSimilarityInput, SentenceSimilarityInputData, SentenceSimilarityOutput, SpecialTokensMap, SummarizationInput, SummarizationOutput, TASKS_DATA, TASKS_MODEL_LIBRARIES, TableQuestionAnsweringInput, TableQuestionAnsweringInputData, TableQuestionAnsweringOutput, TableQuestionAnsweringOutputElement, TargetSize$1 as TargetSize, TaskData, TaskDataCustom, TaskDemo, TaskDemoEntry, Text2TextGenerationParameters, Text2TextGenerationTruncationStrategy, TextClassificationInput, TextClassificationOutput, TextClassificationOutputElement, TextClassificationParameters, TextGenerationFinishReason, TextGenerationInput, TextGenerationOutput, TextGenerationOutputDetails, TextGenerationOutputSequenceDetails, TextGenerationOutputToken, TextGenerationParameters, TextGenerationPrefillToken, TextToAudioParameters, TextToImageInput, TextToImageOutput, TextToImageParameters, TextToSpeechInput, TextToSpeechOutput, TokenClassificationAggregationStrategy, TokenClassificationInput, TokenClassificationOutput, TokenClassificationOutputElement, TokenClassificationParameters, TokenizerConfig, TransformersInfo, TranslationInput, TranslationOutput, VideoClassificationInput, VideoClassificationOutput, VideoClassificationOutputElement, VideoClassificationParameters, VisualQuestionAnsweringInput, VisualQuestionAnsweringInputData, VisualQuestionAnsweringOutput, VisualQuestionAnsweringOutputElement, VisualQuestionAnsweringParameters, WidgetExample, WidgetExampleAssetAndPromptInput, WidgetExampleAssetAndTextInput, WidgetExampleAssetAndZeroShotInput, WidgetExampleAssetInput, WidgetExampleAttribute, WidgetExampleChatInput, WidgetExampleOutput, WidgetExampleOutputAnswerScore, WidgetExampleOutputLabels, WidgetExampleOutputText, WidgetExampleOutputUrl, WidgetExampleSentenceSimilarityInput, WidgetExampleStructuredDataInput, WidgetExampleTableDataInput, WidgetExampleTextAndContextInput, WidgetExampleTextAndTableInput, WidgetExampleTextInput, WidgetExampleZeroShotTextInput, WidgetType, WordBox, ZeroShotClassificationInput, ZeroShotClassificationInputData, ZeroShotClassificationOutput, ZeroShotClassificationOutputElement, ZeroShotClassificationParameters, ZeroShotImageClassificationInput, ZeroShotImageClassificationInputData, ZeroShotImageClassificationOutput, ZeroShotImageClassificationOutputElement, ZeroShotImageClassificationParameters, ZeroShotObjectDetectionInput, ZeroShotObjectDetectionInputData, ZeroShotObjectDetectionOutput, ZeroShotObjectDetectionOutputElement, index as snippets };
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ var __export = (target, all) => {
 };
 
 // src/library-to-tasks.ts
-var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
+var LIBRARY_TASK_MAPPING = {
   "adapter-transformers": ["question-answering", "text-classification", "token-classification"],
   allennlp: ["question-answering"],
   asteroid: [
@@ -41,6 +41,24 @@ var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
   ],
   stanza: ["token-classification"],
   timm: ["image-classification"],
+  transformers: [
+    "audio-classification",
+    "automatic-speech-recognition",
+    "depth-estimation",
+    "document-question-answering",
+    "fill-mask",
+    "image-classification",
+    "image-segmentation",
+    "image-to-text",
+    "image-to-image",
+    "object-detection",
+    "question-answering",
+    "text-generation",
+    "text2text-generation",
+    "visual-question-answering",
+    "zero-shot-classification",
+    "zero-shot-image-classification"
+  ],
   mindspore: ["image-classification"]
 };
 
@@ -3812,7 +3830,7 @@ var data_default34 = taskData34;
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
   "audio-classification": ["speechbrain", "transformers", "transformers.js"],
-  "audio-to-audio": ["asteroid", "speechbrain"],
+  "audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   "depth-estimation": ["transformers", "transformers.js"],
   "document-question-answering": ["transformers", "transformers.js"],
@@ -4049,7 +4067,7 @@ var flair = (model) => [
 tagger = SequenceTagger.load("${model.id}")`
 ];
 var gliner = (model) => [
-  `from model import GLiNER
+  `from gliner import GLiNER
 
 model = GLiNER.from_pretrained("${model.id}")`
 ];
@@ -4399,6 +4417,11 @@ var mlx = (model) => [
 export HF_HUB_ENABLE_HF_TRANSFER=1
 huggingface-cli download --local-dir ${nameWithoutNamespace(model.id)} ${model.id}`
 ];
+var mlxim = (model) => [
+  `from mlxim.model import create_model
+
+model = create_model(${model.id})`
+];
 var nemo = (model) => {
   let command = void 0;
   if (model.tags?.includes("automatic-speech-recognition")) {
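The mlxim builder above is a plain template over ModelData, wired into the registry through the "mlx-image" entry's `snippets` field below. A sketch with a hypothetical model id; the cast stands in for a full ModelData object:

```ts
import { MODEL_LIBRARIES_UI_ELEMENTS } from "@huggingface/tasks";
import type { ModelData } from "@huggingface/tasks";

// Hypothetical repo id; a real ModelData carries many more fields.
const model = { id: "someuser/resnet18-mlxim", tags: [] } as unknown as ModelData;

const [snippet] = MODEL_LIBRARIES_UI_ELEMENTS["mlx-image"].snippets(model);
// snippet:
//   from mlxim.model import create_model
//
//   model = create_model(someuser/resnet18-mlxim)
// (the id is interpolated verbatim, exactly as the template in this version writes it)
```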
@@ -4558,6 +4581,15 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
       term: { path: "gliner_config.json" }
     }
   },
+  grok: {
+    prettyLabel: "Grok",
+    repoName: "Grok",
+    repoUrl: "https://github.com/xai-org/grok-1",
+    filter: false,
+    countDownloads: {
+      terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] }
+    }
+  },
   keras: {
     prettyLabel: "Keras",
     repoName: "Keras",
@@ -4593,6 +4625,15 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     snippets: mlx,
     filter: true
   },
+  "mlx-image": {
+    prettyLabel: "mlx-image",
+    repoName: "mlx-image",
+    repoUrl: "https://github.com/riccardomusmeci/mlx-image",
+    docsUrl: "https://huggingface.co/docs/hub/mlx-image",
+    snippets: mlxim,
+    filter: false,
+    countDownloads: { term: { path: "model.safetensors" } }
+  },
   nemo: {
     prettyLabel: "NeMo",
     repoName: "NeMo",
@@ -5265,7 +5306,7 @@ export {
   ALL_DISPLAY_MODEL_LIBRARY_KEYS,
   ALL_MODEL_LIBRARY_KEYS,
   InferenceDisplayability,
-  LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
+  LIBRARY_TASK_MAPPING,
   MAPPING_DEFAULT_WIDGET,
   MODALITIES,
   MODALITY_LABELS,
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@8.10.5",
-  "version": "0.6.0",
+  "version": "0.7.0",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -33,7 +33,7 @@
   "license": "MIT",
   "devDependencies": {
     "@types/node": "^20.11.5",
-    "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz"
+    "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz"
   },
   "scripts": {
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
package/src/index.ts CHANGED
@@ -1,4 +1,4 @@
-export { LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS } from "./library-to-tasks";
+export { LIBRARY_TASK_MAPPING } from "./library-to-tasks";
 export { MAPPING_DEFAULT_WIDGET } from "./default-widget-inputs";
 export type { TaskData, TaskDemo, TaskDemoEntry, ExampleRepo } from "./tasks";
 export * from "./tasks";
package/src/library-to-tasks.ts CHANGED
@@ -2,13 +2,13 @@ import type { ModelLibraryKey } from "./model-libraries";
 import type { PipelineType } from "./pipelines";
 
 /**
- * Mapping from library name (excluding Transformers) to its supported tasks.
+ * Mapping from library name to its supported tasks.
  * Inference API (serverless) should be disabled for all other (library, task) pairs beyond this mapping.
- * As an exception, we assume Transformers supports all inference tasks.
- * This mapping is generated automatically by "python-api-export-tasks" action in huggingface/api-inference-community repo upon merge.
- * Ref: https://github.com/huggingface/api-inference-community/pull/158
+ * This mapping is partially generated automatically by "python-api-export-tasks" action in
+ * huggingface/api-inference-community repo upon merge. For transformers, the mapping is manually
+ * based on api-inference.
  */
-export const LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: Partial<Record<ModelLibraryKey, PipelineType[]>> = {
+export const LIBRARY_TASK_MAPPING: Partial<Record<ModelLibraryKey, PipelineType[]>> = {
   "adapter-transformers": ["question-answering", "text-classification", "token-classification"],
   allennlp: ["question-answering"],
   asteroid: [
@@ -44,5 +44,23 @@ export const LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: Partial<Record<ModelLi
   ],
   stanza: ["token-classification"],
   timm: ["image-classification"],
+  transformers: [
+    "audio-classification",
+    "automatic-speech-recognition",
+    "depth-estimation",
+    "document-question-answering",
+    "fill-mask",
+    "image-classification",
+    "image-segmentation",
+    "image-to-text",
+    "image-to-image",
+    "object-detection",
+    "question-answering",
+    "text-generation",
+    "text2text-generation",
+    "visual-question-answering",
+    "zero-shot-classification",
+    "zero-shot-image-classification",
+  ],
   mindspore: ["image-classification"],
 };
package/src/model-libraries-snippets.ts CHANGED
@@ -141,7 +141,7 @@ tagger = SequenceTagger.load("${model.id}")`,
 ];
 
 export const gliner = (model: ModelData): string[] => [
-  `from model import GLiNER
+  `from gliner import GLiNER
 
 model = GLiNER.from_pretrained("${model.id}")`,
 ];
@@ -538,6 +538,12 @@ export HF_HUB_ENABLE_HF_TRANSFER=1
 huggingface-cli download --local-dir ${nameWithoutNamespace(model.id)} ${model.id}`,
 ];
 
+export const mlxim = (model: ModelData): string[] => [
+  `from mlxim.model import create_model
+
+model = create_model(${model.id})`,
+];
+
 export const nemo = (model: ModelData): string[] => {
   let command: string[] | undefined = undefined;
   // Resolve the tag to a nemo domain/sub-domain