@huggingface/tasks 0.12.0 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/index.cjs +185 -57
  2. package/dist/index.js +185 -57
  3. package/dist/src/local-apps.d.ts +7 -0
  4. package/dist/src/local-apps.d.ts.map +1 -1
  5. package/dist/src/model-data.d.ts +4 -0
  6. package/dist/src/model-data.d.ts.map +1 -1
  7. package/dist/src/model-libraries-snippets.d.ts +1 -0
  8. package/dist/src/model-libraries-snippets.d.ts.map +1 -1
  9. package/dist/src/model-libraries.d.ts +9 -2
  10. package/dist/src/model-libraries.d.ts.map +1 -1
  11. package/dist/src/tasks/audio-classification/data.d.ts.map +1 -1
  12. package/dist/src/tasks/audio-to-audio/data.d.ts.map +1 -1
  13. package/dist/src/tasks/automatic-speech-recognition/data.d.ts.map +1 -1
  14. package/dist/src/tasks/document-question-answering/data.d.ts.map +1 -1
  15. package/dist/src/tasks/question-answering/data.d.ts.map +1 -1
  16. package/dist/src/tasks/text-classification/data.d.ts.map +1 -1
  17. package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
  18. package/dist/src/tasks/token-classification/data.d.ts.map +1 -1
  19. package/dist/src/tasks/translation/data.d.ts.map +1 -1
  20. package/dist/src/tasks/zero-shot-classification/data.d.ts.map +1 -1
  21. package/package.json +1 -1
  22. package/src/local-apps.ts +75 -2
  23. package/src/model-data.ts +4 -0
  24. package/src/model-libraries-snippets.ts +9 -0
  25. package/src/model-libraries.ts +7 -0
  26. package/src/tasks/audio-classification/data.ts +8 -4
  27. package/src/tasks/audio-to-audio/data.ts +5 -1
  28. package/src/tasks/automatic-speech-recognition/data.ts +6 -2
  29. package/src/tasks/document-question-answering/data.ts +7 -3
  30. package/src/tasks/fill-mask/data.ts +3 -3
  31. package/src/tasks/image-segmentation/data.ts +1 -1
  32. package/src/tasks/image-to-image/data.ts +1 -1
  33. package/src/tasks/image-to-text/data.ts +1 -1
  34. package/src/tasks/question-answering/data.ts +5 -1
  35. package/src/tasks/sentence-similarity/data.ts +3 -3
  36. package/src/tasks/summarization/data.ts +2 -2
  37. package/src/tasks/text-classification/data.ts +18 -6
  38. package/src/tasks/text-generation/data.ts +3 -3
  39. package/src/tasks/text-to-image/data.ts +1 -1
  40. package/src/tasks/text-to-speech/data.ts +7 -3
  41. package/src/tasks/token-classification/data.ts +11 -3
  42. package/src/tasks/translation/data.ts +9 -8
  43. package/src/tasks/video-classification/data.ts +3 -3
  44. package/src/tasks/visual-question-answering/data.ts +2 -2
  45. package/src/tasks/zero-shot-classification/data.ts +8 -4
  46. package/src/tasks/zero-shot-image-classification/data.ts +2 -2
package/dist/src/model-libraries.d.ts CHANGED
@@ -651,6 +651,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
          docsUrl: string;
          snippets: (model: ModelData) => string[];
      };
+     yolov10: {
+         prettyLabel: string;
+         repoName: string;
+         repoUrl: string;
+         docsUrl: string;
+         snippets: (model: ModelData) => string[];
+     };
      whisperkit: {
          prettyLabel: string;
          repoName: string;
@@ -661,6 +668,6 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
      };
  };
  export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
- export declare const ALL_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
- export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
+ export declare const ALL_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "yolov10" | "whisperkit")[];
+ export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "yolov10" | "whisperkit")[];
  //# sourceMappingURL=model-libraries.d.ts.map
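In consumer code, the widened unions mean "yolov10" now type-checks wherever a ModelLibraryKey is expected. A minimal sketch, assuming the package root re-exports these symbols as in earlier releases:

import { MODEL_LIBRARIES_UI_ELEMENTS, type ModelLibraryKey } from "@huggingface/tasks";

// "yolov10" is now a valid member of the ModelLibraryKey union.
const key: ModelLibraryKey = "yolov10";
console.log(MODEL_LIBRARIES_UI_ELEMENTS[key].prettyLabel); // "YOLOv10"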
package/dist/src/model-libraries.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEtE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgmBI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,4mCAAgE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,4mCAQ1B,CAAC"}
+ {"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEtE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAumBI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,wnCAAgE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,wnCAQ1B,CAAC"}
package/dist/src/tasks/audio-classification/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAwEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA4Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/audio-to-audio/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio-to-audio/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA6Df,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio-to-audio/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAiEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/automatic-speech-recognition/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAyEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA6Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/document-question-answering/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/document-question-answering/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA4Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/document-question-answering/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAgFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/question-answering/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/question-answering/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAkEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/question-answering/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAsEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/text-classification/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAsFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAkGf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/text-to-speech/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-to-speech/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAiEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-to-speech/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAqEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/token-classification/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/token-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA+Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/token-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAuFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/translation/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/translation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAgEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/translation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAiEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/zero-shot-classification/data.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/zero-shot-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA6Df,CAAC;AAEF,eAAe,QAAQ,CAAC"}
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/zero-shot-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAiEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
      "name": "@huggingface/tasks",
      "packageManager": "pnpm@8.10.5",
-     "version": "0.12.0",
+     "version": "0.12.2",
      "description": "List of ML tasks for huggingface.co/tasks",
      "repository": "https://github.com/huggingface/huggingface.js.git",
      "publishConfig": {
package/src/local-apps.ts CHANGED
@@ -58,11 +58,30 @@ export type LocalApp = {
      }
  );

- // eslint-disable-next-line @typescript-eslint/no-unused-vars
- function isGgufModel(model: ModelData) {
+ function isGgufModel(model: ModelData): boolean {
      return model.tags.includes("gguf");
  }

+ function isAwqModel(model: ModelData): boolean {
+     return model.config?.quantization_config?.quant_method === "awq";
+ }
+
+ function isGptqModel(model: ModelData): boolean {
+     return model.config?.quantization_config?.quant_method === "gptq";
+ }
+
+ function isAqlmModel(model: ModelData): boolean {
+     return model.config?.quantization_config?.quant_method === "aqlm";
+ }
+
+ function isMarlinModel(model: ModelData): boolean {
+     return model.config?.quantization_config?.quant_method === "marlin";
+ }
+
+ function isTransformersModel(model: ModelData): boolean {
+     return model.tags.includes("transformers");
+ }
+
  function isLlamaCppGgufModel(model: ModelData) {
      return !!model.gguf?.context_length;
  }
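The new predicates are module-private, so the quickest way to see what they match is to evaluate the same expressions against a stub payload. A minimal sketch; the repo id and object below are hypothetical, with only the fields the predicates read filled in:

import type { ModelData } from "@huggingface/tasks";

// Hypothetical minimal stand-in for a hub model payload.
const stub = {
    id: "user/some-awq-model", // hypothetical repo id
    tags: ["transformers"],
    config: { quantization_config: { quant_method: "awq" } },
} as unknown as ModelData;

// The same expressions the private helpers evaluate:
console.log(stub.config?.quantization_config?.quant_method === "awq"); // true (isAwqModel)
console.log(stub.tags.includes("gguf")); // false (isGgufModel)
console.log(stub.tags.includes("transformers")); // true (isTransformersModel)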
@@ -127,6 +146,47 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
      ];
  };

+ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
+     const runCommand = [
+         "",
+         "# Call the server using curl:",
+         `curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
+         `    -H "Content-Type: application/json" \\`,
+         `    --data '{`,
+         `        "model": "${model.id}",`,
+         `        "messages": [`,
+         `            {"role": "user", "content": "Hello!"}`,
+         `        ]`,
+         `    }'`,
+     ];
+     return [
+         {
+             title: "Install from pip",
+             setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
+             content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
+         },
+         {
+             title: "Use Docker images",
+             setup: [
+                 "# Deploy with docker on Linux:",
+                 `docker run --runtime nvidia --gpus all \\`,
+                 `    --name my_vllm_container \\`,
+                 `    -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
+                 `    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
+                 `    -p 8000:8000 \\`,
+                 `    --ipc=host \\`,
+                 `    vllm/vllm-openai:latest \\`,
+                 `    --model ${model.id}`,
+             ].join("\n"),
+             content: [
+                 "# Load and run the model:",
+                 `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
+                 ...runCommand,
+             ].join("\n"),
+         },
+     ];
+ };
+
  /**
   * Add your new local app here.
   *
@@ -146,6 +206,19 @@ export const LOCAL_APPS = {
          displayOnModelPage: isLlamaCppGgufModel,
          snippet: snippetLlamacpp,
      },
+     vllm: {
+         prettyLabel: "vLLM",
+         docsUrl: "https://docs.vllm.ai",
+         mainTask: "text-generation",
+         displayOnModelPage: (model: ModelData) =>
+             isAwqModel(model) ||
+             isGptqModel(model) ||
+             isAqlmModel(model) ||
+             isMarlinModel(model) ||
+             isGgufModel(model) ||
+             isTransformersModel(model),
+         snippet: snippetVllm,
+     },
      lmstudio: {
          prettyLabel: "LM Studio",
          docsUrl: "https://lmstudio.ai",
package/src/model-data.ts CHANGED
@@ -38,6 +38,10 @@ export interface ModelData {
          bits?: number;
          load_in_4bit?: boolean;
          load_in_8bit?: boolean;
+         /**
+          * awq, gptq, aqlm, marlin, … Used by vLLM
+          */
+         quant_method?: string;
      };
      tokenizer_config?: TokenizerConfig;
      adapter_transformers?: {
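The new field simply mirrors what quantized checkpoints already publish in their config.json. A hypothetical excerpt, typed against the interface above:

import type { ModelData } from "@huggingface/tasks";

// Hypothetical quantized checkpoint config; quant_method is what vLLM keys off.
const config: ModelData["config"] = {
    quantization_config: {
        quant_method: "gptq", // e.g. "awq" | "gptq" | "aqlm" | "marlin"
        bits: 4,
    },
};
console.log(config?.quantization_config?.quant_method); // "gptq"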
package/src/model-libraries-snippets.ts CHANGED
@@ -891,6 +891,15 @@ wavs = chat.infer(texts, )
  torchaudio.save("output1.wav", torch.from_numpy(wavs[0]), 24000)`,
  ];

+ export const yolov10 = (model: ModelData): string[] => [
+     `from ultralytics import YOLOv10
+
+ model = YOLOv10.from_pretrained("${model.id}")
+ source = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+ model.predict(source=source, save=True)
+ `,
+ ];
+
  export const birefnet = (model: ModelData): string[] => [
      `# Option 1: use with transformers

package/src/model-libraries.ts CHANGED
@@ -657,6 +657,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
          docsUrl: "https://github.com/jasonppy/VoiceCraft",
          snippets: snippets.voicecraft,
      },
+     yolov10: {
+         prettyLabel: "YOLOv10",
+         repoName: "yolov10",
+         repoUrl: "https://github.com/THU-MIG/yolov10",
+         docsUrl: "https://github.com/THU-MIG/yolov10",
+         snippets: snippets.yolov10,
+     },
      whisperkit: {
          prettyLabel: "WhisperKit",
          repoName: "WhisperKit",
package/src/tasks/audio-classification/data.ts CHANGED
@@ -4,7 +4,11 @@ const taskData: TaskDataCustom = {
      datasets: [
          {
              description: "A benchmark of 10 different audio tasks.",
-             id: "superb",
+             id: "s3prl/superb",
+         },
+         {
+             description: "A dataset of YouTube clips and their sound categories.",
+             id: "agkphysics/AudioSet",
          },
      ],
      demo: {
@@ -50,11 +54,11 @@ const taskData: TaskDataCustom = {
      ],
      models: [
          {
-             description: "An easy-to-use model for Command Recognition.",
+             description: "An easy-to-use model for command recognition.",
              id: "speechbrain/google_speech_command_xvector",
          },
          {
-             description: "An Emotion Recognition model.",
+             description: "An emotion recognition model.",
              id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
          },
          {
@@ -70,7 +74,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
-     widgetModels: ["facebook/mms-lid-126"],
+     widgetModels: ["MIT/ast-finetuned-audioset-10-10-0.4593"],
      youtubeId: "KWwzcmG98Ds",
  };

package/src/tasks/audio-to-audio/data.ts CHANGED
@@ -44,7 +44,11 @@ const taskData: TaskDataCustom = {
          },
          {
              description: "A speech enhancement model.",
-             id: "speechbrain/metricgan-plus-voicebank",
+             id: "ResembleAI/resemble-enhance",
+         },
+         {
+             description: "A model that can change the voice in a speech recording.",
+             id: "microsoft/speecht5_vc",
          },
      ],
      spaces: [
package/src/tasks/automatic-speech-recognition/data.ts CHANGED
@@ -7,8 +7,8 @@ const taskData: TaskDataCustom = {
              id: "mozilla-foundation/common_voice_17_0",
          },
          {
-             description: "An English dataset with 1,000 hours of data.",
-             id: "librispeech_asr",
+             description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
+             id: "parler-tts/mls_eng",
          },
          {
              description: "A multi-lingual audio dataset with 370K hours of audio.",
@@ -54,6 +54,10 @@ const taskData: TaskDataCustom = {
              description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
              id: "facebook/seamless-m4t-v2-large",
          },
+         {
+             description: "A powerful speaker diarization model.",
+             id: "pyannote/speaker-diarization-3.1",
+         },
      ],
      spaces: [
          {
package/src/tasks/document-question-answering/data.ts CHANGED
@@ -46,11 +46,15 @@ const taskData: TaskDataCustom = {
      ],
      models: [
          {
-             description: "A LayoutLM model for the document QA task, fine-tuned on DocVQA and SQuAD2.0.",
+             description: "A robust document question answering model.",
              id: "impira/layoutlm-document-qa",
          },
          {
-             description: "A special model for OCR-free Document QA task.",
+             description: "A document question answering model specialized in invoices.",
+             id: "impira/layoutlm-invoices",
+         },
+         {
+             description: "A special model for OCR-free document question answering.",
              id: "microsoft/udop-large",
          },
          {
@@ -74,7 +78,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
-     widgetModels: ["impira/layoutlm-document-qa"],
+     widgetModels: ["impira/layoutlm-invoices"],
      youtubeId: "",
  };

package/src/tasks/fill-mask/data.ts CHANGED
@@ -61,12 +61,12 @@ const taskData: TaskDataCustom = {
      ],
      models: [
          {
-             description: "A faster and smaller model than the famous BERT model.",
-             id: "distilbert-base-uncased",
+             description: "The famous BERT model.",
+             id: "google-bert/bert-base-uncased",
          },
          {
              description: "A multilingual model trained on 100 languages.",
-             id: "xlm-roberta-base",
+             id: "FacebookAI/xlm-roberta-base",
          },
      ],
      spaces: [],
package/src/tasks/image-segmentation/data.ts CHANGED
@@ -92,7 +92,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation.",
-     widgetModels: ["facebook/detr-resnet-50-panoptic"],
+     widgetModels: ["nvidia/segformer-b0-finetuned-ade-512-512"],
      youtubeId: "dKE8SIt9C-w",
  };

package/src/tasks/image-to-image/data.ts CHANGED
@@ -94,7 +94,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
-     widgetModels: ["lllyasviel/sd-controlnet-canny"],
+     widgetModels: ["stabilityai/stable-diffusion-2-inpainting"],
      youtubeId: "",
  };

package/src/tasks/image-to-text/data.ts CHANGED
@@ -75,7 +75,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Image to text models output a text from a given image. Image captioning or optical character recognition can be considered as the most common applications of image to text.",
-     widgetModels: ["Salesforce/blip-image-captioning-base"],
+     widgetModels: ["Salesforce/blip-image-captioning-large"],
      youtubeId: "",
  };

package/src/tasks/question-answering/data.ts CHANGED
@@ -52,7 +52,11 @@ const taskData: TaskDataCustom = {
              id: "deepset/roberta-base-squad2",
          },
          {
-             description: "A special model that can answer questions from tables!",
+             description: "A small yet robust model that can answer questions.",
+             id: "distilbert/distilbert-base-cased-distilled-squad",
+         },
+         {
+             description: "A special model that can answer questions from tables.",
              id: "google/tapas-base-finetuned-wtq",
          },
      ],
package/src/tasks/sentence-similarity/data.ts CHANGED
@@ -69,8 +69,8 @@ const taskData: TaskDataCustom = {
              id: "sentence-transformers/all-mpnet-base-v2",
          },
          {
-             description: "A multilingual model trained for FAQ retrieval.",
-             id: "clips/mfaq",
+             description: "A robust multilingual sentence similarity model.",
+             id: "BAAI/bge-m3",
          },
      ],
      spaces: [
@@ -94,7 +94,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
-     widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
+     widgetModels: ["BAAI/bge-small-en-v1.5"],
      youtubeId: "VCZq5AkbNEU",
  };

package/src/tasks/summarization/data.ts CHANGED
@@ -46,7 +46,7 @@ const taskData: TaskDataCustom = {
          },
          {
              description: "A summarization model trained on medical articles.",
-             id: "google/bigbird-pegasus-large-pubmed",
+             id: "Falconsai/medical_summarization",
          },
      ],
      spaces: [
@@ -69,7 +69,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
-     widgetModels: ["sshleifer/distilbart-cnn-12-6"],
+     widgetModels: ["facebook/bart-large-cnn"],
      youtubeId: "yHnr5Dk2zCI",
  };

package/src/tasks/text-classification/data.ts CHANGED
@@ -4,11 +4,11 @@ const taskData: TaskDataCustom = {
      datasets: [
          {
              description: "A widely used dataset used to benchmark multiple variants of text classification.",
-             id: "glue",
+             id: "nyu-mll/glue",
          },
          {
              description: "A text classification dataset used to benchmark natural language inference models",
-             id: "snli",
+             id: "stanfordnlp/snli",
          },
      ],
      demo: {
@@ -61,11 +61,23 @@ const taskData: TaskDataCustom = {
      models: [
          {
              description: "A robust model trained for sentiment analysis.",
-             id: "distilbert-base-uncased-finetuned-sst-2-english",
+             id: "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
          },
          {
-             description: "Multi-genre natural language inference model.",
-             id: "roberta-large-mnli",
+             description: "A sentiment analysis model specialized in financial sentiment.",
+             id: "ProsusAI/finbert",
+         },
+         {
+             description: "A sentiment analysis model specialized in analyzing tweets.",
+             id: "cardiffnlp/twitter-roberta-base-sentiment-latest",
+         },
+         {
+             description: "A model that can classify languages.",
+             id: "papluca/xlm-roberta-base-language-detection",
+         },
+         {
+             description: "A model that can classify text generation attacks.",
+             id: "meta-llama/Prompt-Guard-86M",
          },
      ],
      spaces: [
@@ -84,7 +96,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
-     widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
+     widgetModels: ["distilbert/distilbert-base-uncased-finetuned-sst-2-english"],
      youtubeId: "leNG9fN9FQU",
  };

package/src/tasks/text-generation/data.ts CHANGED
@@ -97,8 +97,8 @@ const taskData: TaskDataCustom = {
              id: "HuggingFaceH4/zephyr-chat",
          },
          {
-             description: "A text generation application that combines OpenAI and Hugging Face models.",
-             id: "microsoft/HuggingGPT",
+             description: "A leaderboard that ranks text generation models based on blind votes from people.",
+             id: "lmsys/chatbot-arena-leaderboard",
          },
          {
              description: "A chatbot to converse with a very powerful text generation model.",
@@ -107,7 +107,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
-     widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
+     widgetModels: ["mistralai/Mistral-Nemo-Instruct-2407"],
      youtubeId: "e9gNEAlsOvU",
  };

package/src/tasks/text-to-image/data.ts CHANGED
@@ -93,7 +93,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
-     widgetModels: ["CompVis/stable-diffusion-v1-4"],
+     widgetModels: ["black-forest-labs/FLUX.1-dev"],
      youtubeId: "",
  };

package/src/tasks/text-to-speech/data.ts CHANGED
@@ -9,7 +9,7 @@ const taskData: TaskDataCustom = {
          },
          {
              description: "Multi-speaker English dataset.",
-             id: "LibriTTS",
+             id: "mythicinfinity/libritts_r",
          },
      ],
      demo: {
@@ -36,11 +36,15 @@ const taskData: TaskDataCustom = {
      models: [
          {
              description: "A powerful TTS model.",
-             id: "suno/bark",
+             id: "parler-tts/parler-tts-large-v1",
          },
          {
              description: "A massively multi-lingual TTS model.",
-             id: "facebook/mms-tts",
+             id: "coqui/XTTS-v2",
+         },
+         {
+             description: "A robust TTS model.",
+             id: "metavoiceio/metavoice-1B-v0.1",
          },
          {
              description: "A prompt based, powerful TTS model.",
package/src/tasks/token-classification/data.ts CHANGED
@@ -4,12 +4,12 @@ const taskData: TaskDataCustom = {
      datasets: [
          {
              description: "A widely used dataset useful to benchmark named entity recognition models.",
-             id: "conll2003",
+             id: "eriktks/conll2003",
          },
          {
              description:
                  "A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.",
-             id: "wikiann",
+             id: "unimelb-nlp/wikiann",
          },
      ],
      demo: {
@@ -63,6 +63,14 @@ const taskData: TaskDataCustom = {
                  "A robust performance model to identify people, locations, organizations and names of miscellaneous entities.",
              id: "dslim/bert-base-NER",
          },
+         {
+             description: "A strong model to identify people, locations, organizations and names in multiple languages.",
+             id: "FacebookAI/xlm-roberta-large-finetuned-conll03-english",
+         },
+         {
+             description: "A token classification model specialized in medical entity recognition.",
+             id: "blaze999/Medical-NER",
+         },
          {
              description: "Flair models are typically the state of the art in named entity recognition tasks.",
              id: "flair/ner-english",
@@ -77,7 +85,7 @@ const taskData: TaskDataCustom = {
      ],
      summary:
          "Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.",
-     widgetModels: ["dslim/bert-base-NER"],
+     widgetModels: ["FacebookAI/xlm-roberta-large-finetuned-conll03-english"],
      youtubeId: "wVHdVlPScxA",
  };