@huggingface/tasks 0.13.2 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data.js";
12
12
  import * as snippets from "./snippets/index.js";
13
13
  export * from "./gguf.js";
14
14
  export { snippets };
15
+ export type { InferenceSnippet } from "./snippets/index.js";
15
16
  export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
16
17
  export type { HardwareSpec, SkuType } from "./hardware.js";
17
18
  export { LOCAL_APPS } from "./local-apps.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;AAEpB,OAAO,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC7D,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAE9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;AACpB,YAAY,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAG5D,OAAO,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC7D,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAE9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
@@ -170,6 +170,12 @@ export declare const PIPELINE_DATA: {
170
170
  modality: "audio";
171
171
  color: "green";
172
172
  };
173
+ "audio-text-to-text": {
174
+ name: string;
175
+ modality: "multimodal";
176
+ color: "red";
177
+ hideInDatasets: true;
178
+ };
173
179
  "voice-activity-detection": {
174
180
  name: string;
175
181
  modality: "audio";
@@ -421,5 +427,5 @@ export type PipelineType = keyof typeof PIPELINE_DATA;
421
427
  export type WidgetType = PipelineType | "conversational";
422
428
  export declare const PIPELINE_TYPES: PipelineType[];
423
429
  export declare const SUBTASK_TYPES: string[];
424
- export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
430
+ export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "audio-text-to-text" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
425
431
  //# sourceMappingURL=pipelines.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsmBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,EAAiC,YAAY,EAAE,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,ipCAA0B,CAAC"}
1
+ {"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4mBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,EAAiC,YAAY,EAAE,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,wqCAA0B,CAAC"}
@@ -309,6 +309,12 @@ exports.PIPELINE_DATA = {
309
309
  modality: "audio",
310
310
  color: "green",
311
311
  },
312
+ "audio-text-to-text": {
313
+ name: "Audio-Text-to-Text",
314
+ modality: "multimodal",
315
+ color: "red",
316
+ hideInDatasets: true,
317
+ },
312
318
  "voice-activity-detection": {
313
319
  name: "Voice Activity Detection",
314
320
  modality: "audio",
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cA6Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAyFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -6,12 +6,16 @@ const taskData = {
6
6
  description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
7
7
  id: "mozilla-foundation/common_voice_17_0",
8
8
  },
9
+ {
10
+ description: "Multilingual and diverse audio dataset with 101k hours of audio.",
11
+ id: "amphion/Emilia-Dataset",
12
+ },
9
13
  {
10
14
  description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
11
15
  id: "parler-tts/mls_eng",
12
16
  },
13
17
  {
14
- description: "A multi-lingual audio dataset with 370K hours of audio.",
18
+ description: "A multilingual audio dataset with 370K hours of audio.",
15
19
  id: "espnet/yodas",
16
20
  },
17
21
  ],
@@ -54,6 +58,10 @@ const taskData = {
54
58
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
55
59
  id: "facebook/seamless-m4t-v2-large",
56
60
  },
61
+ {
62
+ description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
63
+ id: "nvidia/canary-1b",
64
+ },
57
65
  {
58
66
  description: "Powerful speaker diarization model.",
59
67
  id: "pyannote/speaker-diarization-3.1",
@@ -65,13 +73,17 @@ const taskData = {
65
73
  id: "hf-audio/whisper-large-v3",
66
74
  },
67
75
  {
68
- description: "Fastest speech recognition application.",
69
- id: "sanchit-gandhi/whisper-jax",
76
+ description: "Latest ASR model from Useful Sensors.",
77
+ id: "mrfakename/Moonshinex",
70
78
  },
71
79
  {
72
80
  description: "A high quality speech and text translation model by Meta.",
73
81
  id: "facebook/seamless_m4t",
74
82
  },
83
+ {
84
+ description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
85
+ id: "nvidia/canary-1b",
86
+ },
75
87
  ],
76
88
  summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
77
89
  widgetModels: ["openai/whisper-large-v3"],
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA6DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAqDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA8DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAsDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
@@ -52,6 +52,7 @@ exports.TASKS_MODEL_LIBRARIES = {
52
52
  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
53
53
  "audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
54
54
  "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
55
+ "audio-text-to-text": [],
55
56
  "depth-estimation": ["transformers", "transformers.js"],
56
57
  "document-question-answering": ["transformers", "transformers.js"],
57
58
  "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
@@ -131,6 +132,7 @@ exports.TASKS_DATA = {
131
132
  "any-to-any": getData("any-to-any", data_js_16.default),
132
133
  "audio-classification": getData("audio-classification", data_js_1.default),
133
134
  "audio-to-audio": getData("audio-to-audio", data_js_2.default),
135
+ "audio-text-to-text": getData("audio-text-to-text", data_js_16.default),
134
136
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_js_3.default),
135
137
  "depth-estimation": getData("depth-estimation", data_js_15.default),
136
138
  "document-question-answering": getData("document-question-answering", data_js_4.default),
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAkDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAgEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -1,7 +1,16 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const taskData = {
4
- datasets: [],
4
+ datasets: [
5
+ {
6
+ description: "Widely used benchmark dataset for multiple Vision tasks.",
7
+ id: "merve/coco2017",
8
+ },
9
+ {
10
+ description: "Medical Imaging dataset of the Human Brain for segmentation and mask generating tasks",
11
+ id: "rocky93/BraTS_segmentation",
12
+ },
13
+ ],
5
14
  demo: {
6
15
  inputs: [
7
16
  {
@@ -16,7 +25,12 @@ const taskData = {
16
25
  },
17
26
  ],
18
27
  },
19
- metrics: [],
28
+ metrics: [
29
+ {
30
+ description: "IoU is used to measure the overlap between predicted mask and the ground truth mask.",
31
+ id: "Intersection over Union (IoU)",
32
+ },
33
+ ],
20
34
  models: [
21
35
  {
22
36
  description: "Small yet powerful mask generation model.",
@@ -12,6 +12,7 @@ export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data.js";
12
12
  import * as snippets from "./snippets/index.js";
13
13
  export * from "./gguf.js";
14
14
  export { snippets };
15
+ export type { InferenceSnippet } from "./snippets/index.js";
15
16
  export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
16
17
  export type { HardwareSpec, SkuType } from "./hardware.js";
17
18
  export { LOCAL_APPS } from "./local-apps.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;AAEpB,OAAO,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC7D,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAE9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;AACpB,YAAY,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAG5D,OAAO,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC7D,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAE9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
@@ -170,6 +170,12 @@ export declare const PIPELINE_DATA: {
170
170
  modality: "audio";
171
171
  color: "green";
172
172
  };
173
+ "audio-text-to-text": {
174
+ name: string;
175
+ modality: "multimodal";
176
+ color: "red";
177
+ hideInDatasets: true;
178
+ };
173
179
  "voice-activity-detection": {
174
180
  name: string;
175
181
  modality: "audio";
@@ -421,5 +427,5 @@ export type PipelineType = keyof typeof PIPELINE_DATA;
421
427
  export type WidgetType = PipelineType | "conversational";
422
428
  export declare const PIPELINE_TYPES: PipelineType[];
423
429
  export declare const SUBTASK_TYPES: string[];
424
- export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
430
+ export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "audio-text-to-text" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
425
431
  //# sourceMappingURL=pipelines.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsmBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,EAAiC,YAAY,EAAE,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,ipCAA0B,CAAC"}
1
+ {"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4mBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,EAAiC,YAAY,EAAE,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,wqCAA0B,CAAC"}
@@ -306,6 +306,12 @@ export const PIPELINE_DATA = {
306
306
  modality: "audio",
307
307
  color: "green",
308
308
  },
309
+ "audio-text-to-text": {
310
+ name: "Audio-Text-to-Text",
311
+ modality: "multimodal",
312
+ color: "red",
313
+ hideInDatasets: true,
314
+ },
309
315
  "voice-activity-detection": {
310
316
  name: "Voice Activity Detection",
311
317
  modality: "audio",
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cA6Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAyFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -4,12 +4,16 @@ const taskData = {
4
4
  description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
5
5
  id: "mozilla-foundation/common_voice_17_0",
6
6
  },
7
+ {
8
+ description: "Multilingual and diverse audio dataset with 101k hours of audio.",
9
+ id: "amphion/Emilia-Dataset",
10
+ },
7
11
  {
8
12
  description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
9
13
  id: "parler-tts/mls_eng",
10
14
  },
11
15
  {
12
- description: "A multi-lingual audio dataset with 370K hours of audio.",
16
+ description: "A multilingual audio dataset with 370K hours of audio.",
13
17
  id: "espnet/yodas",
14
18
  },
15
19
  ],
@@ -52,6 +56,10 @@ const taskData = {
52
56
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
53
57
  id: "facebook/seamless-m4t-v2-large",
54
58
  },
59
+ {
60
+ description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
61
+ id: "nvidia/canary-1b",
62
+ },
55
63
  {
56
64
  description: "Powerful speaker diarization model.",
57
65
  id: "pyannote/speaker-diarization-3.1",
@@ -63,13 +71,17 @@ const taskData = {
63
71
  id: "hf-audio/whisper-large-v3",
64
72
  },
65
73
  {
66
- description: "Fastest speech recognition application.",
67
- id: "sanchit-gandhi/whisper-jax",
74
+ description: "Latest ASR model from Useful Sensors.",
75
+ id: "mrfakename/Moonshinex",
68
76
  },
69
77
  {
70
78
  description: "A high quality speech and text translation model by Meta.",
71
79
  id: "facebook/seamless_m4t",
72
80
  },
81
+ {
82
+ description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
83
+ id: "nvidia/canary-1b",
84
+ },
73
85
  ],
74
86
  summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
75
87
  widgetModels: ["openai/whisper-large-v3"],
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA6DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAqDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA8DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAsDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
@@ -46,6 +46,7 @@ export const TASKS_MODEL_LIBRARIES = {
46
46
  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
47
47
  "audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
48
48
  "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
49
+ "audio-text-to-text": [],
49
50
  "depth-estimation": ["transformers", "transformers.js"],
50
51
  "document-question-answering": ["transformers", "transformers.js"],
51
52
  "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
@@ -125,6 +126,7 @@ export const TASKS_DATA = {
125
126
  "any-to-any": getData("any-to-any", placeholder),
126
127
  "audio-classification": getData("audio-classification", audioClassification),
127
128
  "audio-to-audio": getData("audio-to-audio", audioToAudio),
129
+ "audio-text-to-text": getData("audio-text-to-text", placeholder),
128
130
  "automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
129
131
  "depth-estimation": getData("depth-estimation", depthEstimation),
130
132
  "document-question-answering": getData("document-question-answering", documentQuestionAnswering),
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAkDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAgEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -1,5 +1,14 @@
1
1
  const taskData = {
2
- datasets: [],
2
+ datasets: [
3
+ {
4
+ description: "Widely used benchmark dataset for multiple Vision tasks.",
5
+ id: "merve/coco2017",
6
+ },
7
+ {
8
+ description: "Medical Imaging dataset of the Human Brain for segmentation and mask generating tasks",
9
+ id: "rocky93/BraTS_segmentation",
10
+ },
11
+ ],
3
12
  demo: {
4
13
  inputs: [
5
14
  {
@@ -14,7 +23,12 @@ const taskData = {
14
23
  },
15
24
  ],
16
25
  },
17
- metrics: [],
26
+ metrics: [
27
+ {
28
+ description: "IoU is used to measure the overlap between predicted mask and the ground truth mask.",
29
+ id: "Intersection over Union (IoU)",
30
+ },
31
+ ],
18
32
  models: [
19
33
  {
20
34
  description: "Small yet powerful mask generation model.",
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
3
  "packageManager": "pnpm@8.10.5",
4
- "version": "0.13.2",
4
+ "version": "0.13.3",
5
5
  "description": "List of ML tasks for huggingface.co/tasks",
6
6
  "repository": "https://github.com/huggingface/huggingface.js.git",
7
7
  "publishConfig": {
package/src/index.ts CHANGED
@@ -49,6 +49,8 @@ import * as snippets from "./snippets/index.js";
49
49
  export * from "./gguf.js";
50
50
 
51
51
  export { snippets };
52
+ export type { InferenceSnippet } from "./snippets/index.js";
53
+
52
54
 
53
55
  export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
54
56
  export type { HardwareSpec, SkuType } from "./hardware.js";
package/src/pipelines.ts CHANGED
@@ -355,6 +355,12 @@ export const PIPELINE_DATA = {
355
355
  modality: "audio",
356
356
  color: "green",
357
357
  },
358
+ "audio-text-to-text": {
359
+ name: "Audio-Text-to-Text",
360
+ modality: "multimodal",
361
+ color: "red",
362
+ hideInDatasets: true,
363
+ },
358
364
  "voice-activity-detection": {
359
365
  name: "Voice Activity Detection",
360
366
  modality: "audio",
@@ -6,12 +6,16 @@ const taskData: TaskDataCustom = {
6
6
  description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
7
7
  id: "mozilla-foundation/common_voice_17_0",
8
8
  },
9
+ {
10
+ description: "Multilingual and diverse audio dataset with 101k hours of audio.",
11
+ id: "amphion/Emilia-Dataset",
12
+ },
9
13
  {
10
14
  description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
11
15
  id: "parler-tts/mls_eng",
12
16
  },
13
17
  {
14
- description: "A multi-lingual audio dataset with 370K hours of audio.",
18
+ description: "A multilingual audio dataset with 370K hours of audio.",
15
19
  id: "espnet/yodas",
16
20
  },
17
21
  ],
@@ -54,6 +58,10 @@ const taskData: TaskDataCustom = {
54
58
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
55
59
  id: "facebook/seamless-m4t-v2-large",
56
60
  },
61
+ {
62
+ description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
63
+ id: "nvidia/canary-1b",
64
+ },
57
65
  {
58
66
  description: "Powerful speaker diarization model.",
59
67
  id: "pyannote/speaker-diarization-3.1",
@@ -65,13 +73,17 @@ const taskData: TaskDataCustom = {
65
73
  id: "hf-audio/whisper-large-v3",
66
74
  },
67
75
  {
68
- description: "Fastest speech recognition application.",
69
- id: "sanchit-gandhi/whisper-jax",
76
+ description: "Latest ASR model from Useful Sensors.",
77
+ id: "mrfakename/Moonshinex",
70
78
  },
71
79
  {
72
80
  description: "A high quality speech and text translation model by Meta.",
73
81
  id: "facebook/seamless_m4t",
74
82
  },
83
+ {
84
+ description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
85
+ id: "nvidia/canary-1b",
86
+ },
75
87
  ],
76
88
  summary:
77
89
  "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
@@ -116,6 +116,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
116
116
  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
117
117
  "audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
118
118
  "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
119
+ "audio-text-to-text": [],
119
120
  "depth-estimation": ["transformers", "transformers.js"],
120
121
  "document-question-answering": ["transformers", "transformers.js"],
121
122
  "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
@@ -197,6 +198,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
197
198
  "any-to-any": getData("any-to-any", placeholder),
198
199
  "audio-classification": getData("audio-classification", audioClassification),
199
200
  "audio-to-audio": getData("audio-to-audio", audioToAudio),
201
+ "audio-text-to-text": getData("audio-text-to-text", placeholder),
200
202
  "automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
201
203
  "depth-estimation": getData("depth-estimation", depthEstimation),
202
204
  "document-question-answering": getData("document-question-answering", documentQuestionAnswering),
@@ -12,6 +12,16 @@ Generating masks can facilitate learning, especially in semi or unsupervised lea
12
12
 
13
13
  For applications where humans are in the loop, masks highlight certain regions of images for humans to validate.
14
14
 
15
+ ### Medical Imaging
16
+
17
+ Mask generation models are used in medical imaging to aid in segmenting and analyzing specific regions.
18
+
19
+ ### Autonomous Vehicles
20
+
21
+ Mask generation models are used to create segments and masks for obstacles and other objects in view.
22
+
23
+ This page was made possible thanks to the efforts of [Raj Aryan](https://huggingface.co/thatrajaryan) and other contributors.
24
+
15
25
  ## Task Variants
16
26
 
17
27
  ### Segmentation
@@ -1,7 +1,16 @@
1
1
  import type { TaskDataCustom } from "../index.js";
2
2
 
3
3
  const taskData: TaskDataCustom = {
4
- datasets: [],
4
+ datasets: [
5
+ {
6
+ description: "Widely used benchmark dataset for multiple Vision tasks.",
7
+ id: "merve/coco2017",
8
+ },
9
+ {
10
+ description: "Medical Imaging dataset of the Human Brain for segmentation and mask generating tasks",
11
+ id: "rocky93/BraTS_segmentation",
12
+ },
13
+ ],
5
14
  demo: {
6
15
  inputs: [
7
16
  {
@@ -16,7 +25,12 @@ const taskData: TaskDataCustom = {
16
25
  },
17
26
  ],
18
27
  },
19
- metrics: [],
28
+ metrics: [
29
+ {
30
+ description: "IoU is used to measure the overlap between predicted mask and the ground truth mask.",
31
+ id: "Intersection over Union (IoU)",
32
+ },
33
+ ],
20
34
  models: [
21
35
  {
22
36
  description: "Small yet powerful mask generation model.",