@huggingface/tasks 0.13.2 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/index.d.ts +1 -0
- package/dist/commonjs/index.d.ts.map +1 -1
- package/dist/commonjs/pipelines.d.ts +7 -1
- package/dist/commonjs/pipelines.d.ts.map +1 -1
- package/dist/commonjs/pipelines.js +6 -0
- package/dist/commonjs/tasks/automatic-speech-recognition/data.d.ts.map +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/data.js +15 -3
- package/dist/commonjs/tasks/index.d.ts.map +1 -1
- package/dist/commonjs/tasks/index.js +2 -0
- package/dist/commonjs/tasks/mask-generation/data.d.ts.map +1 -1
- package/dist/commonjs/tasks/mask-generation/data.js +16 -2
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/pipelines.d.ts +7 -1
- package/dist/esm/pipelines.d.ts.map +1 -1
- package/dist/esm/pipelines.js +6 -0
- package/dist/esm/tasks/automatic-speech-recognition/data.d.ts.map +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/data.js +15 -3
- package/dist/esm/tasks/index.d.ts.map +1 -1
- package/dist/esm/tasks/index.js +2 -0
- package/dist/esm/tasks/mask-generation/data.d.ts.map +1 -1
- package/dist/esm/tasks/mask-generation/data.js +16 -2
- package/package.json +1 -1
- package/src/index.ts +2 -0
- package/src/pipelines.ts +6 -0
- package/src/tasks/automatic-speech-recognition/data.ts +15 -3
- package/src/tasks/index.ts +2 -0
- package/src/tasks/mask-generation/about.md +10 -0
- package/src/tasks/mask-generation/data.ts +16 -2
package/dist/commonjs/index.d.ts
CHANGED
|
@@ -12,6 +12,7 @@ export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data.js";
|
|
|
12
12
|
import * as snippets from "./snippets/index.js";
|
|
13
13
|
export * from "./gguf.js";
|
|
14
14
|
export { snippets };
|
|
15
|
+
export type { InferenceSnippet } from "./snippets/index.js";
|
|
15
16
|
export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
|
|
16
17
|
export type { HardwareSpec, SkuType } from "./hardware.js";
|
|
17
18
|
export { LOCAL_APPS } from "./local-apps.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;AACpB,YAAY,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAG5D,OAAO,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC7D,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAE9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
|
|
@@ -170,6 +170,12 @@ export declare const PIPELINE_DATA: {
|
|
|
170
170
|
modality: "audio";
|
|
171
171
|
color: "green";
|
|
172
172
|
};
|
|
173
|
+
"audio-text-to-text": {
|
|
174
|
+
name: string;
|
|
175
|
+
modality: "multimodal";
|
|
176
|
+
color: "red";
|
|
177
|
+
hideInDatasets: true;
|
|
178
|
+
};
|
|
173
179
|
"voice-activity-detection": {
|
|
174
180
|
name: string;
|
|
175
181
|
modality: "audio";
|
|
@@ -421,5 +427,5 @@ export type PipelineType = keyof typeof PIPELINE_DATA;
|
|
|
421
427
|
export type WidgetType = PipelineType | "conversational";
|
|
422
428
|
export declare const PIPELINE_TYPES: PipelineType[];
|
|
423
429
|
export declare const SUBTASK_TYPES: string[];
|
|
424
|
-
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
|
|
430
|
+
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "audio-text-to-text" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
|
|
425
431
|
//# sourceMappingURL=pipelines.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa
|
|
1
|
+
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4mBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,EAAiC,YAAY,EAAE,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,wqCAA0B,CAAC"}
|
|
@@ -309,6 +309,12 @@ exports.PIPELINE_DATA = {
|
|
|
309
309
|
modality: "audio",
|
|
310
310
|
color: "green",
|
|
311
311
|
},
|
|
312
|
+
"audio-text-to-text": {
|
|
313
|
+
name: "Audio-Text-to-Text",
|
|
314
|
+
modality: "multimodal",
|
|
315
|
+
color: "red",
|
|
316
|
+
hideInDatasets: true,
|
|
317
|
+
},
|
|
312
318
|
"voice-activity-detection": {
|
|
313
319
|
name: "Voice Activity Detection",
|
|
314
320
|
modality: "audio",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAyFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -6,12 +6,16 @@ const taskData = {
|
|
|
6
6
|
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
7
7
|
id: "mozilla-foundation/common_voice_17_0",
|
|
8
8
|
},
|
|
9
|
+
{
|
|
10
|
+
description: "Multilingual and diverse audio dataset with 101k hours of audio.",
|
|
11
|
+
id: "amphion/Emilia-Dataset",
|
|
12
|
+
},
|
|
9
13
|
{
|
|
10
14
|
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
|
|
11
15
|
id: "parler-tts/mls_eng",
|
|
12
16
|
},
|
|
13
17
|
{
|
|
14
|
-
description: "A
|
|
18
|
+
description: "A multilingual audio dataset with 370K hours of audio.",
|
|
15
19
|
id: "espnet/yodas",
|
|
16
20
|
},
|
|
17
21
|
],
|
|
@@ -54,6 +58,10 @@ const taskData = {
|
|
|
54
58
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
55
59
|
id: "facebook/seamless-m4t-v2-large",
|
|
56
60
|
},
|
|
61
|
+
{
|
|
62
|
+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
|
|
63
|
+
id: "nvidia/canary-1b",
|
|
64
|
+
},
|
|
57
65
|
{
|
|
58
66
|
description: "Powerful speaker diarization model.",
|
|
59
67
|
id: "pyannote/speaker-diarization-3.1",
|
|
@@ -65,13 +73,17 @@ const taskData = {
|
|
|
65
73
|
id: "hf-audio/whisper-large-v3",
|
|
66
74
|
},
|
|
67
75
|
{
|
|
68
|
-
description: "
|
|
69
|
-
id: "
|
|
76
|
+
description: "Latest ASR model from Useful Sensors.",
|
|
77
|
+
id: "mrfakename/Moonshinex",
|
|
70
78
|
},
|
|
71
79
|
{
|
|
72
80
|
description: "A high quality speech and text translation model by Meta.",
|
|
73
81
|
id: "facebook/seamless_m4t",
|
|
74
82
|
},
|
|
83
|
+
{
|
|
84
|
+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
|
|
85
|
+
id: "nvidia/canary-1b",
|
|
86
|
+
},
|
|
75
87
|
],
|
|
76
88
|
summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
|
|
77
89
|
widgetModels: ["openai/whisper-large-v3"],
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA8DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAsDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
|
|
@@ -52,6 +52,7 @@ exports.TASKS_MODEL_LIBRARIES = {
|
|
|
52
52
|
"audio-classification": ["speechbrain", "transformers", "transformers.js"],
|
|
53
53
|
"audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
|
|
54
54
|
"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
|
|
55
|
+
"audio-text-to-text": [],
|
|
55
56
|
"depth-estimation": ["transformers", "transformers.js"],
|
|
56
57
|
"document-question-answering": ["transformers", "transformers.js"],
|
|
57
58
|
"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
|
|
@@ -131,6 +132,7 @@ exports.TASKS_DATA = {
|
|
|
131
132
|
"any-to-any": getData("any-to-any", data_js_16.default),
|
|
132
133
|
"audio-classification": getData("audio-classification", data_js_1.default),
|
|
133
134
|
"audio-to-audio": getData("audio-to-audio", data_js_2.default),
|
|
135
|
+
"audio-text-to-text": getData("audio-text-to-text", data_js_16.default),
|
|
134
136
|
"automatic-speech-recognition": getData("automatic-speech-recognition", data_js_3.default),
|
|
135
137
|
"depth-estimation": getData("depth-estimation", data_js_15.default),
|
|
136
138
|
"document-question-answering": getData("document-question-answering", data_js_4.default),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAgEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
const taskData = {
|
|
4
|
-
datasets: [
|
|
4
|
+
datasets: [
|
|
5
|
+
{
|
|
6
|
+
description: "Widely used benchmark dataset for multiple Vision tasks.",
|
|
7
|
+
id: "merve/coco2017",
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
description: "Medical Imaging dataset of the Human Brain for segmentation and mask generating tasks",
|
|
11
|
+
id: "rocky93/BraTS_segmentation",
|
|
12
|
+
},
|
|
13
|
+
],
|
|
5
14
|
demo: {
|
|
6
15
|
inputs: [
|
|
7
16
|
{
|
|
@@ -16,7 +25,12 @@ const taskData = {
|
|
|
16
25
|
},
|
|
17
26
|
],
|
|
18
27
|
},
|
|
19
|
-
metrics: [
|
|
28
|
+
metrics: [
|
|
29
|
+
{
|
|
30
|
+
description: "IoU is used to measure the overlap between predicted mask and the ground truth mask.",
|
|
31
|
+
id: "Intersection over Union (IoU)",
|
|
32
|
+
},
|
|
33
|
+
],
|
|
20
34
|
models: [
|
|
21
35
|
{
|
|
22
36
|
description: "Small yet powerful mask generation model.",
|
package/dist/esm/index.d.ts
CHANGED
|
@@ -12,6 +12,7 @@ export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data.js";
|
|
|
12
12
|
import * as snippets from "./snippets/index.js";
|
|
13
13
|
export * from "./gguf.js";
|
|
14
14
|
export { snippets };
|
|
15
|
+
export type { InferenceSnippet } from "./snippets/index.js";
|
|
15
16
|
export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
|
|
16
17
|
export type { HardwareSpec, SkuType } from "./hardware.js";
|
|
17
18
|
export { LOCAL_APPS } from "./local-apps.js";
|
package/dist/esm/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AACpE,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACvF,cAAc,kBAAkB,CAAC;AACjC,OAAO,EACN,aAAa,EACb,cAAc,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,QAAQ,EACb,UAAU,EACV,eAAe,EACf,aAAa,EACb,kBAAkB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACN,8BAA8B,EAC9B,sBAAsB,EACtB,2BAA2B,GAC3B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAC9E,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACzF,YAAY,EACX,aAAa,EACb,sBAAsB,EACtB,gCAAgC,EAChC,8BAA8B,EAC9B,kCAAkC,EAClC,uBAAuB,EACvB,sBAAsB,EACtB,oCAAoC,EACpC,gCAAgC,EAChC,2BAA2B,EAC3B,gCAAgC,EAChC,8BAA8B,EAC9B,sBAAsB,EACtB,8BAA8B,EAC9B,mBAAmB,EACnB,sBAAsB,EACtB,yBAAyB,EACzB,8BAA8B,EAC9B,uBAAuB,GACvB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,QAAQ,MAAM,qBAAqB,CAAC;AAChD,cAAc,WAAW,CAAC;AAE1B,OAAO,EAAE,QAAQ,EAAE,CAAC;AACpB,YAAY,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAG5D,OAAO,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC7D,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAE9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
|
package/dist/esm/pipelines.d.ts
CHANGED
|
@@ -170,6 +170,12 @@ export declare const PIPELINE_DATA: {
|
|
|
170
170
|
modality: "audio";
|
|
171
171
|
color: "green";
|
|
172
172
|
};
|
|
173
|
+
"audio-text-to-text": {
|
|
174
|
+
name: string;
|
|
175
|
+
modality: "multimodal";
|
|
176
|
+
color: "red";
|
|
177
|
+
hideInDatasets: true;
|
|
178
|
+
};
|
|
173
179
|
"voice-activity-detection": {
|
|
174
180
|
name: string;
|
|
175
181
|
modality: "audio";
|
|
@@ -421,5 +427,5 @@ export type PipelineType = keyof typeof PIPELINE_DATA;
|
|
|
421
427
|
export type WidgetType = PipelineType | "conversational";
|
|
422
428
|
export declare const PIPELINE_TYPES: PipelineType[];
|
|
423
429
|
export declare const SUBTASK_TYPES: string[];
|
|
424
|
-
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
|
|
430
|
+
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "audio-text-to-text" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
|
|
425
431
|
//# sourceMappingURL=pipelines.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa
|
|
1
|
+
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4mBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,EAAiC,YAAY,EAAE,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,wqCAA0B,CAAC"}
|
package/dist/esm/pipelines.js
CHANGED
|
@@ -306,6 +306,12 @@ export const PIPELINE_DATA = {
|
|
|
306
306
|
modality: "audio",
|
|
307
307
|
color: "green",
|
|
308
308
|
},
|
|
309
|
+
"audio-text-to-text": {
|
|
310
|
+
name: "Audio-Text-to-Text",
|
|
311
|
+
modality: "multimodal",
|
|
312
|
+
color: "red",
|
|
313
|
+
hideInDatasets: true,
|
|
314
|
+
},
|
|
309
315
|
"voice-activity-detection": {
|
|
310
316
|
name: "Voice Activity Detection",
|
|
311
317
|
modality: "audio",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/automatic-speech-recognition/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAyFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -4,12 +4,16 @@ const taskData = {
|
|
|
4
4
|
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
5
5
|
id: "mozilla-foundation/common_voice_17_0",
|
|
6
6
|
},
|
|
7
|
+
{
|
|
8
|
+
description: "Multilingual and diverse audio dataset with 101k hours of audio.",
|
|
9
|
+
id: "amphion/Emilia-Dataset",
|
|
10
|
+
},
|
|
7
11
|
{
|
|
8
12
|
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
|
|
9
13
|
id: "parler-tts/mls_eng",
|
|
10
14
|
},
|
|
11
15
|
{
|
|
12
|
-
description: "A
|
|
16
|
+
description: "A multilingual audio dataset with 370K hours of audio.",
|
|
13
17
|
id: "espnet/yodas",
|
|
14
18
|
},
|
|
15
19
|
],
|
|
@@ -52,6 +56,10 @@ const taskData = {
|
|
|
52
56
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
53
57
|
id: "facebook/seamless-m4t-v2-large",
|
|
54
58
|
},
|
|
59
|
+
{
|
|
60
|
+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
|
|
61
|
+
id: "nvidia/canary-1b",
|
|
62
|
+
},
|
|
55
63
|
{
|
|
56
64
|
description: "Powerful speaker diarization model.",
|
|
57
65
|
id: "pyannote/speaker-diarization-3.1",
|
|
@@ -63,13 +71,17 @@ const taskData = {
|
|
|
63
71
|
id: "hf-audio/whisper-large-v3",
|
|
64
72
|
},
|
|
65
73
|
{
|
|
66
|
-
description: "
|
|
67
|
-
id: "
|
|
74
|
+
description: "Latest ASR model from Useful Sensors.",
|
|
75
|
+
id: "mrfakename/Moonshinex",
|
|
68
76
|
},
|
|
69
77
|
{
|
|
70
78
|
description: "A high quality speech and text translation model by Meta.",
|
|
71
79
|
id: "facebook/seamless_m4t",
|
|
72
80
|
},
|
|
81
|
+
{
|
|
82
|
+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
|
|
83
|
+
id: "nvidia/canary-1b",
|
|
84
|
+
},
|
|
73
85
|
],
|
|
74
86
|
summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
|
|
75
87
|
widgetModels: ["openai/whisper-large-v3"],
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA4CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA8DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAsDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
|
package/dist/esm/tasks/index.js
CHANGED
|
@@ -46,6 +46,7 @@ export const TASKS_MODEL_LIBRARIES = {
|
|
|
46
46
|
"audio-classification": ["speechbrain", "transformers", "transformers.js"],
|
|
47
47
|
"audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
|
|
48
48
|
"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
|
|
49
|
+
"audio-text-to-text": [],
|
|
49
50
|
"depth-estimation": ["transformers", "transformers.js"],
|
|
50
51
|
"document-question-answering": ["transformers", "transformers.js"],
|
|
51
52
|
"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
|
|
@@ -125,6 +126,7 @@ export const TASKS_DATA = {
|
|
|
125
126
|
"any-to-any": getData("any-to-any", placeholder),
|
|
126
127
|
"audio-classification": getData("audio-classification", audioClassification),
|
|
127
128
|
"audio-to-audio": getData("audio-to-audio", audioToAudio),
|
|
129
|
+
"audio-text-to-text": getData("audio-text-to-text", placeholder),
|
|
128
130
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
129
131
|
"depth-estimation": getData("depth-estimation", depthEstimation),
|
|
130
132
|
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/mask-generation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAgEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
const taskData = {
|
|
2
|
-
datasets: [
|
|
2
|
+
datasets: [
|
|
3
|
+
{
|
|
4
|
+
description: "Widely used benchmark dataset for multiple Vision tasks.",
|
|
5
|
+
id: "merve/coco2017",
|
|
6
|
+
},
|
|
7
|
+
{
|
|
8
|
+
description: "Medical Imaging dataset of the Human Brain for segmentation and mask generating tasks",
|
|
9
|
+
id: "rocky93/BraTS_segmentation",
|
|
10
|
+
},
|
|
11
|
+
],
|
|
3
12
|
demo: {
|
|
4
13
|
inputs: [
|
|
5
14
|
{
|
|
@@ -14,7 +23,12 @@ const taskData = {
|
|
|
14
23
|
},
|
|
15
24
|
],
|
|
16
25
|
},
|
|
17
|
-
metrics: [
|
|
26
|
+
metrics: [
|
|
27
|
+
{
|
|
28
|
+
description: "IoU is used to measure the overlap between predicted mask and the ground truth mask.",
|
|
29
|
+
id: "Intersection over Union (IoU)",
|
|
30
|
+
},
|
|
31
|
+
],
|
|
18
32
|
models: [
|
|
19
33
|
{
|
|
20
34
|
description: "Small yet powerful mask generation model.",
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.13.
|
|
4
|
+
"version": "0.13.3",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/index.ts
CHANGED
|
@@ -49,6 +49,8 @@ import * as snippets from "./snippets/index.js";
|
|
|
49
49
|
export * from "./gguf.js";
|
|
50
50
|
|
|
51
51
|
export { snippets };
|
|
52
|
+
export type { InferenceSnippet } from "./snippets/index.js";
|
|
53
|
+
|
|
52
54
|
|
|
53
55
|
export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
|
|
54
56
|
export type { HardwareSpec, SkuType } from "./hardware.js";
|
package/src/pipelines.ts
CHANGED
|
@@ -355,6 +355,12 @@ export const PIPELINE_DATA = {
|
|
|
355
355
|
modality: "audio",
|
|
356
356
|
color: "green",
|
|
357
357
|
},
|
|
358
|
+
"audio-text-to-text": {
|
|
359
|
+
name: "Audio-Text-to-Text",
|
|
360
|
+
modality: "multimodal",
|
|
361
|
+
color: "red",
|
|
362
|
+
hideInDatasets: true,
|
|
363
|
+
},
|
|
358
364
|
"voice-activity-detection": {
|
|
359
365
|
name: "Voice Activity Detection",
|
|
360
366
|
modality: "audio",
|
|
@@ -6,12 +6,16 @@ const taskData: TaskDataCustom = {
|
|
|
6
6
|
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
7
7
|
id: "mozilla-foundation/common_voice_17_0",
|
|
8
8
|
},
|
|
9
|
+
{
|
|
10
|
+
description: "Multilingual and diverse audio dataset with 101k hours of audio.",
|
|
11
|
+
id: "amphion/Emilia-Dataset",
|
|
12
|
+
},
|
|
9
13
|
{
|
|
10
14
|
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
|
|
11
15
|
id: "parler-tts/mls_eng",
|
|
12
16
|
},
|
|
13
17
|
{
|
|
14
|
-
description: "A
|
|
18
|
+
description: "A multilingual audio dataset with 370K hours of audio.",
|
|
15
19
|
id: "espnet/yodas",
|
|
16
20
|
},
|
|
17
21
|
],
|
|
@@ -54,6 +58,10 @@ const taskData: TaskDataCustom = {
|
|
|
54
58
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
55
59
|
id: "facebook/seamless-m4t-v2-large",
|
|
56
60
|
},
|
|
61
|
+
{
|
|
62
|
+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
|
|
63
|
+
id: "nvidia/canary-1b",
|
|
64
|
+
},
|
|
57
65
|
{
|
|
58
66
|
description: "Powerful speaker diarization model.",
|
|
59
67
|
id: "pyannote/speaker-diarization-3.1",
|
|
@@ -65,13 +73,17 @@ const taskData: TaskDataCustom = {
|
|
|
65
73
|
id: "hf-audio/whisper-large-v3",
|
|
66
74
|
},
|
|
67
75
|
{
|
|
68
|
-
description: "
|
|
69
|
-
id: "
|
|
76
|
+
description: "Latest ASR model from Useful Sensors.",
|
|
77
|
+
id: "mrfakename/Moonshinex",
|
|
70
78
|
},
|
|
71
79
|
{
|
|
72
80
|
description: "A high quality speech and text translation model by Meta.",
|
|
73
81
|
id: "facebook/seamless_m4t",
|
|
74
82
|
},
|
|
83
|
+
{
|
|
84
|
+
description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
|
|
85
|
+
id: "nvidia/canary-1b",
|
|
86
|
+
},
|
|
75
87
|
],
|
|
76
88
|
summary:
|
|
77
89
|
"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
|
package/src/tasks/index.ts
CHANGED
|
@@ -116,6 +116,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
|
|
|
116
116
|
"audio-classification": ["speechbrain", "transformers", "transformers.js"],
|
|
117
117
|
"audio-to-audio": ["asteroid", "fairseq", "speechbrain"],
|
|
118
118
|
"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
|
|
119
|
+
"audio-text-to-text": [],
|
|
119
120
|
"depth-estimation": ["transformers", "transformers.js"],
|
|
120
121
|
"document-question-answering": ["transformers", "transformers.js"],
|
|
121
122
|
"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
|
|
@@ -197,6 +198,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
197
198
|
"any-to-any": getData("any-to-any", placeholder),
|
|
198
199
|
"audio-classification": getData("audio-classification", audioClassification),
|
|
199
200
|
"audio-to-audio": getData("audio-to-audio", audioToAudio),
|
|
201
|
+
"audio-text-to-text": getData("audio-text-to-text", placeholder),
|
|
200
202
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
201
203
|
"depth-estimation": getData("depth-estimation", depthEstimation),
|
|
202
204
|
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),
|
|
@@ -12,6 +12,16 @@ Generating masks can facilitate learning, especially in semi or unsupervised lea
|
|
|
12
12
|
|
|
13
13
|
For applications where humans are in the loop, masks highlight certain regions of images for humans to validate.
|
|
14
14
|
|
|
15
|
+
### Medical Imaging
|
|
16
|
+
|
|
17
|
+
Mask generation models are used in medical imaging to aid in segmenting and analyzing specific regions.
|
|
18
|
+
|
|
19
|
+
### Autonomous Vehicles
|
|
20
|
+
|
|
21
|
+
Mask generation models are used to create segments and masks for obstacles and other objects in view.
|
|
22
|
+
|
|
23
|
+
This page was made possible thanks to the efforts of [Raj Aryan](https://huggingface.co/thatrajaryan) and other contributors.
|
|
24
|
+
|
|
15
25
|
## Task Variants
|
|
16
26
|
|
|
17
27
|
### Segmentation
|
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
import type { TaskDataCustom } from "../index.js";
|
|
2
2
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
|
-
datasets: [
|
|
4
|
+
datasets: [
|
|
5
|
+
{
|
|
6
|
+
description: "Widely used benchmark dataset for multiple Vision tasks.",
|
|
7
|
+
id: "merve/coco2017",
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
description: "Medical Imaging dataset of the Human Brain for segmentation and mask generating tasks",
|
|
11
|
+
id: "rocky93/BraTS_segmentation",
|
|
12
|
+
},
|
|
13
|
+
],
|
|
5
14
|
demo: {
|
|
6
15
|
inputs: [
|
|
7
16
|
{
|
|
@@ -16,7 +25,12 @@ const taskData: TaskDataCustom = {
|
|
|
16
25
|
},
|
|
17
26
|
],
|
|
18
27
|
},
|
|
19
|
-
metrics: [
|
|
28
|
+
metrics: [
|
|
29
|
+
{
|
|
30
|
+
description: "IoU is used to measure the overlap between predicted mask and the ground truth mask.",
|
|
31
|
+
id: "Intersection over Union (IoU)",
|
|
32
|
+
},
|
|
33
|
+
],
|
|
20
34
|
models: [
|
|
21
35
|
{
|
|
22
36
|
description: "Small yet powerful mask generation model.",
|