@huggingface/tasks 0.18.10 → 0.18.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/hardware.js +1 -1
- package/dist/commonjs/model-libraries-snippets.d.ts +2 -0
- package/dist/commonjs/model-libraries-snippets.d.ts.map +1 -1
- package/dist/commonjs/model-libraries-snippets.js +47 -4
- package/dist/commonjs/model-libraries.d.ts +16 -2
- package/dist/commonjs/model-libraries.d.ts.map +1 -1
- package/dist/commonjs/model-libraries.js +15 -1
- package/dist/commonjs/tasks/any-to-any/data.d.ts.map +1 -1
- package/dist/commonjs/tasks/any-to-any/data.js +1 -1
- package/dist/commonjs/tasks/index.d.ts.map +1 -1
- package/dist/commonjs/tasks/index.js +18 -17
- package/dist/commonjs/tasks/visual-document-retrieval/data.d.ts +4 -0
- package/dist/commonjs/tasks/visual-document-retrieval/data.d.ts.map +1 -0
- package/dist/commonjs/tasks/visual-document-retrieval/data.js +69 -0
- package/dist/esm/hardware.js +1 -1
- package/dist/esm/model-libraries-snippets.d.ts +2 -0
- package/dist/esm/model-libraries-snippets.d.ts.map +1 -1
- package/dist/esm/model-libraries-snippets.js +43 -2
- package/dist/esm/model-libraries.d.ts +16 -2
- package/dist/esm/model-libraries.d.ts.map +1 -1
- package/dist/esm/model-libraries.js +15 -1
- package/dist/esm/tasks/any-to-any/data.d.ts.map +1 -1
- package/dist/esm/tasks/any-to-any/data.js +1 -1
- package/dist/esm/tasks/index.d.ts.map +1 -1
- package/dist/esm/tasks/index.js +2 -1
- package/dist/esm/tasks/visual-document-retrieval/data.d.ts +4 -0
- package/dist/esm/tasks/visual-document-retrieval/data.d.ts.map +1 -0
- package/dist/esm/tasks/visual-document-retrieval/data.js +67 -0
- package/package.json +1 -1
- package/src/hardware.ts +1 -1
- package/src/model-libraries-snippets.ts +45 -3
- package/src/model-libraries.ts +15 -1
- package/src/tasks/any-to-any/data.ts +4 -5
- package/src/tasks/index.ts +2 -1
- package/src/tasks/visual-document-retrieval/about.md +54 -0
- package/src/tasks/visual-document-retrieval/data.ts +71 -0
|
@@ -134,7 +134,7 @@ exports.SKUS = {
|
|
|
134
134
|
memory: [16],
|
|
135
135
|
},
|
|
136
136
|
"RTX 5060 Ti": {
|
|
137
|
-
tflops: 23.
|
|
137
|
+
tflops: 23.7, // source https://www.techpowerup.com/gpu-specs/geforce-rtx-5060-ti.c4246
|
|
138
138
|
memory: [16, 8],
|
|
139
139
|
},
|
|
140
140
|
"RTX 5060": {
|
|
@@ -23,6 +23,7 @@ export declare const espnet: (model: ModelData) => string[];
|
|
|
23
23
|
export declare const fairseq: (model: ModelData) => string[];
|
|
24
24
|
export declare const flair: (model: ModelData) => string[];
|
|
25
25
|
export declare const gliner: (model: ModelData) => string[];
|
|
26
|
+
export declare const indextts: (model: ModelData) => string[];
|
|
26
27
|
export declare const htrflow: (model: ModelData) => string[];
|
|
27
28
|
export declare const keras: (model: ModelData) => string[];
|
|
28
29
|
export declare const keras_hub: (model: ModelData) => string[];
|
|
@@ -73,6 +74,7 @@ export declare const mlx: (model: ModelData) => string[];
|
|
|
73
74
|
export declare const mlxim: (model: ModelData) => string[];
|
|
74
75
|
export declare const model2vec: (model: ModelData) => string[];
|
|
75
76
|
export declare const nemo: (model: ModelData) => string[];
|
|
77
|
+
export declare const outetts: (model: ModelData) => string[];
|
|
76
78
|
export declare const pxia: (model: ModelData) => string[];
|
|
77
79
|
export declare const pythae: (model: ModelData) => string[];
|
|
78
80
|
export declare const anemoi: (model: ModelData) => string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAkBjD,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAIhD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAaF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAY7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,cAAc,QAAO,MAAM,EAcvC,CAAC;AAEF,eAAO,MAAM,iBAAiB,UAAW,SAAS,KAAG,MAAM,EA6C1D,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAwBlD,CAAC;AAEF,eAAO,MAAM,eAAe,QAAO,MAAM,EAoBxC,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAS5C,CAAC;AAuCF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAwCrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAgBzD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAmBrD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AA4EF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA+BrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EA0BzD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAA
G,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAkBjD,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAIhD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAaF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAY7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,cAAc,QAAO,MAAM,EAcvC,CAAC;AAEF,eAAO,MAAM,iBAAiB,UAAW,SAAS,KAAG,MAAM,EA6C1D,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAwBlD,CAAC;AAEF,eAAO,MAAM,eAAe,QAAO,MAAM,EAoBxC,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAS5C,CAAC;AAuCF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAwCrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAgBzD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAmBrD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAgBjD,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AA4EF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA+BrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EA0BzD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KA
AG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAgB3D,CAAC;AAEF,eAAO,MAAM,uBAAuB,UAAW,SAAS,KAAG,MAAM,EAehE,CAAC;AAiBF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAyBF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAOtD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAEF,eAAO,MAAM,OAAO,QAA6B,MAAM,EAQtD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAanC,CAAC;AAsCF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAmC3D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EA2B7C,CAAC;AAEF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAEtD,CAAC;AASF,eAAO,MAAM,oBAAoB,UAAW,SAAS,KAAG,MAAM,EAuC7D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAU9C,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAIpD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAK/C,CAAC;AAkBF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAkBpD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA4CrD,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAcvD,CAAC;AAiBF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAiB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAMzD,CAAC;AAgBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAEjD,CAAC;AAEF,eAAO,MAAM,MAAM,QAA6B,MAAM,EAMrD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAkB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,OAAO,QAAO,MAAM,EAYhC,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAiBpD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAYjD,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAKpD,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAK5C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAQ7C,CAAC;AAEF
,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAqB9C,CAAC;AAEJ,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AA4BF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAUnD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAYnC,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
4
|
-
exports.hezar = exports.threedtopia_xl = exports.whisperkit = exports.audiocraft = exports.anemoi = exports.pythae = exports.pxia = exports.nemo = exports.model2vec = exports.mlxim = exports.mlx = exports.swarmformer = exports.birefnet = exports.ultralytics = exports.chattts = exports.voicecraft = exports.vfimamba = exports.sana = exports.sentis = exports.mlAgents = exports.stableBaselines3 = exports.fasttext = exports.peft = exports.transformersJS = exports.transformers = exports.terratorch = exports.speechbrain = exports.stanza = exports.span_marker = exports.spacy = exports.setfit = void 0;
|
|
3
|
+
exports.sampleFactory = exports.sam2 = exports.fastai = exports.stable_audio_tools = exports.sklearn = exports.seed_story = exports.saelens = exports.timm = exports.tensorflowtts = exports.relik = exports.pyannote_audio = exports.pyannote_audio_pipeline = exports.perception_encoder = exports.paddlenlp = exports.open_clip = exports.mesh_anything = exports.matanyone = exports.mars5_tts = exports.mamba_ssm = exports.tf_keras = exports.llama_cpp_python = exports.lightning_ir = exports.keras_hub = exports.keras = exports.htrflow = exports.indextts = exports.gliner = exports.flair = exports.fairseq = exports.espnet = exports.espnetASR = exports.espnetTTS = exports.edsnlp = exports.cartesia_mlx = exports.cartesia_pytorch = exports.diffusionkit = exports.diffusers = exports.dia = exports.derm_foundation = exports.depth_pro = exports.depth_anything_v2 = exports.cxr_foundation = exports.bm25s = exports.bertopic = exports.ben2 = exports.audioseal = exports.asteroid = exports.araclip = exports.allennlp = exports.adapters = void 0;
|
|
4
|
+
exports.hezar = exports.threedtopia_xl = exports.whisperkit = exports.audiocraft = exports.anemoi = exports.pythae = exports.pxia = exports.outetts = exports.nemo = exports.model2vec = exports.mlxim = exports.mlx = exports.swarmformer = exports.birefnet = exports.ultralytics = exports.chattts = exports.voicecraft = exports.vfimamba = exports.sana = exports.sentis = exports.mlAgents = exports.stableBaselines3 = exports.fasttext = exports.peft = exports.transformersJS = exports.transformers = exports.terratorch = exports.speechbrain = exports.stanza = exports.span_marker = exports.spacy = exports.setfit = exports.sentenceTransformers = void 0;
|
|
5
5
|
const library_to_tasks_js_1 = require("./library-to-tasks.js");
|
|
6
6
|
const inputs_js_1 = require("./snippets/inputs.js");
|
|
7
7
|
const common_js_1 = require("./snippets/common.js");
|
|
@@ -417,6 +417,24 @@ const gliner = (model) => [
|
|
|
417
417
|
model = GLiNER.from_pretrained("${model.id}")`,
|
|
418
418
|
];
|
|
419
419
|
exports.gliner = gliner;
|
|
420
|
+
const indextts = (model) => [
|
|
421
|
+
`# Download model
|
|
422
|
+
from huggingface_hub import snapshot_download
|
|
423
|
+
|
|
424
|
+
snapshot_download(${model.id}, local_dir="checkpoints")
|
|
425
|
+
|
|
426
|
+
from indextts.infer import IndexTTS
|
|
427
|
+
|
|
428
|
+
# Ensure config.yaml is present in the checkpoints directory
|
|
429
|
+
tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
|
|
430
|
+
|
|
431
|
+
voice = "path/to/your/reference_voice.wav" # Path to the voice reference audio file
|
|
432
|
+
text = "Hello, how are you?"
|
|
433
|
+
output_path = "output_index.wav"
|
|
434
|
+
|
|
435
|
+
tts.infer(voice, text, output_path)`,
|
|
436
|
+
];
|
|
437
|
+
exports.indextts = indextts;
|
|
420
438
|
const htrflow = (model) => [
|
|
421
439
|
`# CLI usage
|
|
422
440
|
# see docs: https://ai-riksarkivet.github.io/htrflow/latest/getting_started/quick_start.html
|
|
@@ -674,10 +692,12 @@ model = pe.CLIP.from_config("${model.id}", pretrained=True)`;
|
|
|
674
692
|
import core.vision_encoder.pe as pe
|
|
675
693
|
|
|
676
694
|
model = pe.VisionTransformer.from_config("${model.id}", pretrained=True)`;
|
|
677
|
-
if (model.id.includes("Core"))
|
|
695
|
+
if (model.id.includes("Core")) {
|
|
678
696
|
return [clip_model, vision_encoder];
|
|
679
|
-
|
|
697
|
+
}
|
|
698
|
+
else {
|
|
680
699
|
return [vision_encoder];
|
|
700
|
+
}
|
|
681
701
|
};
|
|
682
702
|
exports.perception_encoder = perception_encoder;
|
|
683
703
|
const pyannote_audio_pipeline = (model) => [
|
|
@@ -1267,6 +1287,29 @@ const nemo = (model) => {
|
|
|
1267
1287
|
return command ?? [`# tag did not correspond to a valid NeMo domain.`];
|
|
1268
1288
|
};
|
|
1269
1289
|
exports.nemo = nemo;
|
|
1290
|
+
const outetts = (model) => {
|
|
1291
|
+
// Don’t show this block on GGUF / ONNX mirrors
|
|
1292
|
+
const t = model.tags ?? [];
|
|
1293
|
+
if (t.includes("gguf") || t.includes("onnx"))
|
|
1294
|
+
return [];
|
|
1295
|
+
// v1.0 HF → minimal runnable snippet
|
|
1296
|
+
return [`
|
|
1297
|
+
import outetts
|
|
1298
|
+
|
|
1299
|
+
enum = outetts.Models("${model.id}".split("/", 1)[1]) # VERSION_1_0_SIZE_1B
|
|
1300
|
+
cfg = outetts.ModelConfig.auto_config(enum, outetts.Backend.HF)
|
|
1301
|
+
tts = outetts.Interface(cfg)
|
|
1302
|
+
|
|
1303
|
+
speaker = tts.load_default_speaker("EN-FEMALE-1-NEUTRAL")
|
|
1304
|
+
tts.generate(
|
|
1305
|
+
outetts.GenerationConfig(
|
|
1306
|
+
text="Hello there, how are you doing?",
|
|
1307
|
+
speaker=speaker,
|
|
1308
|
+
)
|
|
1309
|
+
).save("output.wav")
|
|
1310
|
+
`];
|
|
1311
|
+
};
|
|
1312
|
+
exports.outetts = outetts;
|
|
1270
1313
|
const pxia = (model) => [
|
|
1271
1314
|
`from pxia import AutoModel
|
|
1272
1315
|
|
|
@@ -226,7 +226,7 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
226
226
|
filter: false;
|
|
227
227
|
countDownloads: string;
|
|
228
228
|
};
|
|
229
|
-
dia: {
|
|
229
|
+
"dia-tts": {
|
|
230
230
|
prettyLabel: string;
|
|
231
231
|
repoName: string;
|
|
232
232
|
repoUrl: string;
|
|
@@ -421,6 +421,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
421
421
|
repoUrl: string;
|
|
422
422
|
countDownloads: string;
|
|
423
423
|
};
|
|
424
|
+
"index-tts": {
|
|
425
|
+
prettyLabel: string;
|
|
426
|
+
repoName: string;
|
|
427
|
+
repoUrl: string;
|
|
428
|
+
snippets: (model: ModelData) => string[];
|
|
429
|
+
filter: false;
|
|
430
|
+
};
|
|
424
431
|
"infinite-you": {
|
|
425
432
|
prettyLabel: string;
|
|
426
433
|
repoName: string;
|
|
@@ -614,6 +621,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
614
621
|
filter: false;
|
|
615
622
|
countDownloads: string;
|
|
616
623
|
};
|
|
624
|
+
outetts: {
|
|
625
|
+
prettyLabel: string;
|
|
626
|
+
repoName: string;
|
|
627
|
+
repoUrl: string;
|
|
628
|
+
snippets: (model: ModelData) => string[];
|
|
629
|
+
filter: false;
|
|
630
|
+
};
|
|
617
631
|
paddlenlp: {
|
|
618
632
|
prettyLabel: string;
|
|
619
633
|
repoName: string;
|
|
@@ -996,5 +1010,5 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
996
1010
|
};
|
|
997
1011
|
export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
|
|
998
1012
|
export declare const ALL_MODEL_LIBRARY_KEYS: ModelLibraryKey[];
|
|
999
|
-
export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "anemoi" | "araclip" | "asteroid" | "audiocraft" | "audioseal" | "ben2" | "bertopic" | "big_vision" | "birder" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "comet" | "cosmos" | "cxr-foundation" | "deepforest" | "depth-anything-v2" | "depth-pro" | "derm-foundation" | "dia" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "clipscope" | "cosyvoice" | "cotracker" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "geometry-crafter" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hezar" | "htrflow" | "hunyuan-dit" | "hunyuan3d-2" | "imstoucan" | "infinite-you" | "keras" | "tf-keras" | "keras-hub" | "k2" | "lightning-ir" | "liveportrait" | "llama-cpp-python" | "mini-omni2" | "mindspore" | "mamba-ssm" | "mars5-tts" | "matanyone" | "mesh-anything" | "merlin" | "medvae" | "mitie" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "model2vec" | "moshi" | "nemo" | "open-oasis" | "open_clip" | "open-sora" | "paddlenlp" | "peft" | "perception-encoder" | "pxia" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "reverb" | "saelens" | "sam2" | "sample-factory" | "sapiens" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "ssr-speech" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "soloaudio" | "stable-baselines3" | "stanza" | "swarmformer" | "f5-tts" | "genmo" | "tensorflowtts" | "tabpfn" | "terratorch" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "trellis" | "ultralytics" | "uni-3dar" | "unity-sentis" | "sana" | "vfi-mamba" | "voicecraft" | "wham" | "whisperkit" | "yolov10" | "3dtopia-xl")[];
|
|
1013
|
+
export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "anemoi" | "araclip" | "asteroid" | "audiocraft" | "audioseal" | "ben2" | "bertopic" | "big_vision" | "birder" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "comet" | "cosmos" | "cxr-foundation" | "deepforest" | "depth-anything-v2" | "depth-pro" | "derm-foundation" | "dia-tts" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "clipscope" | "cosyvoice" | "cotracker" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "geometry-crafter" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hezar" | "htrflow" | "hunyuan-dit" | "hunyuan3d-2" | "imstoucan" | "index-tts" | "infinite-you" | "keras" | "tf-keras" | "keras-hub" | "k2" | "lightning-ir" | "liveportrait" | "llama-cpp-python" | "mini-omni2" | "mindspore" | "mamba-ssm" | "mars5-tts" | "matanyone" | "mesh-anything" | "merlin" | "medvae" | "mitie" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "model2vec" | "moshi" | "nemo" | "open-oasis" | "open_clip" | "open-sora" | "outetts" | "paddlenlp" | "peft" | "perception-encoder" | "pxia" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "reverb" | "saelens" | "sam2" | "sample-factory" | "sapiens" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "ssr-speech" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "soloaudio" | "stable-baselines3" | "stanza" | "swarmformer" | "f5-tts" | "genmo" | "tensorflowtts" | "tabpfn" | "terratorch" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "trellis" | "ultralytics" | "uni-3dar" | "unity-sentis" | "sana" | "vfi-mamba" | "voicecraft" | "wham" | "whisperkit" | "yolov10" | "3dtopia-xl")[];
|
|
1000
1014
|
//# sourceMappingURL=model-libraries.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B
|
|
1
|
+
{"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eg8BI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,EAA+C,eAAe,EAAE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,qwDAQ1B,CAAC"}
|
|
@@ -212,7 +212,7 @@ exports.MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
212
212
|
filter: false,
|
|
213
213
|
countDownloads: `path:"scin_dataset_precomputed_embeddings.npz" OR path:"saved_model.pb"`,
|
|
214
214
|
},
|
|
215
|
-
dia: {
|
|
215
|
+
"dia-tts": {
|
|
216
216
|
prettyLabel: "Dia",
|
|
217
217
|
repoName: "Dia",
|
|
218
218
|
repoUrl: "https://github.com/nari-labs/dia",
|
|
@@ -408,6 +408,13 @@ exports.MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
408
408
|
repoUrl: "https://github.com/DigitalPhonetics/IMS-Toucan",
|
|
409
409
|
countDownloads: `path:"embedding_gan.pt" OR path:"Vocoder.pt" OR path:"ToucanTTS.pt"`,
|
|
410
410
|
},
|
|
411
|
+
"index-tts": {
|
|
412
|
+
prettyLabel: "IndexTTS",
|
|
413
|
+
repoName: "IndexTTS",
|
|
414
|
+
repoUrl: "https://github.com/index-tts/index-tts",
|
|
415
|
+
snippets: snippets.indextts,
|
|
416
|
+
filter: false,
|
|
417
|
+
},
|
|
411
418
|
"infinite-you": {
|
|
412
419
|
prettyLabel: "InfiniteYou",
|
|
413
420
|
repoName: "InfiniteYou",
|
|
@@ -605,6 +612,13 @@ exports.MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
605
612
|
filter: false,
|
|
606
613
|
countDownloads: `path:"Open_Sora_v2.safetensors"`,
|
|
607
614
|
},
|
|
615
|
+
outetts: {
|
|
616
|
+
prettyLabel: "OuteTTS",
|
|
617
|
+
repoName: "OuteTTS",
|
|
618
|
+
repoUrl: "https://github.com/edwko/OuteTTS",
|
|
619
|
+
snippets: snippets.outetts,
|
|
620
|
+
filter: false,
|
|
621
|
+
},
|
|
608
622
|
paddlenlp: {
|
|
609
623
|
prettyLabel: "paddlenlp",
|
|
610
624
|
repoName: "PaddleNLP",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/any-to-any/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/any-to-any/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cA4Df,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA+CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CAgEzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAwDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD
;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
|
|
@@ -39,14 +39,15 @@ const data_js_31 = __importDefault(require("./text-ranking/data.js"));
|
|
|
39
39
|
const data_js_32 = __importDefault(require("./text-to-video/data.js"));
|
|
40
40
|
const data_js_33 = __importDefault(require("./unconditional-image-generation/data.js"));
|
|
41
41
|
const data_js_34 = __importDefault(require("./video-classification/data.js"));
|
|
42
|
-
const data_js_35 = __importDefault(require("./visual-
|
|
43
|
-
const data_js_36 = __importDefault(require("./
|
|
44
|
-
const data_js_37 = __importDefault(require("./zero-shot-
|
|
45
|
-
const data_js_38 = __importDefault(require("./zero-shot-
|
|
46
|
-
const data_js_39 = __importDefault(require("./
|
|
47
|
-
const data_js_40 = __importDefault(require("./
|
|
48
|
-
const data_js_41 = __importDefault(require("./
|
|
49
|
-
const data_js_42 = __importDefault(require("./
|
|
42
|
+
const data_js_35 = __importDefault(require("./visual-document-retrieval/data.js"));
|
|
43
|
+
const data_js_36 = __importDefault(require("./visual-question-answering/data.js"));
|
|
44
|
+
const data_js_37 = __importDefault(require("./zero-shot-classification/data.js"));
|
|
45
|
+
const data_js_38 = __importDefault(require("./zero-shot-image-classification/data.js"));
|
|
46
|
+
const data_js_39 = __importDefault(require("./zero-shot-object-detection/data.js"));
|
|
47
|
+
const data_js_40 = __importDefault(require("./image-to-3d/data.js"));
|
|
48
|
+
const data_js_41 = __importDefault(require("./text-to-3d/data.js"));
|
|
49
|
+
const data_js_42 = __importDefault(require("./keypoint-detection/data.js"));
|
|
50
|
+
const data_js_43 = __importDefault(require("./video-text-to-text/data.js"));
|
|
50
51
|
/**
|
|
51
52
|
* Model libraries compatible with each ML task
|
|
52
53
|
*/
|
|
@@ -140,7 +141,7 @@ exports.TASKS_DATA = {
|
|
|
140
141
|
"automatic-speech-recognition": getData("automatic-speech-recognition", data_js_4.default),
|
|
141
142
|
"depth-estimation": getData("depth-estimation", data_js_16.default),
|
|
142
143
|
"document-question-answering": getData("document-question-answering", data_js_5.default),
|
|
143
|
-
"visual-document-retrieval": getData("visual-document-retrieval",
|
|
144
|
+
"visual-document-retrieval": getData("visual-document-retrieval", data_js_35.default),
|
|
144
145
|
"feature-extraction": getData("feature-extraction", data_js_6.default),
|
|
145
146
|
"fill-mask": getData("fill-mask", data_js_7.default),
|
|
146
147
|
"graph-ml": undefined,
|
|
@@ -151,7 +152,7 @@ exports.TASKS_DATA = {
|
|
|
151
152
|
"image-text-to-text": getData("image-text-to-text", data_js_12.default),
|
|
152
153
|
"image-to-text": getData("image-to-text", data_js_11.default),
|
|
153
154
|
"image-to-video": undefined,
|
|
154
|
-
"keypoint-detection": getData("keypoint-detection",
|
|
155
|
+
"keypoint-detection": getData("keypoint-detection", data_js_42.default),
|
|
155
156
|
"mask-generation": getData("mask-generation", data_js_14.default),
|
|
156
157
|
"multiple-choice": undefined,
|
|
157
158
|
"object-detection": getData("object-detection", data_js_15.default),
|
|
@@ -180,12 +181,12 @@ exports.TASKS_DATA = {
|
|
|
180
181
|
"token-classification": getData("token-classification", data_js_27.default),
|
|
181
182
|
translation: getData("translation", data_js_28.default),
|
|
182
183
|
"unconditional-image-generation": getData("unconditional-image-generation", data_js_33.default),
|
|
183
|
-
"video-text-to-text": getData("video-text-to-text",
|
|
184
|
-
"visual-question-answering": getData("visual-question-answering",
|
|
184
|
+
"video-text-to-text": getData("video-text-to-text", data_js_43.default),
|
|
185
|
+
"visual-question-answering": getData("visual-question-answering", data_js_36.default),
|
|
185
186
|
"voice-activity-detection": undefined,
|
|
186
|
-
"zero-shot-classification": getData("zero-shot-classification",
|
|
187
|
-
"zero-shot-image-classification": getData("zero-shot-image-classification",
|
|
188
|
-
"zero-shot-object-detection": getData("zero-shot-object-detection",
|
|
189
|
-
"text-to-3d": getData("text-to-3d",
|
|
190
|
-
"image-to-3d": getData("image-to-3d",
|
|
187
|
+
"zero-shot-classification": getData("zero-shot-classification", data_js_37.default),
|
|
188
|
+
"zero-shot-image-classification": getData("zero-shot-image-classification", data_js_38.default),
|
|
189
|
+
"zero-shot-object-detection": getData("zero-shot-object-detection", data_js_39.default),
|
|
190
|
+
"text-to-3d": getData("text-to-3d", data_js_41.default),
|
|
191
|
+
"image-to-3d": getData("image-to-3d", data_js_40.default),
|
|
191
192
|
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/visual-document-retrieval/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAkEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const taskData = {
|
|
4
|
+
datasets: [
|
|
5
|
+
{
|
|
6
|
+
description: "A large dataset used to train visual document retrieval models.",
|
|
7
|
+
id: "vidore/colpali_train_set",
|
|
8
|
+
},
|
|
9
|
+
],
|
|
10
|
+
demo: {
|
|
11
|
+
inputs: [
|
|
12
|
+
{
|
|
13
|
+
filename: "input.png",
|
|
14
|
+
type: "img",
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
label: "Question",
|
|
18
|
+
content: "Is the model in this paper the fastest for inference?",
|
|
19
|
+
type: "text",
|
|
20
|
+
},
|
|
21
|
+
],
|
|
22
|
+
outputs: [
|
|
23
|
+
{
|
|
24
|
+
type: "chart",
|
|
25
|
+
data: [
|
|
26
|
+
{
|
|
27
|
+
label: "Page 10",
|
|
28
|
+
score: 0.7,
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
label: "Page 11",
|
|
32
|
+
score: 0.06,
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
label: "Page 9",
|
|
36
|
+
score: 0.003,
|
|
37
|
+
},
|
|
38
|
+
],
|
|
39
|
+
},
|
|
40
|
+
],
|
|
41
|
+
},
|
|
42
|
+
isPlaceholder: false,
|
|
43
|
+
metrics: [
|
|
44
|
+
{
|
|
45
|
+
description: "NDCG@k scores ranked recommendation lists for top-k results. 0 is the worst, 1 is the best.",
|
|
46
|
+
id: "Normalized Discounted Cumulative Gain at K",
|
|
47
|
+
},
|
|
48
|
+
],
|
|
49
|
+
models: [
|
|
50
|
+
{
|
|
51
|
+
description: "Very accurate visual document retrieval model for multilingual queries and documents.",
|
|
52
|
+
id: "vidore/colqwen2-v1.0",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
description: "Very fast and efficient visual document retrieval model that works on five languages.",
|
|
56
|
+
id: "marco/mcdse-2b-v1",
|
|
57
|
+
},
|
|
58
|
+
],
|
|
59
|
+
spaces: [
|
|
60
|
+
{
|
|
61
|
+
description: "A leaderboard of visual document retrieval models.",
|
|
62
|
+
id: "vidore/vidore-leaderboard",
|
|
63
|
+
},
|
|
64
|
+
],
|
|
65
|
+
summary: "Visual document retrieval is the task of searching for relevant image-based documents, such as PDFs. These models take a text query and multiple documents as input and return the top-most relevant documents and relevancy scores as output.",
|
|
66
|
+
widgetModels: [""],
|
|
67
|
+
youtubeId: "",
|
|
68
|
+
};
|
|
69
|
+
exports.default = taskData;
|
package/dist/esm/hardware.js
CHANGED
|
@@ -131,7 +131,7 @@ export const SKUS = {
|
|
|
131
131
|
memory: [16],
|
|
132
132
|
},
|
|
133
133
|
"RTX 5060 Ti": {
|
|
134
|
-
tflops: 23.
|
|
134
|
+
tflops: 23.7, // source https://www.techpowerup.com/gpu-specs/geforce-rtx-5060-ti.c4246
|
|
135
135
|
memory: [16, 8],
|
|
136
136
|
},
|
|
137
137
|
"RTX 5060": {
|
|
@@ -23,6 +23,7 @@ export declare const espnet: (model: ModelData) => string[];
|
|
|
23
23
|
export declare const fairseq: (model: ModelData) => string[];
|
|
24
24
|
export declare const flair: (model: ModelData) => string[];
|
|
25
25
|
export declare const gliner: (model: ModelData) => string[];
|
|
26
|
+
export declare const indextts: (model: ModelData) => string[];
|
|
26
27
|
export declare const htrflow: (model: ModelData) => string[];
|
|
27
28
|
export declare const keras: (model: ModelData) => string[];
|
|
28
29
|
export declare const keras_hub: (model: ModelData) => string[];
|
|
@@ -73,6 +74,7 @@ export declare const mlx: (model: ModelData) => string[];
|
|
|
73
74
|
export declare const mlxim: (model: ModelData) => string[];
|
|
74
75
|
export declare const model2vec: (model: ModelData) => string[];
|
|
75
76
|
export declare const nemo: (model: ModelData) => string[];
|
|
77
|
+
export declare const outetts: (model: ModelData) => string[];
|
|
76
78
|
export declare const pxia: (model: ModelData) => string[];
|
|
77
79
|
export declare const pythae: (model: ModelData) => string[];
|
|
78
80
|
export declare const anemoi: (model: ModelData) => string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAkBjD,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAIhD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAaF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAY7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,cAAc,QAAO,MAAM,EAcvC,CAAC;AAEF,eAAO,MAAM,iBAAiB,UAAW,SAAS,KAAG,MAAM,EA6C1D,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAwBlD,CAAC;AAEF,eAAO,MAAM,eAAe,QAAO,MAAM,EAoBxC,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAS5C,CAAC;AAuCF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAwCrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAgBzD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAmBrD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AA4EF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA+BrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EA0BzD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAA
G,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAkBjD,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAIhD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAaF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAY7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,cAAc,QAAO,MAAM,EAcvC,CAAC;AAEF,eAAO,MAAM,iBAAiB,UAAW,SAAS,KAAG,MAAM,EA6C1D,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAwBlD,CAAC;AAEF,eAAO,MAAM,eAAe,QAAO,MAAM,EAoBxC,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAS5C,CAAC;AAuCF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAwCrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAgBzD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAmBrD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAgBjD,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AA4EF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA+BrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EA0BzD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KA
AG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAgB3D,CAAC;AAEF,eAAO,MAAM,uBAAuB,UAAW,SAAS,KAAG,MAAM,EAehE,CAAC;AAiBF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAyBF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAOtD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAEF,eAAO,MAAM,OAAO,QAA6B,MAAM,EAQtD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAanC,CAAC;AAsCF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAmC3D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EA2B7C,CAAC;AAEF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAEtD,CAAC;AASF,eAAO,MAAM,oBAAoB,UAAW,SAAS,KAAG,MAAM,EAuC7D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAU9C,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAIpD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAK/C,CAAC;AAkBF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAkBpD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA4CrD,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAcvD,CAAC;AAiBF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAiB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAMzD,CAAC;AAgBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAEjD,CAAC;AAEF,eAAO,MAAM,MAAM,QAA6B,MAAM,EAMrD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAkB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,OAAO,QAAO,MAAM,EAYhC,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAiBpD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAYjD,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAKpD,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAK5C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAQ7C,CAAC;AAEF
,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAqB9C,CAAC;AAEJ,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AA4BF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAUnD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAYnC,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC"}
|
|
@@ -389,6 +389,23 @@ export const gliner = (model) => [
|
|
|
389
389
|
|
|
390
390
|
model = GLiNER.from_pretrained("${model.id}")`,
|
|
391
391
|
];
|
|
392
|
+
export const indextts = (model) => [
|
|
393
|
+
`# Download model
|
|
394
|
+
from huggingface_hub import snapshot_download
|
|
395
|
+
|
|
396
|
+
snapshot_download(${model.id}, local_dir="checkpoints")
|
|
397
|
+
|
|
398
|
+
from indextts.infer import IndexTTS
|
|
399
|
+
|
|
400
|
+
# Ensure config.yaml is present in the checkpoints directory
|
|
401
|
+
tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
|
|
402
|
+
|
|
403
|
+
voice = "path/to/your/reference_voice.wav" # Path to the voice reference audio file
|
|
404
|
+
text = "Hello, how are you?"
|
|
405
|
+
output_path = "output_index.wav"
|
|
406
|
+
|
|
407
|
+
tts.infer(voice, text, output_path)`,
|
|
408
|
+
];
|
|
392
409
|
export const htrflow = (model) => [
|
|
393
410
|
`# CLI usage
|
|
394
411
|
# see docs: https://ai-riksarkivet.github.io/htrflow/latest/getting_started/quick_start.html
|
|
@@ -634,10 +651,12 @@ model = pe.CLIP.from_config("${model.id}", pretrained=True)`;
|
|
|
634
651
|
import core.vision_encoder.pe as pe
|
|
635
652
|
|
|
636
653
|
model = pe.VisionTransformer.from_config("${model.id}", pretrained=True)`;
|
|
637
|
-
if (model.id.includes("Core"))
|
|
654
|
+
if (model.id.includes("Core")) {
|
|
638
655
|
return [clip_model, vision_encoder];
|
|
639
|
-
|
|
656
|
+
}
|
|
657
|
+
else {
|
|
640
658
|
return [vision_encoder];
|
|
659
|
+
}
|
|
641
660
|
};
|
|
642
661
|
export const pyannote_audio_pipeline = (model) => [
|
|
643
662
|
`from pyannote.audio import Pipeline
|
|
@@ -1189,6 +1208,28 @@ export const nemo = (model) => {
|
|
|
1189
1208
|
}
|
|
1190
1209
|
return command ?? [`# tag did not correspond to a valid NeMo domain.`];
|
|
1191
1210
|
};
|
|
1211
|
+
export const outetts = (model) => {
|
|
1212
|
+
// Don’t show this block on GGUF / ONNX mirrors
|
|
1213
|
+
const t = model.tags ?? [];
|
|
1214
|
+
if (t.includes("gguf") || t.includes("onnx"))
|
|
1215
|
+
return [];
|
|
1216
|
+
// v1.0 HF → minimal runnable snippet
|
|
1217
|
+
return [`
|
|
1218
|
+
import outetts
|
|
1219
|
+
|
|
1220
|
+
enum = outetts.Models("${model.id}".split("/", 1)[1]) # VERSION_1_0_SIZE_1B
|
|
1221
|
+
cfg = outetts.ModelConfig.auto_config(enum, outetts.Backend.HF)
|
|
1222
|
+
tts = outetts.Interface(cfg)
|
|
1223
|
+
|
|
1224
|
+
speaker = tts.load_default_speaker("EN-FEMALE-1-NEUTRAL")
|
|
1225
|
+
tts.generate(
|
|
1226
|
+
outetts.GenerationConfig(
|
|
1227
|
+
text="Hello there, how are you doing?",
|
|
1228
|
+
speaker=speaker,
|
|
1229
|
+
)
|
|
1230
|
+
).save("output.wav")
|
|
1231
|
+
`];
|
|
1232
|
+
};
|
|
1192
1233
|
export const pxia = (model) => [
|
|
1193
1234
|
`from pxia import AutoModel
|
|
1194
1235
|
|
|
@@ -226,7 +226,7 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
226
226
|
filter: false;
|
|
227
227
|
countDownloads: string;
|
|
228
228
|
};
|
|
229
|
-
dia: {
|
|
229
|
+
"dia-tts": {
|
|
230
230
|
prettyLabel: string;
|
|
231
231
|
repoName: string;
|
|
232
232
|
repoUrl: string;
|
|
@@ -421,6 +421,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
421
421
|
repoUrl: string;
|
|
422
422
|
countDownloads: string;
|
|
423
423
|
};
|
|
424
|
+
"index-tts": {
|
|
425
|
+
prettyLabel: string;
|
|
426
|
+
repoName: string;
|
|
427
|
+
repoUrl: string;
|
|
428
|
+
snippets: (model: ModelData) => string[];
|
|
429
|
+
filter: false;
|
|
430
|
+
};
|
|
424
431
|
"infinite-you": {
|
|
425
432
|
prettyLabel: string;
|
|
426
433
|
repoName: string;
|
|
@@ -614,6 +621,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
614
621
|
filter: false;
|
|
615
622
|
countDownloads: string;
|
|
616
623
|
};
|
|
624
|
+
outetts: {
|
|
625
|
+
prettyLabel: string;
|
|
626
|
+
repoName: string;
|
|
627
|
+
repoUrl: string;
|
|
628
|
+
snippets: (model: ModelData) => string[];
|
|
629
|
+
filter: false;
|
|
630
|
+
};
|
|
617
631
|
paddlenlp: {
|
|
618
632
|
prettyLabel: string;
|
|
619
633
|
repoName: string;
|
|
@@ -996,5 +1010,5 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
|
|
|
996
1010
|
};
|
|
997
1011
|
export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
|
|
998
1012
|
export declare const ALL_MODEL_LIBRARY_KEYS: ModelLibraryKey[];
|
|
999
|
-
export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "anemoi" | "araclip" | "asteroid" | "audiocraft" | "audioseal" | "ben2" | "bertopic" | "big_vision" | "birder" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "comet" | "cosmos" | "cxr-foundation" | "deepforest" | "depth-anything-v2" | "depth-pro" | "derm-foundation" | "dia" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "clipscope" | "cosyvoice" | "cotracker" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "geometry-crafter" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hezar" | "htrflow" | "hunyuan-dit" | "hunyuan3d-2" | "imstoucan" | "infinite-you" | "keras" | "tf-keras" | "keras-hub" | "k2" | "lightning-ir" | "liveportrait" | "llama-cpp-python" | "mini-omni2" | "mindspore" | "mamba-ssm" | "mars5-tts" | "matanyone" | "mesh-anything" | "merlin" | "medvae" | "mitie" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "model2vec" | "moshi" | "nemo" | "open-oasis" | "open_clip" | "open-sora" | "paddlenlp" | "peft" | "perception-encoder" | "pxia" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "reverb" | "saelens" | "sam2" | "sample-factory" | "sapiens" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "ssr-speech" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "soloaudio" | "stable-baselines3" | "stanza" | "swarmformer" | "f5-tts" | "genmo" | "tensorflowtts" | "tabpfn" | "terratorch" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "trellis" | "ultralytics" | "uni-3dar" | "unity-sentis" | "sana" | "vfi-mamba" | "voicecraft" | "wham" | "whisperkit" | "yolov10" | "3dtopia-xl")[];
|
|
1013
|
+
export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "anemoi" | "araclip" | "asteroid" | "audiocraft" | "audioseal" | "ben2" | "bertopic" | "big_vision" | "birder" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "comet" | "cosmos" | "cxr-foundation" | "deepforest" | "depth-anything-v2" | "depth-pro" | "derm-foundation" | "dia-tts" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "clipscope" | "cosyvoice" | "cotracker" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "geometry-crafter" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hezar" | "htrflow" | "hunyuan-dit" | "hunyuan3d-2" | "imstoucan" | "index-tts" | "infinite-you" | "keras" | "tf-keras" | "keras-hub" | "k2" | "lightning-ir" | "liveportrait" | "llama-cpp-python" | "mini-omni2" | "mindspore" | "mamba-ssm" | "mars5-tts" | "matanyone" | "mesh-anything" | "merlin" | "medvae" | "mitie" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "model2vec" | "moshi" | "nemo" | "open-oasis" | "open_clip" | "open-sora" | "outetts" | "paddlenlp" | "peft" | "perception-encoder" | "pxia" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "reverb" | "saelens" | "sam2" | "sample-factory" | "sapiens" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "ssr-speech" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "soloaudio" | "stable-baselines3" | "stanza" | "swarmformer" | "f5-tts" | "genmo" | "tensorflowtts" | "tabpfn" | "terratorch" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "trellis" | "ultralytics" | "uni-3dar" | "unity-sentis" | "sana" | "vfi-mamba" | "voicecraft" | "wham" | "whisperkit" | "yolov10" | "3dtopia-xl")[];
|
|
1000
1014
|
//# sourceMappingURL=model-libraries.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B
|
|
1
|
+
{"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eg8BI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,EAA+C,eAAe,EAAE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,qwDAQ1B,CAAC"}
|
|
@@ -186,7 +186,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
186
186
|
filter: false,
|
|
187
187
|
countDownloads: `path:"scin_dataset_precomputed_embeddings.npz" OR path:"saved_model.pb"`,
|
|
188
188
|
},
|
|
189
|
-
dia: {
|
|
189
|
+
"dia-tts": {
|
|
190
190
|
prettyLabel: "Dia",
|
|
191
191
|
repoName: "Dia",
|
|
192
192
|
repoUrl: "https://github.com/nari-labs/dia",
|
|
@@ -382,6 +382,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
382
382
|
repoUrl: "https://github.com/DigitalPhonetics/IMS-Toucan",
|
|
383
383
|
countDownloads: `path:"embedding_gan.pt" OR path:"Vocoder.pt" OR path:"ToucanTTS.pt"`,
|
|
384
384
|
},
|
|
385
|
+
"index-tts": {
|
|
386
|
+
prettyLabel: "IndexTTS",
|
|
387
|
+
repoName: "IndexTTS",
|
|
388
|
+
repoUrl: "https://github.com/index-tts/index-tts",
|
|
389
|
+
snippets: snippets.indextts,
|
|
390
|
+
filter: false,
|
|
391
|
+
},
|
|
385
392
|
"infinite-you": {
|
|
386
393
|
prettyLabel: "InfiniteYou",
|
|
387
394
|
repoName: "InfiniteYou",
|
|
@@ -579,6 +586,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
579
586
|
filter: false,
|
|
580
587
|
countDownloads: `path:"Open_Sora_v2.safetensors"`,
|
|
581
588
|
},
|
|
589
|
+
outetts: {
|
|
590
|
+
prettyLabel: "OuteTTS",
|
|
591
|
+
repoName: "OuteTTS",
|
|
592
|
+
repoUrl: "https://github.com/edwko/OuteTTS",
|
|
593
|
+
snippets: snippets.outetts,
|
|
594
|
+
filter: false,
|
|
595
|
+
},
|
|
582
596
|
paddlenlp: {
|
|
583
597
|
prettyLabel: "paddlenlp",
|
|
584
598
|
repoName: "PaddleNLP",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/any-to-any/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/any-to-any/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cA4Df,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AA+CpD,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,6CAA6C,CAAC;AACjE,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,4CAA4C,CAAC;AAChE,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,0BAA0B,CAAC;AAC9C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,qCAAqC,CAAC;AAC7C,mBAAmB,+BAA+B,CAAC;AACnD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,mCAAmC,CAAC;AACvD,mBAAmB,oCAAoC,CAAC;AACxD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,yCAAyC,CAAC;AAC7D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAC/G,YAAY,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnH,mBAAmB,qCAAqC,CAAC;AACzD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AACtF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,oCAAoC,CAAC;AAC5C,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,gCAAgC,CAAC;AACxC,mBAAmB,qCAAqC,CAAC;AACzD,mBAAmB,0CAA0C,CAAC;AAC9D,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,+CAA+C,CAAC;AACnE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,2CAA2C,CAAC;AAEnD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAE7D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CAgEzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAwDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD
;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,GACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
|
package/dist/esm/tasks/index.js
CHANGED
|
@@ -33,6 +33,7 @@ import textRanking from "./text-ranking/data.js";
|
|
|
33
33
|
import textToVideo from "./text-to-video/data.js";
|
|
34
34
|
import unconditionalImageGeneration from "./unconditional-image-generation/data.js";
|
|
35
35
|
import videoClassification from "./video-classification/data.js";
|
|
36
|
+
import visualDocumentRetrieval from "./visual-document-retrieval/data.js";
|
|
36
37
|
import visualQuestionAnswering from "./visual-question-answering/data.js";
|
|
37
38
|
import zeroShotClassification from "./zero-shot-classification/data.js";
|
|
38
39
|
import zeroShotImageClassification from "./zero-shot-image-classification/data.js";
|
|
@@ -134,7 +135,7 @@ export const TASKS_DATA = {
|
|
|
134
135
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
135
136
|
"depth-estimation": getData("depth-estimation", depthEstimation),
|
|
136
137
|
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),
|
|
137
|
-
"visual-document-retrieval": getData("visual-document-retrieval",
|
|
138
|
+
"visual-document-retrieval": getData("visual-document-retrieval", visualDocumentRetrieval),
|
|
138
139
|
"feature-extraction": getData("feature-extraction", featureExtraction),
|
|
139
140
|
"fill-mask": getData("fill-mask", fillMask),
|
|
140
141
|
"graph-ml": undefined,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/visual-document-retrieval/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,QAAA,MAAM,QAAQ,EAAE,cAkEf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
const taskData = {
|
|
2
|
+
datasets: [
|
|
3
|
+
{
|
|
4
|
+
description: "A large dataset used to train visual document retrieval models.",
|
|
5
|
+
id: "vidore/colpali_train_set",
|
|
6
|
+
},
|
|
7
|
+
],
|
|
8
|
+
demo: {
|
|
9
|
+
inputs: [
|
|
10
|
+
{
|
|
11
|
+
filename: "input.png",
|
|
12
|
+
type: "img",
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
label: "Question",
|
|
16
|
+
content: "Is the model in this paper the fastest for inference?",
|
|
17
|
+
type: "text",
|
|
18
|
+
},
|
|
19
|
+
],
|
|
20
|
+
outputs: [
|
|
21
|
+
{
|
|
22
|
+
type: "chart",
|
|
23
|
+
data: [
|
|
24
|
+
{
|
|
25
|
+
label: "Page 10",
|
|
26
|
+
score: 0.7,
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
label: "Page 11",
|
|
30
|
+
score: 0.06,
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
label: "Page 9",
|
|
34
|
+
score: 0.003,
|
|
35
|
+
},
|
|
36
|
+
],
|
|
37
|
+
},
|
|
38
|
+
],
|
|
39
|
+
},
|
|
40
|
+
isPlaceholder: false,
|
|
41
|
+
metrics: [
|
|
42
|
+
{
|
|
43
|
+
description: "NDCG@k scores ranked recommendation lists for top-k results. 0 is the worst, 1 is the best.",
|
|
44
|
+
id: "Normalized Discounted Cumulative Gain at K",
|
|
45
|
+
},
|
|
46
|
+
],
|
|
47
|
+
models: [
|
|
48
|
+
{
|
|
49
|
+
description: "Very accurate visual document retrieval model for multilingual queries and documents.",
|
|
50
|
+
id: "vidore/colqwen2-v1.0",
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
description: "Very fast and efficient visual document retrieval model that works on five languages.",
|
|
54
|
+
id: "marco/mcdse-2b-v1",
|
|
55
|
+
},
|
|
56
|
+
],
|
|
57
|
+
spaces: [
|
|
58
|
+
{
|
|
59
|
+
description: "A leaderboard of visual document retrieval models.",
|
|
60
|
+
id: "vidore/vidore-leaderboard",
|
|
61
|
+
},
|
|
62
|
+
],
|
|
63
|
+
summary: "Visual document retrieval is the task of searching for relevant image-based documents, such as PDFs. These models take a text query and multiple documents as input and return the top-most relevant documents and relevancy scores as output.",
|
|
64
|
+
widgetModels: [""],
|
|
65
|
+
youtubeId: "",
|
|
66
|
+
};
|
|
67
|
+
export default taskData;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.18.
|
|
4
|
+
"version": "0.18.12",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/hardware.ts
CHANGED
|
@@ -153,7 +153,7 @@ export const SKUS = {
|
|
|
153
153
|
memory: [16],
|
|
154
154
|
},
|
|
155
155
|
"RTX 5060 Ti": {
|
|
156
|
-
tflops: 23.
|
|
156
|
+
tflops: 23.7, // source https://www.techpowerup.com/gpu-specs/geforce-rtx-5060-ti.c4246
|
|
157
157
|
memory: [16, 8],
|
|
158
158
|
},
|
|
159
159
|
"RTX 5060": {
|
|
@@ -435,6 +435,24 @@ export const gliner = (model: ModelData): string[] => [
|
|
|
435
435
|
model = GLiNER.from_pretrained("${model.id}")`,
|
|
436
436
|
];
|
|
437
437
|
|
|
438
|
+
export const indextts = (model: ModelData): string[] => [
|
|
439
|
+
`# Download model
|
|
440
|
+
from huggingface_hub import snapshot_download
|
|
441
|
+
|
|
442
|
+
snapshot_download(${model.id}, local_dir="checkpoints")
|
|
443
|
+
|
|
444
|
+
from indextts.infer import IndexTTS
|
|
445
|
+
|
|
446
|
+
# Ensure config.yaml is present in the checkpoints directory
|
|
447
|
+
tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
|
|
448
|
+
|
|
449
|
+
voice = "path/to/your/reference_voice.wav" # Path to the voice reference audio file
|
|
450
|
+
text = "Hello, how are you?"
|
|
451
|
+
output_path = "output_index.wav"
|
|
452
|
+
|
|
453
|
+
tts.infer(voice, text, output_path)`,
|
|
454
|
+
];
|
|
455
|
+
|
|
438
456
|
export const htrflow = (model: ModelData): string[] => [
|
|
439
457
|
`# CLI usage
|
|
440
458
|
# see docs: https://ai-riksarkivet.github.io/htrflow/latest/getting_started/quick_start.html
|
|
@@ -702,11 +720,12 @@ model = pe.CLIP.from_config("${model.id}", pretrained=True)`;
|
|
|
702
720
|
import core.vision_encoder.pe as pe
|
|
703
721
|
|
|
704
722
|
model = pe.VisionTransformer.from_config("${model.id}", pretrained=True)`;
|
|
705
|
-
|
|
706
|
-
if (model.id.includes("Core"))
|
|
723
|
+
|
|
724
|
+
if (model.id.includes("Core")) {
|
|
707
725
|
return [clip_model, vision_encoder];
|
|
708
|
-
else
|
|
726
|
+
} else {
|
|
709
727
|
return [vision_encoder];
|
|
728
|
+
}
|
|
710
729
|
};
|
|
711
730
|
|
|
712
731
|
export const pyannote_audio_pipeline = (model: ModelData): string[] => [
|
|
@@ -1319,6 +1338,29 @@ export const nemo = (model: ModelData): string[] => {
|
|
|
1319
1338
|
return command ?? [`# tag did not correspond to a valid NeMo domain.`];
|
|
1320
1339
|
};
|
|
1321
1340
|
|
|
1341
|
+
/**
 * Builds the Python usage snippet shown for OuteTTS models.
 *
 * @param model - Model metadata; `model.tags` and `model.id` are read.
 * @returns An empty array when the repo is tagged `gguf` or `onnx`
 *          (no snippet is shown for those mirrors), otherwise a single-element
 *          array containing a minimal snippet for the HF backend.
 */
export const outetts = (model: ModelData): string[] => {
	// Don't show this block on GGUF / ONNX mirrors
	const tags = model.tags ?? [];
	if (tags.includes("gguf") || tags.includes("onnx")) {
		return [];
	}

	// v1.0 HF → minimal runnable snippet.
	// The template starts right after the backtick so the rendered snippet has no
	// stray leading/trailing blank line, matching the other snippets in this file.
	return [
		`import outetts

enum = outetts.Models("${model.id}".split("/", 1)[1]) # VERSION_1_0_SIZE_1B
cfg = outetts.ModelConfig.auto_config(enum, outetts.Backend.HF)
tts = outetts.Interface(cfg)

speaker = tts.load_default_speaker("EN-FEMALE-1-NEUTRAL")
tts.generate(
    outetts.GenerationConfig(
        text="Hello there, how are you doing?",
        speaker=speaker,
    )
).save("output.wav")`,
	];
};
|
|
1363
|
+
|
|
1322
1364
|
export const pxia = (model: ModelData): string[] => [
|
|
1323
1365
|
`from pxia import AutoModel
|
|
1324
1366
|
|
package/src/model-libraries.ts
CHANGED
|
@@ -230,7 +230,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
230
230
|
filter: false,
|
|
231
231
|
countDownloads: `path:"scin_dataset_precomputed_embeddings.npz" OR path:"saved_model.pb"`,
|
|
232
232
|
},
|
|
233
|
-
dia: {
|
|
233
|
+
"dia-tts": {
|
|
234
234
|
prettyLabel: "Dia",
|
|
235
235
|
repoName: "Dia",
|
|
236
236
|
repoUrl: "https://github.com/nari-labs/dia",
|
|
@@ -426,6 +426,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
426
426
|
repoUrl: "https://github.com/DigitalPhonetics/IMS-Toucan",
|
|
427
427
|
countDownloads: `path:"embedding_gan.pt" OR path:"Vocoder.pt" OR path:"ToucanTTS.pt"`,
|
|
428
428
|
},
|
|
429
|
+
"index-tts": {
|
|
430
|
+
prettyLabel: "IndexTTS",
|
|
431
|
+
repoName: "IndexTTS",
|
|
432
|
+
repoUrl: "https://github.com/index-tts/index-tts",
|
|
433
|
+
snippets: snippets.indextts,
|
|
434
|
+
filter: false,
|
|
435
|
+
},
|
|
429
436
|
"infinite-you": {
|
|
430
437
|
prettyLabel: "InfiniteYou",
|
|
431
438
|
repoName: "InfiniteYou",
|
|
@@ -623,6 +630,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
623
630
|
filter: false,
|
|
624
631
|
countDownloads: `path:"Open_Sora_v2.safetensors"`,
|
|
625
632
|
},
|
|
633
|
+
outetts: {
|
|
634
|
+
prettyLabel: "OuteTTS",
|
|
635
|
+
repoName: "OuteTTS",
|
|
636
|
+
repoUrl: "https://github.com/edwko/OuteTTS",
|
|
637
|
+
snippets: snippets.outetts,
|
|
638
|
+
filter: false,
|
|
639
|
+
},
|
|
626
640
|
paddlenlp: {
|
|
627
641
|
prettyLabel: "paddlenlp",
|
|
628
642
|
repoName: "PaddleNLP",
|
|
@@ -9,13 +9,13 @@ const taskData: TaskDataCustom = {
|
|
|
9
9
|
],
|
|
10
10
|
demo: {
|
|
11
11
|
inputs: [
|
|
12
|
-
{
|
|
12
|
+
{
|
|
13
13
|
filename: "any-to-any-input.jpg",
|
|
14
14
|
type: "img",
|
|
15
15
|
},
|
|
16
16
|
{
|
|
17
17
|
label: "Text Prompt",
|
|
18
|
-
content: "
|
|
18
|
+
content: "What is the significance of this place?",
|
|
19
19
|
type: "text",
|
|
20
20
|
},
|
|
21
21
|
],
|
|
@@ -26,7 +26,7 @@ const taskData: TaskDataCustom = {
|
|
|
26
26
|
"The place in the picture is Osaka Castle, located in Osaka, Japan. Osaka Castle is a historic castle that was originally built in the 16th century by Toyotomi Hideyoshi, a powerful warlord of the time. It is one of the most famous landmarks in Osaka and is known for its distinctive white walls and black roof tiles. The castle has been rebuilt several times over the centuries and is now a popular tourist attraction, offering visitors a glimpse into Japan's rich history and culture.",
|
|
27
27
|
type: "text",
|
|
28
28
|
},
|
|
29
|
-
|
|
29
|
+
{
|
|
30
30
|
filename: "any-to-any-output.wav",
|
|
31
31
|
type: "audio",
|
|
32
32
|
},
|
|
@@ -57,8 +57,7 @@ const taskData: TaskDataCustom = {
|
|
|
57
57
|
id: "deepseek-ai/Janus-Pro-7B",
|
|
58
58
|
},
|
|
59
59
|
],
|
|
60
|
-
summary:
|
|
61
|
-
"Any-to-any models can understand two or more modalities and output two or more modalities.",
|
|
60
|
+
summary: "Any-to-any models can understand two or more modalities and output two or more modalities.",
|
|
62
61
|
widgetModels: [],
|
|
63
62
|
youtubeId: "",
|
|
64
63
|
};
|
package/src/tasks/index.ts
CHANGED
|
@@ -35,6 +35,7 @@ import textRanking from "./text-ranking/data.js";
|
|
|
35
35
|
import textToVideo from "./text-to-video/data.js";
|
|
36
36
|
import unconditionalImageGeneration from "./unconditional-image-generation/data.js";
|
|
37
37
|
import videoClassification from "./video-classification/data.js";
|
|
38
|
+
import visualDocumentRetrieval from "./visual-document-retrieval/data.js";
|
|
38
39
|
import visualQuestionAnswering from "./visual-question-answering/data.js";
|
|
39
40
|
import zeroShotClassification from "./zero-shot-classification/data.js";
|
|
40
41
|
import zeroShotImageClassification from "./zero-shot-image-classification/data.js";
|
|
@@ -206,7 +207,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
206
207
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
207
208
|
"depth-estimation": getData("depth-estimation", depthEstimation),
|
|
208
209
|
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),
|
|
209
|
-
"visual-document-retrieval": getData("visual-document-retrieval",
|
|
210
|
+
"visual-document-retrieval": getData("visual-document-retrieval", visualDocumentRetrieval),
|
|
210
211
|
"feature-extraction": getData("feature-extraction", featureExtraction),
|
|
211
212
|
"fill-mask": getData("fill-mask", fillMask),
|
|
212
213
|
"graph-ml": undefined,
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
## Use Cases
|
|
2
|
+
|
|
3
|
+
### Multimodal Document Retrieval
|
|
4
|
+
|
|
5
|
+
Visual document retrieval models can be used to retrieve relevant documents when given a text query. The documents must be indexed first, which is a one-time operation. After indexing, the retrieval model takes a text query (question) and the number `k` of documents to return, and returns the top-k most relevant documents for the query. The index can be reused for subsequent queries.
|
|
6
|
+
|
|
7
|
+
### Multimodal Retrieval Augmented Generation (RAG)
|
|
8
|
+
|
|
9
|
+
Multimodal RAG is the task of generating answers from documents (texts or images) when given a text query and a set of documents. These documents and the text query can be fed to [a vision language model](https://huggingface.co/tasks/image-text-to-text) to generate the actual answer.
|
|
10
|
+
|
|
11
|
+
## Inference
|
|
12
|
+
|
|
13
|
+
You can use the `transformers` library to run inference with visual document retrieval models. To calculate similarity between images and text, process each separately and pass each processed input through the model. The resulting embeddings can then be passed to the processor's `score_retrieval` method to calculate similarity scores.
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import torch
|
|
17
|
+
from PIL import Image
|
|
18
|
+
from transformers import ColPaliForRetrieval, ColPaliProcessor
|
|
19
|
+
|
|
20
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
21
|
+
|
|
22
|
+
model = ColPaliForRetrieval.from_pretrained(
|
|
23
|
+
"vidore/colpali-v1.2-hf",
|
|
24
|
+
torch_dtype=torch.bfloat16,
|
|
25
|
+
).to(device)
|
|
26
|
+
|
|
27
|
+
processor = ColPaliProcessor.from_pretrained("vidore/colpali-v1.2-hf")
|
|
28
|
+
|
|
29
|
+
# Your inputs (replace dummy images with screenshots of your documents)
|
|
30
|
+
images = [
|
|
31
|
+
Image.new("RGB", (32, 32), color="white"),
|
|
32
|
+
Image.new("RGB", (16, 16), color="black"),
|
|
33
|
+
]
|
|
34
|
+
queries = [
|
|
35
|
+
"What is the organizational structure for our R&D department?",
|
|
36
|
+
"Can you provide a breakdown of last year’s financial performance?",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
# Process the image and text
|
|
40
|
+
batch_images = processor(images=images).to(device)
|
|
41
|
+
batch_queries = processor(text=queries).to(device)
|
|
42
|
+
|
|
43
|
+
with torch.no_grad():
|
|
44
|
+
image_embeddings = model(**batch_images).embeddings
|
|
45
|
+
query_embeddings = model(**batch_queries).embeddings
|
|
46
|
+
|
|
47
|
+
# Score the queries against the images
|
|
48
|
+
scores = processor.score_retrieval(query_embeddings, image_embeddings)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Useful Resources
|
|
52
|
+
|
|
53
|
+
- [Multimodal Retrieval Augmented Generation using ColPali and Qwen2VL](https://github.com/merveenoyan/smol-vision/blob/main/ColPali_%2B_Qwen2_VL.ipynb)
|
|
54
|
+
- [Fine-tuning ColPali for Multimodal Retrieval Augmented Generation](https://github.com/merveenoyan/smol-vision/blob/main/Finetune_ColPali.ipynb)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { TaskDataCustom } from "../index.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Task page data for the "visual-document-retrieval" task: example dataset,
 * demo inputs/outputs, metric, recommended models and spaces, and the summary.
 */
const taskData: TaskDataCustom = {
	datasets: [
		{
			description: "A large dataset used to train visual document retrieval models.",
			id: "vidore/colpali_train_set",
		},
	],
	demo: {
		inputs: [
			{
				filename: "input.png",
				type: "img",
			},
			{
				label: "Question",
				content: "Is the model in this paper the fastest for inference?",
				type: "text",
			},
		],
		outputs: [
			{
				type: "chart",
				data: [
					{
						label: "Page 10",
						score: 0.7,
					},
					{
						label: "Page 11",
						score: 0.06,
					},
					{
						label: "Page 9",
						score: 0.003,
					},
				],
			},
		],
	},
	isPlaceholder: false,
	metrics: [
		{
			description: "NDCG@k scores ranked recommendation lists for top-k results. 0 is the worst, 1 is the best.",
			id: "Normalized Discounted Cumulative Gain at K",
		},
	],
	models: [
		{
			description: "Very accurate visual document retrieval model for multilingual queries and documents.",
			id: "vidore/colqwen2-v1.0",
		},
		{
			description: "Very fast and efficient visual document retrieval model that works on five languages.",
			id: "marco/mcdse-2b-v1",
		},
	],
	spaces: [
		{
			description: "A leaderboard of visual document retrieval models.",
			id: "vidore/vidore-leaderboard",
		},
	],
	summary:
		"Visual document retrieval is the task of searching for relevant image-based documents, such as PDFs. These models take a text query and multiple documents as input and return the top-most relevant documents and relevancy scores as output.",
	// No widget model is configured for this task; use an empty list rather than
	// a list holding an empty model id, as the any-to-any task data does.
	widgetModels: [],
	youtubeId: "",
};
|
|
70
|
+
|
|
71
|
+
export default taskData;
|