@huggingface/tasks 0.12.3 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +95 -9
- package/dist/index.js +95 -9
- package/dist/src/hardware.d.ts +25 -0
- package/dist/src/hardware.d.ts.map +1 -1
- package/dist/src/model-data.d.ts +1 -0
- package/dist/src/model-data.d.ts.map +1 -1
- package/dist/src/model-libraries-snippets.d.ts +2 -0
- package/dist/src/model-libraries-snippets.d.ts.map +1 -1
- package/dist/src/model-libraries.d.ts +39 -3
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/tasks/keypoint-detection/data.d.ts.map +1 -1
- package/dist/src/tasks/video-text-to-text/data.d.ts +4 -0
- package/dist/src/tasks/video-text-to-text/data.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/hardware.ts +25 -0
- package/src/model-data.ts +1 -0
- package/src/model-libraries-snippets.ts +37 -6
- package/src/model-libraries.ts +37 -1
- package/src/tasks/image-text-to-text/data.ts +1 -1
- package/src/tasks/keypoint-detection/about.md +6 -8
- package/src/tasks/keypoint-detection/data.ts +4 -0
- package/src/tasks/text-to-image/about.md +24 -3
- package/src/tasks/text-to-image/data.ts +1 -1
- package/src/tasks/video-text-to-text/about.md +98 -0
- package/src/tasks/video-text-to-text/data.ts +58 -0
package/dist/index.cjs
CHANGED
@@ -2237,7 +2237,7 @@ var taskData11 = {
       },
       {
         description: "Strong image-text-to-text model.",
-        id: "
+        id: "microsoft/Phi-3.5-vision-instruct"
       }
     ],
     spaces: [
@@ -3147,7 +3147,7 @@ var taskData24 = {
       id: "InstantX/InstantID"
     }
   ],
-  summary: "
+  summary: "Text-to-image is the task of generating images from input text. These pipelines can also be used to modify and edit images based on text prompts.",
   widgetModels: ["black-forest-labs/FLUX.1-dev"],
   youtubeId: ""
 };
@@ -4390,6 +4390,7 @@ function nameWithoutNamespace(modelId) {
   const splitted = modelId.split("/");
   return splitted.length === 1 ? splitted[0] : splitted[1];
 }
+var escapeStringForJson = (str) => JSON.stringify(str);
 var adapters = (model) => [
   `from adapters import AutoAdapterModel
 
@@ -4442,6 +4443,12 @@ result, message = detector.detect_watermark(watermarked_audio, sr)`;
 function get_base_diffusers_model(model) {
   return model.cardData?.base_model?.toString() ?? "fill-in-base-model";
 }
+function get_prompt_from_diffusers_model(model) {
+  const prompt = (model.widgetData?.[0]).text ?? model.cardData?.instance_prompt;
+  if (prompt) {
+    return escapeStringForJson(prompt);
+  }
+}
 var bertopic = (model) => [
   `from bertopic import BERTopic
 
@@ -4495,30 +4502,37 @@ depth = model.infer_image(raw_img) # HxW raw depth map in numpy
 `
 ];
 };
+var diffusersDefaultPrompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k";
 var diffusers_default = (model) => [
   `from diffusers import DiffusionPipeline
 
-
+pipe = DiffusionPipeline.from_pretrained("${model.id}")
+
+prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}"
+image = pipe(prompt).images[0]`
 ];
 var diffusers_controlnet = (model) => [
   `from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
 
 controlnet = ControlNetModel.from_pretrained("${model.id}")
-
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
   "${get_base_diffusers_model(model)}", controlnet=controlnet
 )`
 ];
 var diffusers_lora = (model) => [
   `from diffusers import DiffusionPipeline
 
-
-
+pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}")
+pipe.load_lora_weights("${model.id}")
+
+prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}"
+image = pipe(prompt).images[0]`
 ];
 var diffusers_textual_inversion = (model) => [
   `from diffusers import DiffusionPipeline
 
-
-
+pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}")
+pipe.load_textual_inversion("${model.id}")`
 ];
 var diffusers = (model) => {
   if (model.tags.includes("controlnet")) {
@@ -5158,6 +5172,11 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
 // Please see provided C# file for more details
 `
 ];
+var vfimamba = (model) => [
+  `from Trainer_finetune import Model
+
+model = Model.from_pretrained("${model.id}")`
+];
 var voicecraft = (model) => [
   `from voicecraft import VoiceCraft
 
@@ -5268,6 +5287,12 @@ whisperkit-cli transcribe --audio-path /path/to/audio.mp3
 # Or use your preferred model variant
 whisperkit-cli transcribe --model "large-v3" --model-prefix "distil" --audio-path /path/to/audio.mp3 --verbose`
 ];
+var threedtopia_xl = (model) => [
+  `from threedtopia_xl.models import threedtopia_xl
+
+model = threedtopia_xl.from_pretrained("${model.id}")
+model.generate(cond="path/to/image.png")`
+];
 
 // src/model-libraries.ts
 var MODEL_LIBRARIES_UI_ELEMENTS = {
@@ -5671,7 +5696,7 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     repoName: "pythae",
     repoUrl: "https://github.com/clementchadebec/benchmark_VAE",
    snippets: pythae,
-    filter:
+    filter: false
   },
   recurrentgemma: {
     prettyLabel: "RecurrentGemma",
@@ -5719,6 +5744,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: true,
     countDownloads: `path:"cfg.json"`
   },
+  sapiens: {
+    prettyLabel: "sapiens",
+    repoName: "sapiens",
+    repoUrl: "https://github.com/facebookresearch/sapiens",
+    filter: false,
+    countDownloads: `path_extension:"pt2" OR path_extension:"pth" OR path_extension:"onnx"`
+  },
   "sentence-transformers": {
     prettyLabel: "sentence-transformers",
     repoName: "sentence-transformers",
@@ -5769,6 +5801,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: true,
     countDownloads: `path:"hyperparams.yaml"`
   },
+  "ssr-speech": {
+    prettyLabel: "SSR-Speech",
+    repoName: "SSR-Speech",
+    repoUrl: "https://github.com/WangHelin1997/SSR-Speech",
+    filter: false,
+    countDownloads: `path_extension:".pth"`
+  },
   "stable-audio-tools": {
     prettyLabel: "Stable Audio Tools",
     repoName: "stable-audio-tools",
@@ -5792,6 +5831,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     countDownloads: `path:"cvlm_llama2_tokenizer/tokenizer.model"`,
     snippets: seed_story
   },
+  soloaudio: {
+    prettyLabel: "SoloAudio",
+    repoName: "SoloAudio",
+    repoUrl: "https://github.com/WangHelin1997/SoloAudio",
+    filter: false,
+    countDownloads: `path:"soloaudio_v2.pt"`
+  },
   "stable-baselines3": {
     prettyLabel: "stable-baselines3",
     repoName: "stable-baselines3",
@@ -5863,6 +5909,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: true,
     countDownloads: `path_extension:"sentis"`
   },
+  "vfi-mamba": {
+    prettyLabel: "VFIMamba",
+    repoName: "VFIMamba",
+    repoUrl: "https://github.com/MCG-NJU/VFIMamba",
+    countDownloads: `path_extension:"pkl"`,
+    snippets: vfimamba
+  },
   voicecraft: {
     prettyLabel: "VoiceCraft",
     repoName: "VoiceCraft",
@@ -5884,6 +5937,14 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     docsUrl: "https://github.com/argmaxinc/WhisperKit?tab=readme-ov-file#homebrew",
     snippets: whisperkit,
     countDownloads: `path_filename:"model" AND path_extension:"mil" AND _exists_:"path_prefix"`
+  },
+  "3dtopia-xl": {
+    prettyLabel: "3DTopia-XL",
+    repoName: "3DTopia-XL",
+    repoUrl: "https://github.com/3DTopia/3DTopia-XL",
+    filter: false,
+    countDownloads: `path:"model_vae_fp16.pt"`,
+    snippets: threedtopia_xl
   }
 };
 var ALL_MODEL_LIBRARY_KEYS = Object.keys(MODEL_LIBRARIES_UI_ELEMENTS);
@@ -6568,6 +6629,14 @@ var SKUS = {
       tflops: 14.93,
       memory: [8]
     },
+    "RTX 2070 SUPER Mobile": {
+      tflops: 14.13,
+      memory: [8]
+    },
+    "RTX 2070 SUPER": {
+      tflops: 18.12,
+      memory: [8]
+    },
     "RTX 3050 Mobile": {
       tflops: 7.639,
       memory: [6]
@@ -6670,6 +6739,23 @@ var SKUS = {
       tflops: 26.11,
       memory: [16]
     }
+  },
+  QUALCOMM: {
+    "Snapdragon X Elite X1E-00-1DE": {
+      tflops: 4.6
+    },
+    "Snapdragon X Elite X1E-84-100": {
+      tflops: 4.6
+    },
+    "Snapdragon X Elite X1E-80-100": {
+      tflops: 3.8
+    },
+    "Snapdragon X Elite X1E-78-100": {
+      tflops: 3.8
+    },
+    "Snapdragon X Plus X1P-64-100": {
+      tflops: 3.8
+    }
   }
 },
 CPU: {
package/dist/index.js
CHANGED
@@ -2199,7 +2199,7 @@ var taskData11 = {
       },
       {
         description: "Strong image-text-to-text model.",
-        id: "
+        id: "microsoft/Phi-3.5-vision-instruct"
       }
     ],
     spaces: [
@@ -3109,7 +3109,7 @@ var taskData24 = {
      id: "InstantX/InstantID"
     }
   ],
-  summary: "
+  summary: "Text-to-image is the task of generating images from input text. These pipelines can also be used to modify and edit images based on text prompts.",
   widgetModels: ["black-forest-labs/FLUX.1-dev"],
   youtubeId: ""
 };
@@ -4352,6 +4352,7 @@ function nameWithoutNamespace(modelId) {
   const splitted = modelId.split("/");
   return splitted.length === 1 ? splitted[0] : splitted[1];
 }
+var escapeStringForJson = (str) => JSON.stringify(str);
 var adapters = (model) => [
   `from adapters import AutoAdapterModel
 
@@ -4404,6 +4405,12 @@ result, message = detector.detect_watermark(watermarked_audio, sr)`;
 function get_base_diffusers_model(model) {
   return model.cardData?.base_model?.toString() ?? "fill-in-base-model";
 }
+function get_prompt_from_diffusers_model(model) {
+  const prompt = (model.widgetData?.[0]).text ?? model.cardData?.instance_prompt;
+  if (prompt) {
+    return escapeStringForJson(prompt);
+  }
+}
 var bertopic = (model) => [
   `from bertopic import BERTopic
 
@@ -4457,30 +4464,37 @@ depth = model.infer_image(raw_img) # HxW raw depth map in numpy
 `
 ];
 };
+var diffusersDefaultPrompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k";
 var diffusers_default = (model) => [
   `from diffusers import DiffusionPipeline
 
-
+pipe = DiffusionPipeline.from_pretrained("${model.id}")
+
+prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}"
+image = pipe(prompt).images[0]`
 ];
 var diffusers_controlnet = (model) => [
   `from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
 
 controlnet = ControlNetModel.from_pretrained("${model.id}")
-
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
   "${get_base_diffusers_model(model)}", controlnet=controlnet
 )`
 ];
 var diffusers_lora = (model) => [
   `from diffusers import DiffusionPipeline
 
-
-
+pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}")
+pipe.load_lora_weights("${model.id}")
+
+prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}"
+image = pipe(prompt).images[0]`
 ];
 var diffusers_textual_inversion = (model) => [
   `from diffusers import DiffusionPipeline
 
-
-
+pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}")
+pipe.load_textual_inversion("${model.id}")`
 ];
 var diffusers = (model) => {
   if (model.tags.includes("controlnet")) {
@@ -5120,6 +5134,11 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
 // Please see provided C# file for more details
 `
 ];
+var vfimamba = (model) => [
+  `from Trainer_finetune import Model
+
+model = Model.from_pretrained("${model.id}")`
+];
 var voicecraft = (model) => [
   `from voicecraft import VoiceCraft
 
@@ -5230,6 +5249,12 @@ whisperkit-cli transcribe --audio-path /path/to/audio.mp3
 # Or use your preferred model variant
 whisperkit-cli transcribe --model "large-v3" --model-prefix "distil" --audio-path /path/to/audio.mp3 --verbose`
 ];
+var threedtopia_xl = (model) => [
+  `from threedtopia_xl.models import threedtopia_xl
+
+model = threedtopia_xl.from_pretrained("${model.id}")
+model.generate(cond="path/to/image.png")`
+];
 
 // src/model-libraries.ts
 var MODEL_LIBRARIES_UI_ELEMENTS = {
@@ -5633,7 +5658,7 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     repoName: "pythae",
     repoUrl: "https://github.com/clementchadebec/benchmark_VAE",
     snippets: pythae,
-    filter:
+    filter: false
   },
   recurrentgemma: {
     prettyLabel: "RecurrentGemma",
@@ -5681,6 +5706,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: true,
     countDownloads: `path:"cfg.json"`
   },
+  sapiens: {
+    prettyLabel: "sapiens",
+    repoName: "sapiens",
+    repoUrl: "https://github.com/facebookresearch/sapiens",
+    filter: false,
+    countDownloads: `path_extension:"pt2" OR path_extension:"pth" OR path_extension:"onnx"`
+  },
   "sentence-transformers": {
     prettyLabel: "sentence-transformers",
     repoName: "sentence-transformers",
@@ -5731,6 +5763,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: true,
     countDownloads: `path:"hyperparams.yaml"`
   },
+  "ssr-speech": {
+    prettyLabel: "SSR-Speech",
+    repoName: "SSR-Speech",
+    repoUrl: "https://github.com/WangHelin1997/SSR-Speech",
+    filter: false,
+    countDownloads: `path_extension:".pth"`
+  },
   "stable-audio-tools": {
     prettyLabel: "Stable Audio Tools",
     repoName: "stable-audio-tools",
@@ -5754,6 +5793,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     countDownloads: `path:"cvlm_llama2_tokenizer/tokenizer.model"`,
     snippets: seed_story
   },
+  soloaudio: {
+    prettyLabel: "SoloAudio",
+    repoName: "SoloAudio",
+    repoUrl: "https://github.com/WangHelin1997/SoloAudio",
+    filter: false,
+    countDownloads: `path:"soloaudio_v2.pt"`
+  },
   "stable-baselines3": {
     prettyLabel: "stable-baselines3",
     repoName: "stable-baselines3",
@@ -5825,6 +5871,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: true,
     countDownloads: `path_extension:"sentis"`
   },
+  "vfi-mamba": {
+    prettyLabel: "VFIMamba",
+    repoName: "VFIMamba",
+    repoUrl: "https://github.com/MCG-NJU/VFIMamba",
+    countDownloads: `path_extension:"pkl"`,
+    snippets: vfimamba
+  },
   voicecraft: {
     prettyLabel: "VoiceCraft",
     repoName: "VoiceCraft",
@@ -5846,6 +5899,14 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     docsUrl: "https://github.com/argmaxinc/WhisperKit?tab=readme-ov-file#homebrew",
     snippets: whisperkit,
     countDownloads: `path_filename:"model" AND path_extension:"mil" AND _exists_:"path_prefix"`
+  },
+  "3dtopia-xl": {
+    prettyLabel: "3DTopia-XL",
+    repoName: "3DTopia-XL",
+    repoUrl: "https://github.com/3DTopia/3DTopia-XL",
+    filter: false,
+    countDownloads: `path:"model_vae_fp16.pt"`,
+    snippets: threedtopia_xl
   }
 };
 var ALL_MODEL_LIBRARY_KEYS = Object.keys(MODEL_LIBRARIES_UI_ELEMENTS);
@@ -6530,6 +6591,14 @@ var SKUS = {
       tflops: 14.93,
       memory: [8]
     },
+    "RTX 2070 SUPER Mobile": {
+      tflops: 14.13,
+      memory: [8]
+    },
+    "RTX 2070 SUPER": {
+      tflops: 18.12,
+      memory: [8]
+    },
     "RTX 3050 Mobile": {
       tflops: 7.639,
       memory: [6]
@@ -6632,6 +6701,23 @@ var SKUS = {
      tflops: 26.11,
       memory: [16]
     }
+  },
+  QUALCOMM: {
+    "Snapdragon X Elite X1E-00-1DE": {
+      tflops: 4.6
+    },
+    "Snapdragon X Elite X1E-84-100": {
+      tflops: 4.6
+    },
+    "Snapdragon X Elite X1E-80-100": {
+      tflops: 3.8
+    },
+    "Snapdragon X Elite X1E-78-100": {
+      tflops: 3.8
+    },
+    "Snapdragon X Plus X1P-64-100": {
+      tflops: 3.8
+    }
   }
 },
 CPU: {
package/dist/src/hardware.d.ts
CHANGED
@@ -172,6 +172,14 @@ export declare const SKUS: {
       tflops: number;
       memory: number[];
     };
+    "RTX 2070 SUPER Mobile": {
+      tflops: number;
+      memory: number[];
+    };
+    "RTX 2070 SUPER": {
+      tflops: number;
+      memory: number[];
+    };
     "RTX 3050 Mobile": {
       tflops: number;
       memory: number[];
@@ -271,6 +279,23 @@ export declare const SKUS: {
     memory: number[];
   };
 };
+QUALCOMM: {
+  "Snapdragon X Elite X1E-00-1DE": {
+    tflops: number;
+  };
+  "Snapdragon X Elite X1E-84-100": {
+    tflops: number;
+  };
+  "Snapdragon X Elite X1E-80-100": {
+    tflops: number;
+  };
+  "Snapdragon X Elite X1E-78-100": {
+    tflops: number;
+  };
+  "Snapdragon X Plus X1P-64-100": {
+    tflops: number;
+  };
+};
 };
 CPU: {
   Intel: {
package/dist/src/hardware.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI
+
{"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwauD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
package/dist/src/model-data.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"model-data.d.ts","sourceRoot":"","sources":["../../src/model-data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IACX;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;OAEG;IACH,MAAM,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB;;WAEG;QACH,QAAQ,CAAC,EAAE;YACV;;eAEG;YACH,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;SACpB,CAAC;QACF,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE;YACrB,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB;;eAEG;YACH,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,gBAAgB,CAAC,EAAE,eAAe,CAAC;QACnC,oBAAoB,CAAC,EAAE;YACtB,UAAU,CAAC,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,SAAS,CAAC,EAAE;YACX,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,OAAO,CAAC,EAAE;YACT,KAAK,CAAC,EAAE;gBACP,IAAI,CAAC,EAAE,MAAM,CAAC;aACd,CAAC;YACF,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,WAAW,CAAC,EAAE;YACb,qBAAqB,CAAC,EAAE,MAAM,CAAC;YAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF,IAAI,CAAC,EAAE;YACN,uBAAuB,CAAC,EAAE,MAAM,CAAC;YACjC,SAAS,CAAC,EAAE,MAAM,CAAC;SACnB,CAAC;KACF,CAAC;IACF;;OAEG;IACH,IAAI,EAAE,MAAM,EAAE,CAAC;IACf;;OAEG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;IACxC;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAChC;;;;;OAKG;IACH,UAAU,CAAC,EAAE,aAAa,EAAE,GAAG,SAAS,CAAC;IACzC;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE;QACV,SAAS,CAAC,EACP,OAAO,GACP;YACA,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;SACpC,CAAC;QACL,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;
+
{"version":3,"file":"model-data.d.ts","sourceRoot":"","sources":["../../src/model-data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IACX;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;OAEG;IACH,MAAM,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB;;WAEG;QACH,QAAQ,CAAC,EAAE;YACV;;eAEG;YACH,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;SACpB,CAAC;QACF,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE;YACrB,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB;;eAEG;YACH,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,gBAAgB,CAAC,EAAE,eAAe,CAAC;QACnC,oBAAoB,CAAC,EAAE;YACtB,UAAU,CAAC,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,SAAS,CAAC,EAAE;YACX,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,OAAO,CAAC,EAAE;YACT,KAAK,CAAC,EAAE;gBACP,IAAI,CAAC,EAAE,MAAM,CAAC;aACd,CAAC;YACF,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,WAAW,CAAC,EAAE;YACb,qBAAqB,CAAC,EAAE,MAAM,CAAC;YAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF,IAAI,CAAC,EAAE;YACN,uBAAuB,CAAC,EAAE,MAAM,CAAC;YACjC,SAAS,CAAC,EAAE,MAAM,CAAC;SACnB,CAAC;KACF,CAAC;IACF;;OAEG;IACH,IAAI,EAAE,MAAM,EAAE,CAAC;IACf;;OAEG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;IACxC;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAChC;;;;;OAKG;IACH,UAAU,CAAC,EAAE,aAAa,EAAE,GAAG,SAAS,CAAC;IACzC;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE;QACV,SAAS,CAAC,EACP,OAAO,GACP;YACA,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;SACpC,CAAC;QACL,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;QAC/B,eAAe,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE;QACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACnC,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,OAAO,CAAC;KACjB,CAAC;IACF,IAAI,CAAC,EAAE;QACN,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB"}
package/dist/src/model-libraries-snippets.d.ts
CHANGED
@@ -51,6 +51,7 @@ export declare const fasttext: (model: ModelData) => string[];
 export declare const stableBaselines3: (model: ModelData) => string[];
 export declare const mlAgents: (model: ModelData) => string[];
 export declare const sentis: () => string[];
+export declare const vfimamba: (model: ModelData) => string[];
 export declare const voicecraft: (model: ModelData) => string[];
 export declare const chattts: () => string[];
 export declare const yolov10: (model: ModelData) => string[];
@@ -61,4 +62,5 @@ export declare const nemo: (model: ModelData) => string[];
 export declare const pythae: (model: ModelData) => string[];
 export declare const audiocraft: (model: ModelData) => string[];
 export declare const whisperkit: () => string[];
+export declare const threedtopia_xl: (model: ModelData) => string[];
 //# sourceMappingURL=model-libraries-snippets.d.ts.map
package/dist/src/model-libraries-snippets.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;
+
{"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAe9C,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAaF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,iBAAiB,UAAW,SAAS,KAAG,MAAM,EA6C1D,CAAC;AAuCF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAwCrD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAgBzD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EAmBrD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAgBzD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAIlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,uBAAuB,UAAW,SAAS,KAAG,MAAM,EAehE,CAAC;AAiBF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAyBF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAOtD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAEF,eAAO,MAAM,OAAO,QAA6B,MAAM,EAQtD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAanC,CAAC;AAsCF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAmC3D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EA2B7C,CAAC;AAEF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAEtD,CAAC;AAEF,eAAO,MAAM,oBAAoB,UAAW,SAAS,KAAG,MAAM,EAI7D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAU9C,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAIpD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAK/C,CAAC;AAkBF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAkBpD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA4CrD,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAcvD,CAAC;AAiBF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAkB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAMzD,CAAC;AAgBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAEjD,CAAC;AAEF,eAAO,MAAM,MAAM,QAA6B,MAAM,EAMrD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,OAAO,QAAO,MAAM,EAYhC,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAOhD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAYjD,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAK5C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAQ7C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AA6BF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAUnD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAYnC,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC"}
package/dist/src/model-libraries.d.ts
CHANGED
@@ -452,7 +452,7 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     repoName: string;
     repoUrl: string;
     snippets: (model: ModelData) => string[];
-    filter:
+    filter: false;
   };
   recurrentgemma: {
     prettyLabel: string;
@@ -500,6 +500,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     filter: true;
     countDownloads: string;
   };
+  sapiens: {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    filter: false;
+    countDownloads: string;
+  };
   "sentence-transformers": {
     prettyLabel: string;
     repoName: string;
@@ -550,6 +557,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     filter: true;
     countDownloads: string;
   };
+  "ssr-speech": {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    filter: false;
+    countDownloads: string;
+  };
   "stable-audio-tools": {
     prettyLabel: string;
     repoName: string;
@@ -573,6 +587,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     countDownloads: string;
     snippets: () => string[];
   };
+  soloaudio: {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    filter: false;
+    countDownloads: string;
+  };
   "stable-baselines3": {
     prettyLabel: string;
     repoName: string;
@@ -644,6 +665,13 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     filter: true;
     countDownloads: string;
   };
+  "vfi-mamba": {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    countDownloads: string;
+    snippets: (model: ModelData) => string[];
+  };
   voicecraft: {
     prettyLabel: string;
     repoName: string;
@@ -666,8 +694,16 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
     snippets: () => string[];
     countDownloads: string;
   };
+  "3dtopia-xl": {
+    prettyLabel: string;
+    repoName: string;
+    repoUrl: string;
+    filter: false;
+    countDownloads: string;
+    snippets: (model: ModelData) => string[];
+  };
 };
 export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
-
export declare const ALL_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "yolov10" | "whisperkit")[];
-
export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "yolov10" | "whisperkit")[];
+
export declare const ALL_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sapiens" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "ssr-speech" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "soloaudio" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "vfi-mamba" | "voicecraft" | "yolov10" | "whisperkit" | "3dtopia-xl")[];
+
export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "birefnet" | "bm25s" | "champ" | "chat_tts" | "colpali" | "deepforest" | "depth-anything-v2" | "diffree" | "diffusers" | "diffusionkit" | "doctr" | "cartesia_pytorch" | "cartesia_mlx" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "liveportrait" | "llama-cpp-python" | "mindspore" | "mamba-ssm" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "py-feat" | "pythae" | "recurrentgemma" | "relik" | "refiners" | "saelens" | "sam2" | "sample-factory" | "sapiens" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "ssr-speech" | "stable-audio-tools" | "diffusion-single-file" | "seed-story" | "soloaudio" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "vfi-mamba" | "voicecraft" | "yolov10" | "whisperkit" | "3dtopia-xl")[];
//# sourceMappingURL=model-libraries.d.ts.map
package/dist/src/model-libraries.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEtE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B
+
{"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEtE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2oBI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,8rCAAgE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,8rCAQ1B,CAAC"}
package/dist/src/tasks/keypoint-detection/data.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/keypoint-detection/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/keypoint-detection/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA6Cf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/dist/src/tasks/video-text-to-text/data.d.ts.map
ADDED
@@ -0,0 +1 @@
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/video-text-to-text/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAqDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@8.10.5",
-  "version": "0.12.
+  "version": "0.12.5",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
package/src/hardware.ts
CHANGED
@@ -176,6 +176,14 @@ export const SKUS = {
       tflops: 14.93,
       memory: [8],
     },
+    "RTX 2070 SUPER Mobile": {
+      tflops: 14.13,
+      memory: [8],
+    },
+    "RTX 2070 SUPER": {
+      tflops: 18.12,
+      memory: [8],
+    },
     "RTX 3050 Mobile": {
       tflops: 7.639,
       memory: [6],
@@ -275,6 +283,23 @@ export const SKUS = {
     memory: [16],
   },
 },
+QUALCOMM: {
+  "Snapdragon X Elite X1E-00-1DE": {
+    tflops: 4.6,
+  },
+  "Snapdragon X Elite X1E-84-100": {
+    tflops: 4.6,
+  },
+  "Snapdragon X Elite X1E-80-100": {
+    tflops: 3.8,
+  },
+  "Snapdragon X Elite X1E-78-100": {
+    tflops: 3.8,
+  },
+  "Snapdragon X Plus X1P-64-100": {
+    tflops: 3.8,
+  },
+},
 },
 CPU: {
   Intel: {
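For orientation, each `SKUS` entry maps a device name to its peak `tflops` and, where it applies, the possible `memory` configurations in GB; the new Snapdragon entries carry only a TFLOPs figure. A minimal sketch of reading the table, assuming (as the hunk context suggests) that the `QUALCOMM` group sits under the top-level `GPU` key and that `SKUS` is re-exported from the package root:

```typescript
import { SKUS } from "@huggingface/tasks";

// One of the NVIDIA SKUs added in this release: peak TFLOPs plus memory options in GB.
const gpu = SKUS.GPU.NVIDIA["RTX 2070 SUPER"];
console.log(gpu.tflops, gpu.memory); // 18.12 [ 8 ]

// The new Qualcomm NPU entries list only a TFLOPs figure, with no memory array.
const npu = SKUS.GPU.QUALCOMM["Snapdragon X Elite X1E-84-100"];
console.log(npu.tflops); // 4.6
```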
package/src/model-libraries-snippets.ts
CHANGED
@@ -1,4 +1,5 @@
 import type { ModelData } from "./model-data";
+import type { WidgetExampleTextInput } from "./widget-example";
 import { LIBRARY_TASK_MAPPING } from "./library-to-tasks";
 
 const TAG_CUSTOM_CODE = "custom_code";
@@ -8,6 +9,8 @@ function nameWithoutNamespace(modelId: string): string {
   return splitted.length === 1 ? splitted[0] : splitted[1];
 }
 
+const escapeStringForJson = (str: string): string => JSON.stringify(str);
+
 //#region snippets
 
 export const adapters = (model: ModelData): string[] => [
@@ -70,6 +73,13 @@ function get_base_diffusers_model(model: ModelData): string {
   return model.cardData?.base_model?.toString() ?? "fill-in-base-model";
 }
 
+function get_prompt_from_diffusers_model(model: ModelData): string | undefined {
+  const prompt = (model.widgetData?.[0] as WidgetExampleTextInput).text ?? model.cardData?.instance_prompt;
+  if (prompt) {
+    return escapeStringForJson(prompt);
+  }
+}
+
 export const bertopic = (model: ModelData): string[] => [
   `from bertopic import BERTopic
 
@@ -129,17 +139,22 @@ depth = model.infer_image(raw_img) # HxW raw depth map in numpy
 ];
 };
 
+const diffusersDefaultPrompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k";
+
 const diffusers_default = (model: ModelData) => [
   `from diffusers import DiffusionPipeline
 
-
+pipe = DiffusionPipeline.from_pretrained("${model.id}")
+
+prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}"
+image = pipe(prompt).images[0]`,
 ];
 
 const diffusers_controlnet = (model: ModelData) => [
   `from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
 
 controlnet = ControlNetModel.from_pretrained("${model.id}")
-
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
   "${get_base_diffusers_model(model)}", controlnet=controlnet
 )`,
 ];
@@ -147,15 +162,18 @@ pipeline = StableDiffusionControlNetPipeline.from_pretrained(
 const diffusers_lora = (model: ModelData) => [
   `from diffusers import DiffusionPipeline
 
-
-
+pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}")
+pipe.load_lora_weights("${model.id}")
+
+prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}"
+image = pipe(prompt).images[0]`,
 ];
 
 const diffusers_textual_inversion = (model: ModelData) => [
   `from diffusers import DiffusionPipeline
 
-
-
+pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}")
+pipe.load_textual_inversion("${model.id}")`,
 ];
 
 export const diffusers = (model: ModelData): string[] => {
@@ -871,6 +889,12 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
 `,
 ];
 
+export const vfimamba = (model: ModelData): string[] => [
+  `from Trainer_finetune import Model
+
+model = Model.from_pretrained("${model.id}")`,
+];
+
 export const voicecraft = (model: ModelData): string[] => [
   `from voicecraft import VoiceCraft
 
@@ -995,4 +1019,11 @@ whisperkit-cli transcribe --audio-path /path/to/audio.mp3
 # Or use your preferred model variant
 whisperkit-cli transcribe --model "large-v3" --model-prefix "distil" --audio-path /path/to/audio.mp3 --verbose`,
 ];
+
+export const threedtopia_xl = (model: ModelData): string[] => [
+  `from threedtopia_xl.models import threedtopia_xl
+
+model = threedtopia_xl.from_pretrained("${model.id}")
+model.generate(cond="path/to/image.png")`,
+];
 //#endregion
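The change above makes the generated diffusers snippets prompt-aware: `get_prompt_from_diffusers_model` reads the repo's first widget example, falls back to `cardData.instance_prompt`, and JSON-escapes the result, while `diffusersDefaultPrompt` fills in when neither exists. A sketch of exercising this through the exported registry; the model payload below is hypothetical, and only the fields the snippet actually reads are filled in:

```typescript
import { MODEL_LIBRARIES_UI_ELEMENTS, type ModelData } from "@huggingface/tasks";

// Hypothetical LoRA repo. The "lora" tag routes `diffusers` to the LoRA snippet;
// the widget example text feeds the generated `prompt = "..."` line.
const model = {
  id: "some-user/some-sdxl-lora",
  tags: ["lora"],
  widgetData: [{ text: "a photo of sks dog" }],
  cardData: { base_model: "stabilityai/stable-diffusion-xl-base-1.0" },
} as unknown as ModelData;

const [snippet] = MODEL_LIBRARIES_UI_ELEMENTS.diffusers.snippets(model);
// -> a Python snippet that loads the base model, attaches the LoRA weights,
//    and uses the widget prompt instead of the stock astronaut prompt.
console.log(snippet);
```

Note that `get_prompt_from_diffusers_model` dereferences `model.widgetData?.[0]` without a trailing `?.`, so a repo with no widget data at all would throw here; callers may want to guard for that.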
package/src/model-libraries.ts
CHANGED
@@ -458,7 +458,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
   repoName: "pythae",
   repoUrl: "https://github.com/clementchadebec/benchmark_VAE",
   snippets: snippets.pythae,
-  filter:
+  filter: false,
 },
 recurrentgemma: {
   prettyLabel: "RecurrentGemma",
@@ -506,6 +506,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
   filter: true,
   countDownloads: `path:"cfg.json"`,
 },
+sapiens: {
+  prettyLabel: "sapiens",
+  repoName: "sapiens",
+  repoUrl: "https://github.com/facebookresearch/sapiens",
+  filter: false,
+  countDownloads: `path_extension:"pt2" OR path_extension:"pth" OR path_extension:"onnx"`,
+},
 "sentence-transformers": {
   prettyLabel: "sentence-transformers",
   repoName: "sentence-transformers",
@@ -556,6 +563,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
   filter: true,
   countDownloads: `path:"hyperparams.yaml"`,
 },
+"ssr-speech": {
+  prettyLabel: "SSR-Speech",
+  repoName: "SSR-Speech",
+  repoUrl: "https://github.com/WangHelin1997/SSR-Speech",
+  filter: false,
+  countDownloads: `path_extension:".pth"`,
+},
 "stable-audio-tools": {
   prettyLabel: "Stable Audio Tools",
   repoName: "stable-audio-tools",
@@ -579,6 +593,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
   countDownloads: `path:"cvlm_llama2_tokenizer/tokenizer.model"`,
   snippets: snippets.seed_story,
 },
+soloaudio: {
+  prettyLabel: "SoloAudio",
+  repoName: "SoloAudio",
+  repoUrl: "https://github.com/WangHelin1997/SoloAudio",
+  filter: false,
+  countDownloads: `path:"soloaudio_v2.pt"`,
+},
 "stable-baselines3": {
   prettyLabel: "stable-baselines3",
   repoName: "stable-baselines3",
@@ -650,6 +671,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
   filter: true,
   countDownloads: `path_extension:"sentis"`,
 },
+"vfi-mamba": {
+  prettyLabel: "VFIMamba",
+  repoName: "VFIMamba",
+  repoUrl: "https://github.com/MCG-NJU/VFIMamba",
+  countDownloads: `path_extension:"pkl"`,
+  snippets: snippets.vfimamba,
+},
 voicecraft: {
   prettyLabel: "VoiceCraft",
   repoName: "VoiceCraft",
@@ -672,6 +700,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
   snippets: snippets.whisperkit,
   countDownloads: `path_filename:"model" AND path_extension:"mil" AND _exists_:"path_prefix"`,
 },
+"3dtopia-xl": {
+  prettyLabel: "3DTopia-XL",
+  repoName: "3DTopia-XL",
+  repoUrl: "https://github.com/3DTopia/3DTopia-XL",
+  filter: false,
+  countDownloads: `path:"model_vae_fp16.pt"`,
+  snippets: snippets.threedtopia_xl,
+},
 } satisfies Record<string, LibraryUiElement>;
 
 export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
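Each entry added above follows the `LibraryUiElement` shape: `prettyLabel` is the display name, `repoName`/`repoUrl` point at the library's code, `filter` marks whether the library shows up in the Hub's library filter, `countDownloads` is a file query that defines which file downloads count toward the library's totals, and `snippets` renders the usage snippet. A small sketch of consuming the new entries through the public exports (the key list is exactly the one registered in this diff):

```typescript
import { ALL_MODEL_LIBRARY_KEYS, MODEL_LIBRARIES_UI_ELEMENTS } from "@huggingface/tasks";

// The five libraries registered in this release are now valid registry keys.
for (const key of ["sapiens", "ssr-speech", "soloaudio", "vfi-mamba", "3dtopia-xl"] as const) {
  const entry = MODEL_LIBRARIES_UI_ELEMENTS[key];
  // e.g. `path:"model_vae_fp16.pt"` for 3dtopia-xl: only downloads of files
  // matching this query are counted for repos tagged with the library.
  console.log(entry.prettyLabel, entry.countDownloads);
  console.log(ALL_MODEL_LIBRARY_KEYS.includes(key)); // true
}
```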
package/src/tasks/keypoint-detection/about.md
CHANGED
@@ -36,16 +36,14 @@ model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/sup
 inputs = processor(image, return_tensors="pt").to(model.device, model.dtype)
 outputs = model(**inputs)
 
-#
+# postprocess
+image_sizes = [(image.size[1], image.size[0])]
+outputs = processor.post_process_keypoint_detection(model_outputs, image_sizes)
+keypoints = outputs[0]["keypoints"].detach().numpy()
+scores = outputs[0]["scores"].detach().numpy()
 image_width, image_height = image.size
-image_mask = outputs.mask
-image_indices = torch.nonzero(image_mask).squeeze()
-
-image_scores = outputs.scores.squeeze()
-image_keypoints = outputs.keypoints.squeeze()
-keypoints = image_keypoints.detach().numpy()
-scores = image_scores.detach().numpy()
 
+# plot
 plt.axis('off')
 plt.imshow(image)
 plt.scatter(
package/src/tasks/keypoint-detection/data.ts
CHANGED
@@ -37,6 +37,10 @@ const taskData: TaskDataCustom = {
   description: "An application that detects hand keypoints in real-time.",
   id: "datasciencedojo/Hand-Keypoint-Detection-Realtime",
 },
+{
+  description: "An application to try a universal keypoint detection model.",
+  id: "merve/SuperPoint",
+},
 ],
 summary: "Keypoint detection is the task of identifying meaningful distinctive points or features in an image.",
 widgetModels: [],
package/src/tasks/text-to-image/about.md
CHANGED
@@ -2,7 +2,7 @@
 
 ### Data Generation
 
-Businesses can generate data for their
+Businesses can generate data for their use cases by inputting text and getting image outputs.
 
 ### Immersive Conversational Chatbots
 
@@ -16,9 +16,27 @@ Different patterns can be generated to obtain unique pieces of fashion. Text-to-
 
 Architects can utilise the models to construct an environment based out on the requirements of the floor plan. This can also include the furniture that has to be placed in that environment.
 
-##
+## Task Variants
 
-
+### Image Editing
+
+Image editing with text-to-image models involves modifying an image following edit instructions provided in a text prompt.
+
+- **Synthetic image editing**: Adjusting images that were initially created using an input prompt while preserving the overall meaning or context of the original image.
+
+![Synthetic Image Editing]()
+_Figure taken from ["InstructPix2Pix: Learning to Follow Image Editing Instructions"](https://www.timothybrooks.com/instruct-pix2pix)_
+
+- **Real image editing**: Similar to synthetic image editing, except we're using real photos/images. This task is usually more complex.
+
+![Real Image Editing]()
+_Figure taken from ["Prompt-to-Prompt Image Editing with Cross-Attention Control"](https://prompt-to-prompt.github.io)_
+
+### Personalization
+
+Personalization refers to techniques used to customize text-to-image models. We introduce new subjects or concepts to the model, which the model can then generate when we refer to them with a text prompt.
+
+For example, you can use these techniques to generate images of your dog in imaginary settings, after you have taught the model using a few reference images of the subject (or just one in some cases). Teaching the model a new concept can be achieved through fine-tuning, or by using training-free techniques.
 
 ## Inference
 
@@ -65,11 +83,14 @@ await inference.textToImage({
 - [Introducing Würstchen: Fast Diffusion for Image Generation](https://huggingface.co/blog/wuerstchen)
 - [Efficient Controllable Generation for SDXL with T2I-Adapters](https://huggingface.co/blog/t2i-sdxl-adapters)
 - [Welcome aMUSEd: Efficient Text-to-Image Generation](https://huggingface.co/blog/amused)
+- Image Editing Demos: [LEDITS++](https://huggingface.co/spaces/editing-images/leditsplusplus), [Turbo Edit](https://huggingface.co/spaces/turboedit/turbo_edit), [InstructPix2Pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix), [CosXL](https://huggingface.co/spaces/multimodalart/cosxl)
+- Training free Personalization Demos: [Face-to-All](https://huggingface.co/spaces/multimodalart/face-to-all), [InstantStyle](https://huggingface.co/spaces/InstantX/InstantStyle), [RB-modulation](https://huggingface.co/spaces/fffiloni/RB-Modulation), [Photomaker v2](https://huggingface.co/spaces/TencentARC/PhotoMaker-V2)
 
 ### Model Fine-tuning
 
 - [Finetune Stable Diffusion Models with DDPO via TRL](https://huggingface.co/blog/pref-tuning)
 - [LoRA training scripts of the world, unite!](https://huggingface.co/blog/sdxl_lora_advanced_script)
 - [Using LoRA for Efficient Stable Diffusion Fine-Tuning](https://huggingface.co/blog/lora)
+- LoRA fine tuning Spaces: [FLUX.1 finetuning](https://huggingface.co/spaces/autotrain-projects/train-flux-lora-ease), [SDXL finetuning](https://huggingface.co/spaces/multimodalart/lora-ease)
 
 This page was made possible thanks to the efforts of [Ishan Dutta](https://huggingface.co/ishandutta), [Enrique Elias Ubaldo](https://huggingface.co/herrius) and [Oğuz Akif](https://huggingface.co/oguzakif).
@@ -92,7 +92,7 @@ const taskData: TaskDataCustom = {
 	},
 	],
 	summary:
-		"
+		"Text-to-image is the task of generating images from input text. These pipelines can also be used to modify and edit images based on text prompts.",
 	widgetModels: ["black-forest-labs/FLUX.1-dev"],
 	youtubeId: "",
 };
@@ -0,0 +1,98 @@
+Most video language models can take a single video, multiple videos, a single image, or multiple images as input. Some of these models can also take interleaved inputs, where images and videos appear inside the text, so that the text prompt can refer back to each input image and video.
+
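For illustration, an interleaved prompt in the chat-template format used by Transformers processors might look like the sketch below; the exact content types a given checkpoint supports vary by model.

```python
# A sketch of an interleaved chat-template prompt: the user turn mixes a
# video, text, and an image, and the text refers back to both inputs.
# This structure is passed to processor.apply_chat_template(...).
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "video"},
            {"type": "text", "text": "Does the image below show the same cat as this video?"},
            {"type": "image"},
        ],
    },
]
```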
+## Different Types of Video Language Models
+
+Video language models come in three types:
+
+- **Base:** Pre-trained models that can be fine-tuned.
+- **Instruction:** Base models fine-tuned on video-instruction pairs and answers.
+- **Chatty/Conversational:** Base models fine-tuned on video conversation datasets.
+
+## Use Cases
+
+### Video Question Answering
+
+Video language models trained on video-question-answer pairs can be used for video question answering and generating captions for videos.
+
+### Video Chat
+
+Video language models can be used to have a dialogue about a video.
+
+### Video Recognition with Instructions
+
+Video language models can recognize entities through descriptions. When given detailed descriptions of specific entities, they can classify those entities in a video.
+
+## Inference
+
+You can use the Transformers library to interact with video-language models.
+Below we load [a video language model](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf), write a small utility to sample frames from a video, use the chat template to format the text prompt, process the video together with the text prompt, and run inference. To run the snippet below, please install [OpenCV](https://pypi.org/project/opencv-python/) and Pillow by running `pip install opencv-python pillow`.
+
+```python
+import uuid
+
+import cv2
+import requests
+import torch
+from PIL import Image  # needed for Image.fromarray below
+from transformers import LlavaNextVideoProcessor, LlavaNextVideoForConditionalGeneration
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_id = "llava-hf/LLaVA-NeXT-Video-7B-hf"
+
+model = LlavaNextVideoForConditionalGeneration.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+).to(device)
+
+processor = LlavaNextVideoProcessor.from_pretrained(model_id)
+
+def sample_frames(url, num_frames):
+    # Download the video to a temporary local file.
+    response = requests.get(url)
+    path = f"./{uuid.uuid4()}.mp4"
+    with open(path, "wb") as f:
+        f.write(response.content)
+
+    # Sample `num_frames` frames, evenly spaced across the video.
+    video = cv2.VideoCapture(path)
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    interval = max(total_frames // num_frames, 1)  # avoid division by zero on short clips
+    frames = []
+    for i in range(total_frames):
+        ret, frame = video.read()
+        if not ret:
+            continue
+        if i % interval == 0:
+            # OpenCV reads BGR; convert to RGB before building a PIL image.
+            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
+    video.release()
+    return frames
+
+conversation = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "Why is this video funny?"},
+            {"type": "video"},
+        ],
+    },
+]
+
+prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+
+video_url = "https://huggingface.co/spaces/merve/llava-interleave/resolve/main/cats_1.mp4"
+video = sample_frames(video_url, 8)
+
+inputs = processor(text=prompt, videos=video, padding=True, return_tensors="pt").to(model.device)
+
+output = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+print(processor.decode(output[0][2:], skip_special_tokens=True))
+
+# Why is this video funny? ASSISTANT: The humor in this video comes from the cat's facial expression and body language. The cat appears to be making a funny face, with its eyes squinted and mouth open, which can be interpreted as a playful or mischievous expression. Cats often make such faces when they are in a good mood or are playful, and this can be amusing to people who are familiar with their behavior. The combination of the cat's expression and the close-
+
+```
+
+## Useful Resources
+
+- [Transformers task guide on video-text-to-text](https://huggingface.co/docs/transformers/tasks/video_text_to_text)
@@ -0,0 +1,58 @@
+import type { TaskDataCustom } from "..";
+
+const taskData: TaskDataCustom = {
+	datasets: [
+		{
+			description: "Multiple-choice questions and answers about videos.",
+			id: "lmms-lab/Video-MME",
+		},
+		{
+			description: "A dataset of instructions and question-answer pairs about videos.",
+			id: "lmms-lab/VideoChatGPT",
+		},
+	],
+	demo: {
+		inputs: [
+			{
+				filename: "video-text-to-text-input.gif",
+				type: "img",
+			},
+			{
+				label: "Text Prompt",
+				content: "What is happening in this video?",
+				type: "text",
+			},
+		],
+		outputs: [
+			{
+				label: "Answer",
+				content:
+					"The video shows a series of images showing a fountain with water jets and a variety of colorful flowers and butterflies in the background.",
+				type: "text",
+			},
+		],
+	},
+	metrics: [],
+	models: [
+		{
+			description: "A robust video-text-to-text model that can take in image and video inputs.",
+			id: "llava-hf/llava-onevision-qwen2-72b-ov-hf",
+		},
+		{
+			description: "Large and powerful video-text-to-text model that can take in image and video inputs.",
+			id: "llava-hf/LLaVA-NeXT-Video-34B-hf",
+		},
+	],
+	spaces: [
+		{
+			description: "An application to chat with a video-text-to-text model.",
+			id: "llava-hf/video-llava",
+		},
+	],
+	summary:
+		"Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models.",
+	widgetModels: [""],
+	youtubeId: "",
+};
+
+export default taskData;