@huggingface/tasks 0.12.23 → 0.12.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/index.cjs +476 -123
  2. package/dist/index.js +476 -123
  3. package/dist/src/hardware.d.ts +20 -0
  4. package/dist/src/hardware.d.ts.map +1 -1
  5. package/dist/src/model-libraries-snippets.d.ts +1 -0
  6. package/dist/src/model-libraries-snippets.d.ts.map +1 -1
  7. package/dist/src/model-libraries.d.ts +9 -2
  8. package/dist/src/model-libraries.d.ts.map +1 -1
  9. package/dist/src/snippets/common.d.ts +20 -0
  10. package/dist/src/snippets/common.d.ts.map +1 -0
  11. package/dist/src/snippets/curl.d.ts +15 -8
  12. package/dist/src/snippets/curl.d.ts.map +1 -1
  13. package/dist/src/snippets/js.d.ts +17 -10
  14. package/dist/src/snippets/js.d.ts.map +1 -1
  15. package/dist/src/snippets/python.d.ts +20 -13
  16. package/dist/src/snippets/python.d.ts.map +1 -1
  17. package/dist/src/snippets/types.d.ts +4 -0
  18. package/dist/src/snippets/types.d.ts.map +1 -1
  19. package/dist/src/tasks/depth-estimation/data.d.ts.map +1 -1
  20. package/dist/src/tasks/image-text-to-text/data.d.ts.map +1 -1
  21. package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
  22. package/dist/src/tasks/video-text-to-text/data.d.ts.map +1 -1
  23. package/dist/src/tasks/visual-question-answering/inference.d.ts +0 -1
  24. package/dist/src/tasks/visual-question-answering/inference.d.ts.map +1 -1
  25. package/package.json +1 -1
  26. package/src/hardware.ts +20 -0
  27. package/src/model-libraries-snippets.ts +7 -1
  28. package/src/model-libraries.ts +8 -1
  29. package/src/snippets/common.ts +63 -0
  30. package/src/snippets/curl.ts +71 -26
  31. package/src/snippets/js.ts +165 -40
  32. package/src/snippets/python.ts +186 -48
  33. package/src/snippets/types.ts +5 -0
  34. package/src/tasks/depth-estimation/data.ts +15 -7
  35. package/src/tasks/image-segmentation/data.ts +5 -5
  36. package/src/tasks/image-text-to-text/data.ts +17 -9
  37. package/src/tasks/keypoint-detection/data.ts +1 -1
  38. package/src/tasks/text-generation/data.ts +7 -7
  39. package/src/tasks/text-to-image/data.ts +2 -2
  40. package/src/tasks/text-to-speech/data.ts +5 -1
  41. package/src/tasks/text-to-video/data.ts +10 -10
  42. package/src/tasks/video-text-to-text/data.ts +8 -0
  43. package/src/tasks/visual-question-answering/inference.ts +0 -1
  44. package/src/tasks/visual-question-answering/spec/output.json +1 -1
package/dist/index.js CHANGED
@@ -2186,20 +2186,24 @@ var taskData11 = {
2186
2186
  id: "meta-llama/Llama-3.2-11B-Vision-Instruct"
2187
2187
  },
2188
2188
  {
2189
- description: "Cutting-edge conversational vision language model that can take multiple image inputs.",
2190
- id: "HuggingFaceM4/idefics2-8b-chatty"
2189
+ description: "Cutting-edge vision language models.",
2190
+ id: "allenai/Molmo-7B-D-0924"
2191
2191
  },
2192
2192
  {
2193
2193
  description: "Small yet powerful model.",
2194
2194
  id: "vikhyatk/moondream2"
2195
2195
  },
2196
2196
  {
2197
- description: "Strong image-text-to-text model made to understand documents.",
2198
- id: "mPLUG/DocOwl1.5"
2197
+ description: "Strong image-text-to-text model.",
2198
+ id: "Qwen/Qwen2-VL-7B-Instruct"
2199
2199
  },
2200
2200
  {
2201
2201
  description: "Strong image-text-to-text model.",
2202
- id: "microsoft/Phi-3.5-vision-instruct"
2202
+ id: "mistralai/Pixtral-12B-2409"
2203
+ },
2204
+ {
2205
+ description: "Strong image-text-to-text model focused on documents.",
2206
+ id: "stepfun-ai/GOT-OCR2_0"
2203
2207
  }
2204
2208
  ],
2205
2209
  spaces: [
@@ -2213,19 +2217,23 @@ var taskData11 = {
2213
2217
  },
2214
2218
  {
2215
2219
  description: "Powerful vision-language model assistant.",
2216
- id: "liuhaotian/LLaVA-1.6"
2220
+ id: "akhaliq/Molmo-7B-D-0924"
2221
+ },
2222
+ {
2223
+ description: "An image-text-to-text application focused on documents.",
2224
+ id: "stepfun-ai/GOT_official_online_demo"
2217
2225
  },
2218
2226
  {
2219
2227
  description: "An application to compare outputs of different vision language models.",
2220
2228
  id: "merve/compare_VLMs"
2221
2229
  },
2222
2230
  {
2223
- description: "An application for document vision language tasks.",
2224
- id: "mPLUG/DocOwl"
2231
+ description: "An application for chatting with an image-text-to-text model.",
2232
+ id: "GanymedeNil/Qwen2-VL-7B"
2225
2233
  }
2226
2234
  ],
2227
2235
  summary: "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
2228
- widgetModels: ["microsoft/kosmos-2-patch14-224"],
2236
+ widgetModels: ["meta-llama/Llama-3.2-11B-Vision-Instruct"],
2229
2237
  youtubeId: "IoGaGfU1CIg"
2230
2238
  };
2231
2239
  var data_default11 = taskData11;
@@ -2285,11 +2293,11 @@ var taskData12 = {
2285
2293
  id: "ZhengPeng7/BiRefNet"
2286
2294
  },
2287
2295
  {
2288
- description: "Semantic segmentation model trained on ADE20k dataset.",
2289
- id: "nvidia/segformer-b0-finetuned-ade-512-512"
2296
+ description: "Powerful human-centric image segmentation model.",
2297
+ id: "facebook/sapiens-seg-1b"
2290
2298
  },
2291
2299
  {
2292
- description: "Panoptic segmentation model trained COCO (common objects) dataset.",
2300
+ description: "Panoptic segmentation model trained on the COCO (common objects) dataset.",
2293
2301
  id: "facebook/mask2former-swin-large-coco-panoptic"
2294
2302
  }
2295
2303
  ],
@@ -2303,8 +2311,8 @@ var taskData12 = {
2303
2311
  id: "jbrinkma/segment-anything"
2304
2312
  },
2305
2313
  {
2306
- description: "A semantic segmentation application that predicts human silhouettes.",
2307
- id: "keras-io/Human-Part-Segmentation"
2314
+ description: "A human-centric segmentation model.",
2315
+ id: "facebook/sapiens-pose"
2308
2316
  },
2309
2317
  {
2310
2318
  description: "An instance segmentation application to predict neuronal cell types from microscopy images.",
@@ -2493,11 +2501,15 @@ var taskData15 = {
2493
2501
  },
2494
2502
  {
2495
2503
  description: "A strong monocular depth estimation model.",
2496
- id: "Bingxin/Marigold"
2504
+ id: "jingheya/lotus-depth-g-v1-0"
2497
2505
  },
2498
2506
  {
2499
- description: "A metric depth estimation model trained on NYU dataset.",
2500
- id: "Intel/zoedepth-nyu"
2507
+ description: "A depth estimation model that predicts depth in videos.",
2508
+ id: "tencent/DepthCrafter"
2509
+ },
2510
+ {
2511
+ description: "A robust depth estimation model.",
2512
+ id: "apple/DepthPro"
2501
2513
  }
2502
2514
  ],
2503
2515
  spaces: [
@@ -2506,12 +2518,16 @@ var taskData15 = {
2506
2518
  id: "radames/dpt-depth-estimation-3d-voxels"
2507
2519
  },
2508
2520
  {
2509
- description: "An application on cutting-edge depth estimation.",
2510
- id: "depth-anything/Depth-Anything-V2"
2521
+ description: "An application for bleeding-edge depth estimation.",
2522
+ id: "akhaliq/depth-pro"
2523
+ },
2524
+ {
2525
+ description: "An application on cutting-edge depth estimation in videos.",
2526
+ id: "tencent/DepthCrafter"
2511
2527
  },
2512
2528
  {
2513
- description: "An application to try state-of-the-art depth estimation.",
2514
- id: "merve/compare_depth_models"
2529
+ description: "A human-centric depth estimation application.",
2530
+ id: "facebook/sapiens-depth"
2515
2531
  }
2516
2532
  ],
2517
2533
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
@@ -3089,8 +3105,8 @@ var taskData24 = {
3089
3105
  id: "jbilcke-hf/ai-comic-factory"
3090
3106
  },
3091
3107
  {
3092
- description: "A text-to-image application that can generate coherent text inside the image.",
3093
- id: "DeepFloyd/IF"
3108
+ description: "An application to match multiple custom image generation models.",
3109
+ id: "multimodalart/flux-lora-lab"
3094
3110
  },
3095
3111
  {
3096
3112
  description: "A powerful yet very fast image generation application.",
@@ -3173,9 +3189,13 @@ var taskData25 = {
3173
3189
  id: "suno/bark"
3174
3190
  },
3175
3191
  {
3176
- description: "XTTS is a Voice generation model that lets you clone voices into different languages.",
3192
+ description: "An application on XTTS, a voice generation model that lets you clone voices into different languages.",
3177
3193
  id: "coqui/xtts"
3178
3194
  },
3195
+ {
3196
+ description: "An application that generates speech in different styles in English and Chinese.",
3197
+ id: "mrfakename/E2-F5-TTS"
3198
+ },
3179
3199
  {
3180
3200
  description: "An application that synthesizes speech for diverse speaker prompts.",
3181
3201
  id: "parler-tts/parler_tts_mini"
@@ -3494,10 +3514,6 @@ var taskData29 = {
3494
3514
  description: "A text-generation model trained to follow instructions.",
3495
3515
  id: "google/gemma-2-2b-it"
3496
3516
  },
3497
- {
3498
- description: "A code generation model that can generate code in 80+ languages.",
3499
- id: "bigcode/starcoder"
3500
- },
3501
3517
  {
3502
3518
  description: "Very powerful text generation model trained to follow instructions.",
3503
3519
  id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
@@ -3511,12 +3527,12 @@ var taskData29 = {
3511
3527
  id: "AI-MO/NuminaMath-7B-TIR"
3512
3528
  },
3513
3529
  {
3514
- description: "Strong coding assistant model.",
3515
- id: "HuggingFaceH4/starchat2-15b-v0.1"
3530
+ description: "Strong text generation model to follow instructions.",
3531
+ id: "Qwen/Qwen2.5-7B-Instruct"
3516
3532
  },
3517
3533
  {
3518
3534
  description: "Very strong open-source large language model.",
3519
- id: "mistralai/Mistral-Nemo-Instruct-2407"
3535
+ id: "nvidia/Llama-3.1-Nemotron-70B-Instruct"
3520
3536
  }
3521
3537
  ],
3522
3538
  spaces: [
@@ -3524,6 +3540,10 @@ var taskData29 = {
3524
3540
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
3525
3541
  id: "open-llm-leaderboard/open_llm_leaderboard"
3526
3542
  },
3543
+ {
3544
+ description: "A leaderboard for comparing chain-of-thought performance of models.",
3545
+ id: "logikon/open_cot_leaderboard"
3546
+ },
3527
3547
  {
3528
3548
  description: "An text generation based application based on a very powerful LLaMA2 model.",
3529
3549
  id: "ysharma/Explore_llamav2_with_TGI"
@@ -3610,30 +3630,30 @@ var taskData30 = {
3610
3630
  ],
3611
3631
  models: [
3612
3632
  {
3613
- description: "A strong model for video generation.",
3614
- id: "Vchitect/LaVie"
3633
+ description: "A strong model for consistent video generation.",
3634
+ id: "rain1011/pyramid-flow-sd3"
3615
3635
  },
3616
3636
  {
3617
3637
  description: "A robust model for text-to-video generation.",
3618
- id: "damo-vilab/text-to-video-ms-1.7b"
3638
+ id: "VideoCrafter/VideoCrafter2"
3619
3639
  },
3620
3640
  {
3621
- description: "A text-to-video generation model with high quality and smooth outputs.",
3622
- id: "hotshotco/Hotshot-XL"
3641
+ description: "A cutting-edge text-to-video generation model.",
3642
+ id: "TIGER-Lab/T2V-Turbo-V2"
3623
3643
  }
3624
3644
  ],
3625
3645
  spaces: [
3626
3646
  {
3627
3647
  description: "An application that generates video from text.",
3628
- id: "fffiloni/zeroscope"
3648
+ id: "VideoCrafter/VideoCrafter"
3629
3649
  },
3630
3650
  {
3631
- description: "An application that generates video from image and text.",
3632
- id: "Vchitect/LaVie"
3651
+ description: "Consistent video generation application.",
3652
+ id: "TIGER-Lab/T2V-Turbo-V2"
3633
3653
  },
3634
3654
  {
3635
- description: "An application that generates videos from text and provides multi-model support.",
3636
- id: "ArtGAN/Video-Diffusion-WebUI"
3655
+ description: "A cutting edge video generation application.",
3656
+ id: "Pyramid-Flow/pyramid-flow"
3637
3657
  }
3638
3658
  ],
3639
3659
  summary: "Text-to-video models can be used in any application that requires generating consistent sequence of images from text. ",
@@ -4250,7 +4270,7 @@ var taskData39 = {
4250
4270
  },
4251
4271
  {
4252
4272
  description: "Strong keypoint detection model used to detect human pose.",
4253
- id: "qualcomm/MediaPipe-Pose-Estimation"
4273
+ id: "facebook/sapiens-pose-1b"
4254
4274
  }
4255
4275
  ],
4256
4276
  spaces: [
@@ -4279,6 +4299,10 @@ var taskData40 = {
4279
4299
  {
4280
4300
  description: "A dataset of instructions and question-answer pairs about videos.",
4281
4301
  id: "lmms-lab/VideoChatGPT"
4302
+ },
4303
+ {
4304
+ description: "Large video understanding dataset.",
4305
+ id: "HuggingFaceFV/finevideo"
4282
4306
  }
4283
4307
  ],
4284
4308
  demo: {
@@ -4316,6 +4340,10 @@ var taskData40 = {
4316
4340
  {
4317
4341
  description: "An application to chat with a video-text-to-text model.",
4318
4342
  id: "llava-hf/video-llava"
4343
+ },
4344
+ {
4345
+ description: "A leaderboard for various video-text-to-text models.",
4346
+ id: "opencompass/openvlm_video_leaderboard"
4319
4347
  }
4320
4348
  ],
4321
4349
  summary: "Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models.",
@@ -5205,7 +5233,7 @@ var transformersJS = (model) => {
5205
5233
  if (!model.pipeline_tag) {
5206
5234
  return [`// \u26A0\uFE0F Unknown pipeline tag`];
5207
5235
  }
5208
- const libName = "@xenova/transformers";
5236
+ const libName = "@huggingface/transformers";
5209
5237
  return [
5210
5238
  `// npm i ${libName}
5211
5239
  import { pipeline } from '${libName}';
@@ -5349,6 +5377,11 @@ var nemo = (model) => {
5349
5377
  }
5350
5378
  return command ?? [`# tag did not correspond to a valid NeMo domain.`];
5351
5379
  };
5380
+ var pxia = (model) => [
5381
+ `from pxia import AutoModel
5382
+
5383
+ model = AutoModel.from_pretrained("${model.id}")`
5384
+ ];
5352
5385
  var pythae = (model) => [
5353
5386
  `from pythae.models import AutoModel
5354
5387
 
@@ -5834,6 +5867,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
5834
5867
  filter: true,
5835
5868
  countDownloads: `path:"adapter_config.json"`
5836
5869
  },
5870
+ pxia: {
5871
+ prettyLabel: "pxia",
5872
+ repoName: "pxia",
5873
+ repoUrl: "https://github.com/not-lain/pxia",
5874
+ snippets: pxia,
5875
+ filter: false
5876
+ },
5837
5877
  "pyannote-audio": {
5838
5878
  prettyLabel: "pyannote.audio",
5839
5879
  repoName: "pyannote-audio",
@@ -6066,7 +6106,7 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
6066
6106
  "transformers.js": {
6067
6107
  prettyLabel: "Transformers.js",
6068
6108
  repoName: "transformers.js",
6069
- repoUrl: "https://github.com/xenova/transformers.js",
6109
+ repoUrl: "https://github.com/huggingface/transformers.js",
6070
6110
  docsUrl: "https://huggingface.co/docs/hub/transformers-js",
6071
6111
  snippets: transformersJS,
6072
6112
  filter: true
@@ -6255,30 +6295,91 @@ __export(curl_exports, {
6255
6295
  snippetTextGeneration: () => snippetTextGeneration,
6256
6296
  snippetZeroShotClassification: () => snippetZeroShotClassification
6257
6297
  });
6258
- var snippetBasic = (model, accessToken) => `curl https://api-inference.huggingface.co/models/${model.id} \\
6298
+
6299
+ // src/snippets/common.ts
6300
+ function stringifyMessages(messages, opts) {
6301
+ const keyRole = opts.attributeKeyQuotes ? `"role"` : "role";
6302
+ const keyContent = opts.attributeKeyQuotes ? `"content"` : "content";
6303
+ const messagesStringified = messages.map(({ role, content }) => {
6304
+ if (typeof content === "string") {
6305
+ content = JSON.stringify(content).slice(1, -1);
6306
+ if (opts.customContentEscaper) {
6307
+ content = opts.customContentEscaper(content);
6308
+ }
6309
+ return `{ ${keyRole}: "${role}", ${keyContent}: "${content}" }`;
6310
+ } else {
6311
+ 2;
6312
+ content = content.map(({ image_url, text, type }) => ({
6313
+ type,
6314
+ image_url,
6315
+ ...text ? { text: JSON.stringify(text).slice(1, -1) } : void 0
6316
+ }));
6317
+ content = JSON.stringify(content).slice(1, -1);
6318
+ if (opts.customContentEscaper) {
6319
+ content = opts.customContentEscaper(content);
6320
+ }
6321
+ return `{ ${keyRole}: "${role}", ${keyContent}: ${content} }`;
6322
+ }
6323
+ });
6324
+ return opts.start + messagesStringified.join(opts.sep) + opts.end;
6325
+ }
6326
+ function stringifyGenerationConfig(config, opts) {
6327
+ const quote = opts.attributeKeyQuotes ? `"` : "";
6328
+ return opts.start + Object.entries(config).map(([key, val]) => `${quote}${key}${quote}${opts.attributeValueConnector}${val}`).join(opts.sep) + opts.end;
6329
+ }
6330
+
6331
+ // src/snippets/curl.ts
6332
+ var snippetBasic = (model, accessToken) => ({
6333
+ content: `curl https://api-inference.huggingface.co/models/${model.id} \\
6259
6334
  -X POST \\
6260
6335
  -d '{"inputs": ${getModelInputSnippet(model, true)}}' \\
6261
6336
  -H 'Content-Type: application/json' \\
6262
- -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`;
6263
- var snippetTextGeneration = (model, accessToken) => {
6337
+ -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`
6338
+ });
6339
+ var snippetTextGeneration = (model, accessToken, opts) => {
6264
6340
  if (model.tags.includes("conversational")) {
6265
- return `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
6341
+ const streaming = opts?.streaming ?? true;
6342
+ const messages = opts?.messages ?? [
6343
+ { role: "user", content: "What is the capital of France?" }
6344
+ ];
6345
+ const config = {
6346
+ ...opts?.temperature ? { temperature: opts.temperature } : void 0,
6347
+ max_tokens: opts?.max_tokens ?? 500,
6348
+ ...opts?.top_p ? { top_p: opts.top_p } : void 0
6349
+ };
6350
+ return {
6351
+ content: `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
6266
6352
  -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" \\
6267
6353
  -H 'Content-Type: application/json' \\
6268
- -d '{
6269
- "model": "${model.id}",
6270
- "messages": [{"role": "user", "content": "What is the capital of France?"}],
6271
- "max_tokens": 500,
6272
- "stream": false
6273
- }'
6274
- `;
6354
+ --data '{
6355
+ "model": "${model.id}",
6356
+ "messages": ${stringifyMessages(messages, {
6357
+ sep: ",\n ",
6358
+ start: `[
6359
+ `,
6360
+ end: `
6361
+ ]`,
6362
+ attributeKeyQuotes: true,
6363
+ customContentEscaper: (str) => str.replace(/'/g, "'\\''")
6364
+ })},
6365
+ ${stringifyGenerationConfig(config, {
6366
+ sep: ",\n ",
6367
+ start: "",
6368
+ end: "",
6369
+ attributeKeyQuotes: true,
6370
+ attributeValueConnector: ": "
6371
+ })},
6372
+ "stream": ${!!streaming}
6373
+ }'`
6374
+ };
6275
6375
  } else {
6276
6376
  return snippetBasic(model, accessToken);
6277
6377
  }
6278
6378
  };
6279
6379
  var snippetImageTextToTextGeneration = (model, accessToken) => {
6280
6380
  if (model.tags.includes("conversational")) {
6281
- return `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
6381
+ return {
6382
+ content: `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
6282
6383
  -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" \\
6283
6384
  -H 'Content-Type: application/json' \\
6284
6385
  -d '{
@@ -6295,20 +6396,25 @@ var snippetImageTextToTextGeneration = (model, accessToken) => {
6295
6396
  "max_tokens": 500,
6296
6397
  "stream": false
6297
6398
  }'
6298
- `;
6399
+ `
6400
+ };
6299
6401
  } else {
6300
6402
  return snippetBasic(model, accessToken);
6301
6403
  }
6302
6404
  };
6303
- var snippetZeroShotClassification = (model, accessToken) => `curl https://api-inference.huggingface.co/models/${model.id} \\
6405
+ var snippetZeroShotClassification = (model, accessToken) => ({
6406
+ content: `curl https://api-inference.huggingface.co/models/${model.id} \\
6304
6407
  -X POST \\
6305
6408
  -d '{"inputs": ${getModelInputSnippet(model, true)}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \\
6306
6409
  -H 'Content-Type: application/json' \\
6307
- -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`;
6308
- var snippetFile = (model, accessToken) => `curl https://api-inference.huggingface.co/models/${model.id} \\
6410
+ -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`
6411
+ });
6412
+ var snippetFile = (model, accessToken) => ({
6413
+ content: `curl https://api-inference.huggingface.co/models/${model.id} \\
6309
6414
  -X POST \\
6310
6415
  --data-binary '@${getModelInputSnippet(model, true, true)}' \\
6311
- -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`;
6416
+ -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`
6417
+ });
6312
6418
  var curlSnippets = {
6313
6419
  // Same order as in js/src/lib/interfaces/Types.ts
6314
6420
  "text-classification": snippetBasic,
@@ -6336,7 +6442,7 @@ var curlSnippets = {
6336
6442
  "image-segmentation": snippetFile
6337
6443
  };
6338
6444
  function getCurlInferenceSnippet(model, accessToken) {
6339
- return model.pipeline_tag && model.pipeline_tag in curlSnippets ? curlSnippets[model.pipeline_tag]?.(model, accessToken) ?? "" : "";
6445
+ return model.pipeline_tag && model.pipeline_tag in curlSnippets ? curlSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" } : { content: "" };
6340
6446
  }
6341
6447
  function hasCurlInferenceSnippet(model) {
6342
6448
  return !!model.pipeline_tag && model.pipeline_tag in curlSnippets;
@@ -6359,18 +6465,114 @@ __export(python_exports, {
6359
6465
  snippetZeroShotClassification: () => snippetZeroShotClassification2,
6360
6466
  snippetZeroShotImageClassification: () => snippetZeroShotImageClassification
6361
6467
  });
6362
- var snippetConversational = (model, accessToken) => `from huggingface_hub import InferenceClient
6468
+ var snippetConversational = (model, accessToken, opts) => {
6469
+ const streaming = opts?.streaming ?? true;
6470
+ const messages = opts?.messages ?? [
6471
+ { role: "user", content: "What is the capital of France?" }
6472
+ ];
6473
+ const messagesStr = stringifyMessages(messages, {
6474
+ sep: ",\n ",
6475
+ start: `[
6476
+ `,
6477
+ end: `
6478
+ ]`,
6479
+ attributeKeyQuotes: true
6480
+ });
6481
+ const config = {
6482
+ ...opts?.temperature ? { temperature: opts.temperature } : void 0,
6483
+ max_tokens: opts?.max_tokens ?? 500,
6484
+ ...opts?.top_p ? { top_p: opts.top_p } : void 0
6485
+ };
6486
+ const configStr = stringifyGenerationConfig(config, {
6487
+ sep: ",\n ",
6488
+ start: "",
6489
+ end: "",
6490
+ attributeValueConnector: "="
6491
+ });
6492
+ if (streaming) {
6493
+ return [
6494
+ {
6495
+ client: "huggingface_hub",
6496
+ content: `from huggingface_hub import InferenceClient
6363
6497
 
6364
6498
  client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
6365
6499
 
6366
- for message in client.chat_completion(
6367
- model="${model.id}",
6368
- messages=[{"role": "user", "content": "What is the capital of France?"}],
6369
- max_tokens=500,
6370
- stream=True,
6371
- ):
6372
- print(message.choices[0].delta.content, end="")`;
6373
- var snippetConversationalWithImage = (model, accessToken) => `from huggingface_hub import InferenceClient
6500
+ messages = ${messagesStr}
6501
+
6502
+ stream = client.chat.completions.create(
6503
+ model="${model.id}",
6504
+ messages=messages,
6505
+ ${configStr},
6506
+ stream=True
6507
+ )
6508
+
6509
+ for chunk in stream:
6510
+ print(chunk.choices[0].delta.content)`
6511
+ },
6512
+ {
6513
+ client: "openai",
6514
+ content: `from openai import OpenAI
6515
+
6516
+ client = OpenAI(
6517
+ base_url="https://api-inference.huggingface.co/v1/",
6518
+ api_key="${accessToken || "{API_TOKEN}"}"
6519
+ )
6520
+
6521
+ messages = ${messagesStr}
6522
+
6523
+ stream = client.chat.completions.create(
6524
+ model="${model.id}",
6525
+ messages=messages,
6526
+ ${configStr},
6527
+ stream=True
6528
+ )
6529
+
6530
+ for chunk in stream:
6531
+ print(chunk.choices[0].delta.content)`
6532
+ }
6533
+ ];
6534
+ } else {
6535
+ return [
6536
+ {
6537
+ client: "huggingface_hub",
6538
+ content: `from huggingface_hub import InferenceClient
6539
+
6540
+ client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
6541
+
6542
+ messages = ${messagesStr}
6543
+
6544
+ completion = client.chat.completions.create(
6545
+ model="${model.id}",
6546
+ messages=messages,
6547
+ ${configStr}
6548
+ )
6549
+
6550
+ print(completion.choices[0].message)`
6551
+ },
6552
+ {
6553
+ client: "openai",
6554
+ content: `from openai import OpenAI
6555
+
6556
+ client = OpenAI(
6557
+ base_url="https://api-inference.huggingface.co/v1/",
6558
+ api_key="${accessToken || "{API_TOKEN}"}"
6559
+ )
6560
+
6561
+ messages = ${messagesStr}
6562
+
6563
+ completion = client.chat.completions.create(
6564
+ model="${model.id}",
6565
+ messages=messages,
6566
+ ${configStr}
6567
+ )
6568
+
6569
+ print(completion.choices[0].message)`
6570
+ }
6571
+ ];
6572
+ }
6573
+ };
6574
+ var snippetConversationalWithImage = (model, accessToken) => ({
6575
+ content: `from huggingface_hub import InferenceClient
6374
6576
 
6375
6577
  client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
6376
6578
 
@@ -6390,16 +6592,20 @@ for message in client.chat_completion(
6390
6592
  max_tokens=500,
6391
6593
  stream=True,
6392
6594
  ):
6393
- print(message.choices[0].delta.content, end="")`;
6394
- var snippetZeroShotClassification2 = (model) => `def query(payload):
6595
+ print(message.choices[0].delta.content, end="")`
6596
+ });
6597
+ var snippetZeroShotClassification2 = (model) => ({
6598
+ content: `def query(payload):
6395
6599
  response = requests.post(API_URL, headers=headers, json=payload)
6396
6600
  return response.json()
6397
6601
 
6398
6602
  output = query({
6399
6603
  "inputs": ${getModelInputSnippet(model)},
6400
6604
  "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
6401
- })`;
6402
- var snippetZeroShotImageClassification = (model) => `def query(data):
6605
+ })`
6606
+ });
6607
+ var snippetZeroShotImageClassification = (model) => ({
6608
+ content: `def query(data):
6403
6609
  with open(data["image_path"], "rb") as f:
6404
6610
  img = f.read()
6405
6611
  payload={
@@ -6412,22 +6618,28 @@ var snippetZeroShotImageClassification = (model) => `def query(data):
6412
6618
  output = query({
6413
6619
  "image_path": ${getModelInputSnippet(model)},
6414
6620
  "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
6415
- })`;
6416
- var snippetBasic2 = (model) => `def query(payload):
6621
+ })`
6622
+ });
6623
+ var snippetBasic2 = (model) => ({
6624
+ content: `def query(payload):
6417
6625
  response = requests.post(API_URL, headers=headers, json=payload)
6418
6626
  return response.json()
6419
6627
 
6420
6628
  output = query({
6421
6629
  "inputs": ${getModelInputSnippet(model)},
6422
- })`;
6423
- var snippetFile2 = (model) => `def query(filename):
6630
+ })`
6631
+ });
6632
+ var snippetFile2 = (model) => ({
6633
+ content: `def query(filename):
6424
6634
  with open(filename, "rb") as f:
6425
6635
  data = f.read()
6426
6636
  response = requests.post(API_URL, headers=headers, data=data)
6427
6637
  return response.json()
6428
6638
 
6429
- output = query(${getModelInputSnippet(model)})`;
6430
- var snippetTextToImage = (model) => `def query(payload):
6639
+ output = query(${getModelInputSnippet(model)})`
6640
+ });
6641
+ var snippetTextToImage = (model) => ({
6642
+ content: `def query(payload):
6431
6643
  response = requests.post(API_URL, headers=headers, json=payload)
6432
6644
  return response.content
6433
6645
  image_bytes = query({
@@ -6436,16 +6648,20 @@ image_bytes = query({
6436
6648
  # You can access the image with PIL.Image for example
6437
6649
  import io
6438
6650
  from PIL import Image
6439
- image = Image.open(io.BytesIO(image_bytes))`;
6440
- var snippetTabular = (model) => `def query(payload):
6651
+ image = Image.open(io.BytesIO(image_bytes))`
6652
+ });
6653
+ var snippetTabular = (model) => ({
6654
+ content: `def query(payload):
6441
6655
  response = requests.post(API_URL, headers=headers, json=payload)
6442
6656
  return response.content
6443
6657
  response = query({
6444
6658
  "inputs": {"data": ${getModelInputSnippet(model)}},
6445
- })`;
6659
+ })`
6660
+ });
6446
6661
  var snippetTextToAudio = (model) => {
6447
6662
  if (model.library_name === "transformers") {
6448
- return `def query(payload):
6663
+ return {
6664
+ content: `def query(payload):
6449
6665
  response = requests.post(API_URL, headers=headers, json=payload)
6450
6666
  return response.content
6451
6667
 
@@ -6454,9 +6670,11 @@ audio_bytes = query({
6454
6670
  })
6455
6671
  # You can access the audio with IPython.display for example
6456
6672
  from IPython.display import Audio
6457
- Audio(audio_bytes)`;
6673
+ Audio(audio_bytes)`
6674
+ };
6458
6675
  } else {
6459
- return `def query(payload):
6676
+ return {
6677
+ content: `def query(payload):
6460
6678
  response = requests.post(API_URL, headers=headers, json=payload)
6461
6679
  return response.json()
6462
6680
 
@@ -6465,10 +6683,12 @@ audio, sampling_rate = query({
6465
6683
  })
6466
6684
  # You can access the audio with IPython.display for example
6467
6685
  from IPython.display import Audio
6468
- Audio(audio, rate=sampling_rate)`;
6686
+ Audio(audio, rate=sampling_rate)`
6687
+ };
6469
6688
  }
6470
6689
  };
6471
- var snippetDocumentQuestionAnswering = (model) => `def query(payload):
6690
+ var snippetDocumentQuestionAnswering = (model) => ({
6691
+ content: `def query(payload):
6472
6692
  with open(payload["image"], "rb") as f:
6473
6693
  img = f.read()
6474
6694
  payload["image"] = base64.b64encode(img).decode("utf-8")
@@ -6477,7 +6697,8 @@ var snippetDocumentQuestionAnswering = (model) => `def query(payload):
6477
6697
 
6478
6698
  output = query({
6479
6699
  "inputs": ${getModelInputSnippet(model)},
6480
- })`;
6700
+ })`
6701
+ });
6481
6702
  var pythonSnippets = {
6482
6703
  // Same order as in tasks/src/pipelines.ts
6483
6704
  "text-classification": snippetBasic2,
@@ -6508,19 +6729,25 @@ var pythonSnippets = {
6508
6729
  "image-to-text": snippetFile2,
6509
6730
  "zero-shot-image-classification": snippetZeroShotImageClassification
6510
6731
  };
6511
- function getPythonInferenceSnippet(model, accessToken) {
6732
+ function getPythonInferenceSnippet(model, accessToken, opts) {
6512
6733
  if (model.pipeline_tag === "text-generation" && model.tags.includes("conversational")) {
6513
- return snippetConversational(model, accessToken);
6734
+ return snippetConversational(model, accessToken, opts);
6514
6735
  } else if (model.pipeline_tag === "image-text-to-text" && model.tags.includes("conversational")) {
6515
6736
  return snippetConversationalWithImage(model, accessToken);
6516
6737
  } else {
6517
- const body = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? "" : "";
6518
- return `import requests
6519
-
6738
+ let snippets = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" } : { content: "" };
6739
+ snippets = Array.isArray(snippets) ? snippets : [snippets];
6740
+ return snippets.map((snippet) => {
6741
+ return {
6742
+ ...snippet,
6743
+ content: `import requests
6744
+
6520
6745
  API_URL = "https://api-inference.huggingface.co/models/${model.id}"
6521
6746
  headers = {"Authorization": ${accessToken ? `"Bearer ${accessToken}"` : `f"Bearer {API_TOKEN}"`}}
6522
-
6523
- ${body}`;
6747
+
6748
+ ${snippet.content}`
6749
+ };
6750
+ });
6524
6751
  }
6525
6752
  }
6526
6753
  function hasPythonInferenceSnippet(model) {
@@ -6541,7 +6768,8 @@ __export(js_exports, {
6541
6768
  snippetTextToImage: () => snippetTextToImage2,
6542
6769
  snippetZeroShotClassification: () => snippetZeroShotClassification3
6543
6770
  });
6544
- var snippetBasic3 = (model, accessToken) => `async function query(data) {
6771
+ var snippetBasic3 = (model, accessToken) => ({
6772
+ content: `async function query(data) {
6545
6773
  const response = await fetch(
6546
6774
  "https://api-inference.huggingface.co/models/${model.id}",
6547
6775
  {
@@ -6559,27 +6787,120 @@ var snippetBasic3 = (model, accessToken) => `async function query(data) {
6559
6787
 
6560
6788
  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
6561
6789
  console.log(JSON.stringify(response));
6562
- });`;
6563
/**
 * JS code snippet for text-generation models.
 *
 * Conversational models get chat-completion snippets in two client flavors
 * (`huggingface_hub`, i.e. @huggingface/inference, and `openai`), each in a
 * streaming or non-streaming variant depending on `opts.streaming`
 * (streaming by default). Non-conversational models fall back to the
 * generic fetch-based `snippetBasic3`.
 *
 * NOTE(review): this block is reconstructed from a diff rendering; the
 * whitespace inside the generated template strings is presentation-only —
 * confirm against the published dist if exact bytes matter.
 *
 * @param model - model data (reads `id` and `tags`)
 * @param accessToken - HF token inlined into the snippet; `{API_TOKEN}` placeholder when falsy
 * @param opts - optional { streaming, messages, temperature, max_tokens, top_p }
 * @returns an array of { client, content } for conversational models,
 *   otherwise the single snippet object produced by snippetBasic3
 */
var snippetTextGeneration2 = (model, accessToken, opts) => {
  if (model.tags.includes("conversational")) {
    // Stream by default; use a demo message list unless the caller supplies one.
    const streaming = opts?.streaming ?? true;
    const messages = opts?.messages ?? [
      { role: "user", content: "What is the capital of France?" }
    ];
    const messagesStr = stringifyMessages(messages, { sep: ",\n ", start: "[\n ", end: "\n ]" });
    // Only include temperature/top_p when explicitly set; max_tokens defaults to 500.
    const config = {
      ...opts?.temperature ? { temperature: opts.temperature } : void 0,
      max_tokens: opts?.max_tokens ?? 500,
      ...opts?.top_p ? { top_p: opts.top_p } : void 0
    };
    const configStr = stringifyGenerationConfig(config, {
      sep: ",\n ",
      start: "",
      end: "",
      attributeValueConnector: ": "
    });
    if (streaming) {
      // Streaming variants: accumulate delta chunks and log as they arrive.
      return [
        {
          client: "huggingface_hub",
          content: `import { HfInference } from "@huggingface/inference"

const client = new HfInference("${accessToken || `{API_TOKEN}`}")

let out = "";

const stream = client.chatCompletionStream({
	model: "${model.id}",
	messages: ${messagesStr},
	${configStr}
});

for await (const chunk of stream) {
	if (chunk.choices && chunk.choices.length > 0) {
		const newContent = chunk.choices[0].delta.content;
		out += newContent;
		console.log(newContent);
	}
}`
        },
        {
          client: "openai",
          content: `import { OpenAI } from "openai"

const client = new OpenAI({
	baseURL: "https://api-inference.huggingface.co/v1/",
	apiKey: "${accessToken || `{API_TOKEN}`}"
})

let out = "";

const stream = await client.chat.completions.create({
	model: "${model.id}",
	messages: ${messagesStr},
	${configStr},
	stream: true,
});

for await (const chunk of stream) {
	if (chunk.choices && chunk.choices.length > 0) {
		const newContent = chunk.choices[0].delta.content;
		out += newContent;
		console.log(newContent);
	}
}`
        }
      ];
    } else {
      // Non-streaming variants: single awaited completion, log the first choice.
      return [
        {
          client: "huggingface_hub",
          content: `import { HfInference } from '@huggingface/inference'

const client = new HfInference("${accessToken || `{API_TOKEN}`}")

const chatCompletion = await client.chatCompletion({
	model: "${model.id}",
	messages: ${messagesStr},
	${configStr}
});

console.log(chatCompletion.choices[0].message);`
        },
        {
          client: "openai",
          content: `import { OpenAI } from "openai"

const client = new OpenAI({
	baseURL: "https://api-inference.huggingface.co/v1/",
	apiKey: "${accessToken || `{API_TOKEN}`}"
})

const chatCompletion = await client.chat.completions.create({
	model: "${model.id}",
	messages: ${messagesStr},
	${configStr}
});

console.log(chatCompletion.choices[0].message);`
        }
      ];
    }
  } else {
    // Non-conversational text generation: generic fetch-based snippet.
    return snippetBasic3(model, accessToken);
  }
};
6580
6900
  var snippetImageTextToTextGeneration2 = (model, accessToken) => {
6581
6901
  if (model.tags.includes("conversational")) {
6582
- return `import { HfInference } from "@huggingface/inference";
6902
+ return {
6903
+ content: `import { HfInference } from "@huggingface/inference";
6583
6904
 
6584
6905
  const inference = new HfInference("${accessToken || `{API_TOKEN}`}");
6585
6906
  const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg";
@@ -6598,12 +6919,14 @@ for await (const chunk of inference.chatCompletionStream({
6598
6919
  max_tokens: 500,
6599
6920
  })) {
6600
6921
  process.stdout.write(chunk.choices[0]?.delta?.content || "");
6601
- }`;
6922
+ }`
6923
+ };
6602
6924
  } else {
6603
6925
  return snippetBasic3(model, accessToken);
6604
6926
  }
6605
6927
  };
6606
- var snippetZeroShotClassification3 = (model, accessToken) => `async function query(data) {
6928
+ var snippetZeroShotClassification3 = (model, accessToken) => ({
6929
+ content: `async function query(data) {
6607
6930
  const response = await fetch(
6608
6931
  "https://api-inference.huggingface.co/models/${model.id}",
6609
6932
  {
@@ -6620,11 +6943,13 @@ var snippetZeroShotClassification3 = (model, accessToken) => `async function que
6620
6943
  }
6621
6944
 
6622
6945
  query({"inputs": ${getModelInputSnippet(
6623
- model
6624
- )}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => {
6946
+ model
6947
+ )}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => {
6625
6948
  console.log(JSON.stringify(response));
6626
- });`;
6627
- var snippetTextToImage2 = (model, accessToken) => `async function query(data) {
6949
+ });`
6950
+ });
6951
+ var snippetTextToImage2 = (model, accessToken) => ({
6952
+ content: `async function query(data) {
6628
6953
  const response = await fetch(
6629
6954
  "https://api-inference.huggingface.co/models/${model.id}",
6630
6955
  {
@@ -6641,7 +6966,8 @@ var snippetTextToImage2 = (model, accessToken) => `async function query(data) {
6641
6966
  }
6642
6967
  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
6643
6968
  // Use image
6644
- });`;
6969
+ });`
6970
+ });
6645
6971
  var snippetTextToAudio2 = (model, accessToken) => {
6646
6972
  const commonSnippet = `async function query(data) {
6647
6973
  const response = await fetch(
@@ -6656,25 +6982,30 @@ var snippetTextToAudio2 = (model, accessToken) => {
6656
6982
  }
6657
6983
  );`;
6658
6984
  if (model.library_name === "transformers") {
6659
- return commonSnippet + `
6985
+ return {
6986
+ content: commonSnippet + `
6660
6987
  const result = await response.blob();
6661
6988
  return result;
6662
6989
  }
6663
6990
  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
6664
6991
  // Returns a byte object of the Audio wavform. Use it directly!
6665
- });`;
6992
+ });`
6993
+ };
6666
6994
  } else {
6667
- return commonSnippet + `
6995
+ return {
6996
+ content: commonSnippet + `
6668
6997
  const result = await response.json();
6669
6998
  return result;
6670
6999
  }
6671
7000
 
6672
7001
  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
6673
7002
  console.log(JSON.stringify(response));
6674
- });`;
7003
+ });`
7004
+ };
6675
7005
  }
6676
7006
  };
6677
- var snippetFile3 = (model, accessToken) => `async function query(filename) {
7007
+ var snippetFile3 = (model, accessToken) => ({
7008
+ content: `async function query(filename) {
6678
7009
  const data = fs.readFileSync(filename);
6679
7010
  const response = await fetch(
6680
7011
  "https://api-inference.huggingface.co/models/${model.id}",
@@ -6693,7 +7024,8 @@ var snippetFile3 = (model, accessToken) => `async function query(filename) {
6693
7024
 
6694
7025
  query(${getModelInputSnippet(model)}).then((response) => {
6695
7026
  console.log(JSON.stringify(response));
6696
- });`;
7027
+ });`
7028
+ });
6697
7029
  var jsSnippets = {
6698
7030
  // Same order as in js/src/lib/interfaces/Types.ts
6699
7031
  "text-classification": snippetBasic3,
@@ -6721,7 +7053,7 @@ var jsSnippets = {
6721
7053
  "image-segmentation": snippetFile3
6722
7054
  };
6723
7055
/**
 * Return the JS inference snippet for a model, or an empty snippet when no
 * builder is registered for its pipeline tag.
 *
 * @param model - model data (reads `pipeline_tag`, plus whatever the builder reads)
 * @param accessToken - HF token inlined into the generated code
 * @returns a snippet object with a `content` string (empty when unsupported)
 */
function getJsInferenceSnippet(model, accessToken) {
  const tag = model.pipeline_tag;
  if (!tag || !(tag in jsSnippets)) {
    return { content: "" };
  }
  return jsSnippets[tag]?.(model, accessToken) ?? { content: "" };
}
6726
7058
/**
 * Whether a JS inference snippet exists for this model's pipeline tag.
 *
 * @param model - model data (only `pipeline_tag` is read)
 * @returns true when `jsSnippets` has a builder registered for the tag
 */
function hasJsInferenceSnippet(model) {
  const tag = model.pipeline_tag;
  return Boolean(tag) && tag in jsSnippets;
}
@@ -6788,6 +7120,11 @@ var SKUS = {
6788
7120
  tflops: 31.24,
6789
7121
  memory: [24]
6790
7122
  },
7123
+ A2: {
7124
+ tflops: 4.531,
7125
+ // source: https://www.techpowerup.com/gpu-specs/a2.c3848
7126
+ memory: [16]
7127
+ },
6791
7128
  "RTX 4090": {
6792
7129
  tflops: 82.58,
6793
7130
  memory: [24]
@@ -6950,6 +7287,14 @@ var SKUS = {
6950
7287
  tflops: 184.6,
6951
7288
  memory: [32]
6952
7289
  },
7290
+ MI60: {
7291
+ tflops: 29.5,
7292
+ memory: [32]
7293
+ },
7294
+ MI50: {
7295
+ tflops: 26.5,
7296
+ memory: [16]
7297
+ },
6953
7298
  "RX 7900 XTX": {
6954
7299
  tflops: 122.8,
6955
7300
  memory: [24]
@@ -6982,6 +7327,14 @@ var SKUS = {
6982
7327
  tflops: 32.33,
6983
7328
  memory: [16]
6984
7329
  },
7330
+ "RX 6700 XT": {
7331
+ tflops: 26.43,
7332
+ memory: [12]
7333
+ },
7334
+ "RX 6700": {
7335
+ tflops: 22.58,
7336
+ memory: [10]
7337
+ },
6985
7338
  "Radeon Pro VII": {
6986
7339
  tflops: 26.11,
6987
7340
  memory: [16]