@huggingface/tasks 0.12.23 → 0.12.24

This diff reflects the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (44)
  1. package/dist/index.cjs +476 -123
  2. package/dist/index.js +476 -123
  3. package/dist/src/hardware.d.ts +20 -0
  4. package/dist/src/hardware.d.ts.map +1 -1
  5. package/dist/src/model-libraries-snippets.d.ts +1 -0
  6. package/dist/src/model-libraries-snippets.d.ts.map +1 -1
  7. package/dist/src/model-libraries.d.ts +9 -2
  8. package/dist/src/model-libraries.d.ts.map +1 -1
  9. package/dist/src/snippets/common.d.ts +20 -0
  10. package/dist/src/snippets/common.d.ts.map +1 -0
  11. package/dist/src/snippets/curl.d.ts +15 -8
  12. package/dist/src/snippets/curl.d.ts.map +1 -1
  13. package/dist/src/snippets/js.d.ts +17 -10
  14. package/dist/src/snippets/js.d.ts.map +1 -1
  15. package/dist/src/snippets/python.d.ts +20 -13
  16. package/dist/src/snippets/python.d.ts.map +1 -1
  17. package/dist/src/snippets/types.d.ts +4 -0
  18. package/dist/src/snippets/types.d.ts.map +1 -1
  19. package/dist/src/tasks/depth-estimation/data.d.ts.map +1 -1
  20. package/dist/src/tasks/image-text-to-text/data.d.ts.map +1 -1
  21. package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
  22. package/dist/src/tasks/video-text-to-text/data.d.ts.map +1 -1
  23. package/dist/src/tasks/visual-question-answering/inference.d.ts +0 -1
  24. package/dist/src/tasks/visual-question-answering/inference.d.ts.map +1 -1
  25. package/package.json +1 -1
  26. package/src/hardware.ts +20 -0
  27. package/src/model-libraries-snippets.ts +7 -1
  28. package/src/model-libraries.ts +8 -1
  29. package/src/snippets/common.ts +63 -0
  30. package/src/snippets/curl.ts +71 -26
  31. package/src/snippets/js.ts +165 -40
  32. package/src/snippets/python.ts +186 -48
  33. package/src/snippets/types.ts +5 -0
  34. package/src/tasks/depth-estimation/data.ts +15 -7
  35. package/src/tasks/image-segmentation/data.ts +5 -5
  36. package/src/tasks/image-text-to-text/data.ts +17 -9
  37. package/src/tasks/keypoint-detection/data.ts +1 -1
  38. package/src/tasks/text-generation/data.ts +7 -7
  39. package/src/tasks/text-to-image/data.ts +2 -2
  40. package/src/tasks/text-to-speech/data.ts +5 -1
  41. package/src/tasks/text-to-video/data.ts +10 -10
  42. package/src/tasks/video-text-to-text/data.ts +8 -0
  43. package/src/tasks/visual-question-answering/inference.ts +0 -1
  44. package/src/tasks/visual-question-answering/spec/output.json +1 -1
package/dist/index.cjs CHANGED
@@ -2224,20 +2224,24 @@ var taskData11 = {
  id: "meta-llama/Llama-3.2-11B-Vision-Instruct"
  },
  {
- description: "Cutting-edge conversational vision language model that can take multiple image inputs.",
- id: "HuggingFaceM4/idefics2-8b-chatty"
+ description: "Cutting-edge vision language models.",
+ id: "allenai/Molmo-7B-D-0924"
  },
  {
  description: "Small yet powerful model.",
  id: "vikhyatk/moondream2"
  },
  {
- description: "Strong image-text-to-text model made to understand documents.",
- id: "mPLUG/DocOwl1.5"
+ description: "Strong image-text-to-text model.",
+ id: "Qwen/Qwen2-VL-7B-Instruct"
  },
  {
  description: "Strong image-text-to-text model.",
- id: "microsoft/Phi-3.5-vision-instruct"
+ id: "mistralai/Pixtral-12B-2409"
+ },
+ {
+ description: "Strong image-text-to-text model focused on documents.",
+ id: "stepfun-ai/GOT-OCR2_0"
  }
  ],
  spaces: [
@@ -2251,19 +2255,23 @@ var taskData11 = {
  },
  {
  description: "Powerful vision-language model assistant.",
- id: "liuhaotian/LLaVA-1.6"
+ id: "akhaliq/Molmo-7B-D-0924"
+ },
+ {
+ description: "An image-text-to-text application focused on documents.",
+ id: "stepfun-ai/GOT_official_online_demo"
  },
  {
  description: "An application to compare outputs of different vision language models.",
  id: "merve/compare_VLMs"
  },
  {
- description: "An application for document vision language tasks.",
- id: "mPLUG/DocOwl"
+ description: "An application for chatting with an image-text-to-text model.",
+ id: "GanymedeNil/Qwen2-VL-7B"
  }
  ],
  summary: "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
- widgetModels: ["microsoft/kosmos-2-patch14-224"],
+ widgetModels: ["meta-llama/Llama-3.2-11B-Vision-Instruct"],
  youtubeId: "IoGaGfU1CIg"
  };
  var data_default11 = taskData11;
@@ -2323,11 +2331,11 @@ var taskData12 = {
  id: "ZhengPeng7/BiRefNet"
  },
  {
- description: "Semantic segmentation model trained on ADE20k dataset.",
- id: "nvidia/segformer-b0-finetuned-ade-512-512"
+ description: "Powerful human-centric image segmentation model.",
+ id: "facebook/sapiens-seg-1b"
  },
  {
- description: "Panoptic segmentation model trained COCO (common objects) dataset.",
+ description: "Panoptic segmentation model trained on the COCO (common objects) dataset.",
  id: "facebook/mask2former-swin-large-coco-panoptic"
  }
  ],
@@ -2341,8 +2349,8 @@ var taskData12 = {
  id: "jbrinkma/segment-anything"
  },
  {
- description: "A semantic segmentation application that predicts human silhouettes.",
- id: "keras-io/Human-Part-Segmentation"
+ description: "A human-centric segmentation model.",
+ id: "facebook/sapiens-pose"
  },
  {
  description: "An instance segmentation application to predict neuronal cell types from microscopy images.",
@@ -2531,11 +2539,15 @@ var taskData15 = {
  },
  {
  description: "A strong monocular depth estimation model.",
- id: "Bingxin/Marigold"
+ id: "jingheya/lotus-depth-g-v1-0"
  },
  {
- description: "A metric depth estimation model trained on NYU dataset.",
- id: "Intel/zoedepth-nyu"
+ description: "A depth estimation model that predicts depth in videos.",
+ id: "tencent/DepthCrafter"
+ },
+ {
+ description: "A robust depth estimation model.",
+ id: "apple/DepthPro"
  }
  ],
  spaces: [
@@ -2544,12 +2556,16 @@ var taskData15 = {
  id: "radames/dpt-depth-estimation-3d-voxels"
  },
  {
- description: "An application on cutting-edge depth estimation.",
- id: "depth-anything/Depth-Anything-V2"
+ description: "An application for bleeding-edge depth estimation.",
+ id: "akhaliq/depth-pro"
+ },
+ {
+ description: "An application on cutting-edge depth estimation in videos.",
+ id: "tencent/DepthCrafter"
  },
  {
- description: "An application to try state-of-the-art depth estimation.",
- id: "merve/compare_depth_models"
+ description: "A human-centric depth estimation application.",
+ id: "facebook/sapiens-depth"
  }
  ],
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
@@ -3127,8 +3143,8 @@ var taskData24 = {
  id: "jbilcke-hf/ai-comic-factory"
  },
  {
- description: "A text-to-image application that can generate coherent text inside the image.",
- id: "DeepFloyd/IF"
+ description: "An application to match multiple custom image generation models.",
+ id: "multimodalart/flux-lora-lab"
  },
  {
  description: "A powerful yet very fast image generation application.",
@@ -3211,9 +3227,13 @@ var taskData25 = {
  id: "suno/bark"
  },
  {
- description: "XTTS is a Voice generation model that lets you clone voices into different languages.",
+ description: "An application on XTTS, a voice generation model that lets you clone voices into different languages.",
  id: "coqui/xtts"
  },
+ {
+ description: "An application that generates speech in different styles in English and Chinese.",
+ id: "mrfakename/E2-F5-TTS"
+ },
  {
  description: "An application that synthesizes speech for diverse speaker prompts.",
  id: "parler-tts/parler_tts_mini"
@@ -3532,10 +3552,6 @@ var taskData29 = {
  description: "A text-generation model trained to follow instructions.",
  id: "google/gemma-2-2b-it"
  },
- {
- description: "A code generation model that can generate code in 80+ languages.",
- id: "bigcode/starcoder"
- },
  {
  description: "Very powerful text generation model trained to follow instructions.",
  id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
@@ -3549,12 +3565,12 @@ var taskData29 = {
  id: "AI-MO/NuminaMath-7B-TIR"
  },
  {
- description: "Strong coding assistant model.",
- id: "HuggingFaceH4/starchat2-15b-v0.1"
+ description: "Strong text generation model to follow instructions.",
+ id: "Qwen/Qwen2.5-7B-Instruct"
  },
  {
  description: "Very strong open-source large language model.",
- id: "mistralai/Mistral-Nemo-Instruct-2407"
+ id: "nvidia/Llama-3.1-Nemotron-70B-Instruct"
  }
  ],
  spaces: [
@@ -3562,6 +3578,10 @@ var taskData29 = {
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
  id: "open-llm-leaderboard/open_llm_leaderboard"
  },
+ {
+ description: "A leaderboard for comparing chain-of-thought performance of models.",
+ id: "logikon/open_cot_leaderboard"
+ },
  {
  description: "An text generation based application based on a very powerful LLaMA2 model.",
  id: "ysharma/Explore_llamav2_with_TGI"
@@ -3648,30 +3668,30 @@ var taskData30 = {
  ],
  models: [
  {
- description: "A strong model for video generation.",
- id: "Vchitect/LaVie"
+ description: "A strong model for consistent video generation.",
+ id: "rain1011/pyramid-flow-sd3"
  },
  {
  description: "A robust model for text-to-video generation.",
- id: "damo-vilab/text-to-video-ms-1.7b"
+ id: "VideoCrafter/VideoCrafter2"
  },
  {
- description: "A text-to-video generation model with high quality and smooth outputs.",
- id: "hotshotco/Hotshot-XL"
+ description: "A cutting-edge text-to-video generation model.",
+ id: "TIGER-Lab/T2V-Turbo-V2"
  }
  ],
  spaces: [
  {
  description: "An application that generates video from text.",
- id: "fffiloni/zeroscope"
+ id: "VideoCrafter/VideoCrafter"
  },
  {
- description: "An application that generates video from image and text.",
- id: "Vchitect/LaVie"
+ description: "Consistent video generation application.",
+ id: "TIGER-Lab/T2V-Turbo-V2"
  },
  {
- description: "An application that generates videos from text and provides multi-model support.",
- id: "ArtGAN/Video-Diffusion-WebUI"
+ description: "A cutting edge video generation application.",
+ id: "Pyramid-Flow/pyramid-flow"
  }
  ],
  summary: "Text-to-video models can be used in any application that requires generating consistent sequence of images from text. "
@@ -4288,7 +4308,7 @@ var taskData39 = {
  },
  {
  description: "Strong keypoint detection model used to detect human pose.",
- id: "qualcomm/MediaPipe-Pose-Estimation"
+ id: "facebook/sapiens-pose-1b"
  }
  ],
  spaces: [
@@ -4317,6 +4337,10 @@ var taskData40 = {
  {
  description: "A dataset of instructions and question-answer pairs about videos.",
  id: "lmms-lab/VideoChatGPT"
+ },
+ {
+ description: "Large video understanding dataset.",
+ id: "HuggingFaceFV/finevideo"
  }
  ],
  demo: {
@@ -4354,6 +4378,10 @@ var taskData40 = {
  {
  description: "An application to chat with a video-text-to-text model.",
  id: "llava-hf/video-llava"
+ },
+ {
+ description: "A leaderboard for various video-text-to-text models.",
+ id: "opencompass/openvlm_video_leaderboard"
  }
  ],
  summary: "Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models."
@@ -5243,7 +5271,7 @@ var transformersJS = (model) => {
  if (!model.pipeline_tag) {
  return [`// \u26A0\uFE0F Unknown pipeline tag`];
  }
- const libName = "@xenova/transformers";
+ const libName = "@huggingface/transformers";
  return [
  `// npm i ${libName}
  import { pipeline } from '${libName}';
@@ -5387,6 +5415,11 @@ var nemo = (model) => {
  }
  return command ?? [`# tag did not correspond to a valid NeMo domain.`];
  };
+ var pxia = (model) => [
+ `from pxia import AutoModel
+
+ model = AutoModel.from_pretrained("${model.id}")`
+ ];
  var pythae = (model) => [
  `from pythae.models import AutoModel

@@ -5872,6 +5905,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  filter: true,
  countDownloads: `path:"adapter_config.json"`
  },
+ pxia: {
+ prettyLabel: "pxia",
+ repoName: "pxia",
+ repoUrl: "https://github.com/not-lain/pxia",
+ snippets: pxia,
+ filter: false
+ },
  "pyannote-audio": {
  prettyLabel: "pyannote.audio",
  repoName: "pyannote-audio",
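The new pxia registry entry above pairs the MODEL_LIBRARIES_UI_ELEMENTS record with the pxia snippet function added earlier in this diff. A minimal sketch of what that snippet function returns, assuming a purely illustrative model id:

  const model = { id: "not-lain/some-pxia-model" }; // hypothetical, for illustration only
  const [snippet] = pxia(model);
  // snippet:
  // from pxia import AutoModel
  //
  // model = AutoModel.from_pretrained("not-lain/some-pxia-model")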
@@ -6104,7 +6144,7 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  "transformers.js": {
  prettyLabel: "Transformers.js",
  repoName: "transformers.js",
- repoUrl: "https://github.com/xenova/transformers.js",
+ repoUrl: "https://github.com/huggingface/transformers.js",
  docsUrl: "https://huggingface.co/docs/hub/transformers-js",
  snippets: transformersJS,
  filter: true
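With the library renamed from @xenova/transformers to @huggingface/transformers, the transformersJS snippet generator now emits the new package name. Only the template lines visible in this diff are shown below; the model id is illustrative:

  // transformersJS({ id: "user/some-model", pipeline_tag: "text-classification" }) begins with:
  // npm i @huggingface/transformers
  import { pipeline } from '@huggingface/transformers';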
@@ -6293,30 +6333,91 @@ __export(curl_exports, {
  snippetTextGeneration: () => snippetTextGeneration,
  snippetZeroShotClassification: () => snippetZeroShotClassification
  });
- var snippetBasic = (model, accessToken) => `curl https://api-inference.huggingface.co/models/${model.id} \\
+
+ // src/snippets/common.ts
+ function stringifyMessages(messages, opts) {
+ const keyRole = opts.attributeKeyQuotes ? `"role"` : "role";
+ const keyContent = opts.attributeKeyQuotes ? `"content"` : "content";
+ const messagesStringified = messages.map(({ role, content }) => {
+ if (typeof content === "string") {
+ content = JSON.stringify(content).slice(1, -1);
+ if (opts.customContentEscaper) {
+ content = opts.customContentEscaper(content);
+ }
+ return `{ ${keyRole}: "${role}", ${keyContent}: "${content}" }`;
+ } else {
+ 2;
+ content = content.map(({ image_url, text, type }) => ({
+ type,
+ image_url,
+ ...text ? { text: JSON.stringify(text).slice(1, -1) } : void 0
+ }));
+ content = JSON.stringify(content).slice(1, -1);
+ if (opts.customContentEscaper) {
+ content = opts.customContentEscaper(content);
+ }
+ return `{ ${keyRole}: "${role}", ${keyContent}: ${content} }`;
+ }
+ });
+ return opts.start + messagesStringified.join(opts.sep) + opts.end;
+ }
+ function stringifyGenerationConfig(config, opts) {
+ const quote = opts.attributeKeyQuotes ? `"` : "";
+ return opts.start + Object.entries(config).map(([key, val]) => `${quote}${key}${quote}${opts.attributeValueConnector}${val}`).join(opts.sep) + opts.end;
+ }
+
+ // src/snippets/curl.ts
+ var snippetBasic = (model, accessToken) => ({
+ content: `curl https://api-inference.huggingface.co/models/${model.id} \\
  -X POST \\
  -d '{"inputs": ${getModelInputSnippet(model, true)}}' \\
  -H 'Content-Type: application/json' \\
- -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`;
- var snippetTextGeneration = (model, accessToken) => {
+ -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`
+ });
+ var snippetTextGeneration = (model, accessToken, opts) => {
  if (model.tags.includes("conversational")) {
- return `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
+ const streaming = opts?.streaming ?? true;
+ const messages = opts?.messages ?? [
+ { role: "user", content: "What is the capital of France?" }
+ ];
+ const config = {
+ ...opts?.temperature ? { temperature: opts.temperature } : void 0,
+ max_tokens: opts?.max_tokens ?? 500,
+ ...opts?.top_p ? { top_p: opts.top_p } : void 0
+ };
+ return {
+ content: `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
  -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" \\
  -H 'Content-Type: application/json' \\
- -d '{
- "model": "${model.id}",
- "messages": [{"role": "user", "content": "What is the capital of France?"}],
- "max_tokens": 500,
- "stream": false
- }'
- `;
+ --data '{
+ "model": "${model.id}",
+ "messages": ${stringifyMessages(messages, {
+ sep: ",\n ",
+ start: `[
+ `,
+ end: `
+ ]`,
+ attributeKeyQuotes: true,
+ customContentEscaper: (str) => str.replace(/'/g, "'\\''")
+ })},
+ ${stringifyGenerationConfig(config, {
+ sep: ",\n ",
+ start: "",
+ end: "",
+ attributeKeyQuotes: true,
+ attributeValueConnector: ": "
+ })},
+ "stream": ${!!streaming}
+ }'`
+ };
  } else {
  return snippetBasic(model, accessToken);
  }
  };
  var snippetImageTextToTextGeneration = (model, accessToken) => {
  if (model.tags.includes("conversational")) {
- return `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
+ return {
+ content: `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
  -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" \\
  -H 'Content-Type: application/json' \\
  -d '{
@@ -6333,20 +6434,25 @@ var snippetImageTextToTextGeneration = (model, accessToken) => {
  "max_tokens": 500,
  "stream": false
  }'
- `;
+ `
+ };
  } else {
  return snippetBasic(model, accessToken);
  }
  };
- var snippetZeroShotClassification = (model, accessToken) => `curl https://api-inference.huggingface.co/models/${model.id} \\
+ var snippetZeroShotClassification = (model, accessToken) => ({
+ content: `curl https://api-inference.huggingface.co/models/${model.id} \\
  -X POST \\
  -d '{"inputs": ${getModelInputSnippet(model, true)}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \\
  -H 'Content-Type: application/json' \\
- -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`;
- var snippetFile = (model, accessToken) => `curl https://api-inference.huggingface.co/models/${model.id} \\
+ -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`
+ });
+ var snippetFile = (model, accessToken) => ({
+ content: `curl https://api-inference.huggingface.co/models/${model.id} \\
  -X POST \\
  --data-binary '@${getModelInputSnippet(model, true, true)}' \\
- -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`;
+ -H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"`
+ });
  var curlSnippets = {
  // Same order as in js/src/lib/interfaces/Types.ts
  "text-classification": snippetBasic,
@@ -6374,7 +6480,7 @@ var curlSnippets = {
  "image-segmentation": snippetFile
  };
  function getCurlInferenceSnippet(model, accessToken) {
- return model.pipeline_tag && model.pipeline_tag in curlSnippets ? curlSnippets[model.pipeline_tag]?.(model, accessToken) ?? "" : "";
+ return model.pipeline_tag && model.pipeline_tag in curlSnippets ? curlSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" } : { content: "" };
  }
  function hasCurlInferenceSnippet(model) {
  return !!model.pipeline_tag && model.pipeline_tag in curlSnippets;
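The curl helpers above now return { content } objects instead of bare strings, with message and generation-config serialization centralized in the new common.ts helpers. A rough sketch of what those helpers produce, with illustrative inputs:

  stringifyMessages([{ role: "user", content: "Hi" }], { sep: ", ", start: "[", end: "]", attributeKeyQuotes: true });
  // -> '[{ "role": "user", "content": "Hi" }]'

  stringifyGenerationConfig({ max_tokens: 500 }, { sep: ", ", start: "", end: "", attributeKeyQuotes: true, attributeValueConnector: ": " });
  // -> '"max_tokens": 500'

  getCurlInferenceSnippet(model, accessToken); // now { content: "curl https://api-inference.huggingface.co/..." }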
@@ -6397,18 +6503,114 @@ __export(python_exports, {
  snippetZeroShotClassification: () => snippetZeroShotClassification2,
  snippetZeroShotImageClassification: () => snippetZeroShotImageClassification
  });
- var snippetConversational = (model, accessToken) => `from huggingface_hub import InferenceClient
+ var snippetConversational = (model, accessToken, opts) => {
+ const streaming = opts?.streaming ?? true;
+ const messages = opts?.messages ?? [
+ { role: "user", content: "What is the capital of France?" }
+ ];
+ const messagesStr = stringifyMessages(messages, {
+ sep: ",\n ",
+ start: `[
+ `,
+ end: `
+ ]`,
+ attributeKeyQuotes: true
+ });
+ const config = {
+ ...opts?.temperature ? { temperature: opts.temperature } : void 0,
+ max_tokens: opts?.max_tokens ?? 500,
+ ...opts?.top_p ? { top_p: opts.top_p } : void 0
+ };
+ const configStr = stringifyGenerationConfig(config, {
+ sep: ",\n ",
+ start: "",
+ end: "",
+ attributeValueConnector: "="
+ });
+ if (streaming) {
+ return [
+ {
+ client: "huggingface_hub",
+ content: `from huggingface_hub import InferenceClient

  client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")

- for message in client.chat_completion(
- model="${model.id}",
- messages=[{"role": "user", "content": "What is the capital of France?"}],
- max_tokens=500,
- stream=True,
- ):
- print(message.choices[0].delta.content, end="")`;
- var snippetConversationalWithImage = (model, accessToken) => `from huggingface_hub import InferenceClient
+ messages = ${messagesStr}
+
+ stream = client.chat.completions.create(
+ model="${model.id}",
+ messages=messages,
+ ${configStr},
+ stream=True
+ )
+
+ for chunk in stream:
+ print(chunk.choices[0].delta.content)`
+ },
+ {
+ client: "openai",
+ content: `from openai import OpenAI
+
+ client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="${accessToken || "{API_TOKEN}"}"
+ )
+
+ messages = ${messagesStr}
+
+ stream = client.chat.completions.create(
+ model="${model.id}",
+ messages=messages,
+ ${configStr},
+ stream=True
+ )
+
+ for chunk in stream:
+ print(chunk.choices[0].delta.content)`
+ }
+ ];
+ } else {
+ return [
+ {
+ client: "huggingface_hub",
+ content: `from huggingface_hub import InferenceClient
+
+ client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
+
+ messages = ${messagesStr}
+
+ completion = client.chat.completions.create(
+ model="${model.id}",
+ messages=messages,
+ ${configStr}
+ )
+
+ print(completion.choices[0].message)`
+ },
+ {
+ client: "openai",
+ content: `from openai import OpenAI
+
+ client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="${accessToken || "{API_TOKEN}"}"
+ )
+
+ messages = ${messagesStr}
+
+ completion = client.chat.completions.create(
+ model="${model.id}",
+ messages=messages,
+ ${configStr}
+ )
+
+ print(completion.choices[0].message)`
+ }
+ ];
+ }
+ };
+ var snippetConversationalWithImage = (model, accessToken) => ({
+ content: `from huggingface_hub import InferenceClient

  client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")

@@ -6428,16 +6630,20 @@ for message in client.chat_completion(
  max_tokens=500,
  stream=True,
  ):
- print(message.choices[0].delta.content, end="")`;
- var snippetZeroShotClassification2 = (model) => `def query(payload):
+ print(message.choices[0].delta.content, end="")`
+ });
+ var snippetZeroShotClassification2 = (model) => ({
+ content: `def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.json()

  output = query({
  "inputs": ${getModelInputSnippet(model)},
  "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
- })`;
- var snippetZeroShotImageClassification = (model) => `def query(data):
+ })`
+ });
+ var snippetZeroShotImageClassification = (model) => ({
+ content: `def query(data):
  with open(data["image_path"], "rb") as f:
  img = f.read()
  payload={
@@ -6450,22 +6656,28 @@ var snippetZeroShotImageClassification = (model) => `def query(data):
  output = query({
  "image_path": ${getModelInputSnippet(model)},
  "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
- })`;
- var snippetBasic2 = (model) => `def query(payload):
+ })`
+ });
+ var snippetBasic2 = (model) => ({
+ content: `def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.json()

  output = query({
  "inputs": ${getModelInputSnippet(model)},
- })`;
- var snippetFile2 = (model) => `def query(filename):
+ })`
+ });
+ var snippetFile2 = (model) => ({
+ content: `def query(filename):
  with open(filename, "rb") as f:
  data = f.read()
  response = requests.post(API_URL, headers=headers, data=data)
  return response.json()

- output = query(${getModelInputSnippet(model)})`;
- var snippetTextToImage = (model) => `def query(payload):
+ output = query(${getModelInputSnippet(model)})`
+ });
+ var snippetTextToImage = (model) => ({
+ content: `def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.content
  image_bytes = query({
@@ -6474,16 +6686,20 @@ image_bytes = query({
  # You can access the image with PIL.Image for example
  import io
  from PIL import Image
- image = Image.open(io.BytesIO(image_bytes))`;
- var snippetTabular = (model) => `def query(payload):
+ image = Image.open(io.BytesIO(image_bytes))`
+ });
+ var snippetTabular = (model) => ({
+ content: `def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.content
  response = query({
  "inputs": {"data": ${getModelInputSnippet(model)}},
- })`;
+ })`
+ });
  var snippetTextToAudio = (model) => {
  if (model.library_name === "transformers") {
- return `def query(payload):
+ return {
+ content: `def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.content

@@ -6492,9 +6708,11 @@ audio_bytes = query({
  })
  # You can access the audio with IPython.display for example
  from IPython.display import Audio
- Audio(audio_bytes)`;
+ Audio(audio_bytes)`
+ };
  } else {
- return `def query(payload):
+ return {
+ content: `def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.json()

@@ -6503,10 +6721,12 @@ audio, sampling_rate = query({
  })
  # You can access the audio with IPython.display for example
  from IPython.display import Audio
- Audio(audio, rate=sampling_rate)`;
+ Audio(audio, rate=sampling_rate)`
+ };
  }
  };
- var snippetDocumentQuestionAnswering = (model) => `def query(payload):
+ var snippetDocumentQuestionAnswering = (model) => ({
+ content: `def query(payload):
  with open(payload["image"], "rb") as f:
  img = f.read()
  payload["image"] = base64.b64encode(img).decode("utf-8")
@@ -6515,7 +6735,8 @@ var snippetDocumentQuestionAnswering = (model) => `def query(payload):

  output = query({
  "inputs": ${getModelInputSnippet(model)},
- })`;
+ })`
+ });
  var pythonSnippets = {
  // Same order as in tasks/src/pipelines.ts
  "text-classification": snippetBasic2,
@@ -6546,19 +6767,25 @@ var pythonSnippets = {
  "image-to-text": snippetFile2,
  "zero-shot-image-classification": snippetZeroShotImageClassification
  };
- function getPythonInferenceSnippet(model, accessToken) {
+ function getPythonInferenceSnippet(model, accessToken, opts) {
  if (model.pipeline_tag === "text-generation" && model.tags.includes("conversational")) {
- return snippetConversational(model, accessToken);
+ return snippetConversational(model, accessToken, opts);
  } else if (model.pipeline_tag === "image-text-to-text" && model.tags.includes("conversational")) {
  return snippetConversationalWithImage(model, accessToken);
  } else {
- const body = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? "" : "";
- return `import requests
-
+ let snippets = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" } : { content: "" };
+ snippets = Array.isArray(snippets) ? snippets : [snippets];
+ return snippets.map((snippet) => {
+ return {
+ ...snippet,
+ content: `import requests
+
  API_URL = "https://api-inference.huggingface.co/models/${model.id}"
  headers = {"Authorization": ${accessToken ? `"Bearer ${accessToken}"` : `f"Bearer {API_TOKEN}"`}}
-
- ${body}`;
+
+ ${snippet.content}`
+ };
+ });
  }
  }
  function hasPythonInferenceSnippet(model) {
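Since conversational text-generation models now yield one snippet per client (huggingface_hub and openai, both pointed at the same OpenAI-compatible endpoint), getPythonInferenceSnippet returns an array of { client, content } objects. A hedged consumer sketch with an illustrative model object and token:

  const model = { id: "some-org/chat-model", pipeline_tag: "text-generation", tags: ["conversational"] };
  const snippets = getPythonInferenceSnippet(model, "hf_xxx", { streaming: false, max_tokens: 256 });
  for (const { client, content } of snippets) {
    console.log(`# client: ${client}`);
    console.log(content);
  }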
@@ -6579,7 +6806,8 @@ __export(js_exports, {
  snippetTextToImage: () => snippetTextToImage2,
  snippetZeroShotClassification: () => snippetZeroShotClassification3
  });
- var snippetBasic3 = (model, accessToken) => `async function query(data) {
+ var snippetBasic3 = (model, accessToken) => ({
+ content: `async function query(data) {
  const response = await fetch(
  "https://api-inference.huggingface.co/models/${model.id}",
  {
@@ -6597,27 +6825,120 @@ var snippetBasic3 = (model, accessToken) => `async function query(data) {

  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
  console.log(JSON.stringify(response));
- });`;
- var snippetTextGeneration2 = (model, accessToken) => {
+ });`
+ });
+ var snippetTextGeneration2 = (model, accessToken, opts) => {
  if (model.tags.includes("conversational")) {
- return `import { HfInference } from "@huggingface/inference";
+ const streaming = opts?.streaming ?? true;
+ const messages = opts?.messages ?? [
+ { role: "user", content: "What is the capital of France?" }
+ ];
+ const messagesStr = stringifyMessages(messages, { sep: ",\n ", start: "[\n ", end: "\n ]" });
+ const config = {
+ ...opts?.temperature ? { temperature: opts.temperature } : void 0,
+ max_tokens: opts?.max_tokens ?? 500,
+ ...opts?.top_p ? { top_p: opts.top_p } : void 0
+ };
+ const configStr = stringifyGenerationConfig(config, {
+ sep: ",\n ",
+ start: "",
+ end: "",
+ attributeValueConnector: ": "
+ });
+ if (streaming) {
+ return [
+ {
+ client: "huggingface_hub",
+ content: `import { HfInference } from "@huggingface/inference"

- const inference = new HfInference("${accessToken || `{API_TOKEN}`}");
+ const client = new HfInference("${accessToken || `{API_TOKEN}`}")

- for await (const chunk of inference.chatCompletionStream({
+ let out = "";
+
+ const stream = client.chatCompletionStream({
  model: "${model.id}",
- messages: [{ role: "user", content: "What is the capital of France?" }],
- max_tokens: 500,
- })) {
- process.stdout.write(chunk.choices[0]?.delta?.content || "");
- }`;
+ messages: ${messagesStr},
+ ${configStr}
+ });
+
+ for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
+ }`
+ },
+ {
+ client: "openai",
+ content: `import { OpenAI } from "openai"
+
+ const client = new OpenAI({
+ baseURL: "https://api-inference.huggingface.co/v1/",
+ apiKey: "${accessToken || `{API_TOKEN}`}"
+ })
+
+ let out = "";
+
+ const stream = await client.chat.completions.create({
+ model: "${model.id}",
+ messages: ${messagesStr},
+ ${configStr},
+ stream: true,
+ });
+
+ for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
+ }`
+ }
+ ];
+ } else {
+ return [
+ {
+ client: "huggingface_hub",
+ content: `import { HfInference } from '@huggingface/inference'
+
+ const client = new HfInference("${accessToken || `{API_TOKEN}`}")
+
+ const chatCompletion = await client.chatCompletion({
+ model: "${model.id}",
+ messages: ${messagesStr},
+ ${configStr}
+ });
+
+ console.log(chatCompletion.choices[0].message);`
+ },
+ {
+ client: "openai",
+ content: `import { OpenAI } from "openai"
+
+ const client = new OpenAI({
+ baseURL: "https://api-inference.huggingface.co/v1/",
+ apiKey: "${accessToken || `{API_TOKEN}`}"
+ })
+
+ const chatCompletion = await client.chat.completions.create({
+ model: "${model.id}",
+ messages: ${messagesStr},
+ ${configStr}
+ });
+
+ console.log(chatCompletion.choices[0].message);`
+ }
+ ];
+ }
  } else {
  return snippetBasic3(model, accessToken);
  }
  };
  var snippetImageTextToTextGeneration2 = (model, accessToken) => {
  if (model.tags.includes("conversational")) {
- return `import { HfInference } from "@huggingface/inference";
+ return {
+ content: `import { HfInference } from "@huggingface/inference";

  const inference = new HfInference("${accessToken || `{API_TOKEN}`}");
  const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg";
@@ -6636,12 +6957,14 @@ for await (const chunk of inference.chatCompletionStream({
  max_tokens: 500,
  })) {
  process.stdout.write(chunk.choices[0]?.delta?.content || "");
- }`;
+ }`
+ };
  } else {
  return snippetBasic3(model, accessToken);
  }
  };
- var snippetZeroShotClassification3 = (model, accessToken) => `async function query(data) {
+ var snippetZeroShotClassification3 = (model, accessToken) => ({
+ content: `async function query(data) {
  const response = await fetch(
  "https://api-inference.huggingface.co/models/${model.id}",
  {
@@ -6658,11 +6981,13 @@ var snippetZeroShotClassification3 = (model, accessToken) => `async function que
  }

  query({"inputs": ${getModelInputSnippet(
- model
- )}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => {
+ model
+ )}, "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => {
  console.log(JSON.stringify(response));
- });`;
- var snippetTextToImage2 = (model, accessToken) => `async function query(data) {
+ });`
+ });
+ var snippetTextToImage2 = (model, accessToken) => ({
+ content: `async function query(data) {
  const response = await fetch(
  "https://api-inference.huggingface.co/models/${model.id}",
  {
@@ -6679,7 +7004,8 @@ var snippetTextToImage2 = (model, accessToken) => `async function query(data) {
  }
  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
  // Use image
- });`;
+ });`
+ });
  var snippetTextToAudio2 = (model, accessToken) => {
  const commonSnippet = `async function query(data) {
  const response = await fetch(
@@ -6694,25 +7020,30 @@ var snippetTextToAudio2 = (model, accessToken) => {
  }
  );`;
  if (model.library_name === "transformers") {
- return commonSnippet + `
+ return {
+ content: commonSnippet + `
  const result = await response.blob();
  return result;
  }
  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
  // Returns a byte object of the Audio wavform. Use it directly!
- });`;
+ });`
+ };
  } else {
- return commonSnippet + `
+ return {
+ content: commonSnippet + `
  const result = await response.json();
  return result;
  }

  query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
  console.log(JSON.stringify(response));
- });`;
+ });`
+ };
  }
  };
- var snippetFile3 = (model, accessToken) => `async function query(filename) {
+ var snippetFile3 = (model, accessToken) => ({
+ content: `async function query(filename) {
  const data = fs.readFileSync(filename);
  const response = await fetch(
  "https://api-inference.huggingface.co/models/${model.id}",
@@ -6731,7 +7062,8 @@ var snippetFile3 = (model, accessToken) => `async function query(filename) {

  query(${getModelInputSnippet(model)}).then((response) => {
  console.log(JSON.stringify(response));
- });`;
+ });`
+ });
  var jsSnippets = {
  // Same order as in js/src/lib/interfaces/Types.ts
  "text-classification": snippetBasic3,
@@ -6759,7 +7091,7 @@ var jsSnippets = {
  "image-segmentation": snippetFile3
  };
  function getJsInferenceSnippet(model, accessToken) {
- return model.pipeline_tag && model.pipeline_tag in jsSnippets ? jsSnippets[model.pipeline_tag]?.(model, accessToken) ?? "" : "";
+ return model.pipeline_tag && model.pipeline_tag in jsSnippets ? jsSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" } : { content: "" };
  }
  function hasJsInferenceSnippet(model) {
  return !!model.pipeline_tag && model.pipeline_tag in jsSnippets;
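As with the Python side, getJsInferenceSnippet returns a single { content } object for most tasks but an array of { client, content } pairs for conversational text generation, so callers should normalize before rendering. A minimal sketch:

  const result = getJsInferenceSnippet(model, accessToken);
  const snippets = Array.isArray(result) ? result : [result];
  for (const s of snippets) console.log(s.content);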
@@ -6826,6 +7158,11 @@ var SKUS = {
  tflops: 31.24,
  memory: [24]
  },
+ A2: {
+ tflops: 4.531,
+ // source: https://www.techpowerup.com/gpu-specs/a2.c3848
+ memory: [16]
+ },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24]
@@ -6988,6 +7325,14 @@ var SKUS = {
  tflops: 184.6,
  memory: [32]
  },
+ MI60: {
+ tflops: 29.5,
+ memory: [32]
+ },
+ MI50: {
+ tflops: 26.5,
+ memory: [16]
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24]
@@ -7020,6 +7365,14 @@ var SKUS = {
  tflops: 32.33,
  memory: [16]
  },
+ "RX 6700 XT": {
+ tflops: 26.43,
+ memory: [12]
+ },
+ "RX 6700": {
+ tflops: 22.58,
+ memory: [10]
+ },
  "Radeon Pro VII": {
  tflops: 26.11,
  memory: [16]
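The new GPU entries (A2, MI60, MI50, RX 6700 XT, RX 6700) follow the existing SKU shape: peak tflops plus a list of memory configurations in GB. A hedged sketch of a memory-fit check over such a table — the fp16 sizing rule (2 bytes per parameter) is an illustrative assumption, not something this package computes:

  // `skus` is assumed to be one vendor's table containing entries like those above
  function fitsInMemory(skus, name, paramsBillions) {
    const weightsGB = paramsBillions * 2; // fp16 weights only; ignores activations and KV cache
    return skus[name].memory.some((gb) => gb >= weightsGB);
  }
  // fitsInMemory(skus, "A2", 7)      -> true  (16 GB >= ~14 GB)
  // fitsInMemory(skus, "RX 6700", 7) -> false (10 GB <  ~14 GB)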