@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,587 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "datasets",
5
+ # "flashinfer-python",
6
+ # "huggingface-hub[hf_transfer]",
7
+ # "hf-xet>= 1.1.7",
8
+ # "torch",
9
+ # "transformers",
10
+ # "vllm>=0.8.5",
11
+ # ]
12
+ #
13
+ # ///
14
+ """
15
+ Generate responses for prompts in a dataset using vLLM for efficient GPU inference.
16
+
17
+ This script loads a dataset from Hugging Face Hub containing chat-formatted messages,
18
+ applies the model's chat template, generates responses using vLLM, and saves the
19
+ results back to the Hub with a comprehensive dataset card.
20
+
21
+ Example usage:
22
+ # Local execution with auto GPU detection
23
+ uv run generate-responses.py \\
24
+ username/input-dataset \\
25
+ username/output-dataset \\
26
+ --messages-column messages
27
+
28
+ # With custom model and sampling parameters
29
+ uv run generate-responses.py \\
30
+ username/input-dataset \\
31
+ username/output-dataset \\
32
+ --model-id meta-llama/Llama-3.1-8B-Instruct \\
33
+ --temperature 0.9 \\
34
+ --top-p 0.95 \\
35
+ --max-tokens 2048
36
+
37
+ # HF Jobs execution (see script output for full command)
38
+ hf jobs uv run --flavor a100x4 ...
39
+ """
40
+
41
+ import argparse
42
+ import logging
43
+ import os
44
+ import sys
45
+ from datetime import datetime
46
+ from typing import Optional
47
+
48
+ from datasets import load_dataset
49
+ from huggingface_hub import DatasetCard, get_token, login
50
+ from torch import cuda
51
+ from tqdm.auto import tqdm
52
+ from transformers import AutoTokenizer
53
+ from vllm import LLM, SamplingParams
54
+
55
+ # Enable HF Transfer for faster downloads. NOTE(review): this assignment runs after huggingface_hub has already been imported above - confirm the library reads the variable lazily; if it is read at import time, this line must move above the imports to take effect in this process.
56
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
57
+
58
+ logging.basicConfig(
59
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
60
+ )
61
+ logger = logging.getLogger(__name__)
62
+
63
+
64
def check_gpu_availability() -> int:
    """Report the visible CUDA devices and return how many there are.

    Each device's name and total memory (in GB) is written to the log.
    If CUDA is not available, two error messages are logged and the
    process exits with status 1.

    Returns:
        The number of CUDA devices reported by torch.
    """
    if cuda.is_available():
        device_total = cuda.device_count()
        for index in range(device_total):
            name = cuda.get_device_name(index)
            # total_memory is in bytes; convert to GB for the log line.
            memory_gb = cuda.get_device_properties(index).total_memory / 1024**3
            logger.info(f"GPU {index}: {name} with {memory_gb:.1f} GB memory")
        return device_total

    # No CUDA: nothing below can work, so explain and stop.
    logger.error("CUDA is not available. This script requires a GPU.")
    logger.error(
        "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor."
    )
    sys.exit(1)
80
+
81
+
82
def create_dataset_card(
    source_dataset: str,
    model_id: str,
    messages_column: str,
    prompt_column: Optional[str],
    sampling_params: SamplingParams,
    tensor_parallel_size: int,
    num_examples: int,
    generation_time: str,
    num_skipped: int = 0,
    max_model_len_used: Optional[int] = None,
) -> str:
    """Create a Markdown dataset card documenting the generation run.

    Args:
        source_dataset: Hub ID of the dataset the prompts came from.
        model_id: Hub ID of the model used for generation.
        messages_column: Name of the chat-messages column; reported as the
            input column when ``prompt_column`` is not set.
        prompt_column: Name of the plain-text prompt column. When set, it is
            reported instead of ``messages_column``.
        sampling_params: Object exposing ``temperature``, ``top_p``,
            ``top_k``, ``min_p``, ``max_tokens`` and ``repetition_penalty``.
        tensor_parallel_size: Tensor parallel size reported in the card.
        num_examples: Total number of examples in the dataset.
        generation_time: Timestamp string reported as the generation date.
        num_skipped: Number of prompts skipped for being too long. A
            "Filtering Statistics" section is added only when this is > 0.
        max_model_len_used: Context length limit that was applied, if any.

    Returns:
        The full dataset card text, including the YAML front matter.
    """
    filtering_section = ""
    if num_skipped > 0:
        skip_percentage = (num_skipped / num_examples) * 100
        processed = num_examples - num_skipped
        # max_model_len_used defaults to None, and formatting None with ":,"
        # raises TypeError, so render a readable fallback instead.
        if max_model_len_used is not None:
            max_len_text = f"{max_model_len_used:,} tokens"
        else:
            max_len_text = "model default"
        filtering_section = f"""

### Filtering Statistics

- **Total Examples**: {num_examples:,}
- **Processed**: {processed:,} ({100 - skip_percentage:.1f}%)
- **Skipped (too long)**: {num_skipped:,} ({skip_percentage:.1f}%)
- **Max Model Length Used**: {max_len_text}

Note: Prompts exceeding the maximum model length were skipped and have empty responses."""

    # Values with conditionals or backslashes are computed up front: a
    # backslash inside an f-string replacement field is a SyntaxError before
    # Python 3.12, and this script declares support for 3.10+.
    if prompt_column:
        input_column = prompt_column
        input_kind = "plain text prompts"
        column_flag = "--prompt-column " + prompt_column
    else:
        input_column = messages_column
        input_kind = "chat messages"
        column_flag = "--messages-column " + messages_column

    # Shell line continuation (backslash + newline) followed by the extra flag.
    max_len_flag = (
        f" \\\n    --max-model-len {max_model_len_used}" if max_model_len_used else ""
    )

    return f"""---
tags:
- generated
- vllm
- uv-script
---

# Generated Responses Dataset

This dataset contains generated responses for prompts from [{source_dataset}](https://huggingface.co/datasets/{source_dataset}).

## Generation Details

- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset})
- **Input Column**: `{input_column}` ({input_kind})
- **Model**: [{model_id}](https://huggingface.co/{model_id})
- **Number of Examples**: {num_examples:,}
- **Generation Date**: {generation_time}{filtering_section}

### Sampling Parameters

- **Temperature**: {sampling_params.temperature}
- **Top P**: {sampling_params.top_p}
- **Top K**: {sampling_params.top_k}
- **Min P**: {sampling_params.min_p}
- **Max Tokens**: {sampling_params.max_tokens}
- **Repetition Penalty**: {sampling_params.repetition_penalty}

### Hardware Configuration

- **Tensor Parallel Size**: {tensor_parallel_size}
- **GPU Configuration**: {tensor_parallel_size} GPU(s)

## Dataset Structure

The dataset contains all columns from the source dataset plus:
- `response`: The generated response from the model

## Generation Script

Generated using the vLLM inference script from [uv-scripts/vllm](https://huggingface.co/datasets/uv-scripts/vllm).

To reproduce this generation:

```bash
uv run https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\
    {source_dataset} \\
    <output-dataset> \\
    --model-id {model_id} \\
    {column_flag} \\
    --temperature {sampling_params.temperature} \\
    --top-p {sampling_params.top_p} \\
    --top-k {sampling_params.top_k} \\
    --max-tokens {sampling_params.max_tokens}{max_len_flag}
```
"""
166
+
167
+
168
def main(
    src_dataset_hub_id: str,
    output_dataset_hub_id: str,
    model_id: str = "Qwen/Qwen3-30B-A3B-Instruct-2507",
    messages_column: str = "messages",
    prompt_column: Optional[str] = None,
    output_column: str = "response",
    temperature: float = 0.7,
    top_p: float = 0.8,
    top_k: int = 20,
    min_p: float = 0.0,
    max_tokens: int = 16384,
    repetition_penalty: float = 1.0,
    gpu_memory_utilization: float = 0.90,
    max_model_len: Optional[int] = None,
    tensor_parallel_size: Optional[int] = None,
    skip_long_prompts: bool = True,
    max_samples: Optional[int] = None,
    hf_token: Optional[str] = None,
):
    """
    Main generation pipeline.

    Authenticates with the Hub, loads the "train" split of the source dataset,
    renders every row through the tokenizer's chat template, generates one
    response per row with vLLM, and pushes the dataset (plus a dataset card)
    to the Hub. The dataset and its input column are checked before the model
    is loaded so that a wrong column name fails quickly.

    Rows whose prompt is skipped for length keep their place in the output
    and receive an empty string in ``output_column``.

    Calls ``sys.exit(1)`` when no token can be found, when the input column
    is missing from the dataset, or when no prompt survives filtering.

    Args:
        src_dataset_hub_id: Input dataset on Hugging Face Hub
        output_dataset_hub_id: Where to save results on Hugging Face Hub
        model_id: Hugging Face model ID for generation
        messages_column: Column name containing chat messages
        prompt_column: Column name containing plain text prompts (alternative to messages_column)
        output_column: Column name for generated responses
        temperature: Sampling temperature
        top_p: Top-p sampling parameter
        top_k: Top-k sampling parameter
        min_p: Minimum probability threshold
        max_tokens: Maximum tokens to generate
        repetition_penalty: Repetition penalty parameter
        gpu_memory_utilization: GPU memory utilization factor
        max_model_len: Maximum model context length (None uses model default)
        tensor_parallel_size: Number of GPUs to use (auto-detect if None)
        skip_long_prompts: Skip prompts exceeding max_model_len instead of failing
        max_samples: Maximum number of samples to process (None for all)
        hf_token: Hugging Face authentication token
    """
    generation_start_time = datetime.now().isoformat()

    # GPU check and configuration
    num_gpus = check_gpu_availability()
    if tensor_parallel_size is None:
        tensor_parallel_size = num_gpus
        logger.info(
            f"Auto-detected {num_gpus} GPU(s), using tensor_parallel_size={tensor_parallel_size}"
        )
    else:
        logger.info(f"Using specified tensor_parallel_size={tensor_parallel_size}")
        if tensor_parallel_size > num_gpus:
            logger.warning(
                f"Requested {tensor_parallel_size} GPUs but only {num_gpus} available"
            )

    # Authentication - try multiple methods
    HF_TOKEN = hf_token or os.environ.get("HF_TOKEN") or get_token()

    if not HF_TOKEN:
        logger.error("No HuggingFace token found. Please provide token via:")
        logger.error(" 1. --hf-token argument")
        logger.error(" 2. HF_TOKEN environment variable")
        logger.error(" 3. Run 'huggingface-cli login' or use login() in Python")
        sys.exit(1)

    logger.info("HuggingFace token found, authenticating...")
    login(token=HF_TOKEN)

    # Load and validate the dataset BEFORE the model: a missing column should
    # abort immediately rather than after an expensive model load.
    logger.info(f"Loading dataset: {src_dataset_hub_id}")
    dataset = load_dataset(src_dataset_hub_id, split="train")

    # Apply max_samples if specified
    if max_samples is not None and max_samples < len(dataset):
        logger.info(f"Limiting dataset to {max_samples} samples")
        dataset = dataset.select(range(max_samples))

    total_examples = len(dataset)
    logger.info(f"Dataset loaded with {total_examples:,} examples")

    # A truthy prompt_column selects plain-text mode; otherwise chat messages.
    use_messages = not prompt_column
    input_column = messages_column if use_messages else prompt_column
    if input_column not in dataset.column_names:
        logger.error(
            f"Column '{input_column}' not found. Available columns: {dataset.column_names}"
        )
        sys.exit(1)
    mode_name = "messages" if use_messages else "prompt"
    logger.info(f"Using {mode_name} column mode with column: '{input_column}'")

    # Initialize vLLM
    logger.info(f"Loading model: {model_id}")
    vllm_kwargs = {
        "model": model_id,
        "tensor_parallel_size": tensor_parallel_size,
        "gpu_memory_utilization": gpu_memory_utilization,
    }
    if max_model_len is not None:
        vllm_kwargs["max_model_len"] = max_model_len
        logger.info(f"Using max_model_len={max_model_len}")

    llm = LLM(**vllm_kwargs)

    # Load tokenizer for chat template
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Create sampling parameters
    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        min_p=min_p,
        max_tokens=max_tokens,
        repetition_penalty=repetition_penalty,
    )

    # Get effective max length for filtering
    if max_model_len is not None:
        effective_max_len = max_model_len
    else:
        # Get model's default max length
        effective_max_len = llm.llm_engine.model_config.max_model_len
    logger.info(f"Using effective max model length: {effective_max_len}")

    # Process messages and apply chat template
    logger.info("Preparing prompts...")
    valid_prompts = []
    valid_indices = []  # dataset row index of each entry in valid_prompts
    skipped_info = []  # (dataset row index, token count) of over-length prompts

    for i, example in enumerate(tqdm(dataset, desc="Processing prompts")):
        if use_messages:
            # Messages mode: use existing chat messages
            messages = example[messages_column]
        else:
            # Prompt mode: wrap the plain text as a single user turn
            messages = [{"role": "user", "content": example[prompt_column]}]

        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Count tokens only if filtering is enabled
        if skip_long_prompts:
            token_count = len(tokenizer.encode(prompt))
            if token_count > effective_max_len:
                skipped_info.append((i, token_count))
                continue

        valid_prompts.append(prompt)
        valid_indices.append(i)

    # Log filtering results
    if skip_long_prompts and skipped_info:
        logger.warning(
            f"Skipped {len(skipped_info)} prompts that exceed max_model_len ({effective_max_len} tokens)"
        )
        logger.info("Skipped prompt details (first 10):")
        for prompt_idx, token_count in skipped_info[:10]:
            logger.info(
                f" - Example {prompt_idx}: {token_count} tokens (exceeds by {token_count - effective_max_len})"
            )
        if len(skipped_info) > 10:
            logger.info(f" ... and {len(skipped_info) - 10} more")

        # total_examples is non-zero here because at least one row was skipped
        skip_percentage = (len(skipped_info) / total_examples) * 100
        if skip_percentage > 10:
            logger.warning(f"WARNING: {skip_percentage:.1f}% of prompts were skipped!")

    if not valid_prompts:
        logger.error("No valid prompts to process after filtering!")
        sys.exit(1)

    # Generate responses - vLLM handles batching internally
    logger.info(f"Starting generation for {len(valid_prompts):,} valid prompts...")
    logger.info("vLLM will handle batching and scheduling automatically")

    outputs = llm.generate(valid_prompts, sampling_params)

    # Extract generated text and create full response list
    logger.info("Extracting generated responses...")
    responses = [""] * total_examples  # skipped rows keep the empty string

    # outputs[k] is paired with valid_prompts[k], hence with valid_indices[k]
    for original_idx, output in zip(valid_indices, outputs):
        responses[original_idx] = output.outputs[0].text.strip()

    # Add responses to dataset
    logger.info("Adding responses to dataset...")
    dataset = dataset.add_column(output_column, responses)

    # Create dataset card
    logger.info("Creating dataset card...")
    card_content = create_dataset_card(
        source_dataset=src_dataset_hub_id,
        model_id=model_id,
        messages_column=messages_column,
        prompt_column=prompt_column,
        sampling_params=sampling_params,
        tensor_parallel_size=tensor_parallel_size,
        num_examples=total_examples,
        generation_time=generation_start_time,
        num_skipped=len(skipped_info) if skip_long_prompts else 0,
        max_model_len_used=effective_max_len if skip_long_prompts else None,
    )

    # Push dataset to hub
    logger.info(f"Pushing dataset to: {output_dataset_hub_id}")
    dataset.push_to_hub(output_dataset_hub_id, token=HF_TOKEN)

    # Push dataset card
    card = DatasetCard(card_content)
    card.push_to_hub(output_dataset_hub_id, token=HF_TOKEN)

    logger.info("✅ Generation complete!")
    logger.info(
        f"Dataset available at: https://huggingface.co/datasets/{output_dataset_hub_id}"
    )
412
+
413
+
414
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        # Invoked with no arguments: point at --help and show a sample
        # HF Jobs command instead of letting argparse print an error.
        print("""
vLLM Response Generation Script
==============================

This script requires arguments. For usage information:
uv run generate-responses.py --help

Example HF Jobs command with multi-GPU:
# If you're logged in with huggingface-cli, token will be auto-detected
hf jobs uv run \\
--flavor l4x4 \\
https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\
username/input-dataset \\
username/output-dataset \\
--messages-column messages \\
--model-id Qwen/Qwen3-30B-A3B-Instruct-2507 \\
--temperature 0.7 \\
--max-tokens 16384
""")
    else:
        usage_examples = """
Examples:
# Basic usage with default Qwen model
uv run generate-responses.py input-dataset output-dataset

# With custom model and parameters
uv run generate-responses.py input-dataset output-dataset \\
--model-id meta-llama/Llama-3.1-8B-Instruct \\
--temperature 0.9 \\
--max-tokens 2048

# Force specific GPU configuration
uv run generate-responses.py input-dataset output-dataset \\
--tensor-parallel-size 2 \\
--gpu-memory-utilization 0.95

# Using environment variable for token
HF_TOKEN=hf_xxx uv run generate-responses.py input-dataset output-dataset
"""
        cli = argparse.ArgumentParser(
            description="Generate responses for dataset prompts using vLLM",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=usage_examples,
        )

        # Required positionals, in the order they must appear on the command line.
        positional_specs = [
            (
                "src_dataset_hub_id",
                "Input dataset on Hugging Face Hub (e.g., username/dataset-name)",
            ),
            ("output_dataset_hub_id", "Output dataset name on Hugging Face Hub"),
        ]
        for arg_name, help_text in positional_specs:
            cli.add_argument(arg_name, help=help_text)

        # Optional flags. Each dest (flag with dashes -> underscores) matches a
        # keyword parameter of main(). Registration order is significant:
        # --skip-long-prompts comes first so its default (True) is the one
        # argparse applies to the shared dest.
        option_specs = [
            (
                "--model-id",
                {
                    "type": str,
                    "default": "Qwen/Qwen3-30B-A3B-Instruct-2507",
                    "help": "Model to use for generation (default: Qwen3-30B-A3B-Instruct-2507)",
                },
            ),
            (
                "--messages-column",
                {
                    "type": str,
                    "default": "messages",
                    "help": "Column containing chat messages (default: messages)",
                },
            ),
            (
                "--prompt-column",
                {
                    "type": str,
                    "help": "Column containing plain text prompts (alternative to --messages-column)",
                },
            ),
            (
                "--output-column",
                {
                    "type": str,
                    "default": "response",
                    "help": "Column name for generated responses (default: response)",
                },
            ),
            (
                "--max-samples",
                {
                    "type": int,
                    "help": "Maximum number of samples to process (default: all)",
                },
            ),
            (
                "--temperature",
                {
                    "type": float,
                    "default": 0.7,
                    "help": "Sampling temperature (default: 0.7)",
                },
            ),
            (
                "--top-p",
                {
                    "type": float,
                    "default": 0.8,
                    "help": "Top-p sampling parameter (default: 0.8)",
                },
            ),
            (
                "--top-k",
                {
                    "type": int,
                    "default": 20,
                    "help": "Top-k sampling parameter (default: 20)",
                },
            ),
            (
                "--min-p",
                {
                    "type": float,
                    "default": 0.0,
                    "help": "Minimum probability threshold (default: 0.0)",
                },
            ),
            (
                "--max-tokens",
                {
                    "type": int,
                    "default": 16384,
                    "help": "Maximum tokens to generate (default: 16384)",
                },
            ),
            (
                "--repetition-penalty",
                {
                    "type": float,
                    "default": 1.0,
                    "help": "Repetition penalty (default: 1.0)",
                },
            ),
            (
                "--gpu-memory-utilization",
                {
                    "type": float,
                    "default": 0.90,
                    "help": "GPU memory utilization factor (default: 0.90)",
                },
            ),
            (
                "--max-model-len",
                {
                    "type": int,
                    "help": "Maximum model context length (default: model's default)",
                },
            ),
            (
                "--tensor-parallel-size",
                {
                    "type": int,
                    "help": "Number of GPUs to use (default: auto-detect)",
                },
            ),
            (
                "--hf-token",
                {
                    "type": str,
                    "help": "Hugging Face token (can also use HF_TOKEN env var)",
                },
            ),
            (
                "--skip-long-prompts",
                {
                    "action": "store_true",
                    "default": True,
                    "help": "Skip prompts that exceed max_model_len instead of failing (default: True)",
                },
            ),
            (
                "--no-skip-long-prompts",
                {
                    "dest": "skip_long_prompts",
                    "action": "store_false",
                    "help": "Fail on prompts that exceed max_model_len",
                },
            ),
        ]
        for flag, spec in option_specs:
            cli.add_argument(flag, **spec)

        # The parsed namespace attributes line up one-to-one with main()'s
        # keyword parameters, so the namespace can be forwarded as kwargs.
        main(**vars(cli.parse_args()))