@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,503 @@
1
+ # Modal Advanced Usage Guide
2
+
3
+ ## Multi-GPU Training
4
+
5
+ ### Single-node multi-GPU
6
+
7
+ ```python
8
+ import modal
9
+
10
+ app = modal.App("multi-gpu-training")
11
+ image = modal.Image.debian_slim().pip_install("torch", "transformers", "accelerate")
12
+
13
+ @app.function(gpu="H100:4", image=image, timeout=7200)
14
+ def train_multi_gpu():
15
+ from accelerate import Accelerator
16
+
17
+ accelerator = Accelerator()
18
+ model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
19
+
20
+ for batch in dataloader:
21
+ outputs = model(**batch)
22
+ loss = outputs.loss
23
+ accelerator.backward(loss)
24
+ optimizer.step()
25
+ ```
26
+
27
+ ### DeepSpeed integration
28
+
29
+ ```python
30
+ image = modal.Image.debian_slim().pip_install(
31
+ "torch", "transformers", "deepspeed", "accelerate"
32
+ )
33
+
34
+ @app.function(gpu="A100:8", image=image, timeout=14400)
35
+ def deepspeed_train(config: dict):
36
+ from transformers import Trainer, TrainingArguments
37
+
38
+ args = TrainingArguments(
39
+ output_dir="/outputs",
40
+ deepspeed="ds_config.json",
41
+ fp16=True,
42
+ per_device_train_batch_size=4,
43
+ gradient_accumulation_steps=4
44
+ )
45
+
46
+ trainer = Trainer(model=model, args=args, train_dataset=dataset)
47
+ trainer.train()
48
+ ```
49
+
50
+ ### Multi-GPU considerations
51
+
52
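+ For frameworks that re-execute the Python entrypoint to start worker processes (like PyTorch Lightning), use one of the following:
53
+ - Set the strategy to `ddp_spawn` or `ddp_notebook` when invoking training directly
54
+ - Run the training script as a subprocess, so the framework re-executes that script rather than the Modal function's entrypoint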
55
+
56
+ ```python
57
+ @app.function(gpu="H100:4")
58
+ def train_with_subprocess():
59
+ import subprocess
60
+ subprocess.run(["python", "-m", "torch.distributed.launch", "train.py"])
61
+ ```
62
+
63
+ ## Advanced Container Configuration
64
+
65
+ ### Multi-stage builds for caching
66
+
67
+ ```python
68
+ # Stage 1: Base dependencies (cached)
69
+ base_image = modal.Image.debian_slim().pip_install("torch", "numpy", "scipy")
70
+
71
+ # Stage 2: ML libraries (cached separately)
72
+ ml_image = base_image.pip_install("transformers", "datasets", "accelerate")
73
+
74
+ # Stage 3: Custom code (rebuilt on changes)
75
+ final_image = ml_image.copy_local_dir("./src", "/app/src")
76
+ ```
77
+
78
+ ### Custom Dockerfiles
79
+
80
+ ```python
81
+ image = modal.Image.from_dockerfile("./Dockerfile")
82
+ ```
83
+
84
+ ### Installing from Git
85
+
86
+ ```python
87
+ image = modal.Image.debian_slim().pip_install(
88
+ "git+https://github.com/huggingface/transformers.git@main"
89
+ )
90
+ ```
91
+
92
+ ### Using uv for faster installs
93
+
94
+ ```python
95
+ image = modal.Image.debian_slim().uv_pip_install(
96
+ "torch", "transformers", "accelerate"
97
+ )
98
+ ```
99
+
100
+ ## Advanced Class Patterns
101
+
102
+ ### Lifecycle hooks
103
+
104
+ ```python
105
+ @app.cls(gpu="A10G")
106
+ class InferenceService:
107
+ @modal.enter()
108
+ def startup(self):
109
+ """Called once when container starts"""
110
+ self.model = load_model()
111
+ self.tokenizer = load_tokenizer()
112
+
113
+ @modal.exit()
114
+ def shutdown(self):
115
+ """Called when container shuts down"""
116
+ cleanup_resources()
117
+
118
+ @modal.method()
119
+ def predict(self, text: str):
120
+ return self.model(self.tokenizer(text))
121
+ ```
122
+
123
+ ### Concurrent request handling
124
+
125
+ ```python
126
+ @app.cls(
127
+ gpu="A100",
128
+ allow_concurrent_inputs=20, # Handle 20 requests per container
129
+ container_idle_timeout=300
130
+ )
131
+ class BatchInference:
132
+ @modal.enter()
133
+ def load(self):
134
+ self.model = load_model()
135
+
136
+ @modal.method()
137
+ def predict(self, inputs: list):
138
+ return self.model.batch_predict(inputs)
139
+ ```
140
+
141
+ ### Input concurrency vs batching
142
+
143
+ - **Input concurrency**: Multiple requests processed simultaneously (async I/O)
144
+ - **Dynamic batching**: Requests accumulated and processed together (GPU efficiency)
145
+
146
+ ```python
147
+ # Input concurrency - good for I/O-bound
148
+ @app.function(allow_concurrent_inputs=10)
149
+ async def fetch_data(url: str):
150
+ async with aiohttp.ClientSession() as session:
151
+ return await session.get(url)
152
+
153
+ # Dynamic batching - good for GPU inference
154
+ @app.function()
155
+ @modal.batched(max_batch_size=32, wait_ms=100)
156
+ async def batch_embed(texts: list[str]) -> list[list[float]]:
157
+ return model.encode(texts)
158
+ ```
159
+
160
+ ## Advanced Volumes
161
+
162
+ ### Volume operations
163
+
164
+ ```python
165
+ volume = modal.Volume.from_name("my-volume", create_if_missing=True)
166
+
167
+ @app.function(volumes={"/data": volume})
168
+ def volume_operations():
169
+ import os
170
+
171
+ # Write data
172
+ with open("/data/output.txt", "w") as f:
173
+ f.write("Results")
174
+
175
+ # Commit changes (persist to volume)
176
+ volume.commit()
177
+
178
+ # Reload from remote (get latest)
179
+ volume.reload()
180
+ ```
181
+
182
+ ### Shared volumes between functions
183
+
184
+ ```python
185
+ shared_volume = modal.Volume.from_name("shared-data", create_if_missing=True)
186
+
187
+ @app.function(volumes={"/shared": shared_volume})
188
+ def writer():
189
+ with open("/shared/data.txt", "w") as f:
190
+ f.write("Hello from writer")
191
+ shared_volume.commit()
192
+
193
+ @app.function(volumes={"/shared": shared_volume})
194
+ def reader():
195
+ shared_volume.reload() # Get latest
196
+ with open("/shared/data.txt", "r") as f:
197
+ return f.read()
198
+ ```
199
+
200
+ ### Cloud bucket mounts
201
+
202
+ ```python
203
+ # Mount S3 bucket
204
+ bucket = modal.CloudBucketMount(
205
+ bucket_name="my-bucket",
206
+ secret=modal.Secret.from_name("aws-credentials")
207
+ )
208
+
209
+ @app.function(volumes={"/s3": bucket})
210
+ def process_s3_data():
211
+ # Access S3 files like local filesystem
212
+ data = open("/s3/data.parquet").read()
213
+ ```
214
+
215
+ ## Function Composition
216
+
217
+ ### Chaining functions
218
+
219
+ ```python
220
+ @app.function()
221
+ def preprocess(data):
222
+ return cleaned_data
223
+
224
+ @app.function(gpu="T4")
225
+ def inference(data):
226
+ return predictions
227
+
228
+ @app.function()
229
+ def postprocess(predictions):
230
+ return formatted_results
231
+
232
+ @app.function()
233
+ def pipeline(raw_data):
234
+ cleaned = preprocess.remote(raw_data)
235
+ predictions = inference.remote(cleaned)
236
+ results = postprocess.remote(predictions)
237
+ return results
238
+ ```
239
+
240
+ ### Parallel fan-out
241
+
242
+ ```python
243
+ @app.function()
244
+ def process_item(item):
245
+ return expensive_computation(item)
246
+
247
+ @app.function()
248
+ def parallel_pipeline(items):
249
+ # Fan out: process all items in parallel
250
+ results = list(process_item.map(items))
251
+ return results
252
+ ```
253
+
254
+ ### Starmap for multiple arguments
255
+
256
+ ```python
257
+ @app.function()
258
+ def process(x, y, z):
259
+ return x + y + z
260
+
261
+ @app.function()
262
+ def orchestrate():
263
+ args = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
264
+ results = list(process.starmap(args))
265
+ return results
266
+ ```
267
+
268
+ ## Advanced Web Endpoints
269
+
270
+ ### WebSocket support
271
+
272
+ ```python
273
+ from fastapi import FastAPI, WebSocket
274
+
275
+ app = modal.App("websocket-app")
276
+ web_app = FastAPI()
277
+
278
+ @web_app.websocket("/ws")
279
+ async def websocket_endpoint(websocket: WebSocket):
280
+ await websocket.accept()
281
+ while True:
282
+ data = await websocket.receive_text()
283
+ await websocket.send_text(f"Processed: {data}")
284
+
285
+ @app.function()
286
+ @modal.asgi_app()
287
+ def ws_app():
288
+ return web_app
289
+ ```
290
+
291
+ ### Streaming responses
292
+
293
+ ```python
294
+ from fastapi.responses import StreamingResponse
295
+
296
+ @app.function(gpu="A100")
297
+ def generate_stream(prompt: str):
298
+ for token in model.generate_stream(prompt):
299
+ yield token
300
+
301
+ @web_app.get("/stream")
302
+ async def stream_response(prompt: str):
303
+ return StreamingResponse(
304
+ generate_stream.remote_gen(prompt),
305
+ media_type="text/event-stream"
306
+ )
307
+ ```
308
+
309
+ ### Authentication
310
+
311
+ ```python
312
+ from fastapi import Depends, HTTPException, Header
313
+
314
+ async def verify_token(authorization: str = Header(None)):
315
+ if not authorization or not authorization.startswith("Bearer "):
316
+ raise HTTPException(status_code=401)
317
+ token = authorization.split(" ")[1]
318
+ if not verify_jwt(token):
319
+ raise HTTPException(status_code=403)
320
+ return token
321
+
322
+ @web_app.post("/predict")
323
+ async def predict(data: dict, token: str = Depends(verify_token)):
324
+ return model.predict(data)
325
+ ```
326
+
327
+ ## Cost Optimization
328
+
329
+ ### Right-sizing GPUs
330
+
331
+ ```python
332
+ # For inference: smaller GPUs often sufficient
333
+ @app.function(gpu="L40S") # 48GB, best cost/perf for inference
334
+ def inference():
335
+ pass
336
+
337
+ # For training: larger GPUs for throughput
338
+ @app.function(gpu="A100-80GB")
339
+ def training():
340
+ pass
341
+ ```
342
+
343
+ ### GPU fallbacks for availability
344
+
345
+ ```python
346
+ @app.function(gpu=["H100", "A100", "L40S"]) # Try in order
347
+ def flexible_compute():
348
+ pass
349
+ ```
350
+
351
+ ### Scale to zero
352
+
353
+ ```python
354
+ # Default behavior: scale to zero when idle
355
+ @app.function(gpu="A100")
356
+ def on_demand():
357
+ pass
358
+
359
+ # Keep containers warm for low latency (costs more)
360
+ @app.function(gpu="A100", keep_warm=1)
361
+ def always_ready():
362
+ pass
363
+ ```
364
+
365
+ ### Batch processing for efficiency
366
+
367
+ ```python
368
+ # Process in batches to reduce cold starts
369
+ @app.function(gpu="A100")
370
+ def batch_process(items: list):
371
+ return [process(item) for item in items]
372
+
373
+ # Better than individual calls
374
+ results = batch_process.remote(all_items)
375
+ ```
376
+
377
+ ## Monitoring and Observability
378
+
379
+ ### Structured logging
380
+
381
+ ```python
382
+ import json
383
+ import logging
384
+
385
+ logging.basicConfig(level=logging.INFO)
386
+ logger = logging.getLogger(__name__)
387
+
388
+ @app.function()
389
+ def structured_logging(request_id: str, data: dict):
390
+ logger.info(json.dumps({
391
+ "event": "inference_start",
392
+ "request_id": request_id,
393
+ "input_size": len(data)
394
+ }))
395
+
396
+ result = process(data)
397
+
398
+ logger.info(json.dumps({
399
+ "event": "inference_complete",
400
+ "request_id": request_id,
401
+ "output_size": len(result)
402
+ }))
403
+
404
+ return result
405
+ ```
406
+
407
+ ### Custom metrics
408
+
409
+ ```python
410
+ @app.function(gpu="A100")
411
+ def monitored_inference(inputs):
412
+ import time
413
+
414
+ start = time.time()
415
+ results = model.predict(inputs)
416
+ latency = time.time() - start
417
+
418
+ # Log metrics to stdout (printed output is visible in the app's logs in the Modal dashboard)
419
+ print(f"METRIC latency={latency:.3f}s batch_size={len(inputs)}")
420
+
421
+ return results
422
+ ```
423
+
424
+ ## Production Deployment
425
+
426
+ ### Environment separation
427
+
428
+ ```python
429
+ import os
430
+
431
+ env = os.environ.get("MODAL_ENV", "dev")
432
+ app = modal.App(f"my-service-{env}")
433
+
434
+ # Environment-specific config
435
+ if env == "prod":
436
+ gpu_config = "A100"
437
+ timeout = 3600
438
+ else:
439
+ gpu_config = "T4"
440
+ timeout = 300
441
+ ```
442
+
443
+ ### Zero-downtime deployments
444
+
445
+ Modal automatically handles zero-downtime deployments:
446
+ 1. New containers are built and started
447
+ 2. Traffic gradually shifts to the new version
448
+ 3. Old containers drain existing requests
449
+ 4. Old containers are terminated
450
+
451
+ ### Health checks
452
+
453
+ ```python
454
+ @app.function()
455
+ @modal.web_endpoint()
456
+ def health():
457
+ return {
458
+ "status": "healthy",
459
+ "model_loaded": hasattr(Model, "_model"),
460
+ "gpu_available": torch.cuda.is_available()
461
+ }
462
+ ```
463
+
464
+ ## Sandboxes
465
+
466
+ ### Interactive execution environments
467
+
468
+ ```python
469
+ @app.function()
470
+ def run_sandbox():
471
+ sandbox = modal.Sandbox.create(
472
+ app=app,
473
+ image=image,
474
+ gpu="T4"
475
+ )
476
+
477
+ # Execute code in sandbox
478
+ result = sandbox.exec("python", "-c", "print('Hello from sandbox')")
479
+
480
+ sandbox.terminate()
481
+ return result
482
+ ```
483
+
484
+ ## Invoking Deployed Functions
485
+
486
+ ### From external code
487
+
488
+ ```python
489
+ # Call deployed function from any Python script
490
+ import modal
491
+
492
+ f = modal.Function.lookup("my-app", "my_function")
493
+ result = f.remote(arg1, arg2)
494
+ ```
495
+
496
+ ### REST API invocation
497
+
498
+ ```bash
499
+ # Deployed endpoints are accessible via HTTPS
500
+ curl -X POST https://your-workspace--my-app-predict.modal.run \
501
+ -H "Content-Type: application/json" \
502
+ -d '{"text": "Hello world"}'
503
+ ```