@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,556 @@
1
+ # Hyperparameter Tuning with PyTorch Lightning
2
+
3
+ ## Integration with Tuning Frameworks
4
+
5
+ Lightning integrates seamlessly with popular hyperparameter tuning libraries.
6
+
7
+ ### 1. Ray Tune Integration
8
+
9
+ **Installation**:
10
+ ```bash
11
+ pip install "ray[tune]"
12
+ pip install lightning
13
+ ```
14
+
15
+ **Basic Ray Tune example**:
16
+
17
+ ```python
18
+ import lightning as L
19
+ from ray import tune
20
+ from ray.tune.integration.pytorch_lightning import TuneReportCallback
21
+
22
+ class LitModel(L.LightningModule):
23
+ def __init__(self, lr, batch_size):
24
+ super().__init__()
25
+ self.lr = lr
26
+ self.batch_size = batch_size
27
+ self.model = nn.Sequential(nn.Linear(10, 128), nn.ReLU(), nn.Linear(128, 1))
28
+
29
+ def training_step(self, batch, batch_idx):
30
+ loss = self.model(batch).mean()
31
+ self.log('train_loss', loss)
32
+ return loss
33
+
34
+ def validation_step(self, batch, batch_idx):
35
+ val_loss = self.model(batch).mean()
36
+ self.log('val_loss', val_loss)
37
+
38
+ def configure_optimizers(self):
39
+ return torch.optim.Adam(self.parameters(), lr=self.lr)
40
+
41
+ def train_fn(config):
42
+ """Training function for Ray Tune."""
43
+ model = LitModel(lr=config["lr"], batch_size=config["batch_size"])
44
+
45
+ # Add callback to report metrics to Tune
46
+ trainer = L.Trainer(
47
+ max_epochs=10,
48
+ callbacks=[TuneReportCallback({"loss": "val_loss"}, on="validation_end")]
49
+ )
50
+
51
+ trainer.fit(model, train_loader, val_loader)
52
+
53
+ # Define search space
54
+ config = {
55
+ "lr": tune.loguniform(1e-5, 1e-1),
56
+ "batch_size": tune.choice([16, 32, 64, 128])
57
+ }
58
+
59
+ # Run hyperparameter search
60
+ analysis = tune.run(
61
+ train_fn,
62
+ config=config,
63
+ num_samples=20, # 20 trials
64
+ resources_per_trial={"gpu": 1}
65
+ )
66
+
67
+ # Best hyperparameters
68
+ best_config = analysis.get_best_config(metric="loss", mode="min")
69
+ print(f"Best config: {best_config}")
70
+ ```
71
+
72
+ **Advanced: Population-Based Training (PBT)**:
73
+
74
+ ```python
75
+ from ray.tune.schedulers import PopulationBasedTraining
76
+
77
+ # PBT scheduler
78
+ scheduler = PopulationBasedTraining(
79
+ time_attr='training_iteration',
80
+ metric='val_loss',
81
+ mode='min',
82
+ perturbation_interval=5, # Perturb every 5 epochs
83
+ hyperparam_mutations={
84
+ "lr": tune.loguniform(1e-5, 1e-1),
85
+ "batch_size": [16, 32, 64, 128]
86
+ }
87
+ )
88
+
89
+ analysis = tune.run(
90
+ train_fn,
91
+ config=config,
92
+ num_samples=8, # Population size
93
+ scheduler=scheduler,
94
+ resources_per_trial={"gpu": 1}
95
+ )
96
+ ```
97
+
98
+ ### 2. Optuna Integration
99
+
100
+ **Installation**:
101
+ ```bash
102
+ pip install optuna
103
+ pip install optuna-integration
104
+ ```
105
+
106
+ **Optuna example**:
107
+
108
+ ```python
109
+ import optuna
110
+ from optuna.integration import PyTorchLightningPruningCallback
111
+
112
+ def objective(trial):
113
+ # Suggest hyperparameters
114
+ lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
115
+ batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
116
+ n_layers = trial.suggest_int('n_layers', 1, 3)
117
+ hidden_size = trial.suggest_int('hidden_size', 64, 512, step=64)
118
+
119
+ # Create model
120
+ model = LitModel(lr=lr, n_layers=n_layers, hidden_size=hidden_size)
121
+
122
+ # Pruning callback (early stopping for bad trials)
123
+ pruning_callback = PyTorchLightningPruningCallback(trial, monitor="val_loss")
124
+
125
+ trainer = L.Trainer(
126
+ max_epochs=20,
127
+ callbacks=[pruning_callback],
128
+ enable_progress_bar=False,
129
+ logger=False
130
+ )
131
+
132
+ trainer.fit(model, train_loader, val_loader)
133
+
134
+ return trainer.callback_metrics["val_loss"].item()
135
+
136
+ # Create study
137
+ study = optuna.create_study(
138
+ direction='minimize',
139
+ pruner=optuna.pruners.MedianPruner() # Prune bad trials early
140
+ )
141
+
142
+ # Optimize
143
+ study.optimize(objective, n_trials=50, timeout=3600)
144
+
145
+ # Best params
146
+ print(f"Best trial: {study.best_trial.params}")
147
+ print(f"Best value: {study.best_value}")
148
+
149
+ # Visualization
150
+ optuna.visualization.plot_optimization_history(study).show()
151
+ optuna.visualization.plot_param_importances(study).show()
152
+ ```
153
+
154
+ **Optuna with distributed optimization** (multiple workers sharing one study):
155
+
156
+ ```python
157
+ import optuna
158
+
159
+ # Shared database for distributed optimization
160
+ storage = optuna.storages.RDBStorage(
161
+ url='postgresql://user:pass@localhost/optuna'
162
+ )
163
+
164
+ study = optuna.create_study(
165
+ study_name='distributed_study',
166
+ storage=storage,
167
+ load_if_exists=True,
168
+ direction='minimize'
169
+ )
170
+
171
+ # Run this same script on each machine; workers coordinate trials through the shared storage
172
+ study.optimize(objective, n_trials=50)
173
+ ```
174
+
175
+ ### 3. Weights & Biases (WandB) Sweeps
176
+
177
+ **Installation**:
178
+ ```bash
179
+ pip install wandb
180
+ ```
181
+
182
+ **WandB sweep config** (`sweep.yaml`):
183
+ ```yaml
184
+ program: train.py
185
+ method: bayes
186
+ metric:
187
+ name: val_loss
188
+ goal: minimize
189
+ parameters:
190
+ lr:
191
+ distribution: log_uniform_values
192
+ min: 0.00001
193
+ max: 0.1
194
+ batch_size:
195
+ values: [16, 32, 64, 128]
196
+ optimizer:
197
+ values: ['adam', 'sgd', 'adamw']
198
+ dropout:
199
+ distribution: uniform
200
+ min: 0.0
201
+ max: 0.5
202
+ ```
203
+
204
+ **Training script** (`train.py`):
205
+ ```python
206
+ import wandb
207
+ import lightning as L
208
+ from lightning.pytorch.loggers import WandbLogger
209
+
210
+ def train():
211
+ # Initialize wandb
212
+ wandb.init()
213
+ config = wandb.config
214
+
215
+ # Create model with sweep params
216
+ model = LitModel(
217
+ lr=config.lr,
218
+ batch_size=config.batch_size,
219
+ optimizer=config.optimizer,
220
+ dropout=config.dropout
221
+ )
222
+
223
+ # WandB logger
224
+ wandb_logger = WandbLogger(project='hyperparameter-sweep')
225
+
226
+ trainer = L.Trainer(
227
+ max_epochs=20,
228
+ logger=wandb_logger
229
+ )
230
+
231
+ trainer.fit(model, train_loader, val_loader)
232
+
233
+ if __name__ == '__main__':
234
+ train()
235
+ ```
236
+
237
+ **Launch sweep**:
238
+ ```bash
239
+ # Initialize sweep
240
+ wandb sweep sweep.yaml
241
+ # Output: wandb: Created sweep with ID: abc123
242
+
243
+ # Run agent (can run on multiple machines)
244
+ wandb agent your-entity/your-project/abc123
245
+ ```
246
+
247
+ ### 4. Hyperopt Integration
248
+
249
+ **Installation**:
250
+ ```bash
251
+ pip install hyperopt
252
+ ```
253
+
254
+ **Hyperopt example**:
255
+
256
+ ```python
257
+ from hyperopt import hp, fmin, tpe, Trials
258
+
259
+ def objective(params):
260
+ model = LitModel(
261
+ lr=params['lr'],
262
+ batch_size=int(params['batch_size']),
263
+ hidden_size=int(params['hidden_size'])
264
+ )
265
+
266
+ trainer = L.Trainer(
267
+ max_epochs=10,
268
+ enable_progress_bar=False,
269
+ logger=False
270
+ )
271
+
272
+ trainer.fit(model, train_loader, val_loader)
273
+
274
+ # Return loss (minimize)
275
+ return trainer.callback_metrics["val_loss"].item()
276
+
277
+ # Define search space
278
+ space = {
279
+ 'lr': hp.loguniform('lr', np.log(1e-5), np.log(1e-1)),
280
+ 'batch_size': hp.quniform('batch_size', 16, 128, 16),
281
+ 'hidden_size': hp.quniform('hidden_size', 64, 512, 64)
282
+ }
283
+
284
+ # Optimize
285
+ trials = Trials()
286
+ best = fmin(
287
+ fn=objective,
288
+ space=space,
289
+ algo=tpe.suggest, # Tree-structured Parzen Estimator
290
+ max_evals=50,
291
+ trials=trials
292
+ )
293
+
294
+ print(f"Best hyperparameters: {best}")
295
+ ```
296
+
297
+ ## Built-In Lightning Tuning
298
+
299
+ ### Auto Learning Rate Finder
300
+
301
+ ```python
302
+ class LitModel(L.LightningModule):
303
+ def __init__(self, lr=1e-3):
304
+ super().__init__()
305
+ self.lr = lr
306
+ self.model = nn.Linear(10, 1)
307
+
308
+ def configure_optimizers(self):
309
+ return torch.optim.Adam(self.parameters(), lr=self.lr)
310
+
311
+ def training_step(self, batch, batch_idx):
312
+ loss = self.model(batch).mean()
313
+ return loss
314
+
315
+ # Find optimal learning rate
316
+ model = LitModel()
317
+ trainer = L.Trainer(auto_lr_find=True)
318
+
319
+ # This runs LR finder before training
320
+ trainer.tune(model, train_loader)
321
+
322
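+ # Or run the finder explicitly via Tuner (required in Lightning >= 2.0, where `auto_lr_find` and `trainer.tune()` were removed)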
323
+ from lightning.pytorch.tuner import Tuner
324
+ tuner = Tuner(trainer)
325
+ lr_finder = tuner.lr_find(model, train_loader)
326
+
327
+ # Plot results
328
+ fig = lr_finder.plot(suggest=True)
329
+ fig.show()
330
+
331
+ # Get suggested LR
332
+ suggested_lr = lr_finder.suggestion()
333
+ print(f"Suggested LR: {suggested_lr}")
334
+
335
+ # Update model
336
+ model.lr = suggested_lr
337
+
338
+ # Train with optimal LR
339
+ trainer.fit(model, train_loader)
340
+ ```
341
+
342
+ ### Auto Batch Size Finder
343
+
344
+ ```python
345
+ class LitModel(L.LightningModule):
346
+ def __init__(self, batch_size=32):
347
+ super().__init__()
348
+ self.batch_size = batch_size
349
+ self.model = nn.Linear(10, 1)
350
+
351
+ def train_dataloader(self):
352
+ return DataLoader(dataset, batch_size=self.batch_size)
353
+
354
+ model = LitModel()
355
+ trainer = L.Trainer(auto_scale_batch_size='binsearch')
356
+
357
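+ # Find optimal batch size (in Lightning >= 2.0 use Tuner(trainer).scale_batch_size(model, mode='binsearch') instead; `auto_scale_batch_size` and `trainer.tune()` were removed)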
358
+ trainer.tune(model)
359
+
360
+ print(f"Optimal batch size: {model.batch_size}")
361
+
362
+ # Train with optimal batch size
363
+ trainer.fit(model, train_loader)
364
+ ```
365
+
366
+ ## Advanced Tuning Strategies
367
+
368
+ ### 1. Multi-Fidelity Optimization (Successive Halving)
369
+
370
+ ```python
371
+ from ray.tune.schedulers import ASHAScheduler
372
+
373
+ # ASHA: Asynchronous Successive Halving Algorithm
374
+ scheduler = ASHAScheduler(
375
+ max_t=100, # Max epochs
376
+ grace_period=10, # Min epochs before stopping
377
+ reduction_factor=2 # Halve resources each round
378
+ )
379
+
380
+ analysis = tune.run(
381
+ train_fn,
382
+ config=config,
383
+ num_samples=64,
384
+ scheduler=scheduler,
385
+ resources_per_trial={"gpu": 1}
386
+ )
387
+ ```
388
+
389
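+ **How it works** (idealized; ASHA makes stop/continue decisions asynchronously, so the actual trial counts at each rung are approximate):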
390
+ - Start 64 trials
391
+ - After 10 epochs, stop bottom 50% (32 trials remain)
392
+ - After 20 epochs, stop bottom 50% (16 trials remain)
393
+ - After 40 epochs, stop bottom 50% (8 trials remain)
394
+ - After 80 epochs, stop bottom 50% (4 trials remain)
395
+ - Run remaining 4 trials to completion (100 epochs)
396
+
397
+ ### 2. Bayesian Optimization
398
+
399
+ ```python
400
+ from ray.tune.search.bayesopt import BayesOptSearch
401
+
402
+ search = BayesOptSearch(
403
+ metric="val_loss",
404
+ mode="min"
405
+ )
406
+
407
+ analysis = tune.run(
408
+ train_fn,
409
+ config=config,
410
+ num_samples=50,
411
+ search_alg=search,
412
+ resources_per_trial={"gpu": 1}
413
+ )
414
+ ```
415
+
416
+ ### 3. Grid Search
417
+
418
+ ```python
419
+ from ray import tune
420
+
421
+ # Exhaustive grid search
422
+ config = {
423
+ "lr": tune.grid_search([1e-5, 1e-4, 1e-3, 1e-2]),
424
+ "batch_size": tune.grid_search([16, 32, 64, 128]),
425
+ "optimizer": tune.grid_search(['adam', 'sgd', 'adamw'])
426
+ }
427
+
428
+ # Total trials: 4 × 4 × 3 = 48
429
+ analysis = tune.run(train_fn, config=config)
430
+ ```
431
+
432
+ ### 4. Random Search
433
+
434
+ ```python
435
+ config = {
436
+ "lr": tune.loguniform(1e-5, 1e-1),
437
+ "batch_size": tune.choice([16, 32, 64, 128]),
438
+ "dropout": tune.uniform(0.0, 0.5),
439
+ "hidden_size": tune.randint(64, 512)
440
+ }
441
+
442
+ # Random sampling
443
+ analysis = tune.run(
444
+ train_fn,
445
+ config=config,
446
+ num_samples=100 # 100 random samples
447
+ )
448
+ ```
449
+
450
+ ## Best Practices
451
+
452
+ ### 1. Start Simple
453
+
454
+ ```python
455
+ # Phase 1: Coarse search (fast)
456
+ coarse_config = {
457
+ "lr": tune.loguniform(1e-5, 1e-1),
458
+ "batch_size": tune.choice([32, 64])
459
+ }
460
+ coarse_analysis = tune.run(train_fn, config=coarse_config, num_samples=10, max_epochs=5)
461
+
462
+ # Phase 2: Fine-tune around best (slow)
463
+ best_lr = coarse_analysis.best_config["lr"]
464
+ fine_config = {
465
+ "lr": tune.uniform(best_lr * 0.5, best_lr * 2),
466
+ "batch_size": tune.choice([16, 32, 64, 128])
467
+ }
468
+ fine_analysis = tune.run(train_fn, config=fine_config, num_samples=20, max_epochs=20)
469
+ ```
470
+
471
+ ### 2. Use Checkpointing
472
+
473
+ ```python
474
+ def train_fn(config, checkpoint_dir=None):
475
+ model = LitModel(lr=config["lr"])
476
+
477
+ trainer = L.Trainer(
478
+ max_epochs=100,
479
+ callbacks=[
480
+ TuneReportCheckpointCallback(
481
+ metrics={"loss": "val_loss"},
482
+ filename="checkpoint",
483
+ on="validation_end"
484
+ )
485
+ ]
486
+ )
487
+
488
+ # Resume from checkpoint if exists
489
+ ckpt_path = None
490
+ if checkpoint_dir:
491
+ ckpt_path = os.path.join(checkpoint_dir, "checkpoint")
492
+
493
+ trainer.fit(model, train_loader, val_loader, ckpt_path=ckpt_path)
494
+ ```
495
+
496
+ ### 3. Monitor Resource Usage
497
+
498
+ ```python
499
+ import GPUtil
500
+
501
+ def train_fn(config):
502
+ # Before training
503
+ GPUs = GPUtil.getGPUs()
504
+ print(f"GPU memory before: {GPUs[0].memoryUsed} MB")
505
+
506
+ # Train
507
+ model = LitModel(lr=config["lr"], batch_size=config["batch_size"])
508
+ trainer.fit(model, train_loader)
509
+
510
+ # After training
511
+ GPUs = GPUtil.getGPUs()
512
+ print(f"GPU memory after: {GPUs[0].memoryUsed} MB")
513
+ ```
514
+
515
+ ## Common Issues
516
+
517
+ ### Issue: Trials Running Out of Memory
518
+
519
+ **Solution**: Reduce the number of concurrent trials or the batch size
520
+ ```python
521
+ analysis = tune.run(
522
+ train_fn,
523
+ config=config,
524
+ resources_per_trial={"gpu": 0.5}, # 2 trials per GPU
525
+ max_concurrent_trials=2 # Limit concurrent trials
526
+ )
527
+ ```
528
+
529
+ ### Issue: Slow Hyperparameter Search
530
+
531
+ **Solution**: Use an early-stopping scheduler
532
+ ```python
533
+ from ray.tune.schedulers import ASHAScheduler
534
+
535
+ scheduler = ASHAScheduler(
536
+ max_t=100,
537
+ grace_period=5, # Stop bad trials after 5 epochs
538
+ reduction_factor=3
539
+ )
540
+ ```
541
+
542
+ ### Issue: Can't Reproduce Best Trial
543
+
544
+ **Solution**: Set the random seeds inside the training function
545
+ ```python
546
+ def train_fn(config):
547
+ L.seed_everything(42, workers=True)
548
+ # Rest of training...
549
+ ```
550
+
551
+ ## Resources
552
+
553
+ - Ray Tune + Lightning: https://docs.ray.io/en/latest/tune/examples/tune-pytorch-lightning.html
554
+ - Optuna: https://optuna.readthedocs.io/
555
+ - WandB Sweeps: https://docs.wandb.ai/guides/sweeps
556
+ - Lightning Tuner: https://lightning.ai/docs/pytorch/stable/tuning.html