@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,436 @@
1
+ # PyTorch Lightning Callbacks
2
+
3
+ ## Overview
4
+
5
+ Callbacks add functionality to training without modifying the LightningModule. They capture **non-essential logic** like checkpointing, early stopping, and logging.
6
+
7
+ ## Built-In Callbacks
8
+
9
+ ### 1. ModelCheckpoint
10
+
11
+ **Saves best models during training**:
12
+
13
+ ```python
14
+ from lightning.pytorch.callbacks import ModelCheckpoint
15
+
16
+ # Save top 3 models based on validation loss
17
+ checkpoint = ModelCheckpoint(
18
+ dirpath='checkpoints/',
19
+ filename='model-{epoch:02d}-{val_loss:.2f}',
20
+ monitor='val_loss',
21
+ mode='min',
22
+ save_top_k=3,
23
+ save_last=True, # Also save last epoch
24
+ verbose=True
25
+ )
26
+
27
+ trainer = L.Trainer(callbacks=[checkpoint])
28
+ trainer.fit(model, train_loader, val_loader)
29
+ ```
30
+
31
+ **Configuration options**:
32
+ ```python
33
+ checkpoint = ModelCheckpoint(
34
+ monitor='val_acc', # Metric to monitor
35
+ mode='max', # 'max' for accuracy, 'min' for loss
36
+ save_top_k=5, # Keep best 5 models
37
+ save_last=True, # Save last epoch separately
38
+ every_n_epochs=1, # Save every N epochs
39
+ save_on_train_epoch_end=False, # Save on validation end instead
40
+ filename='best-{epoch}-{val_acc:.3f}', # Naming pattern
41
+ auto_insert_metric_name=False # Don't auto-add metric to filename
42
+ )
43
+ ```
44
+
45
+ **Load checkpoint**:
46
+ ```python
47
+ # Load best model
48
+ best_model_path = checkpoint.best_model_path
49
+ model = LitModel.load_from_checkpoint(best_model_path)
50
+
51
+ # Resume training
52
+ trainer = L.Trainer(callbacks=[checkpoint])
53
+ trainer.fit(model, train_loader, val_loader, ckpt_path='checkpoints/last.ckpt')
54
+ ```
55
+
56
+ ### 2. EarlyStopping
57
+
58
+ **Stops training when metric stops improving**:
59
+
60
+ ```python
61
+ from lightning.pytorch.callbacks import EarlyStopping
62
+
63
+ early_stop = EarlyStopping(
64
+ monitor='val_loss',
65
+ patience=5, # Allow 5 validation checks without improvement
66
+ mode='min',
67
+ min_delta=0.001, # Minimum change to qualify as improvement
68
+ verbose=True,
69
+ strict=True, # Crash if monitored metric not found
70
+ check_on_train_epoch_end=False # Check on validation end
71
+ )
72
+
73
+ trainer = L.Trainer(callbacks=[early_stop])
74
+ trainer.fit(model, train_loader, val_loader)
75
+ # Stops automatically after 5 consecutive validation checks without improvement
76
+ ```
77
+
78
+ **Advanced usage**:
79
+ ```python
80
+ early_stop = EarlyStopping(
81
+ monitor='val_loss',
82
+ patience=10,
83
+ min_delta=0.0,
84
+ verbose=True,
85
+ mode='min',
86
+ stopping_threshold=0.1, # Stop if val_loss < 0.1
87
+ divergence_threshold=5.0, # Stop if val_loss > 5.0
88
+ check_finite=True # Stop on NaN/Inf
89
+ )
90
+ ```
91
+
92
+ ### 3. LearningRateMonitor
93
+
94
+ **Logs learning rate**:
95
+
96
+ ```python
97
+ from lightning.pytorch.callbacks import LearningRateMonitor
98
+
99
+ lr_monitor = LearningRateMonitor(
100
+ logging_interval='epoch', # Or 'step'
101
+ log_momentum=True # Also log momentum
102
+ )
103
+
104
+ trainer = L.Trainer(callbacks=[lr_monitor])
105
+ # Learning rate automatically logged to TensorBoard/WandB
106
+ ```
107
+
108
+ ### 4. TQDMProgressBar
109
+
110
+ **Customizes progress bar**:
111
+
112
+ ```python
113
+ from lightning.pytorch.callbacks import TQDMProgressBar
114
+
115
+ progress_bar = TQDMProgressBar(
116
+ refresh_rate=10, # Update every 10 batches
117
+ process_position=0
118
+ )
119
+
120
+ trainer = L.Trainer(callbacks=[progress_bar])
121
+ ```
122
+
123
+ ### 5. GradientAccumulationScheduler
124
+
125
+ **Dynamic gradient accumulation**:
126
+
127
+ ```python
128
+ from lightning.pytorch.callbacks import GradientAccumulationScheduler
129
+
130
+ # Accumulate fewer batches per optimizer step as training progresses (8 -> 4 -> 2)
131
+ accumulator = GradientAccumulationScheduler(
132
+ scheduling={
133
+ 0: 8, # Epochs 0-4: accumulate 8 batches
134
+ 5: 4, # Epochs 5-9: accumulate 4 batches
135
+ 10: 2 # Epochs 10+: accumulate 2 batches
136
+ }
137
+ )
138
+
139
+ trainer = L.Trainer(callbacks=[accumulator])
140
+ ```
141
+
142
+ ### 6. StochasticWeightAveraging (SWA)
143
+
144
+ **Averages weights for better generalization**:
145
+
146
+ ```python
147
+ from lightning.pytorch.callbacks import StochasticWeightAveraging
148
+
149
+ swa = StochasticWeightAveraging(
150
+ swa_lrs=1e-2, # SWA learning rate
151
+ swa_epoch_start=0.8, # Start at 80% of training
152
+ annealing_epochs=10, # Annealing period
153
+ annealing_strategy='cos' # 'cos' or 'linear'
154
+ )
155
+
156
+ trainer = L.Trainer(callbacks=[swa])
157
+ ```
158
+
159
+ ## Custom Callbacks
160
+
161
+ ### Basic Custom Callback
162
+
163
+ ```python
164
+ from lightning.pytorch.callbacks import Callback
165
+
166
+ class PrintingCallback(Callback):
167
+ def on_train_start(self, trainer, pl_module):
168
+ print("Training is starting!")
169
+
170
+ def on_train_end(self, trainer, pl_module):
171
+ print("Training is done!")
172
+
173
+ def on_train_epoch_end(self, trainer, pl_module):
174
+ print(f"Epoch {trainer.current_epoch} ended")
175
+
176
+ # Use it
177
+ trainer = L.Trainer(callbacks=[PrintingCallback()])
178
+ ```
179
+
180
+ ### Advanced Custom Callback
181
+
182
+ ```python
183
+ class MetricsCallback(Callback):
184
+ """Logs custom metrics every N batches."""
185
+
186
+ def __init__(self, log_every_n_batches=100):
187
+ self.log_every_n_batches = log_every_n_batches
188
+ self.metrics = []
189
+
190
+ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
191
+ if batch_idx % self.log_every_n_batches == 0:
192
+ # Compute custom metric
193
+ metric = self.compute_metric(outputs)
194
+ self.metrics.append(metric)
195
+
196
+ # Log to Lightning
197
+ pl_module.log('custom_metric', metric)
198
+
199
+ def compute_metric(self, outputs):
200
+ # Your custom logic
201
+ return outputs['loss'].item()
202
+
203
+ def state_dict(self):
204
+ """Save callback state in checkpoint."""
205
+ return {'metrics': self.metrics}
206
+
207
+ def load_state_dict(self, state_dict):
208
+ """Restore callback state from checkpoint."""
209
+ self.metrics = state_dict['metrics']
210
+ ```
211
+
212
+ ### Gradient Monitoring Callback
213
+
214
+ ```python
215
+ class GradientMonitorCallback(Callback):
216
+ """Monitor gradient norms."""
217
+
218
+ def on_after_backward(self, trainer, pl_module):
219
+ # Compute gradient norm
220
+ total_norm = 0.0
221
+ for p in pl_module.parameters():
222
+ if p.grad is not None:
223
+ param_norm = p.grad.data.norm(2)
224
+ total_norm += param_norm.item() ** 2
225
+ total_norm = total_norm ** 0.5
226
+
227
+ # Log
228
+ pl_module.log('grad_norm', total_norm)
229
+
230
+ # Warn if exploding
231
+ if total_norm > 100:
232
+ print(f"Warning: Large gradient norm: {total_norm:.2f}")
233
+ ```
234
+
235
+ ### Model Inspection Callback
236
+
237
+ ```python
238
+ class ModelInspectionCallback(Callback):
239
+ """Inspect model activations during training."""
240
+
241
+ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx):
242
+ if batch_idx == 0: # First batch of epoch
243
+ # Register hooks
244
+ self.activations = {}
245
+
246
+ def get_activation(name):
247
+ def hook(model, input, output):
248
+ self.activations[name] = output.detach()
249
+ return hook
250
+
251
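+ # Attach to specific layers. NOTE: register_forward_hook returns a handle; keep the handles and call .remove() on them once the statistics are logged, otherwise a new hook is added on every epoch and stays active for every batch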
252
+ pl_module.model.layer1.register_forward_hook(get_activation('layer1'))
253
+ pl_module.model.layer2.register_forward_hook(get_activation('layer2'))
254
+
255
+ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
256
+ if batch_idx == 0:
257
+ # Log activation statistics
258
+ for name, activation in self.activations.items():
259
+ mean = activation.mean().item()
260
+ std = activation.std().item()
261
+ pl_module.log(f'{name}_mean', mean)
262
+ pl_module.log(f'{name}_std', std)
263
+ ```
264
+
265
+ ## Callback Hooks
266
+
267
+ **All available hooks**:
268
+
269
+ ```python
270
+ class MyCallback(Callback):
271
+ # Setup/Teardown
272
+ def setup(self, trainer, pl_module, stage):
273
+ """Called at beginning of fit/test/predict."""
274
+ pass
275
+
276
+ def teardown(self, trainer, pl_module, stage):
277
+ """Called at end of fit/test/predict."""
278
+ pass
279
+
280
+ # Training
281
+ def on_train_start(self, trainer, pl_module):
282
+ pass
283
+
284
+ def on_train_epoch_start(self, trainer, pl_module):
285
+ pass
286
+
287
+ def on_train_batch_start(self, trainer, pl_module, batch, batch_idx):
288
+ pass
289
+
290
+ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
291
+ pass
292
+
293
+ def on_train_epoch_end(self, trainer, pl_module):
294
+ pass
295
+
296
+ def on_train_end(self, trainer, pl_module):
297
+ pass
298
+
299
+ # Validation
300
+ def on_validation_start(self, trainer, pl_module):
301
+ pass
302
+
303
+ def on_validation_epoch_start(self, trainer, pl_module):
304
+ pass
305
+
306
+ def on_validation_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx=0):
307
+ pass
308
+
309
+ def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx=0):
310
+ pass
311
+
312
+ def on_validation_epoch_end(self, trainer, pl_module):
313
+ pass
314
+
315
+ def on_validation_end(self, trainer, pl_module):
316
+ pass
317
+
318
+ # Test (same structure as validation)
319
+ def on_test_start(self, trainer, pl_module):
320
+ pass
321
+ # ... (on_test_epoch_start, on_test_batch_start, on_test_batch_end, on_test_epoch_end, on_test_end)
322
+
323
+ # Predict
324
+ def on_predict_start(self, trainer, pl_module):
325
+ pass
326
+ # ... (on_predict_epoch_start, on_predict_batch_start, on_predict_batch_end, on_predict_epoch_end, on_predict_end)
327
+
328
+ # Backward
329
+ def on_before_backward(self, trainer, pl_module, loss):
330
+ pass
331
+
332
+ def on_after_backward(self, trainer, pl_module):
333
+ pass
334
+
335
+ # Optimizer
336
+ def on_before_optimizer_step(self, trainer, pl_module, optimizer):
337
+ pass
338
+
339
+ # Checkpointing
340
+ def on_save_checkpoint(self, trainer, pl_module, checkpoint):
341
+ """Add data to checkpoint."""
342
+ pass
343
+
344
+ def on_load_checkpoint(self, trainer, pl_module, checkpoint):
345
+ """Restore data from checkpoint."""
346
+ pass
347
+ ```
348
+
349
+ ## Combining Multiple Callbacks
350
+
351
+ ```python
352
+ from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
353
+
354
+ # Create all callbacks
355
+ checkpoint = ModelCheckpoint(monitor='val_loss', mode='min', save_top_k=3)
356
+ early_stop = EarlyStopping(monitor='val_loss', patience=5)
357
+ lr_monitor = LearningRateMonitor(logging_interval='epoch')
358
+ custom_callback = MyCallback()  # any Callback subclass, e.g. MyCallback from "Callback Hooks"
359
+
360
+ # Add all to Trainer
361
+ trainer = L.Trainer(
362
+ callbacks=[checkpoint, early_stop, lr_monitor, custom_callback]
363
+ )
364
+
365
+ trainer.fit(model, train_loader, val_loader)
366
+ ```
367
+
368
+ **Execution order**: Callbacks execute in the order they're added, except that `ModelCheckpoint` callbacks are always moved to the end of the list and run last.
369
+
370
+ ## Best Practices
371
+
372
+ ### 1. Keep Callbacks Independent
373
+
374
+ **Bad** (dependent on other callback):
375
+ ```python
376
+ class BadCallback(Callback):
377
+ def on_train_end(self, trainer, pl_module):
378
+ # Assumes ModelCheckpoint is present
379
+ best_path = trainer.checkpoint_callback.best_model_path # Fragile!
380
+ ```
381
+
382
+ **Good** (self-contained):
383
+ ```python
384
+ class GoodCallback(Callback):
385
+ def on_train_end(self, trainer, pl_module):
386
+ # Find checkpoint callback if present
387
+ for callback in trainer.callbacks:
388
+ if isinstance(callback, ModelCheckpoint):
389
+ best_path = callback.best_model_path
390
+ break
391
+ ```
392
+
393
+ ### 2. Use State Dict for Persistence
394
+
395
+ ```python
396
+ class StatefulCallback(Callback):
397
+ def __init__(self):
398
+ self.counter = 0
399
+ self.history = []
400
+
401
+ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
402
+ self.counter += 1
403
+ self.history.append(outputs['loss'].item())
404
+
405
+ def state_dict(self):
406
+ """Save state."""
407
+ return {
408
+ 'counter': self.counter,
409
+ 'history': self.history
410
+ }
411
+
412
+ def load_state_dict(self, state_dict):
413
+ """Restore state."""
414
+ self.counter = state_dict['counter']
415
+ self.history = state_dict['history']
416
+ ```
417
+
418
+ ### 3. Handle Distributed Training
419
+
420
+ ```python
421
+ class DistributedCallback(Callback):
422
+ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
423
+ # Only run on main process
424
+ if trainer.is_global_zero:
425
+ print("This only prints once in distributed training")
426
+
427
+ # Run on all processes
428
+ loss = outputs['loss']
429
+ # ... do something with loss on each GPU
430
+ ```
431
+
432
+ ## Resources
433
+
434
+ - Callback API: https://lightning.ai/docs/pytorch/stable/extensions/callbacks.html
435
+ - Built-in callbacks: https://lightning.ai/docs/pytorch/stable/api_references.html#callbacks
436
+ - Examples: https://github.com/Lightning-AI/pytorch-lightning/tree/master/examples/callbacks