@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,564 @@
1
+ # NanoGPT Training Guide
2
+
3
+ ## Training Loop (~300 Lines)
4
+
5
+ NanoGPT's `train.py` is a self-contained training script with minimal dependencies.
6
+
7
+ ### Complete Training Script Structure
8
+
9
+ ```python
10
+ # train.py (simplified)
11
+ import os
12
+ import time
13
+ import math
14
+ import pickle
15
+ import torch
16
+ from model import GPTConfig, GPT
17
+
18
+ # Training config
19
+ batch_size = 12 # Micro batch size
20
+ block_size = 1024 # Context length
21
+ gradient_accumulation_steps = 5 * 8 # ~60K tokens per batch
22
+
23
+ # Model config
24
+ n_layer = 12
25
+ n_head = 12
26
+ n_embd = 768
27
+ dropout = 0.0
28
+
29
+ # Optimizer config
30
+ learning_rate = 6e-4
31
+ max_iters = 600000
32
+ weight_decay = 1e-1
33
+ beta1 = 0.9
34
+ beta2 = 0.95
35
+ grad_clip = 1.0
36
+
37
+ # Learning rate schedule
38
+ warmup_iters = 2000
39
+ lr_decay_iters = 600000
40
+ min_lr = 6e-5
41
+
42
+ # System
43
+ device = 'cuda'
44
+ dtype = 'bfloat16' if torch.cuda.is_bf16_supported() else 'float16'
45
+ compile = True # PyTorch 2.0
46
+
47
+ # Data loader
48
+ def get_batch(split):
49
+ data = train_data if split == 'train' else val_data
50
+ ix = torch.randint(len(data) - block_size, (batch_size,))
51
+ x = torch.stack([data[i:i+block_size] for i in ix])
52
+ y = torch.stack([data[i+1:i+1+block_size] for i in ix])
53
+ x, y = x.to(device), y.to(device)
54
+ return x, y
55
+
56
+ # Learning rate schedule
57
+ def get_lr(it):
58
+ # Warmup
59
+ if it < warmup_iters:
60
+ return learning_rate * it / warmup_iters
61
+ # Decay to min_lr
62
+ if it > lr_decay_iters:
63
+ return min_lr
64
+ # Cosine decay
65
+ decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
66
+ coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))
67
+ return min_lr + coeff * (learning_rate - min_lr)
68
+
69
+ # Init model
70
+ model = GPT(GPTConfig())
71
+ model.to(device)
72
+
73
+ # Compile model (PyTorch 2.0)
74
+ if compile:
75
+ print("Compiling model...")
76
+ model = torch.compile(model)
77
+
78
+ # Optimizer
79
+ optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device)
80
+
81
+ # Training loop
82
+ for iter_num in range(max_iters):
83
+ # Set learning rate
84
+ lr = get_lr(iter_num)
85
+ for param_group in optimizer.param_groups:
86
+ param_group['lr'] = lr
87
+
88
+ # Gradient accumulation
89
+ for micro_step in range(gradient_accumulation_steps):
90
+ X, Y = get_batch('train')
91
+ with torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16):
92
+ logits, loss = model(X, Y)
93
+ loss = loss / gradient_accumulation_steps
94
+ loss.backward()
95
+
96
+ # Clip gradients
97
+ if grad_clip != 0.0:
98
+ torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
99
+
100
+ # Update weights
101
+ optimizer.step()
102
+ optimizer.zero_grad(set_to_none=True)
103
+
104
+ # Logging
105
+ if iter_num % 100 == 0:
106
+ print(f"iter {iter_num}: loss {loss.item():.4f}, lr {lr:.2e}")
107
+ ```
108
+
109
+ ## Data Preparation
110
+
111
+ ### Shakespeare Character-Level
112
+
113
+ ```bash
114
+ # Step 1: Download Shakespeare
115
+ cd data/shakespeare_char
116
+ python prepare.py
117
+
118
+ # Creates:
119
+ # - train.bin (90% of data, ~1MB)
120
+ # - val.bin (10% of data, ~110KB)
121
+ # - meta.pkl (vocab info)
122
+ ```
123
+
124
+ **prepare.py**:
125
+ ```python
126
+ import os
127
+ import pickle
128
+ import requests
129
+ import numpy as np
130
+
131
+ # Download
132
+ input_file = 'input.txt'
133
+ if not os.path.exists(input_file):
134
+ url = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
135
+ with open(input_file, 'w') as f:
136
+ f.write(requests.get(url).text)
137
+
138
+ # Read and process
139
+ with open(input_file, 'r') as f:
140
+ data = f.read()
141
+
142
+ print(f"Length: {len(data):,} characters")
143
+
144
+ # Create vocabulary
145
+ chars = sorted(list(set(data)))
146
+ vocab_size = len(chars)
147
+ print(f"Vocab size: {vocab_size}")
148
+
149
+ # Create mappings
150
+ stoi = {ch: i for i, ch in enumerate(chars)}
151
+ itos = {i: ch for i, ch in enumerate(chars)}
152
+
153
+ # Encode dataset
154
+ data_ids = [stoi[c] for c in data]
155
+
156
+ # Train/val split
157
+ n = len(data_ids)
158
+ train_ids = data_ids[:int(n*0.9)]
159
+ val_ids = data_ids[int(n*0.9):]
160
+
161
+ # Save as numpy arrays
162
+ train_ids = np.array(train_ids, dtype=np.uint16)
163
+ val_ids = np.array(val_ids, dtype=np.uint16)
164
+ train_ids.tofile('train.bin')
165
+ val_ids.tofile('val.bin')
166
+
167
+ # Save metadata
168
+ meta = {'vocab_size': vocab_size, 'itos': itos, 'stoi': stoi}
169
+ with open('meta.pkl', 'wb') as f:
170
+ pickle.dump(meta, f)
171
+ ```
172
+
173
+ ### OpenWebText (GPT-2 Reproduction)
174
+
175
+ ```bash
176
+ # Step 1: Download OpenWebText (~12GB compressed)
177
+ cd data/openwebtext
178
+ python prepare.py
179
+
180
+ # Warning: Takes 1-2 hours, creates ~54GB of tokenized data
181
+ ```
182
+
183
+ **prepare.py**:
184
+ ```python
185
+ import os
186
+ import numpy as np
187
+ import tiktoken
188
+ from datasets import load_dataset
189
+
190
+ # Download dataset
191
+ dataset = load_dataset("openwebtext", num_proc=8)
192
+
193
+ # Use GPT-2 tokenizer
194
+ enc = tiktoken.get_encoding("gpt2")
195
+
196
+ def tokenize(example):
197
+ ids = enc.encode_ordinary(example['text'])
198
+ ids.append(enc.eot_token) # Add <|endoftext|>
199
+ return {'ids': ids, 'len': len(ids)}
200
+
201
+ # Tokenize (parallel)
202
+ tokenized = dataset.map(
203
+ tokenize,
204
+ remove_columns=['text'],
205
+ desc="Tokenizing",
206
+ num_proc=8
207
+ )
208
+
209
+ # Concatenate all tokens
210
+ train_ids = np.concatenate([np.array(x['ids'], dtype=np.uint16) for x in tokenized['train']])
211
+ print(f"Train tokens: {len(train_ids):,}") # ~9B tokens
212
+
213
+ # Save
214
+ train_ids.tofile('train.bin')
215
+
216
+ # Validation set (sample)
217
+ val_ids = np.concatenate([np.array(x['ids'], dtype=np.uint16) for x in tokenized['train'].select(range(5000))])
218
+ val_ids.tofile('val.bin')
219
+
220
+ # Save metadata
221
+ meta = {'vocab_size': enc.n_vocab, 'eot_token': enc.eot_token}
222
+ with open('meta.pkl', 'wb') as f:
223
+ pickle.dump(meta, f)
224
+ ```
225
+
226
+ ## Learning Rate Schedules
227
+
228
+ ### Cosine Decay with Warmup (GPT-2 style)
229
+
230
+ ```python
231
+ def get_lr(it):
232
+ # 1) Linear warmup
233
+ if it < warmup_iters:
234
+ return learning_rate * it / warmup_iters
235
+
236
+ # 2) Constant at min_lr after decay
237
+ if it > lr_decay_iters:
238
+ return min_lr
239
+
240
+ # 3) Cosine decay in between
241
+ decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
242
+ coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))
243
+ return min_lr + coeff * (learning_rate - min_lr)
244
+
245
+ # Example values
246
+ learning_rate = 6e-4 # Peak LR
247
+ min_lr = 6e-5 # Final LR (10% of peak)
248
+ warmup_iters = 2000 # Warmup steps
249
+ lr_decay_iters = 600000 # Total training steps
250
+ ```
251
+
252
+ **Visualization**:
253
+ ```
254
+ LR
255
+ ^
256
+ | Peak (6e-4)
257
+ | /‾‾‾‾‾‾‾‾‾‾\
258
+ | / \
259
+ | / \_____ Min (6e-5)
260
+ | /
261
+ |/________________> Iteration
262
+ Warmup Cosine Const
263
+ (2K) (598K)
264
+ ```
265
+
266
+ ### Constant LR with Warmup (Simple)
267
+
268
+ ```python
269
+ def get_lr(it):
270
+ if it < warmup_iters:
271
+ return learning_rate * it / warmup_iters
272
+ return learning_rate
273
+
274
+ # Good for small experiments
275
+ ```
276
+
277
+ ## Gradient Accumulation
278
+
279
+ **Effective batch size** = `batch_size × gradient_accumulation_steps × num_gpus`
280
+
281
+ ```python
282
+ # Config
283
+ batch_size = 12 # Per-GPU micro batch
284
+ gradient_accumulation_steps = 40 # Accumulate gradients
285
+ # Effective batch: 12 × 40 = 480 sequences = ~0.5M tokens
286
+
287
+ # Training loop
288
+ optimizer.zero_grad()
289
+ for micro_step in range(gradient_accumulation_steps):
290
+ X, Y = get_batch('train')
291
+ logits, loss = model(X, Y)
292
+ loss = loss / gradient_accumulation_steps # Scale loss
293
+ loss.backward() # Accumulate gradients
294
+
295
+ # Update once
296
+ torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
297
+ optimizer.step()
298
+ ```
299
+
300
+ **Why?**
301
+ - Simulates large batch size without OOM
302
+ - GPT-2 (124M) uses effective batch ~0.5M tokens
303
+ - More stable training
304
+
305
+ ## Mixed Precision Training
306
+
307
+ ### BF16 (Best for A100/H100)
308
+
309
+ ```python
310
+ # Enable bfloat16
311
+ dtype = torch.bfloat16
312
+
313
+ # Training loop
314
+ for iter in range(max_iters):
315
+ X, Y = get_batch('train')
316
+
317
+ # Forward in BF16
318
+ with torch.amp.autocast(device_type='cuda', dtype=torch.bfloat16):
319
+ logits, loss = model(X, Y)
320
+
321
+ # Backward in FP32 (automatic)
322
+ loss.backward()
323
+ optimizer.step()
+ optimizer.zero_grad(set_to_none=True)  # reset grads for next iteration
324
+ ```
325
+
326
+ **Advantages**:
327
+ - No gradient scaler needed
328
+ - Same dynamic range as FP32
329
+ - 2× faster, 50% memory reduction
330
+
331
+ ### FP16 (V100, older GPUs)
332
+
333
+ ```python
334
+ from torch.cuda.amp import GradScaler, autocast
335
+
336
+ scaler = GradScaler()
337
+
338
+ for iter in range(max_iters):
339
+ X, Y = get_batch('train')
340
+
341
+ # Forward in FP16
342
+ with autocast():
343
+ logits, loss = model(X, Y)
344
+
345
+ # Scale loss, backward
346
+ scaler.scale(loss).backward()
347
+
348
+ # Unscale, clip gradients
349
+ scaler.unscale_(optimizer)
350
+ torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
351
+
352
+ # Update weights
353
+ scaler.step(optimizer)
354
+ scaler.update()
+ optimizer.zero_grad(set_to_none=True)  # reset grads for next iteration
355
+ ```
356
+
357
+ ## Distributed Data Parallel (DDP)
358
+
359
+ ### Single Node, Multiple GPUs
360
+
361
+ ```python
362
+ # train.py (DDP version)
363
+ import torch.distributed as dist
364
+ from torch.nn.parallel import DistributedDataParallel as DDP
365
+
366
+ # Initialize
367
+ dist.init_process_group(backend='nccl')
368
+ ddp_rank = int(os.environ['RANK'])
369
+ ddp_local_rank = int(os.environ['LOCAL_RANK'])
370
+ ddp_world_size = int(os.environ['WORLD_SIZE'])
371
+ device = f'cuda:{ddp_local_rank}'
372
+ torch.cuda.set_device(device)
373
+
374
+ # Model
375
+ model = GPT(GPTConfig())
376
+ model.to(device)
377
+ model = DDP(model, device_ids=[ddp_local_rank])
378
+
379
+ # Training loop (same as before, DDP handles gradient sync)
380
+ for iter in range(max_iters):
381
+ X, Y = get_batch('train') # Each rank gets different data
382
+ logits, loss = model(X, Y)
383
+ loss.backward() # DDP syncs gradients across GPUs
384
+ optimizer.step()
385
+ ```
386
+
387
+ **Launch**:
388
+ ```bash
389
+ # 8 GPUs on single node
390
+ torchrun --standalone --nproc_per_node=8 train.py config/train_gpt2.py
391
+ ```
392
+
393
+ ### Multi-Node Training
394
+
395
+ ```bash
396
+ # Node 0 (master)
397
+ torchrun --nproc_per_node=8 \
398
+ --nnodes=4 --node_rank=0 \
399
+ --master_addr=192.168.1.100 --master_port=29500 \
400
+ train.py config/train_gpt2.py
401
+
402
+ # Node 1-3 (workers)
403
+ torchrun --nproc_per_node=8 \
404
+ --nnodes=4 --node_rank=$RANK \
405
+ --master_addr=192.168.1.100 --master_port=29500 \
406
+ train.py config/train_gpt2.py
407
+ ```
408
+
409
+ ## Checkpointing
410
+
411
+ ### Save Checkpoint
412
+
413
+ ```python
414
+ # Save every N iterations
415
+ if iter_num % 5000 == 0:
416
+ checkpoint = {
417
+ 'model': model.state_dict(),
418
+ 'optimizer': optimizer.state_dict(),
419
+ 'model_args': model_args,
420
+ 'iter_num': iter_num,
421
+ 'best_val_loss': best_val_loss,
422
+ 'config': config,
423
+ }
424
+ torch.save(checkpoint, os.path.join(out_dir, f'ckpt_{iter_num}.pt'))
+ torch.save(checkpoint, os.path.join(out_dir, 'ckpt_latest.pt'))  # for resume
425
+ ```
426
+
427
+ ### Resume from Checkpoint
428
+
429
+ ```python
430
+ # Load checkpoint
431
+ init_from = 'resume' # or 'gpt2', 'gpt2-medium', etc.
432
+
433
+ if init_from == 'resume':
434
+ ckpt_path = os.path.join(out_dir, 'ckpt_latest.pt')
435
+ checkpoint = torch.load(ckpt_path, map_location=device)
436
+
437
+ # Restore model
438
+ model_args = checkpoint['model_args']
439
+ model = GPT(GPTConfig(**model_args))
440
+ model.load_state_dict(checkpoint['model'])
441
+
442
+ # Restore optimizer
443
+ optimizer.load_state_dict(checkpoint['optimizer'])
444
+
445
+ # Restore iteration counter
446
+ iter_num = checkpoint['iter_num']
447
+ best_val_loss = checkpoint['best_val_loss']
448
+ ```
449
+
450
+ ## Fine-Tuning Pretrained Models
451
+
452
+ ### Load OpenAI GPT-2 Weights
453
+
454
+ ```python
455
+ # model.py - from_pretrained method
456
+ @classmethod
457
+ def from_pretrained(cls, model_type):
458
+ """Load pretrained GPT-2 model weights from HuggingFace."""
459
+ from transformers import GPT2LMHeadModel
460
+
461
+ # Download from HuggingFace
462
+ model_hf = GPT2LMHeadModel.from_pretrained(model_type)
463
+ sd_hf = model_hf.state_dict()
464
+
465
+ # Filter out keys we don't need
466
+ sd_hf_keys = [k for k in sd_hf.keys() if not k.endswith('.attn.masked_bias')]
467
+ sd_hf_keys = [k for k in sd_hf_keys if not k.endswith('.attn.bias')]
468
+
469
+ # Create our model
470
+ config = GPTConfig.from_model_type(model_type)
471
+ model = GPT(config)
472
+ sd = model.state_dict()
473
+
474
+ # Copy weights (transpose Conv1D → Linear)
475
+ for k in sd_hf_keys:
476
+ if any([k.endswith(w) for w in ['.c_attn.weight', '.c_proj.weight', '.c_fc.weight']]):
477
+ sd[k] = sd_hf[k].t() # Transpose
478
+ else:
479
+ sd[k] = sd_hf[k] # Direct copy
480
+
481
+ model.load_state_dict(sd)
482
+ return model
483
+
484
+ # Usage
485
+ model = GPT.from_pretrained('gpt2') # Load GPT-2 (124M)
486
+ ```
487
+
488
+ ### Fine-Tune on Custom Data
489
+
490
+ ```python
491
+ # config/finetune_shakespeare.py
492
+ init_from = 'gpt2' # Start from GPT-2
493
+ dataset = 'shakespeare_char'
494
+
495
+ # Fine-tuning hyperparameters
496
+ learning_rate = 3e-5 # Lower LR for fine-tuning
497
+ max_iters = 2000 # Short fine-tuning
498
+ warmup_iters = 100
499
+
500
+ # Regularization
501
+ weight_decay = 1e-1
502
+ dropout = 0.2 # Add dropout
503
+
504
+ # Run
505
+ # python train.py config/finetune_shakespeare.py
506
+ ```
507
+
508
+ ## Evaluation
509
+
510
+ ### Perplexity
511
+
512
+ ```python
513
+ @torch.no_grad()
514
+ def estimate_loss():
515
+ model.eval()
516
+ losses = torch.zeros(eval_iters)
517
+
518
+ for k in range(eval_iters):
519
+ X, Y = get_batch('val')
520
+ logits, loss = model(X, Y)
521
+ losses[k] = loss.item()
522
+
523
+ model.train()
524
+ return losses.mean()
525
+
526
+ # Usage
527
+ val_loss = estimate_loss()
528
+ perplexity = math.exp(val_loss)
529
+ print(f"Val perplexity: {perplexity:.2f}")
530
+ ```
531
+
532
+ ### Sample Generation
533
+
534
+ ```python
535
+ # sample.py
536
+ model.eval()
537
+
538
+ start = "ROMEO:" # Prompt
539
+ start_ids = encode(start)
540
+ x = torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...]
541
+
542
+ # Generate
543
+ with torch.no_grad():
544
+ y = model.generate(x, max_new_tokens=500, temperature=0.8, top_k=200)
545
+
546
+ print(decode(y[0].tolist()))
547
+ ```
548
+
549
+ ## Training Times
550
+
551
+ | Setup | Model | Hardware | Batch Size | Time to Perplexity 10 |
552
+ |-------|-------|----------|------------|----------------------|
553
+ | Shakespeare | 10M | 1× CPU | 64 | 5 minutes |
554
+ | Shakespeare | 10M | 1× T4 GPU | 64 | 1 minute |
555
+ | OpenWebText | 124M | 1× A100 | 480 | 7 days |
556
+ | OpenWebText | 124M | 8× A100 | 3840 | 4 days |
557
+ | OpenWebText | 350M | 8× A100 | 1920 | 14 days |
558
+
559
+ ## Resources
560
+
561
+ - Training script: https://github.com/karpathy/nanoGPT/blob/master/train.py
562
+ - Configs: https://github.com/karpathy/nanoGPT/tree/master/config
563
+ - Video walkthrough: "Let's build GPT" (training section)
564
+ - GPT-2 paper: https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf