@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,568 @@
1
+ # Custom Models
2
+
3
+ Guide to implementing custom model architectures in LitGPT.
4
+
5
+ ## Overview
6
+
7
+ LitGPT's clean, single-file implementations make it easy to create custom architectures. You can extend the base `GPT` class or create entirely new models.
8
+
9
+ **Use cases**:
10
+ - Implementing new research architectures
11
+ - Adapting models for specific domains
12
+ - Experimenting with attention mechanisms
13
+ - Adding custom layers or components
14
+
15
+ ## Key Files and Classes
16
+
17
+ ### Core Architecture (`litgpt/model.py`)
18
+
19
+ **Main classes**:
20
+ - `GPT`: Top-level model class
21
+ - `Block`: Transformer block (attention + MLP)
22
+ - `CausalSelfAttention`: Attention mechanism
23
+ - `MLP`: Feed-forward network
24
+ - `RMSNorm` / `LayerNorm`: Normalization layers
25
+
26
+ **Configuration** (`litgpt/config.py`):
27
+ - `Config`: Base configuration dataclass
28
+ - Model-specific configs: `LlamaConfig`, `MistralConfig`, `PhiConfig`, etc.
29
+
30
+ ## Custom Architecture Workflow
31
+
32
+ ### Step 1: Define Configuration
33
+
34
+ Create a `Config` dataclass with your model's hyperparameters:
35
+
36
+ ```python
37
+ from dataclasses import dataclass
38
+ from litgpt.config import Config
39
+
40
+ @dataclass
41
+ class MyModelConfig(Config):
42
+ """Configuration for my custom model."""
43
+ # Standard parameters
44
+ name: str = "my-model-7b"
45
+ block_size: int = 4096
46
+ vocab_size: int = 32000
47
+ n_layer: int = 32
48
+ n_head: int = 32
49
+ n_embd: int = 4096
50
+
51
+ # Custom parameters
52
+ custom_param: float = 0.1
53
+ use_custom_attention: bool = True
54
+
55
+ # Optional: override defaults
56
+ rope_base: int = 10000
57
+ intermediate_size: int = 11008
58
+ ```
59
+
60
+ ### Step 2: Implement Custom Components
61
+
62
+ #### Option A: Custom Attention
63
+
64
+ ```python
65
+ from litgpt.model import CausalSelfAttention
66
+ import torch
67
+ import torch.nn as nn
68
+
69
+ class CustomAttention(CausalSelfAttention):
70
+ """Custom attention mechanism."""
71
+
72
+ def __init__(self, config):
73
+ super().__init__(config)
74
+ # Add custom components
75
+ self.custom_proj = nn.Linear(config.n_embd, config.n_embd)
76
+ self.custom_param = config.custom_param
77
+
78
+ def forward(self, x, mask=None, input_pos=None):
79
+ B, T, C = x.size()
80
+
81
+ # Standard Q, K, V projections
82
+ q = self.attn(x)
83
+ k = self.attn(x)
84
+ v = self.attn(x)
85
+
86
+ # Custom modification
87
+ q = q + self.custom_proj(x) * self.custom_param
88
+
89
+ # Rest of attention computation
90
+ q = q.view(B, T, self.n_head, self.head_size)
91
+ k = k.view(B, T, self.n_query_groups, self.head_size)
92
+ v = v.view(B, T, self.n_query_groups, self.head_size)
93
+
94
+ # Scaled dot-product attention
95
+ y = self.scaled_dot_product_attention(q, k, v, mask=mask)
96
+
97
+ y = y.reshape(B, T, C)
98
+ return self.proj(y)
99
+ ```
100
+
101
+ #### Option B: Custom MLP
102
+
103
+ ```python
104
+ from litgpt.model import MLP
105
+
106
+ class CustomMLP(MLP):
107
+ """Custom feed-forward network."""
108
+
109
+ def __init__(self, config):
110
+ super().__init__(config)
111
+ # Add custom layers
112
+ self.custom_layer = nn.Linear(config.intermediate_size, config.intermediate_size)
113
+
114
+ def forward(self, x):
115
+ x = self.fc_1(x)
116
+ x = self.act(x)
117
+ x = self.custom_layer(x) # Custom modification
118
+ x = self.fc_2(x)
119
+ return x
120
+ ```
121
+
122
+ #### Option C: Custom Block
123
+
124
+ ```python
125
+ from litgpt.model import Block
126
+
127
+ class CustomBlock(Block):
128
+ """Custom transformer block."""
129
+
130
+ def __init__(self, config):
131
+ super().__init__(config)
132
+ # Replace attention or MLP
133
+ self.attn = CustomAttention(config)
134
+ # Or: self.mlp = CustomMLP(config)
135
+
136
+ # Add custom components
137
+ self.custom_norm = nn.LayerNorm(config.n_embd)
138
+
139
+ def forward(self, x, input_pos=None, mask=None):
140
+ # Custom forward pass
141
+ h = self.norm_1(x)
142
+ h = self.attn(h, mask=mask, input_pos=input_pos)
143
+ x = x + h
144
+
145
+ # Custom normalization
146
+ x = x + self.custom_norm(x)
147
+
148
+ x = x + self.mlp(self.norm_2(x))
149
+ return x
150
+ ```
151
+
152
+ ### Step 3: Create Custom GPT Model
153
+
154
+ ```python
155
+ from litgpt.model import GPT
156
+ import torch.nn as nn
157
+
158
+ class CustomGPT(GPT):
159
+ """Custom GPT model."""
160
+
161
+ def __init__(self, config: MyModelConfig):
162
+ # Don't call super().__init__() - we reimplement
163
+ nn.Module.__init__(self)
164
+ self.config = config
165
+
166
+ # Standard components
167
+ self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
168
+ self.transformer = nn.ModuleDict(
169
+ dict(
170
+ wte=nn.Embedding(config.vocab_size, config.n_embd),
171
+ h=nn.ModuleList(CustomBlock(config) for _ in range(config.n_layer)),
172
+ ln_f=nn.LayerNorm(config.n_embd),
173
+ )
174
+ )
175
+
176
+ # Custom components
177
+ if config.use_custom_attention:
178
+ self.custom_embedding = nn.Linear(config.n_embd, config.n_embd)
179
+
180
+ # Initialize weights
181
+ self.apply(self._init_weights)
182
+
183
+ def _init_weights(self, module):
184
+ """Initialize weights (required)."""
185
+ if isinstance(module, nn.Linear):
186
+ torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
187
+ if module.bias is not None:
188
+ torch.nn.init.zeros_(module.bias)
189
+ elif isinstance(module, nn.Embedding):
190
+ torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
191
+
192
+ def forward(self, idx, input_pos=None):
193
+ """Forward pass (must match base signature)."""
194
+ B, T = idx.size()
195
+
196
+ # Token embeddings
197
+ x = self.transformer.wte(idx)
198
+
199
+ # Custom embedding modification
200
+ if self.config.use_custom_attention:
201
+ x = x + self.custom_embedding(x)
202
+
203
+ # Transformer blocks
204
+ for block in self.transformer.h:
205
+ x = block(x, input_pos=input_pos)
206
+
207
+ # Final norm + LM head
208
+ x = self.transformer.ln_f(x)
209
+ return self.lm_head(x)
210
+ ```
211
+
212
+ ### Step 4: Register Configuration
213
+
214
+ Add your config to `litgpt/config.py`:
215
+
216
+ ```python
217
+ # In litgpt/config.py
218
+ configs = [
219
+ # ... existing configs ...
220
+
221
+ # My custom model
222
+ dict(
223
+ name="my-model-7b",
224
+ hf_config=dict(org="myorg", name="my-model-7b"),
225
+ block_size=4096,
226
+ vocab_size=32000,
227
+ n_layer=32,
228
+ n_head=32,
229
+ n_embd=4096,
230
+ custom_param=0.1,
231
+ ),
232
+ ]
233
+ ```
234
+
235
+ ### Step 5: Use Your Custom Model
236
+
237
+ ```python
238
+ from litgpt.api import LLM
239
+ from my_model import CustomGPT, MyModelConfig
240
+
241
+ # Initialize
242
+ config = MyModelConfig()
243
+ model = CustomGPT(config)
244
+
245
+ # Wrap with LLM API
246
+ llm = LLM(model=model, tokenizer_dir="path/to/tokenizer")
247
+
248
+ # Generate
249
+ result = llm.generate("Once upon a time", max_new_tokens=100)
250
+ print(result)
251
+ ```
252
+
253
+ ## Real Example: Adapter Fine-tuning
254
+
255
+ LitGPT's `Adapter` implementation shows a complete custom architecture:
256
+
257
+ ### Adapter Configuration
258
+
259
+ ```python
260
+ @dataclass
261
+ class Config(BaseConfig):
262
+ """Adds adapter-specific parameters."""
263
+ adapter_prompt_length: int = 10
264
+ adapter_start_layer: int = 2
265
+ ```
266
+
267
+ ### Adapter GPT Model
268
+
269
+ ```python
270
+ class GPT(BaseModel):
271
+ """GPT model with adapter layers."""
272
+
273
+ def __init__(self, config: Config):
274
+ nn.Module.__init__(self)
275
+ self.config = config
276
+
277
+ # Standard components
278
+ self.lm_head = nn.Linear(config.n_embd, config.padded_vocab_size, bias=False)
279
+ self.transformer = nn.ModuleDict(
280
+ dict(
281
+ wte=nn.Embedding(config.padded_vocab_size, config.n_embd),
282
+ h=nn.ModuleList(Block(config, i) for i in range(config.n_layer)),
283
+ ln_f=config.norm_class(config.n_embd, eps=config.norm_eps),
284
+ )
285
+ )
286
+
287
+ # Adapter-specific: gating factor
288
+ self.gating_factor = torch.nn.Parameter(torch.zeros(1))
289
+ ```
290
+
291
+ ### Adapter Block
292
+
293
+ ```python
294
+ class Block(BaseBlock):
295
+ """Transformer block with adapter."""
296
+
297
+ def __init__(self, config: Config, block_idx: int):
298
+ super().__init__()
299
+ self.norm_1 = config.norm_class(config.n_embd, eps=config.norm_eps)
300
+ self.attn = CausalSelfAttention(config, block_idx)
301
+ self.norm_2 = config.norm_class(config.n_embd, eps=config.norm_eps)
302
+ self.mlp = config.mlp_class(config)
303
+
304
+ # Adapter: add prefix for certain layers
305
+ self.adapter_wte = (
306
+ nn.Embedding(config.adapter_prompt_length, config.n_embd)
307
+ if block_idx >= config.adapter_start_layer
308
+ else None
309
+ )
310
+ ```
311
+
312
+ ### Adapter Attention
313
+
314
+ ```python
315
+ class CausalSelfAttention(BaseCausalSelfAttention):
316
+ """Attention with adapter prompts."""
317
+
318
+ def forward(self, x: torch.Tensor, ...) -> torch.Tensor:
319
+ B, T, C = x.size()
320
+
321
+         # Add adapter prefix if enabled (note: the Block example above creates adapter_wte on the block — a real implementation must make it reachable from this attention module)
322
+ if self.adapter_wte is not None:
323
+ adapter_prompts = self.adapter_wte(
324
+ torch.arange(self.adapter_prompt_length, device=x.device)
325
+ )
326
+ adapter_prompts = adapter_prompts.unsqueeze(0).expand(B, -1, -1)
327
+ x = torch.cat([adapter_prompts, x], dim=1)
328
+
329
+ # Standard attention with gating
330
+ q, k, v = self.attn(x).split(self.n_embd, dim=2)
331
+ y = self.scaled_dot_product_attention(q, k, v, mask=mask)
332
+
333
+         # Apply learned gating (note: gating_factor must be defined on this attention module; the GPT example above defines it on the model)
334
+ y = y * self.gating_factor
335
+
336
+ return self.proj(y)
337
+ ```
338
+
339
+ See full implementation: `litgpt/finetune/adapter.py`
340
+
341
+ ## Real Example: AdapterV2
342
+
343
+ AdapterV2 shows custom linear layers:
344
+
345
+ ### AdapterV2Linear
346
+
347
+ ```python
348
+ class AdapterV2Linear(torch.nn.Module):
349
+ """Linear layer with low-rank adapter."""
350
+
351
+ def __init__(self, in_features, out_features, adapter_rank=8, **kwargs):
352
+ super().__init__()
353
+ self.linear = torch.nn.Linear(in_features, out_features, **kwargs)
354
+
355
+ # Adapter: low-rank bottleneck
356
+ self.adapter_down = torch.nn.Linear(in_features, adapter_rank, bias=False)
357
+ self.adapter_up = torch.nn.Linear(adapter_rank, out_features, bias=False)
358
+
359
+        # Zero-init the up-projection so the adapter contributes nothing at start (forward == base linear)
360
+ torch.nn.init.zeros_(self.adapter_up.weight)
361
+
362
+ def forward(self, x):
363
+ # Original linear transformation
364
+ out = self.linear(x)
365
+
366
+ # Add adapter contribution
367
+ adapter_out = self.adapter_up(self.adapter_down(x))
368
+ return out + adapter_out
369
+ ```
370
+
371
+ See full implementation: `litgpt/finetune/adapter_v2.py`
372
+
373
+ ## Custom Model Checklist
374
+
375
+ - [ ] Define `Config` dataclass with all hyperparameters
376
+ - [ ] Implement custom components (Attention, MLP, Block)
377
+ - [ ] Create custom `GPT` class
378
+ - [ ] Implement `_init_weights()` for proper initialization
379
+ - [ ] Implement `forward()` matching base signature
380
+ - [ ] Register configuration in `litgpt/config.py`
381
+ - [ ] Test with small model (100M params) first
382
+ - [ ] Verify training convergence
383
+ - [ ] Profile memory usage
384
+
385
+ ## Testing Your Custom Model
386
+
387
+ ### Unit Test
388
+
389
+ ```python
390
+ import torch
391
+ from my_model import CustomGPT, MyModelConfig
392
+
393
+ def test_custom_model():
394
+ """Test custom model forward pass."""
395
+ config = MyModelConfig(
396
+ n_layer=2,
397
+ n_head=4,
398
+ n_embd=128,
399
+ vocab_size=1000,
400
+ block_size=256,
401
+ )
402
+
403
+ model = CustomGPT(config)
404
+ model.eval()
405
+
406
+ # Test forward pass
407
+ batch_size = 2
408
+ seq_length = 16
409
+ idx = torch.randint(0, config.vocab_size, (batch_size, seq_length))
410
+
411
+ with torch.no_grad():
412
+ logits = model(idx)
413
+
414
+ assert logits.shape == (batch_size, seq_length, config.vocab_size)
415
+ print("✓ Forward pass works")
416
+
417
+ if __name__ == "__main__":
418
+ test_custom_model()
419
+ ```
420
+
421
+ ### Training Test
422
+
423
+ ```python
424
+ from litgpt.api import LLM
425
+
426
+ def test_training():
427
+ """Test custom model training."""
428
+ config = MyModelConfig(n_layer=2, n_head=4, n_embd=128)
429
+ model = CustomGPT(config)
430
+
431
+ # Small dataset for testing
432
+ data = [
433
+ {"instruction": "Test", "input": "", "output": "OK"}
434
+ ]
435
+
436
+ # Should run without errors
437
+ llm = LLM(model=model)
438
+ # ... training code ...
439
+ print("✓ Training works")
440
+ ```
441
+
442
+ ## Common Patterns
443
+
444
+ ### Adding New Attention Mechanism
445
+
446
+ ```python
447
+ class MyAttention(nn.Module):
448
+ """Template for custom attention."""
449
+
450
+ def __init__(self, config):
451
+ super().__init__()
452
+ self.n_head = config.n_head
453
+ self.n_embd = config.n_embd
454
+ self.head_size = self.n_embd // self.n_head
455
+
456
+ # Q, K, V projections
457
+ self.q_proj = nn.Linear(config.n_embd, config.n_embd, bias=False)
458
+ self.k_proj = nn.Linear(config.n_embd, config.n_embd, bias=False)
459
+ self.v_proj = nn.Linear(config.n_embd, config.n_embd, bias=False)
460
+
461
+ # Output projection
462
+ self.out_proj = nn.Linear(config.n_embd, config.n_embd, bias=False)
463
+
464
+ def forward(self, x, mask=None):
465
+ B, T, C = x.size()
466
+
467
+ # Project Q, K, V
468
+ q = self.q_proj(x).view(B, T, self.n_head, self.head_size)
469
+ k = self.k_proj(x).view(B, T, self.n_head, self.head_size)
470
+ v = self.v_proj(x).view(B, T, self.n_head, self.head_size)
471
+
472
+         # Placeholder — implement your attention here and assign the result to `attn`, e.g.:
473
+         # attn = custom_attention_function(q, k, v, mask)  # shape: (B, T, n_head, head_size)
474
+
475
+ # Output projection
476
+ out = self.out_proj(attn.reshape(B, T, C))
477
+ return out
478
+ ```
479
+
480
+ ### Adding Mixture of Experts
481
+
482
+ ```python
483
+ class MoELayer(nn.Module):
484
+ """Mixture of Experts layer."""
485
+
486
+ def __init__(self, config):
487
+ super().__init__()
488
+ self.num_experts = config.num_experts
489
+ self.top_k = config.moe_top_k
490
+
491
+ # Router
492
+ self.router = nn.Linear(config.n_embd, self.num_experts)
493
+
494
+ # Experts
495
+ self.experts = nn.ModuleList([
496
+ MLP(config) for _ in range(self.num_experts)
497
+ ])
498
+
499
+ def forward(self, x):
500
+ B, T, C = x.size()
501
+
502
+ # Route tokens to experts
503
+ router_logits = self.router(x) # (B, T, num_experts)
504
+ router_probs = torch.softmax(router_logits, dim=-1)
505
+
506
+ # Select top-k experts
507
+ top_k_probs, top_k_indices = torch.topk(router_probs, self.top_k, dim=-1)
508
+
509
+ # Process through selected experts
510
+ output = torch.zeros_like(x)
511
+ for i in range(self.top_k):
512
+ expert_idx = top_k_indices[:, :, i]
513
+ expert_prob = top_k_probs[:, :, i:i+1]
514
+
515
+ # Route to expert
516
+ for expert_id in range(self.num_experts):
517
+ mask = (expert_idx == expert_id)
518
+ if mask.any():
519
+ expert_out = self.experts[expert_id](x[mask])
520
+ output[mask] += expert_out * expert_prob[mask]
521
+
522
+ return output
523
+ ```
524
+
525
+ ### Adding Positional Encoding
526
+
527
+ ```python
528
+ class CustomPositionalEncoding(nn.Module):
529
+ """Custom positional encoding."""
530
+
531
+ def __init__(self, config):
532
+ super().__init__()
533
+ self.n_embd = config.n_embd
534
+ self.register_buffer(
535
+ "pos_encoding",
536
+ self._create_encoding(config.block_size, config.n_embd)
537
+ )
538
+
539
+ def _create_encoding(self, max_len, d_model):
540
+ """Create positional encoding matrix."""
541
+ pos = torch.arange(max_len).unsqueeze(1)
542
+ div = torch.exp(torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10000.0)) / d_model))
543
+
544
+ encoding = torch.zeros(max_len, d_model)
545
+ encoding[:, 0::2] = torch.sin(pos * div)
546
+ encoding[:, 1::2] = torch.cos(pos * div)
547
+ return encoding
548
+
549
+ def forward(self, x):
550
+ """Add positional encoding."""
551
+ return x + self.pos_encoding[:x.size(1), :]
552
+ ```
553
+
554
+ ## Debugging Tips
555
+
556
+ 1. **Start small**: Test with 2 layers, 128 hidden size
557
+ 2. **Check shapes**: Print tensor shapes at each step
558
+ 3. **Verify gradients**: Ensure all parameters have gradients
559
+ 4. **Compare to base**: Run same config with base `GPT` model
560
+ 5. **Profile memory**: Use `torch.cuda.memory_summary()`
561
+
562
+ ## References
563
+
564
+ - Base model: `litgpt/model.py`
565
+ - Configuration: `litgpt/config.py`
566
+ - Adapter example: `litgpt/finetune/adapter.py`
567
+ - AdapterV2 example: `litgpt/finetune/adapter_v2.py`
568
+ - LoRA example: `litgpt/finetune/lora.py`