@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,163 @@
1
+ # Loss Functions
2
+
3
+ ## Built-in Loss Functions
4
+
5
+ Pass as string to `forward_backward(data, loss_fn)`.
6
+
7
+ ## Supervised Learning
8
+
9
+ ### cross_entropy
10
+
11
+ Standard next-token prediction loss.
12
+
13
+ $$\mathcal{L}(\theta) = -\mathbb{E}_x[\log p_\theta(x)]$$
14
+
15
+ ```python
16
+ fwd_bwd = training_client.forward_backward(data, loss_fn="cross_entropy")
17
+ ```
18
+
19
+ **Inputs:**
20
+ - `target_tokens: array[(N,), int]` - Target token IDs
21
+ - `weights: array[(N,), float]` - Loss weights (0 = ignore, 1 = train)
22
+
23
+ **Outputs:**
24
+ - `logprobs: array[(N,), float]` - Log probabilities
25
+ - `loss:sum` (scalar) - Total weighted loss
26
+
27
+ ## Policy Gradient Methods
28
+
29
+ ### importance_sampling
30
+
31
+ Policy gradient with importance weighting for off-policy correction:
32
+
33
+ $$\mathcal{L}_{IS}(\theta) = -\mathbb{E}_{x\sim q}\left[\frac{p_\theta(x)}{q(x)}A(x)\right]$$
34
+
35
+ ```python
36
+ fwd_bwd = training_client.forward_backward(data, loss_fn="importance_sampling")
37
+ ```
38
+
39
+ **Inputs:**
40
+ - `target_tokens: array[(N,), int]` - Target tokens
41
+ - `logprobs: array[(N,), float]` - Sampling logprobs (from q)
42
+ - `advantages: array[(N,), float]` - Advantage values
43
+
44
+ ### ppo
45
+
46
+ Proximal Policy Optimization with clipping:
47
+
48
+ $$\mathcal{L}_{PPO}(\theta) = -\mathbb{E}_{x \sim q}\left[\min\left(\frac{p_\theta(x)}{q(x)} A(x), \text{clip}\left(\frac{p_\theta(x)}{q(x)}, 1-\epsilon, 1+\epsilon\right) A(x)\right)\right]$$
49
+
50
+ ```python
51
+ fwd_bwd = training_client.forward_backward(
52
+ data,
53
+ loss_fn="ppo",
54
+ loss_fn_config={"clip_low_threshold": 0.9, "clip_high_threshold": 1.1}
55
+ )
56
+ ```
57
+
58
+ **Inputs:** Same as `importance_sampling`
59
+
60
+ ### cispo
61
+
62
+ Clipped Importance Sampling Policy Optimization:
63
+
64
+ $$\mathcal{L}_{CISPO}(\theta) = -\mathbb{E}_{x \sim q}\left[\text{sg}\left(\text{clip}\left(\frac{p_\theta(x)}{q(x)}, 1-\epsilon, 1+\epsilon\right)\right) \log p_\theta(x) A(x)\right]$$
65
+
66
+ ```python
67
+ fwd_bwd = training_client.forward_backward(
68
+ data,
69
+ loss_fn="cispo",
70
+ loss_fn_config={"clip_low_threshold": 0.8, "clip_high_threshold": 1.2}
71
+ )
72
+ ```
73
+
74
+ ### dro
75
+
76
+ Direct Reward Optimization with quadratic penalty:
77
+
78
+ $$\mathcal{L}_{DRO}(\theta) = -\mathbb{E}_{x \sim q}\left[\log p_\theta(x) A(x) - \frac{1}{2}\beta \left(\log \frac{p_\theta(x)}{q(x)}\right)^2\right]$$
79
+
80
+ ```python
81
+ fwd_bwd = training_client.forward_backward(
82
+ data,
83
+ loss_fn="dro",
84
+ loss_fn_config={"beta": 0.05}
85
+ )
86
+ ```
87
+
88
+ ## Custom Loss Functions
89
+
90
+ For losses not covered above, use `forward_backward_custom`:
91
+
92
+ ```python
93
+ def custom_loss(data: list[Datum], logprobs: list[torch.Tensor]) -> tuple[torch.Tensor, dict[str, float]]:
94
+ loss = (logprobs ** 2).sum()
95
+ return loss, {"custom_loss": loss.item()}
96
+
97
+ loss, metrics = training_client.forward_backward_custom(data, custom_loss)
98
+ ```
99
+
100
+ ### Multi-Sequence Loss
101
+
102
+ ```python
103
+ def variance_loss(data: list[Datum], logprobs: list[torch.Tensor]) -> tuple[torch.Tensor, dict[str, float]]:
104
+ flat_logprobs = torch.cat(logprobs)
105
+ variance = torch.var(flat_logprobs)
106
+ return variance, {"variance_loss": variance.item()}
107
+ ```
108
+
109
+ ### How forward_backward_custom Works
110
+
111
+ 1. Forward pass computes logprobs
112
+ 2. Custom function computes loss from logprobs
113
+ 3. `loss.backward()` computes grad_outputs
114
+ 4. Second forward_backward with linear surrogate loss
115
+
116
+ **Note:** Uses 1.5x FLOPs and up to 3x wall time compared to built-in losses.
117
+
118
+ ## Implementation Details
119
+
120
+ All losses:
121
+ - Applied at token level with shape `(N,)` where N = sequence length
122
+ - Use sum reduction (not mean)
123
+ - Accept numpy.ndarray or torch.Tensor inputs
124
+
125
+ ### cross_entropy Implementation
126
+ ```python
127
+ elementwise_loss = -target_logprobs * weights
128
+ loss = elementwise_loss.sum()
129
+ ```
130
+
131
+ ### importance_sampling Implementation
132
+ ```python
133
+ prob_ratio = torch.exp(target_logprobs - sampling_logprobs)
134
+ loss = -(prob_ratio * advantages).sum()
135
+ ```
136
+
137
+ ### ppo Implementation
138
+ ```python
139
+ prob_ratio = torch.exp(target_logprobs - sampling_logprobs)
140
+ clipped_ratio = torch.clamp(prob_ratio, 1 - eps, 1 + eps)
141
+ unclipped = prob_ratio * advantages
142
+ clipped = clipped_ratio * advantages
143
+ loss = -torch.min(unclipped, clipped).sum()
144
+ ```
145
+
146
+ ### cispo Implementation
147
+ ```python
148
+ prob_ratio = torch.exp(target_logprobs - sampling_logprobs)
149
+ clipped_ratio = torch.clamp(prob_ratio, 1 - eps, 1 + eps)
150
+ loss = -(clipped_ratio.detach() * target_logprobs * advantages).sum()
151
+ ```
152
+
153
+ ### dro Implementation
154
+ ```python
155
+ quadratic_term = (target_logprobs - sampling_logprobs) ** 2
156
+ loss = -(target_logprobs * advantages - 0.5 * beta * quadratic_term).sum()
157
+ ```
158
+
159
+ ## Notes
160
+
161
+ - KL regularization: Include in reward rather than loss (see `incorporate_kl_penalty` in Cookbook)
162
+ - Aggregation: Token-level losses are summed; for different schemes, modify advantages
163
+ - Reference: [Schulman et al., 2017](https://arxiv.org/abs/1707.06347) for PPO
@@ -0,0 +1,139 @@
1
+ # Available Models & LoRA
2
+
3
+ ## Model Selection Guide
4
+
5
+ - **Use MoE models** - More cost effective than dense
6
+ - **Base models** - Only for research or full post-training
7
+ - **Instruction models** - Fast inference, no chain-of-thought
8
+ - **Hybrid/Reasoning models** - Long chain-of-thought for quality
9
+
10
+ ## Model Lineup
11
+
12
+ | Model | Type | Architecture |
13
+ |-------|------|--------------|
14
+ | **Qwen/Qwen3-VL-235B-A22B-Instruct** | Vision | MoE Large |
15
+ | **Qwen/Qwen3-VL-30B-A3B-Instruct** | Vision | MoE Medium |
16
+ | Qwen/Qwen3-235B-A22B-Instruct-2507 | Instruction | MoE Large |
17
+ | Qwen/Qwen3-30B-A3B-Instruct-2507 | Instruction | MoE Medium |
18
+ | **Qwen/Qwen3-30B-A3B** | Hybrid | MoE Medium |
19
+ | Qwen/Qwen3-30B-A3B-Base | Base | MoE Medium |
20
+ | Qwen/Qwen3-32B | Hybrid | Dense Medium |
21
+ | Qwen/Qwen3-8B | Hybrid | Dense Small |
22
+ | Qwen/Qwen3-8B-Base | Base | Dense Small |
23
+ | Qwen/Qwen3-4B-Instruct-2507 | Instruction | Dense Compact |
24
+ | openai/gpt-oss-120b | Reasoning | MoE Medium |
25
+ | openai/gpt-oss-20b | Reasoning | MoE Small |
26
+ | deepseek-ai/DeepSeek-V3.1 | Hybrid | MoE Large |
27
+ | deepseek-ai/DeepSeek-V3.1-Base | Base | MoE Large |
28
+ | **meta-llama/Llama-3.1-8B** | Base | Dense Small |
29
+ | meta-llama/Llama-3.1-8B-Instruct | Instruction | Dense Small |
30
+ | meta-llama/Llama-3.3-70B-Instruct | Instruction | Dense Large |
31
+ | meta-llama/Llama-3.1-70B | Base | Dense Large |
32
+ | meta-llama/Llama-3.2-3B | Base | Dense Compact |
33
+ | meta-llama/Llama-3.2-1B | Base | Dense Compact |
34
+ | moonshotai/Kimi-K2-Thinking | Reasoning | MoE Large |
35
+
36
+ **Sizes:** Compact (1-4B), Small (8B), Medium (30-32B), Large (70B+)
37
+
38
+ **Types:**
39
+ - **Base**: Pretrained, for post-training research
40
+ - **Instruction**: Chat-tuned, fast inference
41
+ - **Hybrid**: Thinking + non-thinking modes
42
+ - **Reasoning**: Always uses chain-of-thought
43
+ - **Vision**: VLMs with image processing
44
+
45
+ ## LoRA Primer
46
+
47
+ LoRA (Low-Rank Adaptation) fine-tunes a small subset of parameters instead of all model weights.
48
+
49
+ ### When LoRA Works Well
50
+
51
+ - SL on small-medium instruction datasets: **Same as full fine-tuning**
52
+ - RL: **Equivalent to full fine-tuning even with small ranks**
53
+ - Large datasets: May underperform (increase rank)
54
+
55
+ ### LoRA Learning Rate
56
+
57
+ **Critical:** LoRA needs a much higher LR than full fine-tuning (empirically ~10x)!
58
+
59
+ ```python
60
+ from tinker_cookbook.hyperparam_utils import get_lora_lr_over_full_finetune_lr
61
+
62
+ model_name = "meta-llama/Llama-3.1-8B"
63
+ factor = get_lora_lr_over_full_finetune_lr(model_name)
64
+ # Returns 10.0 for all models (empirically validated)
65
+ ```
66
+
67
+ ### Recommended Learning Rate
68
+
69
+ ```python
70
+ from tinker_cookbook.hyperparam_utils import get_lr
71
+
72
+ recommended_lr = get_lr("meta-llama/Llama-3.1-8B")
73
+ ```
74
+
75
+ ### LoRA Rank
76
+
77
+ Default rank: 32
78
+
79
+ ```python
80
+ from tinker_cookbook.hyperparam_utils import get_lora_param_count
81
+
82
+ # Check parameter count
83
+ param_count = get_lora_param_count("meta-llama/Llama-3.1-8B", lora_rank=32)
84
+ ```
85
+
86
+ **Rule of thumb:** LoRA params ≥ completion tokens for good SL results.
87
+
88
+ For RL: Small ranks work fine.
89
+
90
+ **Optimal LR does NOT depend on rank** - same LR works across ranks.
91
+
92
+ ### LoRA Configuration
93
+
94
+ ```python
95
+ training_client = service_client.create_lora_training_client(
96
+ base_model="meta-llama/Llama-3.1-8B",
97
+ rank=32,
98
+ train_attn=True, # Attention layers (default)
99
+ train_mlp=True, # MLP layers (default)
100
+ train_unembed=False, # Output embedding (optional)
101
+ seed=42, # For reproducibility
102
+ )
103
+ ```
104
+
105
+ **Best practice:** Train all layers (attention + MLP), not just attention.
106
+
107
+ ### Mathematical Definition
108
+
109
+ Original weight: W (n×n)
110
+ LoRA: W' = W + BA
111
+
112
+ - B: n×r matrix
113
+ - A: r×n matrix
114
+ - r: rank (default 32)
115
+
116
+ Think of LoRA as an efficient random projection of the parameter space.
117
+
118
+ ## Model Selection Tips
119
+
120
+ 1. **For cost efficiency:** Use MoE models (Qwen3-VL, Qwen3-30B-A3B)
121
+ 2. **For experimentation:** Start with 8B models
122
+ 3. **For vision tasks:** Qwen3-VL-30B-A3B-Instruct (cost-effective)
123
+ 4. **For reasoning:** Hybrid or Reasoning models with CoT
124
+ 5. **For latency:** Instruction models without CoT
125
+
126
+ ## Creating Training Client
127
+
128
+ ```python
129
+ # Get available models
130
+ service_client = tinker.ServiceClient()
131
+ for model in service_client.get_server_capabilities().supported_models:
132
+ print(model.model_name)
133
+
134
+ # Create training client
135
+ training_client = service_client.create_lora_training_client(
136
+ base_model="Qwen/Qwen3-30B-A3B",
137
+ rank=32,
138
+ )
139
+ ```
@@ -0,0 +1,280 @@
1
+ # Example Recipes
2
+
3
+ ## sl_basic.py - Basic Supervised Learning
4
+
5
+ ```python
6
+ import chz
7
+ import sys
8
+ import asyncio
9
+ from tinker_cookbook import cli_utils, model_info
10
+ from tinker_cookbook.recipes.chat_sl import chat_datasets
11
+ from tinker_cookbook.renderers import TrainOnWhat
12
+ from tinker_cookbook.supervised import train
13
+ from tinker_cookbook.supervised.data import FromConversationFileBuilder
14
+ from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig
15
+
16
+ def build_config_blueprint() -> chz.Blueprint[train.Config]:
17
+ model_name = "meta-llama/Llama-3.1-8B"
18
+ renderer_name = model_info.get_recommended_renderer_name(model_name)
19
+ common_config = ChatDatasetBuilderCommonConfig(
20
+ model_name_for_tokenizer=model_name,
21
+ renderer_name=renderer_name,
22
+ max_length=32768,
23
+ batch_size=128,
24
+ train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
25
+ )
26
+ dataset = chat_datasets.NoRobotsBuilder(common_config=common_config)
27
+
28
+ # For custom dataset:
29
+ # dataset = FromConversationFileBuilder(
30
+ # common_config=common_config,
31
+ # file_path="/path/to/dataset.jsonl"
32
+ # )
33
+
34
+ return chz.Blueprint(train.Config).apply({
35
+ "log_path": "/tmp/tinker-examples/sl_basic",
36
+ "model_name": model_name,
37
+ "dataset_builder": dataset,
38
+ "learning_rate": 2e-4,
39
+ "lr_schedule": "linear",
40
+ "num_epochs": 1,
41
+ "eval_every": 8,
42
+ })
43
+
44
+ def main(config: train.Config):
45
+ cli_utils.check_log_dir(config.log_path, behavior_if_exists="ask")
46
+ asyncio.run(train.main(config))
47
+
48
+ if __name__ == "__main__":
49
+ blueprint = build_config_blueprint()
50
+ blueprint.make_from_argv(sys.argv[1:])
51
+ main(blueprint.make())
52
+ ```
53
+
54
+ ## sl_loop.py - Manual Training Loop
55
+
56
+ ```python
57
+ import chz
58
+ import datasets
59
+ import tinker
60
+ from tinker_cookbook import checkpoint_utils, model_info, renderers
61
+ from tinker_cookbook.supervised.common import compute_mean_nll
62
+ from tinker_cookbook.supervised.data import conversation_to_datum
63
+ from tinker_cookbook.tokenizer_utils import get_tokenizer
64
+
65
+ @chz.chz
66
+ class Config:
67
+ log_path: str = "/tmp/tinker-examples/sl-loop"
68
+ model_name: str = "meta-llama/Llama-3.1-8B"
69
+ batch_size: int = 128
70
+ learning_rate: float = 1e-4
71
+ max_length: int = 32768
72
+ train_on_what: renderers.TrainOnWhat = renderers.TrainOnWhat.ALL_ASSISTANT_MESSAGES
73
+ lora_rank: int = 32
74
+
75
+ def main(config: Config):
76
+ tokenizer = get_tokenizer(config.model_name)
77
+ renderer_name = model_info.get_recommended_renderer_name(config.model_name)
78
+ renderer = renderers.get_renderer(renderer_name, tokenizer)
79
+
80
+ dataset = datasets.load_dataset("HuggingFaceH4/no_robots")
81
+ train_dataset = dataset["train"].shuffle(seed=0)
82
+ n_batches = len(train_dataset) // config.batch_size
83
+
84
+ service_client = tinker.ServiceClient()
85
+ training_client = service_client.create_lora_training_client(
86
+ base_model=config.model_name, rank=config.lora_rank
87
+ )
88
+
89
+ for batch_idx in range(n_batches):
90
+ # Linear LR decay
91
+ lr_mult = max(0.0, 1.0 - batch_idx / n_batches)
92
+ adam_params = tinker.AdamParams(
93
+ learning_rate=config.learning_rate * lr_mult
94
+ )
95
+
96
+ # Get batch
97
+ start = batch_idx * config.batch_size
98
+ end = (batch_idx + 1) * config.batch_size
99
+ batch_rows = train_dataset.select(range(start, end))
100
+
101
+ batch = [
102
+ conversation_to_datum(row["messages"], renderer, config.max_length, config.train_on_what)
103
+ for row in batch_rows
104
+ ]
105
+
106
+ # Training step
107
+ fwd_bwd = training_client.forward_backward(batch, loss_fn="cross_entropy")
108
+ optim = training_client.optim_step(adam_params)
109
+ fwd_bwd_result = fwd_bwd.result()
110
+ optim.result()
111
+
112
+ # Compute metrics
113
+ train_nll = compute_mean_nll(
114
+ [x["logprobs"] for x in fwd_bwd_result.loss_fn_outputs],
115
+ [d.loss_fn_inputs["weights"] for d in batch]
116
+ )
117
+ print(f"Step {batch_idx}, NLL: {train_nll:.4f}")
118
+
119
+ if __name__ == "__main__":
120
+ chz.nested_entrypoint(main)
121
+ ```
122
+
123
+ ## rl_basic.py - Basic RL
124
+
125
+ ```python
126
+ import asyncio
127
+ import chz
128
+ import sys
129
+ from tinker_cookbook import cli_utils, model_info
130
+ from tinker_cookbook.recipes.math_rl.math_env import Gsm8kDatasetBuilder
131
+ from tinker_cookbook.rl import train
132
+
133
+ def build_config_blueprint() -> chz.Blueprint[train.Config]:
134
+ model_name = "meta-llama/Llama-3.1-8B"
135
+ renderer_name = model_info.get_recommended_renderer_name(model_name)
136
+ builder = Gsm8kDatasetBuilder(
137
+ batch_size=128,
138
+ group_size=16,
139
+ renderer_name=renderer_name,
140
+ model_name_for_tokenizer=model_name,
141
+ )
142
+ return chz.Blueprint(train.Config).apply({
143
+ "model_name": model_name,
144
+ "log_path": "/tmp/tinker-examples/rl_basic",
145
+ "dataset_builder": builder,
146
+ "learning_rate": 4e-5,
147
+ "max_tokens": 256,
148
+ "eval_every": 0,
149
+ })
150
+
151
+ def main(config: train.Config):
152
+ cli_utils.check_log_dir(config.log_path, behavior_if_exists="ask")
153
+ asyncio.run(train.main(config))
154
+
155
+ if __name__ == "__main__":
156
+ blueprint = build_config_blueprint()
157
+ blueprint.make_from_argv(sys.argv[1:])
158
+ main(blueprint.make())
159
+ ```
160
+
161
+ ## rl_loop.py - Manual RL Loop
162
+
163
+ ```python
164
+ import chz
165
+ import datasets
166
+ import tinker
167
+ from tinker import types
168
+ from tinker.types.tensor_data import TensorData
169
+ import torch
170
+ from tinker_cookbook import model_info, renderers
171
+ from tinker_cookbook.recipes.math_rl.math_grading import extract_boxed, grade_answer
172
+ from tinker_cookbook.tokenizer_utils import get_tokenizer
173
+
174
+ @chz.chz
175
+ class Config:
176
+ model_name: str = "meta-llama/Llama-3.1-8B"
177
+ batch_size: int = 128
178
+ group_size: int = 16
179
+ learning_rate: float = 4e-5
180
+ max_tokens: int = 256
181
+
182
+ def get_reward(response: str, answer: str) -> float:
183
+ try:
184
+ given = extract_boxed(response)
185
+ return 1.0 if grade_answer(given, answer) else 0.0
186
+ except ValueError:
187
+ return 0.0
188
+
189
+ def main(config: Config):
190
+ tokenizer = get_tokenizer(config.model_name)
191
+ renderer = renderers.get_renderer(
192
+ model_info.get_recommended_renderer_name(config.model_name),
193
+ tokenizer
194
+ )
195
+
196
+ dataset = datasets.load_dataset("openai/gsm8k", "main")["train"]
197
+
198
+ service_client = tinker.ServiceClient()
199
+ training_client = service_client.create_lora_training_client(
200
+ base_model=config.model_name, rank=32
201
+ )
202
+
203
+ sampling_params = types.SamplingParams(
204
+ max_tokens=config.max_tokens,
205
+ stop=renderer.get_stop_sequences(),
206
+ )
207
+ adam_params = types.AdamParams(learning_rate=config.learning_rate)
208
+
209
+ for batch_idx in range(len(dataset) // config.batch_size):
210
+ # Save weights for sampling
211
+ path = training_client.save_weights_for_sampler(name=f"{batch_idx:06d}").result().path
212
+ sampling_client = service_client.create_sampling_client(model_path=path)
213
+
214
+ batch_start = batch_idx * config.batch_size
215
+ batch_rows = dataset.select(range(batch_start, batch_start + config.batch_size))
216
+
217
+ datums = []
218
+ for question, answer in zip(batch_rows["question"], batch_rows["answer"]):
219
+ convo = [{"role": "user", "content": question}]
220
+ prompt = renderer.build_generation_prompt(convo)
221
+ prompt_tokens = prompt.to_ints()
222
+
223
+ result = sampling_client.sample(
224
+ prompt=prompt,
225
+ num_samples=config.group_size,
226
+ sampling_params=sampling_params,
227
+ ).result()
228
+
229
+ rewards = [get_reward(renderers.get_text_content(renderer.parse_response(s.tokens)[0]), answer)
230
+ for s in result.sequences]
231
+ mean_reward = sum(rewards) / len(rewards)
232
+ advantages = [r - mean_reward for r in rewards]
233
+
234
+ if all(a == 0 for a in advantages):
235
+ continue
236
+
237
+ for seq, advantage in zip(result.sequences, advantages):
238
+ tokens = prompt_tokens + seq.tokens
239
+ ob_len = len(prompt_tokens) - 1
240
+
241
+ datum = types.Datum(
242
+ model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
243
+ loss_fn_inputs={
244
+ "target_tokens": TensorData.from_torch(torch.tensor(tokens[1:])),
245
+ "logprobs": TensorData.from_torch(torch.tensor([0.0]*ob_len + list(seq.logprobs))),
246
+ "advantages": TensorData.from_torch(torch.tensor([0.0]*ob_len + [advantage]*(len(tokens)-1-ob_len))),
247
+ },
248
+ )
249
+ datums.append(datum)
250
+
251
+ fwd_bwd = training_client.forward_backward(datums, loss_fn="importance_sampling")
252
+ optim = training_client.optim_step(adam_params)
253
+ fwd_bwd.result()
254
+ optim.result()
255
+
256
+ if __name__ == "__main__":
257
+ chz.nested_entrypoint(main)
258
+ ```
259
+
260
+ ## Running Recipes
261
+
262
+ ```bash
263
+ # Basic SL
264
+ python -m tinker_cookbook.recipes.sl_basic
265
+
266
+ # Manual SL loop
267
+ python -m tinker_cookbook.recipes.sl_loop
268
+
269
+ # Basic RL
270
+ python -m tinker_cookbook.recipes.rl_basic
271
+
272
+ # Manual RL loop
273
+ python -m tinker_cookbook.recipes.rl_loop
274
+ ```
275
+
276
+ ## CLI Overrides
277
+
278
+ ```bash
279
+ python -m tinker_cookbook.recipes.sl_basic --learning_rate 1e-4 --batch_size 64
280
+ ```