@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,212 @@
1
+ # Reinforcement Learning
2
+
3
+ ## Quick Start
4
+
5
+ ```bash
6
+ python -m tinker_cookbook.recipes.rl_basic
7
+ ```
8
+
9
+ Fine-tunes Llama-3.1-8B on GSM8K with reward:
10
+ ```
11
+ 1[answer correct] + 0.1 * (1[format correct] - 1)
12
+ ```
13
+
14
+ ## Basic RL Config
15
+
16
+ ```python
17
+ import chz
18
+ import asyncio
19
+ from tinker_cookbook.rl import train
20
+ from tinker_cookbook import model_info
21
+ from tinker_cookbook.recipes.math_rl.math_env import Gsm8kDatasetBuilder
22
+
23
+ def build_config_blueprint() -> chz.Blueprint[train.Config]:
24
+ model_name = "meta-llama/Llama-3.1-8B"
25
+ renderer_name = model_info.get_recommended_renderer_name(model_name)
26
+
27
+ builder = Gsm8kDatasetBuilder(
28
+ batch_size=128,
29
+ group_size=16,
30
+ renderer_name=renderer_name,
31
+ model_name_for_tokenizer=model_name,
32
+ )
33
+
34
+ return chz.Blueprint(train.Config).apply({
35
+ "model_name": model_name,
36
+ "log_path": "/tmp/rl_basic",
37
+ "dataset_builder": builder,
38
+ "learning_rate": 4e-5,
39
+ "max_tokens": 256,
40
+ })
41
+
42
+ if __name__ == "__main__":
43
+ blueprint = build_config_blueprint()
44
+ blueprint.make_from_argv(sys.argv[1:])
45
+ asyncio.run(train.main(blueprint.make()))
46
+ ```
47
+
48
+ ## Key Metrics
49
+
50
+ - `ac_tokens_per_turn` - Tokens per completion
51
+ - `env/all/correct` - Accuracy
52
+ - `env/all/format` - Format compliance
53
+ - `env/all/reward/total` - Mean total reward
54
+ - `entropy` - Per-token entropy
55
+ - `kl_sample_train_v1/v2` - KL divergence (sampler vs learner)
56
+
57
+ ## Custom RL Loop
58
+
59
+ ```python
60
+ import tinker
61
+ from tinker import types
62
+ from tinker.types.tensor_data import TensorData
63
+ import torch
64
+ from tinker_cookbook import model_info, renderers
65
+ from tinker_cookbook.tokenizer_utils import get_tokenizer
66
+
67
+ @chz.chz
68
+ class Config:
69
+ model_name: str = "meta-llama/Llama-3.1-8B"
70
+ batch_size: int = 128
71
+ group_size: int = 16
72
+ learning_rate: float = 4e-5
73
+ max_tokens: int = 256
74
+
75
+ def main(config: Config):
76
+ service_client = tinker.ServiceClient()
77
+ training_client = service_client.create_lora_training_client(
78
+ base_model=config.model_name, rank=32
79
+ )
80
+ tokenizer = training_client.get_tokenizer()
81
+ renderer = renderers.get_renderer(
82
+ model_info.get_recommended_renderer_name(config.model_name),
83
+ tokenizer
84
+ )
85
+
86
+ sampling_params = types.SamplingParams(
87
+ max_tokens=config.max_tokens,
88
+ stop=renderer.get_stop_sequences(),
89
+ )
90
+ adam_params = types.AdamParams(learning_rate=config.learning_rate)
91
+
92
+ for batch_idx, batch_rows in enumerate(dataset):
93
+ # Save weights for sampling
94
+ sampling_path = training_client.save_weights_for_sampler(name=f"{batch_idx:06d}").result().path
95
+ sampling_client = service_client.create_sampling_client(model_path=sampling_path)
96
+
97
+ datums = []
98
+ for question, answer in batch_rows:
99
+ convo = [{"role": "user", "content": question}]
100
+ model_input = renderer.build_generation_prompt(convo)
101
+ prompt_tokens = model_input.to_ints()
102
+
103
+ # Sample group_size responses
104
+ result = sampling_client.sample(
105
+ prompt=model_input,
106
+ num_samples=config.group_size,
107
+ sampling_params=sampling_params,
108
+ ).result()
109
+
110
+ rewards = []
111
+ for seq in result.sequences:
112
+ parsed, _ = renderer.parse_response(seq.tokens)
113
+ reward = compute_reward(parsed["content"], answer)
114
+ rewards.append(reward)
115
+
116
+ # GRPO-style advantage centering
117
+ mean_reward = sum(rewards) / len(rewards)
118
+ advantages = [r - mean_reward for r in rewards]
119
+
120
+ if all(a == 0 for a in advantages):
121
+ continue
122
+
123
+ for seq, advantage in zip(result.sequences, advantages):
124
+ tokens = prompt_tokens + seq.tokens
125
+ ob_len = len(prompt_tokens) - 1
126
+
127
+ datum = types.Datum(
128
+ model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
129
+ loss_fn_inputs={
130
+ "target_tokens": TensorData.from_torch(torch.tensor(tokens[1:])),
131
+ "logprobs": TensorData.from_torch(torch.tensor([0.0]*ob_len + list(seq.logprobs))),
132
+ "advantages": TensorData.from_torch(torch.tensor([0.0]*ob_len + [advantage]*(len(tokens)-1-ob_len))),
133
+ },
134
+ )
135
+ datums.append(datum)
136
+
137
+ # Training step
138
+ fwd_bwd = training_client.forward_backward(datums, loss_fn="importance_sampling")
139
+ optim = training_client.optim_step(adam_params)
140
+ fwd_bwd.result()
141
+ optim.result()
142
+ ```
143
+
144
+ ## Hyperparameters
145
+
146
+ ### Batch and Group Sizes
147
+
148
+ - `batch_size`: Number of unique problems
149
+ - `group_size`: Rollouts per problem (for variance reduction)
150
+
151
+ Scale: `LR ∝ √batch_size`
152
+
153
+ ### Multiple Updates (num_substeps)
154
+
155
+ ```python
156
+ # Default: 1 update per batch
157
+ num_substeps = 1
158
+
159
+ # Multiple updates: split batch into mini-batches
160
+ num_substeps = 4  # batch_size must be divisible by num_substeps
161
+ ```
162
+
163
+ Use with PPO objective. Start with 2-4.
164
+
165
+ ### Streaming Minibatch Training
166
+
167
+ Overlaps sampling and training for throughput:
168
+
169
+ ```python
170
+ StreamMinibatchConfig(
171
+ groups_per_batch=128,
172
+ num_minibatches=8,
173
+ )
174
+ ```
175
+
176
+ ### Async Off-Policy Training
177
+
178
+ For long rollouts:
179
+
180
+ ```python
181
+ AsyncConfig(
182
+ max_steps_off_policy=3, # Max age of trajectories
183
+ groups_per_batch=64,
184
+ )
185
+ ```
186
+
187
+ ## Monitoring
188
+
189
+ ### KL Divergence
190
+
191
+ Monitor `kl_sample_train_v1/v2`:
192
+ - Should stay below 0.01 for stable training
193
+ - High KL indicates numerical instability
194
+
195
+ ### Reward Curves
196
+
197
+ ```python
198
+ import pandas
+ import matplotlib.pyplot as plt
199
+ df = pandas.read_json("/tmp/rl_basic/metrics.jsonl", lines=True)
200
+ plt.plot(df["reward/total"])
201
+ ```
202
+
203
+ ## Loss Functions for RL
204
+
205
+ | Loss | Description |
206
+ |------|-------------|
207
+ | `importance_sampling` | Policy gradient with importance weighting |
208
+ | `ppo` | Proximal Policy Optimization with clipping |
209
+ | `cispo` | Clipped Importance Sampling Policy Optimization |
210
+ | `dro` | Direct Reward Optimization |
211
+
212
+ See [Loss Functions](loss-functions.md) for details.
@@ -0,0 +1,243 @@
1
+ # Rendering to Tokens
2
+
3
+ Renderers convert messages ↔ tokens for training and inference.
4
+
5
+ ## Getting a Renderer
6
+
7
+ ```python
8
+ from tinker_cookbook.model_info import get_recommended_renderer_name
9
+ from tinker_cookbook.renderers import get_renderer
10
+ from tinker_cookbook.tokenizer_utils import get_tokenizer
11
+
12
+ model_name = "meta-llama/Llama-3.1-8B"
13
+ renderer_name = get_recommended_renderer_name(model_name)
14
+
15
+ tokenizer = get_tokenizer(model_name)
16
+ renderer = get_renderer(name=renderer_name, tokenizer=tokenizer)
17
+ ```
18
+
19
+ **Renderer names:** `qwen3`, `qwen3_disable_thinking`, `qwen3_instruct`, `qwen3_vl`, `qwen3_vl_instruct`, `llama3`, `deepseekv3`, `deepseekv3_thinking`, `kimi_k2`, `gpt_oss_no_sysprompt`, `gpt_oss_low_reasoning`, `gpt_oss_medium_reasoning`, `gpt_oss_high_reasoning`, `role_colon`
20
+
21
+ ## HuggingFace Compatibility
22
+
23
+ Default renderers produce **identical tokens** to HuggingFace's `apply_chat_template`:
24
+
25
+ | Renderer | HF Equivalent |
26
+ |----------|---------------|
27
+ | `qwen3` | `apply_chat_template(..., enable_thinking=True)` |
28
+ | `qwen3_disable_thinking` | `apply_chat_template(..., enable_thinking=False)` |
29
+ | `llama3` | `apply_chat_template(...)` * |
30
+ | `deepseekv3` | `apply_chat_template(...)` |
31
+
32
+ \* Llama3 omits "Cutting Knowledge Date..." preamble
33
+
34
+ ## Core Methods
35
+
36
+ ### build_supervised_example
37
+
38
+ For training with loss weights:
39
+
40
+ ```python
41
+ from tinker_cookbook.renderers import TrainOnWhat
42
+
43
+ messages = [
44
+ {"role": "user", "content": "What is 2+2?"},
45
+ {"role": "assistant", "content": "4"},
46
+ ]
47
+
48
+ model_input, weights = renderer.build_supervised_example(
49
+ messages,
50
+ train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
51
+ )
52
+ # model_input: ModelInput (list of chunks)
53
+ # weights: per-token weights (0.0 = prompt, 1.0 = train)
54
+ ```
55
+
56
+ ### build_generation_prompt
57
+
58
+ For inference:
59
+
60
+ ```python
61
+ messages = [
62
+ {"role": "system", "content": "You are helpful."},
63
+ {"role": "user", "content": "What is 2+2?"},
64
+ ]
65
+
66
+ prompt = renderer.build_generation_prompt(messages)
67
+ # Returns ModelInput ready for sampling
68
+ ```
69
+
70
+ ### get_stop_sequences
71
+
72
+ ```python
73
+ stop_sequences = renderer.get_stop_sequences()
74
+
75
+ sampling_params = SamplingParams(
76
+ max_tokens=100,
77
+ stop=stop_sequences,
78
+ )
79
+ ```
80
+
81
+ ### parse_response
82
+
83
+ ```python
84
+ output_tokens = result.sequences[0].tokens
85
+ message, success = renderer.parse_response(output_tokens)
86
+ # {"role": "assistant", "content": "..."}
87
+ ```
88
+
89
+ ## TrainOnWhat Enum
90
+
91
+ ```python
92
+ from tinker_cookbook.renderers import TrainOnWhat
93
+
94
+ # Train on ALL assistant messages
95
+ TrainOnWhat.ALL_ASSISTANT_MESSAGES
96
+
97
+ # Train only on LAST assistant message
98
+ TrainOnWhat.LAST_ASSISTANT_MESSAGE
99
+ ```
100
+
101
+ **ALL_ASSISTANT_MESSAGES:**
102
+ ```python
103
+ messages = [
104
+ {"role": "user", "content": "Hello"},
105
+ {"role": "assistant", "content": "Hi!"}, # weight=1
106
+ {"role": "user", "content": "How are you?"},
107
+ {"role": "assistant", "content": "Good!"}, # weight=1
108
+ ]
109
+ ```
110
+
111
+ **LAST_ASSISTANT_MESSAGE:**
112
+ ```python
113
+ messages = [
114
+ {"role": "user", "content": "Let me think..."},
115
+ {"role": "assistant", "content": "..."}, # weight=0
116
+ {"role": "user", "content": "Answer?"},
117
+ {"role": "assistant", "content": "42"}, # weight=1
118
+ ]
119
+ ```
120
+
121
+ Use `LAST_ASSISTANT_MESSAGE` for classification, reward modeling, preference learning.
122
+
123
+ ## Message Formats
124
+
125
+ ### Text-Only
126
+
127
+ ```python
128
+ messages = [
129
+ {"role": "system", "content": "You are helpful."},
130
+ {"role": "user", "content": "Hello"},
131
+ {"role": "assistant", "content": "Hi!"},
132
+ ]
133
+ ```
134
+
135
+ ### Vision (Multi-Modal)
136
+
137
+ ```python
138
+ messages = [
139
+ {
140
+ "role": "user",
141
+ "content": [
142
+ {"type": "image", "image": image_bytes},
143
+ {"type": "text", "text": "What's in this image?"},
144
+ ]
145
+ },
146
+ {"role": "assistant", "content": "A cat."}
147
+ ]
148
+ ```
149
+
150
+ ## Using with conversation_to_datum
151
+
152
+ ```python
153
+ from tinker_cookbook.supervised.data import conversation_to_datum
154
+
155
+ datum = conversation_to_datum(
156
+ messages=messages,
157
+ renderer=renderer,
158
+ max_length=2048,
159
+ train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
160
+ )
161
+ # Returns Datum ready for training
162
+ ```
163
+
164
+ ## Format Examples
165
+
166
+ ### ChatML
167
+
168
+ ```
169
+ <|im_start|>system
170
+ You are helpful.<|im_end|>
171
+ <|im_start|>user
172
+ What is 2+2?<|im_end|>
173
+ <|im_start|>assistant
174
+ 4<|im_end|>
175
+ ```
176
+
177
+ ### Llama 3
178
+
179
+ ```
180
+ <|begin_of_text|><|start_header_id|>system<|end_header_id|>
181
+
182
+ You are helpful.<|eot_id|><|start_header_id|>user<|end_header_id|>
183
+
184
+ What is 2+2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
185
+
186
+ 4<|eot_id|>
187
+ ```
188
+
189
+ **Important:** Don't construct formats manually—use renderers!
190
+
191
+ ## Vision Renderers
192
+
193
+ For VLMs (Qwen3-VL):
194
+
195
+ ```python
196
+ from tinker_cookbook.image_processing_utils import get_image_processor
197
+
198
+ model_name = "Qwen/Qwen3-VL-235B-A22B-Instruct"
199
+ tokenizer = get_tokenizer(model_name)
200
+ image_processor = get_image_processor(model_name)
201
+
202
+ renderer = renderers.Qwen3VLInstructRenderer(tokenizer, image_processor)
203
+
204
+ messages = [
205
+ {
206
+ "role": "user",
207
+ "content": [
208
+ {"type": "image", "image": "https://example.com/image.png"},
209
+ {"type": "text", "text": "What is this?"},
210
+ ]
211
+ }
212
+ ]
213
+
214
+ prompt = renderer.build_generation_prompt(messages)
215
+ ```
216
+
217
+ ## In Dataset Builders
218
+
219
+ `ChatDatasetBuilder` creates renderer automatically:
220
+
221
+ ```python
222
+ @chz.chz
223
+ class MyDatasetBuilder(ChatDatasetBuilder):
224
+ common_config: ChatDatasetBuilderCommonConfig
225
+
226
+ def __call__(self):
227
+ def map_fn(row):
228
+ return conversation_to_datum(
229
+ messages=messages,
230
+ renderer=self.renderer, # Auto-created from common_config
231
+ max_length=self.common_config.max_length,
232
+ train_on_what=self.common_config.train_on_what,
233
+ )
234
+ # ...
235
+ ```
236
+
237
+ ## Troubleshooting
238
+
239
+ **Wrong format:** Use `get_recommended_renderer_name(model_name)`
240
+
241
+ **High loss:** Check weights (0.0 for prompts, 1.0 for completions)
242
+
243
+ **Generation doesn't stop:** Use `renderer.get_stop_sequences()` in SamplingParams
@@ -0,0 +1,232 @@
1
+ # Supervised Learning
2
+
3
+ ## Quick Start with Cookbook
4
+
5
+ ```bash
6
+ python -m tinker_cookbook.recipes.sl_basic
7
+ ```
8
+
9
+ ## Blueprint Pattern (Recommended)
10
+
11
+ ```python
12
+ import chz
13
+ import sys
14
+ import asyncio
15
+ from tinker_cookbook.supervised import train
16
+ from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig
17
+ from tinker_cookbook.supervised.data import FromConversationFileBuilder
18
+ from tinker_cookbook.renderers import TrainOnWhat
19
+ from tinker_cookbook.model_info import get_recommended_renderer_name
20
+
21
+ def build_config_blueprint() -> chz.Blueprint[train.Config]:
22
+ model_name = "meta-llama/Llama-3.1-8B"
23
+ renderer_name = get_recommended_renderer_name(model_name)
24
+
25
+ common_config = ChatDatasetBuilderCommonConfig(
26
+ model_name_for_tokenizer=model_name,
27
+ renderer_name=renderer_name,
28
+ max_length=2048,
29
+ batch_size=128,
30
+ train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
31
+ )
32
+
33
+ dataset_builder = FromConversationFileBuilder(
34
+ common_config=common_config,
35
+ file_path="data.jsonl",
36
+ )
37
+
38
+ return chz.Blueprint(train.Config).apply({
39
+ "log_path": "/tmp/training",
40
+ "model_name": model_name,
41
+ "dataset_builder": dataset_builder,
42
+ "learning_rate": 2e-4,
43
+ "lr_schedule": "cosine",
44
+ "num_epochs": 3,
45
+ "lora_rank": 32,
46
+ })
47
+
48
+ if __name__ == "__main__":
49
+ blueprint = build_config_blueprint()
50
+ blueprint.make_from_argv(sys.argv[1:])
51
+ asyncio.run(train.main(blueprint.make()))
52
+ ```
53
+
54
+ ## HuggingFace Dataset Builder
55
+
56
+ ```python
57
+ from tinker_cookbook.supervised.types import ChatDatasetBuilder
58
+ from tinker_cookbook.supervised.data import SupervisedDatasetFromHFDataset, conversation_to_datum
59
+ import datasets
60
+
61
+ @chz.chz
62
+ class MyDatasetBuilder(ChatDatasetBuilder):
63
+ common_config: ChatDatasetBuilderCommonConfig
64
+
65
+ def __call__(self):
66
+ hf_dataset = datasets.load_dataset("HuggingFaceH4/no_robots", split="train")
67
+ split = hf_dataset.train_test_split(test_size=0.1, seed=42)
68
+
69
+ def map_fn(row):
70
+ messages = [
71
+ {"role": "user", "content": row["prompt"]},
72
+ {"role": "assistant", "content": row["completion"]},
73
+ ]
74
+ return conversation_to_datum(
75
+ messages=messages,
76
+ renderer=self.renderer,
77
+ max_length=self.common_config.max_length,
78
+ train_on_what=self.common_config.train_on_what,
79
+ )
80
+
81
+ train_dataset = SupervisedDatasetFromHFDataset(
82
+ hf_dataset=split["train"],
83
+ batch_size=self.common_config.batch_size,
84
+ map_fn=map_fn,
85
+ )
86
+ test_dataset = SupervisedDatasetFromHFDataset(
87
+ hf_dataset=split["test"],
88
+ batch_size=self.common_config.batch_size,
89
+ map_fn=map_fn,
90
+ )
91
+ return train_dataset, test_dataset
92
+ ```
93
+
94
+ ## Streaming Large Datasets
95
+
96
+ For datasets >1M examples:
97
+
98
+ ```python
99
+ from tinker_cookbook.supervised.data import StreamingSupervisedDatasetFromHFDataset
100
+
101
+ @chz.chz
102
+ class StreamingDatasetBuilder(ChatDatasetBuilder):
103
+ common_config: ChatDatasetBuilderCommonConfig
104
+ max_prompts: int = 100000
105
+
106
+ def __call__(self):
107
+ ds = datasets.load_dataset(
108
+ "open-thoughts/OpenThoughts3-1.2M",
109
+ split="train",
110
+ streaming=True, # Important!
111
+ )
112
+
113
+ def map_fn(row):
114
+ messages = [
115
+ {"role": "user", "content": row["question"]},
116
+ {"role": "assistant", "content": row["response"]},
117
+ ]
118
+ return conversation_to_datum(
119
+ messages=messages,
120
+ renderer=self.renderer,
121
+ max_length=self.common_config.max_length,
122
+ train_on_what=self.common_config.train_on_what,
123
+ )
124
+
125
+ train_dataset = StreamingSupervisedDatasetFromHFDataset(
126
+ hf_dataset=ds,
127
+ batch_size=self.common_config.batch_size,
128
+ length=self.max_prompts, # Required
129
+ map_fn=map_fn,
130
+ buffer_size=10000,
131
+ )
132
+ return train_dataset, train_dataset.take(1000)
133
+ ```
134
+
135
+ ## File-Based Dataset
136
+
137
+ JSONL format:
138
+ ```json
139
+ {"messages": [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
140
+ ```
141
+
142
+ ```python
143
+ from tinker_cookbook.supervised.data import FromConversationFileBuilder
144
+
145
+ dataset_builder = FromConversationFileBuilder(
146
+ common_config=common_config,
147
+ file_path="/path/to/data.jsonl",
148
+ )
149
+ ```
150
+
151
+ ## TrainOnWhat Enum
152
+
153
+ ```python
154
+ from tinker_cookbook.renderers import TrainOnWhat
155
+
156
+ TrainOnWhat.ALL_ASSISTANT_MESSAGES # Train on all assistant turns
157
+ TrainOnWhat.LAST_ASSISTANT_MESSAGE # Train only on final response
158
+ ```
159
+
160
+ Use `LAST_ASSISTANT_MESSAGE` for:
161
+ - Classification (only final answer matters)
162
+ - Chain-of-thought where intermediate steps shouldn't be trained
163
+ - Preference learning
164
+
165
+ ## Custom Dataset
166
+
167
+ ```python
168
+ from tinker_cookbook.supervised.types import SupervisedDataset
169
+ from tinker.types import Datum, ModelInput, TensorData
170
+ import numpy as np
171
+
172
+ class CustomDataset(SupervisedDataset):
173
+ def __init__(self, config):
174
+ self.config = config
175
+ self.tokenizer = get_tokenizer(config.model_name)
176
+ self.renderer = get_renderer(config.renderer_name, self.tokenizer)
177
+ self.data = self._load_data()
178
+
179
+ def __len__(self):
180
+ return len(self.data) // self.config.batch_size
181
+
182
+ def __iter__(self):
183
+ for item in self.data:
184
+ messages = self._preprocess(item)
185
+ example = self.renderer.build_supervised_example(
186
+ messages=messages,
187
+ train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
188
+ )
189
+ yield Datum(
190
+ model_input=ModelInput([example.chunk]),
191
+ loss_fn_inputs={
192
+ "target_tokens": TensorData.from_numpy(np.array(example.target_tokens, dtype=np.int64)),
193
+ "weights": TensorData.from_numpy(np.array(example.weights, dtype=np.float32)),
194
+ },
195
+ )
196
+ ```
197
+
198
+ ## Hyperparameters
199
+
200
+ ### Learning Rate
201
+
202
+ ```python
203
+ from tinker_cookbook.hyperparam_utils import get_lr
204
+
205
+ model_name = "meta-llama/Llama-3.2-1B"
206
+ recommended_lr = get_lr(model_name)
207
+ ```
208
+
209
+ Formula: `LR = lr_base * M_LoRA * (2000/H_m)^P_m`
210
+ - `lr_base = 5e-5`
211
+ - `M_LoRA = 10` (multiplier for LoRA)
212
+ - `P_m = 0.0775` (Qwen) or `0.781` (Llama)
213
+
214
+ ### Batch Size
215
+
216
+ - Smaller batch sizes (128) generally better for fine-tuning
217
+ - Scale LR with `LR ∝ √batch_size`
218
+ - Aim for at least 100 steps of training
219
+
220
+ ## Output Files
221
+
222
+ After training, check `log_path` for:
223
+ - `metrics.jsonl` - Training metrics
224
+ - `checkpoints.jsonl` - Saved checkpoints
225
+ - `config.json` - Training configuration
226
+
227
+ ```python
228
+ import pandas
+ import matplotlib.pyplot as plt
229
+ df = pandas.read_json("/tmp/training/metrics.jsonl", lines=True)
230
+ plt.plot(df['train_mean_nll'], label='train_loss')
231
+ plt.plot(df['test/nll'].dropna(), label='test_loss')
232
+ ```