@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,680 @@
1
+ # Comprehensive Tracking Guide
2
+
3
+ A complete guide to experiment tracking with MLflow.
4
+
5
+ ## Table of Contents
6
+ - Logging Parameters
7
+ - Logging Metrics
8
+ - Logging Artifacts
9
+ - Logging Models
10
+ - Autologging
11
+ - Runs and Experiments
12
+ - Searching and Comparing
13
+
14
+ ## Logging Parameters
15
+
16
+ ### Basic Parameter Logging
17
+
18
+ ```python
19
+ import mlflow
20
+
21
+ with mlflow.start_run():
22
+ # Log parameters one at a time
23
+ mlflow.log_param("learning_rate", 0.001)
24
+ mlflow.log_param("batch_size", 32)
25
+ mlflow.log_param("optimizer", "Adam")
26
+
27
+ # Multiple parameters at once
28
+ mlflow.log_params({
29
+ "epochs": 50,
30
+ "dropout": 0.2,
31
+ "weight_decay": 1e-4,
32
+ "momentum": 0.9
33
+ })
34
+ ```
35
+
36
+ ### Structured Parameters
37
+
38
+ ```python
39
+ # Nested configuration
40
+ config = {
41
+ "model": {
42
+ "architecture": "ResNet50",
43
+ "pretrained": True,
44
+ "num_classes": 10
45
+ },
46
+ "training": {
47
+ "lr": 0.001,
48
+ "batch_size": 32,
49
+ "epochs": 50
50
+ },
51
+ "data": {
52
+ "dataset": "ImageNet",
53
+ "augmentation": True
54
+ }
55
+ }
56
+
57
+ with mlflow.start_run():
58
+ # Log as flattened params
59
+ for section, params in config.items():
60
+ for key, value in params.items():
61
+ mlflow.log_param(f"{section}.{key}", value)
62
+
63
+ # Or log the entire config as a JSON artifact
64
+ mlflow.log_dict(config, "config.json")
65
+ ```
66
+
67
+ ### Parameter Best Practices
68
+
69
+ ```python
70
+ with mlflow.start_run():
71
+ # ✅ Good: Log all hyperparameters
72
+ mlflow.log_params({
73
+ "learning_rate": 0.001,
74
+ "batch_size": 32,
75
+ "optimizer": "Adam",
76
+ "scheduler": "CosineAnnealing",
77
+ "weight_decay": 1e-4
78
+ })
79
+
80
+ # ✅ Good: Log data info
81
+ mlflow.log_params({
82
+ "dataset": "ImageNet",
83
+ "train_samples": len(train_dataset),
84
+ "val_samples": len(val_dataset),
85
+ "num_classes": 1000
86
+ })
87
+
88
+ # ✅ Good: Log environment info
89
+ mlflow.log_params({
90
+ "framework": "PyTorch 2.0",
91
+ "cuda_version": torch.version.cuda,
92
+ "gpu": torch.cuda.get_device_name(0)
93
+ })
94
+ ```
95
+
96
+ ## Logging Metrics
97
+
98
+ ### Time-Series Metrics
99
+
100
+ ```python
101
+ with mlflow.start_run():
102
+ for epoch in range(num_epochs):
103
+ # Train
104
+ train_loss, train_acc = train_epoch()
105
+
106
+ # Validate
107
+ val_loss, val_acc = validate()
108
+
109
+ # Log metrics with step
110
+ mlflow.log_metric("train_loss", train_loss, step=epoch)
111
+ mlflow.log_metric("train_accuracy", train_acc, step=epoch)
112
+ mlflow.log_metric("val_loss", val_loss, step=epoch)
113
+ mlflow.log_metric("val_accuracy", val_acc, step=epoch)
114
+
115
+ # Log learning rate
116
+ current_lr = optimizer.param_groups[0]['lr']
117
+ mlflow.log_metric("learning_rate", current_lr, step=epoch)
118
+ ```
119
+
120
+ ### Batch-Level Metrics
121
+
122
+ ```python
123
+ with mlflow.start_run():
124
+ global_step = 0
125
+
126
+ for epoch in range(num_epochs):
127
+ for batch_idx, (data, target) in enumerate(train_loader):
128
+ loss = train_batch(data, target)
129
+
130
+ # Log every 100 batches
131
+ if global_step % 100 == 0:
132
+ mlflow.log_metric("batch_loss", loss, step=global_step)
133
+
134
+ global_step += 1
135
+
136
+ # Log epoch metrics
137
+ val_loss = validate()
138
+ mlflow.log_metric("epoch_val_loss", val_loss, step=epoch)
139
+ ```
140
+
141
+ ### Multiple Metrics at Once
142
+
143
+ ```python
144
+ with mlflow.start_run():
145
+ metrics = {
146
+ "train_loss": 0.15,
147
+ "val_loss": 0.18,
148
+ "train_accuracy": 0.95,
149
+ "val_accuracy": 0.92,
150
+ "f1_score": 0.93,
151
+ "precision": 0.94,
152
+ "recall": 0.92
153
+ }
154
+
155
+ mlflow.log_metrics(metrics, step=epoch)
156
+ ```
157
+
158
+ ### Custom Metrics
159
+
160
+ ```python
161
+ def compute_custom_metrics(y_true, y_pred):
162
+ """Compute custom evaluation metrics."""
163
+ from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
164
+
165
+ return {
166
+ "accuracy": accuracy_score(y_true, y_pred),
167
+ "f1_macro": f1_score(y_true, y_pred, average='macro'),
168
+ "f1_weighted": f1_score(y_true, y_pred, average='weighted'),
169
+ "precision": precision_score(y_true, y_pred, average='weighted'),
170
+ "recall": recall_score(y_true, y_pred, average='weighted')
171
+ }
172
+
173
+ with mlflow.start_run():
174
+ predictions = model.predict(X_test)
175
+ metrics = compute_custom_metrics(y_test, predictions)
176
+
177
+ # Log all metrics
178
+ mlflow.log_metrics(metrics)
179
+ ```
180
+
181
+ ## Logging Artifacts
182
+
183
+ ### Files and Directories
184
+
185
+ ```python
186
+ with mlflow.start_run():
187
+ # Log single file
188
+ plt.savefig('loss_curve.png')
189
+ mlflow.log_artifact('loss_curve.png')
190
+
191
+ # Log directory
192
+ os.makedirs('plots', exist_ok=True)
193
+ plt.savefig('plots/train_loss.png')
194
+ plt.savefig('plots/val_loss.png')
195
+ mlflow.log_artifacts('plots') # Logs entire directory
196
+
197
+ # Log to specific artifact path
198
+ mlflow.log_artifact('model.pkl', artifact_path='models')
199
+ # Stored at: artifacts/models/model.pkl
200
+ ```
201
+
202
+ ### JSON and YAML
203
+
204
+ ```python
205
+ import json
206
+ import yaml
207
+
208
+ with mlflow.start_run():
209
+ # Log dict as JSON
210
+ config = {"lr": 0.001, "batch_size": 32}
211
+ mlflow.log_dict(config, "config.json")
212
+
213
+ # Log as YAML
214
+ with open('config.yaml', 'w') as f:
215
+ yaml.dump(config, f)
216
+ mlflow.log_artifact('config.yaml')
217
+ ```
218
+
219
+ ### Text Files
220
+
221
+ ```python
222
+ with mlflow.start_run():
223
+ # Log training summary
224
+ summary = f"""
225
+ Training Summary:
226
+ - Epochs: {num_epochs}
227
+ - Final train loss: {final_train_loss:.4f}
228
+ - Final val loss: {final_val_loss:.4f}
229
+ - Best accuracy: {best_acc:.4f}
230
+ - Training time: {training_time:.2f}s
231
+ """
232
+
233
+ with open('summary.txt', 'w') as f:
234
+ f.write(summary)
235
+
236
+ mlflow.log_artifact('summary.txt')
237
+ ```
238
+
239
+ ### Model Checkpoints
240
+
241
+ ```python
242
+ import torch
243
+
244
+ with mlflow.start_run():
245
+ # Save checkpoint
246
+ checkpoint = {
247
+ 'epoch': epoch,
248
+ 'model_state_dict': model.state_dict(),
249
+ 'optimizer_state_dict': optimizer.state_dict(),
250
+ 'loss': loss,
251
+ 'accuracy': accuracy
252
+ }
253
+
254
+ torch.save(checkpoint, f'checkpoint_epoch_{epoch}.pth')
255
+ mlflow.log_artifact(f'checkpoint_epoch_{epoch}.pth', artifact_path='checkpoints')
256
+ ```
257
+
258
+ ## Logging Models
259
+
260
+ ### Framework-Specific Logging
261
+
262
+ ```python
263
+ # Scikit-learn
264
+ import mlflow.sklearn
265
+
266
+ with mlflow.start_run():
267
+ model = train_sklearn_model()
268
+ mlflow.sklearn.log_model(model, "model")
269
+
270
+ # PyTorch
271
+ import mlflow.pytorch
272
+
273
+ with mlflow.start_run():
274
+ model = train_pytorch_model()
275
+ mlflow.pytorch.log_model(model, "model")
276
+
277
+ # TensorFlow/Keras
278
+ import mlflow.keras
279
+
280
+ with mlflow.start_run():
281
+ model = train_keras_model()
282
+ mlflow.keras.log_model(model, "model")
283
+
284
+ # XGBoost
285
+ import mlflow.xgboost
286
+
287
+ with mlflow.start_run():
288
+ model = train_xgboost_model()
289
+ mlflow.xgboost.log_model(model, "model")
290
+ ```
291
+
292
+ ### Log Model with Signature
293
+
294
+ ```python
295
+ from mlflow.models.signature import infer_signature
296
+ import mlflow.sklearn
297
+
298
+ with mlflow.start_run():
299
+ model = train_model()
300
+
301
+ # Infer signature from training data
302
+ signature = infer_signature(X_train, model.predict(X_train))
303
+
304
+ # Log with signature
305
+ mlflow.sklearn.log_model(
306
+ model,
307
+ "model",
308
+ signature=signature
309
+ )
310
+ ```
311
+
312
+ ### Log Model with Input Example
313
+
314
+ ```python
315
+ with mlflow.start_run():
316
+ model = train_model()
317
+
318
+ # Log with input example
319
+ input_example = X_train[:5]
320
+
321
+ mlflow.sklearn.log_model(
322
+ model,
323
+ "model",
324
+ signature=signature,  # signature = infer_signature(X_train, model.predict(X_train)), as in the previous example
325
+ input_example=input_example
326
+ )
327
+ ```
328
+
329
+ ### Log Model to Registry
330
+
331
+ ```python
332
+ with mlflow.start_run():
333
+ model = train_model()
334
+
335
+ # Log and register in one step
336
+ mlflow.sklearn.log_model(
337
+ model,
338
+ "model",
339
+ registered_model_name="my-classifier" # Register immediately
340
+ )
341
+ ```
342
+
343
+ ## Autologging
344
+
345
+ ### Enable Autologging
346
+
347
+ ```python
348
+ import mlflow
349
+
350
+ # Enable for all frameworks
351
+ mlflow.autolog()
352
+
353
+ # Or framework-specific
354
+ mlflow.sklearn.autolog()
355
+ mlflow.pytorch.autolog()
356
+ mlflow.keras.autolog()
357
+ mlflow.xgboost.autolog()
358
+ mlflow.lightgbm.autolog()
359
+ ```
360
+
361
+ ### Autologging with Scikit-learn
362
+
363
+ ```python
364
+ import mlflow
365
+ from sklearn.ensemble import RandomForestClassifier
366
+
367
+ mlflow.sklearn.autolog()
368
+
369
+ with mlflow.start_run():
370
+ model = RandomForestClassifier(n_estimators=100, max_depth=5)
371
+ model.fit(X_train, y_train)
372
+
373
+ # Automatically logs:
374
+ # - Parameters: n_estimators, max_depth, etc.
375
+ # - Metrics: training score (test metrics are logged only when you evaluate, e.g. model.score(X_test, y_test))
376
+ # - Model: pickled model
377
+ # - Training time
378
+ ```
379
+
380
+ ### Autologging with PyTorch Lightning
381
+
382
+ ```python
383
+ import mlflow
384
+ import pytorch_lightning as pl
385
+
386
+ mlflow.pytorch.autolog()
387
+
388
+ with mlflow.start_run():
389
+ trainer = pl.Trainer(max_epochs=10)
390
+ trainer.fit(model, datamodule=dm)
391
+
392
+ # Automatically logs:
393
+ # - Hyperparameters from model and trainer
394
+ # - Training and validation metrics
395
+ # - Model checkpoints
396
+ ```
397
+
398
+ ### Disable Autologging
399
+
400
+ ```python
401
+ # Disable for specific framework
402
+ mlflow.sklearn.autolog(disable=True)
403
+
404
+ # Disable all
405
+ mlflow.autolog(disable=True)
406
+ ```
407
+
408
+ ### Configure Autologging
409
+
410
+ ```python
411
+ mlflow.sklearn.autolog(
412
+ log_input_examples=True, # Log input examples
413
+ log_model_signatures=True, # Log model signatures
414
+ log_models=True, # Log models
415
+ disable=False,
416
+ exclusive=False,
417
+ disable_for_unsupported_versions=False,
418
+ silent=False
419
+ )
420
+ ```
421
+
422
+ ## Runs and Experiments
423
+
424
+ ### Create Experiment
425
+
426
+ ```python
427
+ # Create experiment
428
+ experiment_id = mlflow.create_experiment(
429
+ "my-experiment",
430
+ artifact_location="s3://my-bucket/mlflow",
431
+ tags={"project": "classification", "team": "ml-team"}
432
+ )
433
+
434
+ # Set active experiment
435
+ mlflow.set_experiment("my-experiment")
436
+
437
+ # Get experiment
438
+ experiment = mlflow.get_experiment_by_name("my-experiment")
439
+ print(f"Experiment ID: {experiment.experiment_id}")
440
+ ```
441
+
442
+ ### Nested Runs
443
+
444
+ ```python
445
+ # Parent run
446
+ with mlflow.start_run(run_name="hyperparameter-tuning"):
447
+ parent_run_id = mlflow.active_run().info.run_id
448
+
449
+ # Child runs
450
+ for lr in [0.001, 0.01, 0.1]:
451
+ with mlflow.start_run(run_name=f"lr-{lr}", nested=True):
452
+ mlflow.log_param("learning_rate", lr)
453
+ model = train(lr)
454
+ accuracy = evaluate(model)
455
+ mlflow.log_metric("accuracy", accuracy)
456
+ ```
457
+
458
+ ### Run Tags
459
+
460
+ ```python
461
+ with mlflow.start_run():
462
+ # Set tags
463
+ mlflow.set_tags({
464
+ "model_type": "ResNet50",
465
+ "dataset": "ImageNet",
466
+ "git_commit": get_git_commit(),
467
+ "developer": "alice@company.com"
468
+ })
469
+
470
+ # Single tag
471
+ mlflow.set_tag("production_ready", "true")
472
+ ```
473
+
474
+ ### Run Notes
475
+
476
+ ```python
477
+ with mlflow.start_run():
478
+ # Add notes
479
+ mlflow.set_tag("mlflow.note.content", """
480
+ ## Experiment Notes
481
+
482
+ - Using pretrained ResNet50
483
+ - Fine-tuning last 2 layers
484
+ - Data augmentation: random flip, crop, rotation
485
+ - Learning rate schedule: cosine annealing
486
+
487
+ ## Results
488
+ - Best validation accuracy: 95.2%
489
+ - Converged after 35 epochs
490
+ """)
491
+ ```
492
+
493
+ ## Searching and Comparing
494
+
495
+ ### Search Runs
496
+
497
+ ```python
498
+ from mlflow.tracking import MlflowClient
499
+
500
+ client = MlflowClient()
501
+
502
+ # Get experiment
503
+ experiment = mlflow.get_experiment_by_name("my-experiment")
504
+ experiment_id = experiment.experiment_id
505
+
506
+ # Search all runs
507
+ runs = client.search_runs(
508
+ experiment_ids=[experiment_id],
509
+ filter_string="",
510
+ order_by=["metrics.accuracy DESC"],
511
+ max_results=10
512
+ )
513
+
514
+ for run in runs:
515
+ print(f"Run ID: {run.info.run_id}")
516
+ print(f"Accuracy: {run.data.metrics.get('accuracy', 'N/A')}")
517
+ print(f"Params: {run.data.params}")
518
+ print("---")
519
+ ```
520
+
521
+ ### Filter Runs
522
+
523
+ ```python
524
+ # Filter by metric
525
+ runs = client.search_runs(
526
+ experiment_ids=[experiment_id],
527
+ filter_string="metrics.accuracy > 0.9"
528
+ )
529
+
530
+ # Filter by parameter
531
+ runs = client.search_runs(
532
+ experiment_ids=[experiment_id],
533
+ filter_string="params.model = 'ResNet50'"
534
+ )
535
+
536
+ # Complex filter
537
+ runs = client.search_runs(
538
+ experiment_ids=[experiment_id],
539
+ filter_string="""
540
+ metrics.accuracy > 0.9 AND
541
+ params.learning_rate = '0.001' AND
542
+ tags.dataset = 'ImageNet'
543
+ """
544
+ )
545
+ ```
546
+
547
+ ### Compare Best Runs
548
+
549
+ ```python
550
+ def compare_best_runs(experiment_name, metric="accuracy", top_n=5):
551
+ """Compare top N runs by metric."""
552
+ experiment = mlflow.get_experiment_by_name(experiment_name)
553
+ client = MlflowClient()
554
+
555
+ runs = client.search_runs(
556
+ experiment_ids=[experiment.experiment_id],
557
+ filter_string=f"metrics.{metric} > 0",
558
+ order_by=[f"metrics.{metric} DESC"],
559
+ max_results=top_n
560
+ )
561
+
562
+ print(f"Top {top_n} runs by {metric}:")
563
+ print("-" * 80)
564
+
565
+ for i, run in enumerate(runs, 1):
566
+ print(f"{i}. Run ID: {run.info.run_id}")
567
+ print(f" {metric}: {run.data.metrics.get(metric, 'N/A')}")
568
+ print(f" Params: {run.data.params}")
569
+ print()
570
+
571
+ compare_best_runs("my-experiment", metric="accuracy", top_n=5)
572
+ ```
573
+
574
+ ### Download Artifacts
575
+
576
+ ```python
577
+ client = MlflowClient()
578
+
579
+ # Download artifact
580
+ run_id = "abc123"
581
+ local_path = client.download_artifacts(run_id, "model")
582
+ print(f"Downloaded to: {local_path}")
583
+
584
+ # Download specific file
585
+ local_file = client.download_artifacts(run_id, "plots/loss_curve.png")
586
+ ```
587
+
588
+ ## Best Practices
589
+
590
+ ### 1. Use Descriptive Names
591
+
592
+ ```python
593
+ # ✅ Good: Descriptive experiment and run names
594
+ mlflow.set_experiment("sentiment-analysis-bert")
595
+
596
+ with mlflow.start_run(run_name="bert-base-lr1e-5-bs32-epochs10"):
597
+ train()
598
+
599
+ # ❌ Bad: Generic names
600
+ mlflow.set_experiment("experiment1")
601
+ with mlflow.start_run():
602
+ train()
603
+ ```
604
+
605
+ ### 2. Log Comprehensive Metadata
606
+
607
+ ```python
608
+ with mlflow.start_run():
609
+ # Hyperparameters
610
+ mlflow.log_params(config)
611
+
612
+ # System info
613
+ mlflow.set_tags({
614
+ "git_commit": get_git_commit(),
615
+ "framework": f"PyTorch {torch.__version__}",
616
+ "cuda": torch.version.cuda,
617
+ "gpu": torch.cuda.get_device_name(0)
618
+ })
619
+
620
+ # Data info
621
+ mlflow.log_params({
622
+ "train_samples": len(train_dataset),
623
+ "val_samples": len(val_dataset),
624
+ "num_classes": num_classes
625
+ })
626
+ ```
627
+
628
+ ### 3. Track Time
629
+
630
+ ```python
631
+ import time
632
+
633
+ with mlflow.start_run():
634
+ start_time = time.time()
635
+
636
+ # Training
637
+ model = train()
638
+
639
+ # Log training time
640
+ training_time = time.time() - start_time
641
+ mlflow.log_metric("training_time_seconds", training_time)
642
+ ```
643
+
644
+ ### 4. Version Control Integration
645
+
646
+ ```python
647
+ import subprocess
648
+
649
+ def get_git_commit():
650
+ """Get current git commit hash."""
651
+ try:
652
+ return subprocess.check_output(
653
+ ['git', 'rev-parse', 'HEAD']
654
+ ).decode('ascii').strip()
655
+ except Exception:
656
+ return "unknown"
657
+
658
+ with mlflow.start_run():
659
+ mlflow.set_tag("git_commit", get_git_commit())
660
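+ mlflow.set_tag("git_branch", get_git_branch())  # get_git_branch(): same pattern as get_git_commit(), using ['git', 'rev-parse', '--abbrev-ref', 'HEAD']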
661
+ ```
662
+
663
+ ### 5. Error Handling
664
+
665
+ ```python
666
+ with mlflow.start_run():
667
+ try:
668
+ model = train()
669
+ mlflow.set_tag("status", "completed")
670
+ except Exception as e:
671
+ mlflow.set_tag("status", "failed")
672
+ mlflow.set_tag("error", str(e))
673
+ raise
674
+ ```
675
+
676
+ ## Resources
677
+
678
+ - **Tracking API**: https://mlflow.org/docs/latest/tracking.html
679
+ - **Python API**: https://mlflow.org/docs/latest/python_api/mlflow.html
680
+ - **Examples**: https://github.com/mlflow/mlflow/tree/master/examples