@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,494 @@
1
+ # Modal Troubleshooting Guide
2
+
3
+ ## Installation Issues
4
+
5
+ ### Authentication fails
6
+
7
+ **Error**: `modal setup` doesn't complete or the token is invalid
8
+
9
+ **Solutions**:
10
+ ```bash
11
+ # Re-authenticate
12
+ modal token new
13
+
14
+ # Check current token
15
+ modal config show
16
+
17
+ # Set token via environment
18
+ export MODAL_TOKEN_ID=ak-...
19
+ export MODAL_TOKEN_SECRET=as-...
20
+ ```
21
+
22
+ ### Package installation issues
23
+
24
+ **Error**: `pip install modal` fails
25
+
26
+ **Solutions**:
27
+ ```bash
28
+ # Upgrade pip
29
+ pip install --upgrade pip
30
+
31
+ # Install with specific Python version
32
+ python3.11 -m pip install modal
33
+
34
+ # Install from wheel
35
+ pip install modal --prefer-binary
36
+ ```
37
+
38
+ ## Container Image Issues
39
+
40
+ ### Image build fails
41
+
42
+ **Error**: `ImageBuilderError: Failed to build image`
43
+
44
+ **Solutions**:
45
+ ```python
46
+ # Pin package versions to avoid conflicts
47
+ image = modal.Image.debian_slim().pip_install(
48
+ "torch==2.1.0",
49
+ "transformers==4.36.0", # Pin versions
50
+ "accelerate==0.25.0"
51
+ )
52
+
53
+ # Use compatible CUDA versions
54
+ image = modal.Image.from_registry(
55
+ "nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04", # Match PyTorch CUDA
56
+ add_python="3.11"
57
+ )
58
+ ```
59
+
60
+ ### Dependency conflicts
61
+
62
+ **Error**: `ERROR: Cannot install package due to conflicting dependencies`
63
+
64
+ **Solutions**:
65
+ ```python
66
+ # Layer dependencies separately
67
+ base = modal.Image.debian_slim().pip_install("torch")
68
+ ml = base.pip_install("transformers") # Install after torch
69
+
70
+ # Use uv for better resolution
71
+ image = modal.Image.debian_slim().uv_pip_install(
72
+ "torch", "transformers"
73
+ )
74
+ ```
75
+
76
+ ### Large image builds time out
77
+
78
+ **Error**: Image build exceeds time limit
79
+
80
+ **Solutions**:
81
+ ```python
82
+ # Split into multiple layers (better caching)
83
+ base = modal.Image.debian_slim().pip_install("torch") # Cached
84
+ ml = base.pip_install("transformers", "datasets") # Cached
85
+ app = ml.copy_local_dir("./src", "/app") # Rebuilds on code change
86
+
87
+ # Download models during build, not runtime
88
+ image = modal.Image.debian_slim().pip_install("transformers").run_commands(
89
+ "python -c 'from transformers import AutoModel; AutoModel.from_pretrained(\"bert-base\")'"
90
+ )
91
+ ```
92
+
93
+ ## GPU Issues
94
+
95
+ ### GPU not available
96
+
97
+ **Error**: `RuntimeError: CUDA not available`
98
+
99
+ **Solutions**:
100
+ ```python
101
+ # Ensure GPU is specified
102
+ @app.function(gpu="T4") # Must specify GPU
103
+ def my_function():
104
+ import torch
105
+ assert torch.cuda.is_available()
106
+
107
+ # Check CUDA compatibility in image
108
+ image = modal.Image.from_registry(
109
+ "nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04",
110
+ add_python="3.11"
111
+ ).pip_install(
112
+ "torch",
113
+ index_url="https://download.pytorch.org/whl/cu121" # Match CUDA
114
+ )
115
+ ```
116
+
117
+ ### GPU out of memory
118
+
119
+ **Error**: `torch.cuda.OutOfMemoryError: CUDA out of memory`
120
+
121
+ **Solutions**:
122
+ ```python
123
+ # Use larger GPU
124
+ @app.function(gpu="A100-80GB") # More VRAM
125
+ def train():
126
+ pass
127
+
128
+ # Enable memory optimization
129
+ @app.function(gpu="A100")
130
+ def memory_optimized():
131
+ import torch
132
+ torch.backends.cuda.enable_flash_sdp(True)
133
+
134
+ # Use gradient checkpointing
135
+ model.gradient_checkpointing_enable()
136
+
137
+ # Mixed precision
138
+ with torch.autocast(device_type="cuda", dtype=torch.float16):
139
+ outputs = model(**inputs)
140
+ ```
141
+
142
+ ### Wrong GPU allocated
143
+
144
+ **Error**: Got a different GPU than requested
145
+
146
+ **Solutions**:
147
+ ```python
148
+ # Use strict GPU selection
149
+ @app.function(gpu="H100!") # H100! prevents auto-upgrade to H200
150
+
151
+ # Specify exact memory variant
152
+ @app.function(gpu="A100-80GB") # Not just "A100"
153
+
154
+ # Check GPU at runtime
155
+ @app.function(gpu="A100")
156
+ def check_gpu():
157
+ import subprocess
158
+ result = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
159
+ print(result.stdout)
160
+ ```
161
+
162
+ ## Cold Start Issues
163
+
164
+ ### Slow cold starts
165
+
166
+ **Problem**: First request takes too long
167
+
168
+ **Solutions**:
169
+ ```python
170
+ # Keep containers warm
171
+ @app.function(
172
+ container_idle_timeout=600, # Keep warm 10 min
173
+ keep_warm=1 # Always keep 1 container ready
174
+ )
175
+ def low_latency():
176
+ pass
177
+
178
+ # Load model during container start
179
+ @app.cls(gpu="A100")
180
+ class Model:
181
+ @modal.enter()
182
+ def load(self):
183
+ # This runs once at container start, not per request
184
+ self.model = load_heavy_model()
185
+
186
+ # Cache model in volume
187
+ volume = modal.Volume.from_name("models", create_if_missing=True)
188
+
189
+ @app.function(volumes={"/cache": volume})
190
+ def cached_model():
191
+ if os.path.exists("/cache/model"):
192
+ model = load_from_disk("/cache/model")
193
+ else:
194
+ model = download_model()
195
+ save_to_disk(model, "/cache/model")
196
+ volume.commit()
197
+ ```
198
+
199
+ ### Container keeps restarting
200
+
201
+ **Problem**: Containers are killed and restarted frequently
202
+
203
+ **Solutions**:
204
+ ```python
205
+ # Increase memory
206
+ @app.function(memory=32768) # 32GB RAM
207
+ def memory_heavy():
208
+ pass
209
+
210
+ # Increase timeout
211
+ @app.function(timeout=3600) # 1 hour
212
+ def long_running():
213
+ pass
214
+
215
+ # Handle signals gracefully
216
+ import signal
217
+
218
+ def handler(signum, frame):
219
+ cleanup()
220
+ exit(0)
221
+
222
+ signal.signal(signal.SIGTERM, handler)
223
+ ```
224
+
225
+ ## Volume Issues
226
+
227
+ ### Volume changes not persisting
228
+
229
+ **Error**: Data written to volume disappears
230
+
231
+ **Solutions**:
232
+ ```python
233
+ volume = modal.Volume.from_name("my-volume", create_if_missing=True)
234
+
235
+ @app.function(volumes={"/data": volume})
236
+ def write_data():
237
+ with open("/data/file.txt", "w") as f:
238
+ f.write("data")
239
+
240
+ # CRITICAL: Commit changes!
241
+ volume.commit()
242
+ ```
243
+
244
+ ### Volume read shows stale data
245
+
246
+ **Error**: Reading outdated data from volume
247
+
248
+ **Solutions**:
249
+ ```python
250
+ @app.function(volumes={"/data": volume})
251
+ def read_data():
252
+ # Reload to get latest
253
+ volume.reload()
254
+
255
+ with open("/data/file.txt", "r") as f:
256
+ return f.read()
257
+ ```
258
+
259
+ ### Volume mount fails
260
+
261
+ **Error**: `VolumeError: Failed to mount volume`
262
+
263
+ **Solutions**:
264
+ ```python
265
+ # Ensure volume exists
266
+ volume = modal.Volume.from_name("my-volume", create_if_missing=True)
267
+
268
+ # Use absolute path
269
+ @app.function(volumes={"/data": volume}) # Not "./data"
270
+ def my_function():
271
+ pass
272
+
273
+ # Check that the volume exists (in the dashboard, or via the CLI command below)
274
+ # modal volume list
275
+ ```
276
+
277
+ ## Web Endpoint Issues
278
+
279
+ ### Endpoint returns 502
280
+
281
+ **Error**: Bad gateway (502) or gateway timeout (504)
282
+
283
+ **Solutions**:
284
+ ```python
285
+ # Increase timeout
286
+ @app.function(timeout=300) # 5 min
287
+ @modal.web_endpoint()
288
+ def slow_endpoint():
289
+ pass
290
+
291
+ # Return streaming response for long operations
292
+ from fastapi.responses import StreamingResponse
293
+
294
+ @app.function()
295
+ @modal.asgi_app()
296
+ def streaming_app():
297
+ async def generate():
298
+ for i in range(100):
299
+ yield f"data: {i}\n\n"
300
+ await process_chunk(i)
301
+ return StreamingResponse(generate(), media_type="text/event-stream")
302
+ ```
303
+
304
+ ### Endpoint not accessible
305
+
306
+ **Error**: 404 or cannot reach endpoint
307
+
308
+ **Solutions**:
309
+ ```bash
310
+ # Check deployment status
311
+ modal app list
312
+
313
+ # Redeploy
314
+ modal deploy my_app.py
315
+
316
+ # Check logs
317
+ modal app logs my-app
318
+ ```
319
+
320
+ ### CORS errors
321
+
322
+ **Error**: Cross-origin request blocked
323
+
324
+ **Solutions**:
325
+ ```python
326
+ from fastapi import FastAPI
327
+ from fastapi.middleware.cors import CORSMiddleware
328
+
329
+ web_app = FastAPI()
330
+ web_app.add_middleware(
331
+ CORSMiddleware,
332
+ allow_origins=["*"],
333
+ allow_credentials=True,
334
+ allow_methods=["*"],
335
+ allow_headers=["*"],
336
+ )
337
+
338
+ @app.function()
339
+ @modal.asgi_app()
340
+ def cors_enabled():
341
+ return web_app
342
+ ```
343
+
344
+ ## Secret Issues
345
+
346
+ ### Secret not found
347
+
348
+ **Error**: `SecretNotFound: Secret 'my-secret' not found`
349
+
350
+ **Solutions**:
351
+ ```bash
352
+ # Create secret via CLI
353
+ modal secret create my-secret KEY=value
354
+
355
+ # List secrets
356
+ modal secret list
357
+
358
+ # Check secret name matches exactly
359
+ ```
360
+
361
+ ### Secret value not accessible
362
+
363
+ **Error**: Environment variable is empty
364
+
365
+ **Solutions**:
366
+ ```python
367
+ # Ensure secret is attached
368
+ @app.function(secrets=[modal.Secret.from_name("my-secret")])
369
+ def use_secret():
370
+ import os
371
+ value = os.environ.get("KEY") # Use get() to handle missing
372
+ if not value:
373
+ raise ValueError("KEY not set in secret")
374
+ ```
375
+
376
+ ## Scheduling Issues
377
+
378
+ ### Scheduled job not running
379
+
380
+ **Error**: Cron job doesn't execute
381
+
382
+ **Solutions**:
383
+ ```python
384
+ # Verify cron syntax
385
+ @app.function(schedule=modal.Cron("0 0 * * *")) # Daily at midnight UTC
386
+ def daily_job():
387
+ pass
388
+
389
+ # Check timezone (Modal uses UTC)
390
+ # "0 8 * * *" = 8am UTC, not local time
391
+
392
+ # Ensure app is deployed
393
+ # modal deploy my_app.py
394
+ ```
395
+
396
+ ### Job runs multiple times
397
+
398
+ **Problem**: Scheduled job executes more often than expected
399
+
400
+ **Solutions**:
401
+ ```python
402
+ # Implement idempotency
403
+ @app.function(schedule=modal.Cron("0 * * * *"))
404
+ def hourly_job():
405
+ job_id = get_current_hour_id()
406
+ if already_processed(job_id):
407
+ return
408
+ process()
409
+ mark_processed(job_id)
410
+ ```
411
+
412
+ ## Debugging Tips
413
+
414
+ ### Enable debug logging
415
+
416
+ ```python
417
+ import logging
418
+ logging.basicConfig(level=logging.DEBUG)
419
+
420
+ @app.function()
421
+ def debug_function():
422
+ logging.debug("Debug message")
423
+ logging.info("Info message")
424
+ ```
425
+
426
+ ### View container logs
427
+
428
+ ```bash
429
+ # Stream logs
430
+ modal app logs my-app
431
+
432
+ # View specific function
433
+ modal app logs my-app --function my_function
434
+
435
+ # View historical logs
436
+ modal app logs my-app --since 1h
437
+ ```
438
+
439
+ ### Test locally
440
+
441
+ ```python
442
+ # Run function locally without Modal
443
+ if __name__ == "__main__":
444
+ result = my_function.local() # Runs on your machine
445
+ print(result)
446
+ ```
447
+
448
+ ### Inspect container
449
+
450
+ ```python
451
+ @app.function(gpu="T4")
452
+ def debug_environment():
453
+ import subprocess
454
+ import sys
455
+
456
+ # System info
457
+ print(f"Python: {sys.version}")
458
+ print(subprocess.run(["nvidia-smi"], capture_output=True, text=True).stdout)
459
+ print(subprocess.run(["pip", "list"], capture_output=True, text=True).stdout)
460
+
461
+ # CUDA info
462
+ import torch
463
+ print(f"CUDA available: {torch.cuda.is_available()}")
464
+ print(f"CUDA version: {torch.version.cuda}")
465
+ print(f"GPU: {torch.cuda.get_device_name(0)}")
466
+ ```
467
+
468
+ ## Common Error Messages
469
+
470
+ | Error | Cause | Solution |
471
+ |-------|-------|----------|
472
+ | `FunctionTimeoutError` | Function exceeded timeout | Increase `timeout` parameter |
473
+ | `ContainerMemoryExceeded` | OOM killed | Increase `memory` parameter |
474
+ | `ImageBuilderError` | Build failed | Check dependencies, pin versions |
475
+ | `ResourceExhausted` | No GPUs available | Use GPU fallbacks, try later |
476
+ | `AuthenticationError` | Invalid token | Run `modal token new` |
477
+ | `VolumeNotFound` | Volume doesn't exist | Use `create_if_missing=True` |
478
+ | `SecretNotFound` | Secret doesn't exist | Create secret via CLI |
479
+
480
+ ## Getting Help
481
+
482
+ 1. **Documentation**: https://modal.com/docs
483
+ 2. **Examples**: https://github.com/modal-labs/modal-examples
484
+ 3. **Discord**: https://discord.gg/modal
485
+ 4. **Status**: https://status.modal.com
486
+
487
+ ### Reporting Issues
488
+
489
+ Include:
490
+ - Modal client version: `modal --version`
491
+ - Python version: `python --version`
492
+ - Full error traceback
493
+ - Minimal reproducible code
494
+ - GPU type if relevant