@synsci/cli-darwin-arm64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,509 @@
1
+ ---
2
+ name: skypilot-multi-cloud-orchestration
3
+ description: Multi-cloud orchestration for ML workloads with automatic cost optimization. Use when you need to run training or batch jobs across multiple clouds, leverage spot instances with auto-recovery, or optimize GPU costs across providers.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Infrastructure, Multi-Cloud, Orchestration, GPU, Cost Optimization, SkyPilot]
8
+ dependencies: [skypilot>=0.7.0]
9
+ ---
10
+
11
+ # SkyPilot Multi-Cloud Orchestration
12
+
13
+ Comprehensive guide to running ML workloads across clouds with automatic cost optimization using SkyPilot.
14
+
15
+ ## When to use SkyPilot
16
+
17
+ **Use SkyPilot when:**
18
+ - Running ML workloads across multiple clouds (AWS, GCP, Azure, etc.)
19
+ - Need cost optimization with automatic cloud/region selection
20
+ - Running long jobs on spot instances with auto-recovery
21
+ - Managing distributed multi-node training
22
+ - Want a unified interface for 20+ cloud providers
23
+ - Need to avoid vendor lock-in
24
+
25
+ **Key features:**
26
+ - **Multi-cloud**: AWS, GCP, Azure, Kubernetes, Lambda, RunPod, and more (20+ providers)
27
+ - **Cost optimization**: Automatic cheapest cloud/region selection
28
+ - **Spot instances**: 3-6x cost savings with automatic recovery
29
+ - **Distributed training**: Multi-node jobs with gang scheduling
30
+ - **Managed jobs**: Auto-recovery, checkpointing, fault tolerance
31
+ - **Sky Serve**: Model serving with autoscaling
32
+
33
+ **Use alternatives instead:**
34
+ - **Modal**: For simpler serverless GPU with Python-native API
35
+ - **RunPod**: For single-cloud persistent pods
36
+ - **Kubernetes**: For existing K8s infrastructure
37
+ - **Ray**: For pure Ray-based orchestration
38
+
39
+ ## Quick start
40
+
41
+ ### Installation
42
+
43
+ ```bash
44
+ pip install "skypilot[aws,gcp,azure,kubernetes]"
45
+
46
+ # Verify cloud credentials
47
+ sky check
48
+ ```
49
+
50
+ ### Hello World
51
+
52
+ Create `hello.yaml`:
53
+ ```yaml
54
+ resources:
55
+ accelerators: T4:1
56
+
57
+ run: |
58
+ nvidia-smi
59
+ echo "Hello from SkyPilot!"
60
+ ```
61
+
62
+ Launch:
63
+ ```bash
64
+ sky launch -c hello hello.yaml
65
+
66
+ # SSH to cluster
67
+ ssh hello
68
+
69
+ # Terminate
70
+ sky down hello
71
+ ```
72
+
73
+ ## Core concepts
74
+
75
+ ### Task YAML structure
76
+
77
+ ```yaml
78
+ # Task name (optional)
79
+ name: my-task
80
+
81
+ # Resource requirements
82
+ resources:
83
+ cloud: aws # Optional: auto-select if omitted
84
+ region: us-west-2 # Optional: auto-select if omitted
85
+ accelerators: A100:4 # GPU type and count
86
+ cpus: 8+ # Minimum CPUs
87
+ memory: 32+ # Minimum memory (GB)
88
+ use_spot: true # Use spot instances
89
+ disk_size: 256 # Disk size (GB)
90
+
91
+ # Number of nodes for distributed training
92
+ num_nodes: 2
93
+
94
+ # Working directory (synced to ~/sky_workdir)
95
+ workdir: .
96
+
97
+ # Setup commands (run once)
98
+ setup: |
99
+ pip install -r requirements.txt
100
+
101
+ # Run commands
102
+ run: |
103
+ python train.py
104
+ ```
105
+
106
+ ### Key commands
107
+
108
+ | Command | Purpose |
109
+ |---------|---------|
110
+ | `sky launch` | Launch cluster and run task |
111
+ | `sky exec` | Run task on existing cluster |
112
+ | `sky status` | Show cluster status |
113
+ | `sky stop` | Stop cluster (preserve state) |
114
+ | `sky down` | Terminate cluster |
115
+ | `sky logs` | View task logs |
116
+ | `sky queue` | Show job queue |
117
+ | `sky jobs launch` | Launch managed job |
118
+ | `sky serve up` | Deploy serving endpoint |
119
+
120
+ ## GPU configuration
121
+
122
+ ### Available accelerators
123
+
124
+ ```yaml
125
+ # NVIDIA GPUs
126
+ accelerators: T4:1
127
+ accelerators: L4:1
128
+ accelerators: A10G:1
129
+ accelerators: L40S:1
130
+ accelerators: A100:4
131
+ accelerators: A100-80GB:8
132
+ accelerators: H100:8
133
+
134
+ # Cloud-specific
135
+ accelerators: V100:4 # AWS/GCP
136
+ accelerators: TPU-v4-8 # GCP TPUs
137
+ ```
138
+
139
+ ### GPU fallbacks
140
+
141
+ ```yaml
142
+ resources:
143
+ accelerators:
144
+ H100: 8
145
+ A100-80GB: 8
146
+ A100: 8
147
+ any_of:
148
+ - cloud: gcp
149
+ - cloud: aws
150
+ - cloud: azure
151
+ ```
152
+
153
+ ### Spot instances
154
+
155
+ ```yaml
156
+ resources:
157
+ accelerators: A100:8
158
+ use_spot: true
159
+ job_recovery: FAILOVER # Auto-recover on preemption (replaces the deprecated spot_recovery key)
160
+ ```
161
+
162
+ ## Cluster management
163
+
164
+ ### Launch and execute
165
+
166
+ ```bash
167
+ # Launch new cluster
168
+ sky launch -c mycluster task.yaml
169
+
170
+ # Run on existing cluster (skip setup)
171
+ sky exec mycluster another_task.yaml
172
+
173
+ # Interactive SSH
174
+ ssh mycluster
175
+
176
+ # Stream logs
177
+ sky logs mycluster
178
+ ```
179
+
180
+ ### Autostop
181
+
182
+ ```yaml
183
+ resources:
184
+ accelerators: A100:4
185
+ autostop:
186
+ idle_minutes: 30
187
+ down: true # Terminate instead of stop
188
+ ```
189
+
190
+ ```bash
191
+ # Set autostop via CLI
192
+ sky autostop mycluster -i 30 --down
193
+ ```
194
+
195
+ ### Cluster status
196
+
197
+ ```bash
198
+ # All clusters
199
+ sky status
200
+
201
+ # Detailed view
202
+ sky status -a
203
+ ```
204
+
205
+ ## Distributed training
206
+
207
+ ### Multi-node setup
208
+
209
+ ```yaml
210
+ resources:
211
+ accelerators: A100:8
212
+
213
+ num_nodes: 4 # 4 nodes × 8 GPUs = 32 GPUs total
214
+
215
+ setup: |
216
+ pip install torch torchvision
217
+
218
+ run: |
219
+ torchrun \
220
+ --nnodes=$SKYPILOT_NUM_NODES \
221
+ --nproc_per_node=$SKYPILOT_NUM_GPUS_PER_NODE \
222
+ --node_rank=$SKYPILOT_NODE_RANK \
223
+ --master_addr=$(echo "$SKYPILOT_NODE_IPS" | head -n1) \
224
+ --master_port=12355 \
225
+ train.py
226
+ ```
227
+
228
+ ### Environment variables
229
+
230
+ | Variable | Description |
231
+ |----------|-------------|
232
+ | `SKYPILOT_NODE_RANK` | Node index (0 to num_nodes-1) |
233
+ | `SKYPILOT_NODE_IPS` | Newline-separated IP addresses |
234
+ | `SKYPILOT_NUM_NODES` | Total number of nodes |
235
+ | `SKYPILOT_NUM_GPUS_PER_NODE` | GPUs per node |
236
+
237
+ ### Head-node-only execution
238
+
239
+ ```yaml
240
+ run: |
241
+ if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then
242
+ python orchestrate.py
243
+ fi
244
+ ```
245
+
246
+ ## Managed jobs
247
+
248
+ ### Spot recovery
249
+
250
+ ```bash
251
+ # Launch managed job with spot recovery
252
+ sky jobs launch -n my-job train.yaml
253
+ ```
254
+
255
+ ### Checkpointing
256
+
257
+ ```yaml
258
+ name: training-job
259
+
260
+ file_mounts:
261
+ /checkpoints:
262
+ name: my-checkpoints
263
+ store: s3
264
+ mode: MOUNT
265
+
266
+ resources:
267
+ accelerators: A100:8
268
+ use_spot: true
269
+
270
+ run: |
271
+ python train.py \
272
+ --checkpoint-dir /checkpoints \
273
+ --resume-from-latest
274
+ ```
275
+
276
+ ### Job management
277
+
278
+ ```bash
279
+ # List jobs
280
+ sky jobs queue
281
+
282
+ # View logs
283
+ sky jobs logs my-job
284
+
285
+ # Cancel job
286
+ sky jobs cancel my-job
287
+ ```
288
+
289
+ ## File mounts and storage
290
+
291
+ ### Local file sync
292
+
293
+ ```yaml
294
+ workdir: ./my-project # Synced to ~/sky_workdir
295
+
296
+ file_mounts:
297
+ /data/config.yaml: ./config.yaml
298
+ ~/.vimrc: ~/.vimrc
299
+ ```
300
+
301
+ ### Cloud storage
302
+
303
+ ```yaml
304
+ file_mounts:
305
+ # Mount S3 bucket
306
+ /datasets:
307
+ source: s3://my-bucket/datasets
308
+ mode: MOUNT # Stream from S3
309
+
310
+ # Copy GCS bucket
311
+ /models:
312
+ source: gs://my-bucket/models
313
+ mode: COPY # Pre-fetch to disk
314
+
315
+ # Cached mount (fast writes)
316
+ /outputs:
317
+ name: my-outputs
318
+ store: s3
319
+ mode: MOUNT_CACHED
320
+ ```
321
+
322
+ ### Storage modes
323
+
324
+ | Mode | Description | Best For |
325
+ |------|-------------|----------|
326
+ | `MOUNT` | Stream from cloud | Large datasets, read-heavy |
327
+ | `COPY` | Pre-fetch to disk | Small files, random access |
328
+ | `MOUNT_CACHED` | Cache with async upload | Checkpoints, outputs |
329
+
330
+ ## Sky Serve (Model Serving)
331
+
332
+ ### Basic service
333
+
334
+ ```yaml
335
+ # service.yaml
336
+ service:
337
+ readiness_probe: /health
338
+ replica_policy:
339
+ min_replicas: 1
340
+ max_replicas: 10
341
+ target_qps_per_replica: 2.0
342
+
343
+ resources:
344
+ accelerators: A100:1
345
+
346
+ run: |
347
+ python -m vllm.entrypoints.openai.api_server \
348
+ --model meta-llama/Llama-2-7b-chat-hf \
349
+ --port 8000
350
+ ```
351
+
352
+ ```bash
353
+ # Deploy
354
+ sky serve up -n my-service service.yaml
355
+
356
+ # Check status
357
+ sky serve status
358
+
359
+ # Get endpoint
360
+ sky serve status my-service --endpoint
361
+ ```
362
+
363
+ ### Autoscaling policies
364
+
365
+ ```yaml
366
+ service:
367
+ replica_policy:
368
+ min_replicas: 1
369
+ max_replicas: 10
370
+ target_qps_per_replica: 2.0
371
+ upscale_delay_seconds: 60
372
+ downscale_delay_seconds: 300
373
+ load_balancing_policy: round_robin
374
+ ```
375
+
376
+ ## Cost optimization
377
+
378
+ ### Automatic cloud selection
379
+
380
+ ```yaml
381
+ # SkyPilot finds cheapest option
382
+ resources:
383
+ accelerators: A100:8
384
+ # No cloud specified - auto-select cheapest
385
+ ```
386
+
387
+ ```bash
388
+ # Show optimizer decision
389
+ sky launch task.yaml --dryrun
390
+ ```
391
+
392
+ ### Cloud preferences
393
+
394
+ ```yaml
395
+ resources:
396
+ accelerators: A100:8
397
+ any_of:
398
+ - cloud: gcp
399
+ region: us-central1
400
+ - cloud: aws
401
+ region: us-east-1
402
+ - cloud: azure
403
+ ```
404
+
405
+ ### Environment variables and secrets
406
+
407
+ ```yaml
408
+ envs:
409
+ HF_TOKEN: $HF_TOKEN # Inherited from local env
410
+ WANDB_API_KEY: $WANDB_API_KEY
411
+
412
+ # Or use secrets
413
+ secrets:
414
+ - HF_TOKEN
415
+ - WANDB_API_KEY
416
+ ```
417
+
418
+ ## Common workflows
419
+
420
+ ### Workflow 1: Fine-tuning with checkpoints
421
+
422
+ ```yaml
423
+ name: llm-finetune
424
+
425
+ file_mounts:
426
+ /checkpoints:
427
+ name: finetune-checkpoints
428
+ store: s3
429
+ mode: MOUNT_CACHED
430
+
431
+ resources:
432
+ accelerators: A100:8
433
+ use_spot: true
434
+
435
+ setup: |
436
+ pip install transformers accelerate
437
+
438
+ run: |
439
+ python train.py \
440
+ --checkpoint-dir /checkpoints \
441
+ --resume
442
+ ```
443
+
444
+ ### Workflow 2: Hyperparameter sweep
445
+
446
+ ```yaml
447
+ name: hp-sweep-${RUN_ID}
448
+
449
+ envs:
450
+ RUN_ID: 0
451
+ LEARNING_RATE: 1e-4
452
+ BATCH_SIZE: 32
453
+
454
+ resources:
455
+ accelerators: A100:1
456
+ use_spot: true
457
+
458
+ run: |
459
+ python train.py \
460
+ --lr $LEARNING_RATE \
461
+ --batch-size $BATCH_SIZE \
462
+ --run-id $RUN_ID
463
+ ```
464
+
465
+ ```bash
466
+ # Launch multiple jobs
467
+ for i in {1..10}; do
468
+ sky jobs launch sweep.yaml \
469
+ --env RUN_ID=$i \
470
+ --env LEARNING_RATE=$(python -c "import random; print(10**random.uniform(-5,-3))")
471
+ done
472
+ ```
473
+
474
+ ## Debugging
475
+
476
+ ```bash
477
+ # SSH to cluster
478
+ ssh mycluster
479
+
480
+ # View logs
481
+ sky logs mycluster
482
+
483
+ # Check job queue
484
+ sky queue mycluster
485
+
486
+ # View managed job logs
487
+ sky jobs logs my-job
488
+ ```
489
+
490
+ ## Common issues
491
+
492
+ | Issue | Solution |
493
+ |-------|----------|
494
+ | Quota exceeded | Request quota increase, try different region |
495
+ | Spot preemption | Use `sky jobs launch` for auto-recovery |
496
+ | Slow file sync | Use `MOUNT_CACHED` mode for outputs |
497
+ | GPU not available | Use `any_of` for fallback clouds |
498
+
499
+ ## References
500
+
501
+ - **[Advanced Usage](references/advanced-usage.md)** - Multi-cloud, optimization, production patterns
502
+ - **[Troubleshooting](references/troubleshooting.md)** - Common issues and solutions
503
+
504
+ ## Resources
505
+
506
+ - **Documentation**: https://docs.skypilot.co
507
+ - **GitHub**: https://github.com/skypilot-org/skypilot
508
+ - **Slack**: https://slack.skypilot.co
509
+ - **Examples**: https://github.com/skypilot-org/skypilot/tree/master/examples