@synsci/cli-darwin-arm64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,545 @@
1
+ ---
2
+ name: lambda-labs-gpu-cloud
3
+ description: Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Infrastructure, GPU Cloud, Training, Inference, Lambda Labs]
8
+ dependencies: [lambda-cloud-client>=1.0.0]
9
+ ---
10
+
11
+ # Lambda Labs GPU Cloud
12
+
13
+ Comprehensive guide to running ML workloads on Lambda Labs GPU cloud with on-demand instances and 1-Click Clusters.
14
+
15
+ ## When to use Lambda Labs
16
+
17
+ **Use Lambda Labs when:**
18
+ - Need dedicated GPU instances with full SSH access
19
+ - Need to run long training jobs (hours to days)
20
+ - Want simple pricing with no egress fees
21
+ - Need persistent storage across sessions
22
+ - Require high-performance multi-node clusters (16-512 GPUs)
23
+ - Want pre-installed ML stack (Lambda Stack with PyTorch, CUDA, NCCL)
24
+
25
+ **Key features:**
26
+ - **GPU variety**: B200, H100, GH200, A100, A10, A6000, V100
27
+ - **Lambda Stack**: Pre-installed PyTorch, TensorFlow, CUDA, cuDNN, NCCL
28
+ - **Persistent filesystems**: Keep data across instance restarts
29
+ - **1-Click Clusters**: 16-512 GPU Slurm clusters with InfiniBand
30
+ - **Simple pricing**: Pay-per-minute, no egress fees
31
+ - **Global regions**: 12+ regions worldwide
32
+
33
+ **Use alternatives instead:**
34
+ - **Modal**: For serverless, auto-scaling workloads
35
+ - **SkyPilot**: For multi-cloud orchestration and cost optimization
36
+ - **RunPod**: For cheaper spot instances and serverless endpoints
37
+ - **Vast.ai**: For GPU marketplace with lowest prices
38
+
39
+ ## Quick start
40
+
41
+ ### Account setup
42
+
43
+ 1. Create account at https://lambda.ai
44
+ 2. Add payment method
45
+ 3. Generate API key from dashboard
46
+ 4. Add SSH key (required before launching instances)
47
+
48
+ ### Launch via console
49
+
50
+ 1. Go to https://cloud.lambda.ai/instances
51
+ 2. Click "Launch instance"
52
+ 3. Select GPU type and region
53
+ 4. Choose SSH key
54
+ 5. Optionally attach filesystem
55
+ 6. Launch and wait 3-15 minutes
56
+
57
+ ### Connect via SSH
58
+
59
+ ```bash
60
+ # Get instance IP from console
61
+ ssh ubuntu@<INSTANCE-IP>
62
+
63
+ # Or with specific key
64
+ ssh -i ~/.ssh/lambda_key ubuntu@<INSTANCE-IP>
65
+ ```
66
+
67
+ ## GPU instances
68
+
69
+ ### Available GPUs
70
+
71
+ | GPU | VRAM | Price/GPU/hr | Best For |
72
+ |-----|------|--------------|----------|
73
+ | B200 SXM6 | 180 GB | $4.99 | Largest models, fastest training |
74
+ | H100 SXM | 80 GB | $2.99-3.29 | Large model training |
75
+ | H100 PCIe | 80 GB | $2.49 | Cost-effective H100 |
76
+ | GH200 | 96 GB | $1.49 | Single-GPU large models |
77
+ | A100 80GB | 80 GB | $1.79 | Production training |
78
+ | A100 40GB | 40 GB | $1.29 | Standard training |
79
+ | A10 | 24 GB | $0.75 | Inference, fine-tuning |
80
+ | A6000 | 48 GB | $0.80 | Good VRAM/price ratio |
81
+ | V100 | 16 GB | $0.55 | Budget training |
82
+
83
+ ### Instance configurations
84
+
85
+ ```
86
+ 8x GPU: Best for distributed training (DDP, FSDP)
87
+ 4x GPU: Large models, multi-GPU training
88
+ 2x GPU: Medium workloads
89
+ 1x GPU: Fine-tuning, inference, development
90
+ ```
91
+
92
+ ### Launch times
93
+
94
+ - Single-GPU: 3-5 minutes
95
+ - Multi-GPU: 10-15 minutes
96
+
97
+ ## Lambda Stack
98
+
99
+ All instances come with Lambda Stack pre-installed:
100
+
101
+ ```text
102
+ # Included software
103
+ - Ubuntu 22.04 LTS
104
+ - NVIDIA drivers (latest)
105
+ - CUDA 12.x
106
+ - cuDNN 8.x
107
+ - NCCL (for multi-GPU)
108
+ - PyTorch (latest)
109
+ - TensorFlow (latest)
110
+ - JAX
111
+ - JupyterLab
112
+ ```
113
+
114
+ ### Verify installation
115
+
116
+ ```bash
117
+ # Check GPU
118
+ nvidia-smi
119
+
120
+ # Check PyTorch
121
+ python -c "import torch; print(torch.cuda.is_available())"
122
+
123
+ # Check CUDA version
124
+ nvcc --version
125
+ ```
126
+
127
+ ## Python API
128
+
129
+ ### Installation
130
+
131
+ ```bash
132
+ pip install lambda-cloud-client
133
+ ```
134
+
135
+ ### Authentication
136
+
137
+ ```python
138
+ import os
139
+ import lambda_cloud_client
140
+
141
+ # Configure with API key
142
+ configuration = lambda_cloud_client.Configuration(
143
+ host="https://cloud.lambdalabs.com/api/v1",
144
+ access_token=os.environ["LAMBDA_API_KEY"]
145
+ )
146
+ ```
147
+
148
+ ### List available instances
149
+
150
+ ```python
151
+ with lambda_cloud_client.ApiClient(configuration) as api_client:
152
+     api = lambda_cloud_client.DefaultApi(api_client)
152
+
153
+     # Get available instance types
154
+     types = api.instance_types()
155
+     for name, info in types.data.items():
156
+         print(f"{name}: {info.instance_type.description}")
158
+ ```
159
+
160
+ ### Launch instance
161
+
162
+ ```python
163
+ from lambda_cloud_client.models import LaunchInstanceRequest
164
+
165
+ request = LaunchInstanceRequest(
166
+ region_name="us-west-1",
167
+ instance_type_name="gpu_1x_h100_sxm5",
168
+ ssh_key_names=["my-ssh-key"],
169
+ file_system_names=["my-filesystem"], # Optional
170
+ name="training-job"
171
+ )
172
+
173
+ response = api.launch_instance(request)
174
+ instance_id = response.data.instance_ids[0]
175
+ print(f"Launched: {instance_id}")
176
+ ```
177
+
178
+ ### List running instances
179
+
180
+ ```python
181
+ instances = api.list_instances()
182
+ for instance in instances.data:
183
+     print(f"{instance.name}: {instance.ip} ({instance.status})")
184
+ ```
185
+
186
+ ### Terminate instance
187
+
188
+ ```python
189
+ from lambda_cloud_client.models import TerminateInstanceRequest
190
+
191
+ request = TerminateInstanceRequest(
192
+ instance_ids=[instance_id]
193
+ )
194
+ api.terminate_instance(request)
195
+ ```
196
+
197
+ ### SSH key management
198
+
199
+ ```python
200
+ from lambda_cloud_client.models import AddSshKeyRequest
201
+
202
+ # Add SSH key
203
+ request = AddSshKeyRequest(
204
+ name="my-key",
205
+ public_key="ssh-rsa AAAA..."
206
+ )
207
+ api.add_ssh_key(request)
208
+
209
+ # List keys
210
+ keys = api.list_ssh_keys()
211
+
212
+ # Delete key
213
+ api.delete_ssh_key(key_id)
214
+ ```
215
+
216
+ ## CLI with curl
217
+
218
+ ### List instance types
219
+
220
+ ```bash
221
+ curl -u $LAMBDA_API_KEY: \
222
+ https://cloud.lambdalabs.com/api/v1/instance-types | jq
223
+ ```
224
+
225
+ ### Launch instance
226
+
227
+ ```bash
228
+ curl -u $LAMBDA_API_KEY: \
229
+ -X POST https://cloud.lambdalabs.com/api/v1/instance-operations/launch \
230
+ -H "Content-Type: application/json" \
231
+ -d '{
232
+ "region_name": "us-west-1",
233
+ "instance_type_name": "gpu_1x_h100_sxm5",
234
+ "ssh_key_names": ["my-key"]
235
+ }' | jq
236
+ ```
237
+
238
+ ### Terminate instance
239
+
240
+ ```bash
241
+ curl -u $LAMBDA_API_KEY: \
242
+ -X POST https://cloud.lambdalabs.com/api/v1/instance-operations/terminate \
243
+ -H "Content-Type: application/json" \
244
+ -d '{"instance_ids": ["<INSTANCE-ID>"]}' | jq
245
+ ```
246
+
247
+ ## Persistent storage
248
+
249
+ ### Filesystems
250
+
251
+ Filesystems persist data independently of any instance, so it survives instance restarts and termination:
252
+
253
+ ```bash
254
+ # Mount location
255
+ /lambda/nfs/<FILESYSTEM_NAME>
256
+
257
+ # Example: save checkpoints
258
+ python train.py --checkpoint-dir /lambda/nfs/my-storage/checkpoints
259
+ ```
260
+
261
+ ### Create filesystem
262
+
263
+ 1. Go to Storage in the Lambda console
264
+ 2. Click "Create filesystem"
265
+ 3. Select region (must match instance region)
266
+ 4. Name the filesystem and create it
267
+
268
+ ### Attach to instance
269
+
270
+ Filesystems must be attached at instance launch time:
271
+ - Via console: Select filesystem when launching
272
+ - Via API: Include `file_system_names` in launch request
273
+
274
+ ### Best practices
275
+
276
+ ```bash
277
+ # Store on filesystem (persists)
278
+ /lambda/nfs/storage/
279
+ ├── datasets/
280
+ ├── checkpoints/
281
+ ├── models/
282
+ └── outputs/
283
+
284
+ # Local SSD (faster, ephemeral)
285
+ /home/ubuntu/
286
+ └── working/ # Temporary files
287
+ ```
288
+
289
+ ## SSH configuration
290
+
291
+ ### Add SSH key
292
+
293
+ ```bash
294
+ # Generate key locally
295
+ ssh-keygen -t ed25519 -f ~/.ssh/lambda_key
296
+
297
+ # Add public key to Lambda console
298
+ # Or via API
299
+ ```
300
+
301
+ ### Multiple keys
302
+
303
+ ```bash
304
+ # On instance, add more keys
305
+ echo 'ssh-rsa AAAA...' >> ~/.ssh/authorized_keys
306
+ ```
307
+
308
+ ### Import from GitHub
309
+
310
+ ```bash
311
+ # On instance
312
+ ssh-import-id gh:username
313
+ ```
314
+
315
+ ### SSH tunneling
316
+
317
+ ```bash
318
+ # Forward Jupyter
319
+ ssh -L 8888:localhost:8888 ubuntu@<IP>
320
+
321
+ # Forward TensorBoard
322
+ ssh -L 6006:localhost:6006 ubuntu@<IP>
323
+
324
+ # Multiple ports
325
+ ssh -L 8888:localhost:8888 -L 6006:localhost:6006 ubuntu@<IP>
326
+ ```
327
+
328
+ ## JupyterLab
329
+
330
+ ### Launch from console
331
+
332
+ 1. Go to the Instances page
333
+ 2. Click "Launch" in the Cloud IDE column
334
+ 3. JupyterLab opens in your browser
335
+
336
+ ### Manual access
337
+
338
+ ```bash
339
+ # On instance
340
+ jupyter lab --ip=0.0.0.0 --port=8888
341
+
342
+ # From local machine with tunnel
343
+ ssh -L 8888:localhost:8888 ubuntu@<IP>
344
+ # Open http://localhost:8888
345
+ ```
346
+
347
+ ## Training workflows
348
+
349
+ ### Single-GPU training
350
+
351
+ ```bash
352
+ # SSH to instance
353
+ ssh ubuntu@<IP>
354
+
355
+ # Clone repo
356
+ git clone https://github.com/user/project
357
+ cd project
358
+
359
+ # Install dependencies
360
+ pip install -r requirements.txt
361
+
362
+ # Train
363
+ python train.py --epochs 100 --checkpoint-dir /lambda/nfs/storage/checkpoints
364
+ ```
365
+
366
+ ### Multi-GPU training (single node)
367
+
368
+ ```python
369
+ # train_ddp.py
370
+ import torch
371
+ import torch.distributed as dist
372
+ from torch.nn.parallel import DistributedDataParallel as DDP
373
+
374
+ def main():
375
+ dist.init_process_group("nccl")
376
+ rank = dist.get_rank()
377
+ device = rank % torch.cuda.device_count()
378
+
379
+ model = MyModel().to(device)
380
+ model = DDP(model, device_ids=[device])
381
+
382
+ # Training loop...
383
+
384
+ if __name__ == "__main__":
385
+ main()
386
+ ```
387
+
388
+ ```bash
389
+ # Launch with torchrun (8 GPUs)
390
+ torchrun --nproc_per_node=8 train_ddp.py
391
+ ```
392
+
393
+ ### Checkpoint to filesystem
394
+
395
+ ```python
396
+ import os
397
+
398
+ checkpoint_dir = "/lambda/nfs/my-storage/checkpoints"
399
+ os.makedirs(checkpoint_dir, exist_ok=True)
400
+
401
+ # Save checkpoint
402
+ torch.save({
403
+ 'epoch': epoch,
404
+ 'model_state_dict': model.state_dict(),
405
+ 'optimizer_state_dict': optimizer.state_dict(),
406
+ 'loss': loss,
407
+ }, f"{checkpoint_dir}/checkpoint_{epoch}.pt")
408
+ ```
409
+
410
+ ## 1-Click Clusters
411
+
412
+ ### Overview
413
+
414
+ High-performance Slurm clusters with:
415
+ - 16-512 NVIDIA H100 or B200 GPUs
416
+ - NVIDIA Quantum-2 400 Gb/s InfiniBand
417
+ - GPUDirect RDMA at 3200 Gb/s
418
+ - Pre-installed distributed ML stack
419
+
420
+ ### Included software
421
+
422
+ - Ubuntu 22.04 LTS + Lambda Stack
423
+ - NCCL, Open MPI
424
+ - PyTorch with DDP and FSDP
425
+ - TensorFlow
426
+ - OFED drivers
427
+
428
+ ### Storage
429
+
430
+ - 24 TB NVMe per compute node (ephemeral)
431
+ - Lambda filesystems for persistent data
432
+
433
+ ### Multi-node training
434
+
435
+ ```bash
436
+ # On Slurm cluster
437
+ srun --nodes=4 --ntasks-per-node=8 --gpus-per-node=8 \
438
+ torchrun --nnodes=4 --nproc_per_node=8 \
439
+ --rdzv_backend=c10d --rdzv_endpoint=$MASTER_ADDR:29500 \
440
+ train.py
441
+ ```
442
+
443
+ ## Networking
444
+
445
+ ### Bandwidth
446
+
447
+ - Inter-instance (same region): up to 200 Gbps
448
+ - Internet outbound: 20 Gbps max
449
+
450
+ ### Firewall
451
+
452
+ - Default: Only port 22 (SSH) open
453
+ - Configure additional ports in the Lambda console
454
+ - ICMP traffic allowed by default
455
+
456
+ ### Private IPs
457
+
458
+ ```bash
459
+ # Find private IP
460
+ ip addr show | grep 'inet '
461
+ ```
462
+
463
+ ## Common workflows
464
+
465
+ ### Workflow 1: Fine-tuning LLM
466
+
467
+ ```bash
468
+ # 1. Launch 8x H100 instance with filesystem
469
+
470
+ # 2. SSH and setup
471
+ ssh ubuntu@<IP>
472
+ pip install transformers accelerate peft
473
+
474
+ # 3. Download model to filesystem
475
+ python -c "
476
+ from transformers import AutoModelForCausalLM
477
+ model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf')
478
+ model.save_pretrained('/lambda/nfs/storage/models/llama-2-7b')
479
+ "
480
+
481
+ # 4. Fine-tune with checkpoints on filesystem
482
+ accelerate launch --num_processes 8 train.py \
483
+ --model_path /lambda/nfs/storage/models/llama-2-7b \
484
+ --output_dir /lambda/nfs/storage/outputs \
485
+ --checkpoint_dir /lambda/nfs/storage/checkpoints
486
+ ```
487
+
488
+ ### Workflow 2: Batch inference
489
+
490
+ ```bash
491
+ # 1. Launch A10 instance (cost-effective for inference)
492
+
493
+ # 2. Run inference
494
+ python inference.py \
495
+ --model /lambda/nfs/storage/models/fine-tuned \
496
+ --input /lambda/nfs/storage/data/inputs.jsonl \
497
+ --output /lambda/nfs/storage/data/outputs.jsonl
498
+ ```
499
+
500
+ ## Cost optimization
501
+
502
+ ### Choose the right GPU
503
+
504
+ | Task | Recommended GPU |
505
+ |------|-----------------|
506
+ | LLM fine-tuning (7B) | A100 40GB |
507
+ | LLM fine-tuning (70B) | 8x H100 |
508
+ | Inference | A10, A6000 |
509
+ | Development | V100, A10 |
510
+ | Maximum performance | B200 |
511
+
512
+ ### Reduce costs
513
+
514
+ 1. **Use filesystems**: Avoid re-downloading data
515
+ 2. **Checkpoint frequently**: Resume interrupted training
516
+ 3. **Right-size**: Don't over-provision GPUs
517
+ 4. **Terminate idle instances**: There is no auto-stop, so terminate instances manually when they are no longer needed
518
+
519
+ ### Monitor usage
520
+
521
+ - Dashboard shows real-time GPU utilization
522
+ - API for programmatic monitoring
523
+
524
+ ## Common issues
525
+
526
+ | Issue | Solution |
527
+ |-------|----------|
528
+ | Instance won't launch | Check region availability, try different GPU |
529
+ | SSH connection refused | Wait for instance to initialize (3-15 min) |
530
+ | Data lost after terminate | Use persistent filesystems |
531
+ | Slow data transfer | Use filesystem in same region |
532
+ | GPU not detected | Reboot instance, check drivers |
533
+
534
+ ## References
535
+
536
+ - **[Advanced Usage](references/advanced-usage.md)** - Multi-node training, API automation
537
+ - **[Troubleshooting](references/troubleshooting.md)** - Common issues and solutions
538
+
539
+ ## Resources
540
+
541
+ - **Documentation**: https://docs.lambda.ai
542
+ - **Console**: https://cloud.lambda.ai
543
+ - **Pricing**: https://lambda.ai/instances
544
+ - **Support**: https://support.lambdalabs.com
545
+ - **Blog**: https://lambda.ai/blog