@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,467 @@
1
+ ---
2
+ name: speculative-decoding
3
+ description: Accelerate LLM inference using speculative decoding, Medusa multiple heads, and lookahead decoding techniques. Use when optimizing inference speed (1.5-3.6× speedup), reducing latency for real-time applications, or deploying models with limited compute. Covers draft models, tree-based attention, Jacobi iteration, parallel token generation, and production deployment strategies.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Emerging Techniques, Speculative Decoding, Medusa, Lookahead Decoding, Fast Inference, Draft Models, Tree Attention, Parallel Generation, Latency Reduction, Inference Optimization]
8
+ dependencies: [transformers, torch]
9
+ ---
10
+
11
+ # Speculative Decoding: Accelerating LLM Inference
12
+
13
+ ## When to Use This Skill
14
+
15
+ Use Speculative Decoding when you need to:
16
+ - **Speed up inference** by 1.5-3.6× without quality loss
17
+ - **Reduce latency** for real-time applications (chatbots, code generation)
18
+ - **Optimize throughput** for high-volume serving
19
+ - **Deploy efficiently** on limited hardware
20
+ - **Generate faster** without changing model architecture
21
+
22
+ **Key Techniques**: Draft model speculative decoding, Medusa (multiple heads), Lookahead Decoding (Jacobi iteration)
23
+
24
+ **Papers**: Medusa (arXiv 2401.10774), Lookahead Decoding (ICML 2024), Speculative Decoding Survey (ACL 2024)
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ # Standard speculative decoding (transformers)
30
+ pip install transformers accelerate
31
+
32
+ # Medusa (multiple decoding heads)
33
+ git clone https://github.com/FasterDecoding/Medusa
34
+ cd Medusa
35
+ pip install -e .
36
+
37
+ # Lookahead Decoding
38
+ git clone https://github.com/hao-ai-lab/LookaheadDecoding
39
+ cd LookaheadDecoding
40
+ pip install -e .
41
+
42
+ # Optional: vLLM with speculative decoding
43
+ pip install vllm
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ### Basic Speculative Decoding (Draft Model)
49
+
50
+ ```python
51
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
52
+
53
+ # Load target model (large, slow)
54
+ target_model = AutoModelForCausalLM.from_pretrained(
55
+ "meta-llama/Llama-2-70b-hf",
56
+ device_map="auto",
57
+ torch_dtype=torch.float16
58
+ )
59
+
60
+ # Load draft model (small, fast)
61
+ draft_model = AutoModelForCausalLM.from_pretrained(
62
+ "meta-llama/Llama-2-7b-hf",
63
+ device_map="auto",
64
+ torch_dtype=torch.float16
65
+ )
66
+
67
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-hf")
68
+
69
+ # Generate with speculative decoding
70
+ prompt = "Explain quantum computing in simple terms:"
71
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
72
+
73
+ # Transformers 4.36+ supports assisted generation
74
+ outputs = target_model.generate(
75
+ **inputs,
76
+ assistant_model=draft_model, # Enable speculative decoding
77
+ max_new_tokens=256,
78
+ do_sample=True,
79
+ temperature=0.7,
80
+ )
81
+
82
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
83
+ print(response)
84
+ ```
85
+
86
+ ### Medusa (Multiple Decoding Heads)
87
+
88
+ ```python
89
+ import torch
+ from transformers import AutoTokenizer
+ from medusa.model.medusa_model import MedusaModel
90
+
91
+ # Load Medusa-enhanced model
92
+ model = MedusaModel.from_pretrained(
93
+ "FasterDecoding/medusa-vicuna-7b-v1.3", # Pre-trained with Medusa heads
94
+ torch_dtype=torch.float16,
95
+ device_map="auto"
96
+ )
97
+
98
+ tokenizer = AutoTokenizer.from_pretrained("FasterDecoding/medusa-vicuna-7b-v1.3")
99
+
100
+ # Generate with Medusa (2-3× speedup)
101
+ prompt = "Write a Python function to calculate fibonacci numbers:"
102
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
103
+
104
+ outputs = model.medusa_generate(
105
+ **inputs,
106
+ max_new_tokens=256,
107
+ temperature=0.7,
108
+ posterior_threshold=0.09, # Acceptance threshold
109
+ posterior_alpha=0.3, # Tree construction parameter
110
+ )
111
+
112
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
113
+ ```
114
+
115
+ ### Lookahead Decoding (Jacobi Iteration)
116
+
117
+ ```python
118
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from lookahead.lookahead_decoding import LookaheadDecoding
119
+
120
+ # Load model
121
+ model = AutoModelForCausalLM.from_pretrained(
122
+ "meta-llama/Llama-2-7b-hf",
123
+ torch_dtype=torch.float16,
124
+ device_map="auto"
125
+ )
126
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
127
+
128
+ # Initialize lookahead decoding
129
+ lookahead = LookaheadDecoding(
130
+ model=model,
131
+ tokenizer=tokenizer,
132
+ window_size=15, # Lookahead window (W)
133
+ ngram_size=5, # N-gram size (N)
134
+ guess_size=5 # Number of parallel guesses
135
+ )
136
+
137
+ # Generate (1.5-2.3× speedup)
138
+ prompt = "Implement quicksort in Python:"
139
+ output = lookahead.generate(prompt, max_new_tokens=256)
140
+ print(output)
141
+ ```
142
+
143
+ ## Core Concepts
144
+
145
+ ### 1. Speculative Decoding (Draft Model)
146
+
147
+ **Idea**: Use small draft model to generate candidates, large target model to verify in parallel.
148
+
149
+ **Algorithm**:
150
+ 1. Draft model generates K tokens speculatively
151
+ 2. Target model evaluates all K tokens in parallel (single forward pass)
152
+ 3. Accept tokens where draft and target agree
153
+ 4. Reject first disagreement, continue from there
154
+
155
+ ```python
156
+ def speculative_decode(target_model, draft_model, prompt, K=4):
157
+ """Speculative decoding algorithm."""
158
+ # 1. Generate K draft tokens
159
+ draft_tokens = draft_model.generate(prompt, max_new_tokens=K)
160
+
161
+ # 2. Target model evaluates all K tokens in one forward pass
162
+ target_logits = target_model(draft_tokens) # Parallel!
163
+
164
+ # 3. Accept/reject based on probability match
165
+ accepted = []
166
+ for i in range(K):
167
+ p_draft = softmax(draft_model.logits[i])
168
+ p_target = softmax(target_logits[i])
169
+
170
+ # Acceptance probability
171
+ if random.random() < min(1, p_target[draft_tokens[i]] / p_draft[draft_tokens[i]]):
172
+ accepted.append(draft_tokens[i])
173
+ else:
174
+ break # Reject, resample from target
175
+
176
+ return accepted
177
+ ```
178
+
179
+ **Performance**:
180
+ - Speedup: 1.5-2× with good draft model
181
+ - Zero quality loss (mathematically equivalent to target model)
182
+ - Best when draft model is 5-10× smaller than target
183
+
184
+ ### 2. Medusa (Multiple Decoding Heads)
185
+
186
+ **Source**: arXiv 2401.10774 (2024)
187
+
188
+ **Innovation**: Add multiple prediction heads to an existing model to predict future tokens, without needing a separate draft model.
189
+
190
+ **Architecture**:
191
+ ```
192
+ Input → Base LLM (frozen) → Hidden State
193
+ ├→ Head 1 (predicts token t+1)
194
+ ├→ Head 2 (predicts token t+2)
195
+ ├→ Head 3 (predicts token t+3)
196
+ └→ Head 4 (predicts token t+4)
197
+ ```
198
+
199
+ **Training**:
200
+ - **Medusa-1**: Freeze base LLM, train only heads
201
+ - 2.2× speedup, lossless
202
+ - **Medusa-2**: Fine-tune base LLM + heads together
203
+ - 2.3-3.6× speedup, better quality
204
+
205
+ **Tree-based Attention**:
206
+ ```python
207
+ # Medusa constructs tree of candidates
208
+ # Example: Predict 2 steps ahead with top-2 per step
209
+
210
+ # Root
211
+ # / \
212
+ # T1a T1b (Step 1: 2 candidates)
213
+ # / \ / \
214
+ # T2a T2b T2c T2d (Step 2: 4 candidates total)
215
+
216
+ # Single forward pass evaluates entire tree!
217
+ ```
218
+
219
+ **Advantages**:
220
+ - No separate draft model needed
221
+ - Minimal training (only heads)
222
+ - Compatible with any LLM
223
+
224
+ ### 3. Lookahead Decoding (Jacobi Iteration)
225
+
226
+ **Source**: ICML 2024
227
+
228
+ **Core idea**: Reformulate autoregressive decoding as solving a system of equations, solved in parallel using Jacobi iteration.
229
+
230
+ **Mathematical formulation**:
231
+ ```
232
+ Traditional: y_t = f(x, y_1, ..., y_{t-1}) (sequential)
233
+ Jacobi: y_t^{(k+1)} = f(x, y_1^{(k)}, ..., y_{t-1}^{(k)}) (parallel)
234
+ ```
235
+
236
+ **Two branches**:
237
+
238
+ 1. **Lookahead Branch**: Generate n-grams in parallel
239
+ - Window size W: How many steps to look ahead
240
+ - N-gram size N: How many past tokens to use
241
+
242
+ 2. **Verification Branch**: Verify promising n-grams
243
+ - Match n-grams with generated tokens
244
+ - Accept if first token matches
245
+
246
+ ```python
247
+ class LookaheadDecoding:
248
+ def __init__(self, model, window_size=15, ngram_size=5):
249
+ self.model = model
250
+ self.W = window_size # Lookahead window
251
+ self.N = ngram_size # N-gram size
252
+
253
+ def generate_step(self, tokens):
254
+ # Lookahead branch: Generate W × N candidates
255
+ candidates = {}
256
+ for w in range(1, self.W + 1):
257
+ for n in range(1, self.N + 1):
258
+ # Generate n-gram starting at position w
259
+ ngram = self.generate_ngram(tokens, start=w, length=n)
260
+ candidates[(w, n)] = ngram
261
+
262
+ # Verification branch: Find matching n-grams
263
+ verified = []
264
+ for ngram in candidates.values():
265
+ if ngram[0] == tokens[-1]: # First token matches last input
266
+ if self.verify(tokens, ngram):
267
+ verified.append(ngram)
268
+
269
+ # Accept longest verified n-gram
270
+ return max(verified, key=len) if verified else [self.model.generate_next(tokens)]
271
+ ```
272
+
273
+ **Performance**:
274
+ - Speedup: 1.5-2.3× (up to 3.6× for code generation)
275
+ - No draft model or training needed
276
+ - Works out-of-the-box with any model
277
+
278
+ ## Method Comparison
279
+
280
+ | Method | Speedup | Training Needed | Draft Model | Quality Loss |
281
+ |--------|---------|-----------------|-------------|--------------|
282
+ | **Draft Model Speculative** | 1.5-2× | No | Yes (external) | None |
283
+ | **Medusa** | 2-3.6× | Minimal (heads only) | No (built-in heads) | None |
284
+ | **Lookahead** | 1.5-2.3× | None | No | None |
285
+ | **Naive Batching** | 1.2-1.5× | No | No | None |
286
+
287
+ ## Advanced Patterns
288
+
289
+ ### Training Medusa Heads
290
+
291
+ ```python
292
+ from medusa.model.medusa_model import MedusaModel
293
+ from medusa.model.kv_cache import initialize_past_key_values
294
+ import torch.nn as nn, torch.nn.functional as F
295
+
296
+ # 1. Load base model
297
+ base_model = AutoModelForCausalLM.from_pretrained(
298
+ "lmsys/vicuna-7b-v1.3",
299
+ torch_dtype=torch.float16
300
+ )
301
+
302
+ # 2. Add Medusa heads
303
+ num_heads = 4
304
+ medusa_heads = nn.ModuleList([
305
+ nn.Linear(base_model.config.hidden_size, base_model.config.vocab_size, bias=False)
306
+ for _ in range(num_heads)
307
+ ])
308
+
309
+ # 3. Training loop (freeze base model for Medusa-1)
310
+ for param in base_model.parameters():
311
+ param.requires_grad = False # Freeze base
312
+
313
+ optimizer = torch.optim.Adam(medusa_heads.parameters(), lr=1e-3)
314
+
315
+ for batch in dataloader:
316
+ # Forward pass
317
+ hidden_states = base_model(**batch, output_hidden_states=True).hidden_states[-1]
318
+
319
+ # Predict future tokens with each head
320
+ loss = 0
321
+ for i, head in enumerate(medusa_heads):
322
+ logits = head(hidden_states)
323
+ # Target: tokens shifted by (i+1) positions
324
+         target = batch['input_ids'][:, i+1:]
325
+         # Flatten to [batch*seq, vocab] / [batch*seq] as F.cross_entropy expects
+         loss += F.cross_entropy(
+             logits[:, :-i-1].reshape(-1, logits.size(-1)),
+             target.reshape(-1),
+         )
326
+
327
+ # Backward
328
+ optimizer.zero_grad()
329
+ loss.backward()
330
+ optimizer.step()
331
+ ```
332
+
333
+ ### Hybrid: Speculative + Medusa
334
+
335
+ ```python
336
+ # Use Medusa as draft model for speculative decoding
337
+ draft_medusa = MedusaModel.from_pretrained("medusa-vicuna-7b")
338
+ target_model = AutoModelForCausalLM.from_pretrained("vicuna-33b")
339
+
340
+ # Draft generates multiple candidates with Medusa
341
+ draft_tokens = draft_medusa.medusa_generate(prompt, max_new_tokens=5)
342
+
343
+ # Target verifies in single forward pass
344
+ outputs = target_model.generate(
345
+ prompt,
346
+ assistant_model=draft_medusa, # Use Medusa as draft
347
+ max_new_tokens=256
348
+ )
349
+
350
+ # Combines benefits: Medusa speed + large model quality
351
+ ```
352
+
353
+ ### Optimal Draft Model Selection
354
+
355
+ ```python
356
+ def select_draft_model(target_model_size, target):
357
+ """Select optimal draft model for speculative decoding."""
358
+ # Rule: Draft should be 5-10× smaller
359
+ if target_model_size == "70B":
360
+ return "7B" # 10× smaller
361
+ elif target_model_size == "33B":
362
+ return "7B" # 5× smaller
363
+ elif target_model_size == "13B":
364
+ return "1B" # 13× smaller
365
+ else:
366
+ return None # Target too small, use Medusa/Lookahead instead
367
+
368
+ # Example
369
+ draft = select_draft_model("70B", target_model)
370
+ # Returns "7B" → Use Llama-2-7b as draft for Llama-2-70b
371
+ ```
372
+
373
+ ## Best Practices
374
+
375
+ ### 1. Choose the Right Method
376
+
377
+ ```python
378
+ # New deployment → Medusa (best overall speedup, no draft model)
379
+ if deploying_new_model:
380
+ use_method = "Medusa"
381
+
382
+ # Existing deployment with small model available → Draft speculative
383
+ elif have_small_version_of_model:
384
+ use_method = "Draft Model Speculative"
385
+
386
+ # Want zero training/setup → Lookahead
387
+ elif want_plug_and_play:
388
+ use_method = "Lookahead Decoding"
389
+ ```
390
+
391
+ ### 2. Hyperparameter Tuning
392
+
393
+ **Draft Model Speculative**:
394
+ ```python
395
+ # K = number of speculative tokens
396
+ K = 4 # Good default
397
+ K = 2 # Conservative (higher acceptance)
398
+ K = 8 # Aggressive (lower acceptance, but more when accepted)
399
+
400
+ # Rule: Larger K → more speedup IF draft model is good
401
+ ```
402
+
403
+ **Medusa**:
404
+ ```python
405
+ # Posterior threshold (acceptance confidence)
406
+ posterior_threshold = 0.09 # Standard (from paper)
407
+ posterior_threshold = 0.05 # More conservative (slower, higher quality)
408
+ posterior_threshold = 0.15 # More aggressive (faster, may degrade quality)
409
+
410
+ # Tree depth (how many steps ahead)
411
+ medusa_choices = [[0], [0, 0], [0, 1], [0, 0, 0]] # Depth 3 (standard)
412
+ ```
413
+
414
+ **Lookahead**:
415
+ ```python
416
+ # Window size W (lookahead distance)
417
+ # N-gram size N (context for generation)
418
+
419
+ # 7B model (more resources)
420
+ W, N = 15, 5
421
+
422
+ # 13B model (moderate)
423
+ W, N = 10, 5
424
+
425
+ # 33B+ model (limited resources)
426
+ W, N = 7, 5
427
+ ```
428
+
429
+ ### 3. Production Deployment
430
+
431
+ ```python
432
+ # vLLM with speculative decoding
433
+ from vllm import LLM, SamplingParams
434
+
435
+ # Initialize with draft model
436
+ llm = LLM(
437
+ model="meta-llama/Llama-2-70b-hf",
438
+ speculative_model="meta-llama/Llama-2-7b-hf", # Draft model
439
+ num_speculative_tokens=5,
440
+ use_v2_block_manager=True,
441
+ )
442
+
443
+ # Generate
444
+ prompts = ["Tell me about AI:", "Explain quantum physics:"]
445
+ sampling_params = SamplingParams(temperature=0.7, max_tokens=256)
446
+
447
+ outputs = llm.generate(prompts, sampling_params)
448
+ for output in outputs:
449
+ print(output.outputs[0].text)
450
+ ```
451
+
452
+ ## Resources
453
+
454
+ - **Medusa Paper**: https://arxiv.org/abs/2401.10774
455
+ - **Medusa GitHub**: https://github.com/FasterDecoding/Medusa
456
+ - **Lookahead Decoding (ICML 2024)**: https://lmsys.org/blog/2023-11-21-lookahead-decoding/
457
+ - **Lookahead GitHub**: https://github.com/hao-ai-lab/LookaheadDecoding
458
+ - **Speculative Decoding Survey (ACL 2024)**: https://aclanthology.org/2024.findings-acl.456.pdf
459
+ - **Comprehensive Survey**: https://arxiv.org/abs/2401.07851
460
+
461
+ ## See Also
462
+
463
+ - `references/draft_model.md` - Draft model selection and training
464
+ - `references/medusa.md` - Medusa architecture and training
465
+ - `references/lookahead.md` - Lookahead decoding implementation details
466
+
467
+