@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,462 @@
1
+ # Model Merging Evaluation
2
+
3
+ Complete guide to benchmarking and testing merged models based on research best practices.
4
+
5
+ ## Table of Contents
6
+ - Benchmark Suites
7
+ - Evaluation Metrics
8
+ - Testing Methodology
9
+ - Comparison Framework
10
+ - Quality Assurance
11
+
12
+ ## Benchmark Suites
13
+
14
+ ### Open LLM Leaderboard
15
+
16
+ **URL**: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
17
+
18
+ **Tasks** (6 benchmarks):
19
+ 1. **ARC** (AI2 Reasoning Challenge): 25-shot, science questions
20
+ 2. **HellaSwag**: 10-shot, commonsense reasoning
21
+ 3. **MMLU** (Massive Multitask Language Understanding): 5-shot, 57 subjects
22
+ 4. **TruthfulQA**: 0-shot, factual accuracy
23
+ 5. **Winogrande**: 5-shot, commonsense reasoning
24
+ 6. **GSM8K**: 5-shot, grade-school math
25
+
26
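+ **Running Evaluation** (note: the snippet below applies a single `num_fewshot=5` to every task; to reproduce leaderboard numbers, run each task separately with the shot count listed above):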
27
+
28
+ ```python
29
+ from lm_eval import evaluator
30
+
31
+ model = "path/to/merged/model"
32
+
33
+ results = evaluator.simple_evaluate(
34
+ model="hf",
35
+ model_args=f"pretrained={model},dtype=float16",
36
+ tasks=[
37
+ "arc_challenge",
38
+ "hellaswag",
39
+ "hendrycksTest-*", # MMLU
40
+ "truthfulqa_mc",
41
+ "winogrande",
42
+ "gsm8k"
43
+ ],
44
+ num_fewshot=5,
45
+ batch_size=8
46
+ )
47
+
48
+ # Average score
49
+ avg_score = sum(results['results'].values()) / len(results['results'])
50
+ print(f"Average: {avg_score:.2f}")
51
+ ```
52
+
53
+ ### MT-Bench
54
+
55
+ **Focus**: Multi-turn conversation quality
56
+
57
+ **Installation**:
58
+
59
+ ```bash
60
+ git clone https://github.com/lm-sys/FastChat
61
+ cd FastChat
62
+ pip install -e .
63
+ ```
64
+
65
+ **Running**:
66
+
67
+ ```bash
68
+ # Generate responses
69
+ python gen_model_answer.py \
70
+ --model-path path/to/merged/model \
71
+ --model-id merged_model
72
+
73
+ # Judge with GPT-4
74
+ python gen_judgment.py \
75
+ --model-list merged_model \
76
+ --judge-model gpt-4
77
+
78
+ # View scores
79
+ python show_result.py
80
+ ```
81
+
82
+ **Metrics**:
83
+ - Turn 1 score (1-10)
84
+ - Turn 2 score (1-10)
85
+ - Average score
86
+
87
+ ### MMLU (Detailed)
88
+
89
+ **Subjects** (57 total):
90
+ - STEM: Math, Physics, Chemistry, Biology, Computer Science
91
+ - Humanities: History, Philosophy, Law
92
+ - Social Sciences: Economics, Psychology, Sociology
93
+ - Other: Professional subjects (Medicine, Accounting, etc.)
94
+
95
+ ```python
96
+ from lm_eval import evaluator
97
+
98
+ # Run all MMLU subjects
99
+ results = evaluator.simple_evaluate(
100
+ model="hf",
101
+ model_args=f"pretrained={model}",
102
+ tasks="hendrycksTest-*", # All MMLU tasks
103
+ num_fewshot=5
104
+ )
105
+
106
+ # Subject breakdown
107
+ for task, score in results['results'].items():
108
+ subject = task.replace('hendrycksTest-', '')
109
+ print(f"{subject}: {score['acc']:.2%}")
110
+ ```
111
+
112
+ ### HumanEval (Code)
113
+
114
+ **Focus**: Python code generation
115
+
116
+ ```python
117
+ from human_eval.data import write_jsonl, read_problems
118
+ from human_eval.evaluation import evaluate_functional_correctness
119
+
120
+ # Generate completions
121
+ problems = read_problems()
122
+ samples = []
123
+
124
+ for task_id, problem in problems.items():
125
+ prompt = problem['prompt']
126
+ completion = model.generate(prompt)
127
+ samples.append({
128
+ 'task_id': task_id,
129
+ 'completion': completion
130
+ })
131
+
132
+ write_jsonl("samples.jsonl", samples)
133
+
134
+ # Evaluate
135
+ results = evaluate_functional_correctness("samples.jsonl")
136
+ print(f"Pass@1: {results['pass@1']:.2%}")
137
+ ```
138
+
139
+ ## Evaluation Metrics
140
+
141
+ ### Performance Metrics
142
+
143
+ **Accuracy**: Correct predictions / total predictions
144
+ ```python
145
+ def accuracy(predictions, labels):
146
+ correct = sum(p == l for p, l in zip(predictions, labels))
147
+ return correct / len(predictions)
148
+ ```
149
+
150
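+ **Perplexity**: Language modeling quality, computed as the exponential of the mean cross-entropy loss (lower is better). Note: Hugging Face models only populate `.loss` when `labels` are passed (e.g. `labels=tokens['input_ids']`), and the snippet assumes a `tokenizer` is already in scope.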
151
+ ```python
152
+ import torch
153
+
154
+ def perplexity(model, text):
155
+ tokens = tokenizer(text, return_tensors='pt')
156
+ with torch.no_grad():
157
+ loss = model(**tokens).loss
158
+ return torch.exp(loss).item()
159
+ ```
160
+
161
+ **BLEU Score**: Translation/generation quality
162
+ ```python
163
+ from nltk.translate.bleu_score import sentence_bleu
164
+
165
+ reference = [["the", "cat", "sat", "on", "the", "mat"]]
166
+ candidate = ["the", "cat", "is", "on", "the", "mat"]
167
+
168
+ score = sentence_bleu(reference, candidate)
169
+ ```
170
+
171
+ ### Capability Retention
172
+
173
+ **Test**: Does the merged model retain its parents' capabilities?
174
+
175
+ ```python
176
+ def test_capability_retention(merged_model, parent_models, test_suite):
177
+ """Check if merged model maintains parent capabilities."""
178
+ results = {}
179
+
180
+ # Baseline: Test parent models
181
+ for i, parent in enumerate(parent_models):
182
+ parent_score = evaluate(parent, test_suite)
183
+ results[f'parent_{i}'] = parent_score
184
+
185
+ # Test merged model
186
+ merged_score = evaluate(merged_model, test_suite)
187
+ results['merged'] = merged_score
188
+
189
+ # Retention percentage
190
+ avg_parent_score = sum(s for k, s in results.items() if k.startswith('parent')) / len(parent_models)
191
+ retention = merged_score / avg_parent_score
192
+
193
+ print(f"Capability Retention: {retention:.1%}")
194
+ return retention >= 0.95 # 95% retention threshold
195
+ ```
196
+
197
+ ### Conflict Detection
198
+
199
+ **Test**: Does the model show conflicting behaviors?
200
+
201
+ ```python
202
+ def test_conflicts(model, test_pairs):
203
+ """Test for contradictory outputs."""
204
+ conflicts = []
205
+
206
+ for question_a, question_b, expected_consistency in test_pairs:
207
+ answer_a = model.generate(question_a)
208
+ answer_b = model.generate(question_b)
209
+
210
+ # Check consistency
211
+ is_consistent = check_semantic_similarity(answer_a, answer_b)
212
+
213
+ if is_consistent != expected_consistency:
214
+ conflicts.append((question_a, question_b, answer_a, answer_b))
215
+
216
+ conflict_rate = len(conflicts) / len(test_pairs)
217
+ print(f"Conflict Rate: {conflict_rate:.1%}")
218
+
219
+ return conflict_rate < 0.05 # <5% conflicts acceptable
220
+ ```
221
+
222
+ ## Testing Methodology
223
+
224
+ ### Pre-Merge Testing
225
+
226
+ **Before merging**, establish baselines:
227
+
228
+ ```python
229
+ # Test parent models
230
+ parent_1_scores = evaluate(parent_1, benchmark_suite)
231
+ parent_2_scores = evaluate(parent_2, benchmark_suite)
232
+
233
+ # Expected range for merged model
234
+ min_expected = min(parent_1_scores, parent_2_scores)
235
+ max_expected = max(parent_1_scores, parent_2_scores)
236
+
237
+ print(f"Expected merged score: {min_expected:.2f} - {max_expected:.2f}")
238
+ ```
239
+
240
+ ### Post-Merge Testing
241
+
242
+ **Comprehensive evaluation**:
243
+
244
+ ```python
245
+ def comprehensive_eval(merged_model):
246
+ """Full evaluation suite."""
247
+ results = {}
248
+
249
+ # 1. General capabilities
250
+ results['open_llm'] = evaluate_open_llm(merged_model)
251
+
252
+ # 2. Conversation
253
+ results['mt_bench'] = evaluate_mt_bench(merged_model)
254
+
255
+ # 3. Domain-specific
256
+ results['math'] = evaluate_math(merged_model) # GSM8K, MATH
257
+ results['code'] = evaluate_code(merged_model) # HumanEval
258
+ results['reasoning'] = evaluate_reasoning(merged_model) # ARC, HellaSwag
259
+
260
+ # 4. Safety
261
+ results['safety'] = evaluate_safety(merged_model) # TruthfulQA
262
+
263
+ return results
264
+ ```
265
+
266
+ ### A/B Testing
267
+
268
+ **Compare the merged model against its parents**:
269
+
270
+ ```python
271
+ def ab_test(model_a, model_b, test_prompts, n_users=100):
272
+ """User preference testing."""
273
+ preferences = {'a': 0, 'b': 0, 'tie': 0}
274
+
275
+ for prompt in test_prompts:
276
+ response_a = model_a.generate(prompt)
277
+ response_b = model_b.generate(prompt)
278
+
279
+ # Simulated user preference (or use GPT-4 as judge)
280
+ preference = judge_responses(prompt, response_a, response_b)
281
+ preferences[preference] += 1
282
+
283
+ a_win_rate = preferences['a'] / (preferences['a'] + preferences['b'] + preferences['tie'])
284
+
285
+ print(f"Model A Win Rate: {a_win_rate:.1%}")
286
+ print(f"Tie Rate: {preferences['tie'] / len(test_prompts):.1%}")
287
+
288
+ return a_win_rate
289
+ ```
290
+
291
+ ## Comparison Framework
292
+
293
+ ### Score Comparison Table
294
+
295
+ ```python
296
+ import pandas as pd
297
+
298
+ def compare_models(models, benchmarks):
299
+ """Create comparison table."""
300
+ results = {}
301
+
302
+ for model_name, model_path in models.items():
303
+ results[model_name] = {}
304
+
305
+ for benchmark_name, benchmark_fn in benchmarks.items():
306
+ score = benchmark_fn(model_path)
307
+ results[model_name][benchmark_name] = score
308
+
309
+ # Create DataFrame
310
+ df = pd.DataFrame(results).T
311
+
312
+ # Add average column
313
+ df['Average'] = df.mean(axis=1)
314
+
315
+ # Print the comparison table as Markdown
316
+ print(df.to_markdown())
317
+
318
+ return df
319
+
320
+ # Usage
321
+ models = {
322
+ 'Parent 1': 'path/to/parent1',
323
+ 'Parent 2': 'path/to/parent2',
324
+ 'Merged (SLERP t=0.5)': 'path/to/merged_0.5',
325
+ 'Merged (TIES)': 'path/to/merged_ties'
326
+ }
327
+
328
+ benchmarks = {
329
+ 'MMLU': evaluate_mmlu,
330
+ 'ARC': evaluate_arc,
331
+ 'GSM8K': evaluate_gsm8k
332
+ }
333
+
334
+ df = compare_models(models, benchmarks)
335
+ ```
336
+
337
+ ### Statistical Significance
338
+
339
+ ```python
340
+ from scipy import stats
341
+
342
+ def is_improvement_significant(scores_a, scores_b, alpha=0.05):
343
+ """Test if improvement is statistically significant."""
344
+ # Paired t-test
345
+ t_stat, p_value = stats.ttest_rel(scores_a, scores_b)
346
+
347
+ is_significant = p_value < alpha
348
+ improvement = (sum(scores_b) - sum(scores_a)) / len(scores_a)
349
+
350
+ print(f"Mean improvement: {improvement:.2f}")
351
+ print(f"P-value: {p_value:.4f}")
352
+ print(f"Significant: {is_significant}")
353
+
354
+ return is_significant
355
+ ```
356
+
357
+ ## Quality Assurance
358
+
359
+ ### Regression Testing
360
+
361
+ **Ensure no capability loss**:
362
+
363
+ ```python
364
+ def regression_test(merged_model, parent_models, critical_tests):
365
+ """Check for performance regressions."""
366
+ regressions = []
367
+
368
+ for test_name, test_fn in critical_tests.items():
369
+ # Parent scores
370
+ parent_scores = [test_fn(p) for p in parent_models]
371
+ min_parent_score = min(parent_scores)
372
+
373
+ # Merged score
374
+ merged_score = test_fn(merged_model)
375
+
376
+ # Regression if merged < min parent
377
+ if merged_score < min_parent_score * 0.95: # 5% tolerance
378
+ regressions.append({
379
+ 'test': test_name,
380
+ 'parents': parent_scores,
381
+ 'merged': merged_score,
382
+ 'delta': merged_score - min_parent_score
383
+ })
384
+
385
+ if regressions:
386
+ print(f"⚠️ {len(regressions)} regressions detected:")
387
+ for r in regressions:
388
+ print(f" - {r['test']}: {r['delta']:.2%} drop")
389
+
390
+ return len(regressions) == 0
391
+ ```
392
+
393
+ ### Sanity Checks
394
+
395
+ ```python
396
+ def sanity_checks(model):
397
+ """Basic functionality tests."""
398
+ tests = {
399
+ 'generates': lambda: model.generate("Hello") != "",
400
+ 'coherent': lambda: len(model.generate("The capital of France is")) > 5,
401
+ 'follows_instruction': lambda: "paris" in model.generate("What is the capital of France?").lower(),
402
+ 'no_repetition': lambda: not has_repetition(model.generate("Tell me about AI", max_length=100))
403
+ }
404
+
405
+ results = {name: test() for name, test in tests.items()}
406
+
407
+ passed = sum(results.values())
408
+ total = len(results)
409
+
410
+ print(f"Sanity Checks: {passed}/{total} passed")
411
+
412
+ for name, result in results.items():
413
+ status = "✓" if result else "✗"
414
+ print(f" {status} {name}")
415
+
416
+ return passed == total
417
+ ```
418
+
419
+ ### Deployment Checklist
420
+
421
+ Before deploying the merged model:
422
+
423
+ - [ ] Open LLM Leaderboard score >= min(parent scores)
424
+ - [ ] MT-Bench score >= avg(parent scores)
425
+ - [ ] Domain-specific benchmarks pass
426
+ - [ ] No regressions in critical tests
427
+ - [ ] Sanity checks all pass
428
+ - [ ] A/B test win rate >= 45%
429
+ - [ ] Safety checks pass (TruthfulQA)
430
+ - [ ] Manual testing with diverse prompts
431
+ - [ ] Model size acceptable for deployment
432
+ - [ ] Inference speed acceptable
433
+
434
+ ## Benchmark Interpretation
435
+
436
+ ### Open LLM Leaderboard Ranges
437
+
438
+ | Score | Quality |
439
+ |-------|---------|
440
+ | <60 | Poor - likely broken |
441
+ | 60-65 | Below average |
442
+ | 65-70 | Average |
443
+ | 70-75 | Good |
444
+ | 75-80 | Excellent |
445
+ | >80 | State-of-the-art |
446
+
447
+ ### MT-Bench Ranges
448
+
449
+ | Score | Quality |
450
+ |-------|---------|
451
+ | <6.0 | Poor conversation |
452
+ | 6.0-7.0 | Acceptable |
453
+ | 7.0-8.0 | Good |
454
+ | 8.0-9.0 | Excellent |
455
+ | >9.0 | Near human-level |
456
+
457
+ ## Resources
458
+
459
+ - **lm-evaluation-harness**: https://github.com/EleutherAI/lm-evaluation-harness
460
+ - **MT-Bench**: https://github.com/lm-sys/FastChat
461
+ - **HumanEval**: https://github.com/openai/human-eval
462
+ - **Open LLM Leaderboard**: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard