@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,494 @@
1
+ ---
2
+ name: nemo-evaluator-sdk
3
+ description: Evaluates LLMs across 100+ benchmarks from 18+ harnesses (MMLU, HumanEval, GSM8K, safety, VLM) with multi-backend execution. Use when you need scalable evaluation on local Docker, Slurm HPC, or cloud platforms. It is NVIDIA's enterprise-grade platform with a container-first architecture for reproducible benchmarking.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Evaluation, NeMo, NVIDIA, Benchmarking, MMLU, HumanEval, Multi-Backend, Slurm, Docker, Reproducible, Enterprise]
8
+ dependencies: [nemo-evaluator-launcher>=0.1.25, docker]
9
+ ---
10
+
11
+ # NeMo Evaluator SDK - Enterprise LLM Benchmarking
12
+
13
+ ## Quick Start
14
+
15
+ NeMo Evaluator SDK evaluates LLMs across 100+ benchmarks from 18+ harnesses using containerized, reproducible evaluation with multi-backend execution (local Docker, Slurm HPC, Lepton cloud).
16
+
17
+ **Installation**:
18
+ ```bash
19
+ pip install nemo-evaluator-launcher
20
+ ```
21
+
22
+ **Set API key and run evaluation**:
23
+ ```bash
24
+ export NGC_API_KEY=nvapi-your-key-here
25
+
26
+ # Create minimal config
27
+ cat > config.yaml << 'EOF'
28
+ defaults:
29
+ - execution: local
30
+ - deployment: none
31
+ - _self_
32
+
33
+ execution:
34
+ output_dir: ./results
35
+
36
+ target:
37
+ api_endpoint:
38
+ model_id: meta/llama-3.1-8b-instruct
39
+ url: https://integrate.api.nvidia.com/v1/chat/completions
40
+ api_key_name: NGC_API_KEY
41
+
42
+ evaluation:
43
+ tasks:
44
+ - name: ifeval
45
+ EOF
46
+
47
+ # Run evaluation
48
+ nemo-evaluator-launcher run --config-dir . --config-name config
49
+ ```
50
+
51
+ **View available tasks**:
52
+ ```bash
53
+ nemo-evaluator-launcher ls tasks
54
+ ```
55
+
56
+ ## Common Workflows
57
+
58
+ ### Workflow 1: Evaluate Model on Standard Benchmarks
59
+
60
+ Run core academic benchmarks (e.g., IFEval, GPQA Diamond, GSM8K, HumanEval) on any OpenAI-compatible endpoint.
61
+
62
+ **Checklist**:
63
+ ```
64
+ Standard Evaluation:
65
+ - [ ] Step 1: Configure API endpoint
66
+ - [ ] Step 2: Select benchmarks
67
+ - [ ] Step 3: Run evaluation
68
+ - [ ] Step 4: Check results
69
+ ```
70
+
71
+ **Step 1: Configure API endpoint**
72
+
73
+ ```yaml
74
+ # config.yaml
75
+ defaults:
76
+ - execution: local
77
+ - deployment: none
78
+ - _self_
79
+
80
+ execution:
81
+ output_dir: ./results
82
+
83
+ target:
84
+ api_endpoint:
85
+ model_id: meta/llama-3.1-8b-instruct
86
+ url: https://integrate.api.nvidia.com/v1/chat/completions
87
+ api_key_name: NGC_API_KEY
88
+ ```
89
+
90
+ For self-hosted endpoints (vLLM, TRT-LLM):
91
+ ```yaml
92
+ target:
93
+ api_endpoint:
94
+ model_id: my-model
95
+ url: http://localhost:8000/v1/chat/completions
96
+ api_key_name: "" # No API key is needed for a local endpoint
97
+ ```
98
+
99
+ **Step 2: Select benchmarks**
100
+
101
+ Add tasks to your config:
102
+ ```yaml
103
+ evaluation:
104
+ tasks:
105
+ - name: ifeval # Instruction following
106
+ - name: gpqa_diamond # Graduate-level QA
107
+ env_vars:
108
+ HF_TOKEN: HF_TOKEN # Some tasks need a Hugging Face token
109
+ - name: gsm8k_cot_instruct # Math reasoning
110
+ - name: humaneval # Code generation
111
+ ```
112
+
113
+ **Step 3: Run evaluation**
114
+
115
+ ```bash
116
+ # Run with config file
117
+ nemo-evaluator-launcher run \
118
+ --config-dir . \
119
+ --config-name config
120
+
121
+ # Override output directory
122
+ nemo-evaluator-launcher run \
123
+ --config-dir . \
124
+ --config-name config \
125
+ -o execution.output_dir=./my_results
126
+
127
+ # Limit samples for quick testing
128
+ nemo-evaluator-launcher run \
129
+ --config-dir . \
130
+ --config-name config \
131
+ -o +evaluation.nemo_evaluator_config.config.params.limit_samples=10
132
+ ```
133
+
134
+ **Step 4: Check results**
135
+
136
+ ```bash
137
+ # Check job status
138
+ nemo-evaluator-launcher status <invocation_id>
139
+
140
+ # List all runs
141
+ nemo-evaluator-launcher ls runs
142
+
143
+ # View results
144
+ cat results/<invocation_id>/<task>/artifacts/results.yml
145
+ ```
146
+
147
+ ### Workflow 2: Run Evaluation on Slurm HPC Cluster
148
+
149
+ Execute large-scale evaluation on HPC infrastructure.
150
+
151
+ **Checklist**:
152
+ ```
153
+ Slurm Evaluation:
154
+ - [ ] Step 1: Configure Slurm settings
155
+ - [ ] Step 2: Set up model deployment
156
+ - [ ] Step 3: Launch evaluation
157
+ - [ ] Step 4: Monitor job status
158
+ ```
159
+
160
+ **Step 1: Configure Slurm settings**
161
+
162
+ ```yaml
163
+ # slurm_config.yaml
164
+ defaults:
165
+ - execution: slurm
166
+ - deployment: vllm
167
+ - _self_
168
+
169
+ execution:
170
+ hostname: cluster.example.com
171
+ account: my_slurm_account
172
+ partition: gpu
173
+ output_dir: /shared/results
174
+ walltime: "04:00:00"
175
+ nodes: 1
176
+ gpus_per_node: 8
177
+ ```
178
+
179
+ **Step 2: Set up model deployment**
180
+
181
+ ```yaml
182
+ deployment:
183
+ checkpoint_path: /shared/models/llama-3.1-8b
184
+ tensor_parallel_size: 2
185
+ data_parallel_size: 4
186
+ max_model_len: 4096
187
+
188
+ target:
189
+ api_endpoint:
190
+ model_id: llama-3.1-8b
191
+ # URL auto-generated by deployment
192
+ ```
193
+
194
+ **Step 3: Launch evaluation**
195
+
196
+ ```bash
197
+ nemo-evaluator-launcher run \
198
+ --config-dir . \
199
+ --config-name slurm_config
200
+ ```
201
+
202
+ **Step 4: Monitor job status**
203
+
204
+ ```bash
205
+ # Check status (queries sacct)
206
+ nemo-evaluator-launcher status <invocation_id>
207
+
208
+ # View detailed info
209
+ nemo-evaluator-launcher info <invocation_id>
210
+
211
+ # Kill if needed
212
+ nemo-evaluator-launcher kill <invocation_id>
213
+ ```
214
+
215
+ ### Workflow 3: Compare Multiple Models
216
+
217
+ Benchmark multiple models on the same tasks for comparison.
218
+
219
+ **Checklist**:
220
+ ```
221
+ Model Comparison:
222
+ - [ ] Step 1: Create base config
223
+ - [ ] Step 2: Run evaluations with model overrides
224
+ - [ ] Step 3: Export and compare results
225
+ ```
226
+
227
+ **Step 1: Create base config**
228
+
229
+ ```yaml
230
+ # base_eval.yaml
231
+ defaults:
232
+ - execution: local
233
+ - deployment: none
234
+ - _self_
235
+
236
+ execution:
237
+ output_dir: ./comparison_results
238
+
239
+ evaluation:
240
+ nemo_evaluator_config:
241
+ config:
242
+ params:
243
+ temperature: 0.01
244
+ parallelism: 4
245
+ tasks:
246
+ - name: mmlu_pro
247
+ - name: gsm8k_cot_instruct
248
+ - name: ifeval
249
+ ```
250
+
251
+ **Step 2: Run evaluations with model overrides**
252
+
253
+ ```bash
254
+ # Evaluate Llama 3.1 8B
255
+ nemo-evaluator-launcher run \
256
+ --config-dir . \
257
+ --config-name base_eval \
258
+ -o target.api_endpoint.model_id=meta/llama-3.1-8b-instruct \
259
+ -o target.api_endpoint.url=https://integrate.api.nvidia.com/v1/chat/completions
260
+
261
+ # Evaluate Mistral 7B
262
+ nemo-evaluator-launcher run \
263
+ --config-dir . \
264
+ --config-name base_eval \
265
+ -o target.api_endpoint.model_id=mistralai/mistral-7b-instruct-v0.3 \
266
+ -o target.api_endpoint.url=https://integrate.api.nvidia.com/v1/chat/completions
267
+ ```
268
+
269
+ **Step 3: Export and compare results**
270
+
271
+ ```bash
272
+ # Export to MLflow
273
+ nemo-evaluator-launcher export <invocation_id_1> --dest mlflow
274
+ nemo-evaluator-launcher export <invocation_id_2> --dest mlflow
275
+
276
+ # Export to local JSON
277
+ nemo-evaluator-launcher export <invocation_id> --dest local --format json
278
+
279
+ # Export to Weights & Biases
280
+ nemo-evaluator-launcher export <invocation_id> --dest wandb
281
+ ```
282
+
283
+ ### Workflow 4: Safety and Vision-Language Evaluation
284
+
285
+ Evaluate models on safety benchmarks and VLM tasks.
286
+
287
+ **Checklist**:
288
+ ```
289
+ Safety/VLM Evaluation:
290
+ - [ ] Step 1: Configure safety tasks
291
+ - [ ] Step 2: Set up VLM tasks (if applicable)
292
+ - [ ] Step 3: Run evaluation
293
+ ```
294
+
295
+ **Step 1: Configure safety tasks**
296
+
297
+ ```yaml
298
+ evaluation:
299
+ tasks:
300
+ - name: aegis # Safety harness
301
+ - name: wildguard # Safety classification
302
+ - name: garak # Security probing
303
+ ```
304
+
305
+ **Step 2: Set up VLM tasks (if applicable)**
306
+
307
+ ```yaml
308
+ # For vision-language models
309
+ target:
310
+ api_endpoint:
311
+ type: vlm # Vision-language endpoint
312
+ model_id: nvidia/llama-3.2-90b-vision-instruct
313
+ url: https://integrate.api.nvidia.com/v1/chat/completions
314
+
315
+ evaluation:
316
+ tasks:
317
+ - name: ocrbench # OCR evaluation
318
+ - name: chartqa # Chart understanding
319
+ - name: mmmu # Multimodal understanding
320
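+ ```
+
+ **Step 3: Run evaluation**
+
+ Launch the run with the config that contains the task lists above:
+
+ ```bash
+ nemo-evaluator-launcher run \
+ --config-dir . \
+ --config-name <config_name>
+ ```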
321
+
322
+ ## When to Use vs Alternatives
323
+
324
+ **Use NeMo Evaluator when:**
325
+ - You need **100+ benchmarks** from 18+ harnesses in one platform
325
+ - You run evaluations on **Slurm HPC clusters** or in the cloud
326
+ - You require **reproducible**, containerized evaluation
327
+ - You evaluate against **OpenAI-compatible APIs** (vLLM, TRT-LLM, NIMs)
328
+ - You need **enterprise-grade** evaluation with result export (MLflow, W&B)
330
+
331
+ **Use alternatives instead:**
332
+ - **lm-evaluation-harness**: Simpler setup for quick local evaluation
333
+ - **bigcode-evaluation-harness**: Focused only on code benchmarks
334
+ - **HELM**: Stanford's broader evaluation (fairness, efficiency)
335
+ - **Custom scripts**: Highly specialized domain evaluation
336
+
337
+ ## Supported Harnesses and Tasks
338
+
339
+ | Harness | Task Count | Categories |
340
+ |---------|-----------|------------|
341
+ | `lm-evaluation-harness` | 60+ | MMLU, GSM8K, HellaSwag, ARC |
342
+ | `simple-evals` | 20+ | GPQA, MATH, AIME |
343
+ | `bigcode-evaluation-harness` | 25+ | HumanEval, MBPP, MultiPL-E |
344
+ | `safety-harness` | 3 | Aegis, WildGuard |
345
+ | `garak` | 1 | Security probing |
346
+ | `vlmevalkit` | 6+ | OCRBench, ChartQA, MMMU |
347
+ | `bfcl` | 6 | Function calling v2/v3 |
348
+ | `mtbench` | 2 | Multi-turn conversation |
349
+ | `livecodebench` | 10+ | Live coding evaluation |
350
+ | `helm` | 15 | Medical domain |
351
+ | `nemo-skills` | 8 | Math, science, agentic |
352
+
353
+ ## Common Issues
354
+
355
+ **Issue: Container pull fails**
356
+
357
+ Ensure NGC credentials are configured:
358
+ ```bash
359
+ docker login nvcr.io -u '$oauthtoken' -p $NGC_API_KEY
360
+ ```
361
+
362
+ **Issue: Task requires environment variable**
363
+
364
+ Some tasks need HF_TOKEN or JUDGE_API_KEY:
365
+ ```yaml
366
+ evaluation:
367
+ tasks:
368
+ - name: gpqa_diamond
369
+ env_vars:
370
+ HF_TOKEN: HF_TOKEN # Key: variable name exposed to the task; value: name of the host env var to read it from
371
+ ```
372
+
373
+ **Issue: Evaluation timeout**
374
+
375
+ Increase parallelism or reduce the number of samples by appending these overrides to `nemo-evaluator-launcher run`:
376
+ ```bash
377
+ -o +evaluation.nemo_evaluator_config.config.params.parallelism=8
378
+ -o +evaluation.nemo_evaluator_config.config.params.limit_samples=100
379
+ ```
380
+
381
+ **Issue: Slurm job not starting**
382
+
383
+ Check Slurm account and partition:
384
+ ```yaml
385
+ execution:
386
+ account: correct_account
387
+ partition: gpu
388
+ qos: normal # May need specific QOS
389
+ ```
390
+
391
+ **Issue: Different results than expected**
392
+
393
+ Verify configuration matches reported settings:
394
+ ```yaml
395
+ evaluation:
396
+ nemo_evaluator_config:
397
+ config:
398
+ params:
399
+ temperature: 0.0 # Deterministic
400
+ num_fewshot: 5 # Check paper's fewshot count
401
+ ```
402
+
403
+ ## CLI Reference
404
+
405
+ | Command | Description |
406
+ |---------|-------------|
407
+ | `run` | Execute evaluation with config |
408
+ | `status <id>` | Check job status |
409
+ | `info <id>` | View detailed job info |
410
+ | `ls tasks` | List available benchmarks |
411
+ | `ls runs` | List all invocations |
412
+ | `export <id>` | Export results (mlflow/wandb/local) |
413
+ | `kill <id>` | Terminate running job |
414
+
415
+ ## Configuration Override Examples
416
+
417
+ ```bash
418
+ # Override model endpoint
419
+ -o target.api_endpoint.model_id=my-model
420
+ -o target.api_endpoint.url=http://localhost:8000/v1/chat/completions
421
+
422
+ # Add evaluation parameters
423
+ -o +evaluation.nemo_evaluator_config.config.params.temperature=0.5
424
+ -o +evaluation.nemo_evaluator_config.config.params.parallelism=8
425
+ -o +evaluation.nemo_evaluator_config.config.params.limit_samples=50
426
+
427
+ # Change execution settings
428
+ -o execution.output_dir=/custom/path
429
+ -o execution.mode=parallel
430
+
431
+ # Dynamically set tasks
432
+ -o 'evaluation.tasks=[{name: ifeval}, {name: gsm8k}]'
433
+ ```
434
+
435
+ ## Python API Usage
436
+
437
+ For programmatic evaluation without the CLI:
438
+
439
+ ```python
440
+ from nemo_evaluator.core.evaluate import evaluate
441
+ from nemo_evaluator.api.api_dataclasses import (
442
+ EvaluationConfig,
443
+ EvaluationTarget,
444
+ ApiEndpoint,
445
+ EndpointType,
446
+ ConfigParams
447
+ )
448
+
449
+ # Configure evaluation
450
+ eval_config = EvaluationConfig(
451
+ type="mmlu_pro",
452
+ output_dir="./results",
453
+ params=ConfigParams(
454
+ limit_samples=10,
455
+ temperature=0.0,
456
+ max_new_tokens=1024,
457
+ parallelism=4
458
+ )
459
+ )
460
+
461
+ # Configure target endpoint
462
+ target_config = EvaluationTarget(
463
+ api_endpoint=ApiEndpoint(
464
+ model_id="meta/llama-3.1-8b-instruct",
465
+ url="https://integrate.api.nvidia.com/v1/chat/completions",
466
+ type=EndpointType.CHAT,
467
+ api_key="nvapi-your-key-here"
468
+ )
469
+ )
470
+
471
+ # Run evaluation
472
+ result = evaluate(eval_cfg=eval_config, target_cfg=target_config)
473
+ ```
474
+
475
+ ## Advanced Topics
476
+
477
+ - **Multi-backend execution**: See [references/execution-backends.md](references/execution-backends.md)
477
+ - **Configuration deep-dive**: See [references/configuration.md](references/configuration.md)
478
+ - **Adapter and interceptor system**: See [references/adapter-system.md](references/adapter-system.md)
479
+ - **Custom benchmark integration**: See [references/custom-benchmarks.md](references/custom-benchmarks.md)
481
+
482
+ ## Requirements
483
+
484
+ - **Python**: 3.10-3.13
485
+ - **Docker**: Required for local execution
486
+ - **NGC API Key**: For pulling containers and using NVIDIA Build
487
+ - **HF_TOKEN**: Required for some benchmarks (GPQA, MMLU)
488
+
489
+ ## Resources
490
+
491
+ - **GitHub**: https://github.com/NVIDIA-NeMo/Evaluator
492
+ - **NGC Containers**: nvcr.io/nvidia/eval-factory/
493
+ - **NVIDIA Build**: https://build.nvidia.com (free hosted models)
494
+ - **Documentation**: https://github.com/NVIDIA-NeMo/Evaluator/tree/main/docs