@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,382 @@
1
+ # Usage Examples
2
+
3
+ This document provides practical examples for both methods of adding evaluations to HuggingFace model cards.
4
+
5
+ ## Table of Contents
6
+ 1. [Setup](#setup)
7
+ 2. [Method 1: Extract from README](#method-1-extract-from-readme)
8
+ 3. [Method 2: Import from Artificial Analysis](#method-2-import-from-artificial-analysis)
9
+ 4. [Standalone vs Integrated](#standalone-vs-integrated)
10
+ 5. [Common Workflows](#common-workflows)
11
+
12
+ ## Setup
13
+
14
+ ### Initial Configuration
15
+
16
+ ```bash
17
+ # Navigate to skill directory
18
+ cd hf_evaluation_skill
19
+
20
+ # Install dependencies
21
+ uv add huggingface_hub python-dotenv pyyaml requests
22
+
23
+ # Configure environment variables
24
+ cp examples/.env.example .env
25
+ # Edit .env with your tokens
26
+ ```
27
+
28
+ Your `.env` file should contain:
29
+ ```env
30
+ HF_TOKEN=hf_your_write_token_here
31
+ AA_API_KEY=aa_your_api_key_here # Optional for AA imports
32
+ ```
33
+
34
+ ### Verify Installation
35
+
36
+ ```bash
37
+ cd scripts
38
+ python3 test_extraction.py
39
+ ```
40
+
41
+ ## Method 1: Extract from README
42
+
43
+ Extract evaluation tables from your model's existing README.
44
+
45
+ ### Basic Extraction
46
+
47
+ ```bash
48
+ # Preview what will be extracted (dry run)
49
+ python3 scripts/evaluation_manager.py extract-readme \
50
+ --repo-id "meta-llama/Llama-3.3-70B-Instruct" \
51
+ --dry-run
52
+ ```
53
+
54
+ ### Apply Extraction to Your Model
55
+
56
+ ```bash
57
+ # Extract and update model card directly
58
+ python3 scripts/evaluation_manager.py extract-readme \
59
+ --repo-id "your-username/your-model-7b"
60
+ ```
61
+
62
+ ### Custom Task and Dataset Names
63
+
64
+ ```bash
65
+ python3 scripts/evaluation_manager.py extract-readme \
66
+ --repo-id "your-username/your-model-7b" \
67
+ --task-type "text-generation" \
68
+ --dataset-name "Standard Benchmarks" \
69
+ --dataset-type "llm_benchmarks"
70
+ ```
71
+
72
+ ### Create Pull Request (for models you don't own)
73
+
74
+ ```bash
75
+ python3 scripts/evaluation_manager.py extract-readme \
76
+ --repo-id "organization/community-model" \
77
+ --create-pr
78
+ ```
79
+
80
+ ### Example README Format
81
+
82
+ Your model README should contain tables like:
83
+
84
+ ```markdown
85
+ ## Evaluation Results
86
+
87
+ | Benchmark | Score |
88
+ |---------------|-------|
89
+ | MMLU | 85.2 |
90
+ | HumanEval | 72.5 |
91
+ | GSM8K | 91.3 |
92
+ | HellaSwag | 88.9 |
93
+ ```
94
+
95
+ ## Method 2: Import from Artificial Analysis
96
+
97
+ Fetch benchmark scores directly from Artificial Analysis API.
98
+
99
+ ### Integrated Approach (Recommended)
100
+
101
+ ```bash
102
+ # Import scores for Claude Sonnet 4
103
+ python3 scripts/evaluation_manager.py import-aa \
104
+ --creator-slug "anthropic" \
105
+ --model-name "claude-sonnet-4" \
106
+ --repo-id "your-username/claude-mirror"
107
+ ```
108
+
109
+ ### With Pull Request
110
+
111
+ ```bash
112
+ # Create PR instead of direct commit
113
+ python3 scripts/evaluation_manager.py import-aa \
114
+ --creator-slug "openai" \
115
+ --model-name "gpt-4" \
116
+ --repo-id "your-username/gpt-4-mirror" \
117
+ --create-pr
118
+ ```
119
+
120
+ ### Standalone Script
121
+
122
+ For simple, one-off imports, use the standalone script:
123
+
124
+ ```bash
125
+ # Navigate to examples directory
126
+ cd examples
127
+
128
+ # Run standalone script
129
+ AA_API_KEY="your-key" HF_TOKEN="your-token" \
130
+ python3 artificial_analysis_to_hub.py \
131
+ --creator-slug "anthropic" \
132
+ --model-name "claude-sonnet-4" \
133
+ --repo-id "your-username/your-repo"
134
+ ```
135
+
136
+ ### Finding Creator Slug and Model Name
137
+
138
+ 1. Visit [Artificial Analysis](https://artificialanalysis.ai/)
139
+ 2. Navigate to the model you want to import
140
+ 3. The URL format is: `https://artificialanalysis.ai/models/{creator-slug}/{model-name}`
141
+ 4. Or check their [API documentation](https://artificialanalysis.ai/api)
142
+
143
+ Common examples:
144
+ - Anthropic: `--creator-slug "anthropic" --model-name "claude-sonnet-4"`
145
+ - OpenAI: `--creator-slug "openai" --model-name "gpt-4-turbo"`
146
+ - Meta: `--creator-slug "meta" --model-name "llama-3-70b"`
147
+
148
+ ## Standalone vs Integrated
149
+
150
+ ### Standalone Script Features
151
+ - ✓ Simple, single-purpose
152
+ - ✓ Can run via `uv run` from URL
153
+ - ✓ Minimal dependencies
154
+ - ✗ No README extraction
155
+ - ✗ No validation
156
+ - ✗ No dry-run mode
157
+
158
+ **Use when:** You only need AA imports and want a simple script.
159
+
160
+ ### Integrated Script Features
161
+ - ✓ Both README extraction AND AA import
162
+ - ✓ Validation and show commands
163
+ - ✓ Dry-run preview mode
164
+ - ✓ Better error handling
165
+ - ✓ Merge with existing evaluations
166
+ - ✓ More flexible options
167
+
168
+ **Use when:** You want full evaluation management capabilities.
169
+
170
+ ## Common Workflows
171
+
172
+ ### Workflow 1: New Model with README Tables
173
+
174
+ You've just created a model with evaluation tables in the README.
175
+
176
+ ```bash
177
+ # Step 1: Preview extraction
178
+ python3 scripts/evaluation_manager.py extract-readme \
179
+ --repo-id "your-username/new-model-7b" \
180
+ --dry-run
181
+
182
+ # Step 2: Apply if it looks good
183
+ python3 scripts/evaluation_manager.py extract-readme \
184
+ --repo-id "your-username/new-model-7b"
185
+
186
+ # Step 3: Validate
187
+ python3 scripts/evaluation_manager.py validate \
188
+ --repo-id "your-username/new-model-7b"
189
+
190
+ # Step 4: View results
191
+ python3 scripts/evaluation_manager.py show \
192
+ --repo-id "your-username/new-model-7b"
193
+ ```
194
+
195
+ ### Workflow 2: Model Benchmarked on AA
196
+
197
+ Your model appears on Artificial Analysis with fresh benchmarks.
198
+
199
+ ```bash
200
+ # Import scores and create PR for review
201
+ python3 scripts/evaluation_manager.py import-aa \
202
+ --creator-slug "your-org" \
203
+ --model-name "your-model" \
204
+ --repo-id "your-org/your-model-hf" \
205
+ --create-pr
206
+ ```
207
+
208
+ ### Workflow 3: Combine Both Methods
209
+
210
+ You have README tables AND AA scores.
211
+
212
+ ```bash
213
+ # Step 1: Extract from README
214
+ python3 scripts/evaluation_manager.py extract-readme \
215
+ --repo-id "your-username/hybrid-model"
216
+
217
+ # Step 2: Import from AA (will merge with existing)
218
+ python3 scripts/evaluation_manager.py import-aa \
219
+ --creator-slug "your-org" \
220
+ --model-name "hybrid-model" \
221
+ --repo-id "your-username/hybrid-model"
222
+
223
+ # Step 3: View combined results
224
+ python3 scripts/evaluation_manager.py show \
225
+ --repo-id "your-username/hybrid-model"
226
+ ```
227
+
228
+ ### Workflow 4: Contributing to Community Models
229
+
230
+ Help improve community models by adding missing evaluations.
231
+
232
+ ```bash
233
+ # Find a model with evaluations in README but no model-index
234
+ # Example: community/awesome-7b
235
+
236
+ # Create PR with extracted evaluations
237
+ python3 scripts/evaluation_manager.py extract-readme \
238
+ --repo-id "community/awesome-7b" \
239
+ --create-pr
240
+
241
+ # The Hugging Face Hub will notify the repository owner
242
+ # They can review and merge your PR
243
+ ```
244
+
245
+ ### Workflow 5: Batch Processing
246
+
247
+ Update multiple models at once.
248
+
249
+ ```bash
250
+ # Create a list of repos
251
+ cat > models.txt << EOF
252
+ your-org/model-1-7b
253
+ your-org/model-2-13b
254
+ your-org/model-3-70b
255
+ EOF
256
+
257
+ # Process each
258
+ while read repo_id; do
259
+ echo "Processing $repo_id..."
260
+ python3 scripts/evaluation_manager.py extract-readme \
261
+ --repo-id "$repo_id"
262
+ done < models.txt
263
+ ```
264
+
265
+ ### Workflow 6: Automated Updates (CI/CD)
266
+
267
+ Set up automatic evaluation updates using GitHub Actions.
268
+
269
+ ```yaml
270
+ # .github/workflows/update-evals.yml
271
+ name: Update Evaluations Weekly
272
+ on:
273
+ schedule:
274
+ - cron: '0 0 * * 0' # Every Sunday
275
+ workflow_dispatch: # Manual trigger
276
+
277
+ jobs:
278
+ update:
279
+ runs-on: ubuntu-latest
280
+ steps:
281
+ - uses: actions/checkout@v4
282
+
283
+ - name: Set up Python
284
+ uses: actions/setup-python@v4
285
+ with:
286
+ python-version: '3.13'
287
+
288
+ - name: Install dependencies
289
+ run: |
290
+ pip install huggingface-hub python-dotenv pyyaml requests
291
+
292
+ - name: Update from Artificial Analysis
293
+ env:
294
+ AA_API_KEY: ${{ secrets.AA_API_KEY }}
295
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
296
+ run: |
297
+ python scripts/evaluation_manager.py import-aa \
298
+ --creator-slug "${{ vars.AA_CREATOR_SLUG }}" \
299
+ --model-name "${{ vars.AA_MODEL_NAME }}" \
300
+ --repo-id "${{ github.repository }}" \
301
+ --create-pr
302
+ ```
303
+
304
+ ## Verification and Validation
305
+
306
+ ### Check Current Evaluations
307
+
308
+ ```bash
309
+ python3 scripts/evaluation_manager.py show \
310
+ --repo-id "your-username/your-model"
311
+ ```
312
+
313
+ ### Validate Format
314
+
315
+ ```bash
316
+ python3 scripts/evaluation_manager.py validate \
317
+ --repo-id "your-username/your-model"
318
+ ```
319
+
320
+ ### View in HuggingFace UI
321
+
322
+ After updating, visit:
323
+ ```
324
+ https://huggingface.co/your-username/your-model
325
+ ```
326
+
327
+ The evaluation widget should display your scores automatically.
328
+
329
+ ## Troubleshooting Examples
330
+
331
+ ### Problem: No tables found
332
+
333
+ ```bash
334
+ # Check what tables exist in your README
335
+ python3 scripts/evaluation_manager.py extract-readme \
336
+ --repo-id "your-username/your-model" \
337
+ --dry-run
338
+
339
+ # If no output, ensure your README has markdown tables with numeric scores
340
+ ```
341
+
342
+ ### Problem: AA model not found
343
+
344
+ ```bash
345
+ # Verify the creator and model slugs
346
+ # Check the AA website URL or API directly
347
+ curl -H "x-api-key: $AA_API_KEY" \
348
+ https://artificialanalysis.ai/api/v2/data/llms/models | jq
349
+ ```
350
+
351
+ ### Problem: Token permission error
352
+
353
+ ```bash
354
+ # Verify your token has write access
355
+ # Generate a new token at: https://huggingface.co/settings/tokens
356
+ # Ensure "Write" scope is enabled
357
+ ```
358
+
359
+ ## Tips and Best Practices
360
+
361
+ 1. **Always dry-run first**: Use `--dry-run` to preview changes
362
+ 2. **Use PRs for others' repos**: Always use `--create-pr` for repositories you don't own
363
+ 3. **Validate after updates**: Run `validate` to ensure proper formatting
364
+ 4. **Keep evaluations current**: Set up automated updates for AA scores
365
+ 5. **Document sources**: The tool automatically adds source attribution
366
+ 6. **Check the UI**: Always verify the evaluation widget displays correctly
367
+
368
+ ## Getting Help
369
+
370
+ ```bash
371
+ # General help
372
+ python3 scripts/evaluation_manager.py --help
373
+
374
+ # Command-specific help
375
+ python3 scripts/evaluation_manager.py extract-readme --help
376
+ python3 scripts/evaluation_manager.py import-aa --help
377
+ ```
378
+
379
+ For issues or questions, consult:
380
+ - `../SKILL.md` - Complete documentation
381
+ - `../README.md` - Troubleshooting guide
382
+ - `../QUICKSTART.md` - Quick start guide
@@ -0,0 +1,141 @@
1
+ # /// script
2
+ # requires-python = ">=3.13"
3
+ # dependencies = [
4
+ # "huggingface-hub>=1.1.4",
5
+ # "python-dotenv>=1.2.1",
6
+ # "pyyaml>=6.0.3",
7
+ # "requests>=2.32.5",
8
+ # ]
9
+ # ///
10
+
11
+ """
12
+ Add Artificial Analysis evaluations to a Hugging Face model card.
13
+
14
+ NOTE: This is a standalone reference script. For integrated functionality
15
+ with additional features (README extraction, validation, etc.), use:
16
+ ../scripts/evaluation_manager.py import-aa [options]
17
+
18
+ STANDALONE USAGE:
19
+ AA_API_KEY="<your-api-key>" HF_TOKEN="<your-huggingface-token>" \
20
+ python artificial_analysis_to_hub.py \
21
+ --creator-slug <artificial-analysis-creator-slug> \
22
+ --model-name <artificial-analysis-model-name> \
23
+ --repo-id <huggingface-repo-id>
24
+
25
+ INTEGRATED USAGE (Recommended):
26
+ python ../scripts/evaluation_manager.py import-aa \
27
+ --creator-slug <creator-slug> \
28
+ --model-name <model-name> \
29
+ --repo-id <repo-id> \
30
+ [--create-pr]
31
+ """
32
+
33
+ import argparse
34
+ import os
35
+
36
+ import requests
37
+ import dotenv
38
+ from huggingface_hub import ModelCard
39
+
40
# Load environment variables from a local .env file, if present.
dotenv.load_dotenv()

API_KEY = os.getenv("AA_API_KEY")  # Artificial Analysis API key
HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face token (needs write scope)

# Fail fast on missing credentials BEFORE building any request config,
# so the script never proceeds with a None API key in the auth header.
if not API_KEY:
    raise ValueError("AA_API_KEY is not set")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN is not set")

# Artificial Analysis LLM models endpoint and its auth header.
URL = "https://artificialanalysis.ai/api/v2/data/llms/models"
HEADERS = {"x-api-key": API_KEY}
51
+
52
+
53
def get_model_evaluations_data(creator_slug, model_name):
    """Fetch the Artificial Analysis record for a single model.

    Args:
        creator_slug: AA creator slug (e.g. "anthropic").
        model_name: AA model slug (e.g. "claude-sonnet-4").

    Returns:
        The matching model dict from the AA API response.

    Raises:
        requests.HTTPError: if the AA API request fails (e.g. bad API key).
        ValueError: if no model matches the given creator/model slugs.
    """
    # Bound the request so a stalled connection cannot hang the script.
    response = requests.get(URL, headers=HEADERS, timeout=30)
    # Surface HTTP-level failures explicitly instead of letting them
    # appear later as a confusing KeyError on the missing "data" field.
    response.raise_for_status()
    for model in response.json()["data"]:
        if (
            model["model_creator"]["slug"] == creator_slug
            and model["slug"] == model_name
        ):
            return model
    # Include the creator slug so the error pinpoints the exact lookup.
    raise ValueError(f"Model {creator_slug}/{model_name} not found")
63
+
64
+
65
def aa_evaluations_to_model_index(
    model,
    dataset_name="Artificial Analysis Benchmarks",
    dataset_type="artificial_analysis",
    task_type="evaluation",
):
    """Convert an Artificial Analysis model record into the Hugging Face
    ``model-index`` structure used by model cards.

    Args:
        model: Model dict as returned by the AA API; must be non-empty.
        dataset_name: Display name recorded on the dataset entry.
        dataset_type: Machine-readable identifier for the dataset entry.
        task_type: Task identifier recorded on the result entry.

    Returns:
        A single-element list holding the model-index mapping.

    Raises:
        ValueError: if ``model`` is empty or None.
    """
    if not model:
        raise ValueError("Model data is required")

    # Prefer the human-readable name, then the slug, then a placeholder.
    display_name = model.get("name", model.get("slug", "unknown-model"))

    # One metric entry per evaluation score; e.g. "mmlu_pro" -> "Mmlu Pro".
    metrics = [
        {"name": key.replace("_", " ").title(), "type": key, "value": score}
        for key, score in model.get("evaluations", {}).items()
    ]

    result_entry = {
        "task": {"type": task_type},
        "dataset": {"name": dataset_name, "type": dataset_type},
        "metrics": metrics,
        "source": {
            "name": "Artificial Analysis API",
            "url": "https://artificialanalysis.ai",
        },
    }

    return [{"name": display_name, "results": [result_entry]}]
105
+
106
+
107
def main():
    """CLI entry point: fetch AA evaluations for one model and push them
    to the target Hugging Face repo as a pull request."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--creator-slug", type=str, required=True)
    parser.add_argument("--model-name", type=str, required=True)
    parser.add_argument("--repo-id", type=str, required=True)
    args = parser.parse_args()

    # Look up the model on Artificial Analysis and build the model-index.
    model_data = get_model_evaluations_data(
        creator_slug=args.creator_slug, model_name=args.model_name
    )
    model_index = aa_evaluations_to_model_index(model=model_data)

    # Load the existing model card and overwrite its model-index section.
    card = ModelCard.load(args.repo_id)
    card.data["model-index"] = model_index

    commit_message = (
        f"Add Artificial Analysis evaluations for {args.model_name}"
    )
    commit_description = (
        f"This commit adds the Artificial Analysis evaluations for the {args.model_name} model to this repository. "
        "To see the scores, visit the [Artificial Analysis](https://artificialanalysis.ai) website."
    )

    # Always open a PR rather than committing directly, so the repo
    # owner can review the imported scores before they go live.
    card.push_to_hub(
        args.repo_id,
        token=HF_TOKEN,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=True,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,135 @@
1
+ # Example Evaluation Table Formats
2
+
3
+ This file shows various formats of evaluation tables that can be extracted from model README files.
4
+
5
+ ## Format 1: Benchmarks as Rows (Most Common)
6
+
7
+ ```markdown
8
+ | Benchmark | Score |
9
+ |-----------|-------|
10
+ | MMLU | 85.2 |
11
+ | HumanEval | 72.5 |
12
+ | GSM8K | 91.3 |
13
+ | HellaSwag | 88.9 |
14
+ ```
15
+
16
+ ## Format 2: Multiple Metric Columns
17
+
18
+ ```markdown
19
+ | Benchmark | Accuracy | F1 Score |
20
+ |-----------|----------|----------|
21
+ | MMLU | 85.2 | 0.84 |
22
+ | GSM8K | 91.3 | 0.91 |
23
+ | DROP | 78.5 | 0.77 |
24
+ ```
25
+
26
+ ## Format 3: Benchmarks as Columns
27
+
28
+ ```markdown
29
+ | MMLU | HumanEval | GSM8K | HellaSwag |
30
+ |------|-----------|-------|-----------|
31
+ | 85.2 | 72.5 | 91.3 | 88.9 |
32
+ ```
33
+
34
+ ## Format 4: Percentage Values
35
+
36
+ ```markdown
37
+ | Benchmark | Score |
38
+ |---------------|----------|
39
+ | MMLU | 85.2% |
40
+ | HumanEval | 72.5% |
41
+ | GSM8K | 91.3% |
42
+ | TruthfulQA | 68.7% |
43
+ ```
44
+
45
+ ## Format 5: Mixed Format with Categories
46
+
47
+ ```markdown
48
+ ### Reasoning
49
+
50
+ | Benchmark | Score |
51
+ |-----------|-------|
52
+ | MMLU | 85.2 |
53
+ | BBH | 82.4 |
54
+ | GPQA | 71.3 |
55
+
56
+ ### Coding
57
+
58
+ | Benchmark | Score |
59
+ |-----------|-------|
60
+ | HumanEval | 72.5 |
61
+ | MBPP | 78.9 |
62
+
63
+ ### Math
64
+
65
+ | Benchmark | Score |
66
+ |-----------|-------|
67
+ | GSM8K | 91.3 |
68
+ | MATH | 65.8 |
69
+ ```
70
+
71
+ ## Format 6: With Additional Columns
72
+
73
+ ```markdown
74
+ | Benchmark | Score | Rank | Notes |
75
+ |-----------|-------|------|--------------------|
76
+ | MMLU | 85.2 | #5 | 5-shot |
77
+ | HumanEval | 72.5 | #8 | pass@1 |
78
+ | GSM8K | 91.3 | #3 | 8-shot, maj@1 |
79
+ ```
80
+
81
+ ## How the Extractor Works
82
+
83
+ The script will:
84
+ 1. Find all markdown tables in the README
85
+ 2. Identify which tables contain evaluation results
86
+ 3. Parse the table structure (rows vs columns)
87
+ 4. Extract numeric values as scores
88
+ 5. Convert to model-index YAML format
89
+
90
+ ## Tips for README Authors
91
+
92
+ To ensure your evaluation tables are properly extracted:
93
+
94
+ 1. **Use clear headers**: Include "Benchmark", "Score", or similar terms
95
+ 2. **Keep it simple**: Stick to benchmark name + score columns
96
+ 3. **Use standard formats**: Follow markdown table syntax
97
+ 4. **Include numeric values**: Ensure scores are parseable numbers
98
+ 5. **Be consistent**: Use the same format across multiple tables
99
+
100
+ ## Example Complete README Section
101
+
102
+ ```markdown
103
+ # Model Card for MyModel-7B
104
+
105
+ ## Evaluation Results
106
+
107
+ Our model was evaluated on several standard benchmarks:
108
+
109
+ | Benchmark | Score |
110
+ |---------------|-------|
111
+ | MMLU | 85.2 |
112
+ | HumanEval | 72.5 |
113
+ | GSM8K | 91.3 |
114
+ | HellaSwag | 88.9 |
115
+ | ARC-Challenge | 81.7 |
116
+ | TruthfulQA | 68.7 |
117
+
118
+ ### Detailed Results
119
+
120
+ For more detailed results and methodology, see our [paper](link).
121
+ ```
122
+
123
+ ## Running the Extractor
124
+
125
+ ```bash
126
+ # Extract from this example
127
+ python scripts/evaluation_manager.py extract-readme \
128
+ --repo-id "your-username/your-model" \
129
+ --dry-run
130
+
131
+ # Apply to your model card
132
+ python scripts/evaluation_manager.py extract-readme \
133
+ --repo-id "your-username/your-model" \
134
+ --task-type "text-generation"
135
+ ```
@@ -0,0 +1,50 @@
1
+ {
2
+ "MMLU": {
3
+ "type": "mmlu",
4
+ "name": "Massive Multitask Language Understanding"
5
+ },
6
+ "HumanEval": {
7
+ "type": "humaneval",
8
+ "name": "Code Generation (HumanEval)"
9
+ },
10
+ "GSM8K": {
11
+ "type": "gsm8k",
12
+ "name": "Grade School Math"
13
+ },
14
+ "HellaSwag": {
15
+ "type": "hellaswag",
16
+ "name": "HellaSwag Common Sense"
17
+ },
18
+ "ARC-C": {
19
+ "type": "arc_challenge",
20
+ "name": "ARC Challenge"
21
+ },
22
+ "ARC-E": {
23
+ "type": "arc_easy",
24
+ "name": "ARC Easy"
25
+ },
26
+ "Winogrande": {
27
+ "type": "winogrande",
28
+ "name": "Winogrande"
29
+ },
30
+ "TruthfulQA": {
31
+ "type": "truthfulqa",
32
+ "name": "TruthfulQA"
33
+ },
34
+ "GPQA": {
35
+ "type": "gpqa",
36
+ "name": "Graduate-Level Google-Proof Q&A"
37
+ },
38
+ "DROP": {
39
+ "type": "drop",
40
+ "name": "Discrete Reasoning Over Paragraphs"
41
+ },
42
+ "BBH": {
43
+ "type": "bbh",
44
+ "name": "Big Bench Hard"
45
+ },
46
+ "MATH": {
47
+ "type": "math",
48
+ "name": "MATH Dataset"
49
+ }
50
+ }