@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# /// script
|
|
2
|
+
# requires-python = ">=3.10"
|
|
3
|
+
# dependencies = [
|
|
4
|
+
# "huggingface-hub>=0.26.0",
|
|
5
|
+
# "python-dotenv>=1.2.1",
|
|
6
|
+
# ]
|
|
7
|
+
# ///
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
Submit vLLM-based evaluation jobs using the `hf jobs uv run` CLI.
|
|
11
|
+
|
|
12
|
+
This wrapper constructs the appropriate command to execute vLLM evaluation scripts
|
|
13
|
+
(lighteval or inspect-ai) on Hugging Face Jobs with GPU hardware.
|
|
14
|
+
|
|
15
|
+
Unlike run_eval_job.py (which uses inference providers/APIs), this script runs
|
|
16
|
+
models directly on the job's GPU using vLLM or HuggingFace Transformers.
|
|
17
|
+
|
|
18
|
+
Usage:
|
|
19
|
+
python run_vllm_eval_job.py \\
|
|
20
|
+
--model meta-llama/Llama-3.2-1B \\
|
|
21
|
+
--task "leaderboard|mmlu|5" \\
|
|
22
|
+
--framework lighteval \\
|
|
23
|
+
--hardware a10g-small
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import argparse
|
|
29
|
+
import os
|
|
30
|
+
import subprocess
|
|
31
|
+
import sys
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Optional
|
|
34
|
+
|
|
35
|
+
from huggingface_hub import get_token
|
|
36
|
+
from dotenv import load_dotenv
|
|
37
|
+
|
|
38
|
+
load_dotenv()
|
|
39
|
+
|
|
40
|
+
# Locations of the per-framework evaluation scripts, which live in the same
# directory as this wrapper.
SCRIPT_DIR = Path(__file__).parent.resolve()
LIGHTEVAL_SCRIPT = SCRIPT_DIR.joinpath("lighteval_vllm_uv.py")
INSPECT_SCRIPT = SCRIPT_DIR.joinpath("inspect_vllm_uv.py")

# Suggested Jobs hardware flavor for each model-size tier.
HARDWARE_RECOMMENDATIONS = dict(
    small="t4-small",  # < 3B parameters
    medium="a10g-small",  # 3B - 13B parameters
    large="a10g-large",  # 13B - 34B parameters
    xlarge="a100-large",  # 34B+ parameters
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def estimate_hardware(model_id: str) -> str:
    """
    Estimate appropriate hardware based on model ID naming conventions.

    Parameter-count tokens such as ``7b``, ``0.5b`` or ``8x7b`` are parsed as
    numbers rather than matched as raw substrings, so ``11b`` is not mistaken
    for ``1b`` and ``27b`` is not mistaken for ``7b``. A mixture-of-experts
    token ``NxMb`` is treated as ``N * M`` billion parameters, which is an
    upper bound on the total size. When several size tokens appear, the
    largest one decides the recommendation.

    Args:
        model_id: Model identifier, e.g. ``"meta-llama/Llama-3.2-1B"``.
            Matching is case-insensitive.

    Returns:
        A hardware flavor recommendation: ``"t4-small"`` (up to 3B),
        ``"a10g-small"`` (above 3B up to 14B), ``"a10g-large"`` (above 14B up
        to 34B) or ``"a100-large"`` (above 34B). Without a size token, names
        containing ``"small"`` or ``"mini"`` map to ``"t4-small"``; anything
        else falls back to ``"a10g-small"``.
    """
    import re  # function-scope import keeps this block self-contained

    model_lower = model_id.lower()

    # A size token is a number directly followed by "b" that is not part of a
    # longer number (look-behind) nor the start of a longer word (look-ahead).
    size_tokens = re.findall(
        r"(?<![\d.])(?:(\d+)x)?(\d+(?:\.\d+)?)b(?![a-z0-9])", model_lower
    )
    sizes = [
        float(experts or 1) * float(billions) for experts, billions in size_tokens
    ]

    if sizes:
        largest = max(sizes)
        if largest > 34:
            return "a100-large"
        if largest > 14:
            return "a10g-large"
        if largest > 3:
            return "a10g-small"
        return "t4-small"

    # No explicit size: fall back to keyword hints in the name.
    if any(hint in model_lower for hint in ("small", "mini")):
        return "t4-small"

    # Default to medium hardware
    return "a10g-small"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def create_lighteval_job(
    model_id: str,
    tasks: str,
    hardware: str,
    hf_token: Optional[str] = None,
    max_samples: Optional[int] = None,
    backend: str = "vllm",
    batch_size: int = 1,
    tensor_parallel_size: int = 1,
    trust_remote_code: bool = False,
    use_chat_template: bool = False,
) -> None:
    """
    Submit a lighteval evaluation job on HuggingFace Jobs.

    Builds an ``hf jobs uv run`` command around ``LIGHTEVAL_SCRIPT`` and runs
    it as a subprocess. The token is passed to the CLI as a secret, but it is
    redacted from everything this function prints and from the exception that
    is re-raised on failure.

    Args:
        model_id: Model identifier forwarded as ``--model``.
        tasks: Task specification forwarded as ``--tasks``.
        hardware: Hardware flavor forwarded as ``--flavor``.
        hf_token: Token to use. Falls back to the ``HF_TOKEN`` environment
            variable and then to ``huggingface_hub.get_token()``.
        max_samples: Forwarded as ``--max-samples`` when truthy.
        backend: Forwarded as ``--backend``.
        batch_size: Forwarded as ``--batch-size``.
        tensor_parallel_size: Forwarded as ``--tensor-parallel-size``.
        trust_remote_code: Adds ``--trust-remote-code`` when true.
        use_chat_template: Adds ``--use-chat-template`` when true.

    Raises:
        ValueError: If no token can be resolved.
        FileNotFoundError: If ``LIGHTEVAL_SCRIPT`` does not exist.
        subprocess.CalledProcessError: If the ``hf jobs`` command exits with a
            non-zero status. Its ``cmd`` attribute holds the redacted command.
    """
    token = hf_token or os.getenv("HF_TOKEN") or get_token()
    if not token:
        raise ValueError("HF_TOKEN is required. Set it in environment or pass as argument.")

    if not LIGHTEVAL_SCRIPT.exists():
        raise FileNotFoundError(f"Script not found at {LIGHTEVAL_SCRIPT}")

    print(f"Preparing lighteval job for {model_id}")
    print(f" Tasks: {tasks}")
    print(f" Backend: {backend}")
    print(f" Hardware: {hardware}")

    secret_arg = f"HF_TOKEN={token}"
    cmd = [
        "hf", "jobs", "uv", "run",
        str(LIGHTEVAL_SCRIPT),
        "--flavor", hardware,
        "--secrets", secret_arg,
        "--",
        "--model", model_id,
        "--tasks", tasks,
        "--backend", backend,
        "--batch-size", str(batch_size),
        "--tensor-parallel-size", str(tensor_parallel_size),
    ]

    if max_samples:
        cmd.extend(["--max-samples", str(max_samples)])

    if trust_remote_code:
        cmd.append("--trust-remote-code")

    if use_chat_template:
        cmd.append("--use-chat-template")

    # Never echo the real token: logs and terminals are routinely shared.
    printable_cmd = ["HF_TOKEN=***" if part == secret_arg else part for part in cmd]
    print(f"\nExecuting: {' '.join(printable_cmd)}")

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        print("hf jobs command failed", file=sys.stderr)
        # The exception message embeds the command line, so swap in the
        # redacted copy before the error propagates into a traceback.
        exc.cmd = printable_cmd
        raise
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def create_inspect_job(
    model_id: str,
    task: str,
    hardware: str,
    hf_token: Optional[str] = None,
    limit: Optional[int] = None,
    backend: str = "vllm",
    tensor_parallel_size: int = 1,
    trust_remote_code: bool = False,
) -> None:
    """
    Submit an inspect-ai evaluation job on HuggingFace Jobs.

    Builds an ``hf jobs uv run`` command around ``INSPECT_SCRIPT`` and runs it
    as a subprocess. The token is passed to the CLI as a secret, but it is
    redacted from everything this function prints and from the exception that
    is re-raised on failure.

    Args:
        model_id: Model identifier forwarded as ``--model``.
        task: Task name forwarded as ``--task``.
        hardware: Hardware flavor forwarded as ``--flavor``.
        hf_token: Token to use. Falls back to the ``HF_TOKEN`` environment
            variable and then to ``huggingface_hub.get_token()``.
        limit: Forwarded as ``--limit`` when truthy.
        backend: Forwarded as ``--backend``.
        tensor_parallel_size: Forwarded as ``--tensor-parallel-size``.
        trust_remote_code: Adds ``--trust-remote-code`` when true.

    Raises:
        ValueError: If no token can be resolved.
        FileNotFoundError: If ``INSPECT_SCRIPT`` does not exist.
        subprocess.CalledProcessError: If the ``hf jobs`` command exits with a
            non-zero status. Its ``cmd`` attribute holds the redacted command.
    """
    token = hf_token or os.getenv("HF_TOKEN") or get_token()
    if not token:
        raise ValueError("HF_TOKEN is required. Set it in environment or pass as argument.")

    if not INSPECT_SCRIPT.exists():
        raise FileNotFoundError(f"Script not found at {INSPECT_SCRIPT}")

    print(f"Preparing inspect-ai job for {model_id}")
    print(f" Task: {task}")
    print(f" Backend: {backend}")
    print(f" Hardware: {hardware}")

    secret_arg = f"HF_TOKEN={token}"
    cmd = [
        "hf", "jobs", "uv", "run",
        str(INSPECT_SCRIPT),
        "--flavor", hardware,
        "--secrets", secret_arg,
        "--",
        "--model", model_id,
        "--task", task,
        "--backend", backend,
        "--tensor-parallel-size", str(tensor_parallel_size),
    ]

    if limit:
        cmd.extend(["--limit", str(limit)])

    if trust_remote_code:
        cmd.append("--trust-remote-code")

    # Never echo the real token: logs and terminals are routinely shared.
    printable_cmd = ["HF_TOKEN=***" if part == secret_arg else part for part in cmd]
    print(f"\nExecuting: {' '.join(printable_cmd)}")

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        print("hf jobs command failed", file=sys.stderr)
        # The exception message embeds the command line, so swap in the
        # redacted copy before the error propagates into a traceback.
        exc.cmd = printable_cmd
        raise
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def main() -> None:
    """
    Command-line entry point: parse options and submit one evaluation job.

    Resolves the hardware flavor (explicit ``--hardware`` or
    ``estimate_hardware``), translates the backend name to the spelling the
    selected framework expects, and dispatches to ``create_lighteval_job``
    or ``create_inspect_job``.
    """
    usage_notes = """
Examples:
# Run lighteval with vLLM on A10G GPU
python run_vllm_eval_job.py \\
--model meta-llama/Llama-3.2-1B \\
--task "leaderboard|mmlu|5" \\
--framework lighteval \\
--hardware a10g-small

# Run inspect-ai on larger model with multi-GPU
python run_vllm_eval_job.py \\
--model meta-llama/Llama-3.2-70B \\
--task mmlu \\
--framework inspect \\
--hardware a100-large \\
--tensor-parallel-size 4

# Auto-detect hardware based on model size
python run_vllm_eval_job.py \\
--model meta-llama/Llama-3.2-1B \\
--task mmlu \\
--framework inspect

# Run with HF Transformers backend (instead of vLLM)
python run_vllm_eval_job.py \\
--model microsoft/phi-2 \\
--task mmlu \\
--framework inspect \\
--backend hf

Hardware flavors:
- t4-small: T4 GPU, good for models < 3B
- a10g-small: A10G GPU, good for models 3B-13B
- a10g-large: A10G GPU, good for models 13B-34B
- a100-large: A100 GPU, good for models 34B+

Frameworks:
- lighteval: HuggingFace's lighteval library
- inspect: UK AI Safety's inspect-ai library

Task formats:
- lighteval: "suite|task|num_fewshot" (e.g., "leaderboard|mmlu|5")
- inspect: task name (e.g., "mmlu", "gsm8k")
"""

    cli = argparse.ArgumentParser(
        description="Submit vLLM-based evaluation jobs to HuggingFace Jobs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
    )

    # Options are registered in the order they should appear in --help.
    cli.add_argument(
        "--model",
        required=True,
        help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)",
    )
    cli.add_argument(
        "--task",
        required=True,
        help="Evaluation task (format depends on framework)",
    )
    cli.add_argument(
        "--framework",
        choices=["lighteval", "inspect"],
        default="lighteval",
        help="Evaluation framework to use (default: lighteval)",
    )
    cli.add_argument(
        "--hardware",
        default=None,
        help="Hardware flavor (auto-detected if not specified)",
    )
    cli.add_argument(
        "--backend",
        choices=["vllm", "hf", "accelerate"],
        default="vllm",
        help="Model backend (default: vllm)",
    )
    cli.add_argument(
        "--limit",
        "--max-samples",
        type=int,
        default=None,
        dest="limit",
        help="Limit number of samples to evaluate",
    )
    cli.add_argument(
        "--batch-size",
        type=int,
        default=1,
        help="Batch size for evaluation (lighteval only)",
    )
    cli.add_argument(
        "--tensor-parallel-size",
        type=int,
        default=1,
        help="Number of GPUs for tensor parallelism",
    )
    cli.add_argument(
        "--trust-remote-code",
        action="store_true",
        help="Allow executing remote code from model repository",
    )
    cli.add_argument(
        "--use-chat-template",
        action="store_true",
        help="Apply chat template (lighteval only)",
    )

    opts = cli.parse_args()

    # Fall back to an estimate when no flavor was given explicitly.
    if opts.hardware:
        hardware = opts.hardware
    else:
        hardware = estimate_hardware(opts.model)
    print(f"Using hardware: {hardware}")

    if opts.framework == "lighteval":
        # lighteval spells the HF Transformers backend "accelerate".
        lighteval_backend = "accelerate" if opts.backend == "hf" else opts.backend
        create_lighteval_job(
            model_id=opts.model,
            tasks=opts.task,
            hardware=hardware,
            max_samples=opts.limit,
            backend=lighteval_backend,
            batch_size=opts.batch_size,
            tensor_parallel_size=opts.tensor_parallel_size,
            trust_remote_code=opts.trust_remote_code,
            use_chat_template=opts.use_chat_template,
        )
        return

    # inspect path: "accelerate" is mapped back to "hf".
    inspect_backend = "hf" if opts.backend == "accelerate" else opts.backend
    create_inspect_job(
        model_id=opts.model,
        task=opts.task,
        hardware=hardware,
        limit=opts.limit,
        backend=inspect_backend,
        tensor_parallel_size=opts.tensor_parallel_size,
        trust_remote_code=opts.trust_remote_code,
    )
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
if __name__ == "__main__":
|
|
330
|
+
main()
|
|
331
|
+
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = [
|
|
5
|
+
# "pyyaml",
|
|
6
|
+
# ]
|
|
7
|
+
# ///
|
|
8
|
+
"""
|
|
9
|
+
Test script for evaluation extraction functionality.
|
|
10
|
+
|
|
11
|
+
This script demonstrates the table extraction capabilities without
|
|
12
|
+
requiring HF tokens or making actual API calls.
|
|
13
|
+
|
|
14
|
+
Note: This script imports from evaluation_manager.py (same directory).
|
|
15
|
+
Run from the scripts/ directory: cd scripts && uv run test_extraction.py
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import yaml
|
|
19
|
+
|
|
20
|
+
from evaluation_manager import (
|
|
21
|
+
extract_tables_from_markdown,
|
|
22
|
+
parse_markdown_table,
|
|
23
|
+
is_evaluation_table,
|
|
24
|
+
extract_metrics_from_table
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Fixture: a model-card style README mixing benchmark tables with one
# non-evaluation table ("Feature | Value"). Assembled line by line; the empty
# first and last entries give the text its leading and trailing newline.
SAMPLE_README = "\n".join(
    [
        "",
        "# My Awesome Model",
        "",
        "## Evaluation Results",
        "",
        "Here are the benchmark results:",
        "",
        "| Benchmark | Score |",
        "|-----------|-------|",
        "| MMLU | 85.2 |",
        "| HumanEval | 72.5 |",
        "| GSM8K | 91.3 |",
        "",
        "### Detailed Breakdown",
        "",
        "| Category | MMLU | GSM8K | HumanEval |",
        "|---------------|-------|-------|-----------|",
        "| Performance | 85.2 | 91.3 | 72.5 |",
        "",
        "## Other Information",
        "",
        "This is not an evaluation table:",
        "",
        "| Feature | Value |",
        "|---------|-------|",
        "| Size | 7B |",
        "| Type | Chat |",
        "",
        "## More Results",
        "",
        "| Benchmark | Accuracy | F1 Score |",
        "|---------------|----------|----------|",
        "| HellaSwag | 88.9 | 0.87 |",
        "| TruthfulQA | 68.7 | 0.65 |",
        "",
    ]
)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_table_extraction():
    """Extract tables from SAMPLE_README, print a preview of each, and return them."""
    banner = "=" * 60
    print(banner)
    print("TEST 1: Table Extraction")
    print(banner)

    found = extract_tables_from_markdown(SAMPLE_README)
    print(f"Found {len(found)} tables in the sample README\n")

    for index, text in enumerate(found, 1):
        print(f"Table {index}:")
        # Long tables are truncated to their first 100 characters.
        if len(text) > 100:
            preview = text[:100] + "..."
        else:
            preview = text
        print(preview)
        print()

    return found
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_table_parsing(tables):
    """Parse each raw table, print its header and leading rows, and return (header, rows) pairs."""
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 2: Table Parsing")
    print(banner)

    results = []
    for number, raw in enumerate(tables, 1):
        print(f"\nParsing Table {number}:")
        header, rows = parse_markdown_table(raw)

        print(f" Header: {header}")
        print(f" Rows: {len(rows)}")

        # Only the first three rows are echoed; the rest are summarised.
        shown = rows[:3]
        for row_number, row in enumerate(shown, 1):
            print(f" Row {row_number}: {row}")
        hidden = len(rows) - len(shown)
        if hidden > 0:
            print(f" ... and {hidden} more rows")

        results.append((header, rows))

    return results
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_evaluation_detection(parsed_tables):
    """Classify each parsed table with is_evaluation_table and return those flagged as evaluations."""
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 3: Evaluation Table Detection")
    print(banner)

    kept = []
    for position, (header, rows) in enumerate(parsed_tables, 1):
        flagged = is_evaluation_table(header, rows)
        if flagged:
            verdict = "✓ IS"
        else:
            verdict = "✗ NOT"
        print(f"\nTable {position}: {verdict} an evaluation table")
        print(f" Header: {header}")

        if flagged:
            kept.append((header, rows))

    print(f"\nFound {len(kept)} evaluation tables")
    return kept
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_metric_extraction(eval_tables):
    """Extract metrics from every evaluation table, print them, and return the combined list."""
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 4: Metric Extraction")
    print(banner)

    collected = []
    for number, (header, rows) in enumerate(eval_tables, 1):
        print(f"\nExtracting metrics from table {number}:")
        extracted = extract_metrics_from_table(header, rows, table_format="auto")

        print(f" Extracted {len(extracted)} metrics:")
        for entry in extracted:
            print(f" - {entry['name']}: {entry['value']} (type: {entry['type']})")

        collected.extend(extracted)

    return collected
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_model_index_format(metrics):
    """Wrap the metrics in a model-index structure and print it as YAML."""
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 5: Model-Index Format")
    print(banner)

    # Key insertion order is significant: the dump below uses sort_keys=False.
    result_entry = {
        "task": {"type": "text-generation"},
        "dataset": {
            "name": "Benchmarks",
            "type": "benchmark",
        },
        "metrics": metrics,
        "source": {
            "name": "Model README",
            "url": "https://huggingface.co/test/model",
        },
    }
    document = {
        "model-index": [
            {
                "name": "test-model",
                "results": [result_entry],
            }
        ]
    }

    print("\nGenerated model-index structure:")
    rendered = yaml.dump(document, sort_keys=False, default_flow_style=False)
    print(rendered)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def main():
    """Run the five demonstration stages in order and print a summary."""
    rule = "=" * 60

    print("\n" + rule)
    print("EVALUATION EXTRACTION TEST SUITE")
    print(rule)
    print("\nThis test demonstrates the table extraction capabilities")
    print("without requiring API access or tokens.\n")

    # Each stage consumes the previous stage's return value.
    tables = test_table_extraction()
    parsed_tables = test_table_parsing(tables)
    eval_tables = test_evaluation_detection(parsed_tables)
    metrics = test_metric_extraction(eval_tables)
    test_model_index_format(metrics)

    # Summary
    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)
    summary_lines = (
        f"✓ Found {len(tables)} total tables",
        f"✓ Identified {len(eval_tables)} evaluation tables",
        f"✓ Extracted {len(metrics)} metrics",
        "✓ Generated model-index format successfully",
    )
    for line in summary_lines:
        print(line)
    print("\n" + rule)
    print("All tests completed! The extraction logic is working correctly.")
    print(rule + "\n")
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
if __name__ == "__main__":
|
|
206
|
+
main()
|