@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# Custom Benchmark Integration
|
|
2
|
+
|
|
3
|
+
NeMo Evaluator supports adding custom benchmarks through Framework Definition Files (FDFs) and custom containers.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Custom benchmarks are added by:
|
|
8
|
+
|
|
9
|
+
1. **Framework Definition Files (FDFs)**: YAML files that define evaluation tasks, commands, and output parsing
|
|
10
|
+
2. **Custom Containers**: Package your framework with nemo-evaluator for reproducible execution
|
|
11
|
+
|
|
12
|
+
> **Note**: NeMo Evaluator does not currently support programmatic harness APIs or custom metric implementations via Python classes. Customization is done through FDFs and containers.
|
|
13
|
+
|
|
14
|
+
## Framework Definition Files (FDFs)
|
|
15
|
+
|
|
16
|
+
FDFs are the primary way to add custom evaluations. An FDF declares framework metadata, default commands, and evaluation tasks.
|
|
17
|
+
|
|
18
|
+
### FDF Structure
|
|
19
|
+
|
|
20
|
+
```yaml
|
|
21
|
+
# framework_def.yaml
|
|
22
|
+
framework:
|
|
23
|
+
name: my-custom-framework
|
|
24
|
+
package_name: my_custom_eval
|
|
25
|
+
|
|
26
|
+
defaults:
|
|
27
|
+
command: "python -m my_custom_eval.run --model-id {model_id} --task {task} --output-dir {output_dir}"
|
|
28
|
+
|
|
29
|
+
evaluations:
|
|
30
|
+
- name: custom_task_1
|
|
31
|
+
defaults:
|
|
32
|
+
temperature: 0.0
|
|
33
|
+
max_new_tokens: 512
|
|
34
|
+
extra:
|
|
35
|
+
custom_param: value
|
|
36
|
+
|
|
37
|
+
- name: custom_task_2
|
|
38
|
+
defaults:
|
|
39
|
+
temperature: 0.7
|
|
40
|
+
max_new_tokens: 1024
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Key FDF Components
|
|
44
|
+
|
|
45
|
+
**Framework section**:
|
|
46
|
+
- `name`: Human-readable name for your framework
|
|
47
|
+
- `package_name`: Python package name
|
|
48
|
+
|
|
49
|
+
**Defaults section**:
|
|
50
|
+
- `command`: The command template to execute your evaluation
|
|
51
|
+
- The placeholders `{model_id}`, `{task}`, and `{output_dir}` are substituted at runtime
|
|
52
|
+
|
|
53
|
+
**Evaluations section**:
|
|
54
|
+
- List of tasks with their default parameters
|
|
55
|
+
- Each task can override the framework defaults
|
|
56
|
+
|
|
57
|
+
### Output Parser
|
|
58
|
+
|
|
59
|
+
When creating a custom FDF, you need an output parser function that translates your framework's results into NeMo Evaluator's standard schema:
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
# my_custom_eval/parser.py
|
|
63
|
+
def parse_output(output_dir: str) -> dict:
|
|
64
|
+
"""
|
|
65
|
+
Parse evaluation results from output_dir.
|
|
66
|
+
|
|
67
|
+
Returns dict with metrics in NeMo Evaluator format.
|
|
68
|
+
"""
|
|
69
|
+
# Read your framework's output files
|
|
70
|
+
results_file = Path(output_dir) / "results.json"
|
|
71
|
+
with open(results_file) as f:
|
|
72
|
+
raw_results = json.load(f)
|
|
73
|
+
|
|
74
|
+
# Transform to standard schema
|
|
75
|
+
return {
|
|
76
|
+
"metrics": {
|
|
77
|
+
"accuracy": raw_results["score"],
|
|
78
|
+
"total_samples": raw_results["num_samples"]
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Custom Container Creation
|
|
84
|
+
|
|
85
|
+
Package your custom framework as a container for reproducibility.
|
|
86
|
+
|
|
87
|
+
### Dockerfile Example
|
|
88
|
+
|
|
89
|
+
```dockerfile
|
|
90
|
+
# Dockerfile
|
|
91
|
+
FROM python:3.10-slim
|
|
92
|
+
|
|
93
|
+
# Install nemo-evaluator
|
|
94
|
+
RUN pip install nemo-evaluator
|
|
95
|
+
|
|
96
|
+
# Install your custom framework
|
|
97
|
+
COPY my_custom_eval/ /opt/my_custom_eval/
|
|
98
|
+
RUN pip install /opt/my_custom_eval/
|
|
99
|
+
|
|
100
|
+
# Copy framework definition
|
|
101
|
+
COPY framework_def.yaml /opt/framework_def.yaml
|
|
102
|
+
|
|
103
|
+
# Set working directory
|
|
104
|
+
WORKDIR /opt
|
|
105
|
+
|
|
106
|
+
ENTRYPOINT ["python", "-m", "nemo_evaluator"]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Build and Push
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
docker build -t my-registry/custom-eval:1.0 .
|
|
113
|
+
docker push my-registry/custom-eval:1.0
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Register in mapping.toml
|
|
117
|
+
|
|
118
|
+
Add your custom container to the task registry:
|
|
119
|
+
|
|
120
|
+
```toml
|
|
121
|
+
# Add to mapping.toml
|
|
122
|
+
[my-custom-framework]
|
|
123
|
+
container = "my-registry/custom-eval:1.0"
|
|
124
|
+
|
|
125
|
+
[my-custom-framework.tasks.chat.custom_task_1]
|
|
126
|
+
required_env_vars = []
|
|
127
|
+
|
|
128
|
+
[my-custom-framework.tasks.chat.custom_task_2]
|
|
129
|
+
required_env_vars = ["CUSTOM_API_KEY"]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Using Custom Datasets
|
|
133
|
+
|
|
134
|
+
### Dataset Mounting
|
|
135
|
+
|
|
136
|
+
Mount proprietary datasets at runtime rather than baking them into containers:
|
|
137
|
+
|
|
138
|
+
```yaml
|
|
139
|
+
# config.yaml
|
|
140
|
+
defaults:
|
|
141
|
+
- execution: local
|
|
142
|
+
- deployment: none
|
|
143
|
+
- _self_
|
|
144
|
+
|
|
145
|
+
execution:
|
|
146
|
+
output_dir: ./results
|
|
147
|
+
|
|
148
|
+
evaluation:
|
|
149
|
+
tasks:
|
|
150
|
+
- name: custom_task_1
|
|
151
|
+
dataset_dir: /path/to/local/data
|
|
152
|
+
dataset_mount_path: /data # Optional, defaults to /datasets
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
The launcher will mount the dataset directory into the container and set the `NEMO_EVALUATOR_DATASET_DIR` environment variable.
|
|
156
|
+
|
|
157
|
+
### Task-Specific Environment Variables
|
|
158
|
+
|
|
159
|
+
Pass environment variables to specific tasks:
|
|
160
|
+
|
|
161
|
+
```yaml
|
|
162
|
+
evaluation:
|
|
163
|
+
tasks:
|
|
164
|
+
- name: gpqa_diamond
|
|
165
|
+
env_vars:
|
|
166
|
+
HF_TOKEN: HF_TOKEN # Maps to $HF_TOKEN from host
|
|
167
|
+
|
|
168
|
+
- name: custom_task
|
|
169
|
+
env_vars:
|
|
170
|
+
CUSTOM_API_KEY: MY_CUSTOM_KEY
|
|
171
|
+
DATA_PATH: /data/custom.jsonl
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Parameter Overrides
|
|
175
|
+
|
|
176
|
+
Override evaluation parameters at multiple levels:
|
|
177
|
+
|
|
178
|
+
### Global Overrides
|
|
179
|
+
|
|
180
|
+
Apply to all tasks:
|
|
181
|
+
|
|
182
|
+
```yaml
|
|
183
|
+
evaluation:
|
|
184
|
+
nemo_evaluator_config:
|
|
185
|
+
config:
|
|
186
|
+
params:
|
|
187
|
+
temperature: 0.0
|
|
188
|
+
max_new_tokens: 512
|
|
189
|
+
parallelism: 4
|
|
190
|
+
request_timeout: 300
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Task-Specific Overrides
|
|
194
|
+
|
|
195
|
+
Override for individual tasks:
|
|
196
|
+
|
|
197
|
+
```yaml
|
|
198
|
+
evaluation:
|
|
199
|
+
tasks:
|
|
200
|
+
- name: humaneval
|
|
201
|
+
nemo_evaluator_config:
|
|
202
|
+
config:
|
|
203
|
+
params:
|
|
204
|
+
temperature: 0.8
|
|
205
|
+
max_new_tokens: 1024
|
|
206
|
+
n_samples: 200 # Task-specific parameter
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### CLI Overrides
|
|
210
|
+
|
|
211
|
+
Override at runtime:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
nemo-evaluator-launcher run \
|
|
215
|
+
--config-dir . \
|
|
216
|
+
--config-name config \
|
|
217
|
+
-o +evaluation.nemo_evaluator_config.config.params.limit_samples=10
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Testing Custom Benchmarks
|
|
221
|
+
|
|
222
|
+
### Dry Run
|
|
223
|
+
|
|
224
|
+
Validate configuration without execution:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
nemo-evaluator-launcher run \
|
|
228
|
+
--config-dir . \
|
|
229
|
+
--config-name custom_config \
|
|
230
|
+
--dry-run
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Limited Sample Testing
|
|
234
|
+
|
|
235
|
+
Test with a small subset first:
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
nemo-evaluator-launcher run \
|
|
239
|
+
--config-dir . \
|
|
240
|
+
--config-name custom_config \
|
|
241
|
+
-o +evaluation.nemo_evaluator_config.config.params.limit_samples=5
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Check Results
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
# View results
|
|
248
|
+
cat results/<invocation_id>/<task>/artifacts/results.json
|
|
249
|
+
|
|
250
|
+
# Check logs
|
|
251
|
+
cat results/<invocation_id>/<task>/artifacts/logs/eval.log
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## Best Practices
|
|
255
|
+
|
|
256
|
+
1. **Use FDFs**: Define custom benchmarks via Framework Definition Files
|
|
257
|
+
2. **Containerize**: Package frameworks as containers for reproducibility
|
|
258
|
+
3. **Mount data**: Use volume mounts for datasets instead of baking them into images
|
|
259
|
+
4. **Test incrementally**: Use `limit_samples` for quick validation
|
|
260
|
+
5. **Version containers**: Tag containers with semantic versions
|
|
261
|
+
6. **Document parameters**: Include clear documentation in your FDF
|
|
262
|
+
|
|
263
|
+
## Limitations
|
|
264
|
+
|
|
265
|
+
Currently **not supported**:
|
|
266
|
+
- Custom Python metric classes via plugin system
|
|
267
|
+
- Programmatic harness registration via Python API
|
|
268
|
+
- Runtime metric injection via configuration
|
|
269
|
+
|
|
270
|
+
Custom scoring logic must be implemented within your evaluation framework and exposed through the FDF's output parser.
|
|
271
|
+
|
|
272
|
+
## Example: Complete Custom Setup
|
|
273
|
+
|
|
274
|
+
```yaml
|
|
275
|
+
# custom_eval_config.yaml
|
|
276
|
+
defaults:
|
|
277
|
+
- execution: local
|
|
278
|
+
- deployment: none
|
|
279
|
+
- _self_
|
|
280
|
+
|
|
281
|
+
execution:
|
|
282
|
+
output_dir: ./custom_results
|
|
283
|
+
|
|
284
|
+
target:
|
|
285
|
+
api_endpoint:
|
|
286
|
+
model_id: my-model
|
|
287
|
+
url: http://localhost:8000/v1/chat/completions
|
|
288
|
+
api_key_name: ""
|
|
289
|
+
|
|
290
|
+
evaluation:
|
|
291
|
+
nemo_evaluator_config:
|
|
292
|
+
config:
|
|
293
|
+
params:
|
|
294
|
+
parallelism: 4
|
|
295
|
+
request_timeout: 300
|
|
296
|
+
|
|
297
|
+
tasks:
|
|
298
|
+
- name: custom_task_1
|
|
299
|
+
dataset_dir: /data/benchmarks
|
|
300
|
+
env_vars:
|
|
301
|
+
DATA_VERSION: v2
|
|
302
|
+
nemo_evaluator_config:
|
|
303
|
+
config:
|
|
304
|
+
params:
|
|
305
|
+
temperature: 0.0
|
|
306
|
+
max_new_tokens: 256
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
Run with:
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
nemo-evaluator-launcher run \
|
|
313
|
+
--config-dir . \
|
|
314
|
+
--config-name custom_eval_config
|
|
315
|
+
```
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
# Execution Backends
|
|
2
|
+
|
|
3
|
+
NeMo Evaluator supports three execution backends: Local (Docker), Slurm (HPC), and Lepton (Cloud). Each backend implements the same interface but has different configuration requirements.
|
|
4
|
+
|
|
5
|
+
## Backend Architecture
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
9
|
+
│ nemo-evaluator-launcher │
|
|
10
|
+
│ │
|
|
11
|
+
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
|
12
|
+
│ │ LocalExecutor │ │ SlurmExecutor │ │ LeptonExecutor│ │
|
|
13
|
+
│ │ (Docker) │ │ (SSH+sbatch)│ │ (Cloud API) │ │
|
|
14
|
+
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
|
15
|
+
│ │ │ │ │
|
|
16
|
+
└───────────┼────────────────┼─────────────────┼───────────────┘
|
|
17
|
+
│ │ │
|
|
18
|
+
▼ ▼ ▼
|
|
19
|
+
┌─────────┐ ┌───────────┐ ┌────────────┐
|
|
20
|
+
│ Docker │ │ Slurm │ │ Lepton AI │
|
|
21
|
+
│ Engine │ │ Cluster │ │ Platform │
|
|
22
|
+
└─────────┘ └───────────┘ └────────────┘
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Local Executor (Docker)
|
|
26
|
+
|
|
27
|
+
The local executor runs evaluation containers on your local machine using Docker.
|
|
28
|
+
|
|
29
|
+
### Prerequisites
|
|
30
|
+
|
|
31
|
+
- Docker installed and running
|
|
32
|
+
- `docker` command available in PATH
|
|
33
|
+
- GPU drivers and nvidia-container-toolkit for GPU tasks
|
|
34
|
+
|
|
35
|
+
### Configuration
|
|
36
|
+
|
|
37
|
+
```yaml
|
|
38
|
+
defaults:
|
|
39
|
+
- execution: local
|
|
40
|
+
- deployment: none
|
|
41
|
+
- _self_
|
|
42
|
+
|
|
43
|
+
execution:
|
|
44
|
+
output_dir: ./results
|
|
45
|
+
mode: sequential # or parallel
|
|
46
|
+
|
|
47
|
+
# Docker-specific options
|
|
48
|
+
docker_args:
|
|
49
|
+
- "--gpus=all"
|
|
50
|
+
- "--shm-size=16g"
|
|
51
|
+
|
|
52
|
+
# Container resource limits
|
|
53
|
+
memory_limit: "64g"
|
|
54
|
+
cpus: 8
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### How It Works
|
|
58
|
+
|
|
59
|
+
1. Launcher reads `mapping.toml` to find the container image for the task
|
|
60
|
+
2. Creates run configuration and mounts volumes
|
|
61
|
+
3. Executes `docker run` via subprocess
|
|
62
|
+
4. Monitors stage files (`stage.pre-start`, `stage.running`, `stage.exit`)
|
|
63
|
+
5. Collects results from mounted output directory
|
|
64
|
+
|
|
65
|
+
### Example Usage
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Simple local evaluation
|
|
69
|
+
nemo-evaluator-launcher run \
|
|
70
|
+
--config-dir . \
|
|
71
|
+
--config-name local_config
|
|
72
|
+
|
|
73
|
+
# With GPU allocation
|
|
74
|
+
nemo-evaluator-launcher run \
|
|
75
|
+
--config-dir . \
|
|
76
|
+
--config-name local_config \
|
|
77
|
+
-o 'execution.docker_args=["--gpus=all"]'
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Status Tracking
|
|
81
|
+
|
|
82
|
+
Status is tracked via file markers in the output directory:
|
|
83
|
+
|
|
84
|
+
| File | Meaning |
|
|
85
|
+
|------|---------|
|
|
86
|
+
| `stage.pre-start` | Container starting |
|
|
87
|
+
| `stage.running` | Evaluation in progress |
|
|
88
|
+
| `stage.exit` | Evaluation complete |
|
|
89
|
+
|
|
90
|
+
## Slurm Executor
|
|
91
|
+
|
|
92
|
+
The Slurm executor submits evaluation jobs to HPC clusters via SSH.
|
|
93
|
+
|
|
94
|
+
### Prerequisites
|
|
95
|
+
|
|
96
|
+
- SSH access to cluster head node
|
|
97
|
+
- Slurm commands available (`sbatch`, `squeue`, `sacct`)
|
|
98
|
+
- NGC containers accessible from compute nodes
|
|
99
|
+
- Shared filesystem for results
|
|
100
|
+
|
|
101
|
+
### Configuration
|
|
102
|
+
|
|
103
|
+
```yaml
|
|
104
|
+
defaults:
|
|
105
|
+
- execution: slurm
|
|
106
|
+
- deployment: vllm # or sglang, nim, none
|
|
107
|
+
- _self_
|
|
108
|
+
|
|
109
|
+
execution:
|
|
110
|
+
# SSH connection settings
|
|
111
|
+
hostname: cluster.example.com
|
|
112
|
+
username: myuser # Optional; if omitted, the user from your SSH config is used
|
|
113
|
+
ssh_key_path: ~/.ssh/id_rsa
|
|
114
|
+
|
|
115
|
+
# Slurm job settings
|
|
116
|
+
account: my_account
|
|
117
|
+
partition: gpu
|
|
118
|
+
qos: normal
|
|
119
|
+
nodes: 1
|
|
120
|
+
gpus_per_node: 8
|
|
121
|
+
cpus_per_task: 32
|
|
122
|
+
memory: "256G"
|
|
123
|
+
walltime: "04:00:00"
|
|
124
|
+
|
|
125
|
+
# Output settings
|
|
126
|
+
output_dir: /shared/nfs/results
|
|
127
|
+
|
|
128
|
+
# Container settings
|
|
129
|
+
container_mounts:
|
|
130
|
+
- "/shared/data:/data:ro"
|
|
131
|
+
- "/shared/models:/models:ro"
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Deployment Options
|
|
135
|
+
|
|
136
|
+
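When running on Slurm, you can deploy a model alongside the evaluation. Choose one of the following deployment types (the stanzas below are alternatives, not a single config):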
|
|
137
|
+
|
|
138
|
+
```yaml
|
|
139
|
+
# vLLM deployment
|
|
140
|
+
deployment:
|
|
141
|
+
type: vllm
|
|
142
|
+
checkpoint_path: /models/llama-3.1-8b
|
|
143
|
+
tensor_parallel_size: 4
|
|
144
|
+
max_model_len: 8192
|
|
145
|
+
gpu_memory_utilization: 0.9
|
|
146
|
+
|
|
147
|
+
# SGLang deployment
|
|
148
|
+
deployment:
|
|
149
|
+
type: sglang
|
|
150
|
+
checkpoint_path: /models/llama-3.1-8b
|
|
151
|
+
tensor_parallel_size: 4
|
|
152
|
+
|
|
153
|
+
# NVIDIA NIM deployment
|
|
154
|
+
deployment:
|
|
155
|
+
type: nim
|
|
156
|
+
nim_model_name: meta/llama-3.1-8b-instruct
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Job Submission Flow
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
┌─────────────────┐
|
|
163
|
+
│ Launcher CLI │
|
|
164
|
+
└────────┬────────┘
|
|
165
|
+
│ SSH
|
|
166
|
+
▼
|
|
167
|
+
┌─────────────────┐
|
|
168
|
+
│ Cluster Head │
|
|
169
|
+
│ Node │
|
|
170
|
+
└────────┬────────┘
|
|
171
|
+
│ sbatch
|
|
172
|
+
▼
|
|
173
|
+
┌─────────────────┐
|
|
174
|
+
│ Compute Node │
|
|
175
|
+
│ │
|
|
176
|
+
│ ┌─────────────┐ │
|
|
177
|
+
│ │ Deployment │ │
|
|
178
|
+
│ │ Container │ │
|
|
179
|
+
│ └─────────────┘ │
|
|
180
|
+
│ │ │
|
|
181
|
+
│ ▼ │
|
|
182
|
+
│ ┌─────────────┐ │
|
|
183
|
+
│ │ Evaluation │ │
|
|
184
|
+
│ │ Container │ │
|
|
185
|
+
│ └─────────────┘ │
|
|
186
|
+
└─────────────────┘
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Status Queries
|
|
190
|
+
|
|
191
|
+
The Slurm executor queries job status via `sacct`:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# Status command checks these Slurm states
|
|
195
|
+
sacct -j <job_id> --format=JobID,State,ExitCode
|
|
196
|
+
|
|
197
|
+
# Mapped to ExecutionState:
|
|
198
|
+
# PENDING -> pending
|
|
199
|
+
# RUNNING -> running
|
|
200
|
+
# COMPLETED -> completed
|
|
201
|
+
# FAILED -> failed
|
|
202
|
+
# CANCELLED -> cancelled
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Long-Running Jobs
|
|
206
|
+
|
|
207
|
+
For long-running evaluations on Slurm, consider extending the walltime and enabling the caching interceptor:
|
|
208
|
+
|
|
209
|
+
```yaml
|
|
210
|
+
execution:
|
|
211
|
+
walltime: "24:00:00" # Extended walltime
|
|
212
|
+
# Use caching to resume from interruptions
|
|
213
|
+
|
|
214
|
+
target:
|
|
215
|
+
api_endpoint:
|
|
216
|
+
adapter_config:
|
|
217
|
+
interceptors:
|
|
218
|
+
- name: caching
|
|
219
|
+
config:
|
|
220
|
+
cache_dir: "/shared/cache"
|
|
221
|
+
reuse_cached_responses: true
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
The caching interceptor helps resume interrupted evaluations by reusing previous API responses.
|
|
225
|
+
|
|
226
|
+
## Lepton Executor
|
|
227
|
+
|
|
228
|
+
The Lepton executor runs evaluations on Lepton AI's cloud platform.
|
|
229
|
+
|
|
230
|
+
### Prerequisites
|
|
231
|
+
|
|
232
|
+
- Lepton AI account
|
|
233
|
+
- `LEPTON_API_TOKEN` environment variable set
|
|
234
|
+
- `leptonai` Python package (auto-installed)
|
|
235
|
+
|
|
236
|
+
### Configuration
|
|
237
|
+
|
|
238
|
+
```yaml
|
|
239
|
+
defaults:
|
|
240
|
+
- execution: lepton
|
|
241
|
+
- deployment: none
|
|
242
|
+
- _self_
|
|
243
|
+
|
|
244
|
+
execution:
|
|
245
|
+
# Lepton job settings
|
|
246
|
+
resource_shape: gpu.a100-80g
|
|
247
|
+
num_replicas: 1
|
|
248
|
+
|
|
249
|
+
# Environment
|
|
250
|
+
env_vars:
|
|
251
|
+
NGC_API_KEY: NGC_API_KEY
|
|
252
|
+
HF_TOKEN: HF_TOKEN
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### How It Works
|
|
256
|
+
|
|
257
|
+
1. Launcher creates Lepton job specification
|
|
258
|
+
2. Submits job via Lepton API
|
|
259
|
+
3. Optionally creates endpoint for model serving
|
|
260
|
+
4. Polls job status via API
|
|
261
|
+
5. Retrieves results when complete
|
|
262
|
+
|
|
263
|
+
### Endpoint Management
|
|
264
|
+
|
|
265
|
+
For evaluating Lepton-hosted models:
|
|
266
|
+
|
|
267
|
+
```yaml
|
|
268
|
+
target:
|
|
269
|
+
api_endpoint:
|
|
270
|
+
type: lepton
|
|
271
|
+
deployment_name: my-llama-deployment
|
|
272
|
+
# URL auto-generated from deployment
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## Backend Selection Guide
|
|
276
|
+
|
|
277
|
+
| Use Case | Recommended Backend |
|
|
278
|
+
|----------|-------------------|
|
|
279
|
+
| Quick local testing | Local |
|
|
280
|
+
| Large-scale batch evaluation | Slurm |
|
|
281
|
+
| CI/CD pipeline | Local or Lepton |
|
|
282
|
+
| Multi-model comparison | Slurm (parallel jobs) |
|
|
283
|
+
| Cloud-native workflow | Lepton |
|
|
284
|
+
| Self-hosted model evaluation | Local or Slurm |
|
|
285
|
+
|
|
286
|
+
## Execution Database
|
|
287
|
+
|
|
288
|
+
All backends share the `ExecutionDB` for tracking jobs:
|
|
289
|
+
|
|
290
|
+
```
|
|
291
|
+
┌─────────────────────────────────────────────┐
|
|
292
|
+
│ ExecutionDB (SQLite) │
|
|
293
|
+
│ │
|
|
294
|
+
│ invocation_id │ job_id │ status │ backend │
|
|
295
|
+
│ ───────────────────────────────────────── │
|
|
296
|
+
│ inv_abc123 │ 12345 │ running │ slurm │
|
|
297
|
+
│ inv_def456 │ cont_1 │ done │ local │
|
|
298
|
+
└─────────────────────────────────────────────┘
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
Query via CLI:
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
# List all invocations
|
|
305
|
+
nemo-evaluator-launcher ls runs
|
|
306
|
+
|
|
307
|
+
# Get specific invocation
|
|
308
|
+
nemo-evaluator-launcher info <invocation_id>
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
## Troubleshooting
|
|
312
|
+
|
|
313
|
+
### Local Executor
|
|
314
|
+
|
|
315
|
+
**Issue: Docker permission denied**
|
|
316
|
+
```bash
|
|
317
|
+
sudo usermod -aG docker $USER
|
|
318
|
+
newgrp docker
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
**Issue: GPU not available in container**
|
|
322
|
+
```bash
|
|
323
|
+
# Install nvidia-container-toolkit
|
|
324
|
+
sudo apt-get install nvidia-container-toolkit
|
|
325
|
+
sudo systemctl restart docker
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### Slurm Executor
|
|
329
|
+
|
|
330
|
+
**Issue: SSH connection fails**
|
|
331
|
+
```bash
|
|
332
|
+
# Test SSH connection
|
|
333
|
+
ssh -v cluster.example.com
|
|
334
|
+
|
|
335
|
+
# Restrict SSH key permissions (ssh rejects private keys readable by others)
|
|
336
|
+
chmod 600 ~/.ssh/id_rsa
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
**Issue: Job stuck in pending**
|
|
340
|
+
```bash
|
|
341
|
+
# Check queue status
|
|
342
|
+
squeue -u $USER
|
|
343
|
+
|
|
344
|
+
# Check account limits
|
|
345
|
+
sacctmgr show associations user=$USER
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
### Lepton Executor
|
|
349
|
+
|
|
350
|
+
**Issue: API token invalid**
|
|
351
|
+
```bash
|
|
352
|
+
# Verify token
|
|
353
|
+
curl -H "Authorization: Bearer $LEPTON_API_TOKEN" \
|
|
354
|
+
https://api.lepton.ai/v1/jobs
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
**Issue: Resource shape unavailable**
|
|
358
|
+
```bash
|
|
359
|
+
# List available shapes
|
|
360
|
+
lepton shape list
|
|
361
|
+
```
|