@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,424 @@
|
|
|
1
|
+
# Creating Custom Tasks in BigCode Evaluation Harness
|
|
2
|
+
|
|
3
|
+
Guide to implementing custom evaluation tasks for code generation models.
|
|
4
|
+
|
|
5
|
+
## Task Architecture
|
|
6
|
+
|
|
7
|
+
All tasks inherit from a base `Task` class and implement standard methods:
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
class Task:
|
|
11
|
+
DATASET_PATH: str # HuggingFace dataset ID
|
|
12
|
+
DATASET_NAME: str # Dataset configuration (or None)
|
|
13
|
+
|
|
14
|
+
def __init__(self, stop_words, requires_execution):
|
|
15
|
+
"""Initialize task with stop words and execution flag."""
|
|
16
|
+
|
|
17
|
+
def get_dataset(self):
|
|
18
|
+
"""Return the evaluation dataset."""
|
|
19
|
+
|
|
20
|
+
def get_prompt(self, doc):
|
|
21
|
+
"""Format document into model prompt."""
|
|
22
|
+
|
|
23
|
+
def get_reference(self, doc):
|
|
24
|
+
"""Extract reference solution from document."""
|
|
25
|
+
|
|
26
|
+
def postprocess_generation(self, generation, idx):
|
|
27
|
+
"""Clean up model output."""
|
|
28
|
+
|
|
29
|
+
def process_results(self, generations, references):
|
|
30
|
+
"""Evaluate and return metrics."""
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Step-by-Step Implementation
|
|
34
|
+
|
|
35
|
+
### Step 1: Create Task File
|
|
36
|
+
|
|
37
|
+
Copy the template below to `bigcode_eval/tasks/<task_name>.py`:
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
"""
|
|
41
|
+
<Paper Title>
|
|
42
|
+
<Paper URL>
|
|
43
|
+
|
|
44
|
+
<Task Description>
|
|
45
|
+
|
|
46
|
+
Homepage: <Homepage URL>
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
import json
|
|
50
|
+
from evaluate import load
|
|
51
|
+
from bigcode_eval.base import Task
|
|
52
|
+
|
|
53
|
+
class MyCustomTask(Task):
|
|
54
|
+
"""Custom code evaluation task."""
|
|
55
|
+
|
|
56
|
+
DATASET_PATH = "username/dataset-name" # HuggingFace dataset
|
|
57
|
+
DATASET_NAME = None # or specific config name
|
|
58
|
+
|
|
59
|
+
def __init__(self):
|
|
60
|
+
super().__init__(
|
|
61
|
+
stop_words=["\nclass", "\ndef", "\n#", "\nif", "\nprint"],
|
|
62
|
+
requires_execution=True, # Set True if running unit tests
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
def get_dataset(self):
|
|
66
|
+
"""Load evaluation split."""
|
|
67
|
+
from datasets import load_dataset
|
|
68
|
+
return load_dataset(
|
|
69
|
+
self.DATASET_PATH,
|
|
70
|
+
self.DATASET_NAME,
|
|
71
|
+
split="test"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
def get_prompt(self, doc):
|
|
75
|
+
"""Format problem into prompt for model."""
|
|
76
|
+
return doc["prompt"]
|
|
77
|
+
|
|
78
|
+
def get_reference(self, doc):
|
|
79
|
+
"""Return test cases or reference solution."""
|
|
80
|
+
return doc["test"]
|
|
81
|
+
|
|
82
|
+
def postprocess_generation(self, generation, idx):
|
|
83
|
+
"""Clean model output (remove extra text after solution)."""
|
|
84
|
+
# Common: stop at first occurrence of stop words
|
|
85
|
+
for stop_word in self.stop_words:
|
|
86
|
+
if stop_word in generation:
|
|
87
|
+
generation = generation[:generation.index(stop_word)]
|
|
88
|
+
return generation
|
|
89
|
+
|
|
90
|
+
def process_results(self, generations, references):
|
|
91
|
+
"""Execute tests and compute pass@k."""
|
|
92
|
+
code_metric = load("code_eval")
|
|
93
|
+
results, _ = code_metric.compute(
|
|
94
|
+
references=references,
|
|
95
|
+
predictions=generations,
|
|
96
|
+
k=[1, 10, 100]
|
|
97
|
+
)
|
|
98
|
+
return results
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Step 2: Register Task
|
|
102
|
+
|
|
103
|
+
Add the import and the registry entry to `bigcode_eval/tasks/__init__.py`:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from bigcode_eval.tasks import my_custom_task
|
|
107
|
+
|
|
108
|
+
TASK_REGISTRY = {
|
|
109
|
+
# ... existing tasks ...
|
|
110
|
+
"my-custom-task": my_custom_task.MyCustomTask,
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Step 3: Test Task
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Verify task loads correctly
|
|
118
|
+
python -c "from bigcode_eval.tasks import get_task; t = get_task('my-custom-task'); print(t)"
|
|
119
|
+
|
|
120
|
+
# Run small evaluation
|
|
121
|
+
accelerate launch main.py \
|
|
122
|
+
--model bigcode/starcoder2-7b \
|
|
123
|
+
--tasks my-custom-task \
|
|
124
|
+
--limit 5 \
|
|
125
|
+
--allow_code_execution
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Implementation Patterns
|
|
129
|
+
|
|
130
|
+
### Pattern 1: Code Execution with Unit Tests
|
|
131
|
+
|
|
132
|
+
For benchmarks that verify functional correctness:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
class CodeExecutionTask(Task):
|
|
136
|
+
def __init__(self):
|
|
137
|
+
super().__init__(
|
|
138
|
+
stop_words=["\nclass", "\ndef", "\n#"],
|
|
139
|
+
requires_execution=True, # CRITICAL: Enable execution
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
def get_reference(self, doc):
|
|
143
|
+
"""Return test code to execute."""
|
|
144
|
+
return f"\n{doc['test']}\ncheck({doc['entry_point']})"
|
|
145
|
+
|
|
146
|
+
def process_results(self, generations, references):
|
|
147
|
+
code_metric = load("code_eval")
|
|
148
|
+
results, details = code_metric.compute(
|
|
149
|
+
references=references,
|
|
150
|
+
predictions=generations,
|
|
151
|
+
k=[1, 10, 100],
|
|
152
|
+
timeout=10.0, # Seconds per test
|
|
153
|
+
)
|
|
154
|
+
return results
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Pattern 2: BLEU Score Evaluation
|
|
158
|
+
|
|
159
|
+
For benchmarks without executable tests:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
class BLEUTask(Task):
|
|
163
|
+
def __init__(self):
|
|
164
|
+
super().__init__(
|
|
165
|
+
stop_words=["\n\n"],
|
|
166
|
+
requires_execution=False, # No code execution
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
def get_reference(self, doc):
|
|
170
|
+
"""Return reference code string."""
|
|
171
|
+
return doc["canonical_solution"]
|
|
172
|
+
|
|
173
|
+
def process_results(self, generations, references):
|
|
174
|
+
from evaluate import load
|
|
175
|
+
bleu = load("bleu")
|
|
176
|
+
|
|
177
|
+
# Keep only the first generation for each problem (one prediction per problem for BLEU)
|
|
178
|
+
predictions = [g[0] for g in generations]
|
|
179
|
+
|
|
180
|
+
results = bleu.compute(
|
|
181
|
+
predictions=predictions,
|
|
182
|
+
references=[[r] for r in references]
|
|
183
|
+
)
|
|
184
|
+
return {"bleu": results["bleu"]}
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Pattern 3: Few-Shot Prompting
|
|
188
|
+
|
|
189
|
+
For tasks requiring in-context examples:
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
class FewShotTask(Task):
|
|
193
|
+
def __init__(self):
|
|
194
|
+
super().__init__(stop_words=["\n\n"], requires_execution=True)
|
|
195
|
+
self.examples = self._load_examples()
|
|
196
|
+
|
|
197
|
+
def _load_examples(self):
|
|
198
|
+
"""Load few-shot examples from JSON."""
|
|
199
|
+
import os
|
|
200
|
+
path = os.path.join(
|
|
201
|
+
os.path.dirname(__file__),
|
|
202
|
+
"few_shot_examples",
|
|
203
|
+
"my_task_examples.json"
|
|
204
|
+
)
|
|
205
|
+
with open(path) as f:
|
|
206
|
+
return json.load(f)
|
|
207
|
+
|
|
208
|
+
def get_prompt(self, doc):
|
|
209
|
+
"""Build few-shot prompt."""
|
|
210
|
+
prompt = ""
|
|
211
|
+
for ex in self.examples[:3]: # 3-shot
|
|
212
|
+
prompt += f"Problem: {ex['problem']}\nSolution: {ex['solution']}\n\n"
|
|
213
|
+
prompt += f"Problem: {doc['problem']}\nSolution:"
|
|
214
|
+
return prompt
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Pattern 4: Fill-in-the-Middle (FIM)
|
|
218
|
+
|
|
219
|
+
For infilling tasks:
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
class FIMTask(Task):
    """Fill-in-the-middle task: the model generates the text between a prefix and a suffix."""

    # Sentinel strings that delimit the three parts of a FIM prompt.
    FIM_PREFIX = "<fim_prefix>"
    FIM_MIDDLE = "<fim_middle>"
    FIM_SUFFIX = "<fim_suffix>"

    def __init__(self):
        stop_sequences = ["<|endoftext|>", self.FIM_MIDDLE]
        super().__init__(stop_words=stop_sequences, requires_execution=False)

    def get_prompt(self, doc):
        """Assemble the FIM prompt for ``doc``.

        Args:
            doc: Mapping for one problem; must contain "prefix" and "suffix".

        Returns:
            The prefix marker and prefix, then the suffix marker and suffix,
            ending with the middle marker.
        """
        before = doc["prefix"]
        after = doc["suffix"]
        return "{}{}{}{}{}".format(
            self.FIM_PREFIX, before, self.FIM_SUFFIX, after, self.FIM_MIDDLE
        )

    def postprocess_generation(self, generation, idx):
        """Return the text before the first middle marker, stripped of surrounding whitespace.

        Args:
            generation: Raw generated text.
            idx: Index of the problem; not used here.
        """
        # partition() leaves the text untouched when the marker is absent.
        middle, _, _ = generation.partition(self.FIM_MIDDLE)
        return middle.strip()
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Pattern 5: Instruction-Tuned Models
|
|
247
|
+
|
|
248
|
+
For chat/instruction models:
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
class InstructTask(Task):
    """Task that phrases each problem as a natural-language instruction."""

    def __init__(self):
        stop_sequences = ["</s>", "[/INST]", "```\n"]
        super().__init__(stop_words=stop_sequences, requires_execution=True)

    def get_prompt(self, doc):
        """Build the instruction prompt for ``doc``.

        Args:
            doc: Mapping for one problem; must contain "description",
                "signature" and "examples".

        Returns:
            The instruction text: the task sentence, the function signature,
            the examples, and a closing request for the implementation only,
            with blank lines between sections.
        """
        sections = [
            f"Write a Python function that {doc['description']}.",
            "",
            f"Function signature: {doc['signature']}",
            "",
            "Examples:",
            f"{doc['examples']}",
            "",
            "Write only the function implementation:",
        ]
        return "\n".join(sections)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
## Dataset Format Requirements
|
|
272
|
+
|
|
273
|
+
### For HuggingFace Datasets
|
|
274
|
+
|
|
275
|
+
Your dataset should include:
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
{
|
|
279
|
+
"prompt": "def function_name(args):\n '''Docstring'''",
|
|
280
|
+
"canonical_solution": " return result",
|
|
281
|
+
"test": "assert function_name(input) == expected",
|
|
282
|
+
"entry_point": "function_name"
|
|
283
|
+
}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Creating Dataset Factories
|
|
287
|
+
|
|
288
|
+
For tasks with multiple configurations:
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
def create_all_tasks():
    """Build one task class per language.

    Returns:
        A dict mapping "my-task-<language>" to the class returned by
        ``create_task_class`` for that language.
    """
    languages = ("python", "javascript", "java", "cpp")
    return {f"my-task-{name}": create_task_class(name) for name in languages}
|
|
297
|
+
|
|
298
|
+
def create_task_class(language):
    """Return a Task subclass configured for one language.

    Args:
        language: Value assigned to the class's DATASET_NAME.
    """

    class LanguageTask(Task):
        """Task variant whose DATASET_NAME is the requested language."""

        DATASET_NAME = language
        DATASET_PATH = "username/dataset"
        # ... implementation

    return LanguageTask
|
|
304
|
+
|
|
305
|
+
# In __init__.py:
|
|
306
|
+
TASK_REGISTRY = {
|
|
307
|
+
**my_module.create_all_tasks(),
|
|
308
|
+
}
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
## Testing Your Task
|
|
312
|
+
|
|
313
|
+
### Unit Tests
|
|
314
|
+
|
|
315
|
+
Create `tests/test_my_task.py`:
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
import pytest
|
|
319
|
+
from bigcode_eval.tasks import get_task
|
|
320
|
+
|
|
321
|
+
def test_task_loads():
    """get_task returns an object for the custom task name."""
    loaded = get_task("my-custom-task")
    assert loaded is not None
|
|
324
|
+
|
|
325
|
+
def test_dataset_loads():
    """The task's dataset holds at least one document."""
    documents = get_task("my-custom-task").get_dataset()
    assert len(documents) > 0
|
|
329
|
+
|
|
330
|
+
def test_prompt_format():
    """The prompt built for the first document is a non-empty string."""
    task = get_task("my-custom-task")
    first_doc = task.get_dataset()[0]
    prompt = task.get_prompt(first_doc)
    assert isinstance(prompt, str)
    assert len(prompt) > 0
|
|
336
|
+
|
|
337
|
+
def test_postprocess():
    """Postprocessing drops the trailing class definition from a raw completion."""
    raw_completion = "def foo():\n return 1\n\nclass Bar:"
    cleaned = get_task("my-custom-task").postprocess_generation(raw_completion, 0)
    assert "class Bar" not in cleaned
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Run tests:
|
|
345
|
+
```bash
|
|
346
|
+
pytest tests/test_my_task.py -v
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### Integration Test
|
|
350
|
+
|
|
351
|
+
```bash
|
|
352
|
+
# Small-scale evaluation
|
|
353
|
+
accelerate launch main.py \
|
|
354
|
+
--model bigcode/santacoder \
|
|
355
|
+
--tasks my-custom-task \
|
|
356
|
+
--limit 10 \
|
|
357
|
+
--n_samples 5 \
|
|
358
|
+
--allow_code_execution \
|
|
359
|
+
--save_generations
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
## Common Pitfalls
|
|
363
|
+
|
|
364
|
+
### 1. Missing `requires_execution=True`
|
|
365
|
+
|
|
366
|
+
If your task uses unit tests, you MUST set:
|
|
367
|
+
```python
|
|
368
|
+
super().__init__(requires_execution=True, ...)
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### 2. Incorrect Stop Words
|
|
372
|
+
|
|
373
|
+
Stop words should match the syntax of the programming language your task targets:
|
|
374
|
+
|
|
375
|
+
```python
|
|
376
|
+
# Python
|
|
377
|
+
stop_words=["\nclass", "\ndef", "\n#", "\nif __name__"]
|
|
378
|
+
|
|
379
|
+
# JavaScript
|
|
380
|
+
stop_words=["\nfunction", "\nconst", "\nlet", "\n//"]
|
|
381
|
+
|
|
382
|
+
# Java
|
|
383
|
+
stop_words=["\npublic", "\nprivate", "\nclass", "\n//"]
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
### 3. Not Handling Edge Cases in Postprocessing
|
|
387
|
+
|
|
388
|
+
```python
|
|
389
|
+
def postprocess_generation(self, generation, idx):
    """Trim a raw generation at the task's stop words.

    Args:
        generation: Raw generated text; may be empty or None.
        idx: Index of the problem; not used here.

    Returns:
        "" for an empty or whitespace-only generation. Otherwise the text,
        cut at the first occurrence of each stop word in turn, with
        trailing whitespace removed.
    """
    # Empty, None, or whitespace-only output: nothing to keep.
    if not (generation and generation.strip()):
        return ""

    text = generation
    # Each stop word is applied to the already-truncated text.
    for stop in self.stop_words:
        cut = text.find(stop)
        if cut != -1:
            text = text[:cut]

    return text.rstrip()
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
### 4. Timeout Issues
|
|
404
|
+
|
|
405
|
+
For tests that need more time to run, increase the timeout:
|
|
406
|
+
```python
|
|
407
|
+
results, _ = code_metric.compute(
|
|
408
|
+
references=references,
|
|
409
|
+
predictions=generations,
|
|
410
|
+
timeout=30.0, # Increase from default
|
|
411
|
+
)
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
## Contributing Your Task
|
|
415
|
+
|
|
416
|
+
1. Fork the repository
|
|
417
|
+
2. Create feature branch
|
|
418
|
+
3. Implement task following patterns above
|
|
419
|
+
4. Add tests
|
|
420
|
+
5. Update documentation
|
|
421
|
+
6. Submit PR with:
|
|
422
|
+
- Task description
|
|
423
|
+
- Example usage
|
|
424
|
+
- Expected results range
|