@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,744 @@
|
|
|
1
|
+
# Deployment Guide
|
|
2
|
+
|
|
3
|
+
Complete guide to deploying MLflow models to production environments.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
- Deployment Options
|
|
7
|
+
- Local Serving
|
|
8
|
+
- REST API Serving
|
|
9
|
+
- Docker Deployment
|
|
10
|
+
- Cloud Deployment
|
|
11
|
+
- Batch Inference
|
|
12
|
+
- Production Patterns
|
|
13
|
+
- Monitoring
|
|
14
|
+
|
|
15
|
+
## Deployment Options
|
|
16
|
+
|
|
17
|
+
MLflow supports multiple deployment targets:
|
|
18
|
+
|
|
19
|
+
| Target | Use Case | Complexity |
|
|
20
|
+
|--------|----------|------------|
|
|
21
|
+
| **Local Server** | Development, testing | Low |
|
|
22
|
+
| **REST API** | Production serving | Medium |
|
|
23
|
+
| **Docker** | Containerized deployment | Medium |
|
|
24
|
+
| **AWS SageMaker** | Managed AWS deployment | High |
|
|
25
|
+
| **Azure ML** | Managed Azure deployment | High |
|
|
26
|
+
| **Kubernetes** | Scalable orchestration | High |
|
|
27
|
+
| **Batch** | Offline predictions | Low |
|
|
28
|
+
|
|
29
|
+
## Local Serving
|
|
30
|
+
|
|
31
|
+
### Serve Model Locally
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Serve registered model
|
|
35
|
+
mlflow models serve -m "models:/product-classifier/Production" -p 5001
|
|
36
|
+
|
|
37
|
+
# Serve from run
|
|
38
|
+
mlflow models serve -m "runs:/abc123/model" -p 5001
|
|
39
|
+
|
|
40
|
+
# Serve with custom host
|
|
41
|
+
mlflow models serve -m "models:/my-model/Production" -h 0.0.0.0 -p 8080
|
|
42
|
+
|
|
43
|
+
# Serve with workers (for scalability)
|
|
44
|
+
mlflow models serve -m "models:/my-model/Production" -p 5001 --workers 4
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**Output:**
|
|
48
|
+
```
|
|
49
|
+
Serving model on http://127.0.0.1:5001
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Test Local Server
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Single prediction
|
|
56
|
+
curl http://127.0.0.1:5001/invocations \
|
|
57
|
+
-H 'Content-Type: application/json' \
|
|
58
|
+
-d '{
|
|
59
|
+
"inputs": [[1.0, 2.0, 3.0, 4.0]]
|
|
60
|
+
}'
|
|
61
|
+
|
|
62
|
+
# Batch predictions
|
|
63
|
+
curl http://127.0.0.1:5001/invocations \
|
|
64
|
+
-H 'Content-Type: application/json' \
|
|
65
|
+
-d '{
|
|
66
|
+
"inputs": [
|
|
67
|
+
[1.0, 2.0, 3.0, 4.0],
|
|
68
|
+
[5.0, 6.0, 7.0, 8.0]
|
|
69
|
+
]
|
|
70
|
+
}'
|
|
71
|
+
|
|
72
|
+
# CSV input
|
|
73
|
+
curl http://127.0.0.1:5001/invocations \
|
|
74
|
+
-H 'Content-Type: text/csv' \
|
|
75
|
+
--data-binary @data.csv
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Python Client
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
import requests
|
|
82
|
+
import json
|
|
83
|
+
|
|
84
|
+
url = "http://127.0.0.1:5001/invocations"
|
|
85
|
+
|
|
86
|
+
data = {
|
|
87
|
+
"inputs": [[1.0, 2.0, 3.0, 4.0]]
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
headers = {"Content-Type": "application/json"}
|
|
91
|
+
|
|
92
|
+
response = requests.post(url, json=data, headers=headers)
|
|
93
|
+
predictions = response.json()
|
|
94
|
+
|
|
95
|
+
print(predictions)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## REST API Serving
|
|
99
|
+
|
|
100
|
+
### Build Custom Serving API
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from flask import Flask, request, jsonify
|
|
104
|
+
import mlflow.pyfunc
|
|
105
|
+
|
|
106
|
+
app = Flask(__name__)
|
|
107
|
+
|
|
108
|
+
# Load model on startup
|
|
109
|
+
model = mlflow.pyfunc.load_model("models:/product-classifier/Production")
|
|
110
|
+
|
|
111
|
+
@app.route('/predict', methods=['POST'])
|
|
112
|
+
def predict():
|
|
113
|
+
"""Prediction endpoint."""
|
|
114
|
+
data = request.get_json()
|
|
115
|
+
inputs = data.get('inputs')
|
|
116
|
+
|
|
117
|
+
# Make predictions
|
|
118
|
+
predictions = model.predict(inputs)
|
|
119
|
+
|
|
120
|
+
return jsonify({
|
|
121
|
+
'predictions': predictions.tolist()
|
|
122
|
+
})
|
|
123
|
+
|
|
124
|
+
@app.route('/health', methods=['GET'])
|
|
125
|
+
def health():
|
|
126
|
+
"""Health check endpoint."""
|
|
127
|
+
return jsonify({'status': 'healthy'})
|
|
128
|
+
|
|
129
|
+
if __name__ == '__main__':
|
|
130
|
+
app.run(host='0.0.0.0', port=5001)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### FastAPI Serving
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from fastapi import FastAPI
|
|
137
|
+
from pydantic import BaseModel
|
|
138
|
+
import mlflow.pyfunc
|
|
139
|
+
import numpy as np
|
|
140
|
+
|
|
141
|
+
app = FastAPI()
|
|
142
|
+
|
|
143
|
+
# Load model
|
|
144
|
+
model = mlflow.pyfunc.load_model("models:/product-classifier/Production")
|
|
145
|
+
|
|
146
|
+
class PredictionRequest(BaseModel):
|
|
147
|
+
inputs: list
|
|
148
|
+
|
|
149
|
+
class PredictionResponse(BaseModel):
|
|
150
|
+
predictions: list
|
|
151
|
+
|
|
152
|
+
@app.post("/predict", response_model=PredictionResponse)
|
|
153
|
+
async def predict(request: PredictionRequest):
|
|
154
|
+
"""Make predictions."""
|
|
155
|
+
inputs = np.array(request.inputs)
|
|
156
|
+
predictions = model.predict(inputs)
|
|
157
|
+
|
|
158
|
+
return PredictionResponse(predictions=predictions.tolist())
|
|
159
|
+
|
|
160
|
+
@app.get("/health")
|
|
161
|
+
async def health():
|
|
162
|
+
"""Health check."""
|
|
163
|
+
return {"status": "healthy"}
|
|
164
|
+
|
|
165
|
+
# Run with: uvicorn main:app --host 0.0.0.0 --port 5001
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Docker Deployment
|
|
169
|
+
|
|
170
|
+
### Build Docker Image
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
# Build Docker image with MLflow
|
|
174
|
+
mlflow models build-docker \
|
|
175
|
+
-m "models:/product-classifier/Production" \
|
|
176
|
+
-n product-classifier:v1
|
|
177
|
+
|
|
178
|
+
# Build with custom image name
|
|
179
|
+
mlflow models build-docker \
|
|
180
|
+
-m "runs:/abc123/model" \
|
|
181
|
+
-n my-registry/my-model:latest
|
|
182
|
+
|
|
183
|
+
# Build and enable MLServer (for KServe/Seldon)
|
|
184
|
+
mlflow models build-docker \
|
|
185
|
+
-m "models:/my-model/Production" \
|
|
186
|
+
-n my-model:v1 \
|
|
187
|
+
--enable-mlserver
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Run Docker Container
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
# Run container
|
|
194
|
+
docker run -p 5001:8080 product-classifier:v1
|
|
195
|
+
|
|
196
|
+
# Run with environment variables
|
|
197
|
+
docker run \
|
|
198
|
+
-p 5001:8080 \
|
|
199
|
+
-e MLFLOW_TRACKING_URI=http://mlflow-server:5000 \
|
|
200
|
+
product-classifier:v1
|
|
201
|
+
|
|
202
|
+
# Run with GPU support
|
|
203
|
+
docker run --gpus all -p 5001:8080 product-classifier:v1
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Test Docker Container
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# Test endpoint
|
|
210
|
+
curl http://localhost:5001/invocations \
|
|
211
|
+
-H 'Content-Type: application/json' \
|
|
212
|
+
-d '{"inputs": [[1.0, 2.0, 3.0, 4.0]]}'
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Custom Dockerfile
|
|
216
|
+
|
|
217
|
+
```dockerfile
|
|
218
|
+
FROM python:3.9-slim
|
|
219
|
+
|
|
220
|
+
# Install MLflow
|
|
221
|
+
RUN pip install mlflow boto3
|
|
222
|
+
|
|
223
|
+
# Set working directory
|
|
224
|
+
WORKDIR /app
|
|
225
|
+
|
|
226
|
+
# Copy model (alternative to downloading from tracking server)
|
|
227
|
+
COPY model/ /app/model/
|
|
228
|
+
|
|
229
|
+
# Expose port
|
|
230
|
+
EXPOSE 8080
|
|
231
|
+
|
|
232
|
+
# Set environment variables
|
|
233
|
+
ENV MLFLOW_TRACKING_URI=http://mlflow-server:5000
|
|
234
|
+
|
|
235
|
+
# Serve model
|
|
236
|
+
CMD ["mlflow", "models", "serve", "-m", "/app/model", "-h", "0.0.0.0", "-p", "8080"]
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Cloud Deployment
|
|
240
|
+
|
|
241
|
+
### AWS SageMaker
|
|
242
|
+
|
|
243
|
+
#### Deploy to SageMaker
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Build and push Docker image to ECR
|
|
247
|
+
mlflow sagemaker build-and-push-container
|
|
248
|
+
|
|
249
|
+
# Deploy model to SageMaker endpoint
|
|
250
|
+
mlflow deployments create \
|
|
251
|
+
-t sagemaker \
|
|
252
|
+
-m "models:/product-classifier/Production" \
|
|
253
|
+
--name product-classifier-endpoint \
|
|
254
|
+
--region-name us-west-2 \
|
|
255
|
+
--config instance_type=ml.m5.xlarge \
|
|
256
|
+
--config instance_count=1
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
#### Python API
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
import mlflow.sagemaker
|
|
263
|
+
|
|
264
|
+
# Deploy to SageMaker
|
|
265
|
+
mlflow.sagemaker.deploy(
|
|
266
|
+
app_name="product-classifier",
|
|
267
|
+
model_uri="models:/product-classifier/Production",
|
|
268
|
+
region_name="us-west-2",
|
|
269
|
+
mode="create",
|
|
270
|
+
instance_type="ml.m5.xlarge",
|
|
271
|
+
instance_count=1,
|
|
272
|
+
vpc_config={
|
|
273
|
+
"SecurityGroupIds": ["sg-123456"],
|
|
274
|
+
"Subnets": ["subnet-123456", "subnet-789012"]
|
|
275
|
+
}
|
|
276
|
+
)
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
#### Invoke SageMaker Endpoint
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
import boto3
|
|
283
|
+
import json
|
|
284
|
+
|
|
285
|
+
runtime = boto3.client('sagemaker-runtime', region_name='us-west-2')
|
|
286
|
+
|
|
287
|
+
# Prepare input
|
|
288
|
+
data = {
|
|
289
|
+
"inputs": [[1.0, 2.0, 3.0, 4.0]]
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
# Invoke endpoint
|
|
293
|
+
response = runtime.invoke_endpoint(
|
|
294
|
+
EndpointName='product-classifier',
|
|
295
|
+
ContentType='application/json',
|
|
296
|
+
Body=json.dumps(data)
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
# Parse response
|
|
300
|
+
predictions = json.loads(response['Body'].read())
|
|
301
|
+
print(predictions)
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
#### Update SageMaker Endpoint
|
|
305
|
+
|
|
306
|
+
```bash
|
|
307
|
+
# Update endpoint with new model version
|
|
308
|
+
mlflow deployments update \
|
|
309
|
+
-t sagemaker \
|
|
310
|
+
-m "models:/product-classifier/Production" \
|
|
311
|
+
--name product-classifier-endpoint
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
#### Delete SageMaker Endpoint
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
# Delete endpoint
|
|
318
|
+
mlflow deployments delete -t sagemaker --name product-classifier-endpoint
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Azure ML
|
|
322
|
+
|
|
323
|
+
#### Deploy to Azure
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
# Deploy to Azure ML
|
|
327
|
+
mlflow deployments create \
|
|
328
|
+
-t azureml \
|
|
329
|
+
-m "models:/product-classifier/Production" \
|
|
330
|
+
--name product-classifier-azure \
|
|
331
|
+
--config workspace_name=my-workspace \
|
|
332
|
+
--config resource_group=my-resource-group
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
#### Python API
|
|
336
|
+
|
|
337
|
+
```python
|
|
338
|
+
import mlflow.azureml
|
|
339
|
+
|
|
340
|
+
# Deploy to Azure ML
|
|
341
|
+
mlflow.azureml.deploy(
|
|
342
|
+
model_uri="models:/product-classifier/Production",
|
|
343
|
+
workspace=workspace,
|
|
344
|
+
deployment_config=deployment_config,
|
|
345
|
+
service_name="product-classifier"
|
|
346
|
+
)
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### Kubernetes (KServe)
|
|
350
|
+
|
|
351
|
+
#### Deploy to Kubernetes
|
|
352
|
+
|
|
353
|
+
```yaml
|
|
354
|
+
# kserve-inference.yaml
|
|
355
|
+
apiVersion: serving.kserve.io/v1beta1
|
|
356
|
+
kind: InferenceService
|
|
357
|
+
metadata:
|
|
358
|
+
name: product-classifier
|
|
359
|
+
spec:
|
|
360
|
+
predictor:
|
|
361
|
+
mlflow:
|
|
362
|
+
storageUri: "models:/product-classifier/Production"
|
|
363
|
+
protocolVersion: v2
|
|
364
|
+
runtimeVersion: 1.0.0
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
# Apply to cluster
|
|
369
|
+
kubectl apply -f kserve-inference.yaml
|
|
370
|
+
|
|
371
|
+
# Check status
|
|
372
|
+
kubectl get inferenceservice product-classifier
|
|
373
|
+
|
|
374
|
+
# Get endpoint URL
|
|
375
|
+
kubectl get inferenceservice product-classifier -o jsonpath='{.status.url}'
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
## Batch Inference
|
|
379
|
+
|
|
380
|
+
### Batch Prediction with Spark
|
|
381
|
+
|
|
382
|
+
```python
|
|
383
|
+
import mlflow.pyfunc
from pyspark.sql import SparkSession

# Create (or reuse) a Spark session
spark = SparkSession.builder.getOrCreate()

# Load model as Spark UDF
model_uri = "models:/product-classifier/Production"
predict_udf = mlflow.pyfunc.spark_udf(spark, model_uri)
|
|
389
|
+
|
|
390
|
+
# Load data
|
|
391
|
+
df = spark.read.parquet("s3://bucket/data/")
|
|
392
|
+
|
|
393
|
+
# Apply predictions
|
|
394
|
+
predictions_df = df.withColumn(
|
|
395
|
+
"prediction",
|
|
396
|
+
predict_udf(*df.columns)
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
# Save results
|
|
400
|
+
predictions_df.write.parquet("s3://bucket/predictions/")
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
### Batch Prediction with Pandas
|
|
404
|
+
|
|
405
|
+
```python
|
|
406
|
+
import mlflow.pyfunc
|
|
407
|
+
import pandas as pd
|
|
408
|
+
|
|
409
|
+
# Load model
|
|
410
|
+
model = mlflow.pyfunc.load_model("models:/product-classifier/Production")
|
|
411
|
+
|
|
412
|
+
# Load data in batches
|
|
413
|
+
batch_size = 10000
|
|
414
|
+
|
|
415
|
+
first_chunk = True
for chunk in pd.read_csv("large_data.csv", chunksize=batch_size):
    # Make predictions
    predictions = model.predict(chunk)

    # Save results (write the CSV header only once, on the first chunk)
    chunk['prediction'] = predictions
    chunk.to_csv("predictions.csv", mode='a', header=first_chunk, index=False)
    first_chunk = False
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
### Scheduled Batch Job
|
|
425
|
+
|
|
426
|
+
```python
|
|
427
|
+
import mlflow.pyfunc
|
|
428
|
+
import pandas as pd
|
|
429
|
+
from datetime import datetime
|
|
430
|
+
|
|
431
|
+
def batch_predict():
|
|
432
|
+
"""Daily batch prediction job."""
|
|
433
|
+
# Load model
|
|
434
|
+
model = mlflow.pyfunc.load_model("models:/product-classifier/Production")
|
|
435
|
+
|
|
436
|
+
# Load today's data
|
|
437
|
+
today = datetime.now().strftime("%Y-%m-%d")
|
|
438
|
+
df = pd.read_parquet(f"s3://bucket/data/{today}/")
|
|
439
|
+
|
|
440
|
+
# Predict
|
|
441
|
+
predictions = model.predict(df)
|
|
442
|
+
|
|
443
|
+
# Save results
|
|
444
|
+
df['prediction'] = predictions
|
|
445
|
+
df['prediction_date'] = today
|
|
446
|
+
df.to_parquet(f"s3://bucket/predictions/{today}/")
|
|
447
|
+
|
|
448
|
+
print(f"✅ Batch prediction complete for {today}")
|
|
449
|
+
|
|
450
|
+
# Run with scheduler (e.g., Airflow, cron)
|
|
451
|
+
batch_predict()
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
## Production Patterns
|
|
455
|
+
|
|
456
|
+
### Blue-Green Deployment
|
|
457
|
+
|
|
458
|
+
```python
|
|
459
|
+
import mlflow.pyfunc
|
|
460
|
+
|
|
461
|
+
# Load both models
|
|
462
|
+
blue_model = mlflow.pyfunc.load_model("models:/product-classifier@blue")
|
|
463
|
+
green_model = mlflow.pyfunc.load_model("models:/product-classifier@green")
|
|
464
|
+
|
|
465
|
+
# Switch traffic (controlled by feature flag)
|
|
466
|
+
def get_model():
|
|
467
|
+
if feature_flag.is_enabled("use_green_model"):
|
|
468
|
+
return green_model
|
|
469
|
+
else:
|
|
470
|
+
return blue_model
|
|
471
|
+
|
|
472
|
+
# Serve predictions
|
|
473
|
+
def predict(inputs):
|
|
474
|
+
model = get_model()
|
|
475
|
+
return model.predict(inputs)
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
### Canary Deployment
|
|
479
|
+
|
|
480
|
+
```python
|
|
481
|
+
import random
|
|
482
|
+
import mlflow.pyfunc
|
|
483
|
+
|
|
484
|
+
# Load models
|
|
485
|
+
stable_model = mlflow.pyfunc.load_model("models:/product-classifier@stable")
|
|
486
|
+
canary_model = mlflow.pyfunc.load_model("models:/product-classifier@canary")
|
|
487
|
+
|
|
488
|
+
def predict_with_canary(inputs, canary_percentage=10):
|
|
489
|
+
    """Route traffic: (100 - canary_percentage)% stable, canary_percentage% canary."""
|
|
490
|
+
if random.random() * 100 < canary_percentage:
|
|
491
|
+
model = canary_model
|
|
492
|
+
version = "canary"
|
|
493
|
+
else:
|
|
494
|
+
model = stable_model
|
|
495
|
+
version = "stable"
|
|
496
|
+
|
|
497
|
+
predictions = model.predict(inputs)
|
|
498
|
+
|
|
499
|
+
# Log which version was used
|
|
500
|
+
log_prediction_metrics(version, predictions)
|
|
501
|
+
|
|
502
|
+
return predictions
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
### Shadow Deployment
|
|
506
|
+
|
|
507
|
+
```python
|
|
508
|
+
import mlflow.pyfunc
|
|
509
|
+
import asyncio
|
|
510
|
+
|
|
511
|
+
# Load models
|
|
512
|
+
production_model = mlflow.pyfunc.load_model("models:/product-classifier@production")
|
|
513
|
+
shadow_model = mlflow.pyfunc.load_model("models:/product-classifier@shadow")
|
|
514
|
+
|
|
515
|
+
async def predict_with_shadow(inputs):
|
|
516
|
+
"""Run shadow model in parallel, return production results."""
|
|
517
|
+
# Production prediction (synchronous)
|
|
518
|
+
production_preds = production_model.predict(inputs)
|
|
519
|
+
|
|
520
|
+
# Shadow prediction (async, don't block)
|
|
521
|
+
asyncio.create_task(shadow_predict(inputs))
|
|
522
|
+
|
|
523
|
+
return production_preds
|
|
524
|
+
|
|
525
|
+
async def shadow_predict(inputs):
|
|
526
|
+
"""Run shadow model and log results."""
|
|
527
|
+
shadow_preds = shadow_model.predict(inputs)
|
|
528
|
+
|
|
529
|
+
# Compare predictions
|
|
530
|
+
log_shadow_comparison(shadow_preds)
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
### Model Fallback
|
|
534
|
+
|
|
535
|
+
```python
|
|
536
|
+
import mlflow.pyfunc
|
|
537
|
+
|
|
538
|
+
class FallbackModel:
|
|
539
|
+
"""Model with fallback on error."""
|
|
540
|
+
|
|
541
|
+
def __init__(self, primary_uri, fallback_uri):
|
|
542
|
+
self.primary = mlflow.pyfunc.load_model(primary_uri)
|
|
543
|
+
self.fallback = mlflow.pyfunc.load_model(fallback_uri)
|
|
544
|
+
|
|
545
|
+
def predict(self, inputs):
|
|
546
|
+
try:
|
|
547
|
+
return self.primary.predict(inputs)
|
|
548
|
+
except Exception as e:
|
|
549
|
+
print(f"Primary model failed: {e}, using fallback")
|
|
550
|
+
return self.fallback.predict(inputs)
|
|
551
|
+
|
|
552
|
+
# Use it
|
|
553
|
+
model = FallbackModel(
|
|
554
|
+
primary_uri="models:/product-classifier@latest",
|
|
555
|
+
fallback_uri="models:/product-classifier@stable"
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
predictions = model.predict(inputs)
|
|
559
|
+
```
|
|
560
|
+
|
|
561
|
+
## Monitoring
|
|
562
|
+
|
|
563
|
+
### Log Predictions
|
|
564
|
+
|
|
565
|
+
```python
|
|
566
|
+
import mlflow
|
|
567
|
+
|
|
568
|
+
def predict_and_log(model, inputs):
|
|
569
|
+
"""Make predictions and log to MLflow."""
|
|
570
|
+
with mlflow.start_run(run_name="inference"):
|
|
571
|
+
# Predict
|
|
572
|
+
predictions = model.predict(inputs)
|
|
573
|
+
|
|
574
|
+
# Log inputs
|
|
575
|
+
mlflow.log_param("num_inputs", len(inputs))
|
|
576
|
+
|
|
577
|
+
# Log predictions
|
|
578
|
+
mlflow.log_metric("avg_prediction", predictions.mean())
|
|
579
|
+
mlflow.log_metric("max_prediction", predictions.max())
|
|
580
|
+
mlflow.log_metric("min_prediction", predictions.min())
|
|
581
|
+
|
|
582
|
+
# Log timestamp
|
|
583
|
+
import time
|
|
584
|
+
mlflow.log_param("timestamp", time.time())
|
|
585
|
+
|
|
586
|
+
return predictions
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
### Model Performance Monitoring
|
|
590
|
+
|
|
591
|
+
```python
|
|
592
|
+
import mlflow
|
|
593
|
+
from sklearn.metrics import accuracy_score
|
|
594
|
+
|
|
595
|
+
def monitor_model_performance(model, X_test, y_test):
|
|
596
|
+
"""Monitor production model performance."""
|
|
597
|
+
with mlflow.start_run(run_name="production-monitoring"):
|
|
598
|
+
# Predict
|
|
599
|
+
predictions = model.predict(X_test)
|
|
600
|
+
|
|
601
|
+
# Calculate metrics
|
|
602
|
+
accuracy = accuracy_score(y_test, predictions)
|
|
603
|
+
|
|
604
|
+
# Log metrics
|
|
605
|
+
mlflow.log_metric("production_accuracy", accuracy)
|
|
606
|
+
mlflow.log_param("test_samples", len(X_test))
|
|
607
|
+
|
|
608
|
+
# Alert if performance drops
|
|
609
|
+
if accuracy < 0.85:
|
|
610
|
+
print(f"⚠️ Alert: Production accuracy dropped to {accuracy}")
|
|
611
|
+
# Send alert (e.g., Slack, PagerDuty)
|
|
612
|
+
|
|
613
|
+
# Run periodically (e.g., daily)
|
|
614
|
+
monitor_model_performance(model, X_test, y_test)
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
### Request Logging
|
|
618
|
+
|
|
619
|
+
```python
|
|
620
|
+
from flask import Flask, request, jsonify
|
|
621
|
+
import mlflow.pyfunc
|
|
622
|
+
import time
|
|
623
|
+
|
|
624
|
+
app = Flask(__name__)
|
|
625
|
+
model = mlflow.pyfunc.load_model("models:/product-classifier/Production")
|
|
626
|
+
|
|
627
|
+
@app.route('/predict', methods=['POST'])
|
|
628
|
+
def predict():
|
|
629
|
+
start_time = time.time()
|
|
630
|
+
|
|
631
|
+
data = request.get_json()
|
|
632
|
+
inputs = data.get('inputs')
|
|
633
|
+
|
|
634
|
+
# Predict
|
|
635
|
+
predictions = model.predict(inputs)
|
|
636
|
+
|
|
637
|
+
# Calculate latency
|
|
638
|
+
latency = (time.time() - start_time) * 1000 # ms
|
|
639
|
+
|
|
640
|
+
# Log request
|
|
641
|
+
with mlflow.start_run(run_name="inference"):
|
|
642
|
+
mlflow.log_metric("latency_ms", latency)
|
|
643
|
+
mlflow.log_param("num_inputs", len(inputs))
|
|
644
|
+
|
|
645
|
+
return jsonify({
|
|
646
|
+
'predictions': predictions.tolist(),
|
|
647
|
+
'latency_ms': latency
|
|
648
|
+
})
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
## Best Practices
|
|
652
|
+
|
|
653
|
+
### 1. Use Model Registry URIs
|
|
654
|
+
|
|
655
|
+
```python
|
|
656
|
+
# ✅ Good: Load from registry
|
|
657
|
+
model = mlflow.pyfunc.load_model("models:/product-classifier/Production")
|
|
658
|
+
|
|
659
|
+
# ❌ Bad: Hard-code run IDs
|
|
660
|
+
model = mlflow.pyfunc.load_model("runs:/abc123/model")
|
|
661
|
+
```
|
|
662
|
+
|
|
663
|
+
### 2. Implement Health Checks
|
|
664
|
+
|
|
665
|
+
```python
|
|
666
|
+
@app.route('/health', methods=['GET'])
|
|
667
|
+
def health():
|
|
668
|
+
"""Comprehensive health check."""
|
|
669
|
+
try:
|
|
670
|
+
# Check model loaded
|
|
671
|
+
if model is None:
|
|
672
|
+
return jsonify({'status': 'unhealthy', 'reason': 'model not loaded'}), 503
|
|
673
|
+
|
|
674
|
+
# Check model can predict
|
|
675
|
+
test_input = [[1.0, 2.0, 3.0, 4.0]]
|
|
676
|
+
_ = model.predict(test_input)
|
|
677
|
+
|
|
678
|
+
return jsonify({'status': 'healthy'}), 200
|
|
679
|
+
|
|
680
|
+
except Exception as e:
|
|
681
|
+
return jsonify({'status': 'unhealthy', 'reason': str(e)}), 503
|
|
682
|
+
```
|
|
683
|
+
|
|
684
|
+
### 3. Version Your Deployment
|
|
685
|
+
|
|
686
|
+
```bash
# Tag Docker images with model version
mlflow models build-docker \
  -m "models:/product-classifier/Production" \
  -n product-classifier:v5
```

```python
# Track deployment version
client.set_model_version_tag(
    name="product-classifier",
    version="5",
    key="deployed_as",
    value="product-classifier:v5"
)
```
|
|
700
|
+
|
|
701
|
+
### 4. Use Environment Variables
|
|
702
|
+
|
|
703
|
+
```python
|
|
704
|
+
import os
|
|
705
|
+
import mlflow.pyfunc
|
|
706
|
+
|
|
707
|
+
# Configuration via environment
|
|
708
|
+
TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")
|
|
709
|
+
MODEL_NAME = os.getenv("MODEL_NAME", "product-classifier")
|
|
710
|
+
MODEL_STAGE = os.getenv("MODEL_STAGE", "Production")
|
|
711
|
+
|
|
712
|
+
mlflow.set_tracking_uri(TRACKING_URI)
|
|
713
|
+
|
|
714
|
+
# Load model
|
|
715
|
+
model_uri = f"models:/{MODEL_NAME}/{MODEL_STAGE}"
|
|
716
|
+
model = mlflow.pyfunc.load_model(model_uri)
|
|
717
|
+
```
|
|
718
|
+
|
|
719
|
+
### 5. Implement Graceful Shutdown
|
|
720
|
+
|
|
721
|
+
```python
|
|
722
|
+
import signal
|
|
723
|
+
import sys
|
|
724
|
+
|
|
725
|
+
def signal_handler(sig, frame):
|
|
726
|
+
"""Handle shutdown gracefully."""
|
|
727
|
+
print("Shutting down gracefully...")
|
|
728
|
+
|
|
729
|
+
# Close connections
|
|
730
|
+
# Save state
|
|
731
|
+
# Finish pending requests
|
|
732
|
+
|
|
733
|
+
sys.exit(0)
|
|
734
|
+
|
|
735
|
+
signal.signal(signal.SIGINT, signal_handler)
|
|
736
|
+
signal.signal(signal.SIGTERM, signal_handler)
|
|
737
|
+
```
|
|
738
|
+
|
|
739
|
+
## Resources
|
|
740
|
+
|
|
741
|
+
- **MLflow Deployment**: https://mlflow.org/docs/latest/deployment/
|
|
742
|
+
- **SageMaker Integration**: https://mlflow.org/docs/latest/python_api/mlflow.sagemaker.html
|
|
743
|
+
- **Azure ML Integration**: https://mlflow.org/docs/latest/python_api/mlflow.azureml.html
|
|
744
|
+
- **KServe Integration**: https://kserve.github.io/website/latest/modelserving/v1beta1/mlflow/v2/
|