@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
# GGUF Advanced Usage Guide
|
|
2
|
+
|
|
3
|
+
## Speculative Decoding
|
|
4
|
+
|
|
5
|
+
### Draft Model Approach
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Use smaller model as draft for faster generation
|
|
9
|
+
./llama-speculative \
|
|
10
|
+
-m large-model-q4_k_m.gguf \
|
|
11
|
+
-md draft-model-q4_k_m.gguf \
|
|
12
|
+
-p "Write a story about AI" \
|
|
13
|
+
-n 500 \
|
|
14
|
+
--draft 8 # Draft tokens before verification
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### Self-Speculative Decoding
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Use same model with different context for speculation
|
|
21
|
+
./llama-cli -m model-q4_k_m.gguf \
|
|
22
|
+
--lookup-cache-static lookup.bin \
|
|
23
|
+
--lookup-cache-dynamic lookup-dynamic.bin \
|
|
24
|
+
-p "Hello world"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Batched Inference
|
|
28
|
+
|
|
29
|
+
### Process Multiple Prompts
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from llama_cpp import Llama
|
|
33
|
+
|
|
34
|
+
llm = Llama(
|
|
35
|
+
model_path="model-q4_k_m.gguf",
|
|
36
|
+
n_ctx=4096,
|
|
37
|
+
n_gpu_layers=35,
|
|
38
|
+
n_batch=512 # Larger batch for parallel processing
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
prompts = [
|
|
42
|
+
"What is Python?",
|
|
43
|
+
"Explain machine learning.",
|
|
44
|
+
"Describe neural networks."
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
# Process in batch (each prompt gets separate context)
|
|
48
|
+
for prompt in prompts:
|
|
49
|
+
output = llm(prompt, max_tokens=100)
|
|
50
|
+
print(f"Q: {prompt}")
|
|
51
|
+
print(f"A: {output['choices'][0]['text']}\n")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Server Batching
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Start server with batching
|
|
58
|
+
./llama-server -m model-q4_k_m.gguf \
|
|
59
|
+
--host 0.0.0.0 \
|
|
60
|
+
--port 8080 \
|
|
61
|
+
-ngl 35 \
|
|
62
|
+
-c 4096 \
|
|
63
|
+
--parallel 4 # Concurrent requests
|
|
64
|
+
--cont-batching # Continuous batching
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Custom Model Conversion
|
|
68
|
+
|
|
69
|
+
### Convert with Vocabulary Modifications
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
# custom_convert.py
|
|
73
|
+
import sys
|
|
74
|
+
sys.path.insert(0, './llama.cpp')
|
|
75
|
+
|
|
76
|
+
from convert_hf_to_gguf import main
|
|
77
|
+
from gguf import GGUFWriter
|
|
78
|
+
|
|
79
|
+
# Custom conversion with modified vocab
|
|
80
|
+
def convert_with_custom_vocab(model_path, output_path):
|
|
81
|
+
# Load and modify tokenizer
|
|
82
|
+
from transformers import AutoTokenizer
|
|
83
|
+
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
|
84
|
+
|
|
85
|
+
# Add special tokens if needed
|
|
86
|
+
special_tokens = {"additional_special_tokens": ["<|custom|>"]}
|
|
87
|
+
tokenizer.add_special_tokens(special_tokens)
|
|
88
|
+
tokenizer.save_pretrained(model_path)
|
|
89
|
+
|
|
90
|
+
# Then run standard conversion
|
|
91
|
+
main([model_path, "--outfile", output_path])
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Convert Specific Architecture
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# For Mistral-style models
|
|
98
|
+
python convert_hf_to_gguf.py ./mistral-model \
|
|
99
|
+
--outfile mistral-f16.gguf \
|
|
100
|
+
--outtype f16
|
|
101
|
+
|
|
102
|
+
# For Qwen models
|
|
103
|
+
python convert_hf_to_gguf.py ./qwen-model \
|
|
104
|
+
--outfile qwen-f16.gguf \
|
|
105
|
+
--outtype f16
|
|
106
|
+
|
|
107
|
+
# For Phi models
|
|
108
|
+
python convert_hf_to_gguf.py ./phi-model \
|
|
109
|
+
--outfile phi-f16.gguf \
|
|
110
|
+
--outtype f16
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Advanced Quantization
|
|
114
|
+
|
|
115
|
+
### Mixed Quantization
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# Quantize different layer types differently
|
|
119
|
+
./llama-quantize model-f16.gguf model-mixed.gguf Q4_K_M \
|
|
120
|
+
--allow-requantize \
|
|
121
|
+
--leave-output-tensor
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Quantization with Token Embeddings
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
# Keep embeddings at higher precision
|
|
128
|
+
./llama-quantize model-f16.gguf model-q4.gguf Q4_K_M \
|
|
129
|
+
--token-embedding-type f16
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### IQ Quantization (Importance-aware)
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# Ultra-low bit quantization with importance
|
|
136
|
+
./llama-quantize --imatrix model.imatrix \
|
|
137
|
+
model-f16.gguf model-iq2_xxs.gguf IQ2_XXS
|
|
138
|
+
|
|
139
|
+
# Available IQ types: IQ2_XXS, IQ2_XS, IQ2_S, IQ3_XXS, IQ3_XS, IQ3_S, IQ4_XS
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Memory Optimization
|
|
143
|
+
|
|
144
|
+
### Memory Mapping
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from llama_cpp import Llama
|
|
148
|
+
|
|
149
|
+
# Use memory mapping for large models
|
|
150
|
+
llm = Llama(
|
|
151
|
+
model_path="model-q4_k_m.gguf",
|
|
152
|
+
use_mmap=True, # Memory map the model
|
|
153
|
+
use_mlock=False, # Don't lock in RAM
|
|
154
|
+
n_gpu_layers=35
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Partial GPU Offload
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
# Calculate layers to offload based on VRAM
|
|
162
|
+
import subprocess
|
|
163
|
+
|
|
164
|
+
def get_free_vram_gb():
|
|
165
|
+
result = subprocess.run(
|
|
166
|
+
['nvidia-smi', '--query-gpu=memory.free', '--format=csv,nounits,noheader'],
|
|
167
|
+
capture_output=True, text=True
|
|
168
|
+
)
|
|
169
|
+
return int(result.stdout.strip()) / 1024
|
|
170
|
+
|
|
171
|
+
# Estimate layers based on VRAM (rough: 0.5GB per layer for 7B Q4)
|
|
172
|
+
free_vram = get_free_vram_gb()
|
|
173
|
+
layers_to_offload = int(free_vram / 0.5)
|
|
174
|
+
|
|
175
|
+
llm = Llama(
|
|
176
|
+
model_path="model-q4_k_m.gguf",
|
|
177
|
+
n_gpu_layers=min(layers_to_offload, 35) # Cap at total layers
|
|
178
|
+
)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### KV Cache Optimization
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from llama_cpp import Llama
|
|
185
|
+
|
|
186
|
+
# Optimize KV cache for long contexts
|
|
187
|
+
llm = Llama(
|
|
188
|
+
model_path="model-q4_k_m.gguf",
|
|
189
|
+
n_ctx=8192, # Large context
|
|
190
|
+
n_gpu_layers=35,
|
|
191
|
+
type_k=1, # Q8_0 for K cache (1)
|
|
192
|
+
type_v=1, # Q8_0 for V cache (1)
|
|
193
|
+
# Or use Q4_0 (2) for more compression
|
|
194
|
+
)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Context Management
|
|
198
|
+
|
|
199
|
+
### Context Shifting
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from llama_cpp import Llama
|
|
203
|
+
|
|
204
|
+
llm = Llama(
|
|
205
|
+
model_path="model-q4_k_m.gguf",
|
|
206
|
+
n_ctx=4096,
|
|
207
|
+
n_gpu_layers=35
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# Handle long conversations with context shifting
|
|
211
|
+
conversation = []
|
|
212
|
+
max_history = 10
|
|
213
|
+
|
|
214
|
+
def chat(user_message):
|
|
215
|
+
conversation.append({"role": "user", "content": user_message})
|
|
216
|
+
|
|
217
|
+
# Keep only recent history
|
|
218
|
+
if len(conversation) > max_history * 2:
|
|
219
|
+
conversation = conversation[-max_history * 2:]
|
|
220
|
+
|
|
221
|
+
response = llm.create_chat_completion(
|
|
222
|
+
messages=conversation,
|
|
223
|
+
max_tokens=256
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
assistant_message = response["choices"][0]["message"]["content"]
|
|
227
|
+
conversation.append({"role": "assistant", "content": assistant_message})
|
|
228
|
+
return assistant_message
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Save and Load State
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
# Save state to file
|
|
235
|
+
./llama-cli -m model.gguf \
|
|
236
|
+
-p "Once upon a time" \
|
|
237
|
+
--save-session session.bin \
|
|
238
|
+
-n 100
|
|
239
|
+
|
|
240
|
+
# Load and continue
|
|
241
|
+
./llama-cli -m model.gguf \
|
|
242
|
+
--load-session session.bin \
|
|
243
|
+
-p " and they lived" \
|
|
244
|
+
-n 100
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Grammar Constrained Generation
|
|
248
|
+
|
|
249
|
+
### JSON Output
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
from llama_cpp import Llama, LlamaGrammar
|
|
253
|
+
|
|
254
|
+
# Define JSON grammar
|
|
255
|
+
json_grammar = LlamaGrammar.from_string('''
|
|
256
|
+
root ::= object
|
|
257
|
+
object ::= "{" ws pair ("," ws pair)* "}" ws
|
|
258
|
+
pair ::= string ":" ws value
|
|
259
|
+
value ::= string | number | object | array | "true" | "false" | "null"
|
|
260
|
+
array ::= "[" ws value ("," ws value)* "]" ws
|
|
261
|
+
string ::= "\\"" [^"\\\\]* "\\""
|
|
262
|
+
number ::= [0-9]+
|
|
263
|
+
ws ::= [ \\t\\n]*
|
|
264
|
+
''')
|
|
265
|
+
|
|
266
|
+
llm = Llama(model_path="model-q4_k_m.gguf", n_gpu_layers=35)
|
|
267
|
+
|
|
268
|
+
output = llm(
|
|
269
|
+
"Output a JSON object with name and age:",
|
|
270
|
+
grammar=json_grammar,
|
|
271
|
+
max_tokens=100
|
|
272
|
+
)
|
|
273
|
+
print(output["choices"][0]["text"])
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Custom Grammar
|
|
277
|
+
|
|
278
|
+
```python
|
|
279
|
+
# Grammar for specific format
|
|
280
|
+
answer_grammar = LlamaGrammar.from_string('''
|
|
281
|
+
root ::= "Answer: " letter "\\n" "Explanation: " explanation
|
|
282
|
+
letter ::= [A-D]
|
|
283
|
+
explanation ::= [a-zA-Z0-9 .,!?]+
|
|
284
|
+
''')
|
|
285
|
+
|
|
286
|
+
output = llm(
|
|
287
|
+
"Q: What is 2+2? A) 3 B) 4 C) 5 D) 6",
|
|
288
|
+
grammar=answer_grammar,
|
|
289
|
+
max_tokens=100
|
|
290
|
+
)
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
## LoRA Integration
|
|
294
|
+
|
|
295
|
+
### Load LoRA Adapter
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
# Apply LoRA at runtime
|
|
299
|
+
./llama-cli -m base-model-q4_k_m.gguf \
|
|
300
|
+
    --lora-scaled lora-adapter.gguf 1.0 \
|
|
302
|
+
-p "Hello!"
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Multiple LoRA Adapters
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
# Stack multiple adapters
|
|
309
|
+
./llama-cli -m base-model.gguf \
|
|
310
|
+
    --lora-scaled adapter1.gguf 0.5 \
|
|
311
|
+
    --lora-scaled adapter2.gguf 0.5 \
|
|
312
|
+
-p "Hello!"
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### Python LoRA Usage
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
from llama_cpp import Llama
|
|
319
|
+
|
|
320
|
+
llm = Llama(
|
|
321
|
+
model_path="base-model-q4_k_m.gguf",
|
|
322
|
+
lora_path="lora-adapter.gguf",
|
|
323
|
+
lora_scale=1.0,
|
|
324
|
+
n_gpu_layers=35
|
|
325
|
+
)
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
## Embedding Generation
|
|
329
|
+
|
|
330
|
+
### Extract Embeddings
|
|
331
|
+
|
|
332
|
+
```python
|
|
333
|
+
from llama_cpp import Llama
|
|
334
|
+
|
|
335
|
+
llm = Llama(
|
|
336
|
+
model_path="model-q4_k_m.gguf",
|
|
337
|
+
embedding=True, # Enable embedding mode
|
|
338
|
+
n_gpu_layers=35
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# Get embeddings
|
|
342
|
+
embeddings = llm.embed("This is a test sentence.")
|
|
343
|
+
print(f"Embedding dimension: {len(embeddings)}")
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Batch Embeddings
|
|
347
|
+
|
|
348
|
+
```python
|
|
349
|
+
texts = [
|
|
350
|
+
"Machine learning is fascinating.",
|
|
351
|
+
"Deep learning uses neural networks.",
|
|
352
|
+
"Python is a programming language."
|
|
353
|
+
]
|
|
354
|
+
|
|
355
|
+
embeddings = [llm.embed(text) for text in texts]
|
|
356
|
+
|
|
357
|
+
# Calculate similarity
|
|
358
|
+
import numpy as np
|
|
359
|
+
|
|
360
|
+
def cosine_similarity(a, b):
|
|
361
|
+
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
|
|
362
|
+
|
|
363
|
+
sim = cosine_similarity(embeddings[0], embeddings[1])
|
|
364
|
+
print(f"Similarity: {sim:.4f}")
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
## Performance Tuning
|
|
368
|
+
|
|
369
|
+
### Benchmark Script
|
|
370
|
+
|
|
371
|
+
```python
|
|
372
|
+
import time
|
|
373
|
+
from llama_cpp import Llama
|
|
374
|
+
|
|
375
|
+
def benchmark(model_path, prompt, n_tokens=100, n_runs=5):
|
|
376
|
+
llm = Llama(
|
|
377
|
+
model_path=model_path,
|
|
378
|
+
n_gpu_layers=35,
|
|
379
|
+
n_ctx=2048,
|
|
380
|
+
verbose=False
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
# Warmup
|
|
384
|
+
llm(prompt, max_tokens=10)
|
|
385
|
+
|
|
386
|
+
# Benchmark
|
|
387
|
+
times = []
|
|
388
|
+
for _ in range(n_runs):
|
|
389
|
+
start = time.time()
|
|
390
|
+
output = llm(prompt, max_tokens=n_tokens)
|
|
391
|
+
elapsed = time.time() - start
|
|
392
|
+
times.append(elapsed)
|
|
393
|
+
|
|
394
|
+
avg_time = sum(times) / len(times)
|
|
395
|
+
tokens_per_sec = n_tokens / avg_time
|
|
396
|
+
|
|
397
|
+
print(f"Model: {model_path}")
|
|
398
|
+
print(f"Avg time: {avg_time:.2f}s")
|
|
399
|
+
print(f"Tokens/sec: {tokens_per_sec:.1f}")
|
|
400
|
+
|
|
401
|
+
return tokens_per_sec
|
|
402
|
+
|
|
403
|
+
# Compare quantizations
|
|
404
|
+
for quant in ["q4_k_m", "q5_k_m", "q8_0"]:
|
|
405
|
+
benchmark(f"model-{quant}.gguf", "Explain quantum computing:", 100)
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
### Optimal Configuration Finder
|
|
409
|
+
|
|
410
|
+
```python
|
|
411
|
+
def find_optimal_config(model_path, target_vram_gb=8):
|
|
412
|
+
"""Find optimal n_gpu_layers and n_batch for target VRAM."""
|
|
413
|
+
from llama_cpp import Llama
|
|
414
|
+
    import gc
    import time
|
|
415
|
+
|
|
416
|
+
best_config = None
|
|
417
|
+
best_speed = 0
|
|
418
|
+
|
|
419
|
+
for n_gpu_layers in range(0, 50, 5):
|
|
420
|
+
for n_batch in [128, 256, 512, 1024]:
|
|
421
|
+
try:
|
|
422
|
+
gc.collect()
|
|
423
|
+
llm = Llama(
|
|
424
|
+
model_path=model_path,
|
|
425
|
+
n_gpu_layers=n_gpu_layers,
|
|
426
|
+
n_batch=n_batch,
|
|
427
|
+
n_ctx=2048,
|
|
428
|
+
verbose=False
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
# Quick benchmark
|
|
432
|
+
start = time.time()
|
|
433
|
+
llm("Hello", max_tokens=50)
|
|
434
|
+
speed = 50 / (time.time() - start)
|
|
435
|
+
|
|
436
|
+
if speed > best_speed:
|
|
437
|
+
best_speed = speed
|
|
438
|
+
best_config = {
|
|
439
|
+
"n_gpu_layers": n_gpu_layers,
|
|
440
|
+
"n_batch": n_batch,
|
|
441
|
+
"speed": speed
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
del llm
|
|
445
|
+
gc.collect()
|
|
446
|
+
|
|
447
|
+
except Exception as e:
|
|
448
|
+
print(f"OOM at layers={n_gpu_layers}, batch={n_batch}")
|
|
449
|
+
break
|
|
450
|
+
|
|
451
|
+
return best_config
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
## Multi-GPU Setup
|
|
455
|
+
|
|
456
|
+
### Distribute Across GPUs
|
|
457
|
+
|
|
458
|
+
```bash
|
|
459
|
+
# Split model across multiple GPUs
|
|
460
|
+
./llama-cli -m large-model.gguf \
|
|
461
|
+
--tensor-split 0.5,0.5 \
|
|
462
|
+
-ngl 60 \
|
|
463
|
+
-p "Hello!"
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
### Python Multi-GPU
|
|
467
|
+
|
|
468
|
+
```python
|
|
469
|
+
import os
|
|
470
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
|
|
471
|
+
|
|
472
|
+
from llama_cpp import Llama
|
|
473
|
+
|
|
474
|
+
llm = Llama(
|
|
475
|
+
model_path="large-model-q4_k_m.gguf",
|
|
476
|
+
n_gpu_layers=60,
|
|
477
|
+
tensor_split=[0.5, 0.5] # Split evenly across 2 GPUs
|
|
478
|
+
)
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
## Custom Builds
|
|
482
|
+
|
|
483
|
+
### Build with All Optimizations
|
|
484
|
+
|
|
485
|
+
```bash
|
|
486
|
+
# Clean build with all CPU optimizations
|
|
487
|
+
make clean
|
|
488
|
+
LLAMA_OPENBLAS=1 LLAMA_BLAS_VENDOR=OpenBLAS make -j
|
|
489
|
+
|
|
490
|
+
# With CUDA and cuBLAS
|
|
491
|
+
make clean
|
|
492
|
+
GGML_CUDA=1 make -j   # cuBLAS is enabled automatically; LLAMA_CUBLAS is deprecated
|
|
493
|
+
|
|
494
|
+
# With specific CUDA architecture
|
|
495
|
+
GGML_CUDA=1 CUDA_DOCKER_ARCH=sm_86 make -j
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
### CMake Build
|
|
499
|
+
|
|
500
|
+
```bash
|
|
501
|
+
mkdir build && cd build
|
|
502
|
+
cmake .. -DGGML_CUDA=ON -DCMAKE_BUILD_TYPE=Release
|
|
503
|
+
cmake --build . --config Release -j
|
|
504
|
+
```
|