@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
# Backend Configuration Guide
|
|
2
|
+
|
|
3
|
+
Complete guide to configuring Guidance with different LLM backends.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
- API-Based Models (Anthropic, OpenAI)
|
|
7
|
+
- Local Models (Transformers, llama.cpp)
|
|
8
|
+
- Backend Comparison
|
|
9
|
+
- Performance Tuning
|
|
10
|
+
- Advanced Configuration
|
|
11
|
+
|
|
12
|
+
## API-Based Models
|
|
13
|
+
|
|
14
|
+
### Anthropic Claude
|
|
15
|
+
|
|
16
|
+
#### Basic Setup
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from guidance import models
|
|
20
|
+
|
|
21
|
+
# Using environment variable
|
|
22
|
+
lm = models.Anthropic("claude-sonnet-4-5-20250929")
|
|
23
|
+
# Reads ANTHROPIC_API_KEY from environment
|
|
24
|
+
|
|
25
|
+
# Explicit API key
|
|
26
|
+
lm = models.Anthropic(
|
|
27
|
+
model="claude-sonnet-4-5-20250929",
|
|
28
|
+
api_key="your-api-key-here"
|
|
29
|
+
)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
#### Available Models
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
# Claude Sonnet 4.5 (Latest, recommended)
|
|
36
|
+
lm = models.Anthropic("claude-sonnet-4-5-20250929")
|
|
37
|
+
|
|
38
|
+
# Claude 3.7 Sonnet (Fast, cost-effective)
|
|
39
|
+
lm = models.Anthropic("claude-3-7-sonnet-20250219")
|
|
40
|
+
|
|
41
|
+
# Claude 3 Opus (Most capable)
|
|
42
|
+
lm = models.Anthropic("claude-3-opus-20240229")
|
|
43
|
+
|
|
44
|
+
# Claude 3.5 Haiku (Fastest, cheapest)
|
|
45
|
+
lm = models.Anthropic("claude-3-5-haiku-20241022")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
#### Configuration Options
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
lm = models.Anthropic(
|
|
52
|
+
model="claude-sonnet-4-5-20250929",
|
|
53
|
+
api_key="your-api-key",
|
|
54
|
+
max_tokens=4096, # Max tokens to generate
|
|
55
|
+
temperature=0.7, # Sampling temperature (0-1)
|
|
56
|
+
top_p=0.9, # Nucleus sampling
|
|
57
|
+
timeout=30, # Request timeout (seconds)
|
|
58
|
+
max_retries=3 # Retry failed requests
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
#### With Context Managers
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from guidance import models, system, user, assistant, gen
|
|
66
|
+
|
|
67
|
+
lm = models.Anthropic("claude-sonnet-4-5-20250929")
|
|
68
|
+
|
|
69
|
+
with system():
|
|
70
|
+
lm += "You are a helpful assistant."
|
|
71
|
+
|
|
72
|
+
with user():
|
|
73
|
+
lm += "What is the capital of France?"
|
|
74
|
+
|
|
75
|
+
with assistant():
|
|
76
|
+
lm += gen(max_tokens=50)
|
|
77
|
+
|
|
78
|
+
print(lm)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### OpenAI
|
|
82
|
+
|
|
83
|
+
#### Basic Setup
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from guidance import models
|
|
87
|
+
|
|
88
|
+
# Using environment variable
|
|
89
|
+
lm = models.OpenAI("gpt-4o")
|
|
90
|
+
# Reads OPENAI_API_KEY from environment
|
|
91
|
+
|
|
92
|
+
# Explicit API key
|
|
93
|
+
lm = models.OpenAI(
|
|
94
|
+
model="gpt-4o",
|
|
95
|
+
api_key="your-api-key-here"
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
#### Available Models
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
# GPT-4o (Latest, multimodal)
|
|
103
|
+
lm = models.OpenAI("gpt-4o")
|
|
104
|
+
|
|
105
|
+
# GPT-4o Mini (Fast, cost-effective)
|
|
106
|
+
lm = models.OpenAI("gpt-4o-mini")
|
|
107
|
+
|
|
108
|
+
# GPT-4 Turbo
|
|
109
|
+
lm = models.OpenAI("gpt-4-turbo")
|
|
110
|
+
|
|
111
|
+
# GPT-3.5 Turbo (Cheapest)
|
|
112
|
+
lm = models.OpenAI("gpt-3.5-turbo")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
#### Configuration Options
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
lm = models.OpenAI(
|
|
119
|
+
model="gpt-4o-mini",
|
|
120
|
+
api_key="your-api-key",
|
|
121
|
+
max_tokens=2048,
|
|
122
|
+
temperature=0.7,
|
|
123
|
+
top_p=1.0,
|
|
124
|
+
frequency_penalty=0.0,
|
|
125
|
+
presence_penalty=0.0,
|
|
126
|
+
timeout=30
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
#### Chat Format
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from guidance import models, gen
|
|
134
|
+
|
|
135
|
+
lm = models.OpenAI("gpt-4o-mini")
|
|
136
|
+
|
|
137
|
+
# OpenAI uses chat format
|
|
138
|
+
lm += [
|
|
139
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
140
|
+
{"role": "user", "content": "What is 2+2?"}
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
# Generate response
|
|
144
|
+
lm += gen(max_tokens=50)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Azure OpenAI
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from guidance import models
|
|
151
|
+
|
|
152
|
+
lm = models.AzureOpenAI(
|
|
153
|
+
model="gpt-4o",
|
|
154
|
+
azure_endpoint="https://your-resource.openai.azure.com/",
|
|
155
|
+
api_key="your-azure-api-key",
|
|
156
|
+
api_version="2024-02-15-preview",
|
|
157
|
+
deployment_name="your-deployment-name"
|
|
158
|
+
)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Local Models
|
|
162
|
+
|
|
163
|
+
### Transformers (Hugging Face)
|
|
164
|
+
|
|
165
|
+
#### Basic Setup
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from guidance.models import Transformers
|
|
169
|
+
|
|
170
|
+
# Load model from Hugging Face
|
|
171
|
+
lm = Transformers("microsoft/Phi-4-mini-instruct")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### GPU Configuration
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
# Use GPU
|
|
178
|
+
lm = Transformers(
|
|
179
|
+
"microsoft/Phi-4-mini-instruct",
|
|
180
|
+
device="cuda"
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Use specific GPU
|
|
184
|
+
lm = Transformers(
|
|
185
|
+
"microsoft/Phi-4-mini-instruct",
|
|
186
|
+
device="cuda:0" # GPU 0
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Use CPU
|
|
190
|
+
lm = Transformers(
|
|
191
|
+
"microsoft/Phi-4-mini-instruct",
|
|
192
|
+
device="cpu"
|
|
193
|
+
)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
#### Advanced Configuration
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
lm = Transformers(
|
|
200
|
+
"microsoft/Phi-4-mini-instruct",
|
|
201
|
+
device="cuda",
|
|
202
|
+
torch_dtype="float16", # Use FP16 (faster, less memory)
|
|
203
|
+
load_in_8bit=True, # 8-bit quantization
|
|
204
|
+
max_memory={0: "20GB"}, # GPU memory limit
|
|
205
|
+
offload_folder="./offload" # Offload to disk if needed
|
|
206
|
+
)
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
#### Popular Models
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
# Phi-4 (Microsoft)
|
|
213
|
+
lm = Transformers("microsoft/Phi-4-mini-instruct")
|
|
214
|
+
lm = Transformers("microsoft/Phi-3-medium-4k-instruct")
|
|
215
|
+
|
|
216
|
+
# Llama 3 (Meta)
|
|
217
|
+
lm = Transformers("meta-llama/Llama-3.1-8B-Instruct")
|
|
218
|
+
lm = Transformers("meta-llama/Llama-3.1-70B-Instruct")
|
|
219
|
+
|
|
220
|
+
# Mistral (Mistral AI)
|
|
221
|
+
lm = Transformers("mistralai/Mistral-7B-Instruct-v0.3")
|
|
222
|
+
lm = Transformers("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
|
223
|
+
|
|
224
|
+
# Qwen (Alibaba)
|
|
225
|
+
lm = Transformers("Qwen/Qwen2.5-7B-Instruct")
|
|
226
|
+
|
|
227
|
+
# Gemma (Google)
|
|
228
|
+
lm = Transformers("google/gemma-2-9b-it")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
#### Generation Configuration
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
lm = Transformers(
|
|
235
|
+
"microsoft/Phi-4-mini-instruct",
|
|
236
|
+
device="cuda"
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
# Configure generation
|
|
240
|
+
from guidance import gen
|
|
241
|
+
|
|
242
|
+
result = lm + gen(
|
|
243
|
+
max_tokens=100,
|
|
244
|
+
temperature=0.7,
|
|
245
|
+
top_p=0.9,
|
|
246
|
+
top_k=50,
|
|
247
|
+
repetition_penalty=1.1
|
|
248
|
+
)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### llama.cpp
|
|
252
|
+
|
|
253
|
+
#### Basic Setup
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
from guidance.models import LlamaCpp
|
|
257
|
+
|
|
258
|
+
# Load GGUF model
|
|
259
|
+
lm = LlamaCpp(
|
|
260
|
+
model_path="/path/to/model.gguf",
|
|
261
|
+
n_ctx=4096 # Context window
|
|
262
|
+
)
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
#### GPU Configuration
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
# Use GPU acceleration
|
|
269
|
+
lm = LlamaCpp(
|
|
270
|
+
model_path="/path/to/model.gguf",
|
|
271
|
+
n_ctx=4096,
|
|
272
|
+
n_gpu_layers=35, # Offload 35 layers to GPU
|
|
273
|
+
n_threads=8 # CPU threads for remaining layers
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
# Full GPU offload
|
|
277
|
+
lm = LlamaCpp(
|
|
278
|
+
model_path="/path/to/model.gguf",
|
|
279
|
+
n_ctx=4096,
|
|
280
|
+
n_gpu_layers=-1 # Offload all layers
|
|
281
|
+
)
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
#### Advanced Configuration
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
lm = LlamaCpp(
|
|
288
|
+
model_path="/path/to/llama-3.1-8b-instruct.Q4_K_M.gguf",
|
|
289
|
+
n_ctx=8192, # Context window (tokens)
|
|
290
|
+
n_gpu_layers=35, # GPU layers
|
|
291
|
+
n_threads=8, # CPU threads
|
|
292
|
+
n_batch=512, # Batch size for prompt processing
|
|
293
|
+
use_mmap=True, # Memory-map the model file
|
|
294
|
+
use_mlock=False, # Lock model in RAM
|
|
295
|
+
seed=42, # Random seed
|
|
296
|
+
verbose=False # Suppress verbose output
|
|
297
|
+
)
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
#### Quantized Models
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
# Q4_K_M (4-bit, recommended for most cases)
|
|
304
|
+
lm = LlamaCpp("/path/to/model.Q4_K_M.gguf")
|
|
305
|
+
|
|
306
|
+
# Q5_K_M (5-bit, better quality)
|
|
307
|
+
lm = LlamaCpp("/path/to/model.Q5_K_M.gguf")
|
|
308
|
+
|
|
309
|
+
# Q8_0 (8-bit, high quality)
|
|
310
|
+
lm = LlamaCpp("/path/to/model.Q8_0.gguf")
|
|
311
|
+
|
|
312
|
+
# F16 (16-bit float, highest quality)
|
|
313
|
+
lm = LlamaCpp("/path/to/model.F16.gguf")
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
#### Popular GGUF Models
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
# Llama 3.1
|
|
320
|
+
lm = LlamaCpp("llama-3.1-8b-instruct.Q4_K_M.gguf")
|
|
321
|
+
|
|
322
|
+
# Mistral
|
|
323
|
+
lm = LlamaCpp("mistral-7b-instruct-v0.3.Q4_K_M.gguf")
|
|
324
|
+
|
|
325
|
+
# Phi-4
|
|
326
|
+
lm = LlamaCpp("phi-4-mini-instruct.Q4_K_M.gguf")
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
## Backend Comparison
|
|
330
|
+
|
|
331
|
+
### Feature Matrix
|
|
332
|
+
|
|
333
|
+
| Feature | Anthropic | OpenAI | Transformers | llama.cpp |
|
|
334
|
+
|---------|-----------|--------|--------------|-----------|
|
|
335
|
+
| Constrained Generation | ✅ Full | ✅ Full | ✅ Full | ✅ Full |
|
|
336
|
+
| Token Healing | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
|
|
337
|
+
| Streaming | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
|
|
338
|
+
| GPU Support | N/A | N/A | ✅ Yes | ✅ Yes |
|
|
339
|
+
| Quantization | N/A | N/A | ✅ Yes | ✅ Yes |
|
|
340
|
+
| Cost | $$$ | $$$ | Free | Free |
|
|
341
|
+
| Latency | Low | Low | Medium | Low |
|
|
342
|
+
| Setup Difficulty | Easy | Easy | Medium | Medium |
|
|
343
|
+
|
|
344
|
+
### Performance Characteristics
|
|
345
|
+
|
|
346
|
+
**Anthropic Claude:**
|
|
347
|
+
- **Latency**: 200-500ms (API call)
|
|
348
|
+
- **Throughput**: Limited by API rate limits
|
|
349
|
+
- **Cost**: $3-15 per 1M input tokens
|
|
350
|
+
- **Best for**: Production systems, high-quality outputs
|
|
351
|
+
|
|
352
|
+
**OpenAI:**
|
|
353
|
+
- **Latency**: 200-400ms (API call)
|
|
354
|
+
- **Throughput**: Limited by API rate limits
|
|
355
|
+
- **Cost**: $0.15-30 per 1M input tokens
|
|
356
|
+
- **Best for**: Cost-sensitive production, gpt-4o-mini
|
|
357
|
+
|
|
358
|
+
**Transformers:**
|
|
359
|
+
- **Latency**: 50-200ms (local inference)
|
|
360
|
+
- **Throughput**: GPU-dependent (10-100 tokens/sec)
|
|
361
|
+
- **Cost**: Hardware cost only
|
|
362
|
+
- **Best for**: Privacy-sensitive, high-volume, experimentation
|
|
363
|
+
|
|
364
|
+
**llama.cpp:**
|
|
365
|
+
- **Latency**: 30-150ms (local inference)
|
|
366
|
+
- **Throughput**: Hardware-dependent (20-150 tokens/sec)
|
|
367
|
+
- **Cost**: Hardware cost only
|
|
368
|
+
- **Best for**: Edge deployment, Apple Silicon, CPU inference
|
|
369
|
+
|
|
370
|
+
### Memory Requirements
|
|
371
|
+
|
|
372
|
+
**Transformers (FP16):**
|
|
373
|
+
- 7B model: ~14GB GPU VRAM
|
|
374
|
+
- 13B model: ~26GB GPU VRAM
|
|
375
|
+
- 70B model: ~140GB GPU VRAM (multi-GPU)
|
|
376
|
+
|
|
377
|
+
**llama.cpp (Q4_K_M):**
|
|
378
|
+
- 7B model: ~4.5GB RAM
|
|
379
|
+
- 13B model: ~8GB RAM
|
|
380
|
+
- 70B model: ~40GB RAM
|
|
381
|
+
|
|
382
|
+
**Optimization Tips:**
|
|
383
|
+
- Use quantized models (Q4_K_M) for lower memory
|
|
384
|
+
- Use GPU offloading for faster inference
|
|
385
|
+
- Use CPU inference for smaller models (<7B)
|
|
386
|
+
|
|
387
|
+
## Performance Tuning
|
|
388
|
+
|
|
389
|
+
### API Models (Anthropic, OpenAI)
|
|
390
|
+
|
|
391
|
+
#### Reduce Latency
|
|
392
|
+
|
|
393
|
+
```python
|
|
394
|
+
from guidance import models, gen
|
|
395
|
+
|
|
396
|
+
lm = models.Anthropic("claude-sonnet-4-5-20250929")
|
|
397
|
+
|
|
398
|
+
# Use lower max_tokens (faster response)
|
|
399
|
+
lm += gen(max_tokens=100) # Instead of 1000
|
|
400
|
+
|
|
401
|
+
# Use streaming (perceived latency reduction)
|
|
402
|
+
for chunk in lm.stream(gen(max_tokens=500)):
|
|
403
|
+
print(chunk, end="", flush=True)
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
#### Reduce Cost
|
|
407
|
+
|
|
408
|
+
```python
|
|
409
|
+
# Use cheaper models
|
|
410
|
+
lm = models.Anthropic("claude-3-5-haiku-20241022") # vs Sonnet
|
|
411
|
+
lm = models.OpenAI("gpt-4o-mini") # vs gpt-4o
|
|
412
|
+
|
|
413
|
+
# Reduce context size
|
|
414
|
+
# - Keep prompts concise
|
|
415
|
+
# - Avoid large few-shot examples
|
|
416
|
+
# - Use max_tokens limits
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
### Local Models (Transformers, llama.cpp)
|
|
420
|
+
|
|
421
|
+
#### Optimize GPU Usage
|
|
422
|
+
|
|
423
|
+
```python
|
|
424
|
+
from guidance.models import Transformers
|
|
425
|
+
|
|
426
|
+
# Use FP16 for 2x speedup
|
|
427
|
+
lm = Transformers(
|
|
428
|
+
"meta-llama/Llama-3.1-8B-Instruct",
|
|
429
|
+
device="cuda",
|
|
430
|
+
torch_dtype="float16"
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
# Use 8-bit quantization for 4x memory reduction
|
|
434
|
+
lm = Transformers(
|
|
435
|
+
"meta-llama/Llama-3.1-8B-Instruct",
|
|
436
|
+
device="cuda",
|
|
437
|
+
load_in_8bit=True
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
# Use flash attention (requires flash-attn package)
|
|
441
|
+
lm = Transformers(
|
|
442
|
+
"meta-llama/Llama-3.1-8B-Instruct",
|
|
443
|
+
device="cuda",
|
|
444
|
+
use_flash_attention_2=True
|
|
445
|
+
)
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
#### Optimize llama.cpp
|
|
449
|
+
|
|
450
|
+
```python
|
|
451
|
+
from guidance.models import LlamaCpp
|
|
452
|
+
|
|
453
|
+
# Maximize GPU layers
|
|
454
|
+
lm = LlamaCpp(
|
|
455
|
+
model_path="/path/to/model.Q4_K_M.gguf",
|
|
456
|
+
n_gpu_layers=-1 # All layers on GPU
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
# Optimize batch size
|
|
460
|
+
lm = LlamaCpp(
|
|
461
|
+
model_path="/path/to/model.Q4_K_M.gguf",
|
|
462
|
+
n_batch=512, # Larger batch = faster prompt processing
|
|
463
|
+
n_gpu_layers=-1
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
# Use Metal (Apple Silicon)
|
|
467
|
+
lm = LlamaCpp(
|
|
468
|
+
model_path="/path/to/model.Q4_K_M.gguf",
|
|
469
|
+
n_gpu_layers=-1, # Use Metal GPU acceleration
|
|
470
|
+
use_mmap=True
|
|
471
|
+
)
|
|
472
|
+
```
|
|
473
|
+
|
|
474
|
+
#### Batch Processing
|
|
475
|
+
|
|
476
|
+
```python
|
|
477
|
+
# Process multiple requests efficiently
|
|
478
|
+
requests = [
|
|
479
|
+
"What is 2+2?",
|
|
480
|
+
"What is the capital of France?",
|
|
481
|
+
"What is photosynthesis?"
|
|
482
|
+
]
|
|
483
|
+
|
|
484
|
+
# Bad: Sequential processing
|
|
485
|
+
for req in requests:
|
|
486
|
+
lm = Transformers("microsoft/Phi-4-mini-instruct")
|
|
487
|
+
lm += req + gen(max_tokens=50)
|
|
488
|
+
|
|
489
|
+
# Good: Reuse loaded model
|
|
490
|
+
lm = Transformers("microsoft/Phi-4-mini-instruct")
|
|
491
|
+
for req in requests:
|
|
492
|
+
lm += req + gen(max_tokens=50)
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
## Advanced Configuration
|
|
496
|
+
|
|
497
|
+
### Custom Model Configurations
|
|
498
|
+
|
|
499
|
+
```python
|
|
500
|
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
501
|
+
from guidance.models import Transformers
|
|
502
|
+
|
|
503
|
+
# Load custom model
|
|
504
|
+
tokenizer = AutoTokenizer.from_pretrained("your-model")
|
|
505
|
+
model = AutoModelForCausalLM.from_pretrained(
|
|
506
|
+
"your-model",
|
|
507
|
+
device_map="auto",
|
|
508
|
+
torch_dtype="float16"
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
# Use with Guidance
|
|
512
|
+
lm = Transformers(model=model, tokenizer=tokenizer)
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
### Environment Variables
|
|
516
|
+
|
|
517
|
+
```bash
|
|
518
|
+
# API keys
|
|
519
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
520
|
+
export OPENAI_API_KEY="sk-..."
|
|
521
|
+
|
|
522
|
+
# Transformers cache
|
|
523
|
+
export HF_HOME="/path/to/cache"
|
|
524
|
+
export TRANSFORMERS_CACHE="/path/to/cache"
|
|
525
|
+
|
|
526
|
+
# GPU selection
|
|
527
|
+
export CUDA_VISIBLE_DEVICES=0,1 # Use GPU 0 and 1
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
### Debugging
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
# Enable verbose logging
|
|
534
|
+
import logging
|
|
535
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
536
|
+
|
|
537
|
+
# Check backend info
|
|
538
|
+
lm = models.Anthropic("claude-sonnet-4-5-20250929")
|
|
539
|
+
print(f"Model: {lm.model_name}")
|
|
540
|
+
print(f"Backend: {lm.backend}")
|
|
541
|
+
|
|
542
|
+
# Check GPU usage (Transformers)
|
|
543
|
+
lm = Transformers("microsoft/Phi-4-mini-instruct", device="cuda")
|
|
544
|
+
print(f"Device: {lm.device}")
|
|
545
|
+
print(f"Memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
## Resources
|
|
549
|
+
|
|
550
|
+
- **Anthropic Docs**: https://docs.anthropic.com
|
|
551
|
+
- **OpenAI Docs**: https://platform.openai.com/docs
|
|
552
|
+
- **Hugging Face Models**: https://huggingface.co/models
|
|
553
|
+
- **llama.cpp**: https://github.com/ggerganov/llama.cpp
|
|
554
|
+
- **GGUF Models**: https://huggingface.co/models?library=gguf
|