@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
# HQQ Advanced Usage Guide
|
|
2
|
+
|
|
3
|
+
## Custom Backend Configuration
|
|
4
|
+
|
|
5
|
+
### Backend Selection by Hardware
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from hqq.core.quantize import HQQLinear
|
|
9
|
+
import torch
|
|
10
|
+
|
|
11
|
+
def select_optimal_backend():
|
|
12
|
+
"""Select best backend based on hardware."""
|
|
13
|
+
device = torch.cuda.get_device_properties(0)
|
|
14
|
+
compute_cap = device.major * 10 + device.minor
|
|
15
|
+
|
|
16
|
+
if compute_cap >= 80: # Ampere+
|
|
17
|
+
return "marlin"
|
|
18
|
+
elif compute_cap >= 70: # Volta/Turing
|
|
19
|
+
return "aten"
|
|
20
|
+
else:
|
|
21
|
+
return "pytorch_compile"
|
|
22
|
+
|
|
23
|
+
backend = select_optimal_backend()
|
|
24
|
+
HQQLinear.set_backend(backend)
|
|
25
|
+
print(f"Using backend: {backend}")
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Per-Layer Backend Assignment
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from hqq.core.quantize import HQQLinear
|
|
32
|
+
|
|
33
|
+
def set_layer_backends(model):
|
|
34
|
+
"""Assign optimal backends per layer type."""
|
|
35
|
+
for name, module in model.named_modules():
|
|
36
|
+
if isinstance(module, HQQLinear):
|
|
37
|
+
if "attn" in name:
|
|
38
|
+
module.set_backend("marlin") # Fast for attention
|
|
39
|
+
elif "mlp" in name:
|
|
40
|
+
module.set_backend("bitblas") # Flexible for MLP
|
|
41
|
+
else:
|
|
42
|
+
module.set_backend("aten")
|
|
43
|
+
|
|
44
|
+
set_layer_backends(model)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### TorchAO Integration
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from hqq.core.quantize import HQQLinear
|
|
51
|
+
import torchao
|
|
52
|
+
|
|
53
|
+
# Enable TorchAO int4 backend
|
|
54
|
+
HQQLinear.set_backend("torchao_int4")
|
|
55
|
+
|
|
56
|
+
# Configure TorchAO options
|
|
57
|
+
import torch
|
|
58
|
+
torch._inductor.config.coordinate_descent_tuning = True
|
|
59
|
+
torch._inductor.config.triton.unique_kernel_names = True
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Mixed Precision Quantization
|
|
63
|
+
|
|
64
|
+
### Layer-Specific Configuration
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from hqq.core.quantize import BaseQuantizeConfig
|
|
68
|
+
from transformers import AutoModelForCausalLM
|
|
69
|
+
|
|
70
|
+
# Define configs per layer pattern
|
|
71
|
+
quant_configs = {
|
|
72
|
+
# Embeddings: Keep full precision
|
|
73
|
+
"embed_tokens": None,
|
|
74
|
+
"lm_head": None,
|
|
75
|
+
|
|
76
|
+
# Attention: 4-bit with larger groups
|
|
77
|
+
"self_attn.q_proj": BaseQuantizeConfig(nbits=4, group_size=128),
|
|
78
|
+
"self_attn.k_proj": BaseQuantizeConfig(nbits=4, group_size=128),
|
|
79
|
+
"self_attn.v_proj": BaseQuantizeConfig(nbits=4, group_size=128),
|
|
80
|
+
"self_attn.o_proj": BaseQuantizeConfig(nbits=4, group_size=128),
|
|
81
|
+
|
|
82
|
+
# MLP: More aggressive 2-bit
|
|
83
|
+
"mlp.gate_proj": BaseQuantizeConfig(nbits=2, group_size=32),
|
|
84
|
+
"mlp.up_proj": BaseQuantizeConfig(nbits=2, group_size=32),
|
|
85
|
+
"mlp.down_proj": BaseQuantizeConfig(nbits=3, group_size=64),
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
def quantize_with_mixed_precision(model, configs):
|
|
89
|
+
"""Apply mixed precision quantization."""
|
|
90
|
+
from hqq.core.quantize import HQQLinear
|
|
91
|
+
|
|
92
|
+
for name, module in model.named_modules():
|
|
93
|
+
if isinstance(module, torch.nn.Linear):
|
|
94
|
+
for pattern, config in configs.items():
|
|
95
|
+
if pattern in name:
|
|
96
|
+
if config is None:
|
|
97
|
+
continue # Skip quantization
|
|
98
|
+
parent = get_parent_module(model, name)
|
|
99
|
+
setattr(parent, name.split(".")[-1],
|
|
100
|
+
HQQLinear(module, config))
|
|
101
|
+
break
|
|
102
|
+
return model
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Sensitivity-Based Quantization
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
import torch
|
|
109
|
+
from hqq.core.quantize import BaseQuantizeConfig, HQQLinear
|
|
110
|
+
|
|
111
|
+
def measure_layer_sensitivity(model, calibration_data, layer_name):
|
|
112
|
+
"""Measure quantization sensitivity of a layer."""
|
|
113
|
+
original_output = None
|
|
114
|
+
quantized_output = None
|
|
115
|
+
|
|
116
|
+
# Get original output
|
|
117
|
+
def hook_original(module, input, output):
|
|
118
|
+
nonlocal original_output
|
|
119
|
+
original_output = output.clone()
|
|
120
|
+
|
|
121
|
+
layer = dict(model.named_modules())[layer_name]
|
|
122
|
+
handle = layer.register_forward_hook(hook_original)
|
|
123
|
+
|
|
124
|
+
with torch.no_grad():
|
|
125
|
+
model(calibration_data)
|
|
126
|
+
handle.remove()
|
|
127
|
+
|
|
128
|
+
# Quantize and measure error
|
|
129
|
+
for nbits in [4, 3, 2]:
|
|
130
|
+
config = BaseQuantizeConfig(nbits=nbits, group_size=64)
|
|
131
|
+
quant_layer = HQQLinear(layer, config)
|
|
132
|
+
|
|
133
|
+
with torch.no_grad():
|
|
134
|
+
quantized_output = quant_layer(calibration_data)
|
|
135
|
+
|
|
136
|
+
error = torch.mean((original_output - quantized_output) ** 2).item()
|
|
137
|
+
print(f"{layer_name} @ {nbits}-bit: MSE = {error:.6f}")
|
|
138
|
+
|
|
139
|
+
# Auto-select precision based on sensitivity
|
|
140
|
+
def auto_select_precision(sensitivity_results, threshold=0.01):
|
|
141
|
+
"""Select precision based on sensitivity threshold."""
|
|
142
|
+
configs = {}
|
|
143
|
+
for layer_name, errors in sensitivity_results.items():
|
|
144
|
+
for nbits, error in sorted(errors.items()):
|
|
145
|
+
if error < threshold:
|
|
146
|
+
configs[layer_name] = BaseQuantizeConfig(nbits=nbits, group_size=64)
|
|
147
|
+
break
|
|
148
|
+
return configs
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Advanced Quantization Options
|
|
152
|
+
|
|
153
|
+
### Custom Zero Point Handling
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
from hqq.core.quantize import BaseQuantizeConfig
|
|
157
|
+
|
|
158
|
+
# Symmetric quantization (zero point = 0)
|
|
159
|
+
config_symmetric = BaseQuantizeConfig(
|
|
160
|
+
nbits=4,
|
|
161
|
+
group_size=64,
|
|
162
|
+
axis=1,
|
|
163
|
+
zero_point=False # No zero point, symmetric
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# Asymmetric quantization (learned zero point)
|
|
167
|
+
config_asymmetric = BaseQuantizeConfig(
|
|
168
|
+
nbits=4,
|
|
169
|
+
group_size=64,
|
|
170
|
+
axis=1,
|
|
171
|
+
zero_point=True # Include zero point
|
|
172
|
+
)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Axis Selection
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
from hqq.core.quantize import BaseQuantizeConfig
|
|
179
|
+
|
|
180
|
+
# Quantize along output dimension (default, better for inference)
|
|
181
|
+
config_axis1 = BaseQuantizeConfig(
|
|
182
|
+
nbits=4,
|
|
183
|
+
group_size=64,
|
|
184
|
+
axis=1 # Output dimension
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# Quantize along input dimension (better for some architectures)
|
|
188
|
+
config_axis0 = BaseQuantizeConfig(
|
|
189
|
+
nbits=4,
|
|
190
|
+
group_size=64,
|
|
191
|
+
axis=0 # Input dimension
|
|
192
|
+
)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### Group Size Optimization
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
def find_optimal_group_size(layer, test_input, target_bits=4):
    """Sweep HQQ group sizes for *layer* and report quality/memory trade-offs.

    Args:
        layer: an nn.Linear-like module to quantize (left unmodified).
        test_input: a tensor accepted by ``layer`` used to measure error.
        target_bits: quantization bit-width to test at each group size.

    Returns:
        Dict mapping group size -> {"mse": float, "memory_bytes": float}.
    """
    import copy

    from hqq.core.quantize import BaseQuantizeConfig, HQQLinear
    import torch

    group_sizes = [16, 32, 64, 128, 256]
    results = {}

    # No gradients needed anywhere in this benchmark.
    with torch.no_grad():
        original_output = layer(test_input)

        for gs in group_sizes:
            config = BaseQuantizeConfig(nbits=target_bits, group_size=gs)
            # nn.Module has no .clone() (that is a Tensor method); deep-copy
            # so HQQLinear does not consume or mutate the caller's layer.
            quant_layer = HQQLinear(copy.deepcopy(layer), config)
            quant_output = quant_layer(test_input)

            mse = torch.mean((original_output - quant_output) ** 2).item()
            # Packed-weight footprint estimate at target_bits per element.
            memory = quant_layer.W_q.numel() * target_bits / 8

            results[gs] = {"mse": mse, "memory_bytes": memory}
            print(f"Group size {gs}: MSE={mse:.6f}, Memory={memory/1024:.1f}KB")

    return results
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## Model Export and Deployment
|
|
224
|
+
|
|
225
|
+
### Export for ONNX
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
import torch
from transformers import AutoModelForCausalLM, HqqConfig

# Load the model with 4-bit HQQ quantization, kept on CPU for export.
quant_cfg = HqqConfig(nbits=4, group_size=64)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    quantization_config=quant_cfg,
    device_map="cpu",
)

# Trace with a dummy token batch and export to ONNX
# (requires dequantization for compatibility).
sample_ids = torch.randint(0, 32000, (1, 128))
torch.onnx.export(
    model,
    sample_ids,
    "model_hqq.onnx",
    input_names=["input_ids"],
    output_names=["logits"],
    dynamic_axes={"input_ids": {0: "batch", 1: "seq_len"}},
)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### SafeTensors Export
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
from safetensors.torch import save_file
|
|
255
|
+
|
|
256
|
+
def export_hqq_safetensors(model, output_path):
    """Serialize an HQQ model's parameters plus quantization metadata
    (packed weights, scales, and zero points) to a safetensors file.
    """
    tensors = {n: p.data.cpu() for n, p in model.named_parameters()}

    # Quantized modules carry packed weights and per-group scale/zero data.
    for mod_name, mod in model.named_modules():
        if not hasattr(mod, "W_q"):
            continue
        tensors[f"{mod_name}.W_q"] = mod.W_q.cpu()
        tensors[f"{mod_name}.scale"] = mod.scale.cpu()
        if hasattr(mod, "zero"):
            tensors[f"{mod_name}.zero"] = mod.zero.cpu()

    save_file(tensors, output_path)

export_hqq_safetensors(model, "model_hqq.safetensors")
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
## Performance Optimization
|
|
277
|
+
|
|
278
|
+
### Kernel Fusion
|
|
279
|
+
|
|
280
|
+
```python
|
|
281
|
+
import torch
|
|
282
|
+
from hqq.core.quantize import HQQLinear
|
|
283
|
+
|
|
284
|
+
# Enable torch.compile for kernel fusion
|
|
285
|
+
def optimize_model(model):
    """Prepare an HQQ model for fast inference.

    Selects the marlin kernel backend for HQQ linear layers, then
    compiles the model with torch.compile for kernel fusion.
    """
    HQQLinear.set_backend("marlin")

    compiled = torch.compile(
        model,
        mode="reduce-overhead",
        fullgraph=True,
    )
    return compiled

model = optimize_model(model)
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### Batch Size Optimization
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
def find_optimal_batch_size(model, tokenizer, max_batch=64):
    """Benchmark generation throughput across batch sizes.

    Args:
        model: a causal LM with a ``generate`` method and ``device`` attr.
        tokenizer: tokenizer used to build the benchmark prompt batch.
        max_batch: largest batch size to try.

    Returns:
        Dict mapping batch size -> {"time": s per run, "throughput": tok/s}.
        Stops at the first CUDA out-of-memory failure.
    """
    import time

    prompt = "Hello, world!"
    inputs = tokenizer([prompt], return_tensors="pt", padding=True)

    # Cap candidates at max_batch and deduplicate (the naive list
    # [1, 2, 4, 8, 16, 32, max_batch] could test sizes above max_batch
    # or benchmark max_batch twice).
    candidates = sorted({b for b in (1, 2, 4, 8, 16, 32) if b <= max_batch} | {max_batch})

    results = {}
    for batch_size in candidates:
        try:
            batch_inputs = {
                k: v.repeat(batch_size, 1).to(model.device)
                for k, v in inputs.items()
            }

            # Warmup
            model.generate(**batch_inputs, max_new_tokens=10)

            # Benchmark
            torch.cuda.synchronize()
            start = time.time()
            for _ in range(5):
                model.generate(**batch_inputs, max_new_tokens=50)
            torch.cuda.synchronize()

            elapsed = (time.time() - start) / 5
            throughput = batch_size * 50 / elapsed

            results[batch_size] = {
                "time": elapsed,
                "throughput": throughput
            }
            print(f"Batch {batch_size}: {throughput:.1f} tokens/sec")

        except torch.cuda.OutOfMemoryError:
            # Larger batches will also OOM, so stop here.
            print(f"Batch {batch_size}: OOM")
            break

    return results
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Memory-Efficient Inference
|
|
347
|
+
|
|
348
|
+
```python
|
|
349
|
+
import torch
|
|
350
|
+
from contextlib import contextmanager
|
|
351
|
+
|
|
352
|
+
@contextmanager
def low_memory_inference(model):
    """Run the enclosed inference with autograd disabled and the CUDA
    caching allocator flushed before and after the block.
    """
    with torch.no_grad(), torch.inference_mode():
        torch.cuda.empty_cache()  # free cached blocks before generating
        yield
        torch.cuda.empty_cache()  # release what generation allocated
|
|
364
|
+
|
|
365
|
+
# Usage
|
|
366
|
+
with low_memory_inference(model):
|
|
367
|
+
outputs = model.generate(**inputs, max_new_tokens=100)
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
## Benchmarking
|
|
371
|
+
|
|
372
|
+
### Comprehensive Benchmark Suite
|
|
373
|
+
|
|
374
|
+
```python
|
|
375
|
+
import time
|
|
376
|
+
import torch
|
|
377
|
+
from dataclasses import dataclass
|
|
378
|
+
from typing import Dict, List
|
|
379
|
+
|
|
380
|
+
@dataclass
class BenchmarkResult:
    """Metrics from one benchmark run of an HQQ-quantized model."""
    latency_ms: float  # wall-clock generation time in milliseconds
    throughput: float  # tokens produced per second
    memory_mb: float  # peak CUDA memory allocated during the run, in MB
    perplexity: float  # exp(LM loss) on the benchmark inputs (simplified)
|
|
386
|
+
|
|
387
|
+
def benchmark_hqq_model(model, tokenizer, test_texts: List[str]) -> BenchmarkResult:
    """Measure latency, throughput, peak memory, and perplexity of an
    HQQ-quantized causal LM on *test_texts*.
    """
    device = next(model.parameters()).device
    batch = tokenizer(test_texts, return_tensors="pt", padding=True).to(device)

    # Track peak allocation from this point on.
    torch.cuda.reset_peak_memory_stats()

    # Timed greedy generation.
    torch.cuda.synchronize()
    t0 = time.time()
    with torch.no_grad():
        generated = model.generate(
            **batch,
            max_new_tokens=100,
            do_sample=False
        )
    torch.cuda.synchronize()
    latency = (time.time() - t0) * 1000

    # Throughput counts every token in the output batch (prompt + new).
    total_tokens = generated.shape[0] * generated.shape[1]
    throughput = total_tokens / (latency / 1000)
    memory = torch.cuda.max_memory_allocated() / 1024 / 1024

    # Simplified perplexity: exp of the LM loss on the prompts themselves.
    with torch.no_grad():
        forward_out = model(**batch, labels=batch["input_ids"])
    perplexity = torch.exp(forward_out.loss).item()

    return BenchmarkResult(
        latency_ms=latency,
        throughput=throughput,
        memory_mb=memory,
        perplexity=perplexity
    )
|
|
427
|
+
|
|
428
|
+
# Compare different configurations
|
|
429
|
+
def compare_quantization_configs(model_name, configs: Dict[str, dict]):
    """Benchmark each named HQQ configuration of *model_name* and print
    its latency, throughput, memory, and perplexity.

    NOTE(review): assumes module-level ``load_hqq_model``, ``tokenizer``,
    and ``test_texts`` are defined by the surrounding script — confirm.
    """
    results = {}

    for cfg_name, cfg in configs.items():
        print(f"\nBenchmarking: {cfg_name}")
        quantized = load_hqq_model(model_name, **cfg)
        outcome = benchmark_hqq_model(quantized, tokenizer, test_texts)
        results[cfg_name] = outcome

        print(f" Latency: {outcome.latency_ms:.1f}ms")
        print(f" Throughput: {outcome.throughput:.1f} tok/s")
        print(f" Memory: {outcome.memory_mb:.1f}MB")
        print(f" Perplexity: {outcome.perplexity:.2f}")

        # Free GPU memory before loading the next variant.
        del quantized
        torch.cuda.empty_cache()

    return results
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
## Integration Examples
|
|
451
|
+
|
|
452
|
+
### LangChain Integration
|
|
453
|
+
|
|
454
|
+
```python
|
|
455
|
+
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig, pipeline

# 4-bit HQQ model and its tokenizer.
hqq_cfg = HqqConfig(nbits=4, group_size=64)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    quantization_config=hqq_cfg,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")

# Expose the model to LangChain through a transformers pipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)
llm = HuggingFacePipeline(pipeline=pipe)

# Drive it from a simple question-answering chain.
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["question"],
    template="Answer the question: {question}",
)

chain = LLMChain(llm=llm, prompt=prompt)
result = chain.run("What is machine learning?")
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
### Gradio Interface
|
|
492
|
+
|
|
493
|
+
```python
|
|
494
|
+
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig

# Load the 4-bit HQQ-quantized model once at startup.
config = HqqConfig(nbits=4, group_size=64)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B",
    quantization_config=config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")

def generate(prompt, max_tokens, temperature):
    """Generate a completion for *prompt*.

    Samples when temperature > 0, greedy-decodes otherwise. The
    temperature kwarg is only forwarded when sampling: transformers
    warns (and newer versions reject) temperature=0 combined with
    do_sample=False.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    gen_kwargs = {"max_new_tokens": int(max_tokens)}
    if temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = temperature
    else:
        gen_kwargs["do_sample"] = False
    outputs = model.generate(**inputs, **gen_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(10, 500, value=100, label="Max Tokens"),
        gr.Slider(0, 2, value=0.7, label="Temperature")
    ],
    outputs=gr.Textbox(label="Output"),
    title="HQQ Quantized LLM"
)

demo.launch()
|
|
528
|
+
```
|