@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,324 @@
1
+ # AWQ Advanced Usage Guide
2
+
3
+ ## Quantization Algorithm Details
4
+
5
+ ### How AWQ Works
6
+
7
+ AWQ (Activation-aware Weight Quantization) is based on the key insight that not all weights in an LLM are equally important. The algorithm:
8
+
9
+ 1. **Identifies salient weights** (~1%) by examining activation distributions
10
+ 2. **Applies mathematical scaling** to protect critical channels
11
+ 3. **Quantizes remaining weights** to 4-bit with minimal error
12
+
13
+ **Core formula**: `L(s) = ||Q(W * s)(s^-1 * X) - W * X||`
14
+
15
+ Where:
16
+ - `Q` is the quantization function
17
+ - `W` is the weight matrix
18
+ - `s` is the scaling factor
19
+ - `X` is the input activation
20
+
21
+ ### Why AWQ Outperforms GPTQ
22
+
23
+ | Aspect | AWQ | GPTQ |
24
+ |--------|-----|------|
25
+ | Calibration approach | Activation-aware scaling | Hessian-based reconstruction |
26
+ | Overfitting risk | Low (no backprop) | Higher (reconstruction-based) |
27
+ | Calibration data | 128-1024 tokens | Larger datasets needed |
28
+ | Generalization | Better across domains | Can overfit to calibration |
29
+
30
+ ## WQLinear Kernel Variants
31
+
32
+ AutoAWQ provides multiple kernel implementations for different use cases:
33
+
34
+ ### WQLinear_GEMM
35
+ - **Use case**: Batch inference, training
36
+ - **Best for**: Batch sizes > 1, throughput optimization
37
+ - **Implementation**: General matrix multiplication
38
+
39
+ ```python
40
+ quant_config = {"version": "GEMM"}
41
+ ```
42
+
43
+ ### WQLinear_GEMV
44
+ - **Use case**: Single-token generation
45
+ - **Best for**: Streaming, chat applications
46
+ - **Speedup**: ~20% faster than GEMM for batch_size=1
47
+ - **Limitation**: Only works with batch_size=1
48
+
49
+ ```python
50
+ quant_config = {"version": "GEMV"}
51
+ ```
52
+
53
+ ### WQLinear_GEMVFast
54
+ - **Use case**: Optimized single-token generation
55
+ - **Requirements**: awq_v2_ext kernels installed
56
+ - **Best for**: Maximum single-token speed
57
+
58
+ ```python
59
+ # Requires autoawq[kernels] installation
60
+ quant_config = {"version": "gemv_fast"}
61
+ ```
62
+
63
+ ### WQLinear_Marlin
64
+ - **Use case**: High-throughput inference
65
+ - **Requirements**: Ampere+ GPUs (Compute Capability 8.0+)
66
+ - **Speedup**: 2x faster on A100/H100
67
+
68
+ ```python
69
+ from transformers import AwqConfig
70
+
71
+ config = AwqConfig(bits=4, version="marlin")
72
+ ```
73
+
74
+ ### WQLinear_Exllama / ExllamaV2
75
+ - **Use case**: AMD GPU compatibility, faster prefill
76
+ - **Benefits**: Works with ROCm
77
+
78
+ ```python
79
+ config = AwqConfig(bits=4, version="exllama")
80
+ ```
81
+
82
+ ### WQLinear_IPEX
83
+ - **Use case**: Intel CPU/XPU acceleration
84
+ - **Requirements**: Intel Extension for PyTorch, torch 2.4+
85
+
86
+ ```bash
87
+ pip install autoawq[cpu]
88
+ ```
89
+
90
+ ## Group Size Configuration
91
+
92
+ Group size determines how weights are grouped for quantization:
93
+
94
+ | Group Size | Model Size | Accuracy | Speed | Use Case |
95
+ |------------|------------|----------|-------|----------|
96
+ | 32 | Larger | Best | Slower | Maximum accuracy |
97
+ | **128** | Medium | Good | Fast | **Recommended default** |
98
+ | 256 | Smaller | Lower | Faster | Speed-critical |
99
+
100
+ ```python
101
+ quant_config = {
102
+ "q_group_size": 128, # Recommended
103
+ "w_bit": 4,
104
+ "zero_point": True
105
+ }
106
+ ```
107
+
108
+ ## Zero-Point Quantization
109
+
110
+ Zero-point quantization adds an offset to handle asymmetric weight distributions:
111
+
112
+ ```python
113
+ # With zero-point (recommended for most models)
114
+ quant_config = {"zero_point": True, "w_bit": 4, "q_group_size": 128}
115
+
116
+ # Without zero-point (symmetric quantization)
117
+ quant_config = {"zero_point": False, "w_bit": 4, "q_group_size": 128}
118
+ ```
119
+
120
+ **When to disable zero-point**:
121
+ - Models with symmetric weight distributions
122
+ - When using specific kernels that don't support it
123
+
124
+ ## Custom Calibration Strategies
125
+
126
+ ### Domain-Specific Calibration
127
+
128
+ For domain-specific models, use relevant calibration data:
129
+
130
+ ```python
131
+ # Medical domain
132
+ medical_samples = [
133
+ "Patient presents with acute respiratory symptoms...",
134
+ "Differential diagnosis includes pneumonia, bronchitis...",
135
+ # More domain-specific examples
136
+ ]
137
+
138
+ model.quantize(
139
+ tokenizer,
140
+ quant_config=quant_config,
141
+ calib_data=medical_samples,
142
+ max_calib_samples=256
143
+ )
144
+ ```
145
+
146
+ ### Instruction-Tuned Model Calibration
147
+
148
+ For chat/instruction models, include conversational data:
149
+
150
+ ```python
151
+ chat_samples = [
152
+ "Human: What is machine learning?\nAssistant: Machine learning is...",
153
+ "Human: Explain neural networks.\nAssistant: Neural networks are...",
154
+ ]
155
+
156
+ model.quantize(tokenizer, quant_config=quant_config, calib_data=chat_samples)
157
+ ```
158
+
159
+ ### Calibration Parameters
160
+
161
+ ```python
162
+ model.quantize(
163
+ tokenizer,
164
+ quant_config=quant_config,
165
+ calib_data="pileval", # Dataset name or list
166
+ max_calib_samples=128, # Number of samples (more = slower but better)
167
+ max_calib_seq_len=512, # Sequence length
168
+ duo_scaling=True, # Scale weights and activations
169
+ apply_clip=True # Apply weight clipping
170
+ )
171
+ ```
172
+
173
+ ## Layer Fusion
174
+
175
+ Layer fusion combines multiple operations for better performance:
176
+
177
+ ### Automatic Fusion
178
+
179
+ ```python
180
+ model = AutoAWQForCausalLM.from_quantized(
181
+ model_name,
182
+ fuse_layers=True # Enables automatic fusion
183
+ )
184
+ ```
185
+
186
+ ### What Gets Fused
187
+
188
+ - **Attention**: Q, K, V projections combined
189
+ - **MLP**: Gate and Up projections fused
190
+ - **Normalization**: Replaced with FasterTransformerRMSNorm
191
+
192
+ ### Manual Fusion Configuration
193
+
194
+ ```python
195
+ from transformers import AwqConfig
196
+
197
+ config = AwqConfig(
198
+ bits=4,
199
+ fuse_max_seq_len=2048, # Max context for fused attention
200
+ do_fuse=True,
201
+ modules_to_fuse={
202
+ "attention": ["q_proj", "k_proj", "v_proj"],
203
+ "mlp": ["gate_proj", "up_proj"],
204
+         "layernorm": ["input_layernorm", "post_attention_layernorm"],
+         # NOTE: depending on your transformers version, modules_to_fuse may also
+         # require "use_alibi", "hidden_size", "num_attention_heads",
+         # "num_key_value_heads", and "max_seq_len" — check the AwqConfig docs.
205
+ }
206
+ )
207
+ ```
208
+
209
+ ## Memory Optimization
210
+
211
+ ### Chunked Processing
212
+
213
+ For large models, AWQ processes in chunks to avoid OOM:
214
+
215
+ ```python
216
+ from awq import AutoAWQForCausalLM
217
+
218
+ # Reduce memory during quantization
219
+ model = AutoAWQForCausalLM.from_pretrained(
220
+ model_path,
221
+ low_cpu_mem_usage=True
222
+ )
223
+ ```
224
+
225
+ ### Multi-GPU Quantization
226
+
227
+ ```python
228
+ model = AutoAWQForCausalLM.from_pretrained(
229
+ "meta-llama/Llama-2-70b-hf",
230
+ device_map="auto"
231
+ )
232
+ ```
233
+
234
+ ### CPU Offloading
235
+
236
+ ```python
237
+ model = AutoAWQForCausalLM.from_quantized(
238
+ model_name,
239
+ device_map="auto",
240
+ max_memory={
241
+ 0: "24GB",
242
+ "cpu": "100GB"
243
+ }
244
+ )
245
+ ```
246
+
247
+ ## Modules to Not Convert
248
+
249
+ Some modules should remain in full precision:
250
+
251
+ ```python
252
+ # Visual encoder in multimodal models
253
+ class LlavaAWQForCausalLM(BaseAWQForCausalLM):
254
+ modules_to_not_convert = ["visual"]
255
+ ```
256
+
257
+ Common exclusions:
258
+ - `visual` - Vision encoders in VLMs
259
+ - `lm_head` - Output projection
260
+ - `embed_tokens` - Embedding layers
261
+
262
+ ## Saving and Loading
263
+
264
+ ### Save Quantized Model
265
+
266
+ ```python
267
+ # Save locally
268
+ model.save_quantized("./my-awq-model")
269
+ tokenizer.save_pretrained("./my-awq-model")
270
+
271
+ # Save with safetensors (recommended)
272
+ model.save_quantized("./my-awq-model", safetensors=True)
273
+
274
+ # Save sharded (for large models)
275
+ model.save_quantized("./my-awq-model", shard_size="5GB")
276
+ ```
277
+
278
+ ### Push to HuggingFace
279
+
280
+ ```python
281
+ model.push_to_hub("username/my-awq-model")
282
+ tokenizer.push_to_hub("username/my-awq-model")
283
+ ```
284
+
285
+ ### Load with Specific Backend
286
+
287
+ ```python
288
+ from awq import AutoAWQForCausalLM
289
+
290
+ # Load with a specific kernel backend (the use_* flags are mutually exclusive)
291
+ model = AutoAWQForCausalLM.from_quantized(
292
+     model_name,
293
+     use_exllama_v2=True,  # ExLlamaV2 (faster than ExLlama)
294
+     # use_exllama=True,   # ExLlama backend
295
+     # use_marlin=True,    # Marlin kernels (Ampere+ GPUs)
296
+     # use_ipex=True,      # Intel CPU (IPEX)
297
+     fuse_layers=True      # Enable fusion
298
+ )
299
+ ```
300
+
301
+ ## Benchmarking Your Model
302
+
303
+ ```python
304
+ from awq.utils.utils import get_best_device
305
+ import time
306
+
307
+ model = AutoAWQForCausalLM.from_quantized(model_name, fuse_layers=True)
308
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
309
+
310
+ # Warmup
311
+ inputs = tokenizer("Hello", return_tensors="pt").to(get_best_device())
312
+ model.generate(**inputs, max_new_tokens=10)
313
+
314
+ # Benchmark
315
+ prompt = "Write a detailed essay about"
316
+ inputs = tokenizer(prompt, return_tensors="pt").to(get_best_device())
317
+
318
+ start = time.time()
319
+ outputs = model.generate(**inputs, max_new_tokens=200)
320
+ end = time.time()
321
+
322
+ tokens_generated = outputs.shape[1] - inputs.input_ids.shape[1]
323
+ print(f"Tokens/sec: {tokens_generated / (end - start):.2f}")
324
+ ```
@@ -0,0 +1,344 @@
1
+ # AWQ Troubleshooting Guide
2
+
3
+ ## Installation Issues
4
+
5
+ ### CUDA Version Mismatch
6
+
7
+ **Error**: `RuntimeError: CUDA error: no kernel image is available for execution`
8
+
9
+ **Fix**: Install matching CUDA version:
10
+ ```bash
11
+ # Check your CUDA version
12
+ nvcc --version
13
+
14
+ # Install matching autoawq
15
+ pip install autoawq --extra-index-url https://download.pytorch.org/whl/cu118 # For CUDA 11.8
16
+ pip install autoawq --extra-index-url https://download.pytorch.org/whl/cu121 # For CUDA 12.1
17
+ ```
18
+
19
+ ### Compute Capability Too Low
20
+
21
+ **Error**: `AssertionError: Compute capability must be >= 7.5`
22
+
23
+ **Fix**: AWQ requires NVIDIA GPUs with compute capability 7.5+ (Turing or newer):
24
+ - RTX 20xx series: 7.5 (supported)
25
+ - RTX 30xx series: 8.6 (supported)
26
+ - RTX 40xx series: 8.9 (supported)
27
+ - A100/H100: 8.0/9.0 (supported)
28
+
29
+ Older GPUs (GTX 10xx, V100) are not supported.
30
+
31
+ ### Transformers Version Conflict
32
+
33
+ **Error**: `ImportError: cannot import name 'AwqConfig'`
34
+
35
+ **Fix**: AutoAWQ may downgrade transformers. Reinstall correct version:
36
+ ```bash
37
+ pip install autoawq
38
+ pip install "transformers>=4.45.0" --upgrade  # quote so the shell doesn't treat >= as redirection
39
+ ```
40
+
41
+ ### Triton Not Found (Linux)
42
+
43
+ **Error**: `ModuleNotFoundError: No module named 'triton'`
44
+
45
+ **Fix**:
46
+ ```bash
47
+ pip install triton
48
+ # Or install with kernels
49
+ pip install autoawq[kernels]
50
+ ```
51
+
52
+ ## Quantization Issues
53
+
54
+ ### CUDA Out of Memory During Quantization
55
+
56
+ **Error**: `torch.cuda.OutOfMemoryError: CUDA out of memory`
57
+
58
+ **Solutions**:
59
+
60
+ 1. **Reduce calibration samples**:
61
+ ```python
62
+ model.quantize(
63
+ tokenizer,
64
+ quant_config=quant_config,
65
+ max_calib_samples=64 # Reduce from 128
66
+ )
67
+ ```
68
+
69
+ 2. **Use CPU offloading**:
70
+ ```python
71
+ model = AutoAWQForCausalLM.from_pretrained(
72
+ model_path,
73
+ low_cpu_mem_usage=True
74
+ )
75
+ ```
76
+
77
+ 3. **Multi-GPU quantization**:
78
+ ```python
79
+ model = AutoAWQForCausalLM.from_pretrained(
80
+ model_path,
81
+ device_map="auto"
82
+ )
83
+ ```
84
+
85
+ ### NaN in Weights After Quantization
86
+
87
+ **Error**: `AssertionError: NaN detected in weights`
88
+
89
+ **Cause**: Calibration data issues or numerical instability.
90
+
91
+ **Fix**:
92
+ ```python
93
+ # Use more calibration samples
94
+ model.quantize(
95
+ tokenizer,
96
+ quant_config=quant_config,
97
+ max_calib_samples=256,
98
+ max_calib_seq_len=1024
99
+ )
100
+ ```
101
+
102
+ ### Empty Calibration Samples
103
+
104
+ **Error**: `ValueError: Calibration samples are empty`
105
+
106
+ **Fix**: Ensure tokenizer produces valid output:
107
+ ```python
108
+ # Check tokenizer
109
+ test = tokenizer("test", return_tensors="pt")
110
+ print(f"Token count: {test.input_ids.shape[1]}")
111
+
112
+ # Use explicit calibration data
113
+ calib_data = ["Your sample text here..."] * 128
114
+ model.quantize(tokenizer, quant_config=quant_config, calib_data=calib_data)
115
+ ```
116
+
117
+ ### Unsupported Model Architecture
118
+
119
+ **Error**: `TypeError: 'model_type' is not supported`
120
+
121
+ **Cause**: Model architecture not in AWQ registry.
122
+
123
+ **Check supported models**:
124
+ ```python
125
+ from awq.models import AWQ_CAUSAL_LM_MODEL_MAP
126
+ print(list(AWQ_CAUSAL_LM_MODEL_MAP.keys()))
127
+ ```
128
+
129
+ **Supported**: llama, mistral, qwen2, falcon, mpt, phi, gemma, etc.
130
+
131
+ ## Inference Issues
132
+
133
+ ### Slow Inference Speed
134
+
135
+ **Problem**: Inference slower than expected.
136
+
137
+ **Solutions**:
138
+
139
+ 1. **Enable layer fusion**:
140
+ ```python
141
+ model = AutoAWQForCausalLM.from_quantized(
142
+ model_name,
143
+ fuse_layers=True
144
+ )
145
+ ```
146
+
147
+ 2. **Use correct kernel for batch size**:
148
+ ```python
149
+ # For batch_size=1
150
+ quant_config = {"version": "GEMV"}
151
+
152
+ # For batch_size>1
153
+ quant_config = {"version": "GEMM"}
154
+ ```
155
+
156
+ 3. **Use Marlin on Ampere+ GPUs**:
157
+ ```python
158
+ from transformers import AwqConfig
159
+ config = AwqConfig(bits=4, version="marlin")
160
+ ```
161
+
162
+ ### Wrong Output / Garbage Text
163
+
164
+ **Problem**: Model produces nonsensical output after quantization.
165
+
166
+ **Causes and fixes**:
167
+
168
+ 1. **Poor calibration data**: Use domain-relevant data
169
+ ```python
170
+ calib_data = [
171
+ "Relevant examples from your use case...",
172
+ ]
173
+ model.quantize(tokenizer, quant_config=quant_config, calib_data=calib_data)
174
+ ```
175
+
176
+ 2. **Tokenizer mismatch**: Ensure same tokenizer
177
+ ```python
178
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
179
+ ```
180
+
181
+ 3. **Check generation config**:
182
+ ```python
183
+ outputs = model.generate(
184
+ **inputs,
185
+ max_new_tokens=200,
186
+ do_sample=True,
187
+ temperature=0.7,
188
+ pad_token_id=tokenizer.eos_token_id
189
+ )
190
+ ```
191
+
192
+ ### FlashAttention2 Incompatibility
193
+
194
+ **Error**: `ValueError: Cannot use FlashAttention2 with fused modules`
195
+
196
+ **Fix**: Disable one or the other:
197
+ ```python
198
+ # Option 1: Use fused modules (recommended for AWQ)
199
+ model = AutoAWQForCausalLM.from_quantized(model_name, fuse_layers=True)
200
+
201
+ # Option 2: Use FlashAttention2 without fusion
202
+ from transformers import AutoModelForCausalLM
203
+ model = AutoModelForCausalLM.from_pretrained(
204
+ model_name,
205
+ attn_implementation="flash_attention_2",
206
+ device_map="auto"
207
+ )
208
+ ```
209
+
210
+ ### AMD GPU Issues
211
+
212
+ **Error**: `RuntimeError: ROCm/HIP not found`
213
+
214
+ **Fix**: Use ExLlama backend for AMD:
215
+ ```python
216
+ from transformers import AwqConfig
217
+
218
+ config = AwqConfig(bits=4, version="exllama")
219
+ model = AutoModelForCausalLM.from_pretrained(
220
+ model_name,
221
+ quantization_config=config
222
+ )
223
+ ```
224
+
225
+ ## Loading Issues
226
+
227
+ ### Model Not Found
228
+
229
+ **Error**: `OSError: model_name is not a valid model identifier`
230
+
231
+ **Fix**: Check HuggingFace model exists:
232
+ ```text
233
+ # Search AWQ models
234
+ https://huggingface.co/models?library=awq
235
+
236
+ # Common AWQ model providers
237
+ TheBloke, teknium, Qwen, NousResearch
238
+ ```
239
+
240
+ ### Safetensors Error
241
+
242
+ **Error**: `safetensors_rust.SafetensorError: Error while deserializing`
243
+
244
+ **Fix**: Try loading without safetensors:
245
+ ```python
246
+ model = AutoAWQForCausalLM.from_quantized(
247
+ model_name,
248
+ safetensors=False
249
+ )
250
+ ```
251
+
252
+ ### Device Map Conflicts
253
+
254
+ **Error**: `ValueError: You cannot use device_map with max_memory`
255
+
256
+ **Fix**: Use one or the other:
257
+ ```python
258
+ # Auto device map
259
+ model = AutoAWQForCausalLM.from_quantized(model_name, device_map="auto")
260
+
261
+ # OR manual memory limits
262
+ model = AutoAWQForCausalLM.from_quantized(
263
+ model_name,
264
+ max_memory={0: "20GB", 1: "20GB"}
265
+ )
266
+ ```
267
+
268
+ ## vLLM Integration Issues
269
+
270
+ ### Quantization Not Detected
271
+
272
+ **Error**: vLLM loads model in FP16 instead of quantized.
273
+
274
+ **Fix**: Explicitly specify quantization:
275
+ ```python
276
+ from vllm import LLM
277
+
278
+ llm = LLM(
279
+ model="TheBloke/Llama-2-7B-AWQ",
280
+ quantization="awq", # Explicitly set
281
+ dtype="half"
282
+ )
283
+ ```
284
+
285
+ ### Marlin Kernel Error in vLLM
286
+
287
+ **Error**: `RuntimeError: Marlin kernel not supported`
288
+
289
+ **Fix**: Check GPU compatibility:
290
+ ```python
291
+ import torch
292
+ print(torch.cuda.get_device_capability()) # Must be >= (8, 0)
293
+
294
+ # If not supported, use GEMM
295
+ llm = LLM(model="...", quantization="awq") # Uses GEMM by default
296
+ ```
297
+
298
+ ## Performance Debugging
299
+
300
+ ### Memory Usage Check
301
+
302
+ ```python
303
+ import torch
304
+
305
+ def print_gpu_memory():
306
+ for i in range(torch.cuda.device_count()):
307
+ allocated = torch.cuda.memory_allocated(i) / 1e9
308
+ reserved = torch.cuda.memory_reserved(i) / 1e9
309
+ print(f"GPU {i}: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
310
+
311
+ print_gpu_memory()
312
+ ```
313
+
314
+ ### Profiling Inference
315
+
316
+ ```python
317
+ import time
318
+
319
+ def benchmark_model(model, tokenizer, prompt, n_runs=5):
320
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
321
+
322
+ # Warmup
323
+ model.generate(**inputs, max_new_tokens=10)
324
+ torch.cuda.synchronize()
325
+
326
+ # Benchmark
327
+ times = []
328
+ for _ in range(n_runs):
329
+ start = time.perf_counter()
330
+ outputs = model.generate(**inputs, max_new_tokens=100)
331
+ torch.cuda.synchronize()
332
+ times.append(time.perf_counter() - start)
333
+
334
+ tokens = outputs.shape[1] - inputs.input_ids.shape[1]
335
+ avg_time = sum(times) / len(times)
336
+ print(f"Average: {tokens/avg_time:.2f} tokens/sec")
337
+ ```
338
+
339
+ ## Getting Help
340
+
341
+ 1. **Check deprecation notice**: AutoAWQ is deprecated, use llm-compressor for new projects
342
+ 2. **GitHub Issues**: https://github.com/casper-hansen/AutoAWQ/issues
343
+ 3. **HuggingFace Forums**: https://discuss.huggingface.co/
344
+ 4. **vLLM Discord**: For vLLM integration issues