@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,420 @@
1
+ # AutoGPT Troubleshooting Guide
2
+
3
+ ## Installation Issues
4
+
5
+ ### Docker compose fails
6
+
7
+ **Error**: `Cannot connect to the Docker daemon`
8
+
9
+ **Fix**:
10
+ ```bash
11
+ # Start Docker daemon
12
+ sudo systemctl start docker
13
+
14
+ # Or on macOS
15
+ open -a Docker
16
+
17
+ # Verify Docker is running
18
+ docker ps
19
+ ```
20
+
21
+ **Error**: `Port already in use`
22
+
23
+ **Fix**:
24
+ ```bash
25
+ # Find process using port
26
+ lsof -i :8006
27
+
28
+ # Kill process
29
+ kill -9 <PID>
30
+
31
+ # Or change port in docker-compose.yml
32
+ ```
33
+
34
+ ### Database migration fails
35
+
36
+ **Error**: `Migration failed: relation already exists`
37
+
38
+ **Fix**:
39
+ ```bash
40
+ # Reset database
41
+ docker compose down -v
42
+ docker compose up -d db
43
+
44
+ # Re-run migrations
45
+ cd backend
46
+ poetry run prisma migrate reset --force
47
+ poetry run prisma migrate deploy
48
+ ```
49
+
50
+ **Error**: `Connection refused to database`
51
+
52
+ **Fix**:
53
+ ```bash
54
+ # Check database is running
55
+ docker compose ps db
56
+
57
+ # Check database logs
58
+ docker compose logs db
59
+
60
+ # Verify DATABASE_URL in .env
61
+ echo $DATABASE_URL
62
+ ```
63
+
64
+ ### Frontend build fails
65
+
66
+ **Error**: `Module not found: Can't resolve '@/components/...'`
67
+
68
+ **Fix**:
69
+ ```bash
70
+ # Clear node modules and reinstall
71
+ rm -rf node_modules
72
+ rm -rf .next
73
+ npm install
74
+
75
+ # Or with pnpm
76
+ pnpm install --force
77
+ ```
78
+
79
+ **Error**: `Supabase client not initialized`
80
+
81
+ **Fix**:
82
+ ```bash
83
+ # Verify environment variables
84
+ cat .env | grep SUPABASE
85
+
86
+ # Required variables:
87
+ # NEXT_PUBLIC_SUPABASE_URL=http://localhost:8000
88
+ # NEXT_PUBLIC_SUPABASE_ANON_KEY=your-key
89
+ ```
90
+
91
+ ## Service Issues
92
+
93
+ ### Backend services not starting
94
+
95
+ **Error**: `rest_server exited with code 1`
96
+
97
+ **Diagnose**:
98
+ ```bash
99
+ # Check logs
100
+ docker compose logs rest_server
101
+
102
+ # Common issues:
103
+ # - Missing environment variables
104
+ # - Database connection failed
105
+ # - Redis connection failed
106
+ ```
107
+
108
+ **Fix**:
109
+ ```bash
110
+ # Verify all dependencies are running
111
+ docker compose ps
112
+
113
+ # Restart services in order
114
+ docker compose restart db redis rabbitmq
115
+ sleep 10
116
+ docker compose restart rest_server executor
117
+ ```
118
+
119
+ ### Executor not processing tasks
120
+
121
+ **Error**: Tasks stuck in QUEUED status
122
+
123
+ **Diagnose**:
124
+ ```bash
125
+ # Check executor logs
126
+ docker compose logs executor
127
+
128
+ # Check RabbitMQ queue
129
+ # Visit http://localhost:15672 (guest/guest)
130
+ # Look at queue depths
131
+ ```
132
+
133
+ **Fix**:
134
+ ```bash
135
+ # Restart executor
136
+ docker compose restart executor
137
+
138
+ # If queue is backlogged, scale executors
139
+ docker compose up -d --scale executor=3
140
+ ```
141
+
142
+ ### WebSocket connection fails
143
+
144
+ **Error**: `WebSocket connection to 'ws://localhost:8001/ws' failed`
145
+
146
+ **Fix**:
147
+ ```bash
148
+ # Check WebSocket server is running
149
+ docker compose logs websocket_server
150
+
151
+ # Verify port is accessible
152
+ nc -zv localhost 8001
153
+
154
+ # Check firewall rules
155
+ sudo ufw allow 8001
156
+ ```
157
+
158
+ ## Agent Execution Issues
159
+
160
+ ### Agent stuck in running state
161
+
162
+ **Diagnose**:
163
+ ```bash
164
+ # Check execution status via API
165
+ curl http://localhost:8006/api/v1/executions/{execution_id}
166
+
167
+ # Check node execution logs
168
+ docker compose logs executor | grep {execution_id}
169
+ ```
170
+
171
+ **Fix**:
172
+ ```python
173
+ # Cancel stuck execution via API
174
+ import requests
175
+
176
+ response = requests.post(
177
+ f"http://localhost:8006/api/v1/executions/{execution_id}/cancel",
178
+ headers={"Authorization": f"Bearer {token}"}
179
+ )
180
+ ```
181
+
182
+ ### LLM block timeout
183
+
184
+ **Error**: `TimeoutError: LLM call exceeded timeout`
185
+
186
+ **Fix**:
187
+ ```python
188
+ # Increase timeout in block configuration
189
+ {
190
+ "block_id": "llm-block",
191
+ "config": {
192
+ "timeout_seconds": 120, # Increase from default 60
193
+ "max_retries": 3
194
+ }
195
+ }
196
+ ```
197
+
198
+ ### Credential errors
199
+
200
+ **Error**: `CredentialsNotFoundError: No credentials for provider openai`
201
+
202
+ **Fix**:
203
+ 1. Navigate to Profile > Integrations
204
+ 2. Add OpenAI API key
205
+ 3. Ensure graph has credential mapping
206
+
207
+ ```json
208
+ {
209
+ "credential_mapping": {
210
+ "openai": "user_credential_id"
211
+ }
212
+ }
213
+ ```
214
+
215
+ ### Memory issues during execution
216
+
217
+ **Error**: `MemoryError` or container killed (OOMKilled)
218
+
219
+ **Fix**:
220
+ ```yaml
221
+ # Increase memory limits in docker-compose.yml
222
+ executor:
223
+ deploy:
224
+ resources:
225
+ limits:
226
+ memory: 4G
227
+ reservations:
228
+ memory: 2G
229
+ ```
230
+
231
+ ## Graph/Block Issues
232
+
233
+ ### Block not appearing in UI
234
+
235
+ **Diagnose**:
236
+ ```python
237
+ # Check block registration
238
+ from backend.data.block import get_all_blocks
239
+
240
+ blocks = get_all_blocks()
241
+ print([b.name for b in blocks])
242
+ ```
243
+
244
+ **Fix**:
245
+ ```python
246
+ # Ensure block is imported in __init__.py
247
+ # backend/blocks/__init__.py
248
+ from backend.blocks.my_block import MyBlock
249
+
250
+ BLOCKS = [
251
+ MyBlock,
252
+ # ...
253
+ ]
254
+ ```
255
+
256
+ ### Graph save fails
257
+
258
+ **Error**: `GraphValidationError: Invalid link configuration`
259
+
260
+ **Diagnose**:
261
+ ```python
262
+ # Validate graph structure
263
+ from backend.data.graph import validate_graph
264
+
265
+ errors = validate_graph(graph_data)
266
+ print(errors)
267
+ ```
268
+
269
+ **Fix**:
270
+ - Ensure all links connect valid nodes
271
+ - Check input/output name matches
272
+ - Verify required inputs are connected
273
+
274
+ ### Circular dependency detected
275
+
276
+ **Error**: `GraphValidationError: Circular dependency in graph`
277
+
278
+ **Fix**:
279
+ ```python
280
+ # Find cycle
281
+ import networkx as nx
282
+
283
+ G = nx.DiGraph()
284
+ for link in graph.links:
285
+ G.add_edge(link.source_id, link.sink_id)
286
+
287
+ cycles = list(nx.simple_cycles(G))
288
+ print(f"Cycles found: {cycles}")
289
+ ```
290
+
291
+ ## Performance Issues
292
+
293
+ ### Slow graph execution
294
+
295
+ **Diagnose**:
296
+ ```python
297
+ # Profile execution
298
+ import cProfile
299
+
300
+ profiler = cProfile.Profile()
301
+ profiler.enable()
302
+ await executor.execute_graph(graph_id, inputs)
303
+ profiler.disable()
304
+ profiler.print_stats(sort='cumulative')
305
+ ```
306
+
307
+ **Fix**:
308
+ - Parallelize independent nodes
309
+ - Reduce unnecessary API calls
310
+ - Cache repeated computations
311
+
312
+ ### High database query latency
313
+
314
+ **Diagnose**:
315
+ ```bash
316
+ # Enable query logging in PostgreSQL
317
+ docker exec -it autogpt-db psql -U postgres
318
+ \x
319
+ SHOW log_min_duration_statement;
320
+ SET log_min_duration_statement = 100; -- Log queries > 100ms (session-scoped; use ALTER SYSTEM for server-wide)
321
+ ```
322
+
323
+ **Fix**:
324
+ ```sql
325
+ -- Add missing indexes
326
+ CREATE INDEX CONCURRENTLY idx_executions_user_created
327
+ ON "AgentGraphExecution" ("userId", "createdAt" DESC);
328
+
329
+ ANALYZE "AgentGraphExecution";
330
+ ```
331
+
332
+ ### Redis memory growing
333
+
334
+ **Diagnose**:
335
+ ```bash
336
+ # Check Redis memory usage
337
+ docker exec -it autogpt-redis redis-cli INFO memory
338
+
339
+ # Check key count
340
+ docker exec -it autogpt-redis redis-cli DBSIZE
341
+ ```
342
+
343
+ **Fix**:
344
+ ```bash
345
+ # Delete stale execution keys matching the pattern (SCAN + DEL; removes matches regardless of TTL)
346
+ docker exec -it autogpt-redis redis-cli --scan --pattern "exec:*" | head -1000 | xargs docker exec -i autogpt-redis redis-cli DEL
347
+
348
+ # Set memory policy
349
+ docker exec -it autogpt-redis redis-cli CONFIG SET maxmemory-policy volatile-lru
350
+ ```
351
+
352
+ ## Debugging Tips
353
+
354
+ ### Enable debug logging
355
+
356
+ ```bash
357
+ # Set in .env
358
+ LOG_LEVEL=DEBUG
359
+
360
+ # Or for specific module
361
+ LOG_LEVEL_EXECUTOR=DEBUG
362
+ LOG_LEVEL_BLOCKS=DEBUG
363
+ ```
364
+
365
+ ### Trace execution flow
366
+
367
+ ```python
368
+ import logging
369
+
370
+ logging.basicConfig(level=logging.DEBUG)
371
+ logger = logging.getLogger("backend.executor")
372
+
373
+ # Add to executor
374
+ logger.debug(f"Executing node {node_id} with inputs: {inputs}")
375
+ ```
376
+
377
+ ### Test block in isolation
378
+
379
+ ```python
380
+ import asyncio
381
+ from backend.blocks.my_block import MyBlock
382
+
383
+ async def test_block():
384
+ block = MyBlock()
385
+ inputs = {"query": "test"}
386
+
387
+ async for output_name, value in block.execute(inputs):
388
+ print(f"{output_name}: {value}")
389
+
390
+ asyncio.run(test_block())
391
+ ```
392
+
393
+ ### Inspect message queues
394
+
395
+ ```bash
396
+ # RabbitMQ management UI
397
+ # http://localhost:15672 (guest/guest)
398
+
399
+ # List queues via CLI
400
+ docker exec autogpt-rabbitmq rabbitmqctl list_queues name messages consumers
401
+
402
+ # Purge a queue
403
+ docker exec autogpt-rabbitmq rabbitmqctl purge_queue graph-execution
404
+ ```
405
+
406
+ ## Getting Help
407
+
408
+ 1. **Documentation**: https://docs.agpt.co
409
+ 2. **GitHub Issues**: https://github.com/Significant-Gravitas/AutoGPT/issues
410
+ 3. **Discord**: https://discord.gg/autogpt
411
+
412
+ ### Reporting Issues
413
+
414
+ Include:
415
+ - AutoGPT version: `git describe --tags`
416
+ - Docker version: `docker --version`
417
+ - Error logs: `docker compose logs > logs.txt`
418
+ - Steps to reproduce
419
+ - Graph configuration (sanitized)
420
+ - Environment: OS, hardware specs
@@ -0,0 +1,310 @@
1
+ ---
2
+ name: awq-quantization
3
+ description: Activation-aware weight quantization for 4-bit LLM compression with 3x speedup and minimal accuracy loss. Use when deploying large models (7B-70B) on limited GPU memory, when you need faster inference than GPTQ with better accuracy preservation, or for instruction-tuned and multimodal models. MLSys 2024 Best Paper Award winner.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Optimization, AWQ, Quantization, 4-Bit, Activation-Aware, Memory Optimization, Fast Inference, vLLM Integration, Marlin Kernels]
8
+ dependencies: [autoawq, transformers>=4.45.0, torch>=2.0.0]
9
+ ---
10
+
11
+ # AWQ (Activation-aware Weight Quantization)
12
+
13
+ 4-bit quantization that preserves salient weights based on activation patterns, achieving 3x speedup with minimal accuracy loss.
14
+
15
+ ## When to use AWQ
16
+
17
+ **Use AWQ when:**
18
+ - Need 4-bit quantization with <5% accuracy loss
19
+ - Deploying instruction-tuned or chat models (AWQ generalizes better)
20
+ - Want ~2.5-3x inference speedup over FP16
21
+ - Using vLLM for production serving
22
+ - Have Ampere+ GPUs (A100, H100, RTX 40xx) for Marlin kernel support
23
+
24
+ **Use GPTQ instead when:**
25
+ - Need maximum ecosystem compatibility (more tools support GPTQ)
26
+ - Working with ExLlamaV2 backend specifically
27
+ - Have older GPUs without Marlin support
28
+
29
+ **Use bitsandbytes instead when:**
30
+ - Need zero calibration overhead (quantize on-the-fly)
31
+ - Want to fine-tune with QLoRA
32
+ - Prefer simpler integration
33
+
34
+ ## Quick start
35
+
36
+ ### Installation
37
+
38
+ ```bash
39
+ # Default (Triton kernels)
40
+ pip install autoawq
41
+
42
+ # With optimized CUDA kernels + Flash Attention
43
+ pip install autoawq[kernels]
44
+
45
+ # Intel CPU/XPU optimization
46
+ pip install autoawq[cpu]
47
+ ```
48
+
49
+ **Requirements**: Python 3.8+, CUDA 11.8+, Compute Capability 7.5+
50
+
51
+ ### Load pre-quantized model
52
+
53
+ ```python
54
+ from awq import AutoAWQForCausalLM
55
+ from transformers import AutoTokenizer
56
+
57
+ model_name = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
58
+
59
+ model = AutoAWQForCausalLM.from_quantized(
60
+ model_name,
61
+ fuse_layers=True # Enable fused attention for speed
62
+ )
63
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
64
+
65
+ # Generate
66
+ inputs = tokenizer("Explain quantum computing", return_tensors="pt").to("cuda")
67
+ outputs = model.generate(**inputs, max_new_tokens=200)
68
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
69
+ ```
70
+
71
+ ### Quantize your own model
72
+
73
+ ```python
74
+ from awq import AutoAWQForCausalLM
75
+ from transformers import AutoTokenizer
76
+
77
+ model_path = "mistralai/Mistral-7B-Instruct-v0.2"
78
+
79
+ # Load model and tokenizer
80
+ model = AutoAWQForCausalLM.from_pretrained(model_path)
81
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
82
+
83
+ # Quantization config
84
+ quant_config = {
85
+ "zero_point": True, # Use zero-point quantization
86
+ "q_group_size": 128, # Group size (128 recommended)
87
+ "w_bit": 4, # 4-bit weights
88
+ "version": "GEMM" # GEMM for batch, GEMV for single-token
89
+ }
90
+
91
+ # Quantize (uses pileval dataset by default)
92
+ model.quantize(tokenizer, quant_config=quant_config)
93
+
94
+ # Save
95
+ model.save_quantized("mistral-7b-awq")
96
+ tokenizer.save_pretrained("mistral-7b-awq")
97
+ ```
98
+
99
+ **Timing**: ~10-15 min for 7B, ~1 hour for 70B models.
100
+
101
+ ## AWQ vs GPTQ vs bitsandbytes
102
+
103
+ | Feature | AWQ | GPTQ | bitsandbytes |
104
+ |---------|-----|------|--------------|
105
+ | **Speedup (4-bit)** | ~2.5-3x | ~2x | ~1.5x |
106
+ | **Accuracy loss** | <5% | ~5-10% | ~5-15% |
107
+ | **Calibration** | Minimal (128-1K tokens) | More extensive | None |
108
+ | **Overfitting risk** | Low | Higher | N/A |
109
+ | **Best for** | Production inference | GPU inference | Easy integration |
110
+ | **vLLM support** | Native | Yes | Limited |
111
+
112
+ **Key insight**: AWQ assumes not all weights are equally important. It protects ~1% of salient weights identified by activation patterns, reducing quantization error without mixed-precision overhead.
113
+
114
+ ## Kernel backends
115
+
116
+ ### GEMM (default, batch inference)
117
+
118
+ ```python
119
+ quant_config = {
120
+ "zero_point": True,
121
+ "q_group_size": 128,
122
+ "w_bit": 4,
123
+ "version": "GEMM" # Best for batch sizes > 1
124
+ }
125
+ ```
126
+
127
+ ### GEMV (single-token generation)
128
+
129
+ ```python
130
+ quant_config = {
131
+ "version": "GEMV" # 20% faster for batch_size=1
132
+ }
133
+ ```
134
+
135
+ **Limitation**: Supports only batch size 1 and is not well suited to long contexts.
136
+
137
+ ### Marlin (Ampere+ GPUs)
138
+
139
+ ```python
140
+ from transformers import AwqConfig, AutoModelForCausalLM
141
+
142
+ config = AwqConfig(
143
+ bits=4,
144
+ version="marlin" # 2x faster on A100/H100
145
+ )
146
+
147
+ model = AutoModelForCausalLM.from_pretrained(
148
+ "TheBloke/Mistral-7B-AWQ",
149
+ quantization_config=config
150
+ )
151
+ ```
152
+
153
+ **Requirements**: Compute Capability 8.0+ (A100, H100, RTX 40xx)
154
+
155
+ ### ExLlamaV2 (AMD compatible)
156
+
157
+ ```python
158
+ config = AwqConfig(
159
+ bits=4,
160
+ version="exllama" # Faster prefill, AMD GPU support
161
+ )
162
+ ```
163
+
164
+ ## HuggingFace Transformers integration
165
+
166
+ ### Direct loading
167
+
168
+ ```python
169
+ from transformers import AutoModelForCausalLM, AutoTokenizer
170
+
171
+ model = AutoModelForCausalLM.from_pretrained(
172
+ "TheBloke/zephyr-7B-alpha-AWQ",
173
+ device_map="auto"
174
+ )
175
+ tokenizer = AutoTokenizer.from_pretrained("TheBloke/zephyr-7B-alpha-AWQ")
176
+ ```
177
+
178
+ ### Fused modules (recommended)
179
+
180
+ ```python
181
+ from transformers import AwqConfig, AutoModelForCausalLM
182
+
183
+ config = AwqConfig(
184
+ bits=4,
185
+ fuse_max_seq_len=512, # Max sequence length for fusing
186
+ do_fuse=True # Enable fused attention/MLP
187
+ )
188
+
189
+ model = AutoModelForCausalLM.from_pretrained(
190
+ "TheBloke/Mistral-7B-OpenOrca-AWQ",
191
+ quantization_config=config
192
+ )
193
+ ```
194
+
195
+ **Note**: Fused modules cannot be combined with FlashAttention2.
196
+
197
+ ## vLLM integration
198
+
199
+ ```python
200
+ from vllm import LLM, SamplingParams
201
+
202
+ # vLLM auto-detects AWQ models
203
+ llm = LLM(
204
+ model="TheBloke/Llama-2-7B-AWQ",
205
+ quantization="awq",
206
+ dtype="half"
207
+ )
208
+
209
+ sampling = SamplingParams(temperature=0.7, max_tokens=200)
210
+ outputs = llm.generate(["Explain AI"], sampling)
211
+ ```
212
+
213
+ ## Performance benchmarks
214
+
215
+ ### Memory reduction
216
+
217
+ | Model | FP16 | AWQ 4-bit | Reduction |
218
+ |-------|------|-----------|-----------|
219
+ | Mistral 7B | 14 GB | 5.5 GB | 2.5x |
220
+ | Llama 2-13B | 26 GB | 10 GB | 2.6x |
221
+ | Llama 2-70B | 140 GB | 35 GB | 4x |
222
+
223
+ ### Inference speed (RTX 4090)
224
+
225
+ | Model | Prefill (tok/s) | Decode (tok/s) | Memory |
226
+ |-------|-----------------|----------------|--------|
227
+ | Mistral 7B GEMM | 3,897 | 114 | 5.55 GB |
228
+ | TinyLlama 1B GEMV | 5,179 | 431 | 2.10 GB |
229
+ | Llama 2-13B GEMM | 2,279 | 74 | 10.28 GB |
230
+
231
+ ### Accuracy (perplexity)
232
+
233
+ | Model | FP16 | AWQ 4-bit | Degradation |
234
+ |-------|------|-----------|-------------|
235
+ | Llama 3 8B | 8.20 | 8.48 | +3.4% |
236
+ | Mistral 7B | 5.25 | 5.42 | +3.2% |
237
+ | Qwen2 72B | 4.85 | 4.95 | +2.1% |
238
+
239
+ ## Custom calibration data
240
+
241
+ ```python
242
+ # Use custom dataset for domain-specific models
243
+ model.quantize(
244
+ tokenizer,
245
+ quant_config=quant_config,
246
+ calib_data="wikitext", # Or custom list of strings
247
+ max_calib_samples=256, # More samples = better accuracy
248
+ max_calib_seq_len=512 # Sequence length
249
+ )
250
+
251
+ # Or provide your own samples
252
+ calib_samples = [
253
+ "Your domain-specific text here...",
254
+ "More examples from your use case...",
255
+ ]
256
+ model.quantize(tokenizer, quant_config=quant_config, calib_data=calib_samples)
257
+ ```
258
+
259
+ ## Multi-GPU deployment
260
+
261
+ ```python
262
+ model = AutoAWQForCausalLM.from_quantized(
263
+ "TheBloke/Llama-2-70B-AWQ",
264
+ device_map="auto", # Auto-split across GPUs
265
+ max_memory={0: "40GB", 1: "40GB"}
266
+ )
267
+ ```
268
+
269
+ ## Supported models
270
+
271
+ 35+ architectures including:
272
+ - **Llama family**: Llama 2/3, Code Llama, Mistral, Mixtral
273
+ - **Qwen**: Qwen, Qwen2, Qwen2.5-VL
274
+ - **Others**: Falcon, MPT, Phi, Yi, DeepSeek, Gemma
275
+ - **Multimodal**: LLaVA, LLaVA-Next, Qwen2-VL
276
+
277
+ ## Common issues
278
+
279
+ **CUDA OOM during quantization**:
280
+ ```python
281
+ # Reduce the number of calibration samples
282
+ model.quantize(tokenizer, quant_config=quant_config, max_calib_samples=64)
283
+ ```
284
+
285
+ **Slow inference**:
286
+ ```python
287
+ # Enable fused layers
288
+ model = AutoAWQForCausalLM.from_quantized(model_name, fuse_layers=True)
289
+ ```
290
+
291
+ **AMD GPU support**:
292
+ ```python
293
+ # Use ExLlama backend
294
+ config = AwqConfig(bits=4, version="exllama")
295
+ ```
296
+
297
+ ## Deprecation notice
298
+
299
+ AutoAWQ is officially deprecated. For new projects, consider:
300
+ - **vLLM llm-compressor**: https://github.com/vllm-project/llm-compressor
301
+ - **MLX-LM**: For Mac devices with Apple Silicon
302
+
303
+ Existing quantized models remain usable.
304
+
305
+ ## References
306
+
307
+ - **Paper**: AWQ: Activation-aware Weight Quantization (arXiv:2306.00978) - MLSys 2024 Best Paper
308
+ - **GitHub**: https://github.com/casper-hansen/AutoAWQ
309
+ - **MIT Han Lab**: https://github.com/mit-han-lab/llm-awq
310
+ - **Models**: https://huggingface.co/models?library=awq