@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,564 @@
1
+ ---
2
+ name: blip-2-vision-language
3
+ description: Vision-language pre-training framework bridging frozen image encoders and LLMs. Use when you need image captioning, visual question answering, image-text retrieval, or multimodal chat with state-of-the-art zero-shot performance.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Multimodal, Vision-Language, Image Captioning, VQA, Zero-Shot]
8
+ dependencies: [transformers>=4.30.0, torch>=1.10.0, Pillow]
9
+ ---
10
+
11
+ # BLIP-2: Vision-Language Pre-training
12
+
13
+ Comprehensive guide to using Salesforce's BLIP-2 for vision-language tasks with frozen image encoders and large language models.
14
+
15
+ ## When to use BLIP-2
16
+
17
+ **Use BLIP-2 when:**
18
+ - Need high-quality image captioning with natural descriptions
19
+ - Building visual question answering (VQA) systems
20
+ - Require zero-shot image-text understanding without task-specific training
21
+ - Want to leverage LLM reasoning for visual tasks
22
+ - Building multimodal conversational AI
23
+ - Need image-text retrieval or matching
24
+
25
+ **Key features:**
26
+ - **Q-Former architecture**: Lightweight query transformer bridges vision and language
27
+ - **Frozen backbone efficiency**: No need to fine-tune large vision/language models
28
+ - **Multiple LLM backends**: OPT (2.7B, 6.7B) and FlanT5 (XL, XXL)
29
+ - **Zero-shot capabilities**: Strong performance without task-specific training
30
+ - **Efficient training**: Only trains Q-Former (~188M parameters)
31
+ - **State-of-the-art results**: Beats larger models on VQA benchmarks
32
+
33
+ **Use alternatives instead:**
34
+ - **LLaVA**: For instruction-following multimodal chat
35
+ - **InstructBLIP**: For improved instruction-following (BLIP-2 successor)
36
+ - **GPT-4V/Claude 3**: For production multimodal chat (proprietary)
37
+ - **CLIP**: For simple image-text similarity without generation
38
+ - **Flamingo**: For few-shot visual learning
39
+
40
+ ## Quick start
41
+
42
+ ### Installation
43
+
44
+ ```bash
45
+ # HuggingFace Transformers (recommended)
46
+ pip install transformers accelerate torch Pillow
47
+
48
+ # Or LAVIS library (Salesforce official)
49
+ pip install salesforce-lavis
50
+ ```
51
+
52
+ ### Basic image captioning
53
+
54
+ ```python
55
+ import torch
56
+ from PIL import Image
57
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
58
+
59
+ # Load model and processor
60
+ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
61
+ model = Blip2ForConditionalGeneration.from_pretrained(
62
+ "Salesforce/blip2-opt-2.7b",
63
+ torch_dtype=torch.float16,
64
+ device_map="auto"
65
+ )
66
+
67
+ # Load image
68
+ image = Image.open("photo.jpg").convert("RGB")
69
+
70
+ # Generate caption
71
+ inputs = processor(images=image, return_tensors="pt").to("cuda", torch.float16)
72
+ generated_ids = model.generate(**inputs, max_new_tokens=50)
73
+ caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
74
+ print(caption)
75
+ ```
76
+
77
+ ### Visual question answering
78
+
79
+ ```python
80
+ # Ask a question about the image
81
+ question = "What color is the car in this image?"
82
+
83
+ inputs = processor(images=image, text=question, return_tensors="pt").to("cuda", torch.float16)
84
+ generated_ids = model.generate(**inputs, max_new_tokens=50)
85
+ answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
86
+ print(answer)
87
+ ```
88
+
89
+ ### Using LAVIS library
90
+
91
+ ```python
92
+ import torch
93
+ from lavis.models import load_model_and_preprocess
94
+ from PIL import Image
95
+
96
+ # Load model
97
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
98
+ model, vis_processors, txt_processors = load_model_and_preprocess(
99
+ name="blip2_opt",
100
+ model_type="pretrain_opt2.7b",
101
+ is_eval=True,
102
+ device=device
103
+ )
104
+
105
+ # Process image
106
+ image = Image.open("photo.jpg").convert("RGB")
107
+ image = vis_processors["eval"](image).unsqueeze(0).to(device)
108
+
109
+ # Caption
110
+ caption = model.generate({"image": image})
111
+ print(caption)
112
+
113
+ # VQA
114
+ question = txt_processors["eval"]("What is in this image?")
115
+ answer = model.generate({"image": image, "prompt": question})
116
+ print(answer)
117
+ ```
118
+
119
+ ## Core concepts
120
+
121
+ ### Architecture overview
122
+
123
+ ```
124
+ BLIP-2 Architecture:
125
+ ┌─────────────────────────────────────────────────────────────┐
126
+ │ Q-Former │
127
+ │ ┌─────────────────────────────────────────────────────┐ │
128
+ │ │ Learned Queries (32 queries × 768 dim) │ │
129
+ │ └────────────────────────┬────────────────────────────┘ │
130
+ │ │ │
131
+ │ ┌────────────────────────▼────────────────────────────┐ │
132
+ │ │ Cross-Attention with Image Features │ │
133
+ │ └────────────────────────┬────────────────────────────┘ │
134
+ │ │ │
135
+ │ ┌────────────────────────▼────────────────────────────┐ │
136
+ │ │ Self-Attention Layers (Transformer) │ │
137
+ │ └────────────────────────┬────────────────────────────┘ │
138
+ └───────────────────────────┼─────────────────────────────────┘
139
+
140
+ ┌───────────────────────────▼─────────────────────────────────┐
141
+ │ Frozen Vision Encoder │ Frozen LLM │
142
+ │ (ViT-G/14 from EVA-CLIP) │ (OPT or FlanT5) │
143
+ └─────────────────────────────────────────────────────────────┘
144
+ ```
145
+
146
+ ### Model variants
147
+
148
+ | Model | LLM Backend | Size | Use Case |
149
+ |-------|-------------|------|----------|
150
+ | `blip2-opt-2.7b` | OPT-2.7B | ~4GB | General captioning, VQA |
151
+ | `blip2-opt-6.7b` | OPT-6.7B | ~8GB | Better reasoning |
152
+ | `blip2-flan-t5-xl` | FlanT5-XL | ~5GB | Instruction following |
153
+ | `blip2-flan-t5-xxl` | FlanT5-XXL | ~13GB | Best quality |
154
+
155
+ ### Q-Former components
156
+
157
+ | Component | Description | Parameters |
158
+ |-----------|-------------|------------|
159
+ | Learned queries | Fixed number of learnable embeddings | 32 × 768 |
160
+ | Image transformer | Cross-attention to vision features | ~108M |
161
+ | Text transformer | Self-attention for text | ~108M |
162
+ | Linear projection | Maps to LLM dimension | Varies |
163
+
164
+ ## Advanced usage
165
+
166
+ ### Batch processing
167
+
168
+ ```python
169
+ from PIL import Image
170
+ import torch
171
+
172
+ # Load multiple images
173
+ images = [Image.open(f"image_{i}.jpg").convert("RGB") for i in range(4)]
174
+ questions = [
175
+ "What is shown in this image?",
176
+ "Describe the scene.",
177
+ "What colors are prominent?",
178
+ "Is there a person in this image?"
179
+ ]
180
+
181
+ # Process batch
182
+ inputs = processor(
183
+ images=images,
184
+ text=questions,
185
+ return_tensors="pt",
186
+ padding=True
187
+ ).to("cuda", torch.float16)
188
+
189
+ # Generate
190
+ generated_ids = model.generate(**inputs, max_new_tokens=50)
191
+ answers = processor.batch_decode(generated_ids, skip_special_tokens=True)
192
+
193
+ for q, a in zip(questions, answers):
194
+ print(f"Q: {q}\nA: {a}\n")
195
+ ```
196
+
197
+ ### Controlling generation
198
+
199
+ ```python
200
+ # Control generation parameters
201
+ generated_ids = model.generate(
202
+ **inputs,
203
+ max_new_tokens=100,
204
+ min_length=20,
205
+ num_beams=5, # Beam search
206
+ no_repeat_ngram_size=2, # Avoid repetition
207
+ top_p=0.9, # Nucleus sampling
208
+ temperature=0.7, # Creativity
209
+ do_sample=True, # Enable sampling
210
+ )
211
+
212
+ # For deterministic output
213
+ generated_ids = model.generate(
214
+ **inputs,
215
+ max_new_tokens=50,
216
+ num_beams=5,
217
+ do_sample=False,
218
+ )
219
+ ```
220
+
221
+ ### Memory optimization
222
+
223
+ ```python
224
+ # 8-bit quantization
225
+ from transformers import BitsAndBytesConfig
226
+
227
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
228
+
229
+ model = Blip2ForConditionalGeneration.from_pretrained(
230
+ "Salesforce/blip2-opt-6.7b",
231
+ quantization_config=quantization_config,
232
+ device_map="auto"
233
+ )
234
+
235
+ # 4-bit quantization (more aggressive)
236
+ quantization_config = BitsAndBytesConfig(
237
+ load_in_4bit=True,
238
+ bnb_4bit_compute_dtype=torch.float16
239
+ )
240
+
241
+ model = Blip2ForConditionalGeneration.from_pretrained(
242
+ "Salesforce/blip2-flan-t5-xxl",
243
+ quantization_config=quantization_config,
244
+ device_map="auto"
245
+ )
246
+ ```
247
+
248
+ ### Image-text matching
249
+
250
+ ```python
251
+ # Using LAVIS for ITM (Image-Text Matching)
252
+ from lavis.models import load_model_and_preprocess
253
+
254
+ model, vis_processors, txt_processors = load_model_and_preprocess(
255
+ name="blip2_image_text_matching",
256
+ model_type="pretrain",
257
+ is_eval=True,
258
+ device=device
259
+ )
260
+
261
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
262
+ text = txt_processors["eval"]("a dog sitting on grass")
263
+
264
+ # Get matching score
265
+ itm_output = model({"image": image, "text_input": text}, match_head="itm")
266
+ itm_scores = torch.nn.functional.softmax(itm_output, dim=1)
267
+ print(f"Match probability: {itm_scores[:, 1].item():.3f}")
268
+ ```
269
+
270
+ ### Feature extraction
271
+
272
+ ```python
273
+ # Extract image features with Q-Former
274
+ from lavis.models import load_model_and_preprocess
275
+
276
+ model, vis_processors, _ = load_model_and_preprocess(
277
+ name="blip2_feature_extractor",
278
+ model_type="pretrain",
279
+ is_eval=True,
280
+ device=device
281
+ )
282
+
283
+ image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
284
+
285
+ # Get features
286
+ features = model.extract_features({"image": image}, mode="image")
287
+ image_embeds = features.image_embeds # Shape: [1, 32, 768]
288
+ image_features = features.image_embeds_proj # Projected for matching
289
+ ```
290
+
291
+ ## Common workflows
292
+
293
+ ### Workflow 1: Image captioning pipeline
294
+
295
+ ```python
296
+ import torch
297
+ from PIL import Image
298
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
299
+ from pathlib import Path
300
+
301
+ class ImageCaptioner:
302
+ def __init__(self, model_name="Salesforce/blip2-opt-2.7b"):
303
+ self.processor = Blip2Processor.from_pretrained(model_name)
304
+ self.model = Blip2ForConditionalGeneration.from_pretrained(
305
+ model_name,
306
+ torch_dtype=torch.float16,
307
+ device_map="auto"
308
+ )
309
+
310
+ def caption(self, image_path: str, prompt: str = None) -> str:
311
+ image = Image.open(image_path).convert("RGB")
312
+
313
+ if prompt:
314
+ inputs = self.processor(images=image, text=prompt, return_tensors="pt")
315
+ else:
316
+ inputs = self.processor(images=image, return_tensors="pt")
317
+
318
+ inputs = inputs.to("cuda", torch.float16)
319
+
320
+ generated_ids = self.model.generate(
321
+ **inputs,
322
+ max_new_tokens=50,
323
+ num_beams=5
324
+ )
325
+
326
+ return self.processor.decode(generated_ids[0], skip_special_tokens=True)
327
+
328
+ def caption_batch(self, image_paths: list, prompt: str = None) -> list:
329
+ images = [Image.open(p).convert("RGB") for p in image_paths]
330
+
331
+ if prompt:
332
+ inputs = self.processor(
333
+ images=images,
334
+ text=[prompt] * len(images),
335
+ return_tensors="pt",
336
+ padding=True
337
+ )
338
+ else:
339
+ inputs = self.processor(images=images, return_tensors="pt", padding=True)
340
+
341
+ inputs = inputs.to("cuda", torch.float16)
342
+
343
+ generated_ids = self.model.generate(**inputs, max_new_tokens=50)
344
+ return self.processor.batch_decode(generated_ids, skip_special_tokens=True)
345
+
346
+ # Usage
347
+ captioner = ImageCaptioner()
348
+
349
+ # Single image
350
+ caption = captioner.caption("photo.jpg")
351
+ print(f"Caption: {caption}")
352
+
353
+ # With prompt for style
354
+ caption = captioner.caption("photo.jpg", "a detailed description of")
355
+ print(f"Detailed: {caption}")
356
+
357
+ # Batch processing
358
+ captions = captioner.caption_batch(["img1.jpg", "img2.jpg", "img3.jpg"])
359
+ for i, cap in enumerate(captions):
360
+ print(f"Image {i+1}: {cap}")
361
+ ```
362
+
363
+ ### Workflow 2: Visual Q&A system
364
+
365
+ ```python
366
+ class VisualQA:
367
+ def __init__(self, model_name="Salesforce/blip2-flan-t5-xl"):
368
+ self.processor = Blip2Processor.from_pretrained(model_name)
369
+ self.model = Blip2ForConditionalGeneration.from_pretrained(
370
+ model_name,
371
+ torch_dtype=torch.float16,
372
+ device_map="auto"
373
+ )
374
+ self.current_image = None
375
+ self.current_inputs = None
376
+
377
+ def set_image(self, image_path: str):
378
+ """Load image for multiple questions."""
379
+ self.current_image = Image.open(image_path).convert("RGB")
380
+
381
+ def ask(self, question: str) -> str:
382
+ """Ask a question about the current image."""
383
+ if self.current_image is None:
384
+ raise ValueError("No image set. Call set_image() first.")
385
+
386
+ # Format question for FlanT5
387
+ prompt = f"Question: {question} Answer:"
388
+
389
+ inputs = self.processor(
390
+ images=self.current_image,
391
+ text=prompt,
392
+ return_tensors="pt"
393
+ ).to("cuda", torch.float16)
394
+
395
+ generated_ids = self.model.generate(
396
+ **inputs,
397
+ max_new_tokens=50,
398
+ num_beams=5
399
+ )
400
+
401
+ return self.processor.decode(generated_ids[0], skip_special_tokens=True)
402
+
403
+ def ask_multiple(self, questions: list) -> dict:
404
+ """Ask multiple questions about current image."""
405
+ return {q: self.ask(q) for q in questions}
406
+
407
+ # Usage
408
+ vqa = VisualQA()
409
+ vqa.set_image("scene.jpg")
410
+
411
+ # Ask questions
412
+ print(vqa.ask("What objects are in this image?"))
413
+ print(vqa.ask("What is the weather like?"))
414
+ print(vqa.ask("How many people are there?"))
415
+
416
+ # Batch questions
417
+ results = vqa.ask_multiple([
418
+ "What is the main subject?",
419
+ "What colors are dominant?",
420
+ "Is this indoors or outdoors?"
421
+ ])
422
+ ```
423
+
424
+ ### Workflow 3: Image search/retrieval
425
+
426
+ ```python
427
+ import torch
428
+ import numpy as np
429
+ from PIL import Image
430
+ from lavis.models import load_model_and_preprocess
431
+
432
+ class ImageSearchEngine:
433
+ def __init__(self):
434
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
435
+ self.model, self.vis_processors, self.txt_processors = load_model_and_preprocess(
436
+ name="blip2_feature_extractor",
437
+ model_type="pretrain",
438
+ is_eval=True,
439
+ device=self.device
440
+ )
441
+ self.image_features = []
442
+ self.image_paths = []
443
+
444
+ def index_images(self, image_paths: list):
445
+ """Build index from images."""
446
+ self.image_paths = image_paths
447
+
448
+ for path in image_paths:
449
+ image = Image.open(path).convert("RGB")
450
+ image = self.vis_processors["eval"](image).unsqueeze(0).to(self.device)
451
+
452
+ with torch.no_grad():
453
+ features = self.model.extract_features({"image": image}, mode="image")
454
+ # Use projected features for matching
455
+ self.image_features.append(
456
+ features.image_embeds_proj.mean(dim=1).cpu().numpy()
457
+ )
458
+
459
+ self.image_features = np.vstack(self.image_features)
460
+
461
+ def search(self, query: str, top_k: int = 5) -> list:
462
+ """Search images by text query."""
463
+ # Get text features
464
+ text = self.txt_processors["eval"](query)
465
+ text_input = {"text_input": [text]}
466
+
467
+ with torch.no_grad():
468
+ text_features = self.model.extract_features(text_input, mode="text")
469
+ text_embeds = text_features.text_embeds_proj[:, 0].cpu().numpy()
470
+
471
+ # Compute similarities
472
+ similarities = np.dot(self.image_features, text_embeds.T).squeeze()
473
+ top_indices = np.argsort(similarities)[::-1][:top_k]
474
+
475
+ return [(self.image_paths[i], similarities[i]) for i in top_indices]
476
+
477
+ # Usage
478
+ engine = ImageSearchEngine()
479
+ engine.index_images(["img1.jpg", "img2.jpg", "img3.jpg", ...])
480
+
481
+ # Search
482
+ results = engine.search("a sunset over the ocean", top_k=5)
483
+ for path, score in results:
484
+ print(f"{path}: {score:.3f}")
485
+ ```
486
+
487
+ ## Output format
488
+
489
+ ### Generation output
490
+
491
+ ```python
492
+ # Direct generation returns token IDs
493
+ generated_ids = model.generate(**inputs, max_new_tokens=50)
494
+ # Shape: [batch_size, sequence_length]
495
+
496
+ # Decode to text
497
+ text = processor.batch_decode(generated_ids, skip_special_tokens=True)
498
+ # Returns: list of strings
499
+ ```
500
+
501
+ ### Feature extraction output
502
+
503
+ ```python
504
+ # Q-Former outputs
505
+ features = model.extract_features({"image": image}, mode="image")
506
+
507
+ features.image_embeds # [B, 32, 768] - Q-Former outputs
508
+ features.image_embeds_proj # [B, 32, 256] - Projected for matching
509
+ features.text_embeds # [B, seq_len, 768] - Text features (from mode="text")
510
+ features.text_embeds_proj # [B, seq_len, 256] - Projected text ([:, 0] is the CLS token)
511
+ ```
512
+
513
+ ## Performance optimization
514
+
515
+ ### GPU memory requirements
516
+
517
+ | Model | FP16 VRAM | INT8 VRAM | INT4 VRAM |
518
+ |-------|-----------|-----------|-----------|
519
+ | blip2-opt-2.7b | ~8GB | ~5GB | ~3GB |
520
+ | blip2-opt-6.7b | ~16GB | ~9GB | ~5GB |
521
+ | blip2-flan-t5-xl | ~10GB | ~6GB | ~4GB |
522
+ | blip2-flan-t5-xxl | ~26GB | ~14GB | ~8GB |
523
+
524
+ ### Speed optimization
525
+
526
+ ```python
527
+ # Use Flash Attention if available
528
+ model = Blip2ForConditionalGeneration.from_pretrained(
529
+ "Salesforce/blip2-opt-2.7b",
530
+ torch_dtype=torch.float16,
531
+ attn_implementation="flash_attention_2", # Requires flash-attn
532
+ device_map="auto"
533
+ )
534
+
535
+ # Compile model (PyTorch 2.0+)
536
+ model = torch.compile(model)
537
+
538
+ # Use smaller images (if quality allows)
539
+ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
540
+ # BLIP-2 was trained at 224x224, so keep the processor's default resolution for best quality
541
+ ```
542
+
543
+ ## Common issues
544
+
545
+ | Issue | Solution |
546
+ |-------|----------|
547
+ | CUDA OOM | Use INT8/INT4 quantization, smaller model |
548
+ | Slow generation | Use greedy decoding, reduce max_new_tokens |
549
+ | Poor captions | Try FlanT5 variant, use prompts |
550
+ | Hallucinations | Lower temperature, use beam search |
551
+ | Wrong answers | Rephrase question, provide context |
552
+
553
+ ## References
554
+
555
+ - **[Advanced Usage](references/advanced-usage.md)** - Fine-tuning, integration, deployment
556
+ - **[Troubleshooting](references/troubleshooting.md)** - Common issues and solutions
557
+
558
+ ## Resources
559
+
560
+ - **Paper**: https://arxiv.org/abs/2301.12597
561
+ - **GitHub (LAVIS)**: https://github.com/salesforce/LAVIS
562
+ - **HuggingFace**: https://huggingface.co/Salesforce/blip2-opt-2.7b
563
+ - **Demo**: https://huggingface.co/spaces/Salesforce/BLIP2
564
+ - **InstructBLIP**: https://arxiv.org/abs/2305.06500 (successor)