@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,680 @@
1
+ # BLIP-2 Advanced Usage Guide
2
+
3
+ ## Fine-tuning BLIP-2
4
+
5
+ ### LoRA fine-tuning (recommended)
6
+
7
+ ```python
8
+ import torch
9
+ from transformers import Blip2ForConditionalGeneration, Blip2Processor
10
+ from peft import LoraConfig, get_peft_model
11
+
12
+ # Load base model
13
+ model = Blip2ForConditionalGeneration.from_pretrained(
14
+ "Salesforce/blip2-opt-2.7b",
15
+ torch_dtype=torch.float16,
16
+ device_map="auto"
17
+ )
18
+
19
+ # Configure LoRA for the language model
20
+ lora_config = LoraConfig(
21
+ r=16,
22
+ lora_alpha=32,
23
+ target_modules=["q_proj", "v_proj", "k_proj", "out_proj"],
24
+ lora_dropout=0.05,
25
+ bias="none",
26
+ task_type="CAUSAL_LM"
27
+ )
28
+
29
+ # Apply LoRA
30
+ model = get_peft_model(model, lora_config)
31
+ model.print_trainable_parameters()
32
+ # trainable params: ~4M, all params: ~3.8B (0.1%)
33
+ ```
34
+
35
+ ### Fine-tuning Q-Former only
36
+
37
+ ```python
38
+ # Freeze everything except Q-Former
39
+ for name, param in model.named_parameters():
40
+ if "qformer" not in name.lower():
41
+ param.requires_grad = False
42
+ else:
43
+ param.requires_grad = True
44
+
45
+ # Check trainable parameters
46
+ trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
47
+ total = sum(p.numel() for p in model.parameters())
48
+ print(f"Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")
49
+ ```
50
+
51
+ ### Custom dataset for fine-tuning
52
+
53
+ ```python
54
+ import torch
55
+ from torch.utils.data import Dataset, DataLoader
56
+ from PIL import Image
57
+
58
+ class CaptionDataset(Dataset):
59
+ def __init__(self, data, processor, max_length=128):
60
+ self.data = data # List of {"image_path": str, "caption": str}
61
+ self.processor = processor
62
+ self.max_length = max_length
63
+
64
+ def __len__(self):
65
+ return len(self.data)
66
+
67
+ def __getitem__(self, idx):
68
+ item = self.data[idx]
69
+ image = Image.open(item["image_path"]).convert("RGB")
70
+
71
+ # Process inputs
72
+ encoding = self.processor(
73
+ images=image,
74
+ text=item["caption"],
75
+ padding="max_length",
76
+ truncation=True,
77
+ max_length=self.max_length,
78
+ return_tensors="pt"
79
+ )
80
+
81
+ # Remove batch dimension
82
+ encoding = {k: v.squeeze(0) for k, v in encoding.items()}
83
+
84
+ # Labels for language modeling
85
+ encoding["labels"] = encoding["input_ids"].clone()
86
+
87
+ return encoding
88
+
89
+ # Create dataloader
90
+ dataset = CaptionDataset(train_data, processor)
91
+ dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
92
+ ```
93
+
94
+ ### Training loop
95
+
96
+ ```python
97
+ from transformers import AdamW, get_linear_schedule_with_warmup
98
+ from tqdm import tqdm
99
+
100
+ # Optimizer
101
+ optimizer = AdamW(model.parameters(), lr=1e-5, weight_decay=0.01)
102
+
103
+ # Scheduler
104
+ num_epochs = 3
105
+ num_training_steps = len(dataloader) * num_epochs
106
+ scheduler = get_linear_schedule_with_warmup(
107
+ optimizer,
108
+ num_warmup_steps=num_training_steps // 10,
109
+ num_training_steps=num_training_steps
110
+ )
111
+
112
+ # Training
113
+ model.train()
114
+ for epoch in range(num_epochs):
115
+ total_loss = 0
116
+
117
+ for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}"):
118
+ batch = {k: v.to("cuda") for k, v in batch.items()}
119
+
120
+ outputs = model(**batch)
121
+ loss = outputs.loss
122
+
123
+ loss.backward()
124
+ torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
125
+
126
+ optimizer.step()
127
+ scheduler.step()
128
+ optimizer.zero_grad()
129
+
130
+ total_loss += loss.item()
131
+
132
+ avg_loss = total_loss / len(dataloader)
133
+ print(f"Epoch {epoch+1} - Loss: {avg_loss:.4f}")
134
+
135
+ # Save fine-tuned model
136
+ model.save_pretrained("blip2-finetuned")
137
+ processor.save_pretrained("blip2-finetuned")
138
+ ```
139
+
140
+ ### Fine-tuning with LAVIS
141
+
142
+ ```python
143
+ from lavis.models import load_model_and_preprocess
144
+ from lavis.common.registry import registry
145
+ from lavis.datasets.builders import load_dataset
146
+
147
+ # Load model
148
+ model, vis_processors, txt_processors = load_model_and_preprocess(
149
+ name="blip2_opt",
150
+ model_type="pretrain_opt2.7b",
151
+ is_eval=False, # Training mode
152
+ device="cuda"
153
+ )
154
+
155
+ # Load dataset
156
+ dataset = load_dataset("coco_caption")
157
+
158
+ # Get trainer class
159
+ runner_cls = registry.get_runner_class("runner_base")
160
+ runner = runner_cls(
161
+ cfg=cfg,
162
+ task=task,
163
+ model=model,
164
+ datasets=datasets
165
+ )
166
+
167
+ # Train
168
+ runner.train()
169
+ ```
170
+
171
+ ## Multi-GPU Training
172
+
173
+ ### DataParallel
174
+
175
+ ```python
176
+ import torch.nn as nn
177
+
178
+ model = Blip2ForConditionalGeneration.from_pretrained(
179
+ "Salesforce/blip2-opt-2.7b",
180
+ torch_dtype=torch.float16
181
+ )
182
+
183
+ # Wrap with DataParallel
184
+ if torch.cuda.device_count() > 1:
185
+ model = nn.DataParallel(model)
186
+
187
+ model.to("cuda")
188
+ ```
189
+
190
+ ### DistributedDataParallel
191
+
192
+ ```python
193
+ import torch.distributed as dist
194
+ from torch.nn.parallel import DistributedDataParallel as DDP
195
+ from torch.utils.data.distributed import DistributedSampler
196
+
197
+ def setup(rank, world_size):
198
+ dist.init_process_group("nccl", rank=rank, world_size=world_size)
199
+ torch.cuda.set_device(rank)
200
+
201
+ def train(rank, world_size):
202
+ setup(rank, world_size)
203
+
204
+ model = Blip2ForConditionalGeneration.from_pretrained(
205
+ "Salesforce/blip2-opt-2.7b",
206
+ torch_dtype=torch.float16
207
+ ).to(rank)
208
+
209
+ model = DDP(model, device_ids=[rank])
210
+
211
+ # Use DistributedSampler
212
+ sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
213
+ dataloader = DataLoader(dataset, sampler=sampler, batch_size=4)
214
+
215
+ # Training loop
216
+ for epoch in range(num_epochs):
217
+ sampler.set_epoch(epoch)
218
+ for batch in dataloader:
219
+ # ... training code
220
+ pass
221
+
222
+ dist.destroy_process_group()
223
+
224
+ # Launch
225
+ import torch.multiprocessing as mp
226
+ world_size = torch.cuda.device_count()
227
+ mp.spawn(train, args=(world_size,), nprocs=world_size)
228
+ ```
229
+
230
+ ### Accelerate integration
231
+
232
+ ```python
233
+ from accelerate import Accelerator
234
+ from transformers import Blip2ForConditionalGeneration, Blip2Processor
235
+
236
+ accelerator = Accelerator(mixed_precision="fp16")
237
+
238
+ model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
239
+ optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
240
+
241
+ # Prepare for distributed training
242
+ model, optimizer, dataloader = accelerator.prepare(
243
+ model, optimizer, dataloader
244
+ )
245
+
246
+ # Training loop
247
+ for batch in dataloader:
248
+ outputs = model(**batch)
249
+ loss = outputs.loss
250
+
251
+ accelerator.backward(loss)
252
+ optimizer.step()
253
+ optimizer.zero_grad()
254
+ ```
255
+
256
+ ## Integration Patterns
257
+
258
+ ### Gradio interface
259
+
260
+ ```python
261
+ import gradio as gr
262
+ import torch
263
+ from PIL import Image
264
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
265
+
266
+ # Load model
267
+ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
268
+ model = Blip2ForConditionalGeneration.from_pretrained(
269
+ "Salesforce/blip2-opt-2.7b",
270
+ torch_dtype=torch.float16,
271
+ device_map="auto"
272
+ )
273
+
274
+ def caption_image(image, question=None):
275
+ if question:
276
+ inputs = processor(images=image, text=question, return_tensors="pt")
277
+ else:
278
+ inputs = processor(images=image, return_tensors="pt")
279
+
280
+ inputs = inputs.to("cuda", torch.float16)
281
+
282
+ generated_ids = model.generate(**inputs, max_new_tokens=100)
283
+ return processor.decode(generated_ids[0], skip_special_tokens=True)
284
+
285
+ # Create interface
286
+ demo = gr.Interface(
287
+ fn=caption_image,
288
+ inputs=[
289
+ gr.Image(type="pil", label="Upload Image"),
290
+ gr.Textbox(label="Question (optional)", placeholder="What is in this image?")
291
+ ],
292
+ outputs=gr.Textbox(label="Response"),
293
+ title="BLIP-2 Demo",
294
+ examples=[
295
+ ["example1.jpg", None],
296
+ ["example2.jpg", "What colors are in this image?"]
297
+ ]
298
+ )
299
+
300
+ demo.launch()
301
+ ```
302
+
303
+ ### FastAPI server
304
+
305
+ ```python
306
+ from fastapi import FastAPI, UploadFile, File
307
+ from PIL import Image
308
+ import torch
309
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
310
+ import io
311
+
312
+ app = FastAPI()
313
+
314
+ # Load model at startup
315
+ processor = None
316
+ model = None
317
+
318
+ @app.on_event("startup")
319
+ async def load_model():
320
+ global processor, model
321
+ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
322
+ model = Blip2ForConditionalGeneration.from_pretrained(
323
+ "Salesforce/blip2-opt-2.7b",
324
+ torch_dtype=torch.float16,
325
+ device_map="auto"
326
+ )
327
+
328
+ @app.post("/caption")
329
+ async def caption(file: UploadFile = File(...), question: str | None = None):
330
+ # Read image
331
+ contents = await file.read()
332
+ image = Image.open(io.BytesIO(contents)).convert("RGB")
333
+
334
+ # Process
335
+ if question:
336
+ inputs = processor(images=image, text=question, return_tensors="pt")
337
+ else:
338
+ inputs = processor(images=image, return_tensors="pt")
339
+
340
+ inputs = inputs.to("cuda", torch.float16)
341
+
342
+ # Generate
343
+ generated_ids = model.generate(**inputs, max_new_tokens=100)
344
+ caption = processor.decode(generated_ids[0], skip_special_tokens=True)
345
+
346
+ return {"caption": caption}
347
+
348
+ @app.post("/batch_caption")
349
+ async def batch_caption(files: list[UploadFile] = File(...)):
350
+ images = []
351
+ for file in files:
352
+ contents = await file.read()
353
+ images.append(Image.open(io.BytesIO(contents)).convert("RGB"))
354
+
355
+ inputs = processor(images=images, return_tensors="pt", padding=True)
356
+ inputs = inputs.to("cuda", torch.float16)
357
+
358
+ generated_ids = model.generate(**inputs, max_new_tokens=100)
359
+ captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
360
+
361
+ return {"captions": captions}
362
+
363
+ # Run: uvicorn server:app --host 0.0.0.0 --port 8000
364
+ ```
365
+
366
+ ### LangChain integration
367
+
368
+ ```python
369
+ from langchain.tools import BaseTool
370
+ from langchain.agents import initialize_agent, AgentType
371
+ from langchain.llms import OpenAI
372
+ import torch
373
+ from PIL import Image
374
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
375
+
376
+ class ImageCaptionTool(BaseTool):
377
+ name: str = "image_caption"
378
+ description: str = "Generate a caption for an image. Input should be an image file path."
379
+
380
+ def __init__(self):
381
+ super().__init__()
382
+ self.processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
383
+ self.model = Blip2ForConditionalGeneration.from_pretrained(
384
+ "Salesforce/blip2-opt-2.7b",
385
+ torch_dtype=torch.float16,
386
+ device_map="auto"
387
+ )
388
+
389
+ def _run(self, image_path: str) -> str:
390
+ image = Image.open(image_path).convert("RGB")
391
+ inputs = self.processor(images=image, return_tensors="pt").to("cuda", torch.float16)
392
+ generated_ids = self.model.generate(**inputs, max_new_tokens=50)
393
+ return self.processor.decode(generated_ids[0], skip_special_tokens=True)
394
+
395
+ class VisualQATool(BaseTool):
396
+ name: str = "visual_qa"
397
+ description: str = "Answer questions about an image. Input format: 'image_path|question'"
398
+
399
+ def __init__(self, processor, model):
400
+ super().__init__()
401
+ self.processor = processor
402
+ self.model = model
403
+
404
+ def _run(self, query: str) -> str:
405
+ image_path, question = query.split("|")
406
+ image = Image.open(image_path.strip()).convert("RGB")
407
+ inputs = self.processor(images=image, text=question.strip(), return_tensors="pt")
408
+ inputs = inputs.to("cuda", torch.float16)
409
+ generated_ids = self.model.generate(**inputs, max_new_tokens=50)
410
+ return self.processor.decode(generated_ids[0], skip_special_tokens=True)
411
+
412
+ # Use with agent (reuse the caption tool's processor/model; the agent LLM can be any LangChain LLM)
413
+ caption_tool = ImageCaptionTool()
+ tools = [caption_tool, VisualQATool(caption_tool.processor, caption_tool.model)]
414
+ agent = initialize_agent(tools, OpenAI(), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
415
+ ```
416
+
417
+ ## ONNX Export and Deployment
418
+
419
+ ### Export to ONNX
420
+
421
+ ```python
422
+ import torch
423
+ from PIL import Image
+ from transformers import Blip2ForConditionalGeneration, Blip2Processor
424
+
425
+ model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
426
+ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
427
+
428
+ # Example inputs
429
+ image = Image.open("example.jpg").convert("RGB")
430
+ inputs = processor(images=image, return_tensors="pt")
431
+
432
+ # Export vision encoder
433
+ torch.onnx.export(
434
+ model.vision_model,
435
+ inputs["pixel_values"],
436
+ "blip2_vision.onnx",
437
+ input_names=["pixel_values"],
438
+ output_names=["image_embeds"],
439
+ dynamic_axes={
440
+ "pixel_values": {0: "batch_size"},
441
+ "image_embeds": {0: "batch_size"}
442
+ },
443
+ opset_version=14
444
+ )
445
+ ```
446
+
447
+ ### TensorRT optimization
448
+
449
+ ```python
450
+ import tensorrt as trt
451
+ import pycuda.driver as cuda
452
+
453
+ def build_engine(onnx_path, engine_path):
454
+ logger = trt.Logger(trt.Logger.WARNING)
455
+ builder = trt.Builder(logger)
456
+ network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
457
+ parser = trt.OnnxParser(network, logger)
458
+
459
+ with open(onnx_path, 'rb') as f:
460
+ parser.parse(f.read())
461
+
462
+ config = builder.create_builder_config()
463
+ config.set_flag(trt.BuilderFlag.FP16) # Enable FP16
464
+ config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1GB (max_workspace_size is deprecated in TensorRT 8.4+)
465
+
466
+ engine = builder.build_serialized_network(network, config)
467
+
468
+ with open(engine_path, 'wb') as f:
469
+ f.write(engine)
470
+
471
+ build_engine("blip2_vision.onnx", "blip2_vision.trt")
472
+ ```
473
+
474
+ ## Specialized Use Cases
475
+
476
+ ### Video captioning (frame-by-frame)
477
+
478
+ ```python
479
+ import cv2
480
+ import torch
481
+ from PIL import Image
482
+
483
+ def caption_video(video_path, sample_rate=30):
484
+ """Caption video by sampling frames."""
485
+ cap = cv2.VideoCapture(video_path)
486
+ fps = cap.get(cv2.CAP_PROP_FPS)
487
+ frame_interval = max(1, int(fps * sample_rate / 30))  # sample_rate=30 ≈ one frame per second; guard against interval of 0
488
+
489
+ captions = []
490
+ frame_count = 0
491
+
492
+ while cap.isOpened():
493
+ ret, frame = cap.read()
494
+ if not ret:
495
+ break
496
+
497
+ if frame_count % frame_interval == 0:
498
+ # Convert BGR to RGB
499
+ rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
500
+ image = Image.fromarray(rgb_frame)
501
+
502
+ # Caption
503
+ inputs = processor(images=image, return_tensors="pt").to("cuda", torch.float16)
504
+ generated_ids = model.generate(**inputs, max_new_tokens=50)
505
+ caption = processor.decode(generated_ids[0], skip_special_tokens=True)
506
+
507
+ timestamp = frame_count / fps
508
+ captions.append({"timestamp": timestamp, "caption": caption})
509
+
510
+ frame_count += 1
511
+
512
+ cap.release()
513
+ return captions
514
+
515
+ # Usage
516
+ captions = caption_video("video.mp4", sample_rate=30)  # ≈1 frame per second
517
+ for c in captions:
518
+ print(f"[{c['timestamp']:.1f}s] {c['caption']}")
519
+ ```
520
+
521
+ ### Document understanding
522
+
523
+ ```python
524
+ def analyze_document(image_path):
525
+ """Extract information from document image."""
526
+ image = Image.open(image_path).convert("RGB")
527
+
528
+ questions = [
529
+ "What type of document is this?",
530
+ "What is the title of this document?",
531
+ "What are the main sections?",
532
+ "Summarize the key information."
533
+ ]
534
+
535
+ results = {}
536
+ for q in questions:
537
+ inputs = processor(images=image, text=q, return_tensors="pt").to("cuda", torch.float16)
538
+ generated_ids = model.generate(**inputs, max_new_tokens=100)
539
+ answer = processor.decode(generated_ids[0], skip_special_tokens=True)
540
+ results[q] = answer
541
+
542
+ return results
543
+
544
+ # Usage
545
+ doc_info = analyze_document("invoice.png")
546
+ for q, a in doc_info.items():
547
+ print(f"Q: {q}\nA: {a}\n")
548
+ ```
549
+
550
+ ### Medical image analysis
551
+
552
+ ```python
553
+ def analyze_medical_image(image_path, modality="xray"):
554
+ """Analyze medical images with specific prompts."""
555
+ image = Image.open(image_path).convert("RGB")
556
+
557
+ prompts = {
558
+ "xray": [
559
+ "Describe any abnormalities visible in this chest X-ray.",
560
+ "What anatomical structures are visible?",
561
+ "Is there any evidence of pathology?"
562
+ ],
563
+ "ct": [
564
+ "Describe the CT scan findings.",
565
+ "What organs are visible in this slice?",
566
+ "Are there any masses or lesions?"
567
+ ],
568
+ "mri": [
569
+ "Describe the MRI findings.",
570
+ "What tissues show abnormal signal intensity?",
571
+ "What is the most likely diagnosis?"
572
+ ]
573
+ }
574
+
575
+ results = []
576
+ for prompt in prompts.get(modality, prompts["xray"]):
577
+ inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda", torch.float16)
578
+ generated_ids = model.generate(**inputs, max_new_tokens=150)
579
+ answer = processor.decode(generated_ids[0], skip_special_tokens=True)
580
+ results.append({"question": prompt, "answer": answer})
581
+
582
+ return results
583
+
584
+ # Note: BLIP-2 is not trained on medical data - use specialized models for clinical use
585
+ ```
586
+
587
+ ## Evaluation
588
+
589
+ ### Caption evaluation metrics
590
+
591
+ ```python
592
+ from pycocoevalcap.bleu.bleu import Bleu
593
+ from pycocoevalcap.meteor.meteor import Meteor
594
+ from pycocoevalcap.rouge.rouge import Rouge
595
+ from pycocoevalcap.cider.cider import Cider
596
+
597
+ def evaluate_captions(predictions, references):
598
+ """
599
+ Evaluate generated captions against references.
600
+
601
+ Args:
602
+ predictions: dict {image_id: [caption]}
603
+ references: dict {image_id: [ref1, ref2, ...]}
604
+ """
605
+ scorers = [
606
+ (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
607
+ (Meteor(), "METEOR"),
608
+ (Rouge(), "ROUGE_L"),
609
+ (Cider(), "CIDEr"),
610
+ ]
611
+
612
+ results = {}
613
+ for scorer, method in scorers:
614
+ score, _ = scorer.compute_score(references, predictions)
615
+ if isinstance(method, list):
616
+ for sc, m in zip(score, method):
617
+ results[m] = sc
618
+ else:
619
+ results[method] = score
620
+
621
+ return results
622
+
623
+ # Usage
624
+ preds = {0: ["a cat sitting on a mat"], 1: ["a dog running in the park"]}
625
+ refs = {0: ["a cat on a mat", "cat sitting"], 1: ["dog in park", "running dog"]}
626
+ scores = evaluate_captions(preds, refs)
627
+ print(scores)
628
+ ```
629
+
630
+ ### VQA evaluation
631
+
632
+ ```python
633
+ def vqa_accuracy(predictions, ground_truths):
634
+ """
635
+ VQA accuracy metric (soft accuracy from VQA challenge).
636
+
637
+ Args:
638
+ predictions: list of predicted answers
639
+ ground_truths: list of lists (multiple annotator answers)
640
+ """
641
+ def compute_accuracy(pred, gts):
642
+ pred = pred.lower().strip()
643
+ gts = [gt.lower().strip() for gt in gts]
644
+
645
+ # Count matches
646
+ matches = sum(1 for gt in gts if pred == gt)
647
+ return min(matches / 3, 1.0) # Cap at 1.0
648
+
649
+ accuracies = []
650
+ for pred, gts in zip(predictions, ground_truths):
651
+ accuracies.append(compute_accuracy(pred, gts))
652
+
653
+ return sum(accuracies) / len(accuracies)
654
+
655
+ # Usage
656
+ preds = ["yes", "a dog", "blue"]
657
+ gts = [["yes", "yes", "no"], ["dog", "a dog", "puppy"], ["blue", "light blue", "azure"]]
658
+ acc = vqa_accuracy(preds, gts)
659
+ print(f"VQA Accuracy: {acc:.2%}")
660
+ ```
661
+
662
+ ## Model Comparison
663
+
664
+ ### BLIP-2 variants benchmark
665
+
666
+ | Model | COCO Caption (CIDEr) | VQAv2 (Acc) | GQA (Acc) | VRAM |
667
+ |-------|---------------------|-------------|-----------|------|
668
+ | blip2-opt-2.7b | 129.7 | 52.6 | 41.3 | 8GB |
669
+ | blip2-opt-6.7b | 133.4 | 54.2 | 42.8 | 16GB |
670
+ | blip2-flan-t5-xl | 138.1 | 62.9 | 44.1 | 10GB |
671
+ | blip2-flan-t5-xxl | 145.8 | 65.0 | 45.9 | 26GB |
672
+
673
+ ### Comparison with other models
674
+
675
+ | Model | Architecture | Zero-shot VQA | Training Cost |
676
+ |-------|-------------|---------------|---------------|
677
+ | BLIP-2 | Q-Former + LLM | Excellent | Low (Q-Former only) |
678
+ | LLaVA | Linear + LLM | Good | Medium |
679
+ | Flamingo | Perceiver + LLM | Excellent | High |
680
+ | InstructBLIP | Q-Former + LLM | Best | Low |