@trieungoctam/vibekit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +85 -0
  3. package/agents/debugger.md +158 -0
  4. package/agents/docs-manager.md +220 -0
  5. package/agents/planner.md +129 -0
  6. package/agents/researcher.md +58 -0
  7. package/agents/reviewer.md +152 -0
  8. package/agents/tester.md +126 -0
  9. package/bin/vibekit.js +18 -0
  10. package/hooks/lib/ck-config-utils.cjs +831 -0
  11. package/hooks/lib/colors.cjs +95 -0
  12. package/hooks/lib/config-counter.cjs +103 -0
  13. package/hooks/lib/context-builder.cjs +616 -0
  14. package/hooks/lib/git-info-cache.cjs +143 -0
  15. package/hooks/lib/hook-logger.cjs +92 -0
  16. package/hooks/lib/privacy-checker.cjs +297 -0
  17. package/hooks/lib/project-detector.cjs +474 -0
  18. package/hooks/lib/scout-checker.cjs +263 -0
  19. package/hooks/lib/transcript-parser.cjs +181 -0
  20. package/hooks/post-edit-simplify-reminder.cjs +156 -0
  21. package/hooks/privacy-block.cjs +166 -0
  22. package/hooks/scout-block.cjs +147 -0
  23. package/hooks/session-init.cjs +360 -0
  24. package/package.json +41 -0
  25. package/rules/development-rules.md +52 -0
  26. package/rules/documentation-management.md +121 -0
  27. package/rules/orchestration-protocol.md +43 -0
  28. package/rules/primary-workflow.md +57 -0
  29. package/rules/team-coordination-rules.md +90 -0
  30. package/skills/ai/agent-browser/SKILL.md +294 -0
  31. package/skills/ai/agent-browser/references/.gitkeep +0 -0
  32. package/skills/ai/agent-browser/references/agent-browser-vs-chrome-devtools.md +112 -0
  33. package/skills/ai/agent-browser/references/browserbase-cloud-setup.md +161 -0
  34. package/skills/ai/ai-artist/SKILL.md +122 -0
  35. package/skills/ai/ai-artist/data/awesome-prompts.csv +3592 -0
  36. package/skills/ai/ai-artist/data/lighting.csv +19 -0
  37. package/skills/ai/ai-artist/data/nano-banana-templates.csv +17 -0
  38. package/skills/ai/ai-artist/data/platforms.csv +11 -0
  39. package/skills/ai/ai-artist/data/styles.csv +26 -0
  40. package/skills/ai/ai-artist/data/techniques.csv +19 -0
  41. package/skills/ai/ai-artist/data/use-cases.csv +16 -0
  42. package/skills/ai/ai-artist/references/advanced-techniques.md +184 -0
  43. package/skills/ai/ai-artist/references/awesome-nano-banana-pro-prompts.md +8575 -0
  44. package/skills/ai/ai-artist/references/domain-code.md +66 -0
  45. package/skills/ai/ai-artist/references/domain-data.md +72 -0
  46. package/skills/ai/ai-artist/references/domain-marketing.md +66 -0
  47. package/skills/ai/ai-artist/references/domain-patterns.md +33 -0
  48. package/skills/ai/ai-artist/references/domain-writing.md +68 -0
  49. package/skills/ai/ai-artist/references/image-prompting.md +141 -0
  50. package/skills/ai/ai-artist/references/llm-prompting.md +165 -0
  51. package/skills/ai/ai-artist/references/nano-banana.md +136 -0
  52. package/skills/ai/ai-artist/references/reasoning-techniques.md +201 -0
  53. package/skills/ai/ai-artist/references/validation-workflow.md +117 -0
  54. package/skills/ai/ai-artist/scripts/core.py +197 -0
  55. package/skills/ai/ai-artist/scripts/extract_prompts.py +102 -0
  56. package/skills/ai/ai-artist/scripts/generate.py +370 -0
  57. package/skills/ai/ai-artist/scripts/search.py +147 -0
  58. package/skills/ai/ai-multimodal/.env.example +204 -0
  59. package/skills/ai/ai-multimodal/SKILL.md +110 -0
  60. package/skills/ai/ai-multimodal/references/audio-processing.md +387 -0
  61. package/skills/ai/ai-multimodal/references/image-generation.md +939 -0
  62. package/skills/ai/ai-multimodal/references/music-generation.md +311 -0
  63. package/skills/ai/ai-multimodal/references/video-analysis.md +515 -0
  64. package/skills/ai/ai-multimodal/references/video-generation.md +457 -0
  65. package/skills/ai/ai-multimodal/references/vision-understanding.md +492 -0
  66. package/skills/ai/ai-multimodal/scripts/.coverage +0 -0
  67. package/skills/ai/ai-multimodal/scripts/check_setup.py +315 -0
  68. package/skills/ai/ai-multimodal/scripts/document_converter.py +395 -0
  69. package/skills/ai/ai-multimodal/scripts/gemini_batch_process.py +1185 -0
  70. package/skills/ai/ai-multimodal/scripts/media_optimizer.py +506 -0
  71. package/skills/ai/ai-multimodal/scripts/requirements.txt +26 -0
  72. package/skills/ai/ai-multimodal/scripts/tests/.coverage +0 -0
  73. package/skills/ai/ai-multimodal/scripts/tests/requirements.txt +20 -0
  74. package/skills/ai/ai-multimodal/scripts/tests/test_document_converter.py +74 -0
  75. package/skills/ai/ai-multimodal/scripts/tests/test_gemini_batch_process.py +362 -0
  76. package/skills/ai/ai-multimodal/scripts/tests/test_media_optimizer.py +373 -0
  77. package/skills/ai/mcp-management/README.md +219 -0
  78. package/skills/ai/mcp-management/SKILL.md +210 -0
  79. package/skills/ai/mcp-management/assets/tools.json +3146 -0
  80. package/skills/ai/mcp-management/references/configuration.md +114 -0
  81. package/skills/ai/mcp-management/references/gemini-cli-integration.md +221 -0
  82. package/skills/ai/mcp-management/references/mcp-protocol.md +116 -0
  83. package/skills/ai/mcp-management/scripts/.env.example +10 -0
  84. package/skills/ai/mcp-management/scripts/cli.ts +195 -0
  85. package/skills/ai/mcp-management/scripts/dist/analyze-tools.js +70 -0
  86. package/skills/ai/mcp-management/scripts/dist/cli.js +160 -0
  87. package/skills/ai/mcp-management/scripts/dist/mcp-client.js +183 -0
  88. package/skills/ai/mcp-management/scripts/mcp-client.ts +230 -0
  89. package/skills/ai/mcp-management/scripts/package.json +20 -0
  90. package/skills/ai/mcp-management/scripts/tsconfig.json +15 -0
  91. package/skills/core/brainstorm/SKILL.md +164 -0
  92. package/skills/core/brainstorm/scripts/frame-template.html +214 -0
  93. package/skills/core/brainstorm/scripts/helper.js +88 -0
  94. package/skills/core/brainstorm/scripts/server.cjs +338 -0
  95. package/skills/core/brainstorm/scripts/start-server.sh +153 -0
  96. package/skills/core/brainstorm/scripts/stop-server.sh +55 -0
  97. package/skills/core/brainstorm/spec-document-reviewer-prompt.md +49 -0
  98. package/skills/core/brainstorm/visual-companion.md +286 -0
  99. package/skills/core/code-review/SKILL.md +147 -0
  100. package/skills/core/code-review/references/code-review-reception.md +113 -0
  101. package/skills/core/code-review/references/codebase-scan-workflow.md +29 -0
  102. package/skills/core/code-review/references/edge-case-scouting.md +119 -0
  103. package/skills/core/code-review/references/parallel-review-workflow.md +69 -0
  104. package/skills/core/code-review/references/requesting-code-review.md +116 -0
  105. package/skills/core/code-review/references/task-management-reviews.md +140 -0
  106. package/skills/core/code-review/references/verification-before-completion.md +139 -0
  107. package/skills/core/cook/README.md +86 -0
  108. package/skills/core/cook/SKILL.md +113 -0
  109. package/skills/core/cook/references/intent-detection.md +101 -0
  110. package/skills/core/cook/references/review-cycle.md +75 -0
  111. package/skills/core/cook/references/subagent-patterns.md +75 -0
  112. package/skills/core/cook/references/workflow-steps.md +172 -0
  113. package/skills/core/debug/SKILL.md +121 -0
  114. package/skills/core/debug/references/defense-in-depth.md +124 -0
  115. package/skills/core/debug/references/frontend-verification.md +103 -0
  116. package/skills/core/debug/references/investigation-methodology.md +101 -0
  117. package/skills/core/debug/references/log-and-ci-analysis.md +97 -0
  118. package/skills/core/debug/references/performance-diagnostics.md +113 -0
  119. package/skills/core/debug/references/reporting-standards.md +122 -0
  120. package/skills/core/debug/references/root-cause-tracing.md +122 -0
  121. package/skills/core/debug/references/systematic-debugging.md +102 -0
  122. package/skills/core/debug/references/task-management-debugging.md +155 -0
  123. package/skills/core/debug/references/verification.md +123 -0
  124. package/skills/core/debug/scripts/find-polluter.sh +63 -0
  125. package/skills/core/debug/scripts/find-polluter.test.md +102 -0
  126. package/skills/core/execute/SKILL.md +70 -0
  127. package/skills/core/fix/SKILL.md +111 -0
  128. package/skills/core/fix/references/complexity-assessment.md +72 -0
  129. package/skills/core/fix/references/mode-selection.md +46 -0
  130. package/skills/core/fix/references/parallel-exploration.md +100 -0
  131. package/skills/core/fix/references/review-cycle.md +77 -0
  132. package/skills/core/fix/references/skill-activation-matrix.md +78 -0
  133. package/skills/core/fix/references/task-orchestration.md +103 -0
  134. package/skills/core/fix/references/workflow-ci.md +28 -0
  135. package/skills/core/fix/references/workflow-deep.md +122 -0
  136. package/skills/core/fix/references/workflow-logs.md +72 -0
  137. package/skills/core/fix/references/workflow-quick.md +59 -0
  138. package/skills/core/fix/references/workflow-standard.md +111 -0
  139. package/skills/core/fix/references/workflow-test.md +75 -0
  140. package/skills/core/fix/references/workflow-types.md +33 -0
  141. package/skills/core/fix/references/workflow-ui.md +75 -0
  142. package/skills/core/plan/SKILL.md +145 -0
  143. package/skills/core/plan/plan-document-reviewer-prompt.md +49 -0
  144. package/skills/core/subagent-dev/SKILL.md +277 -0
  145. package/skills/core/subagent-dev/code-quality-reviewer-prompt.md +26 -0
  146. package/skills/core/subagent-dev/implementer-prompt.md +113 -0
  147. package/skills/core/subagent-dev/spec-reviewer-prompt.md +61 -0
  148. package/skills/core/tdd/SKILL.md +371 -0
  149. package/skills/core/tdd/testing-anti-patterns.md +299 -0
  150. package/skills/core/test/SKILL.md +109 -0
  151. package/skills/core/test/references/report-format.md +58 -0
  152. package/skills/core/test/references/test-execution-workflow.md +103 -0
  153. package/skills/core/test/references/ui-testing-workflow.md +65 -0
  154. package/skills/core/verify/SKILL.md +139 -0
  155. package/skills/dev/backend-dev/SKILL.md +96 -0
  156. package/skills/dev/backend-dev/references/backend-api-design.md +495 -0
  157. package/skills/dev/backend-dev/references/backend-architecture.md +454 -0
  158. package/skills/dev/backend-dev/references/backend-authentication.md +338 -0
  159. package/skills/dev/backend-dev/references/backend-code-quality.md +659 -0
  160. package/skills/dev/backend-dev/references/backend-debugging.md +904 -0
  161. package/skills/dev/backend-dev/references/backend-devops.md +494 -0
  162. package/skills/dev/backend-dev/references/backend-mindset.md +387 -0
  163. package/skills/dev/backend-dev/references/backend-performance.md +397 -0
  164. package/skills/dev/backend-dev/references/backend-security.md +290 -0
  165. package/skills/dev/backend-dev/references/backend-technologies.md +256 -0
  166. package/skills/dev/backend-dev/references/backend-testing.md +429 -0
  167. package/skills/dev/context-engineering/SKILL.md +108 -0
  168. package/skills/dev/context-engineering/references/context-compression.md +84 -0
  169. package/skills/dev/context-engineering/references/context-degradation.md +93 -0
  170. package/skills/dev/context-engineering/references/context-fundamentals.md +75 -0
  171. package/skills/dev/context-engineering/references/context-optimization.md +82 -0
  172. package/skills/dev/context-engineering/references/evaluation.md +89 -0
  173. package/skills/dev/context-engineering/references/memory-systems.md +88 -0
  174. package/skills/dev/context-engineering/references/multi-agent-patterns.md +90 -0
  175. package/skills/dev/context-engineering/references/project-development.md +97 -0
  176. package/skills/dev/context-engineering/references/runtime-awareness.md +202 -0
  177. package/skills/dev/context-engineering/references/tool-design.md +86 -0
  178. package/skills/dev/context-engineering/scripts/compression_evaluator.py +349 -0
  179. package/skills/dev/context-engineering/scripts/context_analyzer.py +317 -0
  180. package/skills/dev/context-engineering/scripts/tests/test_edge_cases.py +246 -0
  181. package/skills/dev/databases/SKILL.md +84 -0
  182. package/skills/dev/databases/analytics.md +198 -0
  183. package/skills/dev/databases/db-design.md +188 -0
  184. package/skills/dev/databases/incremental-etl.md +213 -0
  185. package/skills/dev/databases/references/mongodb-aggregation.md +447 -0
  186. package/skills/dev/databases/references/mongodb-atlas.md +465 -0
  187. package/skills/dev/databases/references/mongodb-crud.md +408 -0
  188. package/skills/dev/databases/references/mongodb-indexing.md +442 -0
  189. package/skills/dev/databases/references/postgresql-administration.md +594 -0
  190. package/skills/dev/databases/references/postgresql-performance.md +527 -0
  191. package/skills/dev/databases/references/postgresql-psql-cli.md +467 -0
  192. package/skills/dev/databases/references/postgresql-queries.md +475 -0
  193. package/skills/dev/databases/scripts/.coverage +0 -0
  194. package/skills/dev/databases/scripts/db_backup.py +502 -0
  195. package/skills/dev/databases/scripts/db_migrate.py +426 -0
  196. package/skills/dev/databases/scripts/db_performance_check.py +457 -0
  197. package/skills/dev/databases/scripts/requirements.txt +20 -0
  198. package/skills/dev/databases/scripts/tests/coverage-db.json +1 -0
  199. package/skills/dev/databases/scripts/tests/requirements.txt +4 -0
  200. package/skills/dev/databases/scripts/tests/test_db_backup.py +340 -0
  201. package/skills/dev/databases/scripts/tests/test_db_migrate.py +277 -0
  202. package/skills/dev/databases/scripts/tests/test_db_performance_check.py +370 -0
  203. package/skills/dev/databases/stacks/bigquery.md +231 -0
  204. package/skills/dev/databases/stacks/d1_cloudflare.md +137 -0
  205. package/skills/dev/databases/stacks/mysql.md +216 -0
  206. package/skills/dev/databases/stacks/postgres.md +235 -0
  207. package/skills/dev/databases/stacks/sqlite.md +244 -0
  208. package/skills/dev/databases/transactional.md +176 -0
  209. package/skills/dev/devops/.env.example +76 -0
  210. package/skills/dev/devops/SKILL.md +97 -0
  211. package/skills/dev/devops/references/browser-rendering.md +305 -0
  212. package/skills/dev/devops/references/cloudflare-d1-kv.md +123 -0
  213. package/skills/dev/devops/references/cloudflare-platform.md +271 -0
  214. package/skills/dev/devops/references/cloudflare-r2-storage.md +280 -0
  215. package/skills/dev/devops/references/cloudflare-workers-advanced.md +312 -0
  216. package/skills/dev/devops/references/cloudflare-workers-apis.md +309 -0
  217. package/skills/dev/devops/references/cloudflare-workers-basics.md +418 -0
  218. package/skills/dev/devops/references/docker-basics.md +297 -0
  219. package/skills/dev/devops/references/docker-compose.md +292 -0
  220. package/skills/dev/devops/references/gcloud-platform.md +297 -0
  221. package/skills/dev/devops/references/gcloud-services.md +304 -0
  222. package/skills/dev/devops/references/kubernetes-basics.md +99 -0
  223. package/skills/dev/devops/references/kubernetes-helm-advanced.md +75 -0
  224. package/skills/dev/devops/references/kubernetes-helm.md +81 -0
  225. package/skills/dev/devops/references/kubernetes-kubectl.md +74 -0
  226. package/skills/dev/devops/references/kubernetes-security-advanced.md +98 -0
  227. package/skills/dev/devops/references/kubernetes-security.md +95 -0
  228. package/skills/dev/devops/references/kubernetes-troubleshooting-advanced.md +74 -0
  229. package/skills/dev/devops/references/kubernetes-troubleshooting.md +49 -0
  230. package/skills/dev/devops/references/kubernetes-workflows-advanced.md +75 -0
  231. package/skills/dev/devops/references/kubernetes-workflows.md +78 -0
  232. package/skills/dev/devops/scripts/cloudflare_deploy.py +269 -0
  233. package/skills/dev/devops/scripts/docker_optimize.py +332 -0
  234. package/skills/dev/devops/scripts/requirements.txt +20 -0
  235. package/skills/dev/devops/scripts/tests/requirements.txt +3 -0
  236. package/skills/dev/devops/scripts/tests/test_cloudflare_deploy.py +285 -0
  237. package/skills/dev/devops/scripts/tests/test_docker_optimize.py +436 -0
  238. package/skills/dev/frontend-design/SKILL.md +78 -0
  239. package/skills/dev/frontend-design/references/ai-multimodal-overview.md +165 -0
  240. package/skills/dev/frontend-design/references/analysis-best-practices.md +80 -0
  241. package/skills/dev/frontend-design/references/analysis-prompts.md +141 -0
  242. package/skills/dev/frontend-design/references/analysis-techniques.md +118 -0
  243. package/skills/dev/frontend-design/references/animejs.md +396 -0
  244. package/skills/dev/frontend-design/references/asset-generation.md +337 -0
  245. package/skills/dev/frontend-design/references/design-extraction-overview.md +71 -0
  246. package/skills/dev/frontend-design/references/extraction-best-practices.md +141 -0
  247. package/skills/dev/frontend-design/references/extraction-output-templates.md +162 -0
  248. package/skills/dev/frontend-design/references/extraction-prompts.md +127 -0
  249. package/skills/dev/frontend-design/references/technical-accessibility.md +119 -0
  250. package/skills/dev/frontend-design/references/technical-best-practices.md +97 -0
  251. package/skills/dev/frontend-design/references/technical-optimization.md +44 -0
  252. package/skills/dev/frontend-design/references/technical-overview.md +90 -0
  253. package/skills/dev/frontend-design/references/technical-workflows.md +150 -0
  254. package/skills/dev/frontend-design/references/visual-analysis-overview.md +95 -0
  255. package/skills/dev/frontend-design/references/workflow-3d.md +102 -0
  256. package/skills/dev/frontend-design/references/workflow-describe.md +87 -0
  257. package/skills/dev/frontend-design/references/workflow-immersive.md +87 -0
  258. package/skills/dev/frontend-design/references/workflow-quick.md +57 -0
  259. package/skills/dev/frontend-design/references/workflow-screenshot.md +63 -0
  260. package/skills/dev/frontend-design/references/workflow-video.md +74 -0
  261. package/skills/dev/frontend-dev/SKILL.md +400 -0
  262. package/skills/dev/frontend-dev/resources/common-patterns.md +331 -0
  263. package/skills/dev/frontend-dev/resources/complete-examples.md +872 -0
  264. package/skills/dev/frontend-dev/resources/component-patterns.md +502 -0
  265. package/skills/dev/frontend-dev/resources/data-fetching.md +767 -0
  266. package/skills/dev/frontend-dev/resources/file-organization.md +502 -0
  267. package/skills/dev/frontend-dev/resources/loading-and-error-states.md +501 -0
  268. package/skills/dev/frontend-dev/resources/performance.md +406 -0
  269. package/skills/dev/frontend-dev/resources/routing-guide.md +364 -0
  270. package/skills/dev/frontend-dev/resources/styling-guide.md +428 -0
  271. package/skills/dev/frontend-dev/resources/typescript-standards.md +418 -0
  272. package/skills/dev/git/SKILL.md +114 -0
  273. package/skills/dev/git/references/branch-management.md +88 -0
  274. package/skills/dev/git/references/commit-standards.md +46 -0
  275. package/skills/dev/git/references/gh-cli-guide.md +109 -0
  276. package/skills/dev/git/references/safety-protocols.md +69 -0
  277. package/skills/dev/git/references/workflow-commit.md +58 -0
  278. package/skills/dev/git/references/workflow-merge.md +48 -0
  279. package/skills/dev/git/references/workflow-pr.md +58 -0
  280. package/skills/dev/git/references/workflow-push.md +52 -0
  281. package/skills/dev/git-worktree/SKILL.md +218 -0
  282. package/skills/utils/ask/SKILL.md +58 -0
  283. package/skills/utils/bootstrap/SKILL.md +101 -0
  284. package/skills/utils/bootstrap/references/shared-phases.md +59 -0
  285. package/skills/utils/bootstrap/references/workflow-auto.md +52 -0
  286. package/skills/utils/bootstrap/references/workflow-fast.md +50 -0
  287. package/skills/utils/bootstrap/references/workflow-full.md +60 -0
  288. package/skills/utils/bootstrap/references/workflow-parallel.md +59 -0
  289. package/skills/utils/ck-help/SKILL.md +102 -0
  290. package/skills/utils/ck-help/scripts/ck-help.py +1321 -0
  291. package/skills/utils/ck-help/scripts/commands_data.yaml +3 -0
  292. package/skills/utils/ck-help/scripts/skills_data.yaml +593 -0
  293. package/skills/utils/copywriting/SKILL.md +94 -0
  294. package/skills/utils/copywriting/references/copy-formulas.md +150 -0
  295. package/skills/utils/copywriting/references/cta-patterns.md +168 -0
  296. package/skills/utils/copywriting/references/email-copy.md +193 -0
  297. package/skills/utils/copywriting/references/headline-templates.md +140 -0
  298. package/skills/utils/copywriting/references/landing-page-copy.md +175 -0
  299. package/skills/utils/copywriting/references/power-words.md +189 -0
  300. package/skills/utils/copywriting/references/social-media-copy.md +222 -0
  301. package/skills/utils/copywriting/references/workflow-cro.md +83 -0
  302. package/skills/utils/copywriting/references/workflow-enhance.md +32 -0
  303. package/skills/utils/copywriting/references/workflow-fast.md +29 -0
  304. package/skills/utils/copywriting/references/workflow-good.md +39 -0
  305. package/skills/utils/copywriting/references/writing-styles.md +247 -0
  306. package/skills/utils/copywriting/scripts/extract-writing-styles.py +308 -0
  307. package/skills/utils/copywriting/templates/copy-brief.md +49 -0
  308. package/skills/utils/docs/SKILL.md +55 -0
  309. package/skills/utils/docs/references/init-workflow.md +32 -0
  310. package/skills/utils/docs/references/summarize-workflow.md +18 -0
  311. package/skills/utils/docs/references/update-workflow.md +59 -0
  312. package/skills/utils/journal/SKILL.md +11 -0
  313. package/skills/utils/kanban/SKILL.md +99 -0
  314. package/skills/utils/preview/SKILL.md +75 -0
  315. package/skills/utils/preview/references/generation-modes.md +95 -0
  316. package/skills/utils/preview/references/view-mode.md +42 -0
  317. package/skills/utils/repomix/SKILL.md +248 -0
  318. package/skills/utils/repomix/references/configuration.md +211 -0
  319. package/skills/utils/repomix/references/usage-patterns.md +232 -0
  320. package/skills/utils/repomix/scripts/.coverage +0 -0
  321. package/skills/utils/repomix/scripts/README.md +179 -0
  322. package/skills/utils/repomix/scripts/repomix_batch.py +455 -0
  323. package/skills/utils/repomix/scripts/repos.example.json +15 -0
  324. package/skills/utils/repomix/scripts/requirements.txt +15 -0
  325. package/skills/utils/repomix/scripts/tests/test_repomix_batch.py +531 -0
  326. package/skills/utils/research/SKILL.md +171 -0
  327. package/skills/utils/scout/SKILL.md +89 -0
  328. package/skills/utils/scout/references/external-scouting.md +140 -0
  329. package/skills/utils/scout/references/internal-scouting.md +119 -0
  330. package/skills/utils/scout/references/task-management-scouting.md +125 -0
  331. package/skills/utils/sequential-thinking/.env.example +8 -0
  332. package/skills/utils/sequential-thinking/README.md +183 -0
  333. package/skills/utils/sequential-thinking/SKILL.md +95 -0
  334. package/skills/utils/sequential-thinking/package.json +31 -0
  335. package/skills/utils/sequential-thinking/references/advanced-strategies.md +79 -0
  336. package/skills/utils/sequential-thinking/references/advanced-techniques.md +76 -0
  337. package/skills/utils/sequential-thinking/references/core-patterns.md +95 -0
  338. package/skills/utils/sequential-thinking/references/examples-api.md +88 -0
  339. package/skills/utils/sequential-thinking/references/examples-architecture.md +94 -0
  340. package/skills/utils/sequential-thinking/references/examples-debug.md +90 -0
  341. package/skills/utils/sequential-thinking/scripts/format-thought.js +159 -0
  342. package/skills/utils/sequential-thinking/scripts/process-thought.js +236 -0
  343. package/skills/utils/sequential-thinking/tests/format-thought.test.js +133 -0
  344. package/skills/utils/sequential-thinking/tests/process-thought.test.js +215 -0
  345. package/skills/utils/write-skill/SKILL.md +655 -0
  346. package/skills/utils/write-skill/anthropic-best-practices.md +1150 -0
  347. package/skills/utils/write-skill/examples/CLAUDE_MD_TESTING.md +189 -0
  348. package/skills/utils/write-skill/graphviz-conventions.dot +172 -0
  349. package/skills/utils/write-skill/persuasion-principles.md +187 -0
  350. package/skills/utils/write-skill/render-graphs.js +168 -0
  351. package/skills/utils/write-skill/testing-skills-with-subagents.md +384 -0
  352. package/src/commands/init.js +238 -0
@@ -0,0 +1,1185 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Batch process multiple media files using Gemini API.
4
+
5
+ Supports all Gemini modalities:
6
+ - Audio: Transcription, analysis, summarization
7
+ - Image: Captioning, detection, OCR, analysis
8
+ - Video: Summarization, Q&A, scene detection
9
+ - Document: PDF extraction, structured output
10
+ - Generation: Image creation via Imagen 4 or Nano Banana (Gemini native)
11
+ - Nano Banana Flash (gemini-2.5-flash-image): Speed/volume
12
+ - Nano Banana Pro (gemini-3-pro-image-preview): Quality/4K text/reasoning
13
+ - Imagen 4 (imagen-4.0-*): Production-grade generation
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import os
19
+ import sys
20
+ import time
21
+ from pathlib import Path
22
+ from typing import List, Dict, Any, Optional
23
+ import csv
24
+ import shutil
25
+
26
+ # Import centralized environment resolver (works for both local and global installs)
27
+ CLAUDE_ROOT = Path(__file__).parent.parent.parent.parent
28
+ sys.path.insert(0, str(CLAUDE_ROOT / 'scripts'))
29
+ try:
30
+ from resolve_env import resolve_env
31
+ CENTRALIZED_RESOLVER_AVAILABLE = True
32
+ except ImportError:
33
+ # Fallback if centralized resolver not available
34
+ CENTRALIZED_RESOLVER_AVAILABLE = False
35
+ try:
36
+ from dotenv import load_dotenv
37
+ except ImportError:
38
+ load_dotenv = None
39
+
40
+ # Import key rotation support
41
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'common'))
42
+ try:
43
+ from api_key_rotator import KeyRotator, is_rate_limit_error
44
+ from api_key_helper import find_all_api_keys
45
+ KEY_ROTATION_AVAILABLE = True
46
+ except ImportError:
47
+ KEY_ROTATION_AVAILABLE = False
48
+ KeyRotator = None
49
+ is_rate_limit_error = None
50
+ find_all_api_keys = None
51
+
52
+ try:
53
+ from google import genai
54
+ from google.genai import types
55
+ except ImportError:
56
+ print("Error: google-genai package not installed")
57
+ print("Install with: pip install google-genai")
58
+ sys.exit(1)
59
+
60
+
61
# Image generation model configuration.
# Default: gemini-2.5-flash-image (Nano Banana Flash - fast, cost-effective)
# Alternative: imagen-4.0-generate-001 (production quality)
# All image generation requires billing - no completely free option exists.
IMAGE_MODEL_DEFAULT = 'gemini-2.5-flash-image'  # Nano Banana Flash (~$1/1M tokens)
# Same model ID as the default; used as the fallback if Imagen fails (billing).
IMAGE_MODEL_FALLBACK = 'gemini-2.5-flash-image'
# Imagen 4 model IDs (standard, ultra, fast).
# NOTE(review): presumably used to tell Imagen models apart from Gemini-native
# image models - the consumer is not visible in this part of the file; confirm.
IMAGEN_MODELS = {
    'imagen-4.0-generate-001',
    'imagen-4.0-ultra-generate-001',
    'imagen-4.0-fast-generate-001',
}
# Video models have no fallback - Veo always requires billing.
73
+
74
+
75
def find_api_key() -> Optional[str]:
    """Return the Gemini API key, or None when no key can be found.

    When the centralized resolver was imported successfully, the lookup is
    delegated to ``resolve_env('GEMINI_API_KEY', skill='ai-multimodal')``.
    According to the original notes, that resolver is expected to honour this
    priority (highest first) - the resolver itself is not visible here:

    1. Runtime environment variables
    2. PROJECT/.claude/skills/ai-multimodal/.env (skill-specific)
    3. PROJECT/.claude/skills/.env (shared skills)
    4. PROJECT/.claude/.env (project global)
    5. ~/.claude/skills/ai-multimodal/.env (user skill-specific)
    6. ~/.claude/skills/.env (user shared)
    7. ~/.claude/.env (user global)

    Without the resolver, a local (legacy) lookup is used: the
    ``GEMINI_API_KEY`` environment variable first, then - if python-dotenv is
    installed - the ``.env`` files located next to this skill.

    Returns:
        The API key string, or None if nothing non-empty was found.
    """
    if CENTRALIZED_RESOLVER_AVAILABLE:
        # Recommended path: one resolver shared by all skills.
        return resolve_env('GEMINI_API_KEY', skill='ai-multimodal')

    # Legacy path: a non-empty runtime environment variable wins outright.
    key_from_env = os.getenv('GEMINI_API_KEY')
    if key_from_env:
        return key_from_env

    if not load_dotenv:
        # python-dotenv is unavailable, so there is nowhere else to look.
        return None

    skill_root = Path(__file__).parent.parent
    skills_root = skill_root.parent

    # Loaded from least to most specific; override=True means a later file
    # replaces values set by an earlier one, so the skill-level .env wins.
    # NOTE(review): override=True also replaces any other variables already
    # present in the process environment with the values from these files.
    dotenv_candidates = (
        skills_root.parent / '.env',
        skills_root / '.env',
        skill_root / '.env',
    )
    for dotenv_path in dotenv_candidates:
        if dotenv_path.exists():
            load_dotenv(dotenv_path, override=True)

    # An empty value counts as "not found".
    return os.getenv('GEMINI_API_KEY') or None
120
+
121
+
122
def get_default_model(task: str) -> str:
    """Pick the model to use for ``task`` when none was given explicitly.

    For each task family the environment variables are consulted in order
    (capability-specific variable first, legacy variable second); the first
    non-empty value wins. If none is set, a hard-coded default is returned.

    Args:
        task: Task name, e.g. 'generate', 'generate-video', 'analyze',
            'transcribe' or 'extract'.

    Returns:
        The model identifier. Tasks not listed above get 'gemini-2.5-flash'.
    """
    multimodal = (('MULTIMODAL_MODEL', 'GEMINI_MODEL'), 'gemini-2.5-flash')
    lookup_plan = {
        # Image generation: Nano Banana Flash by default (fast, cost-effective);
        # imagen-4.0-generate-001 is the production-quality alternative.
        'generate': (
            ('IMAGE_GEN_MODEL', 'GEMINI_IMAGE_GEN_MODEL'),
            'gemini-2.5-flash-image',
        ),
        'generate-video': (('VIDEO_GEN_MODEL',), 'veo-3.1-generate-preview'),
        'analyze': multimodal,
        'transcribe': multimodal,
        'extract': multimodal,
    }

    # Unknown tasks consult no environment variables at all.
    env_names, hard_default = lookup_plan.get(task, ((), 'gemini-2.5-flash'))

    for env_name in env_names:
        configured = os.getenv(env_name)
        if configured:  # empty string is treated the same as unset
            return configured

    return hard_default
159
+
160
+
161
def validate_model_task_combination(model: str, task: str) -> None:
    """Check that ``model`` can be used for ``task``.

    Only the two generation tasks are checked; every other task passes.

    Args:
        model: Model identifier to validate.
        task: Task name ('generate-video' and 'generate' are validated).

    Raises:
        ValueError: If a video task is paired with a non-Veo model, or an
            image task is paired with a model that is neither in the known
            image-model list nor a ``gemini-`` model.
    """
    if task == 'generate-video':
        # Video generation is Veo-only.
        if model.startswith('veo-'):
            return
        raise ValueError(
            f"Video generation requires Veo model, got '{model}'\n"
            f"Valid models: veo-3.1-generate-preview, veo-3.1-fast-generate-preview, "
            f"veo-3.0-generate-001, veo-3.0-fast-generate-001"
        )

    if task != 'generate':
        return

    valid_image_models = [
        'imagen-4.0-generate-001',
        'imagen-4.0-ultra-generate-001',
        'imagen-4.0-fast-generate-001',
        'gemini-3-pro-image-preview',
        'gemini-2.5-flash-image',
        'gemini-2.5-flash-image-preview',
    ]

    # Any other gemini-* model is still let through for backward
    # compatibility with analysis-based generation.
    if model in valid_image_models or model.startswith('gemini-'):
        return

    raise ValueError(
        f"Image generation requires Imagen/Gemini image model, got '{model}'\n"
        f"Valid models: {', '.join(valid_image_models)}"
    )
193
+
194
+
195
def infer_task_from_file(file_path: str) -> str:
    """Infer the task type from a file's extension (case-insensitive).

    Args:
        file_path: Path or file name; only the final suffix is inspected.

    Returns:
        'transcribe' for audio files,
        'extract' for document files,
        'analyze' for image and video files and for any unrecognised or
        missing extension.
    """
    ext = Path(file_path).suffix.lower()

    audio_extensions = {'.mp3', '.wav', '.aac', '.flac', '.ogg', '.aiff', '.m4a'}
    document_extensions = {'.pdf', '.txt', '.html', '.md', '.doc', '.docx'}

    if ext in audio_extensions:
        return 'transcribe'
    if ext in document_extensions:
        return 'extract'

    # Images (.jpg, .png, .webp, ...), videos (.mp4, .mov, .mkv, ...) and
    # unknown types all map to the same task, so no separate lookup is needed.
    return 'analyze'
220
+
221
+
222
def get_mime_type(file_path: str) -> str:
    """Determine the MIME type from a file's extension (case-insensitive).

    Args:
        file_path: Path or file name; only its final suffix is inspected.

    Returns:
        The mapped MIME type, or 'application/octet-stream' when the
        extension is not in the table.
    """
    ext = Path(file_path).suffix.lower()

    mime_types = {
        # Audio
        '.mp3': 'audio/mp3',
        '.wav': 'audio/wav',
        '.aac': 'audio/aac',
        '.flac': 'audio/flac',
        '.ogg': 'audio/ogg',
        '.aiff': 'audio/aiff',
        '.m4a': 'audio/mp4',
        # Image
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.heic': 'image/heic',
        '.heif': 'image/heif',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        # Video
        '.mp4': 'video/mp4',
        '.mpeg': 'video/mpeg',
        '.mov': 'video/quicktime',
        '.avi': 'video/x-msvideo',
        '.flv': 'video/x-flv',
        '.mpg': 'video/mpeg',
        '.webm': 'video/webm',
        '.wmv': 'video/x-ms-wmv',
        '.3gpp': 'video/3gpp',
        '.mkv': 'video/x-matroska',
        # Document
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.md': 'text/markdown',
        '.doc': 'application/msword',
        '.docx': (
            'application/vnd.openxmlformats-officedocument'
            '.wordprocessingml.document'
        ),
    }

    # Every extension that infer_task_from_file recognizes has an entry
    # above, so audio/video detection by MIME prefix stays consistent with
    # task inference.
    return mime_types.get(ext, 'application/octet-stream')
259
+
260
+
261
def upload_file(client: genai.Client, file_path: str, verbose: bool = False) -> Any:
    """Upload a file through the client's file API and return its handle.

    For audio and video files (decided by get_mime_type) the call polls
    every 2 seconds, for at most 300 seconds, while the uploaded file
    reports the 'PROCESSING' state.

    Args:
        client: Client object exposing files.upload and files.get.
        file_path: Path of the file to upload.
        verbose: When True, print progress messages.

    Returns:
        The file object returned by the client (re-fetched while polling).

    Raises:
        ValueError: If an audio/video file ends in the 'FAILED' state.
        TimeoutError: If an audio/video file is still 'PROCESSING' after
            the polling window.
    """
    if verbose:
        print(f"Uploading {file_path}...")

    uploaded = client.files.upload(file=file_path)

    kind = get_mime_type(file_path)
    if kind.startswith(('video/', 'audio/')):
        max_wait = 300  # 5 minutes
        poll_interval = 2

        # Each iteration stands for one completed sleep; waited is the
        # total number of seconds slept so far.
        for waited in range(poll_interval, max_wait + 1, poll_interval):
            if uploaded.state.name != 'PROCESSING':
                break
            time.sleep(poll_interval)
            uploaded = client.files.get(name=uploaded.name)
            if verbose and waited % 10 == 0:
                print(f" Processing... {waited}s")

        if uploaded.state.name == 'FAILED':
            raise ValueError(f"File processing failed: {file_path}")

        if uploaded.state.name == 'PROCESSING':
            raise TimeoutError(f"Processing timeout after {max_wait}s: {file_path}")

    if verbose:
        print(f" Uploaded: {uploaded.name}")

    return uploaded
290
+
291
+
292
def _is_billing_error(error: Exception) -> bool:
    """Report whether an error message points to billing/access restrictions.

    The match is a case-insensitive substring search over str(error).
    """
    message = str(error).lower()
    markers = (
        'billing',
        'billed users',
        'payment',
        'access denied',
        'not authorized',
        'permission denied',
    )
    for marker in markers:
        if marker in message:
            return True
    return False
304
+
305
+
306
def _is_free_tier_quota_error(error: Exception) -> bool:
    """Report whether an error looks like a zero-quota free-tier rejection.

    True only when str(error) contains 'RESOURCE_EXHAUSTED' (case-sensitive)
    together with either 'limit: 0' or, case-insensitively, 'free_tier'.
    """
    text = str(error)
    if 'RESOURCE_EXHAUSTED' not in text:
        return False
    if 'limit: 0' in text:
        return True
    return 'free_tier' in text.lower()
318
+
319
+
320
# User-facing message that batch_process substitutes for the raw API error
# when image generation fails with a free-tier quota error, or video
# generation fails with a free-tier quota or billing error.
FREE_TIER_NO_ACCESS_MSG = """
[FREE TIER LIMITATION] Image/Video generation is NOT available on free tier.

Free tier users have zero quota (limit: 0) for:
- All Imagen models (imagen-4.0-*)
- All Veo models (veo-*)
- Gemini image models (gemini-*-image, gemini-*-image-preview)

To use image/video generation:
1. Enable billing: https://aistudio.google.com/apikey
2. Or use Google Cloud $300 free credits: https://cloud.google.com/free

STOP: Do not retry image/video generation on free tier - it will always fail.
""".strip()
334
+
335
+
336
def generate_image_imagen4(
    client,
    prompt: str,
    model: str,
    num_images: int = 1,
    aspect_ratio: str = '1:1',
    size: str = '1K',
    verbose: bool = False
) -> Dict[str, Any]:
    """Generate images through the generate_images API and save them as PNGs.

    Files are written to <project root>/docs/assets, where the project root
    is the nearest ancestor of this script containing '.git' or '.claude'
    (falling back to the script's own directory).

    Args:
        client: Client object exposing models.generate_images.
        prompt: Text prompt for the image.
        model: Model name; 'imageSize' is only sent for 'imagen-' models
            whose name does not contain 'fast'.
        num_images: Number of images to request.
        aspect_ratio: Aspect ratio string such as '1:1'.
        size: Image size such as '1K'; ignored for models that do not
            take 'imageSize'.
        verbose: When True, print progress and, on failure, a traceback.

    Returns:
        A dict whose 'status' is one of:
        - 'success': with 'generated_images' (list of file paths) and 'model'.
        - 'billing_required': with 'original_model' and 'error', so the
          caller can fall back to another model.
        - 'error': with 'error' (message). This includes the case where the
          API call succeeds but returns no images.
    """
    try:
        # Only Standard and Ultra support the imageSize parameter.
        supports_size = 'fast' not in model.lower() and model.startswith('imagen-')

        config_params = {
            'numberOfImages': num_images,
            'aspectRatio': aspect_ratio
        }
        if supports_size:
            config_params['imageSize'] = size

        gen_config = types.GenerateImagesConfig(**config_params)

        if verbose:
            print(f" Generating with: {model}")
            print(f" Config: {num_images} images, {aspect_ratio}", end='')
            if supports_size:
                print(f", {size}")
            else:
                print()

        response = client.models.generate_images(
            model=model,
            prompt=prompt,
            config=gen_config
        )

        images = response.generated_images or []
        if not images:
            # Report an explicit failure instead of "success" with nothing
            # saved (or an opaque TypeError when the field is None).
            return {
                'status': 'error',
                'error': f"Model '{model}' returned no images for this prompt"
            }

        # Locate the output directory once; it does not depend on the image.
        script_dir = Path(__file__).parent
        project_root = script_dir
        for parent in [script_dir] + list(script_dir.parents):
            if (parent / '.git').exists() or (parent / '.claude').exists():
                project_root = parent
                break

        output_dir = project_root / 'docs' / 'assets'
        output_dir.mkdir(parents=True, exist_ok=True)

        generated_files = []
        for i, generated_image in enumerate(images):
            output_file = output_dir / f"imagen4_generated_{int(time.time())}_{i}.png"

            with open(output_file, 'wb') as f:
                f.write(generated_image.image.image_bytes)
            generated_files.append(str(output_file))

            if verbose:
                print(f" Saved: {output_file}")

        return {
            'status': 'success',
            'generated_images': generated_files,
            'model': model
        }

    except Exception as e:
        # Deliberate catch-all boundary: failures are reported through the
        # returned status dict rather than raised to the caller.
        if _is_billing_error(e) and model in IMAGEN_MODELS:
            return {
                'status': 'billing_required',
                'original_model': model,
                'error': str(e)
            }

        if verbose:
            print(f" Error: {str(e)}")
            import traceback
            traceback.print_exc()
        return {
            'status': 'error',
            'error': str(e)
        }
422
+
423
+
424
def generate_video_veo(
    client,
    prompt: str,
    model: str,
    resolution: str = '1080p',
    aspect_ratio: str = '16:9',
    reference_images: Optional[List[str]] = None,
    verbose: bool = False
) -> Dict[str, Any]:
    """Generate a video with a Veo model and save it as an MP4.

    The file is written to <project root>/docs/assets, where the project
    root is the nearest ancestor of this script containing '.git' or
    '.claude' (falling back to the script's own directory).

    For image-to-video with first/last frames (Veo 3.1):
    - First reference image becomes the opening frame (image parameter)
    - Second reference image becomes the closing frame (last_frame config)
    - Any further reference images are ignored by this function

    Args:
        client: Client object exposing models.generate_videos,
            operations.get and files.download.
        prompt: Text prompt for the video.
        model: Veo model name.
        resolution: Resolution string passed to the generation config.
        aspect_ratio: Aspect ratio string passed to the generation config.
        reference_images: Optional image paths used as first/last frame.
        verbose: When True, print progress and, on failure, a traceback.

    Returns:
        On success: a dict with 'status' == 'success', 'generated_video'
        (file path), 'generation_time' (seconds), 'file_size_mb' and 'model'.
        On any failure: a dict with 'status' == 'error' and 'error'.
    """
    try:
        # Build config with snake_case for Python SDK
        config_params = {
            'aspect_ratio': aspect_ratio,
            'resolution': resolution
        }

        first_frame = None
        last_frame = None

        if reference_images:
            import mimetypes

            def load_image(img_path_str: str) -> types.Image:
                """Load image file as types.Image with bytes and mime type."""
                img_path = Path(img_path_str)
                image_bytes = img_path.read_bytes()
                mime_type, _ = mimetypes.guess_type(str(img_path))
                if not mime_type:
                    mime_type = 'image/png'
                return types.Image(
                    image_bytes=image_bytes,
                    mime_type=mime_type
                )

            # A non-empty list always has a first element: opening frame.
            first_frame = load_image(reference_images[0])

            # Second image = closing frame (last_frame in config)
            if len(reference_images) >= 2:
                last_frame = load_image(reference_images[1])
                config_params['last_frame'] = last_frame

        gen_config = types.GenerateVideosConfig(**config_params)

        if verbose:
            print(f" Generating video with Veo: {model}")
            print(f" Config: {resolution}, {aspect_ratio}")
            if first_frame:
                print(" First frame: provided")
            if last_frame:
                print(" Last frame: provided (interpolation mode)")

        start = time.time()

        if verbose:
            print(" Starting video generation (this may take 11s-6min)...")

        operation = client.models.generate_videos(
            model=model,
            prompt=prompt,
            image=first_frame,  # First frame as opening image
            config=gen_config
        )

        # Poll operation until complete (10 s between polls)
        poll_count = 0
        while not operation.done:
            poll_count += 1
            if verbose and poll_count % 3 == 0:  # Every third poll
                elapsed = time.time() - start
                print(f" Still generating... ({elapsed:.0f}s elapsed)")
            time.sleep(10)
            operation = client.operations.get(operation)

        duration = time.time() - start

        # A finished operation may carry no video; report that explicitly
        # instead of failing with an opaque IndexError/AttributeError.
        response = operation.response
        videos = getattr(response, 'generated_videos', None) if response else None
        if not videos:
            detail = getattr(operation, 'error', None) or 'no video in response'
            raise RuntimeError(f"Video generation finished without a video: {detail}")
        generated_video = videos[0]

        # Download the video file first
        client.files.download(file=generated_video.video)

        # Find project root
        script_dir = Path(__file__).parent
        project_root = script_dir
        for parent in [script_dir] + list(script_dir.parents):
            if (parent / '.git').exists() or (parent / '.claude').exists():
                project_root = parent
                break

        output_dir = project_root / 'docs' / 'assets'
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"veo_generated_{int(time.time())}.mp4"

        # Now save to file
        generated_video.video.save(str(output_file))

        file_size = output_file.stat().st_size / (1024 * 1024)  # MB

        if verbose:
            print(f" Generated in {duration:.1f}s")
            print(f" File size: {file_size:.2f} MB")
            print(f" Saved: {output_file}")

        return {
            'status': 'success',
            'generated_video': str(output_file),
            'generation_time': duration,
            'file_size_mb': file_size,
            'model': model
        }

    except Exception as e:
        # Deliberate catch-all boundary: failures are reported through the
        # returned status dict rather than raised to the caller.
        if verbose:
            print(f" Error: {str(e)}")
            import traceback
            traceback.print_exc()
        return {
            'status': 'error',
            'error': str(e)
        }
555
+
556
+
557
def process_file(
    client: genai.Client,
    file_path: Optional[str],
    prompt: str,
    model: str,
    task: str,
    format_output: str,
    aspect_ratio: Optional[str] = None,
    image_size: Optional[str] = None,
    verbose: bool = False,
    max_retries: int = 3
) -> Dict[str, Any]:
    """Process a single file (or a prompt-only generation) with retry logic.

    Files larger than 20 MB are uploaded through upload_file; smaller files
    are sent inline. For the 'generate' task, every inline image part of the
    first candidate is written to disk: next to the input file when one was
    given, otherwise under <project root>/docs/assets.

    Args:
        client: Client object exposing models.generate_content.
        file_path: Input file, or None for prompt-only 'generate'.
        prompt: Prompt text sent with the request.
        model: Model name.
        task: Task name; 'generate' enables image output handling.
        format_output: 'json' requests an application/json response.
        aspect_ratio: Optional aspect ratio for image generation.
        image_size: Image size for Nano Banana models (1K, 2K, 4K). Must be uppercase K.
            Note: Not all models support image_size - only pass when explicitly needed.
        verbose: When True, print progress messages.
        max_retries: Maximum number of attempts; values below 1 are
            treated as 1 so a result dict is always returned.

    Returns:
        A dict with 'file' and 'status' ('success' or 'error'). On success
        it also has 'response' and, when an image was saved,
        'generated_image'. On error it has 'error' and, when the final
        attempt failed, 'rate_limited'.
    """
    # Resolve the input once; the parameter itself is never rebound.
    source_path: Optional[Path] = None
    needs_file = task != 'generate' or bool(file_path)
    if needs_file:
        if not file_path:
            # Retrying cannot conjure a path, so fail immediately.
            return {
                'file': 'generated',
                'status': 'error',
                'error': f"No input file provided for task '{task}'"
            }
        source_path = Path(file_path)
        if not source_path.is_file():
            # A missing file is not transient; skip the back-off sleeps.
            return {
                'file': str(source_path),
                'status': 'error',
                'error': f"File not found: {file_path}"
            }

    result_name = str(source_path) if source_path is not None else 'generated'
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            if source_path is None:
                # Generation task without an input file
                content = [prompt]
            else:
                file_size = source_path.stat().st_size
                use_file_api = file_size > 20 * 1024 * 1024  # >20MB

                if use_file_api:
                    myfile = upload_file(client, str(source_path), verbose)
                    content = [prompt, myfile]
                else:
                    # Inline data
                    with open(source_path, 'rb') as f:
                        file_bytes = f.read()

                    mime_type = get_mime_type(str(source_path))
                    content = [
                        prompt,
                        types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
                    ]

            # Configure request
            config_args = {}
            if task == 'generate':
                # Nano Banana requires fully uppercase 'IMAGE' per API spec
                config_args['response_modalities'] = ['IMAGE']
                image_config_args = {}
                if aspect_ratio:
                    image_config_args['aspect_ratio'] = aspect_ratio
                if image_size:
                    # image_size must be uppercase K (1K, 2K, 4K)
                    image_config_args['image_size'] = image_size
                if image_config_args:
                    config_args['image_config'] = types.ImageConfig(**image_config_args)

            if format_output == 'json':
                config_args['response_mime_type'] = 'application/json'

            config = types.GenerateContentConfig(**config_args) if config_args else None

            response = client.models.generate_content(
                model=model,
                contents=content,
                config=config
            )

            result = {
                'file': result_name,
                'status': 'success',
                'response': response.text if hasattr(response, 'text') else None
            }

            # Handle image output
            if task == 'generate' and hasattr(response, 'candidates'):
                output_dir = None
                base_name = None
                for i, part in enumerate(response.candidates[0].content.parts):
                    if not part.inline_data:
                        continue

                    if output_dir is None:
                        # Resolve the destination lazily, on the first image.
                        if source_path is not None:
                            output_dir = source_path.parent
                            base_name = source_path.stem
                        else:
                            # Find project root (look for .git or .claude directory)
                            script_dir = Path(__file__).parent
                            project_root = script_dir
                            for parent in [script_dir] + list(script_dir.parents):
                                if (parent / '.git').exists() or (parent / '.claude').exists():
                                    project_root = parent
                                    break

                            output_dir = project_root / 'docs' / 'assets'
                            output_dir.mkdir(parents=True, exist_ok=True)
                            base_name = "generated"

                    output_file = output_dir / f"{base_name}_generated_{i}.png"
                    with open(output_file, 'wb') as f:
                        f.write(part.inline_data.data)
                    # Only the last saved image is recorded in the result.
                    result['generated_image'] = str(output_file)
                    if verbose:
                        print(f" Saved image to: {output_file}")

            return result

        except Exception as e:
            # Deliberate catch-all boundary: failures are reported through
            # the returned status dict rather than raised to the caller.
            # Don't retry on billing/free tier errors - they won't resolve
            if _is_billing_error(e) or _is_free_tier_quota_error(e):
                return {
                    'file': result_name,
                    'status': 'error',
                    'error': str(e)
                }

            # Check if this is a rate limit error (candidate for key rotation)
            is_rate_limited = (
                KEY_ROTATION_AVAILABLE and
                is_rate_limit_error and
                is_rate_limit_error(e)
            )

            if attempt == attempts - 1:
                return {
                    'file': result_name,
                    'status': 'error',
                    'error': str(e),
                    'rate_limited': is_rate_limited  # Flag for caller to handle rotation
                }

            # Exponential back-off: 1s, 2s, 4s, ...
            wait_time = 2 ** attempt
            if verbose:
                print(f" Retry {attempt + 1} after {wait_time}s: {e}")
            time.sleep(wait_time)
695
+
696
+
697
def batch_process(
    files: List[str],
    prompt: str,
    model: str,
    task: str,
    format_output: str,
    aspect_ratio: Optional[str] = None,
    num_images: int = 1,
    size: str = '1K',
    resolution: str = '1080p',
    reference_images: Optional[List[str]] = None,
    output_file: Optional[str] = None,
    verbose: bool = False,
    dry_run: bool = False
) -> List[Dict[str, Any]]:
    """Batch process multiple files with automatic key rotation.

    Dispatches on task and files:
    - 'generate' with no files: one image generation (generate_images API
      for Imagen models, generate_content API otherwise).
    - 'generate-video' with no files: one video generation.
    - anything else: each file goes through process_file, rotating API
      keys when a result is flagged as rate limited.

    Args:
        files: Input file paths; may be empty for generation tasks.
        prompt: Prompt text.
        model: Model name.
        task: Task name.
        format_output: Output format passed to process_file/save_results.
        aspect_ratio: Optional aspect ratio for image/video generation.
        num_images: Number of images for the Imagen path.
        size: Image size passed to the generation functions.
        resolution: Video resolution.
        reference_images: Optional reference images for video generation.
        output_file: When set, results are written there via save_results.
        verbose: When True, print progress messages.
        dry_run: When True, print the plan and return [] without API calls.

    Returns:
        One result dict per processed file or generation request
        (empty list in dry-run mode).

    Note:
        Calls sys.exit(1) when no API key can be found.
    """

    # Initialize key rotator or fall back to single key
    rotator = None
    api_key = None

    if KEY_ROTATION_AVAILABLE and find_all_api_keys:
        all_keys = find_all_api_keys()
        if all_keys:
            if len(all_keys) > 1:
                rotator = KeyRotator(keys=all_keys, verbose=verbose)
                api_key = rotator.get_key()
                if verbose:
                    print(f"✓ Key rotation enabled with {len(all_keys)} keys", file=sys.stderr)
            else:
                api_key = all_keys[0]
                if verbose:
                    # NOTE(review): this logs the first 8 characters of the
                    # key to stderr - confirm that is acceptable.
                    print(f"✓ Using single API key: {api_key[:8]}...", file=sys.stderr)

    # Fallback to original single-key lookup
    if not api_key:
        api_key = find_api_key()

    if not api_key:
        print("Error: GEMINI_API_KEY not found")
        print("\nSetup options:")
        print("1. Run setup checker: python scripts/check_setup.py")
        print("2. Show hierarchy: python ~/.claude/scripts/resolve_env.py --show-hierarchy --skill ai-multimodal")
        print("3. Quick setup: export GEMINI_API_KEY='your-key'")
        print("4. Create .env: cd ~/.claude/skills/ai-multimodal && cp .env.example .env")
        print("\nFor key rotation, add multiple keys:")
        print(" GEMINI_API_KEY=key1")
        print(" GEMINI_API_KEY_2=key2")
        print(" GEMINI_API_KEY_3=key3")
        sys.exit(1)

    # The dry run is reported only after key lookup, so it can show how
    # many keys are available.
    if dry_run:
        print("DRY RUN MODE - No API calls will be made")
        print(f"Files to process: {len(files)}")
        print(f"Model: {model}")
        print(f"Task: {task}")
        print(f"Prompt: {prompt}")
        if rotator:
            print(f"API keys available: {rotator.key_count}")
        return []

    # Create client with current key
    client = genai.Client(api_key=api_key)
    results = []

    def get_client_with_rotation(error: Optional[Exception] = None) -> Optional[genai.Client]:
        """Get client, rotating key if rate limited.

        Returns the current client when no rotation applies, a new client
        bound to the next key after a successful rotation, or None when
        the error is a rate limit and no further key could be obtained.
        """
        nonlocal client, api_key

        if error and rotator and is_rate_limit_error and is_rate_limit_error(error):
            # Try to rotate to next key
            if rotator.mark_rate_limited(str(error)):
                new_key = rotator.get_key()
                if new_key:
                    api_key = new_key
                    client = genai.Client(api_key=api_key)
                    return client
            # All keys exhausted
            return None
        return client

    # For generation tasks without input files, process once
    if task == 'generate' and not files:
        if verbose:
            print(f"\nGenerating image from prompt...")

        # Use Imagen 4 API for imagen models
        if model.startswith('imagen-') or model in IMAGEN_MODELS:
            result = generate_image_imagen4(
                client=client,
                prompt=prompt,
                model=model,
                num_images=num_images,
                aspect_ratio=aspect_ratio or '1:1',
                size=size or '1K', # Default to 1K for Imagen models
                verbose=verbose
            )

            # Silent fallback to cheaper model if Imagen billing required
            if result.get('status') == 'billing_required':
                if verbose:
                    print(f" Falling back to: {IMAGE_MODEL_FALLBACK}")
                result = process_file(
                    client=client,
                    file_path=None,
                    prompt=prompt,
                    model=IMAGE_MODEL_FALLBACK,
                    task=task,
                    format_output=format_output,
                    aspect_ratio=aspect_ratio,
                    image_size=size,
                    verbose=verbose
                )
            # Check if free tier (zero quota) - stop immediately with clear message
            # The raw error text is re-wrapped in an Exception because the
            # classifier helpers take an exception, not a string.
            error_str = result.get('error', '')
            if result.get('status') == 'error':
                if _is_free_tier_quota_error(Exception(error_str)):
                    result['error'] = FREE_TIER_NO_ACCESS_MSG
                elif _is_billing_error(Exception(error_str)):
                    result['error'] = (
                        "Image generation requires billing. Enable billing at: "
                        "https://aistudio.google.com/apikey or use Google Cloud credits."
                    )
        else:
            # Nano Banana (Flash/Pro) or other models via generate_content API
            result = process_file(
                client=client,
                file_path=None,
                prompt=prompt,
                model=model,
                task=task,
                format_output=format_output,
                aspect_ratio=aspect_ratio,
                image_size=size,
                verbose=verbose
            )
            # Check for free tier error
            if result.get('status') == 'error':
                error_str = result.get('error', '')
                if _is_free_tier_quota_error(Exception(error_str)):
                    result['error'] = FREE_TIER_NO_ACCESS_MSG

        results.append(result)

        if verbose:
            status = result.get('status', 'unknown')
            print(f" Status: {status}")

    elif task == 'generate-video' and not files:
        if verbose:
            print(f"\nGenerating video from prompt...")

        result = generate_video_veo(
            client=client,
            prompt=prompt,
            model=model,
            resolution=resolution,
            aspect_ratio=aspect_ratio or '16:9',
            reference_images=reference_images,
            verbose=verbose
        )

        # Check for free tier error - video gen has NO free tier access
        if result.get('status') == 'error':
            error_str = result.get('error', '')
            if _is_free_tier_quota_error(Exception(error_str)) or _is_billing_error(Exception(error_str)):
                result['error'] = FREE_TIER_NO_ACCESS_MSG

        results.append(result)

        if verbose:
            status = result.get('status', 'unknown')
            print(f" Status: {status}")
    else:
        # Process input files with key rotation support
        for i, file_path in enumerate(files, 1):
            if verbose:
                print(f"\n[{i}/{len(files)}] Processing: {file_path}")

            # Try processing with key rotation on rate limit
            # At most one attempt per available key (one attempt without a rotator).
            max_rotation_attempts = rotator.key_count if rotator else 1
            result = None

            for rotation_attempt in range(max_rotation_attempts):
                result = process_file(
                    client=client,
                    file_path=file_path,
                    prompt=prompt,
                    model=model,
                    task=task,
                    format_output=format_output,
                    aspect_ratio=aspect_ratio,
                    image_size=size,
                    verbose=verbose
                )

                # Check if rate limited and can rotate
                if (result.get('rate_limited') and rotator and
                        rotation_attempt < max_rotation_attempts - 1):
                    new_client = get_client_with_rotation(Exception(result.get('error', '')))
                    if new_client:
                        client = new_client
                        if verbose:
                            print(f" Retrying with rotated key...")
                        continue
                    else:
                        # All keys exhausted - mark result with clear error
                        if verbose:
                            print(f" ⚠ All API keys exhausted (on cooldown)", file=sys.stderr)
                        result['error'] = "All API keys exhausted (rate limited). Try again later."
                # Reached on success, on a non-rotatable failure, or after
                # key exhaustion: stop trying this file.
                break

            results.append(result)

            if verbose:
                status = result.get('status', 'unknown')
                print(f" Status: {status}")

    # Save results
    if output_file:
        save_results(results, output_file, format_output)

    return results
920
+
921
+
922
def _success_lines(result: Dict[str, Any], task: str) -> List[str]:
    """Build the task-specific output lines for one successful result."""
    lines: List[str] = []

    if task in ('analyze', 'transcribe', 'extract'):
        text = result.get('response')
        if text:
            lines.append(f"Result:\n{text}")

    elif task == 'generate':
        # Prefer the multi-image field; fall back to the legacy single one.
        paths = result.get('generated_images', [])
        if paths:
            lines.append(f"Generated images: {len(paths)}")
            lines.extend(f" - {path}" for path in paths)
        else:
            single = result.get('generated_image')
            if single:
                lines.append(f"Generated image: {single}")

    elif task == 'generate-video':
        video = result.get('generated_video')
        if video:
            lines.append(f"Generated video: {video}")
            seconds = result.get('generation_time')
            if seconds:
                lines.append(f"Generation time: {seconds:.1f}s")
            megabytes = result.get('file_size_mb')
            if megabytes:
                lines.append(f"File size: {megabytes:.2f} MB")

    return lines


def print_results(results: List[Dict[str, Any]], task: str) -> None:
    """Write every result to stdout so an LLM workflow can consume it.

    Prints the actual content of each result (response text, generated
    file paths, or error message), not just success/failure counts.
    Nothing is printed when results is empty.

    Args:
        results: Result dicts as produced by the processing functions.
        task: Task name; selects which fields of a successful result
            are shown.
    """
    if not results:
        return

    print("\n=== RESULTS ===\n")

    for entry in results:
        outcome = entry.get('status', 'unknown')

        print(f"[{entry.get('file', 'generated')}]")
        print(f"Status: {outcome}")

        if outcome == 'success':
            for line in _success_lines(entry, task):
                print(line)
        elif outcome == 'error':
            print(f"Error: {entry.get('error', 'Unknown error')}")

        print()  # Blank line between results
975
+
976
+
977
def save_results(results: List[Dict[str, Any]], output_file: str, format_output: str) -> None:
    """Save results to a file, creating its parent directory if needed.

    When output_file has an image or video extension and there is exactly
    one result, the generated media file is copied to output_file instead
    of writing a report. If that single result has no generated media, the
    report is written to '<name>.error.txt' beside the requested path.

    Otherwise a report is written in the requested format: 'json', 'csv',
    or markdown for any other value (including 'text').

    Args:
        results: Result dicts to save.
        output_file: Destination path.
        format_output: Report format selector.
    """
    output_path = Path(output_file)

    # Every branch below writes inside this directory, so create it once.
    # Previously only the image/video branches did, and a plain report to a
    # new directory failed with FileNotFoundError.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    image_extensions = {'.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp'}
    video_extensions = {'.mp4', '.mov', '.avi', '.webm'}

    suffix = output_path.suffix.lower()
    single_result = len(results) == 1

    if suffix in image_extensions and single_result:
        first = results[0]
        # Prefer the multi-image field (first image); fall back to the
        # legacy single-image field.
        images = first.get('generated_images')
        source = images[0] if images else first.get('generated_image')
        if source:
            shutil.copy2(source, output_path)
            return

        # Don't write text reports to image files - save error as .txt instead
        output_path = output_path.with_suffix('.error.txt')
        print(f"Warning: Generation failed, saving error report to: {output_path}")

    elif suffix in video_extensions and single_result:
        generated_video = results[0].get('generated_video')
        if generated_video:
            shutil.copy2(generated_video, output_path)
            return

        output_path = output_path.with_suffix('.error.txt')
        print(f"Warning: Video generation failed, saving error report to: {output_path}")

    if format_output == 'json':
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)
    elif format_output == 'csv':
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            fieldnames = ['file', 'status', 'response', 'error']
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for result in results:
                writer.writerow({
                    'file': result.get('file', ''),
                    'status': result.get('status', ''),
                    'response': result.get('response', ''),
                    'error': result.get('error', '')
                })
    else:  # markdown
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("# Batch Processing Results\n\n")
            for i, result in enumerate(results, 1):
                f.write(f"## {i}. {result.get('file', 'Unknown')}\n\n")
                f.write(f"**Status**: {result.get('status', 'unknown')}\n\n")
                if result.get('response'):
                    f.write(f"**Response**:\n\n{result['response']}\n\n")
                if result.get('error'):
                    f.write(f"**Error**: {result['error']}\n\n")
1045
+
1046
+
1047
def main():
    """Command-line entry point: parse arguments, run the batch, report.

    Fills in defaults the user omitted (task from the first file's
    extension, model from get_default_model, prompt per task), validates
    the combination, runs batch_process, then prints each result followed
    by a one-line summary.
    """
    parser = argparse.ArgumentParser(
        description='Batch process media files with Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Transcribe multiple audio files
%(prog)s --files *.mp3 --task transcribe --model gemini-2.5-flash

# Analyze images
%(prog)s --files *.jpg --task analyze --prompt "Describe this image" \\
--model gemini-2.5-flash

# Process PDFs to JSON
%(prog)s --files *.pdf --task extract --prompt "Extract data as JSON" \\
--format json --output results.json

# Generate images with Nano Banana Flash (fast)
%(prog)s --task generate --prompt "A mountain landscape at sunset" \\
--model gemini-2.5-flash-image --aspect-ratio 16:9 --size 2K

# Generate images with Nano Banana Pro (4K text, reasoning)
%(prog)s --task generate --prompt "Travel poster with text 'EXPLORE'" \\
--model gemini-3-pro-image-preview --aspect-ratio 3:4 --size 4K

# Generate images with Imagen 4 (production quality)
%(prog)s --task generate --prompt "Product photo of coffee mug" \\
--model imagen-4.0-ultra-generate-001 --aspect-ratio 1:1 --size 2K
"""
    )

    parser.add_argument('--files', nargs='*', help='Input files to process')
    parser.add_argument('--task',
                        choices=['transcribe', 'analyze', 'extract', 'generate', 'generate-video'],
                        help='Task to perform (auto-detected from file type if not specified)')
    parser.add_argument('--prompt', help='Prompt for analysis/generation')
    parser.add_argument('--model',
                        help='Model to use (default: auto-detected from task and env vars)')
    parser.add_argument('--format', dest='format_output', default='text',
                        choices=['text', 'json', 'csv', 'markdown'],
                        help='Output format (default: text)')

    # Image generation options
    # All 10 aspect ratios supported by Nano Banana / Imagen 4
    parser.add_argument('--aspect-ratio',
                        choices=['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'],
                        help='Aspect ratio for image/video generation')
    parser.add_argument('--num-images', type=int, default=1,
                        help='Number of images to generate (1-4, default: 1)')
    # 4K available for Nano Banana Pro (gemini-3-pro-image-preview)
    # Note: Not all models support --size, only use when needed
    parser.add_argument('--size', choices=['1K', '2K', '4K'], default=None,
                        help='Image size - 1K/2K for Imagen 4, 1K/2K/4K for Nano Banana (optional)')

    # Video generation options
    parser.add_argument('--resolution', choices=['720p', '1080p'], default='1080p',
                        help='Video resolution (default: 1080p)')
    parser.add_argument('--reference-images', nargs='+',
                        help='Reference images for video generation (max 3)')

    parser.add_argument('--output', help='Output file for results')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be done without making API calls')

    args = parser.parse_args()

    # Enforce the range the --num-images help text documents, so a bad
    # value fails here instead of as an API error.
    if not 1 <= args.num_images <= 4:
        parser.error("--num-images must be between 1 and 4")

    # Auto-detect task from file type if not specified
    if not args.task:
        if args.files:
            args.task = infer_task_from_file(args.files[0])
            if args.verbose:
                print(f"Auto-detected task: {args.task} (from file extension)")
        else:
            parser.error("--task required when no input files provided")

    # Auto-detect model if not specified
    if not args.model:
        args.model = get_default_model(args.task)
        if args.verbose:
            print(f"Auto-detected model: {args.model}")

    # Validate model/task combination
    try:
        validate_model_task_combination(args.model, args.task)
    except ValueError as e:
        parser.error(str(e))

    # Validate arguments
    is_generation = args.task in ['generate', 'generate-video']

    if not is_generation and not args.files:
        parser.error("--files required for non-generation tasks")

    if is_generation and not args.prompt:
        parser.error("--prompt required for generation tasks")

    if not is_generation and not args.prompt:
        # Set default prompts
        if args.task == 'transcribe':
            args.prompt = 'Generate a transcript with timestamps'
        elif args.task == 'analyze':
            args.prompt = 'Analyze this content'
        elif args.task == 'extract':
            args.prompt = 'Extract key information'

    # Process files
    files = args.files or []
    results = batch_process(
        files=files,
        prompt=args.prompt,
        model=args.model,
        task=args.task,
        format_output=args.format_output,
        aspect_ratio=args.aspect_ratio,
        num_images=args.num_images,
        size=args.size,
        resolution=args.resolution,
        reference_images=args.reference_images,
        output_file=args.output,
        verbose=args.verbose,
        dry_run=args.dry_run
    )

    # Print results and summary
    if not args.dry_run and results:
        # Always print actual results for LLM workflows
        print_results(results, args.task)

        # Print summary
        success = sum(1 for r in results if r.get('status') == 'success')
        failed = len(results) - success
        print(f"{'='*50}")
        print(f"Summary: {len(results)} processed, {success} success, {failed} failed")
        if args.output:
            print(f"Results saved to: {args.output}")


if __name__ == '__main__':
    main()