@xaviele/ag-kit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/README.md +20 -0
  2. package/bin/cli.js +63 -0
  3. package/package.json +27 -0
  4. package/template/.agent/.shared/ui-ux-pro-max/data/charts.csv +26 -0
  5. package/template/.agent/.shared/ui-ux-pro-max/data/colors.csv +97 -0
  6. package/template/.agent/.shared/ui-ux-pro-max/data/icons.csv +101 -0
  7. package/template/.agent/.shared/ui-ux-pro-max/data/landing.csv +31 -0
  8. package/template/.agent/.shared/ui-ux-pro-max/data/products.csv +97 -0
  9. package/template/.agent/.shared/ui-ux-pro-max/data/prompts.csv +24 -0
  10. package/template/.agent/.shared/ui-ux-pro-max/data/react-performance.csv +45 -0
  11. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  12. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  13. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
  14. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  15. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  16. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  17. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  18. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/react.csv +54 -0
  19. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  20. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  21. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  22. package/template/.agent/.shared/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  23. package/template/.agent/.shared/ui-ux-pro-max/data/styles.csv +59 -0
  24. package/template/.agent/.shared/ui-ux-pro-max/data/typography.csv +58 -0
  25. package/template/.agent/.shared/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  26. package/template/.agent/.shared/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  27. package/template/.agent/.shared/ui-ux-pro-max/data/web-interface.csv +31 -0
  28. package/template/.agent/.shared/ui-ux-pro-max/scripts/__pycache__/core.cpython-313.pyc +0 -0
  29. package/template/.agent/.shared/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-313.pyc +0 -0
  30. package/template/.agent/.shared/ui-ux-pro-max/scripts/core.py +258 -0
  31. package/template/.agent/.shared/ui-ux-pro-max/scripts/design_system.py +1067 -0
  32. package/template/.agent/.shared/ui-ux-pro-max/scripts/search.py +106 -0
  33. package/template/.agent/ARCHITECTURE.md +281 -0
  34. package/template/.agent/agents/backend-specialist.md +263 -0
  35. package/template/.agent/agents/code-archaeologist.md +106 -0
  36. package/template/.agent/agents/database-architect.md +226 -0
  37. package/template/.agent/agents/debugger.md +225 -0
  38. package/template/.agent/agents/devops-engineer.md +242 -0
  39. package/template/.agent/agents/documentation-writer.md +104 -0
  40. package/template/.agent/agents/explorer-agent.md +73 -0
  41. package/template/.agent/agents/frontend-specialist.md +593 -0
  42. package/template/.agent/agents/game-developer.md +162 -0
  43. package/template/.agent/agents/mobile-developer.md +377 -0
  44. package/template/.agent/agents/orchestrator.md +416 -0
  45. package/template/.agent/agents/penetration-tester.md +188 -0
  46. package/template/.agent/agents/performance-optimizer.md +187 -0
  47. package/template/.agent/agents/product-manager.md +112 -0
  48. package/template/.agent/agents/product-owner.md +95 -0
  49. package/template/.agent/agents/project-planner.md +406 -0
  50. package/template/.agent/agents/qa-automation-engineer.md +103 -0
  51. package/template/.agent/agents/security-auditor.md +170 -0
  52. package/template/.agent/agents/seo-specialist.md +111 -0
  53. package/template/.agent/agents/test-engineer.md +158 -0
  54. package/template/.agent/mcp_config.json +24 -0
  55. package/template/.agent/rules/GEMINI.md +273 -0
  56. package/template/.agent/scripts/auto_preview.py +148 -0
  57. package/template/.agent/scripts/checklist.py +217 -0
  58. package/template/.agent/scripts/session_manager.py +120 -0
  59. package/template/.agent/scripts/verify_all.py +327 -0
  60. package/template/.agent/skills/adr/SKILL.md +282 -0
  61. package/template/.agent/skills/alirezarezvani-code-to-prd/SKILL.md +499 -0
  62. package/template/.agent/skills/api-patterns/SKILL.md +81 -0
  63. package/template/.agent/skills/api-patterns/api-style.md +42 -0
  64. package/template/.agent/skills/api-patterns/auth.md +24 -0
  65. package/template/.agent/skills/api-patterns/documentation.md +26 -0
  66. package/template/.agent/skills/api-patterns/graphql.md +41 -0
  67. package/template/.agent/skills/api-patterns/rate-limiting.md +31 -0
  68. package/template/.agent/skills/api-patterns/response.md +37 -0
  69. package/template/.agent/skills/api-patterns/rest.md +40 -0
  70. package/template/.agent/skills/api-patterns/scripts/api_validator.py +211 -0
  71. package/template/.agent/skills/api-patterns/security-testing.md +122 -0
  72. package/template/.agent/skills/api-patterns/trpc.md +41 -0
  73. package/template/.agent/skills/api-patterns/versioning.md +22 -0
  74. package/template/.agent/skills/app-builder/SKILL.md +75 -0
  75. package/template/.agent/skills/app-builder/agent-coordination.md +71 -0
  76. package/template/.agent/skills/app-builder/feature-building.md +53 -0
  77. package/template/.agent/skills/app-builder/project-detection.md +34 -0
  78. package/template/.agent/skills/app-builder/scaffolding.md +118 -0
  79. package/template/.agent/skills/app-builder/tech-stack.md +41 -0
  80. package/template/.agent/skills/app-builder/templates/SKILL.md +39 -0
  81. package/template/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +76 -0
  82. package/template/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +92 -0
  83. package/template/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +88 -0
  84. package/template/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +88 -0
  85. package/template/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +83 -0
  86. package/template/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +90 -0
  87. package/template/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +90 -0
  88. package/template/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +122 -0
  89. package/template/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +122 -0
  90. package/template/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +169 -0
  91. package/template/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +134 -0
  92. package/template/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +83 -0
  93. package/template/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +119 -0
  94. package/template/.agent/skills/architecture/SKILL.md +55 -0
  95. package/template/.agent/skills/architecture/context-discovery.md +43 -0
  96. package/template/.agent/skills/architecture/examples.md +94 -0
  97. package/template/.agent/skills/architecture/pattern-selection.md +68 -0
  98. package/template/.agent/skills/architecture/patterns-reference.md +50 -0
  99. package/template/.agent/skills/architecture/trade-off-analysis.md +77 -0
  100. package/template/.agent/skills/bash-linux/SKILL.md +199 -0
  101. package/template/.agent/skills/behavioral-modes/SKILL.md +242 -0
  102. package/template/.agent/skills/brainstorming/SKILL.md +163 -0
  103. package/template/.agent/skills/brainstorming/dynamic-questioning.md +350 -0
  104. package/template/.agent/skills/claudekit-ai-multimodal/SKILL.md +353 -0
  105. package/template/.agent/skills/clean-code/SKILL.md +201 -0
  106. package/template/.agent/skills/code-review-checklist/SKILL.md +109 -0
  107. package/template/.agent/skills/database-design/SKILL.md +52 -0
  108. package/template/.agent/skills/database-design/database-selection.md +43 -0
  109. package/template/.agent/skills/database-design/indexing.md +39 -0
  110. package/template/.agent/skills/database-design/migrations.md +48 -0
  111. package/template/.agent/skills/database-design/optimization.md +36 -0
  112. package/template/.agent/skills/database-design/orm-selection.md +30 -0
  113. package/template/.agent/skills/database-design/schema-design.md +56 -0
  114. package/template/.agent/skills/database-design/scripts/schema_validator.py +172 -0
  115. package/template/.agent/skills/deployment-procedures/SKILL.md +241 -0
  116. package/template/.agent/skills/doc.md +177 -0
  117. package/template/.agent/skills/document/SKILL.md +250 -0
  118. package/template/.agent/skills/documentation-templates/SKILL.md +194 -0
  119. package/template/.agent/skills/frontend-design/SKILL.md +452 -0
  120. package/template/.agent/skills/frontend-design/animation-guide.md +331 -0
  121. package/template/.agent/skills/frontend-design/color-system.md +311 -0
  122. package/template/.agent/skills/frontend-design/decision-trees.md +418 -0
  123. package/template/.agent/skills/frontend-design/motion-graphics.md +306 -0
  124. package/template/.agent/skills/frontend-design/scripts/accessibility_checker.py +183 -0
  125. package/template/.agent/skills/frontend-design/scripts/ux_audit.py +722 -0
  126. package/template/.agent/skills/frontend-design/typography-system.md +345 -0
  127. package/template/.agent/skills/frontend-design/ux-psychology.md +1116 -0
  128. package/template/.agent/skills/frontend-design/visual-effects.md +383 -0
  129. package/template/.agent/skills/game-development/2d-games/SKILL.md +119 -0
  130. package/template/.agent/skills/game-development/3d-games/SKILL.md +135 -0
  131. package/template/.agent/skills/game-development/SKILL.md +167 -0
  132. package/template/.agent/skills/game-development/game-art/SKILL.md +185 -0
  133. package/template/.agent/skills/game-development/game-audio/SKILL.md +190 -0
  134. package/template/.agent/skills/game-development/game-design/SKILL.md +129 -0
  135. package/template/.agent/skills/game-development/mobile-games/SKILL.md +108 -0
  136. package/template/.agent/skills/game-development/multiplayer/SKILL.md +132 -0
  137. package/template/.agent/skills/game-development/pc-games/SKILL.md +144 -0
  138. package/template/.agent/skills/game-development/vr-ar/SKILL.md +123 -0
  139. package/template/.agent/skills/game-development/web-games/SKILL.md +150 -0
  140. package/template/.agent/skills/geo-fundamentals/SKILL.md +156 -0
  141. package/template/.agent/skills/geo-fundamentals/scripts/geo_checker.py +289 -0
  142. package/template/.agent/skills/i18n-localization/SKILL.md +154 -0
  143. package/template/.agent/skills/i18n-localization/scripts/i18n_checker.py +241 -0
  144. package/template/.agent/skills/intelligent-routing/SKILL.md +335 -0
  145. package/template/.agent/skills/lint-and-validate/SKILL.md +45 -0
  146. package/template/.agent/skills/lint-and-validate/scripts/lint_runner.py +184 -0
  147. package/template/.agent/skills/lint-and-validate/scripts/type_coverage.py +173 -0
  148. package/template/.agent/skills/mcp-builder/SKILL.md +176 -0
  149. package/template/.agent/skills/mindrally-meta-prompt/SKILL.md +129 -0
  150. package/template/.agent/skills/mobile-design/SKILL.md +394 -0
  151. package/template/.agent/skills/mobile-design/decision-trees.md +516 -0
  152. package/template/.agent/skills/mobile-design/mobile-backend.md +491 -0
  153. package/template/.agent/skills/mobile-design/mobile-color-system.md +420 -0
  154. package/template/.agent/skills/mobile-design/mobile-debugging.md +122 -0
  155. package/template/.agent/skills/mobile-design/mobile-design-thinking.md +357 -0
  156. package/template/.agent/skills/mobile-design/mobile-navigation.md +458 -0
  157. package/template/.agent/skills/mobile-design/mobile-performance.md +767 -0
  158. package/template/.agent/skills/mobile-design/mobile-testing.md +356 -0
  159. package/template/.agent/skills/mobile-design/mobile-typography.md +433 -0
  160. package/template/.agent/skills/mobile-design/platform-android.md +666 -0
  161. package/template/.agent/skills/mobile-design/platform-ios.md +561 -0
  162. package/template/.agent/skills/mobile-design/scripts/mobile_audit.py +670 -0
  163. package/template/.agent/skills/mobile-design/touch-psychology.md +537 -0
  164. package/template/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +351 -0
  165. package/template/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +240 -0
  166. package/template/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +490 -0
  167. package/template/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +264 -0
  168. package/template/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +581 -0
  169. package/template/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +432 -0
  170. package/template/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +684 -0
  171. package/template/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +150 -0
  172. package/template/.agent/skills/nextjs-react-expert/9-cache-components.md +103 -0
  173. package/template/.agent/skills/nextjs-react-expert/SKILL.md +293 -0
  174. package/template/.agent/skills/nextjs-react-expert/scripts/convert_rules.py +222 -0
  175. package/template/.agent/skills/nextjs-react-expert/scripts/react_performance_checker.py +252 -0
  176. package/template/.agent/skills/nodejs-best-practices/SKILL.md +333 -0
  177. package/template/.agent/skills/parallel-agents/SKILL.md +175 -0
  178. package/template/.agent/skills/performance-profiling/SKILL.md +143 -0
  179. package/template/.agent/skills/performance-profiling/scripts/lighthouse_audit.py +76 -0
  180. package/template/.agent/skills/plan-writing/SKILL.md +152 -0
  181. package/template/.agent/skills/pm-skills-create-prd/SKILL.md +88 -0
  182. package/template/.agent/skills/powershell-windows/SKILL.md +167 -0
  183. package/template/.agent/skills/prompt-engineering/SKILL.md +566 -0
  184. package/template/.agent/skills/python-patterns/SKILL.md +441 -0
  185. package/template/.agent/skills/red-team-tactics/SKILL.md +199 -0
  186. package/template/.agent/skills/rust-pro/SKILL.md +176 -0
  187. package/template/.agent/skills/seo-fundamentals/SKILL.md +129 -0
  188. package/template/.agent/skills/seo-fundamentals/scripts/seo_checker.py +219 -0
  189. package/template/.agent/skills/server-management/SKILL.md +161 -0
  190. package/template/.agent/skills/skills/adr/SKILL.md +282 -0
  191. package/template/.agent/skills/skills/alirezarezvani-code-to-prd/SKILL.md +499 -0
  192. package/template/.agent/skills/skills/claudekit-ai-multimodal/SKILL.md +353 -0
  193. package/template/.agent/skills/skills/document/SKILL.md +250 -0
  194. package/template/.agent/skills/skills/mindrally-meta-prompt/SKILL.md +129 -0
  195. package/template/.agent/skills/skills/pm-skills-create-prd/SKILL.md +88 -0
  196. package/template/.agent/skills/skills/prompt-engineering/SKILL.md +566 -0
  197. package/template/.agent/skills/systematic-debugging/SKILL.md +109 -0
  198. package/template/.agent/skills/tailwind-patterns/SKILL.md +269 -0
  199. package/template/.agent/skills/tdd-workflow/SKILL.md +149 -0
  200. package/template/.agent/skills/testing-patterns/SKILL.md +178 -0
  201. package/template/.agent/skills/testing-patterns/scripts/test_runner.py +219 -0
  202. package/template/.agent/skills/vulnerability-scanner/SKILL.md +276 -0
  203. package/template/.agent/skills/vulnerability-scanner/checklists.md +121 -0
  204. package/template/.agent/skills/vulnerability-scanner/scripts/security_scan.py +458 -0
  205. package/template/.agent/skills/web-design-guidelines/SKILL.md +57 -0
  206. package/template/.agent/skills/webapp-testing/SKILL.md +187 -0
  207. package/template/.agent/skills/webapp-testing/scripts/playwright_runner.py +173 -0
  208. package/template/.agent/skills/zalo-mini-app/SKILL.md +81 -0
  209. package/template/.agent/skills/zalo-mini-app/references/api-device.md +121 -0
  210. package/template/.agent/skills/zalo-mini-app/references/api-overview.md +88 -0
  211. package/template/.agent/skills/zalo-mini-app/references/api-storage.md +74 -0
  212. package/template/.agent/skills/zalo-mini-app/references/api-ui.md +124 -0
  213. package/template/.agent/skills/zalo-mini-app/references/api-user.md +113 -0
  214. package/template/.agent/skills/zalo-mini-app/references/api-zalo.md +127 -0
  215. package/template/.agent/skills/zalo-mini-app/references/design-guidelines.md +70 -0
  216. package/template/.agent/skills/zalo-mini-app/references/getting-started.md +95 -0
  217. package/template/.agent/skills/zalo-mini-app/references/react-best-practices.md +790 -0
  218. package/template/.agent/skills/zalo-mini-app/references/web-design-guidelines.md +591 -0
  219. package/template/.agent/skills/zalo-mini-app/references/zaui-display.md +103 -0
  220. package/template/.agent/skills/zalo-mini-app/references/zaui-form.md +108 -0
  221. package/template/.agent/skills/zalo-mini-app/references/zaui-layout.md +94 -0
  222. package/template/.agent/skills/zalo-mini-app/references/zaui-overlay.md +98 -0
  223. package/template/.agent/skills/zalo-mini-app/references/zaui-overview.md +82 -0
  224. package/template/.agent/workflows/brainstorm.md +113 -0
  225. package/template/.agent/workflows/create.md +59 -0
  226. package/template/.agent/workflows/debug.md +103 -0
  227. package/template/.agent/workflows/deploy.md +176 -0
  228. package/template/.agent/workflows/enhance.md +63 -0
  229. package/template/.agent/workflows/orchestrate.md +237 -0
  230. package/template/.agent/workflows/plan.md +89 -0
  231. package/template/.agent/workflows/preview.md +81 -0
  232. package/template/.agent/workflows/status.md +86 -0
  233. package/template/.agent/workflows/test.md +144 -0
  234. package/template/.agent/workflows/ui-ux-pro-max.md +296 -0
  235. package/template/.agent/workflows/veo-marketing.md +46 -0
@@ -0,0 +1,353 @@
1
+ ---
2
+ name: "ai-multimodal"
3
+ description: "Process and generate multimedia content using the Google Gemini API. Capabilities include analyzing audio files (transcription with timestamps, summarization, speech understanding, music/sound analysis up to 9.5 hours), understanding images (captioning, object detection, OCR, visual Q&A, segmentation), processing videos (scene detection, Q&A, temporal analysis, YouTube URLs, up to 6 hours), extracting from documents (PDF tables, forms, charts, diagrams, multi-page), and generating images (text-to-image, editing, composition, refinement). Use when working with audio/video files, analyzing images or screenshots, processing PDF documents, extracting structured data from media, creating images from text prompts, or implementing multimodal AI features. Supports multiple models (Gemini 2.5/2.0) with context windows up to 2M tokens."
4
+ version: 1.0.0
5
+ category: build
6
+ ---
7
+
8
+ # AI Multimodal Processing Skill
9
+
10
+ Process audio, images, videos, documents, and generate images using Google Gemini's multimodal API. Unified interface for all multimedia content understanding and generation.
11
+
12
+ ## Core Capabilities
13
+
14
+ ### Audio Processing
15
+ - Transcription with timestamps (up to 9.5 hours)
16
+ - Audio summarization and analysis
17
+ - Speech understanding and speaker identification
18
+ - Music and environmental sound analysis
19
+ - Text-to-speech generation with controllable voice
20
+
21
+ ### Image Understanding
22
+ - Image captioning and description
23
+ - Object detection with bounding boxes (2.0+)
24
+ - Pixel-level segmentation (2.5+)
25
+ - Visual question answering
26
+ - Multi-image comparison (up to 3,600 images)
27
+ - OCR and text extraction
28
+
29
+ ### Video Analysis
30
+ - Scene detection and summarization
31
+ - Video Q&A with temporal understanding
32
+ - Transcription with visual descriptions
33
+ - YouTube URL support
34
+ - Long video processing (up to 6 hours)
35
+ - Frame-level analysis
36
+
37
+ ### Document Extraction
38
+ - Native PDF vision processing (up to 1,000 pages)
39
+ - Table and form extraction
40
+ - Chart and diagram analysis
41
+ - Multi-page document understanding
42
+ - Structured data output (JSON schema)
43
+ - Format conversion (PDF to HTML/JSON)
44
+
45
+ ### Image Generation
46
+ - Text-to-image generation
47
+ - Image editing and modification
48
+ - Multi-image composition (up to 3 images)
49
+ - Iterative refinement
50
+ - Multiple aspect ratios (1:1, 16:9, 9:16, 4:3, 3:4)
51
+ - Controllable style and quality
52
+
53
+ ## Capability Matrix
54
+
55
+ | Task | Audio | Image | Video | Document | Generation |
56
+ |------|:-----:|:-----:|:-----:|:--------:|:----------:|
57
+ | Transcription | ✓ | - | ✓ | - | - |
58
+ | Summarization | ✓ | ✓ | ✓ | ✓ | - |
59
+ | Q&A | ✓ | ✓ | ✓ | ✓ | - |
60
+ | Object Detection | - | ✓ | ✓ | - | - |
61
+ | Text Extraction | - | ✓ | - | ✓ | - |
62
+ | Structured Output | ✓ | ✓ | ✓ | ✓ | - |
63
+ | Creation | TTS | - | - | - | ✓ |
64
+ | Timestamps | ✓ | - | ✓ | - | - |
65
+ | Segmentation | - | ✓ | - | - | - |
66
+
67
+ ## Model Selection Guide
68
+
69
+ ### Gemini 2.5 Series (Recommended)
70
+ - **gemini-2.5-pro**: Highest quality, all features, 1M-2M context
71
+ - **gemini-2.5-flash**: Best balance, all features, 1M-2M context
72
+ - **gemini-2.5-flash-lite**: Lightweight, segmentation support
73
+ - **gemini-2.5-flash-image**: Image generation only
74
+
75
+ ### Gemini 2.0 Series
76
+ - **gemini-2.0-flash**: Fast processing, object detection
77
+ - **gemini-2.0-flash-lite**: Lightweight option
78
+
79
+ ### Feature Requirements
80
+ - **Segmentation**: Requires 2.5+ models
81
+ - **Object Detection**: Requires 2.0+ models
82
+ - **Multi-video**: Requires 2.5+ models
83
+ - **Image Generation**: Requires flash-image model
84
+
85
+ ### Context Windows
86
+ - **2M tokens**: ~6 hours video (low-res) or ~2 hours (default)
87
+ - **1M tokens**: ~3 hours video (low-res) or ~1 hour (default)
88
+ - **Audio**: 32 tokens/second (1 min = 1,920 tokens)
89
+ - **PDF**: 258 tokens/page (fixed)
90
+ - **Image**: 258-1,548 tokens based on size
91
+
92
+ ## Quick Start
93
+
94
+ ### Prerequisites
95
+
96
+ **API Key Setup**: Supports both Google AI Studio and Vertex AI.
97
+
98
+ The skill checks for `GEMINI_API_KEY` in this order:
99
+ 1. Process environment: `export GEMINI_API_KEY="your-key"`
100
+ 2. Project root: `.env`
101
+ 3. `.claude/.env`
102
+ 4. `.claude/skills/.env`
103
+ 5. `.claude/skills/ai-multimodal/.env`
104
+
105
+ **Get API key**: https://aistudio.google.com/apikey
106
+
107
+ **For Vertex AI**:
108
+ ```bash
109
+ export GEMINI_USE_VERTEX=true
110
+ export VERTEX_PROJECT_ID=your-gcp-project-id
111
+ export VERTEX_LOCATION=us-central1 # Optional
112
+ ```
113
+
114
+ **Install SDK**:
115
+ ```bash
116
+ pip install google-genai python-dotenv pillow
117
+ ```
118
+
119
+ ### Common Patterns
120
+
121
+ **Transcribe Audio**:
122
+ ```bash
123
+ python scripts/gemini_batch_process.py \
124
+ --files audio.mp3 \
125
+ --task transcribe \
126
+ --model gemini-2.5-flash
127
+ ```
128
+
129
+ **Analyze Image**:
130
+ ```bash
131
+ python scripts/gemini_batch_process.py \
132
+ --files image.jpg \
133
+ --task analyze \
134
+ --prompt "Describe this image" \
135
+ --output docs/assets/<output-name>.md \
136
+ --model gemini-2.5-flash
137
+ ```
138
+
139
+ **Process Video**:
140
+ ```bash
141
+ python scripts/gemini_batch_process.py \
142
+ --files video.mp4 \
143
+ --task analyze \
144
+ --prompt "Summarize key points with timestamps" \
145
+ --output docs/assets/<output-name>.md \
146
+ --model gemini-2.5-flash
147
+ ```
148
+
149
+ **Extract from PDF**:
150
+ ```bash
151
+ python scripts/gemini_batch_process.py \
152
+ --files document.pdf \
153
+ --task extract \
154
+ --prompt "Extract table data as JSON" \
155
+ --output docs/assets/<output-name>.md \
156
+ --format json
157
+ ```
158
+
159
+ **Generate Image**:
160
+ ```bash
161
+ python scripts/gemini_batch_process.py \
162
+ --task generate \
163
+ --prompt "A futuristic city at sunset" \
164
+ --output docs/assets/<output-file-name> \
165
+ --model gemini-2.5-flash-image \
166
+ --aspect-ratio 16:9
167
+ ```
168
+
169
+ **Optimize Media**:
170
+ ```bash
171
+ # Prepare large video for processing
172
+ python scripts/media_optimizer.py \
173
+ --input large-video.mp4 \
174
+ --output docs/assets/<output-file-name> \
175
+ --target-size 100MB
176
+
177
+ # Batch optimize multiple files
178
+ python scripts/media_optimizer.py \
179
+ --input-dir ./videos \
180
+ --output-dir docs/assets/optimized \
181
+ --quality 85
182
+ ```
183
+
184
+ **Convert Documents to Markdown**:
185
+ ```bash
186
+ # Convert to Markdown
187
+ python scripts/document_converter.py \
188
+ --input document.docx \
189
+ --output docs/assets/document.md
190
+
191
+ # Extract pages
192
+ python scripts/document_converter.py \
193
+ --input large.pdf \
194
+ --output docs/assets/chapter1.md \
195
+ --pages 1-20
196
+ ```
197
+
198
+ ## Supported Formats
199
+
200
+ ### Audio
201
+ - WAV, MP3, AAC, FLAC, OGG Vorbis, AIFF
202
+ - Max 9.5 hours per request
203
+ - Auto-downsampled to 16 Kbps mono
204
+
205
+ ### Images
206
+ - PNG, JPEG, WEBP, HEIC, HEIF
207
+ - Max 3,600 images per request
208
+ - Resolution: ≤384px = 258 tokens, larger = tiled
209
+
210
+ ### Video
211
+ - MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV, 3GPP
212
+ - Max 6 hours (low-res) or 2 hours (default)
213
+ - YouTube URLs supported (public only)
214
+
215
+ ### Documents
216
+ - PDF only for vision processing
217
+ - Max 1,000 pages
218
+ - TXT, HTML, Markdown supported (text-only)
219
+
220
+ ### Size Limits
221
+ - **Inline**: <20MB total request
222
+ - **File API**: 2GB per file, 20GB project quota
223
+ - **Retention**: 48 hours auto-delete
224
+
225
+ ## Reference Navigation
226
+
227
+ For detailed implementation guidance, see:
228
+
229
+ ### Audio Processing
230
+ - `references/audio-processing.md` - Transcription, analysis, TTS
231
+ - Timestamp handling and segment analysis
232
+ - Multi-speaker identification
233
+ - Non-speech audio analysis
234
+ - Text-to-speech generation
235
+
236
+ ### Image Understanding
237
+ - `references/vision-understanding.md` - Captioning, detection, OCR
238
+ - Object detection and localization
239
+ - Pixel-level segmentation
240
+ - Visual question answering
241
+ - Multi-image comparison
242
+
243
+ ### Video Analysis
244
+ - `references/video-analysis.md` - Scene detection, temporal understanding
245
+ - YouTube URL processing
246
+ - Timestamp-based queries
247
+ - Video clipping and FPS control
248
+ - Long video optimization
249
+
250
+ ### Document Extraction
251
+ - `references/document-extraction.md` - PDF processing, structured output
252
+ - Table and form extraction
253
+ - Chart and diagram analysis
254
+ - JSON schema validation
255
+ - Multi-page handling
256
+
257
+ ### Image Generation
258
+ - `references/image-generation.md` - Text-to-image, editing
259
+ - Prompt engineering strategies
260
+ - Image editing and composition
261
+ - Aspect ratio selection
262
+ - Safety settings
263
+
264
+ ## Cost Optimization
265
+
266
+ ### Token Costs
267
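+ **Model Pricing** (input and output, per 1M tokens; rates change, so confirm against the Pricing link under Resources):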
268
+ - Gemini 2.5 Flash: $1.00/1M input, $0.10/1M output
269
+ - Gemini 2.5 Pro: $3.00/1M input, $12.00/1M output
270
+ - Gemini 1.5 Flash: $0.70/1M input, $0.175/1M output
271
+
272
+ **Token Rates**:
273
+ - Audio: 32 tokens/second (1 min = 1,920 tokens)
274
+ - Video: ~300 tokens/second (default) or ~100 (low-res)
275
+ - PDF: 258 tokens/page (fixed)
276
+ - Image: 258-1,548 tokens based on size
277
+
278
+ **TTS Pricing**:
279
+ - Flash TTS: $10/1M tokens
280
+ - Pro TTS: $20/1M tokens
281
+
282
+ ### Best Practices
283
+ 1. Use `gemini-2.5-flash` for most tasks (best price/performance)
284
+ 2. Use File API for files >20MB or repeated queries
285
+ 3. Optimize media before upload (see `media_optimizer.py`)
286
+ 4. Process specific segments instead of full videos
287
+ 5. Use lower FPS for static content
288
+ 6. Implement context caching for repeated queries
289
+ 7. Batch process multiple files in parallel
290
+
291
+ ## Rate Limits
292
+
293
+ **Free Tier**:
294
+ - 10-15 RPM (requests per minute)
295
+ - 1M-4M TPM (tokens per minute)
296
+ - 1,500 RPD (requests per day)
297
+
298
+ **YouTube Limits**:
299
+ - Free tier: 8 hours/day
300
+ - Paid tier: No length limits
301
+ - Public videos only
302
+
303
+ **Storage Limits**:
304
+ - 20GB per project
305
+ - 2GB per file
306
+ - 48-hour retention
307
+
308
+ ## Error Handling
309
+
310
+ Common errors and solutions:
311
+ - **400**: Invalid format/size - validate before upload
312
+ - **401**: Invalid API key - check configuration
313
+ - **403**: Permission denied - verify API key restrictions
314
+ - **404**: File not found - ensure file uploaded and active
315
+ - **429**: Rate limit exceeded - implement exponential backoff
316
+ - **500**: Server error - retry with backoff
317
+
318
+ ## Scripts Overview
319
+
320
+ All scripts support unified API key detection and error handling:
321
+
322
+ **gemini_batch_process.py**: Batch process multiple media files
323
+ - Supports all modalities (audio, image, video, PDF)
324
+ - Progress tracking and error recovery
325
+ - Output formats: JSON, Markdown, CSV
326
+ - Rate limiting and retry logic
327
+ - Dry-run mode
328
+
329
+ **media_optimizer.py**: Prepare media for Gemini API
330
+ - Compress videos/audio for size limits
331
+ - Resize images appropriately
332
+ - Split long videos into chunks
333
+ - Format conversion
334
+ - Quality vs size optimization
335
+
336
+ **document_converter.py**: Convert documents to PDF
337
+ - Convert DOCX, XLSX, PPTX to PDF
338
+ - Extract page ranges
339
+ - Optimize PDFs for Gemini
340
+ - Extract images from PDFs
341
+ - Batch conversion support
342
+
343
+ Run any script with `--help` for detailed usage.
344
+
345
+ ## Resources
346
+
347
+ - [Audio API Docs](https://ai.google.dev/gemini-api/docs/audio)
348
+ - [Image API Docs](https://ai.google.dev/gemini-api/docs/image-understanding)
349
+ - [Video API Docs](https://ai.google.dev/gemini-api/docs/video-understanding)
350
+ - [Document API Docs](https://ai.google.dev/gemini-api/docs/document-processing)
351
+ - [Image Gen Docs](https://ai.google.dev/gemini-api/docs/image-generation)
352
+ - [Get API Key](https://aistudio.google.com/apikey)
353
+ - [Pricing](https://ai.google.dev/pricing)
@@ -0,0 +1,201 @@
1
+ ---
2
+ name: clean-code
3
+ description: Pragmatic coding standards - concise, direct, no over-engineering, no unnecessary comments
4
+ allowed-tools: Read, Write, Edit
5
+ version: 2.0
6
+ priority: CRITICAL
7
+ ---
8
+
9
+ # Clean Code - Pragmatic AI Coding Standards
10
+
11
+ > **CRITICAL SKILL** - Be **concise, direct, and solution-focused**.
12
+
13
+ ---
14
+
15
+ ## Core Principles
16
+
17
+ | Principle | Rule |
18
+ |-----------|------|
19
+ | **SRP** | Single Responsibility - each function/class does ONE thing |
20
+ | **DRY** | Don't Repeat Yourself - extract duplicates, reuse |
21
+ | **KISS** | Keep It Simple - simplest solution that works |
22
+ | **YAGNI** | You Aren't Gonna Need It - don't build unused features |
23
+ | **Boy Scout** | Leave code cleaner than you found it |
24
+
25
+ ---
26
+
27
+ ## Naming Rules
28
+
29
+ | Element | Convention |
30
+ |---------|------------|
31
+ | **Variables** | Reveal intent: `userCount` not `n` |
32
+ | **Functions** | Verb + noun: `getUserById()` not `user()` |
33
+ | **Booleans** | Question form: `isActive`, `hasPermission`, `canEdit` |
34
+ | **Constants** | SCREAMING_SNAKE: `MAX_RETRY_COUNT` |
35
+
36
+ > **Rule:** If you need a comment to explain a name, rename it.
37
+
38
+ ---
39
+
40
+ ## Function Rules
41
+
42
+ | Rule | Description |
43
+ |------|-------------|
44
+ | **Small** | Max 20 lines, ideally 5-10 |
45
+ | **One Thing** | Does one thing, does it well |
46
+ | **One Level** | One level of abstraction per function |
47
+ | **Few Args** | Max 3 arguments, prefer 0-2 |
48
+ | **No Side Effects** | Don't mutate inputs unexpectedly |
49
+
50
+ ---
51
+
52
+ ## Code Structure
53
+
54
+ | Pattern | Apply |
55
+ |---------|-------|
56
+ | **Guard Clauses** | Early returns for edge cases |
57
+ | **Flat > Nested** | Avoid deep nesting (max 2 levels) |
58
+ | **Composition** | Small functions composed together |
59
+ | **Colocation** | Keep related code close |
60
+
61
+ ---
62
+
63
+ ## AI Coding Style
64
+
65
+ | Situation | Action |
66
+ |-----------|--------|
67
+ | User asks for feature | Write it directly |
68
+ | User reports bug | Fix it, don't explain |
69
+ | No clear requirement | Ask, don't assume |
70
+
71
+ ---
72
+
73
+ ## Anti-Patterns (DON'T)
74
+
75
+ | ❌ Pattern | ✅ Fix |
76
+ |-----------|-------|
77
+ | Comment every line | Delete obvious comments |
78
+ | Helper for one-liner | Inline the code |
79
+ | Factory for 2 objects | Direct instantiation |
80
+ | utils.ts with 1 function | Put code where used |
81
+ | "First we import..." | Just write code |
82
+ | Deep nesting | Guard clauses |
83
+ | Magic numbers | Named constants |
84
+ | God functions | Split by responsibility |
85
+
86
+ ---
87
+
88
+ ## 🔴 Before Editing ANY File (THINK FIRST!)
89
+
90
+ **Before changing a file, ask yourself:**
91
+
92
+ | Question | Why |
93
+ |----------|-----|
94
+ | **What imports this file?** | They might break |
95
+ | **What does this file import?** | Interface changes |
96
+ | **What tests cover this?** | Tests might fail |
97
+ | **Is this a shared component?** | Multiple places affected |
98
+
99
+ **Quick Check:**
100
+ ```
101
+ File to edit: UserService.ts
102
+ └── Who imports this? → UserController.ts, AuthController.ts
103
+ └── Do they need changes too? → Check function signatures
104
+ ```
105
+
106
+ > 🔴 **Rule:** Edit the file + all dependent files in the SAME task.
107
+ > 🔴 **Never leave broken imports or missing updates.**
108
+
109
+ ---
110
+
111
+ ## Summary
112
+
113
+ | Do | Don't |
114
+ |----|-------|
115
+ | Write code directly | Write tutorials |
116
+ | Let code self-document | Add obvious comments |
117
+ | Fix bugs immediately | Explain the fix first |
118
+ | Inline small things | Create unnecessary files |
119
+ | Name things clearly | Use abbreviations |
120
+ | Keep functions small | Write 100+ line functions |
121
+
122
+ > **Remember: The user wants working code, not a programming lesson.**
123
+
124
+ ---
125
+
126
+ ## 🔴 Self-Check Before Completing (MANDATORY)
127
+
128
+ **Before saying "task complete", verify:**
129
+
130
+ | Check | Question |
131
+ |-------|----------|
132
+ | ✅ **Goal met?** | Did I do exactly what user asked? |
133
+ | ✅ **Files edited?** | Did I modify all necessary files? |
134
+ | ✅ **Code works?** | Did I test/verify the change? |
135
+ | ✅ **No errors?** | Lint and TypeScript pass? |
136
+ | ✅ **Nothing forgotten?** | Any edge cases missed? |
137
+
138
+ > 🔴 **Rule:** If ANY check fails, fix it before completing.
139
+
140
+ ---
141
+
142
+ ## Verification Scripts (MANDATORY)
143
+
144
+ > 🔴 **CRITICAL:** Each agent runs ONLY its own skill's scripts after completing work.
145
+
146
+ ### Agent → Script Mapping
147
+
148
+ | Agent | Script | Command |
149
+ |-------|--------|---------|
150
+ | **frontend-specialist** | UX Audit | `python .agent/skills/frontend-design/scripts/ux_audit.py .` |
151
+ | **frontend-specialist** | A11y Check | `python .agent/skills/frontend-design/scripts/accessibility_checker.py .` |
152
+ | **backend-specialist** | API Validator | `python .agent/skills/api-patterns/scripts/api_validator.py .` |
153
+ | **mobile-developer** | Mobile Audit | `python .agent/skills/mobile-design/scripts/mobile_audit.py .` |
154
+ | **database-architect** | Schema Validate | `python .agent/skills/database-design/scripts/schema_validator.py .` |
155
+ | **security-auditor** | Security Scan | `python .agent/skills/vulnerability-scanner/scripts/security_scan.py .` |
156
+ | **seo-specialist** | SEO Check | `python .agent/skills/seo-fundamentals/scripts/seo_checker.py .` |
157
+ | **seo-specialist** | GEO Check | `python .agent/skills/geo-fundamentals/scripts/geo_checker.py .` |
158
+ | **performance-optimizer** | Lighthouse | `python .agent/skills/performance-profiling/scripts/lighthouse_audit.py <url>` |
159
+ | **test-engineer** | Test Runner | `python .agent/skills/testing-patterns/scripts/test_runner.py .` |
160
+ | **test-engineer** | Playwright | `python .agent/skills/webapp-testing/scripts/playwright_runner.py <url>` |
161
+ | **Any agent** | Lint Check | `python .agent/skills/lint-and-validate/scripts/lint_runner.py .` |
162
+ | **Any agent** | Type Coverage | `python .agent/skills/lint-and-validate/scripts/type_coverage.py .` |
163
+ | **Any agent** | i18n Check | `python .agent/skills/i18n-localization/scripts/i18n_checker.py .` |
164
+
165
+ > ❌ **WRONG:** `test-engineer` running `ux_audit.py`
166
+ > ✅ **CORRECT:** `frontend-specialist` running `ux_audit.py`
167
+
168
+ ---
169
+
170
+ ### 🔴 Script Output Handling (READ → SUMMARIZE → ASK)
171
+
172
+ **When running a validation script, you MUST:**
173
+
174
+ 1. **Run the script** and capture ALL output
175
+ 2. **Parse the output** - identify errors, warnings, and passes
176
+ 3. **Summarize to user** in this format:
177
+
178
+ ```markdown
179
+ ## Script Results: [script_name.py]
180
+
181
+ ### ❌ Errors Found (X items)
182
+ - [File:Line] Error description 1
183
+ - [File:Line] Error description 2
184
+
185
+ ### ⚠️ Warnings (Y items)
186
+ - [File:Line] Warning description
187
+
188
+ ### ✅ Passed (Z items)
189
+ - Check 1 passed
190
+ - Check 2 passed
191
+
192
+ **Should I fix the X errors?**
193
+ ```
194
+
195
+ 4. **Wait for user confirmation** before fixing
196
+ 5. **After fixing** → Re-run script to confirm
197
+
198
+ > 🔴 **VIOLATION:** Running script and ignoring output = FAILED task.
199
+ > 🔴 **VIOLATION:** Auto-fixing without asking = Not allowed.
200
+ > 🔴 **Rule:** Always READ output → SUMMARIZE → ASK → then fix.
201
+
@@ -0,0 +1,109 @@
1
+ ---
2
+ name: code-review-checklist
3
+ description: Code review guidelines covering code quality, security, and best practices.
4
+ allowed-tools: Read, Glob, Grep
5
+ ---
6
+
7
+ # Code Review Checklist
8
+
9
+ ## Quick Review Checklist
10
+
11
+ ### Correctness
12
+ - [ ] Code does what it's supposed to do
13
+ - [ ] Edge cases handled
14
+ - [ ] Error handling in place
15
+ - [ ] No obvious bugs
16
+
17
+ ### Security
18
+ - [ ] Input validated and sanitized
19
+ - [ ] No SQL/NoSQL injection vulnerabilities
20
+ - [ ] No XSS or CSRF vulnerabilities
21
+ - [ ] No hardcoded secrets or sensitive credentials
22
+ - [ ] **AI-Specific:** Protection against Prompt Injection (if applicable)
23
+ - [ ] **AI-Specific:** Outputs are sanitized before being used in critical sinks
24
+
25
+ ### Performance
26
+ - [ ] No N+1 queries
27
+ - [ ] No unnecessary loops
28
+ - [ ] Appropriate caching
29
+ - [ ] Bundle size impact considered
30
+
31
+ ### Code Quality
32
+ - [ ] Clear naming
33
+ - [ ] DRY - no duplicate code
34
+ - [ ] SOLID principles followed
35
+ - [ ] Appropriate abstraction level
36
+
37
+ ### Testing
38
+ - [ ] Unit tests for new code
39
+ - [ ] Edge cases tested
40
+ - [ ] Tests readable and maintainable
41
+
42
+ ### Documentation
43
+ - [ ] Complex logic commented
44
+ - [ ] Public APIs documented
45
+ - [ ] README updated if needed
46
+
47
+ ## AI & LLM Review Patterns (2025)
48
+
49
+ ### Logic & Hallucinations
50
+ - [ ] **Chain of Thought:** Does the logic follow a verifiable path?
51
+ - [ ] **Edge Cases:** Did the AI account for empty states, timeouts, and partial failures?
52
+ - [ ] **External State:** Is the code making safe assumptions about file systems or networks?
53
+
54
+ ### Prompt Engineering Review
55
+ ```typescript
56
+ // ❌ Vague prompt in code
57
+ const response = await ai.generate(userInput);
58
+
59
+ // ✅ Structured & Safe prompt
60
+ const response = await ai.generate({
61
+ system: "You are a specialized parser...",
62
+ input: sanitize(userInput),
63
+ schema: ResponseSchema
64
+ });
65
+ ```
66
+
67
+ ## Anti-Patterns to Flag
68
+
69
+ ```typescript
70
+ // ❌ Magic numbers
71
+ if (status === 3) { ... }
72
+
73
+ // ✅ Named constants
74
+ if (status === Status.ACTIVE) { ... }
75
+
76
+ // ❌ Deep nesting
77
+ if (a) { if (b) { if (c) { ... } } }
78
+
79
+ // ✅ Early returns
80
+ if (!a) return;
81
+ if (!b) return;
82
+ if (!c) return;
83
+ // do work
84
+
85
+ // ❌ Long functions (100+ lines)
86
+ // ✅ Small, focused functions
87
+
88
+ // ❌ any type
89
+ const data: any = ...
90
+
91
+ // ✅ Proper types
92
+ const data: UserData = ...
93
+ ```
94
+
95
+ ## Review Comments Guide
96
+
97
+ ```
98
+ // Blocking issues use 🔴
99
+ 🔴 BLOCKING: SQL injection vulnerability here
100
+
101
+ // Important suggestions use 🟡
102
+ 🟡 SUGGESTION: Consider using useMemo for performance
103
+
104
+ // Minor nits use 🟢
105
+ 🟢 NIT: Prefer const over let for immutable variable
106
+
107
+ // Questions use ❓
108
+ ❓ QUESTION: What happens if user is null here?
109
+ ```
@@ -0,0 +1,52 @@
1
+ ---
2
+ name: database-design
3
+ description: Database design principles and decision-making. Schema design, indexing strategy, ORM selection, serverless databases.
4
+ allowed-tools: Read, Write, Edit, Glob, Grep
5
+ ---
6
+
7
+ # Database Design
8
+
9
+ > **Learn to THINK, not copy SQL patterns.**
10
+
11
+ ## 🎯 Selective Reading Rule
12
+
13
+ **Read ONLY files relevant to the request!** Check the content map, find what you need.
14
+
15
+ | File | Description | When to Read |
16
+ |------|-------------|--------------|
17
+ | `database-selection.md` | PostgreSQL vs Neon vs Turso vs SQLite | Choosing database |
18
+ | `orm-selection.md` | Drizzle vs Prisma vs Kysely | Choosing ORM |
19
+ | `schema-design.md` | Normalization, PKs, relationships | Designing schema |
20
+ | `indexing.md` | Index types, composite indexes | Performance tuning |
21
+ | `optimization.md` | N+1, EXPLAIN ANALYZE | Query optimization |
22
+ | `migrations.md` | Safe migrations, serverless DBs | Schema changes |
23
+
24
+ ---
25
+
26
+ ## ⚠️ Core Principle
27
+
28
+ - ASK user for database preferences when unclear
29
+ - Choose database/ORM based on CONTEXT
30
+ - Don't default to PostgreSQL for everything
31
+
32
+ ---
33
+
34
+ ## Decision Checklist
35
+
36
+ Before designing schema:
37
+
38
+ - [ ] Asked user about database preference?
39
+ - [ ] Chosen database for THIS context?
40
+ - [ ] Considered deployment environment?
41
+ - [ ] Planned index strategy?
42
+ - [ ] Defined relationship types?
43
+
44
+ ---
45
+
46
+ ## Anti-Patterns
47
+
48
+ - ❌ Default to PostgreSQL for simple apps (SQLite may suffice)
49
+ - ❌ Skip indexing
50
+ - ❌ Use SELECT * in production
51
+ - ❌ Store JSON when structured data is better
52
+ - ❌ Ignore N+1 queries