ag-cortex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/.agent/commands/test-browser.md +339 -0
  2. package/.agent/rules/00-constitution.md +46 -0
  3. package/.agent/rules/project-rules.md +49 -0
  4. package/.agent/skills/agent-browser/SKILL.md +223 -0
  5. package/.agent/skills/agent-native-architecture/SKILL.md +435 -0
  6. package/.agent/skills/agent-native-architecture/references/action-parity-discipline.md +409 -0
  7. package/.agent/skills/agent-native-architecture/references/agent-execution-patterns.md +467 -0
  8. package/.agent/skills/agent-native-architecture/references/agent-native-testing.md +582 -0
  9. package/.agent/skills/agent-native-architecture/references/architecture-patterns.md +478 -0
  10. package/.agent/skills/agent-native-architecture/references/dynamic-context-injection.md +338 -0
  11. package/.agent/skills/agent-native-architecture/references/files-universal-interface.md +301 -0
  12. package/.agent/skills/agent-native-architecture/references/from-primitives-to-domain-tools.md +359 -0
  13. package/.agent/skills/agent-native-architecture/references/mcp-tool-design.md +506 -0
  14. package/.agent/skills/agent-native-architecture/references/mobile-patterns.md +871 -0
  15. package/.agent/skills/agent-native-architecture/references/product-implications.md +443 -0
  16. package/.agent/skills/agent-native-architecture/references/refactoring-to-prompt-native.md +317 -0
  17. package/.agent/skills/agent-native-architecture/references/self-modification.md +269 -0
  18. package/.agent/skills/agent-native-architecture/references/shared-workspace-architecture.md +680 -0
  19. package/.agent/skills/agent-native-architecture/references/system-prompt-design.md +250 -0
  20. package/.agent/skills/agent-native-reviewer/SKILL.md +246 -0
  21. package/.agent/skills/andrew-kane-gem-writer/SKILL.md +184 -0
  22. package/.agent/skills/andrew-kane-gem-writer/references/database-adapters.md +231 -0
  23. package/.agent/skills/andrew-kane-gem-writer/references/module-organization.md +121 -0
  24. package/.agent/skills/andrew-kane-gem-writer/references/rails-integration.md +183 -0
  25. package/.agent/skills/andrew-kane-gem-writer/references/resources.md +119 -0
  26. package/.agent/skills/andrew-kane-gem-writer/references/testing-patterns.md +261 -0
  27. package/.agent/skills/ankane-readme-writer/SKILL.md +50 -0
  28. package/.agent/skills/architecture-strategist/SKILL.md +52 -0
  29. package/.agent/skills/best-practices-researcher/SKILL.md +100 -0
  30. package/.agent/skills/bug-reproduction-validator/SKILL.md +67 -0
  31. package/.agent/skills/code-simplicity-reviewer/SKILL.md +85 -0
  32. package/.agent/skills/coding-tutor/.claude-plugin/plugin.json +9 -0
  33. package/.agent/skills/coding-tutor/README.md +37 -0
  34. package/.agent/skills/coding-tutor/commands/quiz-me.md +1 -0
  35. package/.agent/skills/coding-tutor/commands/sync-tutorials.md +25 -0
  36. package/.agent/skills/coding-tutor/commands/teach-me.md +1 -0
  37. package/.agent/skills/coding-tutor/skills/coding-tutor/SKILL.md +214 -0
  38. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/create_tutorial.py +202 -0
  39. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/index_tutorials.py +203 -0
  40. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/quiz_priority.py +190 -0
  41. package/.agent/skills/coding-tutor/skills/coding-tutor/scripts/setup_tutorials.py +132 -0
  42. package/.agent/skills/compound-docs/SKILL.md +510 -0
  43. package/.agent/skills/compound-docs/assets/critical-pattern-template.md +34 -0
  44. package/.agent/skills/compound-docs/assets/resolution-template.md +93 -0
  45. package/.agent/skills/compound-docs/references/yaml-schema.md +65 -0
  46. package/.agent/skills/compound-docs/schema.yaml +176 -0
  47. package/.agent/skills/create-agent-skills/SKILL.md +299 -0
  48. package/.agent/skills/create-agent-skills/references/api-security.md +226 -0
  49. package/.agent/skills/create-agent-skills/references/be-clear-and-direct.md +531 -0
  50. package/.agent/skills/create-agent-skills/references/best-practices.md +404 -0
  51. package/.agent/skills/create-agent-skills/references/common-patterns.md +595 -0
  52. package/.agent/skills/create-agent-skills/references/core-principles.md +437 -0
  53. package/.agent/skills/create-agent-skills/references/executable-code.md +175 -0
  54. package/.agent/skills/create-agent-skills/references/iteration-and-testing.md +474 -0
  55. package/.agent/skills/create-agent-skills/references/official-spec.md +185 -0
  56. package/.agent/skills/create-agent-skills/references/recommended-structure.md +168 -0
  57. package/.agent/skills/create-agent-skills/references/skill-structure.md +372 -0
  58. package/.agent/skills/create-agent-skills/references/using-scripts.md +113 -0
  59. package/.agent/skills/create-agent-skills/references/using-templates.md +112 -0
  60. package/.agent/skills/create-agent-skills/references/workflows-and-validation.md +510 -0
  61. package/.agent/skills/create-agent-skills/templates/router-skill.md +73 -0
  62. package/.agent/skills/create-agent-skills/templates/simple-skill.md +33 -0
  63. package/.agent/skills/create-agent-skills/workflows/add-reference.md +96 -0
  64. package/.agent/skills/create-agent-skills/workflows/add-script.md +93 -0
  65. package/.agent/skills/create-agent-skills/workflows/add-template.md +74 -0
  66. package/.agent/skills/create-agent-skills/workflows/add-workflow.md +120 -0
  67. package/.agent/skills/create-agent-skills/workflows/audit-skill.md +138 -0
  68. package/.agent/skills/create-agent-skills/workflows/create-domain-expertise-skill.md +605 -0
  69. package/.agent/skills/create-agent-skills/workflows/create-new-skill.md +191 -0
  70. package/.agent/skills/create-agent-skills/workflows/get-guidance.md +121 -0
  71. package/.agent/skills/create-agent-skills/workflows/upgrade-to-router.md +161 -0
  72. package/.agent/skills/create-agent-skills/workflows/verify-skill.md +204 -0
  73. package/.agent/skills/data-integrity-guardian/SKILL.md +70 -0
  74. package/.agent/skills/data-migration-expert/SKILL.md +97 -0
  75. package/.agent/skills/deployment-verification-agent/SKILL.md +159 -0
  76. package/.agent/skills/design-implementation-reviewer/SKILL.md +85 -0
  77. package/.agent/skills/design-iterator/SKILL.md +197 -0
  78. package/.agent/skills/dhh-rails-reviewer/SKILL.md +45 -0
  79. package/.agent/skills/dhh-rails-style/SKILL.md +184 -0
  80. package/.agent/skills/dhh-rails-style/references/architecture.md +653 -0
  81. package/.agent/skills/dhh-rails-style/references/controllers.md +303 -0
  82. package/.agent/skills/dhh-rails-style/references/frontend.md +510 -0
  83. package/.agent/skills/dhh-rails-style/references/gems.md +266 -0
  84. package/.agent/skills/dhh-rails-style/references/models.md +359 -0
  85. package/.agent/skills/dhh-rails-style/references/testing.md +338 -0
  86. package/.agent/skills/dspy-ruby/SKILL.md +594 -0
  87. package/.agent/skills/dspy-ruby/assets/config-template.rb +359 -0
  88. package/.agent/skills/dspy-ruby/assets/module-template.rb +326 -0
  89. package/.agent/skills/dspy-ruby/assets/signature-template.rb +143 -0
  90. package/.agent/skills/dspy-ruby/references/core-concepts.md +265 -0
  91. package/.agent/skills/dspy-ruby/references/optimization.md +623 -0
  92. package/.agent/skills/dspy-ruby/references/providers.md +305 -0
  93. package/.agent/skills/every-style-editor/SKILL.md +134 -0
  94. package/.agent/skills/every-style-editor/references/EVERY_WRITE_STYLE.md +529 -0
  95. package/.agent/skills/figma-design-sync/SKILL.md +166 -0
  96. package/.agent/skills/file-todos/SKILL.md +251 -0
  97. package/.agent/skills/file-todos/assets/todo-template.md +155 -0
  98. package/.agent/skills/framework-docs-researcher/SKILL.md +83 -0
  99. package/.agent/skills/frontend-design/SKILL.md +42 -0
  100. package/.agent/skills/gemini-imagegen/SKILL.md +237 -0
  101. package/.agent/skills/gemini-imagegen/requirements.txt +2 -0
  102. package/.agent/skills/gemini-imagegen/scripts/compose_images.py +168 -0
  103. package/.agent/skills/gemini-imagegen/scripts/edit_image.py +157 -0
  104. package/.agent/skills/gemini-imagegen/scripts/gemini_images.py +265 -0
  105. package/.agent/skills/gemini-imagegen/scripts/generate_image.py +147 -0
  106. package/.agent/skills/gemini-imagegen/scripts/multi_turn_chat.py +215 -0
  107. package/.agent/skills/git-history-analyzer/SKILL.md +42 -0
  108. package/.agent/skills/git-worktree/SKILL.md +302 -0
  109. package/.agent/skills/git-worktree/scripts/worktree-manager.sh +345 -0
  110. package/.agent/skills/julik-frontend-races-reviewer/SKILL.md +222 -0
  111. package/.agent/skills/kieran-python-reviewer/SKILL.md +104 -0
  112. package/.agent/skills/kieran-rails-reviewer/SKILL.md +86 -0
  113. package/.agent/skills/kieran-typescript-reviewer/SKILL.md +95 -0
  114. package/.agent/skills/lint/SKILL.md +16 -0
  115. package/.agent/skills/pattern-recognition-specialist/SKILL.md +57 -0
  116. package/.agent/skills/performance-oracle/SKILL.md +110 -0
  117. package/.agent/skills/pr-comment-resolver/SKILL.md +69 -0
  118. package/.agent/skills/rclone/SKILL.md +150 -0
  119. package/.agent/skills/rclone/scripts/check_setup.sh +60 -0
  120. package/.agent/skills/repo-research-analyst/SKILL.md +113 -0
  121. package/.agent/skills/security-sentinel/SKILL.md +93 -0
  122. package/.agent/skills/skill-creator/SKILL.md +209 -0
  123. package/.agent/skills/skill-creator/scripts/init_skill.py +304 -0
  124. package/.agent/skills/skill-creator/scripts/package_skill.py +112 -0
  125. package/.agent/skills/skill-creator/scripts/quick_validate.py +72 -0
  126. package/.agent/skills/spec-flow-analyzer/SKILL.md +113 -0
  127. package/.agent/skills/test-agent/SKILL.md +4 -0
  128. package/.agent/workflows/agent-native-audit.md +277 -0
  129. package/.agent/workflows/ask-user-question.md +21 -0
  130. package/.agent/workflows/changelog.md +137 -0
  131. package/.agent/workflows/compound.md +202 -0
  132. package/.agent/workflows/create-agent-skill.md +8 -0
  133. package/.agent/workflows/deepen-plan-research.md +334 -0
  134. package/.agent/workflows/deepen-plan-synthesis.md +182 -0
  135. package/.agent/workflows/deepen-plan.md +79 -0
  136. package/.agent/workflows/feature-video.md +342 -0
  137. package/.agent/workflows/generate-command.md +162 -0
  138. package/.agent/workflows/heal-skill.md +142 -0
  139. package/.agent/workflows/lfg.md +20 -0
  140. package/.agent/workflows/plan-analysis.md +67 -0
  141. package/.agent/workflows/plan-next-steps.md +63 -0
  142. package/.agent/workflows/plan-review.md +33 -0
  143. package/.agent/workflows/plan-synthesis.md +106 -0
  144. package/.agent/workflows/plan.md +49 -0
  145. package/.agent/workflows/report-bug.md +150 -0
  146. package/.agent/workflows/reproduce-bug.md +99 -0
  147. package/.agent/workflows/resolve-parallel.md +34 -0
  148. package/.agent/workflows/resolve-pr-parallel.md +49 -0
  149. package/.agent/workflows/resolve-todo-parallel.md +35 -0
  150. package/.agent/workflows/review-analysis.md +145 -0
  151. package/.agent/workflows/review-synthesis.md +262 -0
  152. package/.agent/workflows/review.md +64 -0
  153. package/.agent/workflows/ship.md +90 -0
  154. package/.agent/workflows/test-command.md +3 -0
  155. package/.agent/workflows/triage.md +310 -0
  156. package/.agent/workflows/work.md +157 -0
  157. package/.agent/workflows/xcode-test.md +332 -0
  158. package/LICENSE +22 -0
  159. package/README.md +49 -0
  160. package/bin/ag-cortex.js +54 -0
  161. package/lib/core.js +165 -0
  162. package/package.json +31 -0
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: frontend-design
3
+ description: This skill should be used when creating distinctive, production-grade frontend interfaces with high design quality. It applies when the user asks to build web components, pages, or applications. Generates creative, polished code that avoids generic AI aesthetics.
4
+ license: Complete terms in LICENSE.txt
5
+ ---
6
+
7
+ This skill guides creation of distinctive, production-grade frontend interfaces that avoid generic "AI slop" aesthetics. Implement real working code with exceptional attention to aesthetic details and creative choices.
8
+
9
+ The user provides frontend requirements: a component, page, application, or interface to build. They may include context about the purpose, audience, or technical constraints.
10
+
11
+ ## Design Thinking
12
+
13
+ Before coding, understand the context and commit to a BOLD aesthetic direction:
14
+ - **Purpose**: What problem does this interface solve? Who uses it?
15
+ - **Tone**: Pick an extreme: brutally minimal, maximalist chaos, retro-futuristic, organic/natural, luxury/refined, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. There are so many flavors to choose from. Use these for inspiration but design one that is true to the aesthetic direction.
16
+ - **Constraints**: Technical requirements (framework, performance, accessibility).
17
+ - **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember?
18
+
19
+ **CRITICAL**: Choose a clear conceptual direction and execute it with precision. Bold maximalism and refined minimalism both work - the key is intentionality, not intensity.
20
+
21
+ Then implement working code (HTML/CSS/JS, React, Vue, etc.) that is:
22
+ - Production-grade and functional
23
+ - Visually striking and memorable
24
+ - Cohesive with a clear aesthetic point-of-view
25
+ - Meticulously refined in every detail
26
+
27
+ ## Frontend Aesthetics Guidelines
28
+
29
+ Focus on:
30
+ - **Typography**: Choose fonts that are beautiful, unique, and interesting. Avoid generic fonts like Arial and Inter; opt instead for unexpected, characterful choices that elevate the frontend's aesthetics. Pair a distinctive display font with a refined body font.
31
+ - **Color & Theme**: Commit to a cohesive aesthetic. Use CSS variables for consistency. Dominant colors with sharp accents outperform timid, evenly-distributed palettes.
32
+ - **Motion**: Use animations for effects and micro-interactions. Prioritize CSS-only solutions for HTML. Use Motion library for React when available. Focus on high-impact moments: one well-orchestrated page load with staggered reveals (animation-delay) creates more delight than scattered micro-interactions. Use scroll-triggering and hover states that surprise.
33
+ - **Spatial Composition**: Unexpected layouts. Asymmetry. Overlap. Diagonal flow. Grid-breaking elements. Generous negative space OR controlled density.
34
+ - **Backgrounds & Visual Details**: Create atmosphere and depth rather than defaulting to solid colors. Add contextual effects and textures that match the overall aesthetic. Apply creative forms like gradient meshes, noise textures, geometric patterns, layered transparencies, dramatic shadows, decorative borders, custom cursors, and grain overlays.
35
+
36
+ NEVER use generic AI-generated aesthetics like overused font families (Inter, Roboto, Arial, system fonts), cliched color schemes (particularly purple gradients on white backgrounds), predictable layouts and component patterns, and cookie-cutter design that lacks context-specific character.
37
+
38
+ Interpret creatively and make unexpected choices that feel genuinely designed for the context. No design should be the same. Vary between light and dark themes, different fonts, different aesthetics. NEVER converge on common choices (Space Grotesk, for example) across generations.
39
+
40
+ **IMPORTANT**: Match implementation complexity to the aesthetic vision. Maximalist designs need elaborate code with extensive animations and effects. Minimalist or refined designs need restraint, precision, and careful attention to spacing, typography, and subtle details. Elegance comes from executing the vision well.
41
+
42
+ Remember: Antigravity is capable of extraordinary creative work. Don't hold back, show what can truly be created when thinking outside the box and committing fully to a distinctive vision.
@@ -0,0 +1,237 @@
1
+ ---
2
+ name: gemini-imagegen
3
+ description: This skill should be used when generating and editing images using the Gemini API (Nano Banana Pro). It applies when creating images from text prompts, editing existing images, applying style transfers, generating logos with text, creating stickers, product mockups, or any image generation/manipulation task. Supports text-to-image, image editing, multi-turn refinement, and composition from multiple reference images.
4
+ ---
5
+
6
+ # Gemini Image Generation (Nano Banana Pro)
7
+
8
+ Generate and edit images using Google's Gemini API. The environment variable `GEMINI_API_KEY` must be set.
9
+
10
+ ## Default Model
11
+
12
+ | Model | Resolution | Best For |
13
+ |-------|------------|----------|
14
+ | `gemini-3-pro-image-preview` | 1K-4K | All image generation (default) |
15
+
16
+ **Note:** Always use this Pro model. Only use a different model if explicitly requested.
17
+
18
+ ## Quick Reference
19
+
20
+ ### Default Settings
21
+ - **Model:** `gemini-3-pro-image-preview`
22
+ - **Resolution:** 1K (default, options: 1K, 2K, 4K)
23
+ - **Aspect Ratio:** 1:1 (default)
24
+
25
+ ### Available Aspect Ratios
26
+ `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
27
+
28
+ ### Available Resolutions
29
+ `1K` (default), `2K`, `4K`
30
+
31
+ ## Core API Pattern
32
+
33
+ ```python
34
+ import os
35
+ from google import genai
36
+ from google.genai import types
37
+
38
+ client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
39
+
40
+ # Basic generation (1K, 1:1 - defaults)
41
+ response = client.models.generate_content(
42
+ model="gemini-3-pro-image-preview",
43
+ contents=["Your prompt here"],
44
+ config=types.GenerateContentConfig(
45
+ response_modalities=['TEXT', 'IMAGE'],
46
+ ),
47
+ )
48
+
49
+ for part in response.parts:
50
+ if part.text:
51
+ print(part.text)
52
+ elif part.inline_data:
53
+ image = part.as_image()
54
+ image.save("output.jpg")  # Gemini returns JPEG by default - see "File Format & Media Type" below
55
+ ```
56
+
57
+ ## Custom Resolution & Aspect Ratio
58
+
59
+ ```python
60
+ from google.genai import types
61
+
62
+ response = client.models.generate_content(
63
+ model="gemini-3-pro-image-preview",
64
+ contents=[prompt],
65
+ config=types.GenerateContentConfig(
66
+ response_modalities=['TEXT', 'IMAGE'],
67
+ image_config=types.ImageConfig(
68
+ aspect_ratio="16:9", # Wide format
69
+ image_size="2K" # Higher resolution
70
+ ),
71
+ )
72
+ )
73
+ ```
74
+
75
+ ### Resolution Examples
76
+
77
+ ```python
78
+ # 1K (default) - Fast, good for previews
79
+ image_config=types.ImageConfig(image_size="1K")
80
+
81
+ # 2K - Balanced quality/speed
82
+ image_config=types.ImageConfig(image_size="2K")
83
+
84
+ # 4K - Maximum quality, slower
85
+ image_config=types.ImageConfig(image_size="4K")
86
+ ```
87
+
88
+ ### Aspect Ratio Examples
89
+
90
+ ```python
91
+ # Square (default)
92
+ image_config=types.ImageConfig(aspect_ratio="1:1")
93
+
94
+ # Landscape wide
95
+ image_config=types.ImageConfig(aspect_ratio="16:9")
96
+
97
+ # Ultra-wide panoramic
98
+ image_config=types.ImageConfig(aspect_ratio="21:9")
99
+
100
+ # Portrait
101
+ image_config=types.ImageConfig(aspect_ratio="9:16")
102
+
103
+ # Photo standard
104
+ image_config=types.ImageConfig(aspect_ratio="4:3")
105
+ ```
106
+
107
+ ## Editing Images
108
+
109
+ Pass existing images with text prompts:
110
+
111
+ ```python
112
+ from PIL import Image
113
+
114
+ img = Image.open("input.png")
115
+ response = client.models.generate_content(
116
+ model="gemini-3-pro-image-preview",
117
+ contents=["Add a sunset to this scene", img],
118
+ config=types.GenerateContentConfig(
119
+ response_modalities=['TEXT', 'IMAGE'],
120
+ ),
121
+ )
122
+ ```
123
+
124
+ ## Multi-Turn Refinement
125
+
126
+ Use chat for iterative editing:
127
+
128
+ ```python
129
+ from google.genai import types
130
+
131
+ chat = client.chats.create(
132
+ model="gemini-3-pro-image-preview",
133
+ config=types.GenerateContentConfig(response_modalities=['TEXT', 'IMAGE'])
134
+ )
135
+
136
+ response = chat.send_message("Create a logo for 'Acme Corp'")
137
+ # Save first image...
138
+
139
+ response = chat.send_message("Make the text bolder and add a blue gradient")
140
+ # Save refined image...
141
+ ```
142
+
143
+ ## Prompting Best Practices
144
+
145
+ ### Photorealistic Scenes
146
+ Include camera details: lens type, lighting, angle, mood.
147
+ > "A photorealistic close-up portrait, 85mm lens, soft golden hour light, shallow depth of field"
148
+
149
+ ### Stylized Art
150
+ Specify style explicitly:
151
+ > "A kawaii-style sticker of a happy red panda, bold outlines, cel-shading, white background"
152
+
153
+ ### Text in Images
154
+ Be explicit about font style and placement:
155
+ > "Create a logo with text 'Daily Grind' in clean sans-serif, black and white, coffee bean motif"
156
+
157
+ ### Product Mockups
158
+ Describe lighting setup and surface:
159
+ > "Studio-lit product photo on polished concrete, three-point softbox setup, 45-degree angle"
160
+
161
+ ## Advanced Features
162
+
163
+ ### Google Search Grounding
164
+ Generate images based on real-time data:
165
+
166
+ ```python
167
+ response = client.models.generate_content(
168
+ model="gemini-3-pro-image-preview",
169
+ contents=["Visualize today's weather in Tokyo as an infographic"],
170
+ config=types.GenerateContentConfig(
171
+ response_modalities=['TEXT', 'IMAGE'],
172
+ tools=[{"google_search": {}}]
173
+ )
174
+ )
175
+ ```
176
+
177
+ ### Multiple Reference Images (Up to 14)
178
+ Combine elements from multiple sources:
179
+
180
+ ```python
181
+ response = client.models.generate_content(
182
+ model="gemini-3-pro-image-preview",
183
+ contents=[
184
+ "Create a group photo of these people in an office",
185
+ Image.open("person1.png"),
186
+ Image.open("person2.png"),
187
+ Image.open("person3.png"),
188
+ ],
189
+ config=types.GenerateContentConfig(
190
+ response_modalities=['TEXT', 'IMAGE'],
191
+ ),
192
+ )
193
+ ```
194
+
195
+ ## Important: File Format & Media Type
196
+
197
+ **CRITICAL:** The Gemini API returns images in JPEG format by default. When saving, always use `.jpg` extension to avoid media type mismatches.
198
+
199
+ ```python
200
+ # CORRECT - Use .jpg extension (Gemini returns JPEG)
201
+ image.save("output.jpg")
202
+
203
+ # WRONG - Will cause "Image does not match media type" errors
204
+ image.save("output.png") # Creates JPEG with PNG extension!
205
+ ```
206
+
207
+ ### Converting to PNG (if needed)
208
+
209
+ If you specifically need PNG format:
210
+
211
+ ```python
212
+ from PIL import Image
213
+
214
+ # Generate with Gemini
215
+ for part in response.parts:
216
+ if part.inline_data:
217
+ img = part.as_image()
218
+ # Convert to PNG by saving with explicit format
219
+ img.save("output.png", format="PNG")
220
+ ```
221
+
222
+ ### Verifying Image Format
223
+
224
+ Check actual format vs extension with the `file` command:
225
+
226
+ ```bash
227
+ file image.png
228
+ # If output shows "JPEG image data" - rename to .jpg!
229
+ ```
230
+
231
+ ## Notes
232
+
233
+ - All generated images include SynthID watermarks
234
+ - Gemini returns **JPEG format by default** - always use `.jpg` extension
235
+ - Image-only mode (`response_modalities=['IMAGE']`) won't work with Google Search grounding
236
+ - For editing, describe changes conversationally—the model understands semantic masking
237
+ - Default to 1K resolution for speed; use 2K/4K when quality is critical
@@ -0,0 +1,2 @@
1
+ google-genai>=1.0.0
2
+ Pillow>=10.0.0
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Compose multiple images into a new image using Gemini API.
4
+
5
+ Usage:
6
+ python compose_images.py "instruction" output.png image1.png [image2.png ...]
7
+
8
+ Examples:
9
+ python compose_images.py "Create a group photo of these people" group.png person1.png person2.png
10
+ python compose_images.py "Put the cat from the first image on the couch from the second" result.png cat.png couch.png
11
+ python compose_images.py "Apply the art style from the first image to the scene in the second" styled.png style.png photo.png
12
+
13
+ Note: Supports up to 14 reference images (Gemini 3 Pro only).
14
+
15
+ Environment:
16
+ GEMINI_API_KEY - Required API key
17
+ """
18
+
19
+ import argparse
20
+ import os
21
+ import sys
22
+
23
+ from PIL import Image
24
+ from google import genai
25
+ from google.genai import types
26
+
27
+
28
+ def compose_images(
29
+ instruction: str,
30
+ output_path: str,
31
+ image_paths: list[str],
32
+ model: str = "gemini-3-pro-image-preview",
33
+ aspect_ratio: str | None = None,
34
+ image_size: str | None = None,
35
+ ) -> str | None:
36
+ """Compose multiple images based on instructions.
37
+
38
+ Args:
39
+ instruction: Text description of how to combine images
40
+ output_path: Path to save the result
41
+ image_paths: List of input image paths (up to 14)
42
+ model: Gemini model to use (pro recommended)
43
+ aspect_ratio: Output aspect ratio
44
+ image_size: Output resolution
45
+
46
+ Returns:
47
+ Any text response from the model, or None
48
+ """
49
+ api_key = os.environ.get("GEMINI_API_KEY")
50
+ if not api_key:
51
+ raise EnvironmentError("GEMINI_API_KEY environment variable not set")
52
+
53
+ if len(image_paths) > 14:
54
+ raise ValueError("Maximum 14 reference images supported")
55
+
56
+ if len(image_paths) < 1:
57
+ raise ValueError("At least one image is required")
58
+
59
+ # Verify all images exist
60
+ for path in image_paths:
61
+ if not os.path.exists(path):
62
+ raise FileNotFoundError(f"Image not found: {path}")
63
+
64
+ client = genai.Client(api_key=api_key)
65
+
66
+ # Load images
67
+ images = [Image.open(path) for path in image_paths]
68
+
69
+ # Build contents: instruction first, then images
70
+ contents = [instruction] + images
71
+
72
+ # Build config
73
+ config_kwargs = {"response_modalities": ["TEXT", "IMAGE"]}
74
+
75
+ image_config_kwargs = {}
76
+ if aspect_ratio:
77
+ image_config_kwargs["aspect_ratio"] = aspect_ratio
78
+ if image_size:
79
+ image_config_kwargs["image_size"] = image_size
80
+
81
+ if image_config_kwargs:
82
+ config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs)
83
+
84
+ config = types.GenerateContentConfig(**config_kwargs)
85
+
86
+ response = client.models.generate_content(
87
+ model=model,
88
+ contents=contents,
89
+ config=config,
90
+ )
91
+
92
+ text_response = None
93
+ image_saved = False
94
+
95
+ for part in response.parts:
96
+ if part.text is not None:
97
+ text_response = part.text
98
+ elif part.inline_data is not None:
99
+ image = part.as_image()
100
+ image.save(output_path)
101
+ image_saved = True
102
+
103
+ if not image_saved:
104
+ raise RuntimeError("No image was generated.")
105
+
106
+ return text_response
107
+
108
+
109
def main():
    """Command-line entry point for composing images.

    Parses the instruction, output path, input images and optional
    model / aspect ratio / resolution flags, then calls compose_images.
    Any exception from that call is reported on stderr and the process
    exits with status 1.
    """
    aspect_choices = [
        "1:1",
        "2:3",
        "3:2",
        "3:4",
        "4:3",
        "4:5",
        "5:4",
        "9:16",
        "16:9",
        "21:9",
    ]
    model_choices = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"]

    cli = argparse.ArgumentParser(
        description="Compose multiple images using Gemini API",
        epilog=__doc__,  # module docstring doubles as extended help
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Positional arguments, in the order they appear on the command line
    cli.add_argument("instruction", help="Composition instruction")
    cli.add_argument("output", help="Output file path")
    cli.add_argument("images", nargs="+", help="Input images (up to 14)")

    # Optional flags
    cli.add_argument(
        "--model",
        "-m",
        choices=model_choices,
        default="gemini-3-pro-image-preview",
        help="Model to use (pro recommended for composition)",
    )
    cli.add_argument(
        "--aspect", "-a", choices=aspect_choices, help="Output aspect ratio"
    )
    cli.add_argument(
        "--size", "-s", choices=["1K", "2K", "4K"], help="Output resolution"
    )

    opts = cli.parse_args()

    try:
        reply = compose_images(
            instruction=opts.instruction,
            output_path=opts.output,
            image_paths=opts.images,
            model=opts.model,
            aspect_ratio=opts.aspect,
            image_size=opts.size,
        )

        print(f"Composed image saved to: {opts.output}")
        if reply:
            print(f"Model response: {reply}")

    except Exception as err:
        # Top-level boundary: surface the failure and signal it via exit code
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Edit existing images using Gemini API.
4
+
5
+ Usage:
6
+ python edit_image.py input.png "edit instruction" output.png [options]
7
+
8
+ Examples:
9
+ python edit_image.py photo.png "Add a rainbow in the sky" edited.png
10
+ python edit_image.py room.jpg "Change the sofa to red leather" room_edited.jpg
11
+ python edit_image.py portrait.png "Make it look like a Van Gogh painting" artistic.png --model gemini-3-pro-image-preview
12
+
13
+ Environment:
14
+ GEMINI_API_KEY - Required API key
15
+ """
16
+
17
+ import argparse
18
+ import os
19
+ import sys
20
+
21
+ from PIL import Image
22
+ from google import genai
23
+ from google.genai import types
24
+
25
+
26
+ def edit_image(
27
+ input_path: str,
28
+ instruction: str,
29
+ output_path: str,
30
+ model: str = "gemini-2.5-flash-image",
31
+ aspect_ratio: str | None = None,
32
+ image_size: str | None = None,
33
+ ) -> str | None:
34
+ """Edit an existing image based on text instructions.
35
+
36
+ Args:
37
+ input_path: Path to the input image
38
+ instruction: Text description of edits to make
39
+ output_path: Path to save the edited image
40
+ model: Gemini model to use
41
+ aspect_ratio: Output aspect ratio
42
+ image_size: Output resolution
43
+
44
+ Returns:
45
+ Any text response from the model, or None
46
+ """
47
+ api_key = os.environ.get("GEMINI_API_KEY")
48
+ if not api_key:
49
+ raise EnvironmentError("GEMINI_API_KEY environment variable not set")
50
+
51
+ if not os.path.exists(input_path):
52
+ raise FileNotFoundError(f"Input image not found: {input_path}")
53
+
54
+ client = genai.Client(api_key=api_key)
55
+
56
+ # Load input image
57
+ input_image = Image.open(input_path)
58
+
59
+ # Build config
60
+ config_kwargs = {"response_modalities": ["TEXT", "IMAGE"]}
61
+
62
+ image_config_kwargs = {}
63
+ if aspect_ratio:
64
+ image_config_kwargs["aspect_ratio"] = aspect_ratio
65
+ if image_size:
66
+ image_config_kwargs["image_size"] = image_size
67
+
68
+ if image_config_kwargs:
69
+ config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs)
70
+
71
+ config = types.GenerateContentConfig(**config_kwargs)
72
+
73
+ response = client.models.generate_content(
74
+ model=model,
75
+ contents=[instruction, input_image],
76
+ config=config,
77
+ )
78
+
79
+ text_response = None
80
+ image_saved = False
81
+
82
+ for part in response.parts:
83
+ if part.text is not None:
84
+ text_response = part.text
85
+ elif part.inline_data is not None:
86
+ image = part.as_image()
87
+ image.save(output_path)
88
+ image_saved = True
89
+
90
+ if not image_saved:
91
+ raise RuntimeError(
92
+ "No image was generated. Check your instruction and try again."
93
+ )
94
+
95
+ return text_response
96
+
97
+
98
def main():
    """Command-line entry point for editing an image.

    Parses the input path, edit instruction, output path and optional
    model / aspect ratio / resolution flags, then calls edit_image.
    Any exception from that call is reported on stderr and the process
    exits with status 1.
    """
    aspect_choices = [
        "1:1",
        "2:3",
        "3:2",
        "3:4",
        "4:3",
        "4:5",
        "5:4",
        "9:16",
        "16:9",
        "21:9",
    ]
    model_choices = ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"]

    cli = argparse.ArgumentParser(
        description="Edit images using Gemini API",
        epilog=__doc__,  # module docstring doubles as extended help
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Positional arguments, in the order they appear on the command line
    cli.add_argument("input", help="Input image path")
    cli.add_argument("instruction", help="Edit instruction")
    cli.add_argument("output", help="Output file path")

    # Optional flags
    cli.add_argument(
        "--model",
        "-m",
        choices=model_choices,
        default="gemini-2.5-flash-image",
        help="Model to use (default: gemini-2.5-flash-image)",
    )
    cli.add_argument(
        "--aspect", "-a", choices=aspect_choices, help="Output aspect ratio"
    )
    cli.add_argument(
        "--size", "-s", choices=["1K", "2K", "4K"], help="Output resolution"
    )

    opts = cli.parse_args()

    try:
        reply = edit_image(
            input_path=opts.input,
            instruction=opts.instruction,
            output_path=opts.output,
            model=opts.model,
            aspect_ratio=opts.aspect,
            image_size=opts.size,
        )

        print(f"Edited image saved to: {opts.output}")
        if reply:
            print(f"Model response: {reply}")

    except Exception as err:
        # Top-level boundary: surface the failure and signal it via exit code
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()