@fro.bot/systematic 1.18.8 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/docs/ankane-readme-writer.md +67 -0
- package/agents/review/julik-frontend-races-reviewer.md +223 -0
- package/agents/review/kieran-python-reviewer.md +135 -0
- package/agents/review/schema-drift-detector.md +156 -0
- package/agents/workflow/every-style-editor.md +66 -0
- package/commands/agent-native-audit.md +4 -2
- package/commands/changelog.md +139 -0
- package/commands/create-agent-skill.md +5 -2
- package/commands/deepen-plan.md +50 -20
- package/commands/deploy-docs.md +120 -0
- package/commands/feature-video.md +352 -0
- package/commands/generate_command.md +164 -0
- package/commands/heal-skill.md +149 -0
- package/commands/lfg.md +14 -8
- package/commands/report-bug.md +151 -0
- package/commands/reproduce-bug.md +100 -0
- package/commands/resolve_parallel.md +36 -0
- package/commands/resolve_todo_parallel.md +37 -0
- package/commands/slfg.md +33 -0
- package/commands/test-browser.md +340 -0
- package/commands/test-xcode.md +333 -0
- package/commands/triage.md +311 -0
- package/commands/workflows/brainstorm.md +6 -1
- package/commands/workflows/compound.md +16 -13
- package/commands/workflows/plan.md +49 -1
- package/commands/workflows/review.md +28 -24
- package/commands/workflows/work.md +60 -25
- package/package.json +1 -1
- package/skills/andrew-kane-gem-writer/SKILL.md +185 -0
- package/skills/andrew-kane-gem-writer/references/database-adapters.md +231 -0
- package/skills/andrew-kane-gem-writer/references/module-organization.md +121 -0
- package/skills/andrew-kane-gem-writer/references/rails-integration.md +183 -0
- package/skills/andrew-kane-gem-writer/references/resources.md +119 -0
- package/skills/andrew-kane-gem-writer/references/testing-patterns.md +261 -0
- package/skills/dhh-rails-style/SKILL.md +186 -0
- package/skills/dhh-rails-style/references/architecture.md +653 -0
- package/skills/dhh-rails-style/references/controllers.md +303 -0
- package/skills/dhh-rails-style/references/frontend.md +510 -0
- package/skills/dhh-rails-style/references/gems.md +266 -0
- package/skills/dhh-rails-style/references/models.md +359 -0
- package/skills/dhh-rails-style/references/testing.md +338 -0
- package/skills/dspy-ruby/SKILL.md +738 -0
- package/skills/dspy-ruby/assets/config-template.rb +187 -0
- package/skills/dspy-ruby/assets/module-template.rb +300 -0
- package/skills/dspy-ruby/assets/signature-template.rb +221 -0
- package/skills/dspy-ruby/references/core-concepts.md +674 -0
- package/skills/dspy-ruby/references/observability.md +366 -0
- package/skills/dspy-ruby/references/optimization.md +603 -0
- package/skills/dspy-ruby/references/providers.md +418 -0
- package/skills/dspy-ruby/references/toolsets.md +502 -0
- package/skills/every-style-editor/SKILL.md +135 -0
- package/skills/every-style-editor/references/EVERY_WRITE_STYLE.md +529 -0
- package/skills/gemini-imagegen/SKILL.md +238 -0
- package/skills/gemini-imagegen/requirements.txt +2 -0
- package/skills/gemini-imagegen/scripts/compose_images.py +157 -0
- package/skills/gemini-imagegen/scripts/edit_image.py +144 -0
- package/skills/gemini-imagegen/scripts/gemini_images.py +263 -0
- package/skills/gemini-imagegen/scripts/generate_image.py +133 -0
- package/skills/gemini-imagegen/scripts/multi_turn_chat.py +216 -0
- package/skills/rclone/SKILL.md +151 -0
- package/skills/rclone/scripts/check_setup.sh +60 -0
- package/skills/resolve-pr-parallel/SKILL.md +90 -0
- package/skills/resolve-pr-parallel/scripts/get-pr-comments +68 -0
- package/skills/resolve-pr-parallel/scripts/resolve-pr-thread +23 -0
- package/skills/setup/SKILL.md +168 -0
- package/skills/skill-creator/SKILL.md +211 -0
- package/skills/skill-creator/scripts/init_skill.py +303 -0
- package/skills/skill-creator/scripts/package_skill.py +110 -0
- package/skills/skill-creator/scripts/quick_validate.py +65 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: gemini-imagegen
|
|
3
|
+
description: This skill should be used when generating and editing images using the Gemini API (Nano Banana Pro). It applies when creating images from text prompts, editing existing images, applying style transfers, generating logos with text, creating stickers, product mockups, or any image generation/manipulation task. Supports text-to-image, image editing, multi-turn refinement, and composition from multiple reference images.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Gemini Image Generation (Nano Banana Pro)
|
|
7
|
+
|
|
8
|
+
Generate and edit images using Google's Gemini API. The environment variable `GEMINI_API_KEY` must be set.
|
|
9
|
+
|
|
10
|
+
## Default Model
|
|
11
|
+
|
|
12
|
+
| Model | Resolution | Best For |
|
|
13
|
+
|-------|------------|----------|
|
|
14
|
+
| `gemini-3-pro-image-preview` | 1K-4K | All image generation (default) |
|
|
15
|
+
|
|
16
|
+
**Note:** Always use this Pro model. Only use a different model if explicitly requested.
|
|
17
|
+
|
|
18
|
+
## Quick Reference
|
|
19
|
+
|
|
20
|
+
### Default Settings
|
|
21
|
+
- **Model:** `gemini-3-pro-image-preview`
|
|
22
|
+
- **Resolution:** 1K (default, options: 1K, 2K, 4K)
|
|
23
|
+
- **Aspect Ratio:** 1:1 (default)
|
|
24
|
+
|
|
25
|
+
### Available Aspect Ratios
|
|
26
|
+
`1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
|
|
27
|
+
|
|
28
|
+
### Available Resolutions
|
|
29
|
+
`1K` (default), `2K`, `4K`
|
|
30
|
+
|
|
31
|
+
## Core API Pattern
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import os
|
|
35
|
+
from google import genai
|
|
36
|
+
from google.genai import types
|
|
37
|
+
|
|
38
|
+
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
|
|
39
|
+
|
|
40
|
+
# Basic generation (1K, 1:1 - defaults)
|
|
41
|
+
response = client.models.generate_content(
|
|
42
|
+
model="gemini-3-pro-image-preview",
|
|
43
|
+
contents=["Your prompt here"],
|
|
44
|
+
config=types.GenerateContentConfig(
|
|
45
|
+
response_modalities=['TEXT', 'IMAGE'],
|
|
46
|
+
),
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
for part in response.parts:
|
|
50
|
+
if part.text:
|
|
51
|
+
print(part.text)
|
|
52
|
+
elif part.inline_data:
|
|
53
|
+
image = part.as_image()
|
|
54
|
+
image.save("output.jpg")  # Gemini returns JPEG data by default, so use a .jpg extension
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Custom Resolution & Aspect Ratio
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from google.genai import types
|
|
61
|
+
|
|
62
|
+
response = client.models.generate_content(
|
|
63
|
+
model="gemini-3-pro-image-preview",
|
|
64
|
+
contents=[prompt],
|
|
65
|
+
config=types.GenerateContentConfig(
|
|
66
|
+
response_modalities=['TEXT', 'IMAGE'],
|
|
67
|
+
image_config=types.ImageConfig(
|
|
68
|
+
aspect_ratio="16:9", # Wide format
|
|
69
|
+
image_size="2K" # Higher resolution
|
|
70
|
+
),
|
|
71
|
+
)
|
|
72
|
+
)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Resolution Examples
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
# 1K (default) - Fast, good for previews
|
|
79
|
+
image_config=types.ImageConfig(image_size="1K")
|
|
80
|
+
|
|
81
|
+
# 2K - Balanced quality/speed
|
|
82
|
+
image_config=types.ImageConfig(image_size="2K")
|
|
83
|
+
|
|
84
|
+
# 4K - Maximum quality, slower
|
|
85
|
+
image_config=types.ImageConfig(image_size="4K")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Aspect Ratio Examples
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
# Square (default)
|
|
92
|
+
image_config=types.ImageConfig(aspect_ratio="1:1")
|
|
93
|
+
|
|
94
|
+
# Landscape wide
|
|
95
|
+
image_config=types.ImageConfig(aspect_ratio="16:9")
|
|
96
|
+
|
|
97
|
+
# Ultra-wide panoramic
|
|
98
|
+
image_config=types.ImageConfig(aspect_ratio="21:9")
|
|
99
|
+
|
|
100
|
+
# Portrait
|
|
101
|
+
image_config=types.ImageConfig(aspect_ratio="9:16")
|
|
102
|
+
|
|
103
|
+
# Photo standard
|
|
104
|
+
image_config=types.ImageConfig(aspect_ratio="4:3")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Editing Images
|
|
108
|
+
|
|
109
|
+
Pass existing images with text prompts:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from PIL import Image
|
|
113
|
+
|
|
114
|
+
img = Image.open("input.png")
|
|
115
|
+
response = client.models.generate_content(
|
|
116
|
+
model="gemini-3-pro-image-preview",
|
|
117
|
+
contents=["Add a sunset to this scene", img],
|
|
118
|
+
config=types.GenerateContentConfig(
|
|
119
|
+
response_modalities=['TEXT', 'IMAGE'],
|
|
120
|
+
),
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Multi-Turn Refinement
|
|
125
|
+
|
|
126
|
+
Use chat for iterative editing:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from google.genai import types
|
|
130
|
+
|
|
131
|
+
chat = client.chats.create(
|
|
132
|
+
model="gemini-3-pro-image-preview",
|
|
133
|
+
config=types.GenerateContentConfig(response_modalities=['TEXT', 'IMAGE'])
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
response = chat.send_message("Create a logo for 'Acme Corp'")
|
|
137
|
+
# Save first image...
|
|
138
|
+
|
|
139
|
+
response = chat.send_message("Make the text bolder and add a blue gradient")
|
|
140
|
+
# Save refined image...
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Prompting Best Practices
|
|
144
|
+
|
|
145
|
+
### Photorealistic Scenes
|
|
146
|
+
Include camera details: lens type, lighting, angle, mood.
|
|
147
|
+
> "A photorealistic close-up portrait, 85mm lens, soft golden hour light, shallow depth of field"
|
|
148
|
+
|
|
149
|
+
### Stylized Art
|
|
150
|
+
Specify style explicitly:
|
|
151
|
+
> "A kawaii-style sticker of a happy red panda, bold outlines, cel-shading, white background"
|
|
152
|
+
|
|
153
|
+
### Text in Images
|
|
154
|
+
Be explicit about font style and placement:
|
|
155
|
+
> "Create a logo with text 'Daily Grind' in clean sans-serif, black and white, coffee bean motif"
|
|
156
|
+
|
|
157
|
+
### Product Mockups
|
|
158
|
+
Describe lighting setup and surface:
|
|
159
|
+
> "Studio-lit product photo on polished concrete, three-point softbox setup, 45-degree angle"
|
|
160
|
+
|
|
161
|
+
## Advanced Features
|
|
162
|
+
|
|
163
|
+
### Google Search Grounding
|
|
164
|
+
Generate images based on real-time data:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
response = client.models.generate_content(
|
|
168
|
+
model="gemini-3-pro-image-preview",
|
|
169
|
+
contents=["Visualize today's weather in Tokyo as an infographic"],
|
|
170
|
+
config=types.GenerateContentConfig(
|
|
171
|
+
response_modalities=['TEXT', 'IMAGE'],
|
|
172
|
+
tools=[{"google_search": {}}]
|
|
173
|
+
)
|
|
174
|
+
)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Multiple Reference Images (Up to 14)
|
|
178
|
+
Combine elements from multiple sources:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
response = client.models.generate_content(
|
|
182
|
+
model="gemini-3-pro-image-preview",
|
|
183
|
+
contents=[
|
|
184
|
+
"Create a group photo of these people in an office",
|
|
185
|
+
Image.open("person1.png"),
|
|
186
|
+
Image.open("person2.png"),
|
|
187
|
+
Image.open("person3.png"),
|
|
188
|
+
],
|
|
189
|
+
config=types.GenerateContentConfig(
|
|
190
|
+
response_modalities=['TEXT', 'IMAGE'],
|
|
191
|
+
),
|
|
192
|
+
)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Important: File Format & Media Type
|
|
196
|
+
|
|
197
|
+
**CRITICAL:** The Gemini API returns images in JPEG format by default. When saving, always use `.jpg` extension to avoid media type mismatches.
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
# CORRECT - Use .jpg extension (Gemini returns JPEG)
|
|
201
|
+
image.save("output.jpg")
|
|
202
|
+
|
|
203
|
+
# WRONG - Will cause "Image does not match media type" errors
|
|
204
|
+
image.save("output.png") # Creates JPEG with PNG extension!
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Converting to PNG (if needed)
|
|
208
|
+
|
|
209
|
+
If you specifically need PNG format:
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
from PIL import Image
|
|
213
|
+
|
|
214
|
+
# Generate with Gemini
|
|
215
|
+
for part in response.parts:
|
|
216
|
+
if part.inline_data:
|
|
217
|
+
img = part.as_image()
|
|
218
|
+
# Convert to PNG by saving with explicit format
|
|
219
|
+
img.save("output.png", format="PNG")
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Verifying Image Format
|
|
223
|
+
|
|
224
|
+
Check actual format vs extension with the `file` command:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
file image.png
|
|
228
|
+
# If output shows "JPEG image data" - rename to .jpg!
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## Notes
|
|
232
|
+
|
|
233
|
+
- All generated images include SynthID watermarks
|
|
234
|
+
- Gemini returns **JPEG format by default** - always use `.jpg` extension
|
|
235
|
+
- Image-only mode (`response_modalities=['IMAGE']`) won't work with Google Search grounding; keep `'TEXT'` in the list when using the `google_search` tool
|
|
236
|
+
- For editing, describe changes conversationally—the model understands semantic masking
|
|
237
|
+
- Default to 1K resolution for speed; use 2K/4K when quality is critical
|
|
238
|
+
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Compose multiple images into a new image using Gemini API.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python compose_images.py "instruction" output.png image1.png [image2.png ...]
|
|
7
|
+
|
|
8
|
+
Examples:
|
|
9
|
+
python compose_images.py "Create a group photo of these people" group.png person1.png person2.png
|
|
10
|
+
python compose_images.py "Put the cat from the first image on the couch from the second" result.png cat.png couch.png
|
|
11
|
+
python compose_images.py "Apply the art style from the first image to the scene in the second" styled.png style.png photo.png
|
|
12
|
+
|
|
13
|
+
Note: Supports up to 14 reference images (Gemini 3 Pro only).
|
|
14
|
+
|
|
15
|
+
Environment:
|
|
16
|
+
GEMINI_API_KEY - Required API key
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
|
|
23
|
+
from PIL import Image
|
|
24
|
+
from google import genai
|
|
25
|
+
from google.genai import types
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def compose_images(
|
|
29
|
+
instruction: str,
|
|
30
|
+
output_path: str,
|
|
31
|
+
image_paths: list[str],
|
|
32
|
+
model: str = "gemini-3-pro-image-preview",
|
|
33
|
+
aspect_ratio: str | None = None,
|
|
34
|
+
image_size: str | None = None,
|
|
35
|
+
) -> str | None:
|
|
36
|
+
"""Compose multiple images based on instructions.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
instruction: Text description of how to combine images
|
|
40
|
+
output_path: Path to save the result
|
|
41
|
+
image_paths: List of input image paths (up to 14)
|
|
42
|
+
model: Gemini model to use (pro recommended)
|
|
43
|
+
aspect_ratio: Output aspect ratio
|
|
44
|
+
image_size: Output resolution
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Any text response from the model, or None
|
|
48
|
+
"""
|
|
49
|
+
api_key = os.environ.get("GEMINI_API_KEY")
|
|
50
|
+
if not api_key:
|
|
51
|
+
raise EnvironmentError("GEMINI_API_KEY environment variable not set")
|
|
52
|
+
|
|
53
|
+
if len(image_paths) > 14:
|
|
54
|
+
raise ValueError("Maximum 14 reference images supported")
|
|
55
|
+
|
|
56
|
+
if len(image_paths) < 1:
|
|
57
|
+
raise ValueError("At least one image is required")
|
|
58
|
+
|
|
59
|
+
# Verify all images exist
|
|
60
|
+
for path in image_paths:
|
|
61
|
+
if not os.path.exists(path):
|
|
62
|
+
raise FileNotFoundError(f"Image not found: {path}")
|
|
63
|
+
|
|
64
|
+
client = genai.Client(api_key=api_key)
|
|
65
|
+
|
|
66
|
+
# Load images
|
|
67
|
+
images = [Image.open(path) for path in image_paths]
|
|
68
|
+
|
|
69
|
+
# Build contents: instruction first, then images
|
|
70
|
+
contents = [instruction] + images
|
|
71
|
+
|
|
72
|
+
# Build config
|
|
73
|
+
config_kwargs = {"response_modalities": ["TEXT", "IMAGE"]}
|
|
74
|
+
|
|
75
|
+
image_config_kwargs = {}
|
|
76
|
+
if aspect_ratio:
|
|
77
|
+
image_config_kwargs["aspect_ratio"] = aspect_ratio
|
|
78
|
+
if image_size:
|
|
79
|
+
image_config_kwargs["image_size"] = image_size
|
|
80
|
+
|
|
81
|
+
if image_config_kwargs:
|
|
82
|
+
config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs)
|
|
83
|
+
|
|
84
|
+
config = types.GenerateContentConfig(**config_kwargs)
|
|
85
|
+
|
|
86
|
+
response = client.models.generate_content(
|
|
87
|
+
model=model,
|
|
88
|
+
contents=contents,
|
|
89
|
+
config=config,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
text_response = None
|
|
93
|
+
image_saved = False
|
|
94
|
+
|
|
95
|
+
for part in response.parts:
|
|
96
|
+
if part.text is not None:
|
|
97
|
+
text_response = part.text
|
|
98
|
+
elif part.inline_data is not None:
|
|
99
|
+
image = part.as_image()
|
|
100
|
+
image.save(output_path)
|
|
101
|
+
image_saved = True
|
|
102
|
+
|
|
103
|
+
if not image_saved:
|
|
104
|
+
raise RuntimeError("No image was generated.")
|
|
105
|
+
|
|
106
|
+
return text_response
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def main():
    """Command-line entry point for composing images.

    Parses the instruction, output path, input images and the optional
    model / aspect-ratio / size flags, then delegates to ``compose_images``.
    Any exception is reported on stderr and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Compose multiple images using Gemini API",
    )

    # Positional arguments, registered in command-line order.
    for dest, help_text, extra in (
        ("instruction", "Composition instruction", {}),
        ("output", "Output file path", {}),
        ("images", "Input images (up to 14)", {"nargs": "+"}),
    ):
        cli.add_argument(dest, help=help_text, **extra)

    # Optional flags.
    cli.add_argument(
        "--model",
        "-m",
        choices=["gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
        default="gemini-3-pro-image-preview",
        help="Model to use (pro recommended for composition)",
    )
    cli.add_argument(
        "--aspect",
        "-a",
        choices=["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"],
        help="Output aspect ratio",
    )
    cli.add_argument(
        "--size",
        "-s",
        choices=["1K", "2K", "4K"],
        help="Output resolution",
    )

    opts = cli.parse_args()

    try:
        reply = compose_images(
            instruction=opts.instruction,
            output_path=opts.output,
            image_paths=opts.images,
            model=opts.model,
            aspect_ratio=opts.aspect,
            image_size=opts.size,
        )

        print(f"Composed image saved to: {opts.output}")
        if reply:
            print(f"Model response: {reply}")

    except Exception as err:
        # Top-level boundary: turn any failure into a message plus exit code 1.
        print(f"Error: {err}", file=sys.stderr)
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Edit existing images using Gemini API.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python edit_image.py input.png "edit instruction" output.png [options]
|
|
7
|
+
|
|
8
|
+
Examples:
|
|
9
|
+
python edit_image.py photo.png "Add a rainbow in the sky" edited.png
|
|
10
|
+
python edit_image.py room.jpg "Change the sofa to red leather" room_edited.jpg
|
|
11
|
+
python edit_image.py portrait.png "Make it look like a Van Gogh painting" artistic.png --model gemini-3-pro-image-preview
|
|
12
|
+
|
|
13
|
+
Environment:
|
|
14
|
+
GEMINI_API_KEY - Required API key
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
from PIL import Image
|
|
22
|
+
from google import genai
|
|
23
|
+
from google.genai import types
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def edit_image(
|
|
27
|
+
input_path: str,
|
|
28
|
+
instruction: str,
|
|
29
|
+
output_path: str,
|
|
30
|
+
model: str = "gemini-2.5-flash-image",
|
|
31
|
+
aspect_ratio: str | None = None,
|
|
32
|
+
image_size: str | None = None,
|
|
33
|
+
) -> str | None:
|
|
34
|
+
"""Edit an existing image based on text instructions.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
input_path: Path to the input image
|
|
38
|
+
instruction: Text description of edits to make
|
|
39
|
+
output_path: Path to save the edited image
|
|
40
|
+
model: Gemini model to use
|
|
41
|
+
aspect_ratio: Output aspect ratio
|
|
42
|
+
image_size: Output resolution
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Any text response from the model, or None
|
|
46
|
+
"""
|
|
47
|
+
api_key = os.environ.get("GEMINI_API_KEY")
|
|
48
|
+
if not api_key:
|
|
49
|
+
raise EnvironmentError("GEMINI_API_KEY environment variable not set")
|
|
50
|
+
|
|
51
|
+
if not os.path.exists(input_path):
|
|
52
|
+
raise FileNotFoundError(f"Input image not found: {input_path}")
|
|
53
|
+
|
|
54
|
+
client = genai.Client(api_key=api_key)
|
|
55
|
+
|
|
56
|
+
# Load input image
|
|
57
|
+
input_image = Image.open(input_path)
|
|
58
|
+
|
|
59
|
+
# Build config
|
|
60
|
+
config_kwargs = {"response_modalities": ["TEXT", "IMAGE"]}
|
|
61
|
+
|
|
62
|
+
image_config_kwargs = {}
|
|
63
|
+
if aspect_ratio:
|
|
64
|
+
image_config_kwargs["aspect_ratio"] = aspect_ratio
|
|
65
|
+
if image_size:
|
|
66
|
+
image_config_kwargs["image_size"] = image_size
|
|
67
|
+
|
|
68
|
+
if image_config_kwargs:
|
|
69
|
+
config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs)
|
|
70
|
+
|
|
71
|
+
config = types.GenerateContentConfig(**config_kwargs)
|
|
72
|
+
|
|
73
|
+
response = client.models.generate_content(
|
|
74
|
+
model=model,
|
|
75
|
+
contents=[instruction, input_image],
|
|
76
|
+
config=config,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
text_response = None
|
|
80
|
+
image_saved = False
|
|
81
|
+
|
|
82
|
+
for part in response.parts:
|
|
83
|
+
if part.text is not None:
|
|
84
|
+
text_response = part.text
|
|
85
|
+
elif part.inline_data is not None:
|
|
86
|
+
image = part.as_image()
|
|
87
|
+
image.save(output_path)
|
|
88
|
+
image_saved = True
|
|
89
|
+
|
|
90
|
+
if not image_saved:
|
|
91
|
+
raise RuntimeError("No image was generated. Check your instruction and try again.")
|
|
92
|
+
|
|
93
|
+
return text_response
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def main():
    """Command-line entry point for editing an image.

    Parses the input path, instruction, output path and the optional
    model / aspect-ratio / size flags, then delegates to ``edit_image``.
    Any exception is reported on stderr and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Edit images using Gemini API",
    )

    # Positional arguments, registered in command-line order.
    for dest, help_text in (
        ("input", "Input image path"),
        ("instruction", "Edit instruction"),
        ("output", "Output file path"),
    ):
        cli.add_argument(dest, help=help_text)

    # Optional flags.
    cli.add_argument(
        "--model",
        "-m",
        choices=["gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
        default="gemini-2.5-flash-image",
        help="Model to use (default: gemini-2.5-flash-image)",
    )
    cli.add_argument(
        "--aspect",
        "-a",
        choices=["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"],
        help="Output aspect ratio",
    )
    cli.add_argument(
        "--size",
        "-s",
        choices=["1K", "2K", "4K"],
        help="Output resolution",
    )

    opts = cli.parse_args()

    try:
        reply = edit_image(
            input_path=opts.input,
            instruction=opts.instruction,
            output_path=opts.output,
            model=opts.model,
            aspect_ratio=opts.aspect,
            image_size=opts.size,
        )

        print(f"Edited image saved to: {opts.output}")
        if reply:
            print(f"Model response: {reply}")

    except Exception as err:
        # Top-level boundary: turn any failure into a message plus exit code 1.
        print(f"Error: {err}", file=sys.stderr)
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|