@swarmclawai/swarmclaw 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -85
- package/bin/server-cmd.js +64 -1
- package/package.json +2 -2
- package/skills/coding-agent/SKILL.md +111 -0
- package/skills/github/SKILL.md +140 -0
- package/skills/nano-banana-pro/SKILL.md +62 -0
- package/skills/nano-banana-pro/scripts/generate_image.py +235 -0
- package/skills/nano-pdf/SKILL.md +53 -0
- package/skills/openai-image-gen/SKILL.md +78 -0
- package/skills/openai-image-gen/scripts/gen.py +328 -0
- package/skills/resourceful-problem-solving/SKILL.md +49 -0
- package/skills/skill-creator/SKILL.md +147 -0
- package/skills/skill-creator/scripts/init_skill.py +378 -0
- package/skills/skill-creator/scripts/quick_validate.py +159 -0
- package/skills/summarize/SKILL.md +77 -0
- package/src/app/api/auth/route.ts +20 -5
- package/src/app/api/chats/[id]/devserver/route.ts +13 -19
- package/src/app/api/chats/[id]/messages/route.ts +13 -15
- package/src/app/api/chats/[id]/route.ts +9 -10
- package/src/app/api/chats/[id]/stop/route.ts +5 -7
- package/src/app/api/chats/messages-route.test.ts +8 -6
- package/src/app/api/chats/route.ts +9 -10
- package/src/app/api/ip/route.ts +2 -2
- package/src/app/api/preview-server/route.ts +1 -1
- package/src/app/api/projects/[id]/route.ts +7 -46
- package/src/cli/server-cmd.test.js +74 -0
- package/src/components/chat/chat-area.tsx +45 -23
- package/src/components/chat/message-bubble.test.ts +35 -0
- package/src/components/chat/message-bubble.tsx +19 -9
- package/src/components/chat/message-list.tsx +37 -3
- package/src/components/input/chat-input.tsx +34 -14
- package/src/components/openclaw/openclaw-deploy-panel.tsx +4 -0
- package/src/instrumentation.ts +1 -1
- package/src/lib/chat/assistant-render-id.ts +3 -0
- package/src/lib/chat/chat-streaming-state.test.ts +42 -3
- package/src/lib/chat/chat-streaming-state.ts +20 -8
- package/src/lib/chat/queued-message-queue.test.ts +23 -1
- package/src/lib/chat/queued-message-queue.ts +11 -2
- package/src/lib/providers/cli-utils.test.ts +124 -0
- package/src/lib/server/activity/activity-log.ts +21 -0
- package/src/lib/server/agents/agent-availability.test.ts +10 -5
- package/src/lib/server/agents/agent-cascade.ts +79 -59
- package/src/lib/server/agents/agent-registry.ts +3 -1
- package/src/lib/server/agents/agent-repository.ts +90 -0
- package/src/lib/server/agents/delegation-job-repository.ts +53 -0
- package/src/lib/server/agents/delegation-jobs.ts +11 -4
- package/src/lib/server/agents/guardian-checkpoint-repository.ts +35 -0
- package/src/lib/server/agents/guardian.ts +2 -2
- package/src/lib/server/agents/main-agent-loop.ts +10 -3
- package/src/lib/server/agents/main-loop-state-repository.ts +38 -0
- package/src/lib/server/agents/subagent-runtime.ts +9 -6
- package/src/lib/server/agents/subagent-swarm.ts +3 -2
- package/src/lib/server/agents/task-session.ts +3 -4
- package/src/lib/server/approvals/approval-repository.ts +30 -0
- package/src/lib/server/autonomy/supervisor-incident-repository.ts +42 -0
- package/src/lib/server/chat-execution/chat-execution-types.ts +38 -0
- package/src/lib/server/chat-execution/chat-execution-utils.ts +1 -1
- package/src/lib/server/chat-execution/chat-execution.ts +84 -1926
- package/src/lib/server/chat-execution/chat-turn-finalization.ts +620 -0
- package/src/lib/server/chat-execution/chat-turn-partial-persistence.ts +221 -0
- package/src/lib/server/chat-execution/chat-turn-preflight.ts +133 -0
- package/src/lib/server/chat-execution/chat-turn-preparation.ts +817 -0
- package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +296 -0
- package/src/lib/server/chat-execution/chat-turn-tool-routing.ts +5 -5
- package/src/lib/server/chat-execution/message-classifier.test.ts +329 -0
- package/src/lib/server/chat-execution/post-stream-finalization.ts +1 -1
- package/src/lib/server/chat-execution/prompt-builder.ts +11 -0
- package/src/lib/server/chat-execution/prompt-sections.ts +5 -6
- package/src/lib/server/chat-execution/situational-awareness.ts +12 -7
- package/src/lib/server/chat-execution/stream-agent-chat.ts +16 -13
- package/src/lib/server/chatrooms/chatroom-repository.ts +32 -0
- package/src/lib/server/connectors/connector-repository.ts +58 -0
- package/src/lib/server/connectors/runtime-state.test.ts +117 -0
- package/src/lib/server/credentials/credential-repository.ts +7 -0
- package/src/lib/server/gateways/gateway-profile-repository.ts +4 -0
- package/src/lib/server/memory/memory-abstract.test.ts +59 -0
- package/src/lib/server/missions/mission-repository.ts +74 -0
- package/src/lib/server/missions/mission-service/actions.ts +6 -0
- package/src/lib/server/missions/mission-service/bindings.ts +9 -0
- package/src/lib/server/missions/mission-service/context.ts +4 -0
- package/src/lib/server/missions/mission-service/core.ts +2269 -0
- package/src/lib/server/missions/mission-service/queries.ts +12 -0
- package/src/lib/server/missions/mission-service/recovery.ts +5 -0
- package/src/lib/server/missions/mission-service/ticks.ts +9 -0
- package/src/lib/server/missions/mission-service.test.ts +9 -2
- package/src/lib/server/missions/mission-service.ts +6 -2266
- package/src/lib/server/openclaw/deploy.test.ts +42 -3
- package/src/lib/server/openclaw/deploy.ts +26 -12
- package/src/lib/server/persistence/repository-utils.ts +154 -0
- package/src/lib/server/persistence/storage-context.ts +51 -0
- package/src/lib/server/persistence/transaction.ts +1 -0
- package/src/lib/server/projects/project-repository.ts +36 -0
- package/src/lib/server/projects/project-service.ts +79 -0
- package/src/lib/server/protocols/protocol-normalization.test.ts +6 -4
- package/src/lib/server/runtime/alert-dispatch.ts +1 -1
- package/src/lib/server/runtime/daemon-policy.ts +1 -1
- package/src/lib/server/runtime/daemon-state/core.ts +1570 -0
- package/src/lib/server/runtime/daemon-state/health.ts +6 -0
- package/src/lib/server/runtime/daemon-state/policy.ts +7 -0
- package/src/lib/server/runtime/daemon-state/supervisor.ts +6 -0
- package/src/lib/server/runtime/daemon-state.test.ts +48 -0
- package/src/lib/server/runtime/daemon-state.ts +3 -1470
- package/src/lib/server/runtime/estop-repository.ts +4 -0
- package/src/lib/server/runtime/estop.ts +3 -1
- package/src/lib/server/runtime/heartbeat-service.test.ts +2 -2
- package/src/lib/server/runtime/heartbeat-service.ts +55 -34
- package/src/lib/server/runtime/heartbeat-wake.ts +6 -4
- package/src/lib/server/runtime/idle-window.ts +2 -2
- package/src/lib/server/runtime/network.ts +11 -0
- package/src/lib/server/runtime/orchestrator-events.ts +2 -2
- package/src/lib/server/runtime/queue/claims.ts +4 -0
- package/src/lib/server/runtime/queue/core.ts +2079 -0
- package/src/lib/server/runtime/queue/execution.ts +7 -0
- package/src/lib/server/runtime/queue/followups.ts +4 -0
- package/src/lib/server/runtime/queue/queries.ts +12 -0
- package/src/lib/server/runtime/queue/recovery.ts +7 -0
- package/src/lib/server/runtime/queue-recovery.test.ts +48 -13
- package/src/lib/server/runtime/queue-repository.ts +17 -0
- package/src/lib/server/runtime/queue.ts +5 -2061
- package/src/lib/server/runtime/run-ledger.ts +6 -5
- package/src/lib/server/runtime/run-repository.ts +73 -0
- package/src/lib/server/runtime/runtime-lock-repository.ts +8 -0
- package/src/lib/server/runtime/runtime-settings.ts +1 -1
- package/src/lib/server/runtime/runtime-state.ts +99 -0
- package/src/lib/server/runtime/scheduler.ts +4 -2
- package/src/lib/server/runtime/session-run-manager/cancellation.ts +157 -0
- package/src/lib/server/runtime/session-run-manager/drain.ts +246 -0
- package/src/lib/server/runtime/session-run-manager/enqueue.ts +287 -0
- package/src/lib/server/runtime/session-run-manager/queries.ts +117 -0
- package/src/lib/server/runtime/session-run-manager/recovery.ts +238 -0
- package/src/lib/server/runtime/session-run-manager/state.ts +441 -0
- package/src/lib/server/runtime/session-run-manager/types.ts +74 -0
- package/src/lib/server/runtime/session-run-manager.ts +72 -1377
- package/src/lib/server/runtime/watch-job-repository.ts +35 -0
- package/src/lib/server/runtime/watch-jobs.ts +3 -1
- package/src/lib/server/schedules/schedule-repository.ts +42 -0
- package/src/lib/server/sessions/session-repository.ts +85 -0
- package/src/lib/server/settings/settings-repository.ts +25 -0
- package/src/lib/server/skills/skill-discovery.test.ts +2 -2
- package/src/lib/server/skills/skill-discovery.ts +2 -2
- package/src/lib/server/skills/skill-repository.ts +14 -0
- package/src/lib/server/storage.ts +13 -24
- package/src/lib/server/tasks/task-repository.ts +54 -0
- package/src/lib/server/usage/usage-repository.ts +30 -0
- package/src/lib/server/webhooks/webhook-repository.ts +10 -0
- package/src/lib/strip-internal-metadata.test.ts +42 -41
- package/src/stores/use-chat-store.test.ts +54 -0
- package/src/stores/use-chat-store.ts +21 -5
- /package/{bundled-skills → skills}/google-workspace/SKILL.md +0 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = [
|
|
5
|
+
# "google-genai>=1.0.0",
|
|
6
|
+
# "pillow>=10.0.0",
|
|
7
|
+
# ]
|
|
8
|
+
# ///
|
|
9
|
+
"""
|
|
10
|
+
Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY]
|
|
14
|
+
|
|
15
|
+
Multi-image editing (up to 14 images):
|
|
16
|
+
uv run generate_image.py --prompt "combine these images" --filename "output.png" -i img1.png -i img2.png -i img3.png
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
# Aspect ratios offered as the valid choices for the --aspect-ratio flag.
SUPPORTED_ASPECT_RATIOS = [
    "1:1", "2:3", "3:2", "3:4", "4:3",
    "4:5", "5:4", "9:16", "16:9", "21:9",
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_api_key(provided_key: str | None) -> str | None:
|
|
39
|
+
"""Get API key from argument first, then environment."""
|
|
40
|
+
if provided_key:
|
|
41
|
+
return provided_key
|
|
42
|
+
return os.environ.get("GEMINI_API_KEY")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def auto_detect_resolution(max_input_dim: int) -> str:
    """Map the largest input-image dimension (in pixels) to a resolution label.

    Returns "4K" from 3000 px upward, "2K" from 1500 px upward, else "1K".
    """
    # Ordered from the highest threshold down; the first match wins.
    tiers = ((3000, "4K"), (1500, "2K"))
    for minimum, label in tiers:
        if max_input_dim >= minimum:
            return label
    return "1K"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def choose_output_resolution(
|
|
55
|
+
requested_resolution: str | None,
|
|
56
|
+
max_input_dim: int,
|
|
57
|
+
has_input_images: bool,
|
|
58
|
+
) -> tuple[str, bool]:
|
|
59
|
+
"""Choose final resolution and whether it was auto-detected.
|
|
60
|
+
|
|
61
|
+
Auto-detection is only applied when the user did not pass --resolution.
|
|
62
|
+
"""
|
|
63
|
+
if requested_resolution is not None:
|
|
64
|
+
return requested_resolution, False
|
|
65
|
+
|
|
66
|
+
if has_input_images and max_input_dim > 0:
|
|
67
|
+
return auto_detect_resolution(max_input_dim), True
|
|
68
|
+
|
|
69
|
+
return "1K", False
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the image generation script."""
    parser = argparse.ArgumentParser(
        description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)"
    )
    parser.add_argument(
        "--prompt", "-p",
        required=True,
        help="Image description/prompt"
    )
    parser.add_argument(
        "--filename", "-f",
        required=True,
        help="Output filename (e.g., sunset-mountains.png)"
    )
    parser.add_argument(
        "--input-image", "-i",
        action="append",
        dest="input_images",
        metavar="IMAGE",
        help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)."
    )
    parser.add_argument(
        "--resolution", "-r",
        choices=["1K", "2K", "4K"],
        default=None,
        help="Output resolution: 1K, 2K, or 4K. If omitted with input images, auto-detect from largest image dimension."
    )
    parser.add_argument(
        "--aspect-ratio", "-a",
        choices=SUPPORTED_ASPECT_RATIOS,
        default=None,
        help=f"Output aspect ratio (default: model decides). Options: {', '.join(SUPPORTED_ASPECT_RATIOS)}"
    )
    parser.add_argument(
        "--api-key", "-k",
        help="Gemini API key (overrides GEMINI_API_KEY env var)"
    )
    return parser


def main():
    """Run the CLI: parse arguments, call the image model, save the result as PNG.

    Exits with status 1 when no API key is available, when more than 14 input
    images are given, when an input image cannot be loaded, when the API call
    or image decoding fails, or when the response contains no image data.

    Every image part in the response is written to the same output path, so
    if the model returns several images the last one is what remains on disk.
    """
    args = _build_arg_parser().parse_args()

    # Get API key
    api_key = get_api_key(args.api_key)
    if not api_key:
        print("Error: No API key provided.", file=sys.stderr)
        print("Please either:", file=sys.stderr)
        print(" 1. Provide --api-key argument", file=sys.stderr)
        print(" 2. Set GEMINI_API_KEY environment variable", file=sys.stderr)
        sys.exit(1)

    # Import here after checking API key to avoid slow import on error
    import base64
    from io import BytesIO

    from google import genai
    from google.genai import types
    from PIL import Image as PILImage

    # Initialise client
    client = genai.Client(api_key=api_key)

    # Set up output path
    output_path = Path(args.filename)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Load input images if provided (up to 14 supported by Nano Banana Pro)
    input_images = []
    max_input_dim = 0
    if args.input_images:
        if len(args.input_images) > 14:
            print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr)
            sys.exit(1)

        for img_path in args.input_images:
            try:
                # Copy the pixels so the file handle can be closed immediately.
                with PILImage.open(img_path) as img:
                    copied = img.copy()
                    width, height = copied.size
                input_images.append(copied)
                print(f"Loaded input image: {img_path}")

                # Track largest dimension for auto-resolution
                max_input_dim = max(max_input_dim, width, height)
            except Exception as e:
                print(f"Error loading input image '{img_path}': {e}", file=sys.stderr)
                sys.exit(1)

    output_resolution, auto_detected = choose_output_resolution(
        requested_resolution=args.resolution,
        max_input_dim=max_input_dim,
        has_input_images=bool(input_images),
    )
    if auto_detected:
        print(
            f"Auto-detected resolution: {output_resolution} "
            f"(from max input dimension {max_input_dim})"
        )

    # Build contents (images first if editing, prompt only if generating)
    if input_images:
        contents = [*input_images, args.prompt]
        img_count = len(input_images)
        print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...")
    else:
        contents = args.prompt
        print(f"Generating image with resolution {output_resolution}...")

    try:
        # Build image config with optional aspect ratio
        image_cfg_kwargs = {"image_size": output_resolution}
        if args.aspect_ratio:
            image_cfg_kwargs["aspect_ratio"] = args.aspect_ratio

        response = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"],
                image_config=types.ImageConfig(**image_cfg_kwargs)
            )
        )

        # Process response and convert to PNG
        image_saved = False
        # NOTE(review): response.parts may be None when the response carries
        # no content parts; fall back to an empty list so the explicit
        # "No image was generated" error below is reported instead of a
        # TypeError from iterating None.
        for part in response.parts or []:
            if part.text is not None:
                print(f"Model response: {part.text}")
            elif part.inline_data is not None:
                # inline_data.data is normally raw bytes; tolerate a base64
                # string just in case.
                image_data = part.inline_data.data
                if isinstance(image_data, str):
                    image_data = base64.b64decode(image_data)

                with PILImage.open(BytesIO(image_data)) as image:
                    # The PNG is always written as RGB: RGBA is flattened
                    # onto a white background, other modes are converted.
                    if image.mode == "RGBA":
                        rgb_image = PILImage.new("RGB", image.size, (255, 255, 255))
                        rgb_image.paste(image, mask=image.split()[3])
                    elif image.mode == "RGB":
                        rgb_image = image
                    else:
                        rgb_image = image.convert("RGB")
                    rgb_image.save(str(output_path), "PNG")
                image_saved = True

        if image_saved:
            full_path = output_path.resolve()
            print(f"\nImage saved: {full_path}")
            # OpenClaw parses MEDIA: tokens and will attach the file on
            # supported chat providers. Emit the canonical MEDIA:<path> form.
            print(f"MEDIA:{full_path}")
        else:
            print("Error: No image was generated in the response.", file=sys.stderr)
            sys.exit(1)

    except Exception as e:
        print(f"Error generating image: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: nano-pdf
|
|
3
|
+
description: Edit or create PDFs with natural-language instructions using the nano-pdf CLI. Use when asked to make a PDF, edit a PDF, add pages, change text in a PDF, or convert content to PDF format.
|
|
4
|
+
metadata:
|
|
5
|
+
{
|
|
6
|
+
"openclaw":
|
|
7
|
+
{
|
|
8
|
+
"emoji": "📄",
|
|
9
|
+
"requires": { "bins": ["nano-pdf"] },
|
|
10
|
+
"install":
|
|
11
|
+
[
|
|
12
|
+
{
|
|
13
|
+
"id": "uv",
|
|
14
|
+
"kind": "uv",
|
|
15
|
+
"package": "nano-pdf",
|
|
16
|
+
"bins": ["nano-pdf"],
|
|
17
|
+
"label": "Install nano-pdf (uv)",
|
|
18
|
+
},
|
|
19
|
+
],
|
|
20
|
+
},
|
|
21
|
+
}
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
# nano-pdf
|
|
25
|
+
|
|
26
|
+
Use `nano-pdf` to apply edits to a specific page in a PDF using a natural-language instruction.
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
nano-pdf edit deck.pdf 1 "Change the title to 'Q3 Results' and fix the typo in the subtitle"
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Creating a New PDF
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
nano-pdf create output.pdf "Create a one-page summary of quarterly results with a header, bullet points, and a footer"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Usage in SwarmClaw
|
|
41
|
+
|
|
42
|
+
When a user asks to create or edit a PDF:
|
|
43
|
+
|
|
44
|
+
1. Check if `nano-pdf` is installed: `which nano-pdf`
|
|
45
|
+
2. If not installed, install via `uv tool install nano-pdf` or `pip install nano-pdf`
|
|
46
|
+
3. Run the appropriate command
|
|
47
|
+
4. Report the output file path to the user
|
|
48
|
+
|
|
49
|
+
## Notes
|
|
50
|
+
|
|
51
|
+
- Page numbers are 0-based or 1-based depending on the tool's version; if the result looks off by one, retry with the other.
|
|
52
|
+
- Always sanity-check the output PDF before reporting success.
|
|
53
|
+
- For multi-page edits, run separate commands per page.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: openai-image-gen
|
|
3
|
+
description: Generate images via OpenAI Images API (GPT Image, DALL-E 3, DALL-E 2). Supports batch generation with random prompt sampler and HTML gallery output. Use when asked to generate images with OpenAI and an OPENAI_API_KEY is available.
|
|
4
|
+
metadata:
|
|
5
|
+
{
|
|
6
|
+
"openclaw":
|
|
7
|
+
{
|
|
8
|
+
"emoji": "🎨",
|
|
9
|
+
"requires": { "bins": ["python3"], "env": ["OPENAI_API_KEY"] },
|
|
10
|
+
"primaryEnv": "OPENAI_API_KEY",
|
|
11
|
+
"install":
|
|
12
|
+
[
|
|
13
|
+
{
|
|
14
|
+
"id": "python-brew",
|
|
15
|
+
"kind": "brew",
|
|
16
|
+
"formula": "python",
|
|
17
|
+
"bins": ["python3"],
|
|
18
|
+
"label": "Install Python (brew)",
|
|
19
|
+
},
|
|
20
|
+
],
|
|
21
|
+
},
|
|
22
|
+
}
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
# OpenAI Image Gen
|
|
26
|
+
|
|
27
|
+
Generate images via the OpenAI Images API with an HTML gallery viewer.
|
|
28
|
+
|
|
29
|
+
## Run
|
|
30
|
+
|
|
31
|
+
Note: Image generation can take longer than typical timeouts. Set a higher timeout when running via shell (e.g., 300 seconds).
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
python3 {baseDir}/scripts/gen.py
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Useful Flags
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# GPT image models with various options
|
|
41
|
+
python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1
|
|
42
|
+
python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
|
|
43
|
+
python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
|
|
44
|
+
python3 {baseDir}/scripts/gen.py --model gpt-image-1.5 --background transparent --output-format webp
|
|
45
|
+
|
|
46
|
+
# DALL-E 3 (note: count is automatically limited to 1)
|
|
47
|
+
python3 {baseDir}/scripts/gen.py --model dall-e-3 --quality hd --size 1792x1024 --style vivid
|
|
48
|
+
python3 {baseDir}/scripts/gen.py --model dall-e-3 --style natural --prompt "serene mountain landscape"
|
|
49
|
+
|
|
50
|
+
# DALL-E 2
|
|
51
|
+
python3 {baseDir}/scripts/gen.py --model dall-e-2 --size 512x512 --count 4
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Model-Specific Parameters
|
|
55
|
+
|
|
56
|
+
### Size
|
|
57
|
+
|
|
58
|
+
- **GPT image models** (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`): `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or `auto`. Default: `1024x1024`
|
|
59
|
+
- **dall-e-3**: `1024x1024`, `1792x1024`, or `1024x1792`. Default: `1024x1024`
|
|
60
|
+
- **dall-e-2**: `256x256`, `512x512`, or `1024x1024`. Default: `1024x1024`
|
|
61
|
+
|
|
62
|
+
### Quality
|
|
63
|
+
|
|
64
|
+
- **GPT image models**: `auto`, `high`, `medium`, or `low`. Default: `high`
|
|
65
|
+
- **dall-e-3**: `hd` or `standard`. Default: `standard`
|
|
66
|
+
- **dall-e-2**: `standard` only
|
|
67
|
+
|
|
68
|
+
### Other Parameters
|
|
69
|
+
|
|
70
|
+
- **GPT image models** support `--background` (`transparent`, `opaque`, `auto`) and `--output-format` (`png`, `jpeg`, `webp`)
|
|
71
|
+
- **dall-e-3** supports `--style` (`vivid` for hyper-real, `natural` for more natural looking)
|
|
72
|
+
- **dall-e-3** only supports `n=1`; the script automatically limits count to 1
|
|
73
|
+
|
|
74
|
+
## Output
|
|
75
|
+
|
|
76
|
+
- Image files (`*.png`, `*.jpeg`, or `*.webp` depending on model and format)
|
|
77
|
+
- `prompts.json` (prompt-to-file mapping)
|
|
78
|
+
- `index.html` (thumbnail gallery — open in browser to review)
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import base64
|
|
4
|
+
import datetime as dt
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import random
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
import urllib.error
|
|
11
|
+
import urllib.request
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from html import escape as html_escape
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, dash-separated slug.

    Runs of characters outside ``a-z0-9`` become a single dash and leading or
    trailing dashes are removed. Returns "image" when nothing usable remains.
    """
    lowered = text.lower().strip()
    dashed = re.sub(r"[^a-z0-9]+", "-", lowered)
    collapsed = re.sub(r"-{2,}", "-", dashed)
    slug = collapsed.strip("-")
    if slug:
        return slug
    return "image"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def default_out_dir() -> Path:
    """Return a timestamped output directory path under a tmp base directory.

    The base is ``~/Projects/tmp`` when that directory already exists,
    otherwise ``./tmp``. The base directory is created if missing; the
    timestamped directory itself is only returned, not created.
    """
    stamp = dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    home_tmp = Path.home() / "Projects" / "tmp"
    if home_tmp.is_dir():
        root = home_tmp
    else:
        root = Path("./tmp")
    root.mkdir(parents=True, exist_ok=True)
    return root / f"openai-image-gen-{stamp}"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def pick_prompts(count: int) -> list[str]:
    """Return ``count`` random prompts of the form "<style> of <subject>, <lighting>"."""
    subjects = [
        "a lobster astronaut",
        "a brutalist lighthouse",
        "a cozy reading nook",
        "a cyberpunk noodle shop",
        "a Vienna street at dusk",
        "a minimalist product photo",
        "a surreal underwater library",
    ]
    styles = [
        "ultra-detailed studio photo",
        "35mm film still",
        "isometric illustration",
        "editorial photography",
        "soft watercolor",
        "architectural render",
        "high-contrast monochrome",
    ]
    lighting = [
        "golden hour",
        "overcast soft light",
        "neon lighting",
        "dramatic rim light",
        "candlelight",
        "foggy atmosphere",
    ]
    # Draw order per prompt is style, then subject, then lighting.
    return [
        f"{random.choice(styles)} of {random.choice(subjects)}, {random.choice(lighting)}"
        for _ in range(count)
    ]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_model_defaults(model: str) -> tuple[str, str]:
    """Return ``(default_size, default_quality)`` for ``model``.

    Both DALL-E models default to "standard" quality; any other model id
    (GPT image or future models) defaults to "high". The size default is
    "1024x1024" in every case.
    """
    dalle_models = ("dall-e-2", "dall-e-3")
    default_quality = "standard" if model in dalle_models else "high"
    return ("1024x1024", default_quality)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def normalize_optional_flag(
|
|
80
|
+
*,
|
|
81
|
+
model: str,
|
|
82
|
+
raw_value: str,
|
|
83
|
+
flag_name: str,
|
|
84
|
+
supported: Callable[[str], bool],
|
|
85
|
+
allowed: set[str],
|
|
86
|
+
allowed_text: str,
|
|
87
|
+
unsupported_message: str,
|
|
88
|
+
aliases: dict[str, str] | None = None,
|
|
89
|
+
) -> str:
|
|
90
|
+
"""Normalize a string flag, warn when unsupported, and reject invalid values."""
|
|
91
|
+
value = raw_value.strip().lower()
|
|
92
|
+
if not value:
|
|
93
|
+
return ""
|
|
94
|
+
|
|
95
|
+
if not supported(model):
|
|
96
|
+
print(unsupported_message.format(model=model), file=sys.stderr)
|
|
97
|
+
return ""
|
|
98
|
+
|
|
99
|
+
if aliases:
|
|
100
|
+
value = aliases.get(value, value)
|
|
101
|
+
|
|
102
|
+
if value not in allowed:
|
|
103
|
+
raise ValueError(
|
|
104
|
+
f"Invalid --{flag_name} '{raw_value}'. Allowed values: {allowed_text}."
|
|
105
|
+
)
|
|
106
|
+
return value
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def normalize_background(model: str, background: str) -> str:
    """Normalize ``--background``, which only gpt-image models accept.

    Delegates to ``normalize_optional_flag`` with the allowed values
    transparent, opaque and auto.
    """
    def _is_gpt_image(candidate: str) -> bool:
        return candidate.startswith("gpt-image")

    warning = (
        "Warning: --background is only supported for gpt-image models; "
        "ignoring for '{model}'."
    )
    return normalize_optional_flag(
        model=model,
        raw_value=background,
        flag_name="background",
        supported=_is_gpt_image,
        allowed={"transparent", "opaque", "auto"},
        allowed_text="transparent, opaque, auto",
        unsupported_message=warning,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def normalize_style(model: str, style: str) -> str:
    """Normalize ``--style``, which only dall-e-3 accepts.

    Delegates to ``normalize_optional_flag`` with the allowed values vivid
    and natural.
    """
    def _is_dalle3(candidate: str) -> bool:
        return candidate == "dall-e-3"

    warning = "Warning: --style is only supported for dall-e-3; ignoring for '{model}'."
    return normalize_optional_flag(
        model=model,
        raw_value=style,
        flag_name="style",
        supported=_is_dalle3,
        allowed={"vivid", "natural"},
        allowed_text="vivid, natural",
        unsupported_message=warning,
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def normalize_output_format(model: str, output_format: str) -> str:
    """Normalize ``--output-format``, which only gpt-image models accept.

    Delegates to ``normalize_optional_flag`` with the allowed values png,
    jpeg and webp; "jpg" is accepted as an alias for "jpeg".
    """
    def _is_gpt_image(candidate: str) -> bool:
        return candidate.startswith("gpt-image")

    warning = (
        "Warning: --output-format is only supported for gpt-image models; "
        "ignoring for '{model}'."
    )
    return normalize_optional_flag(
        model=model,
        raw_value=output_format,
        flag_name="output-format",
        supported=_is_gpt_image,
        allowed={"png", "jpeg", "webp"},
        allowed_text="png, jpeg, webp",
        unsupported_message=warning,
        aliases={"jpg": "jpeg"},
    )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def request_images(
    api_key: str,
    prompt: str,
    model: str,
    size: str,
    quality: str,
    background: str = "",
    output_format: str = "",
    style: str = "",
) -> dict:
    """POST a single-image generation request and return the decoded JSON reply.

    Optional fields are only sent to models that take them: ``quality`` is
    omitted for dall-e-2, ``background`` and ``output_format`` are sent only
    to gpt-image models, and ``style`` only to dall-e-3.

    Raises:
        RuntimeError: If the API answers with an HTTP error status; the
            message carries the status code and the response body.
    """
    is_gpt_image = model.startswith("gpt-image")

    payload = {"model": model, "prompt": prompt, "size": size, "n": 1}

    # Quality parameter - dall-e-2 doesn't accept this parameter
    if model != "dall-e-2":
        payload["quality"] = quality

    # Note: response_format no longer supported by OpenAI Images API
    # dall-e models now return URLs by default

    if is_gpt_image and background:
        payload["background"] = background
    if is_gpt_image and output_format:
        payload["output_format"] = output_format
    if style and model == "dall-e-3":
        payload["style"] = style

    request = urllib.request.Request(
        "https://api.openai.com/v1/images/generations",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=300) as response:
            raw = response.read()
    except urllib.error.HTTPError as err:
        detail = err.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"OpenAI Images API failed ({err.code}): {detail}") from err
    return json.loads(raw.decode("utf-8"))
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def write_gallery(out_dir: Path, items: list[dict]) -> None:
    """Write ``index.html`` into ``out_dir`` as a thumbnail gallery of ``items``.

    Each item is read for its "file" key (used as link target and image
    source) and its "prompt" key (used as caption); both are HTML-escaped.
    """
    figures = []
    for item in items:
        link = html_escape(item["file"], quote=True)
        caption = html_escape(item["prompt"])
        figures.append(
            "<figure>\n"
            f'<a href="{link}"><img src="{link}" loading="lazy" /></a>\n'
            f"<figcaption>{caption}</figcaption>\n"
            "</figure>"
        )
    grid = "\n".join(figures)
    location = html_escape(out_dir.as_posix())

    page = f"""<!doctype html>
<meta charset="utf-8" />
<title>openai-image-gen</title>
<style>
:root {{ color-scheme: dark; }}
body {{ margin: 24px; font: 14px/1.4 ui-sans-serif, system-ui; background: #0b0f14; color: #e8edf2; }}
h1 {{ font-size: 18px; margin: 0 0 16px; }}
.grid {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); gap: 16px; }}
figure {{ margin: 0; padding: 12px; border: 1px solid #1e2a36; border-radius: 14px; background: #0f1620; }}
img {{ width: 100%; height: auto; border-radius: 10px; display: block; }}
figcaption {{ margin-top: 10px; color: #b7c2cc; }}
code {{ color: #9cd1ff; }}
</style>
<h1>openai-image-gen</h1>
<p>Output: <code>{location}</code></p>
<div class="grid">
{grid}
</div>
"""
    (out_dir / "index.html").write_text(page, encoding="utf-8")
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def main() -> int:
    """Generate images via the OpenAI Images API and write a gallery for them.

    Parses the command-line options, requests one image per prompt, stores
    each image in the output directory, and finally writes ``prompts.json``
    and ``index.html`` next to the images.

    Returns:
        0 on success. 2 when ``OPENAI_API_KEY`` is missing or blank, when
        ``--count`` is below 1, or when one of the ``normalize_*`` helpers
        rejects an option with ``ValueError``.

    Raises:
        RuntimeError: If a response contains neither ``b64_json`` nor ``url``
            for its first image, or if downloading a returned URL fails.
    """
    ap = argparse.ArgumentParser(description="Generate images via OpenAI Images API.")
    ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
    ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
    ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
    ap.add_argument("--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified.")
    ap.add_argument("--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified.")
    ap.add_argument("--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto.")
    ap.add_argument("--output-format", default="", help="Output format (GPT models only): png, jpeg, or webp.")
    ap.add_argument("--style", default="", help="Image style (dall-e-3 only): vivid or natural.")
    ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
    args = ap.parse_args()

    api_key = (os.environ.get("OPENAI_API_KEY") or "").strip()
    if not api_key:
        print("Missing OPENAI_API_KEY", file=sys.stderr)
        return 2

    count = args.count
    if count < 1:
        # A zero or negative count would otherwise yield an empty gallery.
        print(f"--count must be at least 1 (got {count}).", file=sys.stderr)
        return 2

    # Apply model-specific defaults if not specified
    default_size, default_quality = get_model_defaults(args.model)
    size = args.size or default_size
    quality = args.quality or default_quality

    if args.model == "dall-e-3" and count > 1:
        print(f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.", file=sys.stderr)
        count = 1

    # Validate the remaining options before touching the filesystem so a
    # rejected argument does not leave an empty output directory behind.
    try:
        normalized_background = normalize_background(args.model, args.background)
        normalized_style = normalize_style(args.model, args.style)
        normalized_output_format = normalize_output_format(args.model, args.output_format)
    except ValueError as e:
        print(str(e), file=sys.stderr)
        return 2

    out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    prompts = [args.prompt] * count if args.prompt else pick_prompts(count)

    # Determine file extension based on output format
    if args.model.startswith("gpt-image") and normalized_output_format:
        file_ext = normalized_output_format
    else:
        file_ext = "png"

    items: list[dict] = []
    for idx, prompt in enumerate(prompts, start=1):
        print(f"[{idx}/{len(prompts)}] {prompt}")
        res = request_images(
            api_key,
            prompt,
            args.model,
            size,
            quality,
            normalized_background,
            normalized_output_format,
            normalized_style,
        )
        # ``data`` may be absent, null, or an empty list; fall back to an
        # empty entry so the "Unexpected response" error below is raised
        # instead of a bare IndexError/TypeError.
        data = (res.get("data") or [{}])[0]
        image_b64 = data.get("b64_json")
        image_url = data.get("url")
        if not image_b64 and not image_url:
            raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")

        filename = f"{idx:03d}-{slugify(prompt)[:40]}.{file_ext}"
        filepath = out_dir / filename
        if image_b64:
            filepath.write_bytes(base64.b64decode(image_b64))
        else:
            try:
                urllib.request.urlretrieve(image_url, filepath)
            except urllib.error.URLError as e:
                raise RuntimeError(f"Failed to download image from {image_url}: {e}") from e

        items.append({"prompt": prompt, "file": filename})

    (out_dir / "prompts.json").write_text(json.dumps(items, indent=2), encoding="utf-8")
    write_gallery(out_dir, items)
    print(f"\nWrote: {(out_dir / 'index.html').as_posix()}")
    return 0
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|