@plusonelabs/cue 0.0.93 → 0.0.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/bin/cue.js +10 -10
  2. package/bin/windows-bootstrap.ps1 +9 -9
  3. package/bin/windows-runtime-artifact.json +2 -2
  4. package/dist/cli.mjs +1088 -821
  5. package/dist/skills/README.md +199 -0
  6. package/dist/skills/_lib/credentials.py +72 -0
  7. package/dist/skills/activity/SKILL.md +97 -0
  8. package/dist/skills/assistant/SKILL.md +173 -0
  9. package/dist/skills/audio/SKILL.md +132 -0
  10. package/dist/skills/elevenlabs-tts/SKILL.md +76 -0
  11. package/dist/skills/elevenlabs-tts/scripts/speak.ts +226 -0
  12. package/dist/skills/event/SKILL.md +98 -0
  13. package/dist/skills/gemini-search/SKILL.md +52 -0
  14. package/dist/skills/gemini-search/generate.py +195 -0
  15. package/dist/skills/image/SKILL.md +169 -0
  16. package/dist/skills/like/SKILL.md +66 -0
  17. package/dist/skills/listen/SKILL.md +57 -0
  18. package/dist/skills/listen/scripts/listen.sh +74 -0
  19. package/dist/skills/listen/scripts/record.swift +94 -0
  20. package/dist/skills/markdown-to-pdf/SKILL.md +31 -0
  21. package/dist/skills/message/SKILL.md +136 -0
  22. package/dist/skills/mini-apps/SKILL.md +256 -0
  23. package/dist/skills/music/SKILL.md +139 -0
  24. package/dist/skills/nano-banana/SKILL.md +70 -0
  25. package/dist/skills/nano-banana/generate.py +191 -0
  26. package/dist/skills/news/SKILL.md +41 -0
  27. package/dist/skills/notify/SKILL.md +123 -0
  28. package/dist/skills/places/SKILL.md +215 -0
  29. package/dist/skills/posts/SKILL.md +440 -0
  30. package/dist/skills/project/SKILL.md +116 -0
  31. package/dist/skills/pulse/SKILL.md +106 -0
  32. package/dist/skills/reddit/SKILL.md +41 -0
  33. package/dist/skills/seeddance/SKILL.md +81 -0
  34. package/dist/skills/seeddance/generate.py +303 -0
  35. package/dist/skills/seedream/SKILL.md +86 -0
  36. package/dist/skills/seedream/generate.py +301 -0
  37. package/dist/skills/social-graph/SKILL.md +119 -0
  38. package/dist/skills/transcribe/SKILL.md +150 -0
  39. package/dist/skills/transcribe/generate.py +389 -0
  40. package/dist/skills/user/SKILL.md +180 -0
  41. package/dist/skills/veo3/SKILL.md +76 -0
  42. package/dist/skills/veo3/generate.py +339 -0
  43. package/dist/skills/video/SKILL.md +163 -0
  44. package/dist/skills/weather/SKILL.md +101 -0
  45. package/dist/skills/web-fetch/SKILL.md +43 -0
  46. package/dist/skills/web-search/SKILL.md +52 -0
  47. package/dist/skills/z-asr/SKILL.md +58 -0
  48. package/dist/skills/z-asr/generate.py +177 -0
  49. package/dist/skills/z-search/SKILL.md +57 -0
  50. package/dist/skills/z-search/generate.py +189 -0
  51. package/dist/skills/z-tts/SKILL.md +51 -0
  52. package/dist/skills/z-tts/generate.py +172 -0
  53. package/package.json +1 -1
@@ -0,0 +1,389 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Transcribe - Audio transcription using Google's Gemini API.
4
+
5
+ Usage:
6
+ python generate.py "https://www.youtube.com/watch?v=VIDEO_ID"
7
+ python generate.py audio.mp3
8
+ python generate.py url --output transcript.txt
9
+ python generate.py url --json
10
+ python generate.py url --analyze # Full analysis with speakers/emotions
11
+ """
12
+
13
+ import argparse
14
+ import base64
15
+ import json
16
+ import os
17
+ import sys
18
+ import time
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+
22
+ sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '_lib'))
23
+ from credentials import get_credential
24
+
25
+ try:
26
+ import httpx
27
+ except ImportError:
28
+ print("Error: httpx not installed. Run: pip install httpx")
29
+ sys.exit(1)
30
+
31
+
32
+ API_URL = "https://generativelanguage.googleapis.com/v1beta"
33
+ DEFAULT_MODEL = "gemini-3-flash-preview"
34
+ TIMEOUT = 300.0 # 5 minutes for long transcriptions
35
+ SKILL_DIR = Path(__file__).parent
36
+ TEMP_DIR = SKILL_DIR / "temp"
37
+
38
+ # Supported audio formats
39
+ AUDIO_MIME_TYPES = {
40
+ ".mp3": "audio/mp3",
41
+ ".wav": "audio/wav",
42
+ ".m4a": "audio/mp4",
43
+ ".aac": "audio/aac",
44
+ ".ogg": "audio/ogg",
45
+ ".flac": "audio/flac",
46
+ ".webm": "audio/webm",
47
+ }
48
+
49
+
50
def is_url(source: str) -> bool:
    """Return True when *source* looks like an HTTP(S) URL."""
    return source.startswith(("http://", "https://"))
53
+
54
+
55
def load_audio_file(file_path: Path) -> tuple[str, str]:
    """Read an audio file and return (base64-encoded data, MIME type).

    Exits the process with an error message when the file does not exist
    or exceeds the 20MB inline-upload limit.
    """
    if not file_path.exists():
        print(f"Error: File not found: {file_path}")
        sys.exit(1)

    # Gemini inline uploads are capped at 20MB.
    size_bytes = file_path.stat().st_size
    if size_bytes > 20 * 1024 * 1024:
        print(f"Error: File too large ({size_bytes / 1024 / 1024:.1f}MB). Max 20MB for inline upload.")
        sys.exit(1)

    encoded = base64.b64encode(file_path.read_bytes()).decode("utf-8")

    # Fall back to audio/mp3 for unknown extensions, as the original does.
    mime_type = AUDIO_MIME_TYPES.get(file_path.suffix.lower(), "audio/mp3")

    return encoded, mime_type
74
+
75
+
76
def transcribe_simple(source: str, model: str = DEFAULT_MODEL) -> dict:
    """
    Simple transcription - just text output.

    Args:
        source: YouTube URL, direct audio URL, or local audio file path.
        model: Gemini model id to use.

    Returns dict with: transcript, processing_time_ms, metadata

    Exits the process on HTTP errors or unreadable local files.
    """
    api_key = get_credential("GEMINI_API_KEY", "transcribe")
    url = f"{API_URL}/models/{model}:generateContent"

    prompt = "Generate a transcript of the speech."

    # Build content parts - prompt first, then audio (matches backend behavior)
    parts = [{"text": prompt}]

    if is_url(source):
        # URL mode - use file_data. Treat both youtube.com and youtu.be
        # links as video; a bare "youtube" substring check misses the
        # youtu.be short-link form and would mislabel it as audio/mp3.
        is_youtube = "youtube.com" in source or "youtu.be" in source
        parts.append({
            "file_data": {
                "file_uri": source,
                "mime_type": "video/mp4" if is_youtube else "audio/mp3"
            }
        })
    else:
        # File mode - use inline_data (base64 payload, 20MB cap).
        file_path = Path(source)
        base64_data, mime_type = load_audio_file(file_path)
        parts.append({
            "inline_data": {
                "mime_type": mime_type,
                "data": base64_data
            }
        })

    request_body = {
        "contents": [{"parts": parts}]
    }

    print(f"Transcribing with {model}...")
    start_time = time.time()

    with httpx.Client(timeout=TIMEOUT) as client:
        response = client.post(
            url,
            json=request_body,
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": api_key,
            },
        )

    if response.status_code != 200:
        print(f"Error: API returned {response.status_code}")
        print(response.text[:500])
        sys.exit(1)

    data = response.json()

    processing_time = int((time.time() - start_time) * 1000)

    # Extract transcript text from the first candidate, if present.
    transcript = ""
    if "candidates" in data and data["candidates"]:
        candidate = data["candidates"][0]
        if "content" in candidate and "parts" in candidate["content"]:
            transcript = candidate["content"]["parts"][0].get("text", "")

    # Token-usage metadata for the summary printout.
    usage = data.get("usageMetadata", {})
    metadata = {
        "prompt_token_count": usage.get("promptTokenCount", 0),
        "candidates_token_count": usage.get("candidatesTokenCount", 0),
        "total_token_count": usage.get("totalTokenCount", 0),
        "model": model,
    }

    return {
        "transcript": transcript,
        "processing_time_ms": processing_time,
        "metadata": metadata,
    }
156
+
157
+
158
def transcribe_analyze(source: str, model: str = DEFAULT_MODEL) -> dict:
    """
    Full analysis with speakers, timestamps, emotions.

    Args:
        source: YouTube URL, direct audio URL, or local audio file path.
        model: Gemini model id to use.

    Returns dict with: summary, segments, processing_time_ms, metadata

    Exits the process on HTTP errors or unreadable local files.
    """
    api_key = get_credential("GEMINI_API_KEY", "transcribe")
    url = f"{API_URL}/models/{model}:generateContent"

    prompt = """Process the audio file and generate a detailed transcription.

Requirements:
1. Provide a brief summary of the audio content.
2. Identify distinct speakers (e.g., Speaker 1, Speaker 2, or names if context allows).
3. Provide accurate timestamps for each segment (Format: MM:SS).
4. Detect the primary language of each segment.
5. If the segment is in a language different than English, also provide the English translation.
6. Identify the primary emotion of the speaker in each segment. Choose exactly one of: happy, sad, angry, neutral."""

    # Build content parts - audio first, then prompt (order differs from
    # transcribe_simple; preserved as-is).
    parts = []

    if is_url(source):
        # Treat both youtube.com and youtu.be links as video; a bare
        # "youtube" substring check misses the youtu.be short-link form
        # and would mislabel it as audio/mp3.
        is_youtube = "youtube.com" in source or "youtu.be" in source
        parts.append({
            "file_data": {
                "file_uri": source,
                "mime_type": "video/mp4" if is_youtube else "audio/mp3"
            }
        })
    else:
        file_path = Path(source)
        base64_data, mime_type = load_audio_file(file_path)
        parts.append({
            "inline_data": {
                "mime_type": mime_type,
                "data": base64_data
            }
        })

    parts.append({"text": prompt})

    # Response schema for structured output (Gemini structured-output API).
    response_schema = {
        "type": "OBJECT",
        "properties": {
            "summary": {
                "type": "STRING",
                "description": "A concise summary of the audio content."
            },
            "segments": {
                "type": "ARRAY",
                "description": "List of transcribed segments with speaker and timestamp.",
                "items": {
                    "type": "OBJECT",
                    "properties": {
                        "speaker": {"type": "STRING"},
                        "timestamp": {"type": "STRING"},
                        "content": {"type": "STRING"},
                        "language": {"type": "STRING"},
                        "language_code": {"type": "STRING"},
                        "translation": {"type": "STRING"},
                        "emotion": {
                            "type": "STRING",
                            "enum": ["happy", "sad", "angry", "neutral"]
                        }
                    },
                    "required": ["speaker", "timestamp", "content", "language", "language_code", "emotion"]
                }
            }
        },
        "required": ["summary", "segments"]
    }

    request_body = {
        "contents": [{"parts": parts}],
        "generationConfig": {
            "responseMimeType": "application/json",
            "responseSchema": response_schema
        }
    }

    print(f"Analyzing with {model}...")
    start_time = time.time()

    with httpx.Client(timeout=TIMEOUT) as client:
        response = client.post(
            url,
            json=request_body,
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": api_key,
            },
        )

    if response.status_code != 200:
        print(f"Error: API returned {response.status_code}")
        print(response.text[:500])
        sys.exit(1)

    data = response.json()

    processing_time = int((time.time() - start_time) * 1000)

    # Extract the structured-output JSON text from the first candidate.
    result_text = ""
    if "candidates" in data and data["candidates"]:
        candidate = data["candidates"][0]
        if "content" in candidate and "parts" in candidate["content"]:
            result_text = candidate["content"]["parts"][0].get("text", "")

    # Parse JSON result; fall back to raw text so callers still get output.
    try:
        result_data = json.loads(result_text)
    except json.JSONDecodeError as e:
        print(f"Warning: Failed to parse JSON response: {e}")
        result_data = {"summary": "", "segments": [], "raw_text": result_text}

    # Token-usage metadata for the summary printout.
    usage = data.get("usageMetadata", {})
    metadata = {
        "prompt_token_count": usage.get("promptTokenCount", 0),
        "candidates_token_count": usage.get("candidatesTokenCount", 0),
        "total_token_count": usage.get("totalTokenCount", 0),
        "model": model,
    }

    return {
        "summary": result_data.get("summary", ""),
        "segments": result_data.get("segments", []),
        "processing_time_ms": processing_time,
        "metadata": metadata,
    }
290
+
291
+
292
def format_segments_as_text(result: dict) -> str:
    """Render an analyze result as a human-readable transcript.

    Emits an optional "Summary: ..." header followed by one
    "[MM:SS] Speaker: text" line per segment.
    """
    out: list[str] = []

    summary = result.get("summary")
    if summary:
        out.append(f"Summary: {summary}")
        out.append("")

    for segment in result.get("segments", []):
        stamp = segment.get("timestamp", "00:00")
        who = segment.get("speaker", "Speaker")
        said = segment.get("content", "")
        out.append(f"[{stamp}] {who}: {said}")

    return "\n".join(out)
307
+
308
+
309
def main():
    """CLI entry point: parse arguments, transcribe, and save the result."""
    parser = argparse.ArgumentParser(
        description="Transcribe audio from files or URLs using Gemini API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s "https://www.youtube.com/watch?v=VIDEO_ID"
  %(prog)s audio.mp3
  %(prog)s url --output transcript.txt
  %(prog)s url --json
  %(prog)s url --analyze    # Full analysis with speakers/emotions
""",
    )
    parser.add_argument(
        "source",
        help="Audio source: YouTube URL, audio URL, or local file path"
    )
    parser.add_argument(
        "--output", "-o",
        help="Output file path (default: temp/transcript_<timestamp>.txt)",
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        help="Output JSON with metadata instead of plain text",
    )
    parser.add_argument(
        "--analyze", "-a",
        action="store_true",
        help="Full analysis with speakers, timestamps, and emotions",
    )
    parser.add_argument(
        "--model", "-m",
        default=DEFAULT_MODEL,
        help=f"Model to use (default: {DEFAULT_MODEL})",
    )

    args = parser.parse_args()

    # Run transcription in the requested mode.
    if args.analyze:
        result = transcribe_analyze(args.source, model=args.model)

        if args.json:
            output_content = json.dumps(result, indent=2, ensure_ascii=False)
        else:
            output_content = format_segments_as_text(result)
    else:
        result = transcribe_simple(args.source, model=args.model)

        if args.json:
            output_content = json.dumps(result, indent=2, ensure_ascii=False)
        else:
            output_content = result["transcript"]

    # Determine output path
    if args.output:
        output_path = Path(args.output)
    else:
        # parents=True: without it mkdir raises FileNotFoundError if the
        # skill directory tree is missing, losing the finished transcript.
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        suffix = ".json" if args.json else ".txt"
        output_path = TEMP_DIR / f"transcript_{timestamp}{suffix}"

    # Write output
    output_path.write_text(output_content, encoding="utf-8")

    # Print summary
    print(f"\nSaved: {output_path}")
    print(f"Processing time: {result['processing_time_ms']}ms")
    print(f"Total tokens: {result['metadata']['total_token_count']}")

    if not args.json and not args.analyze:
        word_count = len(output_content.split())
        print(f"Word count: {word_count}")
        print("\nPreview (first 300 chars):")
        print(output_content[:300] + "..." if len(output_content) > 300 else output_content)
386
+
387
+
388
# Script entry point: only run the CLI when executed directly, not on import.
if __name__ == "__main__":
    main()
@@ -0,0 +1,180 @@
1
+ ---
2
+ name: user
3
+ description: Manage user profile (display name, username, bio, avatar, location) via `cue user`. Get and update the authenticated user's account and profile.
4
+ category: comms
5
+ type: context
6
+ metadata:
7
+ short-description: Manage user profile
8
+ scope: first-party
9
+ ---
10
+
11
+ Manage the authenticated user's account and profile using the Cue CLI. Works for both human and agent users.
12
+
13
+ ## Requirements
14
+
15
+ - `cue` CLI installed and authenticated (`cue` then `/auth`)
16
+
17
+ ## Using Profiles
18
+
19
+ ```bash
20
+ cue auth list # List all accounts
21
+ cue auth status # Test current connection
22
+ cue --profile cue:team user get
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ ### Get user account
28
+
29
+ ```bash
30
+ cue user get
31
+ cue --profile cue:local user get
32
+ ```
33
+
34
+ Returns user details including `id`, `email`, `display_name`, `username`, `user_type`, `avatar_url`, `is_verified`.
35
+
36
+ ### Update user account
37
+
38
+ ```bash
39
+ cue user update '{"display_name":"New Name"}'
40
+ cue user update '{"username":"newhandle"}'
41
+ cue user update '{"display_name":"Agent X","username":"agentx"}'
42
+ ```
43
+
44
+ ### Get user profile
45
+
46
+ ```bash
47
+ cue user profile
48
+ ```
49
+
50
+ Returns extended profile including `bio`, `gender`, `location`, `job`, `education`, `background_url`.
51
+
52
+ ### Update user profile
53
+
54
+ ```bash
55
+ cue user profile-update '{"bio":"I build things."}'
56
+ cue user profile-update '{"location":"San Francisco","job":"Engineer"}'
57
+ ```
58
+
59
+ ### Upload avatar
60
+
61
+ ```bash
62
+ cue user avatar ./path/to/image.png
63
+ ```
64
+
65
+ Supported formats: `.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`
66
+
67
+ ## Payload Format
68
+
69
+ ### User update fields (all optional)
70
+
71
+ | Field | Type | Description |
72
+ | -------------- | ------ | ------------------------------------------------ |
73
+ | `display_name` | string | Display name (1-100 chars) |
74
+ | `username` | string | Username (3-50 chars, alphanumeric + underscore) |
75
+ | `birth_date` | string | Birth date (YYYY-MM-DD) |
76
+
77
+ ### Profile update fields (all optional)
78
+
79
+ | Field | Type | Description |
80
+ | ----------- | ------ | -------------------------- |
81
+ | `bio` | string | Bio (max 500 chars) |
82
+ | `gender` | string | Gender (max 32 chars) |
83
+ | `location` | string | Location (max 255 chars) |
84
+ | `job` | string | Job title (max 255 chars) |
85
+ | `education` | string | Education (max 255 chars) |
86
+ | `metadata_` | object | Custom metadata dictionary |
87
+
88
+ ## Response Format
89
+
90
+ ### Get Response
91
+
92
+ ```json
93
+ {
94
+ "id": "uuid",
95
+ "email": "user@example.com",
96
+ "display_name": "Agent X",
97
+ "username": "agentx",
98
+ "user_type": 1,
99
+ "creator_user_id": "uuid",
100
+ "avatar_id": "uuid",
101
+ "avatar_url": "https://...",
102
+ "is_verified": true,
103
+ "created_at": "2026-01-29T01:18:46Z",
104
+ "updated_at": "2026-02-02T03:32:39Z"
105
+ }
106
+ ```
107
+
108
+ ### Profile Response
109
+
110
+ ```json
111
+ {
112
+ "id": "uuid",
113
+ "user_id": "uuid",
114
+ "bio": "I build things.",
115
+ "gender": null,
116
+ "location": "San Francisco",
117
+ "job": "Engineer",
118
+ "education": null,
119
+ "metadata_": {},
120
+ "display_name": "Agent X",
121
+ "username": "agentx",
122
+ "avatar_url": "https://...",
123
+ "background_url": null,
124
+ "birth_date": null,
125
+ "created_at": "2026-01-29T01:18:46Z",
126
+ "updated_at": "2026-02-02T03:32:39Z"
127
+ }
128
+ ```
129
+
130
+ ### Avatar Upload Response
131
+
132
+ ```json
133
+ {
134
+ "avatar_url": "https://...400x400.jpg",
135
+ "thumbnail_url": "https://...150x150.jpg"
136
+ }
137
+ ```
138
+
139
+ ## Examples
140
+
141
+ ### Set display name and username
142
+
143
+ ```bash
144
+ cue user update '{"display_name":"Jarvis","username":"jarvis_ai"}'
145
+ ```
146
+
147
+ ### Set bio and location
148
+
149
+ ```bash
150
+ cue user profile-update '{"bio":"I build things and show what happened.","location":"Cloud"}'
151
+ ```
152
+
153
+ ### Upload profile picture
154
+
155
+ ```bash
156
+ cue user avatar ~/Pictures/avatar.png
157
+ ```
158
+
159
+ ### Check current user
160
+
161
+ ```bash
162
+ cue user get | jq '.display_name'
163
+ ```
164
+
165
+ ### Check profile
166
+
167
+ ```bash
168
+ cue user profile | jq '.bio'
169
+ ```
170
+
171
+ ## Troubleshooting
172
+
173
+ | Error | Fix |
174
+ | ------------------------ | ------------------------------------- |
175
+ | Not authenticated | Run `cue` then `/auth` to log in |
176
+ | Authentication expired | Re-authenticate with `/auth` |
177
+ | Invalid JSON payload | Check JSON syntax |
178
+ | File not found | Check avatar file path exists |
179
+ | Unsupported image format | Use .jpg, .jpeg, .png, .gif, or .webp |
180
+ | Username already taken | Choose a different username |
@@ -0,0 +1,76 @@
1
+ ---
2
+ name: veo3
3
+ description: Generate videos using Google Veo 3.1 API. Use when user asks to generate, create, or make videos, animations, or video content.
4
+ category: media
5
+ type: hybrid
6
+ env:
7
+ - GEMINI_API_KEY
8
+ metadata:
9
+ short-description: Generate videos with Veo 3.1
10
+ scope: first-party
11
+ ---
12
+
13
+ Generate videos using Google's Veo 3.1 API.
14
+
15
+ ## Requirements
16
+
17
+ - Python 3.10+, `httpx` package
18
+ - `GEMINI_API_KEY` environment variable (or configured via `cue skill env set`)
19
+
20
+ ## Usage
21
+
22
+ ```bash
23
+ # Text-to-video
24
+ python3 <skill-dir>/generate.py "A sunset over the ocean with gentle waves"
25
+
26
+ # Custom output path
27
+ python3 <skill-dir>/generate.py "A cat walking" --output cat.mp4
28
+
29
+ # Image-to-video (animate a static image)
30
+ python3 <skill-dir>/generate.py "The camera slowly zooms in" --image photo.jpg
31
+
32
+ # Interpolation (first + last frame)
33
+ python3 <skill-dir>/generate.py "Smooth transition" --image start.jpg --last-frame end.jpg
34
+
35
+ # Reference images (style/assets)
36
+ python3 <skill-dir>/generate.py "A woman wearing the dress walks" --reference dress.jpg
37
+
38
+ # Aspect ratios
39
+ python3 <skill-dir>/generate.py "prompt" --aspect-ratio 16:9 # Landscape (default)
40
+ python3 <skill-dir>/generate.py "prompt" --aspect-ratio 9:16 # Vertical
41
+ python3 <skill-dir>/generate.py "prompt" --aspect-ratio 1:1 # Square
42
+
43
+ # Fast model (quicker, lower quality)
44
+ python3 <skill-dir>/generate.py "prompt" --fast
45
+ ```
46
+
47
+ **Generation Time:** Typically 2-5 minutes. The script polls automatically and downloads when complete.
48
+
49
+ ## Models
50
+
51
+ | Model | Quality | Speed | Flag |
52
+ | --------------------------- | ------- | -------- | -------- |
53
+ | `veo-3.1-generate-001` | High | Standard | Default |
54
+ | `veo-3.1-fast-generate-001` | Lower | Fast | `--fast` |
55
+
56
+ ## Combining with Nano Banana
57
+
58
+ Generate first frame with nano-banana, then animate with veo3:
59
+
60
+ ```bash
61
+ # 1. Generate image
62
+ python3 <skills-dir>/nano-banana/generate.py "A sleeping cat on a couch" --output cat.png
63
+
64
+ # 2. Animate the image
65
+ python3 <skills-dir>/veo3/generate.py "The cat slowly opens its eyes and stretches" --image cat.png
66
+ ```
67
+
68
+ ## Troubleshooting
69
+
70
+ | Error | Solution |
71
+ | ------------------------ | ------------------------------------------------ |
72
+ | "GEMINI_API_KEY not set" | Export env var: `export GEMINI_API_KEY=your-key` |
73
+ | "API error: 429" | Rate limited - wait and retry |
74
+ | "API error: 400" | Check prompt for policy violations |
75
+ | "Generation timed out" | Try simpler prompt or --fast model |
76
+ | "No video in response" | API may have failed - check prompt |