varg.ai-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/.claude/settings.local.json +7 -0
  2. package/.env.example +24 -0
  3. package/CLAUDE.md +118 -0
  4. package/README.md +231 -0
  5. package/SKILLS.md +157 -0
  6. package/STRUCTURE.md +92 -0
  7. package/TEST_RESULTS.md +122 -0
  8. package/action/captions/SKILL.md +170 -0
  9. package/action/captions/index.ts +227 -0
  10. package/action/edit/SKILL.md +235 -0
  11. package/action/edit/index.ts +493 -0
  12. package/action/image/SKILL.md +140 -0
  13. package/action/image/index.ts +112 -0
  14. package/action/sync/SKILL.md +136 -0
  15. package/action/sync/index.ts +187 -0
  16. package/action/transcribe/SKILL.md +179 -0
  17. package/action/transcribe/index.ts +227 -0
  18. package/action/video/SKILL.md +116 -0
  19. package/action/video/index.ts +135 -0
  20. package/action/voice/SKILL.md +125 -0
  21. package/action/voice/index.ts +201 -0
  22. package/biome.json +33 -0
  23. package/index.ts +38 -0
  24. package/lib/README.md +144 -0
  25. package/lib/ai-sdk/fal.ts +106 -0
  26. package/lib/ai-sdk/replicate.ts +107 -0
  27. package/lib/elevenlabs.ts +382 -0
  28. package/lib/fal.ts +478 -0
  29. package/lib/ffmpeg.ts +467 -0
  30. package/lib/fireworks.ts +235 -0
  31. package/lib/groq.ts +246 -0
  32. package/lib/higgsfield.ts +176 -0
  33. package/lib/remotion/SKILL.md +823 -0
  34. package/lib/remotion/cli.ts +115 -0
  35. package/lib/remotion/functions.ts +283 -0
  36. package/lib/remotion/index.ts +19 -0
  37. package/lib/remotion/templates.ts +73 -0
  38. package/lib/replicate.ts +304 -0
  39. package/output.txt +1 -0
  40. package/package.json +35 -0
  41. package/pipeline/cookbooks/SKILL.md +285 -0
  42. package/pipeline/cookbooks/remotion-video.md +585 -0
  43. package/pipeline/cookbooks/round-video-character.md +337 -0
  44. package/pipeline/cookbooks/talking-character.md +59 -0
  45. package/test-import.ts +7 -0
  46. package/test-services.ts +97 -0
  47. package/tsconfig.json +29 -0
  48. package/utilities/s3.ts +147 -0
@@ -0,0 +1,7 @@
1
+ {
2
+ "permissions": {
3
+ "allow": ["Bash(mkdir:*)"],
4
+ "deny": [],
5
+ "ask": []
6
+ }
7
+ }
package/.env.example ADDED
@@ -0,0 +1,24 @@
1
+ # fal.ai api key
2
+ FAL_API_KEY=fal_xxx
3
+
4
+ # higgsfield credentials
5
+ HIGGSFIELD_API_KEY=hf_xxx
6
+ HIGGSFIELD_SECRET=secret_xxx
7
+
8
+ # elevenlabs api key
9
+ ELEVENLABS_API_KEY=el_xxx
10
+
11
+ # groq api key (ultra-fast whisper transcription)
12
+ GROQ_API_KEY=gsk_xxx
13
+
14
+ # fireworks api key (word-level transcription with timestamps)
15
+ FIREWORKS_API_KEY=fw_xxx
16
+
17
+ # cloudflare r2 / s3 storage
18
+ CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
19
+ CLOUDFLARE_ACCESS_KEY_ID=xxx
20
+ CLOUDFLARE_ACCESS_SECRET=xxx
21
+ CLOUDFLARE_R2_BUCKET=m
22
+
23
+ # replicate (optional)
24
+ REPLICATE_API_TOKEN=r8_xxx
package/CLAUDE.md ADDED
@@ -0,0 +1,118 @@
1
+ ---
2
+ description: Use Bun instead of Node.js, npm, pnpm, or vite. Use existing tools via bash commands.
3
+ globs: "*.ts, *.tsx, *.html, *.css, *.js, *.jsx, package.json"
4
+ alwaysApply: false
5
+ ---
6
+
7
+ Default to using Bun instead of Node.js.
8
+
9
+ ## Working with this SDK
10
+
11
+ - **Use existing tools**: Always use the built-in CLI tools via bash commands (e.g., `bun run lib/fal.ts`, `bun run lib/elevenlabs.ts`)
12
+ - **Don't write custom scripts**: Avoid creating new TypeScript/JavaScript scripts. Use the existing lib/ tools directly
13
+ - **Media folders**: Store input files in `media/` folder, outputs go to `output/` folder
14
+ - **Local file support**: Tools like `lib/fal.ts` support local file paths (e.g., `media/image.png`) in addition to URLs
15
+
16
+ - Use `bun <file>` instead of `node <file>` or `ts-node <file>`
17
+ - Use `bun test` instead of `jest` or `vitest`
18
+ - Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild`
19
+ - Use `bun install` instead of `npm install` or `yarn install` or `pnpm install`
20
+ - Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>`
21
+ - Bun automatically loads .env, so don't use dotenv.
22
+
23
+ ## APIs
24
+
25
+ - `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`.
26
+ - `bun:sqlite` for SQLite. Don't use `better-sqlite3`.
27
+ - `Bun.redis` for Redis. Don't use `ioredis`.
28
+ - `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`.
29
+ - `WebSocket` is built-in. Don't use `ws`.
30
+ - Prefer `Bun.file` over `node:fs`'s readFile/writeFile
31
+ - Bun.$`ls` instead of execa.
32
+
33
+ ## Testing
34
+
35
+ Use `bun test` to run tests.
36
+
37
+ ```ts#index.test.ts
38
+ import { test, expect } from "bun:test";
39
+
40
+ test("hello world", () => {
41
+ expect(1).toBe(1);
42
+ });
43
+ ```
44
+
45
+ ## Frontend
46
+
47
+ Use HTML imports with `Bun.serve()`. Don't use `vite`. HTML imports fully support React, CSS, Tailwind.
48
+
49
+ Server:
50
+
51
+ ```ts#index.ts
52
+ import index from "./index.html"
53
+
54
+ Bun.serve({
55
+ routes: {
56
+ "/": index,
57
+ "/api/users/:id": {
58
+ GET: (req) => {
59
+ return new Response(JSON.stringify({ id: req.params.id }));
60
+ },
61
+ },
62
+ },
63
+ // optional websocket support
64
+ websocket: {
65
+ open: (ws) => {
66
+ ws.send("Hello, world!");
67
+ },
68
+ message: (ws, message) => {
69
+ ws.send(message);
70
+ },
71
+ close: (ws) => {
72
+ // handle close
73
+ }
74
+ },
75
+ development: {
76
+ hmr: true,
77
+ console: true,
78
+ }
79
+ })
80
+ ```
81
+
82
+ HTML files can import .tsx, .jsx or .js files directly and Bun's bundler will transpile & bundle automatically. `<link>` tags can point to stylesheets and Bun's CSS bundler will bundle them.
83
+
84
+ ```html#index.html
85
+ <html>
86
+ <body>
87
+ <h1>Hello, world!</h1>
88
+ <script type="module" src="./frontend.tsx"></script>
89
+ </body>
90
+ </html>
91
+ ```
92
+
93
+ With the following `frontend.tsx`:
94
+
95
+ ```tsx#frontend.tsx
96
+ import React from "react";
97
+
98
+ // import .css files directly and it works
99
+ import './index.css';
100
+
101
+ import { createRoot } from "react-dom/client";
102
+
103
+ const root = createRoot(document.body);
104
+
105
+ export default function Frontend() {
106
+ return <h1>Hello, world!</h1>;
107
+ }
108
+
109
+ root.render(<Frontend />);
110
+ ```
111
+
112
+ Then, run index.ts
113
+
114
+ ```sh
115
+ bun --hot ./index.ts
116
+ ```
117
+
118
+ For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`.
package/README.md ADDED
@@ -0,0 +1,231 @@
1
+ # varg.ai sdk
2
+
3
+ video generation and editing tools sdk
4
+
5
+ ## folder structure
6
+
7
+ ```
8
+ sdk/
9
+
10
+ ├── media/ # working directory for media files (images, videos, audio)
11
+ ├── output/ # generated output files
12
+
13
+ ├── utilities/
14
+
15
+ ├── lib/
16
+ │ ├── pymovie/
17
+ │ ├── opencv/
18
+ │ ├── fal/
19
+ │ ├── higgsfield/
20
+ │ ├── ffmpeg/
21
+ │ ├── remotion/
22
+ │ ├── remotion.dev/
23
+ │ └── motion.dev/
24
+
25
+ ├── service/
26
+ │ ├── image/ # image generation + SKILL.md
27
+ │ ├── video/ # video generation + SKILL.md
28
+ │ ├── voice/ # voice synthesis + SKILL.md
29
+ │ ├── sync/ # lipsync + SKILL.md
30
+ │ ├── captions/ # video captions + SKILL.md
31
+ │ ├── edit/ # video editing + SKILL.md
32
+ │ └── transcribe/ # audio transcription + SKILL.md
33
+
34
+ └── pipeline/
35
+ └── cookbooks/
36
+ ```
37
+
38
+ ## installation
39
+
40
+ ```bash
41
+ bun install
42
+ ```
43
+
44
+ set environment variables in `.env`:
45
+ ```bash
46
+ FAL_API_KEY=fal_xxx
47
+ HIGGSFIELD_API_KEY=hf_xxx
48
+ HIGGSFIELD_SECRET=secret_xxx
49
+ REPLICATE_API_TOKEN=r8_xxx
50
+ ELEVENLABS_API_KEY=el_xxx
51
+ GROQ_API_KEY=gsk_xxx
52
+ FIREWORKS_API_KEY=fw_xxx
53
+ CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
54
+ CLOUDFLARE_ACCESS_KEY_ID=xxx
55
+ CLOUDFLARE_ACCESS_SECRET=xxx
56
+ CLOUDFLARE_R2_BUCKET=m
57
+ ```
58
+
59
+ ## usage
60
+
61
+ ### as cli
62
+
63
+ ```bash
64
+ # generate image with ai-sdk (recommended)
65
+ bun run lib/ai-sdk/fal.ts generate_image "a beautiful sunset" "fal-ai/flux/dev" "16:9"
66
+
67
+ # generate image with fal client (advanced features)
68
+ bun run lib/fal.ts generate_image "a beautiful sunset"
69
+
70
+ # generate video from image (supports local files)
71
+ bun run lib/fal.ts image_to_video "person talking" media/image.jpg 5
72
+ bun run lib/fal.ts image_to_video "person talking" https://example.com/image.jpg 5
73
+
74
+ # generate soul character
75
+ bun run lib/higgsfield.ts generate_soul "professional headshot"
76
+
77
+ # generate video with replicate
78
+ bun run lib/replicate.ts minimax "person walking on beach"
79
+
80
+ # generate voice with elevenlabs
81
+ bun run lib/elevenlabs.ts tts "hello world" rachel output.mp3
82
+
83
+ # transcribe audio to text/subtitles
84
+ bun run service/transcribe media/audio.mp3 groq
85
+ bun run service/transcribe media/audio.mp3 fireworks output.srt
86
+ bun run lib/fireworks.ts media/audio.mp3 output.srt
87
+
88
+ # edit video with ffmpeg
89
+ bun run lib/ffmpeg.ts concat output.mp4 video1.mp4 video2.mp4
90
+
91
+ # lipsync video with audio
92
+ bun run service/sync overlay video.mp4 audio.mp3 synced.mp4
93
+
94
+ # upload file to s3
95
+ bun run utilities/s3.ts upload ./video.mp4 videos/output.mp4
96
+ ```
97
+
98
+ ### as library
99
+
100
+ ```typescript
101
+ import { generateImage, imageToVideo } from "varg.ai-sdk"
102
+ import { uploadFromUrl } from "varg.ai-sdk"
103
+
104
+ // generate image
105
+ const img = await generateImage({
106
+ prompt: "a beautiful sunset",
107
+ model: "fal-ai/flux-pro/v1.1",
108
+ })
109
+
110
+ // animate it
111
+ const video = await imageToVideo({
112
+ prompt: "camera pan across scene",
113
+ imageUrl: img.data.images[0].url,
114
+ duration: 5,
115
+ })
116
+
117
+ // upload to s3
118
+ const url = await uploadFromUrl(
119
+ video.data.video.url,
120
+ "videos/sunset.mp4"
121
+ )
122
+
123
+ console.log(`uploaded: ${url}`)
124
+ ```
125
+
126
+ ## modules
127
+
128
+ ### lib
129
+ core libraries for video/audio/ai processing:
130
+ - **ai-sdk/fal**: fal.ai using vercel ai sdk (recommended for images)
131
+ - **ai-sdk/replicate**: replicate.com using vercel ai sdk
132
+ - **fal**: fal.ai using direct client (for video & advanced features, supports local file uploads)
133
+ - **higgsfield**: soul character generation
134
+ - **replicate**: replicate.com api (minimax, kling, luma, flux)
135
+ - **elevenlabs**: text-to-speech and voice generation
136
+ - **groq**: ultra-fast whisper transcription (audio to text)
137
+ - **fireworks**: word-level audio transcription with timestamps (srt/vtt)
138
+ - **ffmpeg**: video editing operations (concat, trim, resize, etc.)
139
+ - **remotion**: programmatic video creation with react
140
+
141
+ ### media folder
142
+ - **media/**: working directory for storing input media files (images, videos, audio)
143
+ - **output/**: directory for generated/processed output files
144
+ - use `media/` for source files, `output/` for results
145
+ - fal.ts supports local file paths from `media/` folder
146
+
147
+ ### service
148
+ high-level services combining multiple libs. each service includes a SKILL.md for claude code agent skills:
149
+ - **image**: image generation (fal + higgsfield)
150
+ - **video**: video generation from image/text
151
+ - **voice**: voice generation with multiple providers (elevenlabs)
152
+ - **transcribe**: audio transcription with groq whisper or fireworks (srt support)
153
+ - **sync**: lipsync workflows (wav2lip, audio overlay)
154
+ - **captions**: auto-generate and overlay subtitles on videos
155
+ - **edit**: video editing workflows (resize, trim, concat, social media prep)
156
+
157
+ ### utilities
158
+ - **s3**: cloudflare r2 / s3 storage operations
159
+
160
+ ### pipeline
161
+ - **cookbooks**: step-by-step recipes for complex workflows (includes talking-character SKILL.md)
162
+
163
+ ## key learnings
164
+
165
+ ### remotion batch rendering with variations
166
+ when creating multiple video variations (e.g., 15 videos with different images):
167
+
168
+ **❌ don't do this:**
169
+ ```bash
170
+ # overwriting files causes caching issues
171
+ for i in {1..15}; do
172
+ cp woman-$i-before.jpg lib/remotion/public/before.jpg # overwrites!
173
+ cp woman-$i-after.jpg lib/remotion/public/after.jpg # overwrites!
174
+ render video
175
+ done
176
+ # result: all videos show the same woman (the last one)
177
+ ```
178
+
179
+ **✅ do this instead:**
180
+ ```typescript
181
+ // 1. use unique filenames for each variation
182
+ // lib/remotion/public/woman-01-before.jpg, woman-02-before.jpg, etc.
183
+
184
+ // 2. pass variation id as prop
185
+ interface Props { variationId?: string }
186
+ const MyComp: React.FC<Props> = ({ variationId = "01" }) => {
187
+ const beforeImg = staticFile(`woman-${variationId}-before.jpg`);
188
+ const afterImg = staticFile(`woman-${variationId}-after.jpg`);
189
+ }
190
+
191
+ // 3. register multiple compositions with unique props
192
+ registerRoot(() => (
193
+ <>
194
+ {Array.from({ length: 15 }, (_, i) => {
195
+ const variationId = String(i + 1).padStart(2, "0");
196
+ return (
197
+ <Composition
198
+ id={`MyVideo-${variationId}`}
199
+ component={MyComp}
200
+ defaultProps={{ variationId }}
201
+ {...otherProps}
202
+ />
203
+ );
204
+ })}
205
+ </>
206
+ ));
207
+
208
+ // 4. render each composition
209
+ bun run lib/remotion/index.ts render root.tsx MyVideo-01 output-01.mp4
210
+ bun run lib/remotion/index.ts render root.tsx MyVideo-02 output-02.mp4
211
+ ```
212
+
213
+ **why this matters:**
214
+ - remotion's `staticFile()` caches based on filename
215
+ - overwriting files between renders causes all videos to use the last cached version
216
+ - unique filenames + props ensure each render uses correct assets
217
+
218
+ ### fal.ai nsfw content filtering
219
+ fal.ai automatically filters content that may be nsfw:
220
+
221
+ **symptoms:**
222
+ - image generation succeeds but returns empty file (~7.6KB)
223
+ - no error message
224
+ - happens with certain clothing/body descriptions
225
+
226
+ **solution:**
227
+ - be explicit about modest, full-coverage clothing:
228
+ - ✅ "long sleeve athletic top and full length leggings"
229
+ - ❌ "athletic wear" (vague, may trigger filter)
230
+ - add "professional", "modest", "appropriate" to prompts
231
+ - always check file sizes after batch generation (< 10KB = filtered)
package/SKILLS.md ADDED
@@ -0,0 +1,157 @@
1
+ # agent skills
2
+
3
+ this sdk includes claude code agent skills for each service. each skill is co-located with its service code.
4
+
5
+ ## available skills
6
+
7
+ ### service skills
8
+
9
+ located in `service/<name>/SKILL.md`:
10
+
11
+ 1. **image-generation** (`service/image/`)
12
+ - generate ai images using fal (flux models) or higgsfield soul characters
13
+ - cli: `bun run service/image fal|soul <prompt> [options]`
14
+
15
+ 2. **video-generation** (`service/video/`)
16
+ - generate videos from images (local or url) or text prompts using fal.ai
17
+ - supports local image files - automatically uploads to fal storage
18
+ - cli: `bun run service/video from_image|from_text <args>`
19
+
20
+ 3. **voice-synthesis** (`service/voice/`)
21
+ - generate realistic text-to-speech audio using elevenlabs
22
+ - cli: `bun run service/voice generate|elevenlabs <text> [options]`
23
+
24
+ 3b. **music-generation** (`lib/elevenlabs.ts`)
25
+ - generate music from text prompts using elevenlabs
26
+ - generate sound effects from descriptions
27
+ - cli: `bun run lib/elevenlabs.ts music|sfx <prompt> [options]`
28
+
29
+ 4. **video-lipsync** (`service/sync/`)
30
+ - sync video with audio using wav2lip or simple overlay
31
+ - cli: `bun run service/sync sync|wav2lip|overlay <args>`
32
+
33
+ 5. **video-captions** (`service/captions/`)
34
+ - add auto-generated or custom subtitles to videos
35
+ - cli: `bun run service/captions <videoPath> [options]`
36
+
37
+ 6. **video-editing** (`service/edit/`)
38
+ - edit videos with ffmpeg (resize, trim, concat, social media prep)
39
+ - cli: `bun run service/edit social|montage|trim|resize|merge_audio <args>`
40
+
41
+ 7. **audio-transcription** (`service/transcribe/`)
42
+ - transcribe audio to text or subtitles using groq/fireworks
43
+ - cli: `bun run service/transcribe <audioUrl> <provider> [outputPath]`
44
+
45
+ ### utility skills
46
+
47
+ 8. **telegram-send** (external: `/Users/aleks/Github/Badaboom1995/rumble-b2c`)
48
+ - send videos to telegram users/channels as round videos
49
+ - automatically converts to 512x512 square format for telegram
50
+ - cli: `cd /Users/aleks/Github/Badaboom1995/rumble-b2c && bun run scripts/telegram-send-video.ts <videoPath> <@username>`
51
+ - example: `cd /Users/aleks/Github/Badaboom1995/rumble-b2c && bun run scripts/telegram-send-video.ts /path/to/video.mp4 @caffeinum`
52
+
53
+ ### pipeline skills
54
+
55
+ located in `pipeline/cookbooks/SKILL.md`:
56
+
57
+ 9. **talking-character-pipeline** (`pipeline/cookbooks/`)
58
+ - complete workflow to create talking character videos
59
+ - combines: character generation → voiceover → animation → lipsync → captions → social prep
60
+
61
+ 10. **round-video-character** (`pipeline/cookbooks/round-video-character.md`)
62
+ - create realistic round selfie videos for telegram using nano banana pro + wan 2.5
63
+ - workflow: generate selfie first frame (person in setting) → voiceover → wan 2.5 video
64
+ - uses: `bun run lib/fal.ts`, `bun run lib/replicate.ts`, `bun run lib/elevenlabs.ts`
65
+ - input: text script + profile photo
66
+ - output: extreme close-up selfie video with authentic camera shake, lighting, and audio
67
+
68
+ ## structure
69
+
70
+ each skill follows this pattern:
71
+
72
+ ```
73
+ service/<name>/
74
+ ├── index.ts # service implementation
75
+ └── SKILL.md # claude code agent skill
76
+ ```
77
+
78
+ ## how skills work
79
+
80
+ skills are **model-invoked** - claude autonomously decides when to use them based on your request and the skill's description.
81
+
82
+ **example:**
83
+ - you say: "create a talking character video"
84
+ - claude reads `talking-character-pipeline` skill
85
+ - claude executes the workflow using the pipeline steps
86
+
87
+ ## using skills
88
+
89
+ ### in claude code
90
+
91
+ skills are automatically discovered when you're in the sdk directory:
92
+
93
+ ```
94
+ user: create an image of a sunset
95
+ claude: [uses image-generation skill]
96
+ bun run service/image fal "beautiful sunset over mountains"
97
+ ```
98
+
99
+ ### manually
100
+
101
+ you can also run services directly:
102
+
103
+ ```bash
104
+ # generate image
105
+ bun run service/image fal "sunset over mountains" true
106
+
107
+ # generate video from that image
108
+ bun run service/video from_image "camera pan" https://image-url.jpg 5 true
109
+
110
+ # add voice
111
+ bun run service/voice elevenlabs "this is a beautiful sunset" rachel true
112
+
113
+ # sync with video
114
+ bun run service/sync wav2lip https://video-url.mp4 https://audio-url.mp3
115
+ ```
116
+
117
+ ## skill features
118
+
119
+ each skill includes:
120
+
121
+ - **name**: unique skill identifier
122
+ - **description**: when claude should use this skill
123
+ - **allowed-tools**: restricted to Read, Bash for safety
124
+ - **usage examples**: cli and programmatic examples
125
+ - **when to use**: specific use cases
126
+ - **tips**: best practices
127
+ - **environment variables**: required api keys
128
+
129
+ ## benefits
130
+
131
+ - **discoverability**: claude knows all available services
132
+ - **context**: skills provide usage examples and best practices
133
+ - **safety**: `allowed-tools` limits to read-only and bash execution
134
+ - **documentation**: skills serve as living documentation
135
+
136
+ ## skill reference
137
+
138
+ | skill | service | primary use case |
139
+ |-------|---------|------------------|
140
+ | image-generation | image | create ai images, character headshots |
141
+ | video-generation | video | animate images, generate video clips |
142
+ | voice-synthesis | voice | text-to-speech, voiceovers |
143
+ | music-generation | elevenlabs | generate music, create sound effects |
144
+ | video-lipsync | sync | sync audio with video, talking characters |
145
+ | video-captions | captions | add subtitles, accessibility |
146
+ | video-editing | edit | resize, trim, social media optimization |
147
+ | audio-transcription | transcribe | speech-to-text, subtitle generation |
148
+ | telegram-send | external | send videos to telegram as round videos |
149
+ | talking-character-pipeline | pipeline | end-to-end talking character videos |
150
+ | round-video-character | pipeline | telegram round selfie videos with wan 2.5 |
151
+
152
+ ## see also
153
+
154
+ - [README.md](README.md) - sdk overview and installation
155
+ - [STRUCTURE.md](STRUCTURE.md) - detailed module organization
156
+ - [pipeline/cookbooks/talking-character.md](pipeline/cookbooks/talking-character.md) - talking character workflow
157
+ - [pipeline/cookbooks/round-video-character.md](pipeline/cookbooks/round-video-character.md) - telegram round selfie video cookbook
package/STRUCTURE.md ADDED
@@ -0,0 +1,92 @@
1
+ # sdk structure
2
+
3
+ ## lib/ - two fal implementations
4
+
5
+ ### lib/ai-sdk/fal.ts
6
+ uses `@ai-sdk/fal` with vercel ai sdk
7
+
8
+ **when to use:**
9
+ - standard image generation
10
+ - need consistent api across providers
11
+ - want automatic image format handling
12
+ - prefer typed aspect ratios
13
+
14
+ **commands:**
15
+ ```bash
16
+ bun run lib/ai-sdk/fal.ts generate_image <prompt> [model] [aspectRatio]
17
+ ```
18
+
19
+ ### lib/fal.ts
20
+ uses `@fal-ai/client` directly
21
+
22
+ **when to use:**
23
+ - video generation (image-to-video, text-to-video)
24
+ - advanced fal features
25
+ - need queue/streaming updates
26
+ - custom api parameters
27
+
28
+ **commands:**
29
+ ```bash
30
+ bun run lib/fal.ts generate_image <prompt> [model] [imageSize]
31
+ bun run lib/fal.ts image_to_video <prompt> <imageUrl> [duration]
32
+ bun run lib/fal.ts text_to_video <prompt> [duration]
33
+ ```
34
+
35
+ ### lib/higgsfield.ts
36
+ uses `@higgsfield/client` for soul character generation
37
+
38
+ **commands:**
39
+ ```bash
40
+ bun run lib/higgsfield.ts generate_soul <prompt> [customReferenceId]
41
+ bun run lib/higgsfield.ts create_character <name> <imageUrl1> [imageUrl2...]
42
+ bun run lib/higgsfield.ts list_styles
43
+ ```
44
+
45
+ ## service/ - high-level wrappers
46
+
47
+ ### service/image.ts
48
+ combines fal + higgsfield for image generation
49
+
50
+ ```bash
51
+ bun run service/image.ts fal <prompt> [model] [upload]
52
+ bun run service/image.ts soul <prompt> [customReferenceId] [upload]
53
+ ```
54
+
55
+ ### service/video.ts
56
+ video generation with optional s3 upload
57
+
58
+ ```bash
59
+ bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
60
+ bun run service/video.ts from_text <prompt> [duration] [upload]
61
+ ```
62
+
63
+ ## utilities/
64
+
65
+ ### utilities/s3.ts
66
+ cloudflare r2 / s3 storage operations
67
+
68
+ ```bash
69
+ bun run utilities/s3.ts upload <filePath> <objectKey>
70
+ bun run utilities/s3.ts upload_from_url <url> <objectKey>
71
+ bun run utilities/s3.ts presigned_url <objectKey> [expiresIn]
72
+ ```
73
+
74
+ ## pipeline/cookbooks/
75
+ markdown guides for complex workflows
76
+
77
+ - `talking-character.md`: create talking character videos
78
+
79
+ ## dependencies
80
+
81
+ - `@ai-sdk/fal` - vercel ai sdk fal provider
82
+ - `@fal-ai/client` - official fal client
83
+ - `@higgsfield/client` - higgsfield api client
84
+ - `@aws-sdk/client-s3` - s3 storage
85
+ - `ai` - vercel ai sdk core
86
+
87
+ ## key decisions
88
+
89
+ 1. **two fal implementations** - ai-sdk for simplicity, client for power
90
+ 2. **all scripts are cli + library** - can be run directly or imported
91
+ 3. **consistent logging** - `[module] message` format
92
+ 4. **auto image opening** - ai-sdk version opens images automatically