agent-media 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -78
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -8,48 +8,66 @@ Media processing CLI for AI agents.
|
|
|
8
8
|
|
|
9
9
|
## Quick Start
|
|
10
10
|
|
|
11
|
+
### Local processing (no API key needed)
|
|
12
|
+
|
|
13
|
+
Uses [Sharp](https://sharp.pixelplumbing.com/) for fast local image processing.
|
|
14
|
+
|
|
15
|
+
We're working on adding [transformers.js](https://github.com/huggingface/transformers.js) for local AI features soon.
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
bunx agent-media@latest image resize --in sunset-mountains.jpg --width 800
|
|
19
|
+
bunx agent-media@latest image convert --in sunset-mountains.png --format webp
|
|
20
|
+
bunx agent-media@latest image extend --in sunset-mountains.jpg --padding 50 --color "#FFFFFF"
|
|
21
|
+
bunx agent-media@latest audio extract --in video.mp4
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### AI-powered features
|
|
25
|
+
|
|
11
26
|
Requires an API key from one of these providers:
|
|
12
27
|
|
|
13
28
|
- [fal.ai](https://fal.ai/dashboard/keys) → `FAL_API_KEY`
|
|
14
29
|
- [Replicate](https://replicate.com/account/api-tokens) → `REPLICATE_API_TOKEN`
|
|
15
30
|
- [Runpod](https://www.runpod.io/console/user/settings) → `RUNPOD_API_KEY`
|
|
16
31
|
|
|
32
|
+
### bunx
|
|
33
|
+
|
|
17
34
|
```bash
|
|
18
35
|
# Generate an image
|
|
19
|
-
|
|
36
|
+
bunx agent-media@latest image generate --prompt "a robot painting a sunset"
|
|
20
37
|
|
|
21
38
|
# Edit the generated image
|
|
22
|
-
|
|
39
|
+
bunx agent-media@latest image edit --in .agent-media/generated_*.png --prompt "add a cat watching"
|
|
23
40
|
|
|
24
41
|
# Remove background
|
|
25
|
-
|
|
42
|
+
bunx agent-media@latest image remove-background --in .agent-media/edited_*.png
|
|
26
43
|
|
|
27
|
-
#
|
|
28
|
-
|
|
44
|
+
# Transcribe with speaker identification
|
|
45
|
+
bunx agent-media@latest audio transcribe --in audio.mp3 --diarize
|
|
29
46
|
```
|
|
30
47
|
|
|
31
|
-
|
|
48
|
+
### npx
|
|
32
49
|
|
|
33
50
|
```bash
|
|
34
|
-
#
|
|
35
|
-
npx agent-media@latest
|
|
51
|
+
# Generate an image
|
|
52
|
+
npx agent-media@latest image generate --prompt "a robot painting a sunset"
|
|
36
53
|
|
|
37
|
-
#
|
|
38
|
-
npx agent-media@latest
|
|
39
|
-
```
|
|
54
|
+
# Edit the generated image
|
|
55
|
+
npx agent-media@latest image edit --in .agent-media/generated_*.png --prompt "add a cat watching"
|
|
40
56
|
|
|
41
|
-
|
|
57
|
+
# Remove background
|
|
58
|
+
npx agent-media@latest image remove-background --in .agent-media/edited_*.png
|
|
42
59
|
|
|
43
|
-
|
|
44
|
-
npx agent-media@latest
|
|
45
|
-
npx agent-media@latest image convert --in photo.png --format webp
|
|
46
|
-
npx agent-media@latest image extend --in photo.jpg --padding 50 --color "#FFFFFF"
|
|
60
|
+
# Transcribe with speaker identification
|
|
61
|
+
npx agent-media@latest audio transcribe --in audio.mp3 --diarize
|
|
47
62
|
```
|
|
48
63
|
|
|
49
64
|
## Installation
|
|
50
65
|
|
|
51
66
|
```bash
|
|
52
|
-
# Use directly with
|
|
67
|
+
# Use directly with bunx (no install)
|
|
68
|
+
bunx agent-media@latest --help
|
|
69
|
+
|
|
70
|
+
# Or with npx
|
|
53
71
|
npx agent-media@latest --help
|
|
54
72
|
|
|
55
73
|
# Or install globally
|
|
@@ -69,34 +87,38 @@ pnpm install && pnpm build && pnpm link --global
|
|
|
69
87
|
- Node.js >= 18.0.0
|
|
70
88
|
- API key for AI features (generate, edit, remove-background, transcribe)
|
|
71
89
|
|
|
72
|
-
|
|
90
|
+
---
|
|
73
91
|
|
|
74
|
-
|
|
92
|
+
## image
|
|
75
93
|
|
|
76
94
|
```bash
|
|
77
|
-
|
|
78
|
-
agent-media image
|
|
79
|
-
agent-media image remove-background --in <path> # Remove background
|
|
80
|
-
agent-media image generate --prompt <text> # Generate from prompt
|
|
81
|
-
agent-media image extend --in <path> --padding <px> --color <hex> # Extend canvas
|
|
82
|
-
agent-media image edit --in <path> --prompt <text> # Edit with prompt
|
|
83
|
-
```
|
|
95
|
+
# Resize image
|
|
96
|
+
agent-media@latest image resize --in <path> [options]
|
|
84
97
|
|
|
85
|
-
|
|
98
|
+
# Convert format
|
|
99
|
+
agent-media@latest image convert --in <path> --format <f>
|
|
86
100
|
|
|
87
|
-
|
|
88
|
-
agent-media
|
|
89
|
-
agent-media audio transcribe --in <audio> # Transcribe audio to text
|
|
90
|
-
```
|
|
101
|
+
# Extend canvas with padding
|
|
102
|
+
agent-media@latest image extend --in <path> --padding <px> --color <hex>
|
|
91
103
|
|
|
92
|
-
|
|
104
|
+
# Generate image from text
|
|
105
|
+
agent-media@latest image generate --prompt <text>
|
|
106
|
+
|
|
107
|
+
# Edit image with text prompt
|
|
108
|
+
agent-media@latest image edit --in <path> --prompt <text>
|
|
109
|
+
|
|
110
|
+
# Remove background
|
|
111
|
+
agent-media@latest image remove-background --in <path>
|
|
112
|
+
```
|
|
93
113
|
|
|
94
114
|
### resize
|
|
95
115
|
|
|
116
|
+
*local*
|
|
117
|
+
|
|
96
118
|
```bash
|
|
97
|
-
agent-media image resize --in
|
|
98
|
-
agent-media image resize --in
|
|
99
|
-
agent-media image resize --in photo.jpg --width 800 --height 600
|
|
119
|
+
agent-media@latest image resize --in sunset-mountains.jpg --width 800
|
|
120
|
+
agent-media@latest image resize --in sunset-mountains.jpg --height 600
|
|
121
|
+
agent-media@latest image resize --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/sunset-mountains.jpg --width 800
|
|
100
122
|
```
|
|
101
123
|
|
|
102
124
|
| Option | Description |
|
|
@@ -105,14 +127,15 @@ agent-media image resize --in photo.jpg --width 800 --height 600
|
|
|
105
127
|
| `--width <px>` | Target width in pixels |
|
|
106
128
|
| `--height <px>` | Target height in pixels |
|
|
107
129
|
| `--out <dir>` | Output directory |
|
|
108
|
-
| `--provider <name>` | Provider (local) |
|
|
109
130
|
|
|
110
131
|
### convert
|
|
111
132
|
|
|
133
|
+
*local*
|
|
134
|
+
|
|
112
135
|
```bash
|
|
113
|
-
agent-media image convert --in
|
|
114
|
-
agent-media image convert --in
|
|
115
|
-
agent-media image convert --in photo.png --format jpg --quality 90
|
|
136
|
+
agent-media@latest image convert --in sunset-mountains.png --format webp
|
|
137
|
+
agent-media@latest image convert --in sunset-mountains.jpg --format png
|
|
138
|
+
agent-media@latest image convert --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/sunset-mountains.png --format jpg --quality 90
|
|
116
139
|
```
|
|
117
140
|
|
|
118
141
|
| Option | Description |
|
|
@@ -121,26 +144,33 @@ agent-media image convert --in photo.png --format jpg --quality 90
|
|
|
121
144
|
| `--format <f>` | Output format: png, jpg, webp (required) |
|
|
122
145
|
| `--quality <n>` | Quality 1-100 for lossy formats (default: 80) |
|
|
123
146
|
| `--out <dir>` | Output directory |
|
|
124
|
-
| `--provider <name>` | Provider (local) |
|
|
125
147
|
|
|
126
|
-
###
|
|
148
|
+
### extend
|
|
149
|
+
|
|
150
|
+
*local*
|
|
151
|
+
|
|
152
|
+
Extend image canvas by adding padding on all sides with a solid background color.
|
|
127
153
|
|
|
128
154
|
```bash
|
|
129
|
-
agent-media image
|
|
130
|
-
agent-media image
|
|
155
|
+
agent-media@latest image extend --in sunset-mountains.jpg --padding 50 --color "#E4ECF8"
|
|
156
|
+
agent-media@latest image extend --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/sunset-mountains.png --padding 100 --color "#FFFFFF"
|
|
131
157
|
```
|
|
132
158
|
|
|
133
159
|
| Option | Description |
|
|
134
160
|
|--------|-------------|
|
|
135
161
|
| `--in <path>` | Input file path or URL (required) |
|
|
162
|
+
| `--padding <px>` | Padding size in pixels to add on all sides (required) |
|
|
163
|
+
| `--color <hex>` | Background color for extended area (required). Also flattens transparency. |
|
|
164
|
+
| `--dpi <n>` | DPI/density for output image (default: 300) |
|
|
136
165
|
| `--out <dir>` | Output directory |
|
|
137
|
-
| `--provider <name>` | Provider (fal, replicate) |
|
|
138
166
|
|
|
139
167
|
### generate
|
|
140
168
|
|
|
169
|
+
*API key required*
|
|
170
|
+
|
|
141
171
|
```bash
|
|
142
|
-
agent-media image generate --prompt "a cat wearing a hat"
|
|
143
|
-
agent-media image generate --prompt "sunset over mountains" --width 1024 --height 768
|
|
172
|
+
agent-media@latest image generate --prompt "a cat wearing a hat"
|
|
173
|
+
agent-media@latest image generate --prompt "sunset over mountains" --width 1024 --height 768
|
|
144
174
|
```
|
|
145
175
|
|
|
146
176
|
| Option | Description |
|
|
@@ -152,48 +182,61 @@ agent-media image generate --prompt "sunset over mountains" --width 1024 --heigh
|
|
|
152
182
|
| `--provider <name>` | Provider (fal, replicate, runpod) |
|
|
153
183
|
| `--model <name>` | Model override (e.g., `fal-ai/flux-2`, `black-forest-labs/flux-2-dev`) |
|
|
154
184
|
|
|
155
|
-
###
|
|
185
|
+
### edit
|
|
156
186
|
|
|
157
|
-
|
|
187
|
+
*API key required*
|
|
188
|
+
|
|
189
|
+
Edit an image using a text prompt (image-to-image).
|
|
158
190
|
|
|
159
191
|
```bash
|
|
160
|
-
agent-media image
|
|
161
|
-
agent-media image
|
|
192
|
+
agent-media@latest image edit --in sunset-mountains.jpg --prompt "make the sky more vibrant"
|
|
193
|
+
agent-media@latest image edit --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/portrait-headshot.png --prompt "add sunglasses"
|
|
162
194
|
```
|
|
163
195
|
|
|
164
196
|
| Option | Description |
|
|
165
197
|
|--------|-------------|
|
|
166
198
|
| `--in <path>` | Input file path or URL (required) |
|
|
167
|
-
| `--
|
|
168
|
-
| `--color <hex>` | Background color for extended area (required). Also flattens transparency. |
|
|
169
|
-
| `--dpi <n>` | DPI/density for output image (default: 300) |
|
|
199
|
+
| `--prompt <text>` | Text description of the desired edit (required) |
|
|
170
200
|
| `--out <dir>` | Output directory |
|
|
171
|
-
| `--provider <name>` | Provider (
|
|
201
|
+
| `--provider <name>` | Provider (fal, replicate, runpod) |
|
|
202
|
+
| `--model <name>` | Model override (e.g., `fal-ai/flux-2/edit`) |
|
|
172
203
|
|
|
173
|
-
###
|
|
204
|
+
### remove-background
|
|
174
205
|
|
|
175
|
-
|
|
206
|
+
*API key required*
|
|
176
207
|
|
|
177
208
|
```bash
|
|
178
|
-
agent-media image
|
|
179
|
-
agent-media image
|
|
209
|
+
agent-media@latest image remove-background --in portrait-headshot.png
|
|
210
|
+
agent-media@latest image remove-background --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/portrait-headshot.png
|
|
180
211
|
```
|
|
181
212
|
|
|
182
213
|
| Option | Description |
|
|
183
214
|
|--------|-------------|
|
|
184
215
|
| `--in <path>` | Input file path or URL (required) |
|
|
185
|
-
| `--prompt <text>` | Text description of the desired edit (required) |
|
|
186
216
|
| `--out <dir>` | Output directory |
|
|
187
|
-
| `--provider <name>` | Provider (fal, replicate
|
|
188
|
-
|
|
217
|
+
| `--provider <name>` | Provider (fal, replicate) |
|
|
218
|
+
|
|
219
|
+
---
|
|
189
220
|
|
|
190
|
-
|
|
221
|
+
## audio
|
|
191
222
|
|
|
192
|
-
|
|
223
|
+
```bash
|
|
224
|
+
# Extract audio from video
|
|
225
|
+
agent-media@latest audio extract --in <video>
|
|
226
|
+
|
|
227
|
+
# Transcribe audio to text
|
|
228
|
+
agent-media@latest audio transcribe --in <audio>
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### extract
|
|
232
|
+
|
|
233
|
+
*local*
|
|
234
|
+
|
|
235
|
+
Extract audio track from a video file.
|
|
193
236
|
|
|
194
237
|
```bash
|
|
195
|
-
agent-media audio extract --in video.mp4
|
|
196
|
-
agent-media audio extract --in video.mp4 --format wav
|
|
238
|
+
agent-media@latest audio extract --in video.mp4
|
|
239
|
+
agent-media@latest audio extract --in video.mp4 --format wav
|
|
197
240
|
```
|
|
198
241
|
|
|
199
242
|
| Option | Description |
|
|
@@ -202,13 +245,15 @@ agent-media audio extract --in video.mp4 --format wav
|
|
|
202
245
|
| `--format <f>` | Output format: mp3, wav (default: mp3) |
|
|
203
246
|
| `--out <dir>` | Output directory |
|
|
204
247
|
|
|
205
|
-
###
|
|
248
|
+
### transcribe
|
|
249
|
+
|
|
250
|
+
*API key required*
|
|
206
251
|
|
|
207
252
|
Transcribe audio to text with timestamps. Supports speaker identification.
|
|
208
253
|
|
|
209
254
|
```bash
|
|
210
|
-
agent-media audio transcribe --in audio.mp3
|
|
211
|
-
agent-media audio transcribe --in audio.mp3 --diarize --speakers 2
|
|
255
|
+
agent-media@latest audio transcribe --in audio.mp3
|
|
256
|
+
agent-media@latest audio transcribe --in audio.mp3 --diarize --speakers 2
|
|
212
257
|
```
|
|
213
258
|
|
|
214
259
|
| Option | Description |
|
|
@@ -221,6 +266,8 @@ agent-media audio transcribe --in audio.mp3 --diarize --speakers 2
|
|
|
221
266
|
| `--provider <name>` | Provider (fal, replicate) |
|
|
222
267
|
| `--model <name>` | Model override |
|
|
223
268
|
|
|
269
|
+
---
|
|
270
|
+
|
|
224
271
|
## Output Format
|
|
225
272
|
|
|
226
273
|
All commands return JSON to stdout:
|
|
@@ -286,8 +333,8 @@ Use `--model <name>` to override the default model for any command.
|
|
|
286
333
|
### Just ask the agent
|
|
287
334
|
|
|
288
335
|
```
|
|
289
|
-
Use agent-media to resize this image to 800px wide.
|
|
290
|
-
Run agent-media --help to see available commands.
|
|
336
|
+
Use agent-media@latest to resize this image to 800px wide.
|
|
337
|
+
Run agent-media@latest --help to see available commands.
|
|
291
338
|
```
|
|
292
339
|
|
|
293
340
|
### AGENTS.md / CLAUDE.md
|
|
@@ -297,15 +344,15 @@ Add to your project instructions:
|
|
|
297
344
|
```markdown
|
|
298
345
|
## Media Processing
|
|
299
346
|
|
|
300
|
-
Use `agent-media` for image and audio operations. Run `agent-media --help` for commands.
|
|
347
|
+
Use `agent-media@latest` for image and audio operations. Run `agent-media@latest --help` for commands.
|
|
301
348
|
|
|
302
|
-
- `agent-media image resize --in <path> --width <px>` - Resize image
|
|
303
|
-
- `agent-media image convert --in <path> --format <f>` - Convert format
|
|
304
|
-
- `agent-media image generate --prompt <text>` - Generate image
|
|
305
|
-
- `agent-media image edit --in <path> --prompt <text>` - Edit image
|
|
306
|
-
- `agent-media image remove-background --in <path>` - Remove background
|
|
307
|
-
- `agent-media audio extract --in <video>` - Extract audio from video
|
|
308
|
-
- `agent-media audio transcribe --in <audio>` - Transcribe audio
|
|
349
|
+
- `agent-media@latest image resize --in <path> --width <px>` - Resize image
|
|
350
|
+
- `agent-media@latest image convert --in <path> --format <f>` - Convert format
|
|
351
|
+
- `agent-media@latest image generate --prompt <text>` - Generate image
|
|
352
|
+
- `agent-media@latest image edit --in <path> --prompt <text>` - Edit image
|
|
353
|
+
- `agent-media@latest image remove-background --in <path>` - Remove background
|
|
354
|
+
- `agent-media@latest audio extract --in <video>` - Extract audio from video
|
|
355
|
+
- `agent-media@latest audio transcribe --in <audio>` - Transcribe audio
|
|
309
356
|
|
|
310
357
|
All commands output JSON with `ok: true/false` and exit 0/1.
|
|
311
358
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-media",
|
|
3
|
-
"version": "0.3.3",
|
|
3
|
+
"version": "0.3.5",
|
|
4
4
|
"description": "Agent-first media toolkit CLI",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"repository": {
|
|
@@ -34,8 +34,8 @@
|
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"commander": "^12.0.0",
|
|
36
36
|
"dotenv": "^17.2.3",
|
|
37
|
-
"@agent-media/audio": "0.3.0",
|
|
38
37
|
"@agent-media/core": "0.3.0",
|
|
38
|
+
"@agent-media/audio": "0.3.0",
|
|
39
39
|
"@agent-media/providers": "0.2.0",
|
|
40
40
|
"@agent-media/image": "0.2.0"
|
|
41
41
|
},
|