agent-media 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -48
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -10,15 +10,19 @@ Media processing CLI for AI agents.
|
|
|
10
10
|
|
|
11
11
|
### Local processing (no API key needed)
|
|
12
12
|
|
|
13
|
-
Uses [Sharp](https://sharp.pixelplumbing.com/) for
|
|
13
|
+
Uses [Sharp](https://sharp.pixelplumbing.com/) for image operations and [transformers.js](https://huggingface.co/docs/transformers.js) for local AI (background removal, transcription).
|
|
14
14
|
|
|
15
15
|
```bash
|
|
16
|
-
bunx agent-media@latest image resize --in
|
|
17
|
-
bunx agent-media@latest image convert --in
|
|
18
|
-
bunx agent-media@latest image extend --in
|
|
16
|
+
bunx agent-media@latest image resize --in sunset-mountains.jpg --width 800
|
|
17
|
+
bunx agent-media@latest image convert --in sunset-mountains.png --format webp
|
|
18
|
+
bunx agent-media@latest image extend --in sunset-mountains.jpg --padding 50 --color "#FFFFFF"
|
|
19
|
+
bunx agent-media@latest image remove-background --in portrait-headshot.png --provider transformers
|
|
19
20
|
bunx agent-media@latest audio extract --in video.mp4
|
|
21
|
+
bunx agent-media@latest audio transcribe --in audio.mp3 --provider transformers
|
|
20
22
|
```
|
|
21
23
|
|
|
24
|
+
> **Note**: You may see a `mutex lock failed` error with `--provider transformers` — ignore it, the output is correct if JSON shows `"ok": true`.
|
|
25
|
+
|
|
22
26
|
### AI-powered features
|
|
23
27
|
|
|
24
28
|
Requires an API key from one of these providers:
|
|
@@ -85,32 +89,38 @@ pnpm install && pnpm build && pnpm link --global
|
|
|
85
89
|
- Node.js >= 18.0.0
|
|
86
90
|
- API key for AI features (generate, edit, remove-background, transcribe)
|
|
87
91
|
|
|
92
|
+
---
|
|
93
|
+
|
|
88
94
|
## image
|
|
89
95
|
|
|
90
96
|
```bash
|
|
91
|
-
|
|
92
|
-
agent-media@latest image
|
|
93
|
-
agent-media@latest image remove-background --in <path> # Remove background
|
|
94
|
-
agent-media@latest image generate --prompt <text> # Generate from prompt
|
|
95
|
-
agent-media@latest image extend --in <path> --padding <px> --color <hex> # Extend canvas
|
|
96
|
-
agent-media@latest image edit --in <path> --prompt <text> # Edit with prompt
|
|
97
|
-
```
|
|
97
|
+
# Resize image
|
|
98
|
+
agent-media@latest image resize --in <path> [options]
|
|
98
99
|
|
|
99
|
-
|
|
100
|
+
# Convert format
|
|
101
|
+
agent-media@latest image convert --in <path> --format <f>
|
|
100
102
|
|
|
101
|
-
|
|
102
|
-
agent-media@latest
|
|
103
|
-
agent-media@latest audio transcribe --in <audio> # Transcribe audio to text
|
|
104
|
-
```
|
|
103
|
+
# Extend canvas with padding
|
|
104
|
+
agent-media@latest image extend --in <path> --padding <px> --color <hex>
|
|
105
105
|
|
|
106
|
-
|
|
106
|
+
# Generate image from text
|
|
107
|
+
agent-media@latest image generate --prompt <text>
|
|
108
|
+
|
|
109
|
+
# Edit image with text prompt
|
|
110
|
+
agent-media@latest image edit --in <path> --prompt <text>
|
|
111
|
+
|
|
112
|
+
# Remove background
|
|
113
|
+
agent-media@latest image remove-background --in <path>
|
|
114
|
+
```
|
|
107
115
|
|
|
108
116
|
### resize
|
|
109
117
|
|
|
118
|
+
*local*
|
|
119
|
+
|
|
110
120
|
```bash
|
|
111
|
-
agent-media@latest image resize --in
|
|
112
|
-
agent-media@latest image resize --in
|
|
113
|
-
agent-media@latest image resize --in photo.jpg --width 800 --height 600
|
|
121
|
+
agent-media@latest image resize --in sunset-mountains.jpg --width 800
|
|
122
|
+
agent-media@latest image resize --in sunset-mountains.jpg --height 600
|
|
123
|
+
agent-media@latest image resize --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/sunset-mountains.jpg --width 800
|
|
114
124
|
```
|
|
115
125
|
|
|
116
126
|
| Option | Description |
|
|
@@ -119,14 +129,15 @@ agent-media@latest image resize --in photo.jpg --width 800 --height 600
|
|
|
119
129
|
| `--width <px>` | Target width in pixels |
|
|
120
130
|
| `--height <px>` | Target height in pixels |
|
|
121
131
|
| `--out <dir>` | Output directory |
|
|
122
|
-
| `--provider <name>` | Provider (local) |
|
|
123
132
|
|
|
124
133
|
### convert
|
|
125
134
|
|
|
135
|
+
*local*
|
|
136
|
+
|
|
126
137
|
```bash
|
|
127
|
-
agent-media@latest image convert --in
|
|
128
|
-
agent-media@latest image convert --in
|
|
129
|
-
agent-media@latest image convert --in photo.png --format jpg --quality 90
|
|
138
|
+
agent-media@latest image convert --in sunset-mountains.png --format webp
|
|
139
|
+
agent-media@latest image convert --in sunset-mountains.jpg --format png
|
|
140
|
+
agent-media@latest image convert --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/sunset-mountains.png --format jpg --quality 90
|
|
130
141
|
```
|
|
131
142
|
|
|
132
143
|
| Option | Description |
|
|
@@ -135,23 +146,30 @@ agent-media@latest image convert --in photo.png --format jpg --quality 90
|
|
|
135
146
|
| `--format <f>` | Output format: png, jpg, webp (required) |
|
|
136
147
|
| `--quality <n>` | Quality 1-100 for lossy formats (default: 80) |
|
|
137
148
|
| `--out <dir>` | Output directory |
|
|
138
|
-
| `--provider <name>` | Provider (local) |
|
|
139
149
|
|
|
140
|
-
### remove-background
|
|
150
|
+
### extend
|
|
151
|
+
|
|
152
|
+
*local*
|
|
153
|
+
|
|
154
|
+
Extend image canvas by adding padding on all sides with a solid background color.
|
|
141
155
|
|
|
142
156
|
```bash
|
|
143
|
-
agent-media@latest image
|
|
144
|
-
agent-media@latest image
|
|
157
|
+
agent-media@latest image extend --in sunset-mountains.jpg --padding 50 --color "#E4ECF8"
|
|
158
|
+
agent-media@latest image extend --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/sunset-mountains.png --padding 100 --color "#FFFFFF"
|
|
145
159
|
```
|
|
146
160
|
|
|
147
161
|
| Option | Description |
|
|
148
162
|
|--------|-------------|
|
|
149
163
|
| `--in <path>` | Input file path or URL (required) |
|
|
164
|
+
| `--padding <px>` | Padding size in pixels to add on all sides (required) |
|
|
165
|
+
| `--color <hex>` | Background color for extended area (required). Also flattens transparency. |
|
|
166
|
+
| `--dpi <n>` | DPI/density for output image (default: 300) |
|
|
150
167
|
| `--out <dir>` | Output directory |
|
|
151
|
-
| `--provider <name>` | Provider (fal, replicate) |
|
|
152
168
|
|
|
153
169
|
### generate
|
|
154
170
|
|
|
171
|
+
*API key required*
|
|
172
|
+
|
|
155
173
|
```bash
|
|
156
174
|
agent-media@latest image generate --prompt "a cat wearing a hat"
|
|
157
175
|
agent-media@latest image generate --prompt "sunset over mountains" --width 1024 --height 768
|
|
@@ -166,44 +184,57 @@ agent-media@latest image generate --prompt "sunset over mountains" --width 1024
|
|
|
166
184
|
| `--provider <name>` | Provider (fal, replicate, runpod) |
|
|
167
185
|
| `--model <name>` | Model override (e.g., `fal-ai/flux-2`, `black-forest-labs/flux-2-dev`) |
|
|
168
186
|
|
|
169
|
-
### extend
|
|
187
|
+
### edit
|
|
170
188
|
|
|
171
|
-
|
|
189
|
+
*API key required*
|
|
190
|
+
|
|
191
|
+
Edit an image using a text prompt (image-to-image).
|
|
172
192
|
|
|
173
193
|
```bash
|
|
174
|
-
agent-media@latest image
|
|
175
|
-
agent-media@latest image
|
|
194
|
+
agent-media@latest image edit --in sunset-mountains.jpg --prompt "make the sky more vibrant"
|
|
195
|
+
agent-media@latest image edit --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/portrait-headshot.png --prompt "add sunglasses"
|
|
176
196
|
```
|
|
177
197
|
|
|
178
198
|
| Option | Description |
|
|
179
199
|
|--------|-------------|
|
|
180
200
|
| `--in <path>` | Input file path or URL (required) |
|
|
181
|
-
| `--padding <px>` | Padding size in pixels to add on all sides (required) |
|
|
182
|
-
| `--color <hex>` | Background color for extended area (required). Also flattens transparency. |
|
|
183
|
-
| `--dpi <n>` | DPI/density for output image (default: 300) |
|
|
201
|
+
| `--prompt <text>` | Text description of the desired edit (required) |
|
|
184
202
|
| `--out <dir>` | Output directory |
|
|
185
|
-
| `--provider <name>` | Provider (local) |
|
|
203
|
+
| `--provider <name>` | Provider (fal, replicate, runpod) |
|
|
204
|
+
| `--model <name>` | Model override (e.g., `fal-ai/flux-2/edit`) |
|
|
186
205
|
|
|
187
|
-
### edit
|
|
206
|
+
### remove-background
|
|
188
207
|
|
|
189
|
-
|
|
208
|
+
*API key required*
|
|
190
209
|
|
|
191
210
|
```bash
|
|
192
|
-
agent-media@latest image
|
|
193
|
-
agent-media@latest image
|
|
211
|
+
agent-media@latest image remove-background --in portrait-headshot.png
|
|
212
|
+
agent-media@latest image remove-background --in https://ytrzap04kkm0giml.public.blob.vercel-storage.com/portrait-headshot.png
|
|
194
213
|
```
|
|
195
214
|
|
|
196
215
|
| Option | Description |
|
|
197
216
|
|--------|-------------|
|
|
198
217
|
| `--in <path>` | Input file path or URL (required) |
|
|
199
|
-
| `--prompt <text>` | Text description of the desired edit (required) |
|
|
200
218
|
| `--out <dir>` | Output directory |
|
|
201
|
-
| `--provider <name>` | Provider (fal, replicate, runpod) |
|
|
202
|
-
| `--model <name>` | Model override (e.g., `fal-ai/flux-2/edit`) |
|
|
219
|
+
| `--provider <name>` | Provider (fal, replicate) |
|
|
203
220
|
|
|
204
|
-
|
|
221
|
+
---
|
|
205
222
|
|
|
206
|
-
|
|
223
|
+
## audio
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
# Extract audio from video
|
|
227
|
+
agent-media@latest audio extract --in <video>
|
|
228
|
+
|
|
229
|
+
# Transcribe audio to text
|
|
230
|
+
agent-media@latest audio transcribe --in <audio>
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### extract
|
|
234
|
+
|
|
235
|
+
*local*
|
|
236
|
+
|
|
237
|
+
Extract audio track from a video file.
|
|
207
238
|
|
|
208
239
|
```bash
|
|
209
240
|
agent-media@latest audio extract --in video.mp4
|
|
@@ -216,7 +247,9 @@ agent-media@latest audio extract --in video.mp4 --format wav
|
|
|
216
247
|
| `--format <f>` | Output format: mp3, wav (default: mp3) |
|
|
217
248
|
| `--out <dir>` | Output directory |
|
|
218
249
|
|
|
219
|
-
###
|
|
250
|
+
### transcribe
|
|
251
|
+
|
|
252
|
+
*API key required*
|
|
220
253
|
|
|
221
254
|
Transcribe audio to text with timestamps. Supports speaker identification.
|
|
222
255
|
|
|
@@ -235,6 +268,8 @@ agent-media@latest audio transcribe --in audio.mp3 --diarize --speakers 2
|
|
|
235
268
|
| `--provider <name>` | Provider (fal, replicate) |
|
|
236
269
|
| `--model <name>` | Model override |
|
|
237
270
|
|
|
271
|
+
---
|
|
272
|
+
|
|
238
273
|
## Output Format
|
|
239
274
|
|
|
240
275
|
All commands return JSON to stdout:
|
|
@@ -272,6 +307,7 @@ Exit code is `0` on success, `1` on error.
|
|
|
272
307
|
| Provider | resize | convert | extend | generate | edit | remove-background | transcribe |
|
|
273
308
|
|----------|--------|---------|--------|----------|------|-------------------|------------|
|
|
274
309
|
| **local** | ✓ | ✓ | ✓ | - | - | - | - |
|
|
310
|
+
| **transformers** | - | - | - | - | - | `Xenova/modnet` | `moonshine-base` |
|
|
275
311
|
| **fal** | - | - | - | `fal-ai/flux-2` | `fal-ai/flux-2/edit` | `fal-ai/birefnet/v2` | `fal-ai/wizper` |
|
|
276
312
|
| **replicate** | - | - | - | `black-forest-labs/flux-2-dev` | `black-forest-labs/flux-kontext-dev` | `men1scus/birefnet` | WhisperX |
|
|
277
313
|
| **runpod** | - | - | - | `alibaba/wan-2.6` | `google/nano-banana-pro-edit` | - | - |
|
|
@@ -326,6 +362,7 @@ All commands output JSON with `ok: true/false` and exit 0/1.
|
|
|
326
362
|
|
|
327
363
|
## Roadmap
|
|
328
364
|
|
|
329
|
-
- [
|
|
365
|
+
- [x] Local CPU background removal via transformers.js/ONNX (zero API keys)
|
|
366
|
+
- [x] Local CPU transcription via transformers.js/ONNX (zero API keys)
|
|
330
367
|
- [ ] Video processing actions
|
|
331
368
|
- [ ] Batch processing support
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-media",
|
|
3
|
-
"version": "0.3.4",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Agent-first media toolkit CLI",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"repository": {
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"commander": "^12.0.0",
|
|
36
36
|
"dotenv": "^17.2.3",
|
|
37
|
+
"@agent-media/audio": "0.3.1",
|
|
38
|
+
"@agent-media/image": "0.2.1",
|
|
37
39
|
"@agent-media/core": "0.3.0",
|
|
38
|
-
"@agent-media/
|
|
39
|
-
"@agent-media/providers": "0.2.0",
|
|
40
|
-
"@agent-media/audio": "0.3.0"
|
|
40
|
+
"@agent-media/providers": "0.3.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^22.0.0",
|