agent-media 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -58
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -8,49 +8,65 @@ Media processing CLI for AI agents.
|
|
|
8
8
|
|
|
9
9
|
## Quick Start
|
|
10
10
|
|
|
11
|
+
### Local processing (no API key needed)
|
|
12
|
+
|
|
13
|
+
Uses [Sharp](https://sharp.pixelplumbing.com/) for fast local image processing. We're working on adding [transformers.js](https://huggingface.co/docs/transformers.js) for local AI features soon.
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bunx agent-media@latest image resize --in photo.jpg --width 800
|
|
17
|
+
bunx agent-media@latest image convert --in photo.png --format webp
|
|
18
|
+
bunx agent-media@latest image extend --in photo.jpg --padding 50 --color "#FFFFFF"
|
|
19
|
+
bunx agent-media@latest audio extract --in video.mp4
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### AI-powered features
|
|
23
|
+
|
|
11
24
|
Requires an API key from one of these providers:
|
|
12
25
|
|
|
13
26
|
- [fal.ai](https://fal.ai/dashboard/keys) → `FAL_API_KEY`
|
|
14
27
|
- [Replicate](https://replicate.com/account/api-tokens) → `REPLICATE_API_TOKEN`
|
|
15
28
|
- [Runpod](https://www.runpod.io/console/user/settings) → `RUNPOD_API_KEY`
|
|
16
29
|
|
|
30
|
+
### bunx
|
|
31
|
+
|
|
17
32
|
```bash
|
|
18
33
|
# Generate an image
|
|
19
|
-
|
|
34
|
+
bunx agent-media@latest image generate --prompt "a robot painting a sunset"
|
|
20
35
|
|
|
21
36
|
# Edit the generated image
|
|
22
|
-
|
|
37
|
+
bunx agent-media@latest image edit --in .agent-media/generated_*.png --prompt "add a cat watching"
|
|
23
38
|
|
|
24
39
|
# Remove background
|
|
25
|
-
|
|
40
|
+
bunx agent-media@latest image remove-background --in .agent-media/edited_*.png
|
|
26
41
|
|
|
27
|
-
#
|
|
28
|
-
|
|
42
|
+
# Transcribe with speaker identification
|
|
43
|
+
bunx agent-media@latest audio transcribe --in audio.mp3 --diarize
|
|
29
44
|
```
|
|
30
45
|
|
|
31
|
-
|
|
46
|
+
### npx
|
|
32
47
|
|
|
33
48
|
```bash
|
|
34
|
-
#
|
|
35
|
-
npx agent-media
|
|
49
|
+
# Generate an image
|
|
50
|
+
npx agent-media@latest image generate --prompt "a robot painting a sunset"
|
|
36
51
|
|
|
37
|
-
#
|
|
38
|
-
npx agent-media
|
|
39
|
-
```
|
|
52
|
+
# Edit the generated image
|
|
53
|
+
npx agent-media@latest image edit --in .agent-media/generated_*.png --prompt "add a cat watching"
|
|
40
54
|
|
|
41
|
-
|
|
55
|
+
# Remove background
|
|
56
|
+
npx agent-media@latest image remove-background --in .agent-media/edited_*.png
|
|
42
57
|
|
|
43
|
-
|
|
44
|
-
npx agent-media
|
|
45
|
-
npx agent-media image convert --in photo.png --format webp
|
|
46
|
-
npx agent-media image extend --in photo.jpg --padding 50 --color "#FFFFFF"
|
|
58
|
+
# Transcribe with speaker identification
|
|
59
|
+
npx agent-media@latest audio transcribe --in audio.mp3 --diarize
|
|
47
60
|
```
|
|
48
61
|
|
|
49
62
|
## Installation
|
|
50
63
|
|
|
51
64
|
```bash
|
|
52
|
-
# Use directly with
|
|
53
|
-
|
|
65
|
+
# Use directly with bunx (no install)
|
|
66
|
+
bunx agent-media@latest --help
|
|
67
|
+
|
|
68
|
+
# Or with npx
|
|
69
|
+
npx agent-media@latest --help
|
|
54
70
|
|
|
55
71
|
# Or install globally
|
|
56
72
|
npm install -g agent-media
|
|
@@ -69,24 +85,22 @@ pnpm install && pnpm build && pnpm link --global
|
|
|
69
85
|
- Node.js >= 18.0.0
|
|
70
86
|
- API key for AI features (generate, edit, remove-background, transcribe)
|
|
71
87
|
|
|
72
|
-
##
|
|
73
|
-
|
|
74
|
-
### Image Commands
|
|
88
|
+
## image
|
|
75
89
|
|
|
76
90
|
```bash
|
|
77
|
-
agent-media image resize --in <path> [options] # Resize image
|
|
78
|
-
agent-media image convert --in <path> --format <f> # Convert format
|
|
79
|
-
agent-media image remove-background --in <path> # Remove background
|
|
80
|
-
agent-media image generate --prompt <text> # Generate from prompt
|
|
81
|
-
agent-media image extend --in <path> --padding <px> --color <hex> # Extend canvas
|
|
82
|
-
agent-media image edit --in <path> --prompt <text> # Edit with prompt
|
|
91
|
+
agent-media@latest image resize --in <path> [options] # Resize image
|
|
92
|
+
agent-media@latest image convert --in <path> --format <f> # Convert format
|
|
93
|
+
agent-media@latest image remove-background --in <path> # Remove background
|
|
94
|
+
agent-media@latest image generate --prompt <text> # Generate from prompt
|
|
95
|
+
agent-media@latest image extend --in <path> --padding <px> --color <hex> # Extend canvas
|
|
96
|
+
agent-media@latest image edit --in <path> --prompt <text> # Edit with prompt
|
|
83
97
|
```
|
|
84
98
|
|
|
85
|
-
|
|
99
|
+
## audio
|
|
86
100
|
|
|
87
101
|
```bash
|
|
88
|
-
agent-media audio extract --in <video> # Extract audio from video
|
|
89
|
-
agent-media audio transcribe --in <audio> # Transcribe audio to text
|
|
102
|
+
agent-media@latest audio extract --in <video> # Extract audio from video
|
|
103
|
+
agent-media@latest audio transcribe --in <audio> # Transcribe audio to text
|
|
90
104
|
```
|
|
91
105
|
|
|
92
106
|
---
|
|
@@ -94,9 +108,9 @@ agent-media audio transcribe --in <audio> # Transcribe audio to text
|
|
|
94
108
|
### resize
|
|
95
109
|
|
|
96
110
|
```bash
|
|
97
|
-
agent-media image resize --in photo.jpg --width 800
|
|
98
|
-
agent-media image resize --in photo.jpg --height 600
|
|
99
|
-
agent-media image resize --in photo.jpg --width 800 --height 600
|
|
111
|
+
agent-media@latest image resize --in photo.jpg --width 800
|
|
112
|
+
agent-media@latest image resize --in photo.jpg --height 600
|
|
113
|
+
agent-media@latest image resize --in photo.jpg --width 800 --height 600
|
|
100
114
|
```
|
|
101
115
|
|
|
102
116
|
| Option | Description |
|
|
@@ -110,9 +124,9 @@ agent-media image resize --in photo.jpg --width 800 --height 600
|
|
|
110
124
|
### convert
|
|
111
125
|
|
|
112
126
|
```bash
|
|
113
|
-
agent-media image convert --in photo.png --format webp
|
|
114
|
-
agent-media image convert --in photo.jpg --format png
|
|
115
|
-
agent-media image convert --in photo.png --format jpg --quality 90
|
|
127
|
+
agent-media@latest image convert --in photo.png --format webp
|
|
128
|
+
agent-media@latest image convert --in photo.jpg --format png
|
|
129
|
+
agent-media@latest image convert --in photo.png --format jpg --quality 90
|
|
116
130
|
```
|
|
117
131
|
|
|
118
132
|
| Option | Description |
|
|
@@ -126,8 +140,8 @@ agent-media image convert --in photo.png --format jpg --quality 90
|
|
|
126
140
|
### remove-background
|
|
127
141
|
|
|
128
142
|
```bash
|
|
129
|
-
agent-media image remove-background --in portrait.jpg
|
|
130
|
-
agent-media image remove-background --in https://example.com/photo.jpg
|
|
143
|
+
agent-media@latest image remove-background --in portrait.jpg
|
|
144
|
+
agent-media@latest image remove-background --in https://example.com/photo.jpg
|
|
131
145
|
```
|
|
132
146
|
|
|
133
147
|
| Option | Description |
|
|
@@ -139,8 +153,8 @@ agent-media image remove-background --in https://example.com/photo.jpg
|
|
|
139
153
|
### generate
|
|
140
154
|
|
|
141
155
|
```bash
|
|
142
|
-
agent-media image generate --prompt "a cat wearing a hat"
|
|
143
|
-
agent-media image generate --prompt "sunset over mountains" --width 1024 --height 768
|
|
156
|
+
agent-media@latest image generate --prompt "a cat wearing a hat"
|
|
157
|
+
agent-media@latest image generate --prompt "sunset over mountains" --width 1024 --height 768
|
|
144
158
|
```
|
|
145
159
|
|
|
146
160
|
| Option | Description |
|
|
@@ -157,8 +171,8 @@ agent-media image generate --prompt "sunset over mountains" --width 1024 --heigh
|
|
|
157
171
|
Extend image canvas by adding padding on all sides with a solid background color.
|
|
158
172
|
|
|
159
173
|
```bash
|
|
160
|
-
agent-media image extend --in photo.jpg --padding 50 --color "#E4ECF8"
|
|
161
|
-
agent-media image extend --in photo.png --padding 100 --color "#FFFFFF" --dpi 300
|
|
174
|
+
agent-media@latest image extend --in photo.jpg --padding 50 --color "#E4ECF8"
|
|
175
|
+
agent-media@latest image extend --in photo.png --padding 100 --color "#FFFFFF" --dpi 300
|
|
162
176
|
```
|
|
163
177
|
|
|
164
178
|
| Option | Description |
|
|
@@ -175,8 +189,8 @@ agent-media image extend --in photo.png --padding 100 --color "#FFFFFF" --dpi 30
|
|
|
175
189
|
Edit an image using a text prompt (image-to-image).
|
|
176
190
|
|
|
177
191
|
```bash
|
|
178
|
-
agent-media image edit --in photo.jpg --prompt "make the sky more vibrant"
|
|
179
|
-
agent-media image edit --in portrait.jpg --prompt "add sunglasses"
|
|
192
|
+
agent-media@latest image edit --in photo.jpg --prompt "make the sky more vibrant"
|
|
193
|
+
agent-media@latest image edit --in portrait.jpg --prompt "add sunglasses"
|
|
180
194
|
```
|
|
181
195
|
|
|
182
196
|
| Option | Description |
|
|
@@ -192,8 +206,8 @@ agent-media image edit --in portrait.jpg --prompt "add sunglasses"
|
|
|
192
206
|
Extract audio track from a video file. Uses local ffmpeg, no API key needed.
|
|
193
207
|
|
|
194
208
|
```bash
|
|
195
|
-
agent-media audio extract --in video.mp4
|
|
196
|
-
agent-media audio extract --in video.mp4 --format wav
|
|
209
|
+
agent-media@latest audio extract --in video.mp4
|
|
210
|
+
agent-media@latest audio extract --in video.mp4 --format wav
|
|
197
211
|
```
|
|
198
212
|
|
|
199
213
|
| Option | Description |
|
|
@@ -207,8 +221,8 @@ agent-media audio extract --in video.mp4 --format wav
|
|
|
207
221
|
Transcribe audio to text with timestamps. Supports speaker identification.
|
|
208
222
|
|
|
209
223
|
```bash
|
|
210
|
-
agent-media audio transcribe --in audio.mp3
|
|
211
|
-
agent-media audio transcribe --in audio.mp3 --diarize --speakers 2
|
|
224
|
+
agent-media@latest audio transcribe --in audio.mp3
|
|
225
|
+
agent-media@latest audio transcribe --in audio.mp3 --diarize --speakers 2
|
|
212
226
|
```
|
|
213
227
|
|
|
214
228
|
| Option | Description |
|
|
@@ -286,8 +300,8 @@ Use `--model <name>` to override the default model for any command.
|
|
|
286
300
|
### Just ask the agent
|
|
287
301
|
|
|
288
302
|
```
|
|
289
|
-
Use agent-media to resize this image to 800px wide.
|
|
290
|
-
Run agent-media --help to see available commands.
|
|
303
|
+
Use agent-media@latest to resize this image to 800px wide.
|
|
304
|
+
Run agent-media@latest --help to see available commands.
|
|
291
305
|
```
|
|
292
306
|
|
|
293
307
|
### AGENTS.md / CLAUDE.md
|
|
@@ -297,15 +311,15 @@ Add to your project instructions:
|
|
|
297
311
|
```markdown
|
|
298
312
|
## Media Processing
|
|
299
313
|
|
|
300
|
-
Use `agent-media` for image and audio operations. Run `agent-media --help` for commands.
|
|
314
|
+
Use `agent-media@latest` for image and audio operations. Run `agent-media@latest --help` for commands.
|
|
301
315
|
|
|
302
|
-
- `agent-media image resize --in <path> --width <px>` - Resize image
|
|
303
|
-
- `agent-media image convert --in <path> --format <f>` - Convert format
|
|
304
|
-
- `agent-media image generate --prompt <text>` - Generate image
|
|
305
|
-
- `agent-media image edit --in <path> --prompt <text>` - Edit image
|
|
306
|
-
- `agent-media image remove-background --in <path>` - Remove background
|
|
307
|
-
- `agent-media audio extract --in <video>` - Extract audio from video
|
|
308
|
-
- `agent-media audio transcribe --in <audio>` - Transcribe audio
|
|
316
|
+
- `agent-media@latest image resize --in <path> --width <px>` - Resize image
|
|
317
|
+
- `agent-media@latest image convert --in <path> --format <f>` - Convert format
|
|
318
|
+
- `agent-media@latest image generate --prompt <text>` - Generate image
|
|
319
|
+
- `agent-media@latest image edit --in <path> --prompt <text>` - Edit image
|
|
320
|
+
- `agent-media@latest image remove-background --in <path>` - Remove background
|
|
321
|
+
- `agent-media@latest audio extract --in <video>` - Extract audio from video
|
|
322
|
+
- `agent-media@latest audio transcribe --in <audio>` - Transcribe audio
|
|
309
323
|
|
|
310
324
|
All commands output JSON with `ok: true/false` and exit 0/1.
|
|
311
325
|
```
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-media",
|
|
3
|
-
"version": "0.3.2",
|
|
3
|
+
"version": "0.3.4",
|
|
4
4
|
"description": "Agent-first media toolkit CLI",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|
|
8
8
|
"url": "https://github.com/TimPietrusky/agent-media.git",
|
|
9
|
-
"directory": "packages/
|
|
9
|
+
"directory": "packages/agent-media"
|
|
10
10
|
},
|
|
11
11
|
"keywords": [
|
|
12
12
|
"media",
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"commander": "^12.0.0",
|
|
36
36
|
"dotenv": "^17.2.3",
|
|
37
|
-
"@agent-media/audio": "0.3.0",
|
|
38
37
|
"@agent-media/core": "0.3.0",
|
|
39
38
|
"@agent-media/image": "0.2.0",
|
|
40
|
-
"@agent-media/providers": "0.2.0"
|
|
39
|
+
"@agent-media/providers": "0.2.0",
|
|
40
|
+
"@agent-media/audio": "0.3.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^22.0.0",
|