agent-media 0.3.5 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -4
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -10,17 +10,19 @@ Media processing CLI for AI agents.
|
|
|
10
10
|
|
|
11
11
|
### Local processing (no API key needed)
|
|
12
12
|
|
|
13
|
-
Uses [Sharp](https://sharp.pixelplumbing.com/) for
|
|
14
|
-
|
|
15
|
-
We're working on adding [transformers.js](https://github.com/huggingface/transformers.js) for local AI features soon.
|
|
13
|
+
Uses [Sharp](https://sharp.pixelplumbing.com/) for image operations and [transformers.js](https://huggingface.co/docs/transformers.js) for local AI (background removal, transcription).
|
|
16
14
|
|
|
17
15
|
```bash
|
|
18
16
|
bunx agent-media@latest image resize --in sunset-mountains.jpg --width 800
|
|
19
17
|
bunx agent-media@latest image convert --in sunset-mountains.png --format webp
|
|
20
18
|
bunx agent-media@latest image extend --in sunset-mountains.jpg --padding 50 --color "#FFFFFF"
|
|
19
|
+
bunx agent-media@latest image remove-background --in portrait-headshot.png --provider transformers
|
|
21
20
|
bunx agent-media@latest audio extract --in video.mp4
|
|
21
|
+
bunx agent-media@latest audio transcribe --in audio.mp3 --provider transformers
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
+
> **Note**: You may see a `mutex lock failed` error with `--provider transformers` — ignore it, the output is correct if JSON shows `"ok": true`.
|
|
25
|
+
|
|
24
26
|
### AI-powered features
|
|
25
27
|
|
|
26
28
|
Requires an API key from one of these providers:
|
|
@@ -305,6 +307,7 @@ Exit code is `0` on success, `1` on error.
|
|
|
305
307
|
| Provider | resize | convert | extend | generate | edit | remove-background | transcribe |
|
|
306
308
|
|----------|--------|---------|--------|----------|------|-------------------|------------|
|
|
307
309
|
| **local** | ✓ | ✓ | ✓ | - | - | - | - |
|
|
310
|
+
| **transformers** | - | - | - | - | - | `Xenova/modnet` | `moonshine-base` |
|
|
308
311
|
| **fal** | - | - | - | `fal-ai/flux-2` | `fal-ai/flux-2/edit` | `fal-ai/birefnet/v2` | `fal-ai/wizper` |
|
|
309
312
|
| **replicate** | - | - | - | `black-forest-labs/flux-2-dev` | `black-forest-labs/flux-kontext-dev` | `men1scus/birefnet` | WhisperX |
|
|
310
313
|
| **runpod** | - | - | - | `alibaba/wan-2.6` | `google/nano-banana-pro-edit` | - | - |
|
|
@@ -359,6 +362,7 @@ All commands output JSON with `ok: true/false` and exit 0/1.
|
|
|
359
362
|
|
|
360
363
|
## Roadmap
|
|
361
364
|
|
|
362
|
-
- [
|
|
365
|
+
- [x] Local CPU background removal via transformers.js/ONNX (zero API keys)
|
|
366
|
+
- [x] Local CPU transcription via transformers.js/ONNX (zero API keys)
|
|
363
367
|
- [ ] Video processing actions
|
|
364
368
|
- [ ] Batch processing support
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-media",
|
|
3
|
-
"version": "0.3.5",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Agent-first media toolkit CLI",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"repository": {
|
|
@@ -34,10 +34,10 @@
|
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"commander": "^12.0.0",
|
|
36
36
|
"dotenv": "^17.2.3",
|
|
37
|
+
"@agent-media/audio": "0.3.1",
|
|
38
|
+
"@agent-media/image": "0.2.1",
|
|
37
39
|
"@agent-media/core": "0.3.0",
|
|
38
|
-
"@agent-media/
|
|
39
|
-
"@agent-media/providers": "0.2.0",
|
|
40
|
-
"@agent-media/image": "0.2.0"
|
|
40
|
+
"@agent-media/providers": "0.3.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@types/node": "^22.0.0",
|