ff-toolkit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ff_toolkit-0.1.0/.github/workflows/publish.yml +21 -0
- ff_toolkit-0.1.0/.gitignore +18 -0
- ff_toolkit-0.1.0/LICENSE +21 -0
- ff_toolkit-0.1.0/PKG-INFO +223 -0
- ff_toolkit-0.1.0/README.md +195 -0
- ff_toolkit-0.1.0/docs/demo.svg +113 -0
- ff_toolkit-0.1.0/examples/agent_loop_example.py +84 -0
- ff_toolkit-0.1.0/examples/anthropic_example.py +73 -0
- ff_toolkit-0.1.0/examples/local_example.py +90 -0
- ff_toolkit-0.1.0/examples/openai_example.py +69 -0
- ff_toolkit-0.1.0/pyproject.toml +44 -0
- ff_toolkit-0.1.0/src/ff_kit/__init__.py +30 -0
- ff_toolkit-0.1.0/src/ff_kit/cli.py +159 -0
- ff_toolkit-0.1.0/src/ff_kit/core/__init__.py +7 -0
- ff_toolkit-0.1.0/src/ff_kit/core/add_subtitles.py +64 -0
- ff_toolkit-0.1.0/src/ff_kit/core/clip.py +56 -0
- ff_toolkit-0.1.0/src/ff_kit/core/extract_audio.py +54 -0
- ff_toolkit-0.1.0/src/ff_kit/core/merge.py +89 -0
- ff_toolkit-0.1.0/src/ff_kit/core/transcode.py +76 -0
- ff_toolkit-0.1.0/src/ff_kit/dispatch.py +93 -0
- ff_toolkit-0.1.0/src/ff_kit/executor.py +120 -0
- ff_toolkit-0.1.0/src/ff_kit/mcp/__init__.py +0 -0
- ff_toolkit-0.1.0/src/ff_kit/mcp/__main__.py +6 -0
- ff_toolkit-0.1.0/src/ff_kit/mcp/server.py +116 -0
- ff_toolkit-0.1.0/src/ff_kit/schemas/__init__.py +4 -0
- ff_toolkit-0.1.0/src/ff_kit/schemas/anthropic.py +218 -0
- ff_toolkit-0.1.0/src/ff_kit/schemas/openai.py +238 -0
- ff_toolkit-0.1.0/tests/__init__.py +0 -0
- ff_toolkit-0.1.0/tests/test_cli.py +73 -0
- ff_toolkit-0.1.0/tests/test_core.py +231 -0
- ff_toolkit-0.1.0/tests/test_mcp.py +76 -0
- ff_toolkit-0.1.0/tests/test_schemas.py +69 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment: pypi
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
- run: pip install build
|
|
20
|
+
- run: python -m build
|
|
21
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
ff_toolkit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dekko
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ff-toolkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: FFmpeg operations as LLM-callable tools — clip, merge, extract audio, add subtitles, transcode.
|
|
5
|
+
Project-URL: Homepage, https://github.com/inthepond/ff-kit
|
|
6
|
+
Project-URL: Repository, https://github.com/inthepond/ff-kit
|
|
7
|
+
Project-URL: Issues, https://github.com/inthepond/ff-kit/issues
|
|
8
|
+
Author-email: Dekko <dekko.etangs@outlook.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,audio,ffmpeg,llm,mcp,tool-calling,video
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
21
|
+
Classifier: Topic :: Multimedia :: Video
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest-mock>=3.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# ff-kit
|
|
30
|
+
|
|
31
|
+
FFmpeg operations as LLM-callable tools.
|
|
32
|
+
|
|
33
|
+
<p align="center">
|
|
34
|
+
<img src="https://raw.githubusercontent.com/inthepond/ff-kit/main/docs/demo.svg" alt="ff-kit demo" width="720">
|
|
35
|
+
</p>
|
|
36
|
+
|
|
37
|
+
> **Stop hand-writing FFmpeg subprocess calls and JSON tool schemas.**
|
|
38
|
+
> `ff-kit` gives you 5 production-ready media operations, dual-format LLM schemas (OpenAI + Anthropic), and an MCP server — all in one `pip install`.
|
|
39
|
+
|
|
40
|
+
## Real-World Use Cases
|
|
41
|
+
|
|
42
|
+
**"My agent pipeline needs to process uploaded videos"** — Give your agent `openai_tools()` or `anthropic_tools()` and let it decide how to clip, transcode, or extract audio. The `dispatch()` function handles execution.
|
|
43
|
+
|
|
44
|
+
**"I need to batch-extract 16kHz WAV for ASR"** — One line: `extract_audio("video.mp4", "out.wav", codec="pcm_s16le", sample_rate=16000, channels=1)`
|
|
45
|
+
|
|
46
|
+
**"I want FFmpeg tools in Claude Desktop / Cursor"** — Add the MCP server config (3 lines of JSON) and Claude can edit your videos directly.
|
|
47
|
+
|
|
48
|
+
**"I'm tired of writing the same FFmpeg commands"** — Use the CLI: `ffkit clip input.mp4 output.mp4 --start 00:01:00 --duration 30`
|
|
49
|
+
|
|
50
|
+
## 60-Second Quick Start
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Install (requires FFmpeg on PATH)
|
|
54
|
+
pip install ff-toolkit
|
|
55
|
+
|
|
56
|
+
# Verify it works — no API keys needed
|
|
57
|
+
ffkit probe some_video.mp4
|
|
58
|
+
|
|
59
|
+
# Or run the full demo with a generated test video
|
|
60
|
+
python examples/local_example.py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Python API
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from ff_kit import clip, extract_audio, merge, transcode
|
|
67
|
+
|
|
68
|
+
# Trim seconds 60-90
|
|
69
|
+
clip("raw.mp4", "highlight.mp4", start="00:01:00", duration="30")
|
|
70
|
+
|
|
71
|
+
# Extract 16kHz mono audio for Whisper/Paraformer
|
|
72
|
+
extract_audio("raw.mp4", "speech.wav", codec="pcm_s16le", sample_rate=16000, channels=1)
|
|
73
|
+
|
|
74
|
+
# Concatenate intro + main + outro
|
|
75
|
+
merge(["intro.mp4", "main.mp4", "outro.mp4"], "final.mp4")
|
|
76
|
+
|
|
77
|
+
# Compress to 720p WebM for web delivery
|
|
78
|
+
transcode("raw.mp4", "web.webm", video_codec="libvpx-vp9", resolution="1280x720", crf=30)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### CLI
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
ffkit clip raw.mp4 highlight.mp4 --start 00:01:00 --duration 30
|
|
85
|
+
ffkit extract-audio raw.mp4 speech.wav --codec pcm_s16le --sample-rate 16000 --channels 1
|
|
86
|
+
ffkit merge intro.mp4 main.mp4 outro.mp4 -o final.mp4
|
|
87
|
+
ffkit transcode raw.mp4 web.webm --video-codec libvpx-vp9 --resolution 1280x720 --crf 30
|
|
88
|
+
ffkit probe video.mp4
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### With OpenAI (3 lines to integrate)
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from ff_kit.schemas.openai import openai_tools
|
|
95
|
+
from ff_kit.dispatch import dispatch
|
|
96
|
+
|
|
97
|
+
# 1. Pass tools to the model
|
|
98
|
+
response = client.chat.completions.create(
|
|
99
|
+
model="gpt-4o",
|
|
100
|
+
messages=messages,
|
|
101
|
+
tools=openai_tools(), # ← that's it
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# 2. Execute whatever the model calls
|
|
105
|
+
tc = response.choices[0].message.tool_calls[0]
|
|
106
|
+
result = dispatch(tc.function.name, json.loads(tc.function.arguments))
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### With Anthropic (3 lines to integrate)
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from ff_kit.schemas.anthropic import anthropic_tools
|
|
113
|
+
from ff_kit.dispatch import dispatch
|
|
114
|
+
|
|
115
|
+
response = client.messages.create(
|
|
116
|
+
model="claude-sonnet-4-20250514",
|
|
117
|
+
max_tokens=1024,
|
|
118
|
+
tools=anthropic_tools(), # ← that's it
|
|
119
|
+
messages=messages,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
for block in response.content:
|
|
123
|
+
if block.type == "tool_use":
|
|
124
|
+
result = dispatch(block.name, block.input)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### As an MCP Server (Claude Desktop / Cursor)
|
|
128
|
+
|
|
129
|
+
Add to your config (`claude_desktop_config.json` or Cursor settings):
|
|
130
|
+
|
|
131
|
+
```json
|
|
132
|
+
{
|
|
133
|
+
"mcpServers": {
|
|
134
|
+
"ff-kit": {
|
|
135
|
+
"command": "ffkit-mcp",
|
|
136
|
+
"args": []
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
That's it. Claude can now clip, merge, extract audio, add subtitles, and transcode your files.
|
|
143
|
+
|
|
144
|
+
## Operations
|
|
145
|
+
|
|
146
|
+
| Tool | What it does | Example |
|
|
147
|
+
|------|-------------|---------|
|
|
148
|
+
| `ffkit_clip` | Trim a segment by start + end/duration | Cut highlight reel from raw footage |
|
|
149
|
+
| `ffkit_merge` | Concatenate multiple files | Join intro + content + outro |
|
|
150
|
+
| `ffkit_extract_audio` | Extract audio, optionally re-encode | Get 16kHz WAV for speech recognition |
|
|
151
|
+
| `ffkit_add_subtitles` | Burn or embed subtitles (.srt/.ass/.vtt) | Hard-sub a translated SRT into video |
|
|
152
|
+
| `ffkit_transcode` | Convert format, codec, resolution, bitrate | Compress 4K MP4 to 720p WebM for web |
|
|
153
|
+
|
|
154
|
+
## How It Works
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
Your Agent ff-kit FFmpeg
|
|
158
|
+
│ │ │
|
|
159
|
+
├─ openai_tools() ──────────┤ │
|
|
160
|
+
│ or anthropic_tools() │ │
|
|
161
|
+
│ │ │
|
|
162
|
+
├─ LLM returns tool call ──►│ │
|
|
163
|
+
│ │ │
|
|
164
|
+
├─ dispatch(name, args) ───►├─ validates & builds cmd ────►│
|
|
165
|
+
│ │ │
|
|
166
|
+
│◄── FFmpegResult ─────────┤◄── subprocess result ────────┤
|
|
167
|
+
│ │ │
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Project Structure
|
|
171
|
+
|
|
172
|
+
```
|
|
173
|
+
ff-kit/
|
|
174
|
+
├── src/ff_kit/
|
|
175
|
+
│ ├── __init__.py # Public API: clip, merge, extract_audio, ...
|
|
176
|
+
│ ├── cli.py # CLI entry point (ffkit command)
|
|
177
|
+
│ ├── executor.py # FFmpeg subprocess runner + probe
|
|
178
|
+
│ ├── dispatch.py # Tool name → function router
|
|
179
|
+
│ ├── core/ # One module per operation
|
|
180
|
+
│ │ ├── clip.py
|
|
181
|
+
│ │ ├── merge.py
|
|
182
|
+
│ │ ├── extract_audio.py
|
|
183
|
+
│ │ ├── add_subtitles.py
|
|
184
|
+
│ │ └── transcode.py
|
|
185
|
+
│ ├── schemas/ # LLM tool definitions
|
|
186
|
+
│ │ ├── openai.py # OpenAI function-calling format
|
|
187
|
+
│ │ └── anthropic.py # Anthropic tool-use format
|
|
188
|
+
│ └── mcp/ # MCP server (stdio JSON-RPC)
|
|
189
|
+
│ └── server.py
|
|
190
|
+
├── examples/
|
|
191
|
+
│ ├── local_example.py # ← Run this first! No API key needed
|
|
192
|
+
│ ├── openai_example.py
|
|
193
|
+
│ ├── anthropic_example.py
|
|
194
|
+
│ └── agent_loop_example.py
|
|
195
|
+
└── tests/ # 30 tests, all mocked (no FFmpeg needed)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Development
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
git clone https://github.com/inthepond/ff-kit.git
|
|
202
|
+
cd ff-kit
|
|
203
|
+
pip install -e ".[dev]"
|
|
204
|
+
pytest -v # 30 tests, runs in <1s
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## FAQ
|
|
208
|
+
|
|
209
|
+
**Q: Do I need FFmpeg installed?**
|
|
210
|
+
Yes, for actual media operations. Tests are fully mocked and don't need FFmpeg. Install it from [ffmpeg.org/download](https://ffmpeg.org/download.html) or `brew install ffmpeg` / `apt install ffmpeg`.
|
|
211
|
+
|
|
212
|
+
**Q: Can I add custom operations?**
|
|
213
|
+
Yes — add a function in `core/`, register it in `dispatch.py`'s `_REGISTRY`, and add schema entries in `schemas/openai.py` and `schemas/anthropic.py`. See any existing operation as a template.
|
|
214
|
+
|
|
215
|
+
**Q: Why not just use LangChain / CrewAI tools?**
|
|
216
|
+
Those frameworks are great, but they're heavy dependencies. ff-kit is zero-dependency (beyond Python stdlib) and works with any LLM provider. You can use it inside LangChain if you want, or standalone.
|
|
217
|
+
|
|
218
|
+
**Q: What about streaming / progress callbacks?**
|
|
219
|
+
Not in v0.1. FFmpeg progress parsing is planned for v0.2.
|
|
220
|
+
|
|
221
|
+
## License
|
|
222
|
+
|
|
223
|
+
MIT
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# ff-kit
|
|
2
|
+
|
|
3
|
+
FFmpeg operations as LLM-callable tools.
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<img src="https://raw.githubusercontent.com/inthepond/ff-kit/main/docs/demo.svg" alt="ff-kit demo" width="720">
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
> **Stop hand-writing FFmpeg subprocess calls and JSON tool schemas.**
|
|
10
|
+
> `ff-kit` gives you 5 production-ready media operations, dual-format LLM schemas (OpenAI + Anthropic), and an MCP server — all in one `pip install`.
|
|
11
|
+
|
|
12
|
+
## Real-World Use Cases
|
|
13
|
+
|
|
14
|
+
**"My agent pipeline needs to process uploaded videos"** — Give your agent `openai_tools()` or `anthropic_tools()` and let it decide how to clip, transcode, or extract audio. The `dispatch()` function handles execution.
|
|
15
|
+
|
|
16
|
+
**"I need to batch-extract 16kHz WAV for ASR"** — One line: `extract_audio("video.mp4", "out.wav", codec="pcm_s16le", sample_rate=16000, channels=1)`
|
|
17
|
+
|
|
18
|
+
**"I want FFmpeg tools in Claude Desktop / Cursor"** — Add the MCP server config (3 lines of JSON) and Claude can edit your videos directly.
|
|
19
|
+
|
|
20
|
+
**"I'm tired of writing the same FFmpeg commands"** — Use the CLI: `ffkit clip input.mp4 output.mp4 --start 00:01:00 --duration 30`
|
|
21
|
+
|
|
22
|
+
## 60-Second Quick Start
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Install (requires FFmpeg on PATH)
|
|
26
|
+
pip install ff-toolkit
|
|
27
|
+
|
|
28
|
+
# Verify it works — no API keys needed
|
|
29
|
+
ffkit probe some_video.mp4
|
|
30
|
+
|
|
31
|
+
# Or run the full demo with a generated test video
|
|
32
|
+
python examples/local_example.py
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Python API
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from ff_kit import clip, extract_audio, merge, transcode
|
|
39
|
+
|
|
40
|
+
# Trim seconds 60-90
|
|
41
|
+
clip("raw.mp4", "highlight.mp4", start="00:01:00", duration="30")
|
|
42
|
+
|
|
43
|
+
# Extract 16kHz mono audio for Whisper/Paraformer
|
|
44
|
+
extract_audio("raw.mp4", "speech.wav", codec="pcm_s16le", sample_rate=16000, channels=1)
|
|
45
|
+
|
|
46
|
+
# Concatenate intro + main + outro
|
|
47
|
+
merge(["intro.mp4", "main.mp4", "outro.mp4"], "final.mp4")
|
|
48
|
+
|
|
49
|
+
# Compress to 720p WebM for web delivery
|
|
50
|
+
transcode("raw.mp4", "web.webm", video_codec="libvpx-vp9", resolution="1280x720", crf=30)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### CLI
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
ffkit clip raw.mp4 highlight.mp4 --start 00:01:00 --duration 30
|
|
57
|
+
ffkit extract-audio raw.mp4 speech.wav --codec pcm_s16le --sample-rate 16000 --channels 1
|
|
58
|
+
ffkit merge intro.mp4 main.mp4 outro.mp4 -o final.mp4
|
|
59
|
+
ffkit transcode raw.mp4 web.webm --video-codec libvpx-vp9 --resolution 1280x720 --crf 30
|
|
60
|
+
ffkit probe video.mp4
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### With OpenAI (3 lines to integrate)
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from ff_kit.schemas.openai import openai_tools
|
|
67
|
+
from ff_kit.dispatch import dispatch
|
|
68
|
+
|
|
69
|
+
# 1. Pass tools to the model
|
|
70
|
+
response = client.chat.completions.create(
|
|
71
|
+
model="gpt-4o",
|
|
72
|
+
messages=messages,
|
|
73
|
+
tools=openai_tools(), # ← that's it
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# 2. Execute whatever the model calls
|
|
77
|
+
tc = response.choices[0].message.tool_calls[0]
|
|
78
|
+
result = dispatch(tc.function.name, json.loads(tc.function.arguments))
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### With Anthropic (3 lines to integrate)
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from ff_kit.schemas.anthropic import anthropic_tools
|
|
85
|
+
from ff_kit.dispatch import dispatch
|
|
86
|
+
|
|
87
|
+
response = client.messages.create(
|
|
88
|
+
model="claude-sonnet-4-20250514",
|
|
89
|
+
max_tokens=1024,
|
|
90
|
+
tools=anthropic_tools(), # ← that's it
|
|
91
|
+
messages=messages,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
for block in response.content:
|
|
95
|
+
if block.type == "tool_use":
|
|
96
|
+
result = dispatch(block.name, block.input)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### As an MCP Server (Claude Desktop / Cursor)
|
|
100
|
+
|
|
101
|
+
Add to your config (`claude_desktop_config.json` or Cursor settings):
|
|
102
|
+
|
|
103
|
+
```json
|
|
104
|
+
{
|
|
105
|
+
"mcpServers": {
|
|
106
|
+
"ff-kit": {
|
|
107
|
+
"command": "ffkit-mcp",
|
|
108
|
+
"args": []
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
That's it. Claude can now clip, merge, extract audio, add subtitles, and transcode your files.
|
|
115
|
+
|
|
116
|
+
## Operations
|
|
117
|
+
|
|
118
|
+
| Tool | What it does | Example |
|
|
119
|
+
|------|-------------|---------|
|
|
120
|
+
| `ffkit_clip` | Trim a segment by start + end/duration | Cut highlight reel from raw footage |
|
|
121
|
+
| `ffkit_merge` | Concatenate multiple files | Join intro + content + outro |
|
|
122
|
+
| `ffkit_extract_audio` | Extract audio, optionally re-encode | Get 16kHz WAV for speech recognition |
|
|
123
|
+
| `ffkit_add_subtitles` | Burn or embed subtitles (.srt/.ass/.vtt) | Hard-sub a translated SRT into video |
|
|
124
|
+
| `ffkit_transcode` | Convert format, codec, resolution, bitrate | Compress 4K MP4 to 720p WebM for web |
|
|
125
|
+
|
|
126
|
+
## How It Works
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
Your Agent ff-kit FFmpeg
|
|
130
|
+
│ │ │
|
|
131
|
+
├─ openai_tools() ──────────┤ │
|
|
132
|
+
│ or anthropic_tools() │ │
|
|
133
|
+
│ │ │
|
|
134
|
+
├─ LLM returns tool call ──►│ │
|
|
135
|
+
│ │ │
|
|
136
|
+
├─ dispatch(name, args) ───►├─ validates & builds cmd ────►│
|
|
137
|
+
│ │ │
|
|
138
|
+
│◄── FFmpegResult ─────────┤◄── subprocess result ────────┤
|
|
139
|
+
│ │ │
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Project Structure
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
ff-kit/
|
|
146
|
+
├── src/ff_kit/
|
|
147
|
+
│ ├── __init__.py # Public API: clip, merge, extract_audio, ...
|
|
148
|
+
│ ├── cli.py # CLI entry point (ffkit command)
|
|
149
|
+
│ ├── executor.py # FFmpeg subprocess runner + probe
|
|
150
|
+
│ ├── dispatch.py # Tool name → function router
|
|
151
|
+
│ ├── core/ # One module per operation
|
|
152
|
+
│ │ ├── clip.py
|
|
153
|
+
│ │ ├── merge.py
|
|
154
|
+
│ │ ├── extract_audio.py
|
|
155
|
+
│ │ ├── add_subtitles.py
|
|
156
|
+
│ │ └── transcode.py
|
|
157
|
+
│ ├── schemas/ # LLM tool definitions
|
|
158
|
+
│ │ ├── openai.py # OpenAI function-calling format
|
|
159
|
+
│ │ └── anthropic.py # Anthropic tool-use format
|
|
160
|
+
│ └── mcp/ # MCP server (stdio JSON-RPC)
|
|
161
|
+
│ └── server.py
|
|
162
|
+
├── examples/
|
|
163
|
+
│ ├── local_example.py # ← Run this first! No API key needed
|
|
164
|
+
│ ├── openai_example.py
|
|
165
|
+
│ ├── anthropic_example.py
|
|
166
|
+
│ └── agent_loop_example.py
|
|
167
|
+
└── tests/ # 30 tests, all mocked (no FFmpeg needed)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
git clone https://github.com/inthepond/ff-kit.git
|
|
174
|
+
cd ff-kit
|
|
175
|
+
pip install -e ".[dev]"
|
|
176
|
+
pytest -v # 30 tests, runs in <1s
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## FAQ
|
|
180
|
+
|
|
181
|
+
**Q: Do I need FFmpeg installed?**
|
|
182
|
+
Yes, for actual media operations. Tests are fully mocked and don't need FFmpeg. Install it from [ffmpeg.org/download](https://ffmpeg.org/download.html) or `brew install ffmpeg` / `apt install ffmpeg`.
|
|
183
|
+
|
|
184
|
+
**Q: Can I add custom operations?**
|
|
185
|
+
Yes — add a function in `core/`, register it in `dispatch.py`'s `_REGISTRY`, and add schema entries in `schemas/openai.py` and `schemas/anthropic.py`. See any existing operation as a template.
|
|
186
|
+
|
|
187
|
+
**Q: Why not just use LangChain / CrewAI tools?**
|
|
188
|
+
Those frameworks are great, but they're heavy dependencies. ff-kit is zero-dependency (beyond Python stdlib) and works with any LLM provider. You can use it inside LangChain if you want, or standalone.
|
|
189
|
+
|
|
190
|
+
**Q: What about streaming / progress callbacks?**
|
|
191
|
+
Not in v0.1. FFmpeg progress parsing is planned for v0.2.
|
|
192
|
+
|
|
193
|
+
## License
|
|
194
|
+
|
|
195
|
+
MIT
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="720" height="460">
|
|
2
|
+
<defs>
|
|
3
|
+
<style>
|
|
4
|
+
text { font-family: 'SFMono-Regular', 'SF Mono', 'Menlo', 'Consolas', 'Liberation Mono', monospace; font-size: 13px; }
|
|
5
|
+
</style>
|
|
6
|
+
</defs>
|
|
7
|
+
|
|
8
|
+
<!-- Terminal window background -->
|
|
9
|
+
<rect fill="#1e1e2e" width="720" height="460" rx="10" />
|
|
10
|
+
<rect fill="#313244" width="720" height="32" rx="10" />
|
|
11
|
+
<rect fill="#313244" width="720" height="16" y="16" />
|
|
12
|
+
<circle fill="#f38ba8" cx="20" cy="16" r="6" />
|
|
13
|
+
<circle fill="#f9e2af" cx="40" cy="16" r="6" />
|
|
14
|
+
<circle fill="#a6e3a1" cx="60" cy="16" r="6" />
|
|
15
|
+
<text x="320" y="21" text-anchor="middle" fill="#a6adc8" font-size="12">ff-kit demo</text>
|
|
16
|
+
|
|
17
|
+
<!-- Line 1: install command -->
|
|
18
|
+
<g opacity="0">
|
|
19
|
+
<set attributeName="opacity" to="1" begin="0.5s" fill="freeze" />
|
|
20
|
+
<text fill="#89b4fa" x="16" y="60">$</text>
|
|
21
|
+
<text fill="#cdd6f4" x="30" y="60">pip install ff-toolkit</text>
|
|
22
|
+
</g>
|
|
23
|
+
|
|
24
|
+
<!-- Line 2: install success -->
|
|
25
|
+
<g opacity="0">
|
|
26
|
+
<set attributeName="opacity" to="1" begin="1.0s" fill="freeze" />
|
|
27
|
+
<text fill="#a6e3a1" x="16" y="80">Successfully installed ff-toolkit-0.1.0</text>
|
|
28
|
+
</g>
|
|
29
|
+
|
|
30
|
+
<!-- Line 3: comment -->
|
|
31
|
+
<g opacity="0">
|
|
32
|
+
<set attributeName="opacity" to="1" begin="1.5s" fill="freeze" />
|
|
33
|
+
<text fill="#6c7086" x="16" y="108"># Clip the first 30 seconds from a video</text>
|
|
34
|
+
</g>
|
|
35
|
+
|
|
36
|
+
<!-- Line 4: clip command -->
|
|
37
|
+
<g opacity="0">
|
|
38
|
+
<set attributeName="opacity" to="1" begin="2.2s" fill="freeze" />
|
|
39
|
+
<text fill="#89b4fa" x="16" y="128">$</text>
|
|
40
|
+
<text fill="#cdd6f4" x="30" y="128">ffkit clip raw.mp4 intro.mp4 --start 0 --duration 30</text>
|
|
41
|
+
</g>
|
|
42
|
+
|
|
43
|
+
<!-- Line 5: clip output -->
|
|
44
|
+
<g opacity="0">
|
|
45
|
+
<set attributeName="opacity" to="1" begin="2.7s" fill="freeze" />
|
|
46
|
+
<text fill="#a6e3a1" x="16" y="148">Done: intro.mp4</text>
|
|
47
|
+
</g>
|
|
48
|
+
|
|
49
|
+
<!-- Line 6: comment -->
|
|
50
|
+
<g opacity="0">
|
|
51
|
+
<set attributeName="opacity" to="1" begin="3.2s" fill="freeze" />
|
|
52
|
+
<text fill="#6c7086" x="16" y="176"># Extract 16kHz mono WAV for Whisper/Paraformer</text>
|
|
53
|
+
</g>
|
|
54
|
+
|
|
55
|
+
<!-- Line 7: extract command -->
|
|
56
|
+
<g opacity="0">
|
|
57
|
+
<set attributeName="opacity" to="1" begin="3.5s" fill="freeze" />
|
|
58
|
+
<text fill="#89b4fa" x="16" y="196">$</text>
|
|
59
|
+
<text fill="#cdd6f4" x="30" y="196">ffkit extract-audio raw.mp4 speech.wav --codec pcm_s16le --sample-rate 16000</text>
|
|
60
|
+
</g>
|
|
61
|
+
|
|
62
|
+
<!-- Line 8: extract output -->
|
|
63
|
+
<g opacity="0">
|
|
64
|
+
<set attributeName="opacity" to="1" begin="4.2s" fill="freeze" />
|
|
65
|
+
<text fill="#a6e3a1" x="16" y="216">Done: speech.wav</text>
|
|
66
|
+
</g>
|
|
67
|
+
|
|
68
|
+
<!-- Line 9: comment -->
|
|
69
|
+
<g opacity="0">
|
|
70
|
+
<set attributeName="opacity" to="1" begin="4.7s" fill="freeze" />
|
|
71
|
+
<text fill="#6c7086" x="16" y="244"># Print tool schemas for your LLM agent</text>
|
|
72
|
+
</g>
|
|
73
|
+
|
|
74
|
+
<!-- Line 10: list-tools command -->
|
|
75
|
+
<g opacity="0">
|
|
76
|
+
<set attributeName="opacity" to="1" begin="5.2s" fill="freeze" />
|
|
77
|
+
<text fill="#89b4fa" x="16" y="264">$</text>
|
|
78
|
+
<text fill="#cdd6f4" x="30" y="264">ffkit list-tools --format openai | head -8</text>
|
|
79
|
+
</g>
|
|
80
|
+
|
|
81
|
+
<!-- JSON output lines -->
|
|
82
|
+
<g opacity="0">
|
|
83
|
+
<set attributeName="opacity" to="1" begin="5.5s" fill="freeze" />
|
|
84
|
+
<text fill="#a6adc8" x="16" y="284">[{</text>
|
|
85
|
+
</g>
|
|
86
|
+
<g opacity="0">
|
|
87
|
+
<set attributeName="opacity" to="1" begin="5.7s" fill="freeze" />
|
|
88
|
+
<text fill="#a6adc8" x="16" y="304"> "type": "function",</text>
|
|
89
|
+
</g>
|
|
90
|
+
<g opacity="0">
|
|
91
|
+
<set attributeName="opacity" to="1" begin="5.9s" fill="freeze" />
|
|
92
|
+
<text fill="#a6adc8" x="16" y="324"> "function": {</text>
|
|
93
|
+
</g>
|
|
94
|
+
<g opacity="0">
|
|
95
|
+
<set attributeName="opacity" to="1" begin="6.1s" fill="freeze" />
|
|
96
|
+
<text fill="#a6adc8" x="16" y="344"> "name": </text>
|
|
97
|
+
<text fill="#f9e2af" x="118" y="344">"ffkit_clip"</text>
|
|
98
|
+
<text fill="#a6adc8" x="210" y="344">,</text>
|
|
99
|
+
</g>
|
|
100
|
+
<g opacity="0">
|
|
101
|
+
<set attributeName="opacity" to="1" begin="6.3s" fill="freeze" />
|
|
102
|
+
<text fill="#a6adc8" x="16" y="364"> "description": "Trim a segment from a media file..."</text>
|
|
103
|
+
</g>
|
|
104
|
+
|
|
105
|
+
<!-- Final prompt with blinking cursor -->
|
|
106
|
+
<g opacity="0">
|
|
107
|
+
<set attributeName="opacity" to="1" begin="7.0s" fill="freeze" />
|
|
108
|
+
<text fill="#89b4fa" x="16" y="400">$</text>
|
|
109
|
+
<rect fill="#cdd6f4" x="30" y="389" width="8" height="16">
|
|
110
|
+
<animate attributeName="opacity" values="1;0;1" dur="1s" repeatCount="indefinite" begin="7.0s" />
|
|
111
|
+
</rect>
|
|
112
|
+
</g>
|
|
113
|
+
</svg>
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Example: a minimal agent loop with ff-kit — no framework needed.
|
|
3
|
+
|
|
4
|
+
This shows how to build a simple tool-calling loop using ff-kit's
|
|
5
|
+
dispatch() and schema system. Works with OpenAI, but the pattern
|
|
6
|
+
is the same for any provider.
|
|
7
|
+
|
|
8
|
+
export OPENAI_API_KEY=sk-...
|
|
9
|
+
python examples/agent_loop_example.py
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from openai import OpenAI
|
|
16
|
+
from ff_kit.schemas.openai import openai_tools
|
|
17
|
+
from ff_kit.dispatch import dispatch
|
|
18
|
+
|
|
19
|
+
client = OpenAI()
|
|
20
|
+
tools = openai_tools()
|
|
21
|
+
|
|
22
|
+
SYSTEM = (
|
|
23
|
+
"You are a video editing assistant. You have access to ffkit tools "
|
|
24
|
+
"for clipping, merging, extracting audio, adding subtitles, and "
|
|
25
|
+
"transcoding media files. When the user asks for a media operation, "
|
|
26
|
+
"call the appropriate tool. You can chain multiple tools for complex "
|
|
27
|
+
"workflows (e.g., extract audio then transcode it)."
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def run_agent(user_message: str, max_turns: int = 5) -> str:
|
|
32
|
+
"""
|
|
33
|
+
Run a simple tool-calling agent loop.
|
|
34
|
+
|
|
35
|
+
The LLM can call multiple tools across multiple turns.
|
|
36
|
+
Returns the final text response.
|
|
37
|
+
"""
|
|
38
|
+
messages = [
|
|
39
|
+
{"role": "system", "content": SYSTEM},
|
|
40
|
+
{"role": "user", "content": user_message},
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
for turn in range(max_turns):
|
|
44
|
+
response = client.chat.completions.create(
|
|
45
|
+
model="gpt-4o",
|
|
46
|
+
messages=messages,
|
|
47
|
+
tools=tools,
|
|
48
|
+
)
|
|
49
|
+
msg = response.choices[0].message
|
|
50
|
+
|
|
51
|
+
# If no tool calls, we're done
|
|
52
|
+
if not msg.tool_calls:
|
|
53
|
+
return msg.content or ""
|
|
54
|
+
|
|
55
|
+
# Execute each tool call
|
|
56
|
+
messages.append(msg.model_dump())
|
|
57
|
+
for tc in msg.tool_calls:
|
|
58
|
+
print(f" [Turn {turn + 1}] Calling {tc.function.name}...")
|
|
59
|
+
args = json.loads(tc.function.arguments)
|
|
60
|
+
result = dispatch(tc.function.name, args)
|
|
61
|
+
print(f" [Turn {turn + 1}] Result: {result['status']}")
|
|
62
|
+
|
|
63
|
+
messages.append({
|
|
64
|
+
"role": "tool",
|
|
65
|
+
"tool_call_id": tc.id,
|
|
66
|
+
"content": json.dumps(result),
|
|
67
|
+
})
|
|
68
|
+
|
|
69
|
+
return "Agent reached max turns without a final response."
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
if __name__ == "__main__":
|
|
73
|
+
# Try some real-world prompts:
|
|
74
|
+
prompts = [
|
|
75
|
+
"Extract the audio from meeting.mp4 as a 16kHz mono WAV file for transcription.",
|
|
76
|
+
"Clip the first 60 seconds of raw_footage.mp4 and transcode it to 720p WebM.",
|
|
77
|
+
"Merge intro.mp4, main.mp4, and outro.mp4 into final_video.mp4",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
for prompt in prompts:
|
|
81
|
+
print(f"\nUser: {prompt}")
|
|
82
|
+
print("-" * 60)
|
|
83
|
+
answer = run_agent(prompt)
|
|
84
|
+
print(f"Assistant: {answer}\n")
|