cutted 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cutted/__init__.py +1 -1
- cutted/app.py +54 -29
- cutted/core/audio_processor.py +44 -11
- cutted/core/gemini.py +13 -4
- cutted-0.3.3.dist-info/METADATA +55 -0
- cutted-0.3.3.dist-info/RECORD +12 -0
- cutted-0.3.1.dist-info/METADATA +0 -68
- cutted-0.3.1.dist-info/RECORD +0 -12
- {cutted-0.3.1.dist-info → cutted-0.3.3.dist-info}/WHEEL +0 -0
- {cutted-0.3.1.dist-info → cutted-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {cutted-0.3.1.dist-info → cutted-0.3.3.dist-info}/top_level.txt +0 -0
cutted/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
__version__ = "0.3.1"
|
1
|
+
__version__ = "0.3.3"
|
2
2
|
__author__ = "simon0302010"
|
cutted/app.py
CHANGED
@@ -25,6 +25,8 @@ class CuttedApp:
|
|
25
25
|
self.canvas = None
|
26
26
|
self.cursor_line = None
|
27
27
|
self.last_slider_update = 0
|
28
|
+
self.slider_value = 0
|
29
|
+
self.playback_start_time = 0
|
28
30
|
self.is_playing = False
|
29
31
|
self.last_states = []
|
30
32
|
self.setup_ui()
|
@@ -58,11 +60,19 @@ class CuttedApp:
|
|
58
60
|
if whisper_support:
|
59
61
|
self.use_transcript_checkbox = customtkinter.CTkCheckBox(
|
60
62
|
self.root,
|
61
|
-
text="
|
63
|
+
text="Send transcript to Gemini (slower, more accurate)",
|
62
64
|
text_color="#888888",
|
63
65
|
font=("Arial", 12)
|
64
66
|
)
|
65
67
|
self.use_transcript_checkbox.place(relx=0.0, rely=1.0, anchor="w", y=-12)
|
68
|
+
|
69
|
+
self.use_audio_checkbox = customtkinter.CTkCheckBox(
|
70
|
+
self.root,
|
71
|
+
text="Send audio to Gemini (buggy)",
|
72
|
+
text_color="#888888",
|
73
|
+
font=("Arial", 12)
|
74
|
+
)
|
75
|
+
self.use_audio_checkbox.place(relx=1.0, rely=1.0, anchor="e", y=-12)
|
66
76
|
|
67
77
|
self.play_button = customtkinter.CTkButton(self.root, text="Play", command=self.play_audio, width=50)
|
68
78
|
self.play_button.place(relx=0.3, rely=1.0, anchor="s", y=-30)
|
@@ -105,7 +115,7 @@ class CuttedApp:
|
|
105
115
|
self.canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
|
106
116
|
self.canvas.draw()
|
107
117
|
|
108
|
-
self.audio_lenght = int(round(self.AudioProcessor.
|
118
|
+
self.audio_lenght = int(round(self.AudioProcessor.get_length()))
|
109
119
|
|
110
120
|
slider_width = self.root.winfo_width() - 40
|
111
121
|
self.slider = customtkinter.CTkSlider(
|
@@ -145,11 +155,16 @@ class CuttedApp:
|
|
145
155
|
return
|
146
156
|
|
147
157
|
start_time = self.slider.get() if hasattr(self, 'slider') else 0
|
158
|
+
self.playback_start_time = start_time
|
148
159
|
self.AudioProcessor.play_audio(start_time)
|
149
160
|
|
150
161
|
def stop_audio(self):
|
151
|
-
self.AudioProcessor.stop_audio()
|
162
|
+
rel_pos = self.AudioProcessor.stop_audio()
|
152
163
|
self.is_playing = False
|
164
|
+
abs_pos = self.playback_start_time + rel_pos
|
165
|
+
self.slider.set(abs_pos)
|
166
|
+
self.set_cursor(abs_pos)
|
167
|
+
print_info(f"Absolute position in audio: {abs_pos:.2f}s")
|
153
168
|
|
154
169
|
def export_audio(self):
|
155
170
|
if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
|
@@ -176,6 +191,7 @@ class CuttedApp:
|
|
176
191
|
print_success(f"Audio exported to {save_path}")
|
177
192
|
|
178
193
|
def send_prompt(self):
|
194
|
+
print(self.AudioProcessor.get_waveform_summary())
|
179
195
|
self.save_state()
|
180
196
|
|
181
197
|
if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
|
@@ -183,32 +199,41 @@ class CuttedApp:
|
|
183
199
|
return
|
184
200
|
|
185
201
|
text = self.entry.get()
|
186
|
-
|
187
|
-
|
188
|
-
if
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
202
|
+
if text.strip():
|
203
|
+
full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length())}s long. The cursor of the user is currently at {self.slider_value}s."
|
204
|
+
full_prompt += "\nHere is a the waveform samples of the audio. You can use them to determine silent parts, loud parts, silences, beats and much more.\nYou are forced to used these if the user requires you to cut out silent of quiet parts for example."
|
205
|
+
full_prompt += "\nAll of your tools should be enough to fullfill almost every task.\nNEVER ASK FOR CONFIRMATION FROM THE USER. DO EVERYTHING!"
|
206
|
+
full_prompt += f"\n{self.AudioProcessor.get_waveform_summary()}\n"
|
207
|
+
if whisper_support:
|
208
|
+
if self.use_transcript_checkbox.get():
|
209
|
+
if not self.whisper:
|
210
|
+
messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running. Press OK to continue.")
|
211
|
+
self.whisper = transcribe.Whisper()
|
212
|
+
transcript = self.whisper.transcribe(self.AudioProcessor.audio_path)
|
213
|
+
full_prompt += f"\nThis is a transcript with per word timestamps of the audio:\n{transcript}"
|
214
|
+
full_prompt += "\nThe transcript likely has issues. If you need infos about some words they might just be misspelled in the audio."
|
215
|
+
full_prompt += f"\n\nUser Prompt: {text}"
|
216
|
+
self.entry.delete(0, "end")
|
217
|
+
|
218
|
+
if self.use_audio_checkbox.get():
|
219
|
+
function_call, text_result = self.gemini.generate(full_prompt, audio_base64=self.AudioProcessor.get_audio_base64())
|
220
|
+
else:
|
221
|
+
function_call, text_result = self.gemini.generate(full_prompt)
|
222
|
+
|
223
|
+
if function_call:
|
224
|
+
print_info(f"Gemini called {function_call.name}")
|
225
|
+
if function_call.name == "cut_audio":
|
226
|
+
print_info("Cut function called")
|
227
|
+
args = function_call.args
|
228
|
+
result = self.AudioProcessor.cut(args["start"], args["end"])
|
229
|
+
if not result:
|
230
|
+
messagebox.showerror("Error", "Please try again.")
|
231
|
+
self.update_plot()
|
232
|
+
elif text_result:
|
233
|
+
messagebox.showerror("Error", text_result.strip())
|
234
|
+
else:
|
235
|
+
messagebox.showerror("Error", "Gemini returned no data")
|
236
|
+
print_fail("Gemini returned no data")
|
212
237
|
|
213
238
|
def save_state(self):
|
214
239
|
if hasattr(self.AudioProcessor, "audio") and self.AudioProcessor.audio is not None:
|
cutted/core/audio_processor.py
CHANGED
@@ -4,9 +4,8 @@ from .logger import *
|
|
4
4
|
import numpy as np
|
5
5
|
from matplotlib.figure import Figure
|
6
6
|
import pygame
|
7
|
+
import base64
|
7
8
|
import io
|
8
|
-
import threading
|
9
|
-
import time
|
10
9
|
|
11
10
|
class AudioProcessor:
|
12
11
|
def __init__(self):
|
@@ -63,7 +62,21 @@ class AudioProcessor:
|
|
63
62
|
|
64
63
|
return fig
|
65
64
|
|
66
|
-
def
|
65
|
+
def get_waveform_summary(self):
|
66
|
+
num_samples = round(self.get_length())
|
67
|
+
if self.audio is None:
|
68
|
+
return "No audio loaded."
|
69
|
+
samples = np.array(self.audio.get_array_of_samples())
|
70
|
+
if self.audio.channels == 2:
|
71
|
+
samples = samples.reshape((-1, 2))
|
72
|
+
samples = samples.mean(axis=1)
|
73
|
+
samples = samples / np.max(np.abs(samples))
|
74
|
+
indices = np.linspace(0, len(samples)-1, num_samples).astype(int)
|
75
|
+
summary = samples[indices]
|
76
|
+
return f"Waveform samples (normalized, {num_samples} points):\n" + \
|
77
|
+
" ".join(f"{x:.2f}" for x in summary)
|
78
|
+
|
79
|
+
def get_length(self):
|
67
80
|
self.duration = self.audio.duration_seconds
|
68
81
|
self.duration = round(self.duration, 2)
|
69
82
|
return self.duration
|
@@ -71,17 +84,25 @@ class AudioProcessor:
|
|
71
84
|
def cut(self, start, end):
|
72
85
|
if len(start) == len(end):
|
73
86
|
if len(start) == 1:
|
74
|
-
|
75
|
-
|
76
|
-
|
87
|
+
single_start = max(0, start[0])
|
88
|
+
single_end = max(0, end[0])
|
89
|
+
if single_end <= single_start:
|
90
|
+
print_fail("End time must be greater than start time.")
|
91
|
+
return False
|
92
|
+
print_info(f"Cutting from {single_start} to {single_end}")
|
93
|
+
start_ms = round(single_start * 1000)
|
94
|
+
end_ms = round(single_end * 1000)
|
77
95
|
self.audio = self.audio[:start_ms] + self.audio[end_ms:]
|
78
96
|
return True
|
79
97
|
else:
|
80
98
|
time_sets = list(zip(start, end))
|
81
99
|
subtract_time = 0
|
82
100
|
for single_start, single_end in time_sets:
|
83
|
-
single_start = single_start - subtract_time
|
84
|
-
single_end = single_end - subtract_time
|
101
|
+
single_start = max(0, single_start - subtract_time)
|
102
|
+
single_end = max(0, single_end - subtract_time)
|
103
|
+
if single_end <= single_start:
|
104
|
+
print_fail("End time must be greater than start time.")
|
105
|
+
continue
|
85
106
|
print_info(f"Cutting from {single_start} to {single_end}")
|
86
107
|
start_ms = round(single_start * 1000)
|
87
108
|
end_ms = round(single_end * 1000)
|
@@ -124,11 +145,15 @@ class AudioProcessor:
|
|
124
145
|
def stop_audio(self):
|
125
146
|
try:
|
126
147
|
if pygame.mixer.get_init():
|
148
|
+
pos_ms = pygame.mixer.music.get_pos()
|
149
|
+
pos_sec = pos_ms / 1000 if pos_ms >= 0 else 0
|
127
150
|
pygame.mixer.music.stop()
|
128
151
|
self.is_playing_var = False
|
129
|
-
print_info("Audio playback stopped")
|
152
|
+
print_info(f"Audio playback stopped at {pos_sec:.2f}s")
|
153
|
+
return pos_sec
|
130
154
|
except Exception as e:
|
131
155
|
print_warn(f"Error stopping audio: {e}")
|
156
|
+
return 0
|
132
157
|
|
133
158
|
def is_playing(self):
|
134
159
|
try:
|
@@ -143,11 +168,19 @@ class AudioProcessor:
|
|
143
168
|
return None
|
144
169
|
|
145
170
|
return {
|
146
|
-
"duration": self.
|
171
|
+
"duration": self.get_length(),
|
147
172
|
"channels": self.audio.channels,
|
148
173
|
"frame_rate": self.audio.frame_rate,
|
149
174
|
"sample_width": self.audio.sample_width
|
150
175
|
}
|
151
176
|
|
152
177
|
def export_audio(self, path, format: str = "mp3"):
|
153
|
-
self.audio.export(path, format=format)
|
178
|
+
self.audio.export(path, format=format)
|
179
|
+
|
180
|
+
def get_audio_base64(self):
|
181
|
+
buffer = io.BytesIO()
|
182
|
+
self.audio.export(buffer, format="mp3")
|
183
|
+
buffer.seek(0)
|
184
|
+
audio_bytes = buffer.read()
|
185
|
+
audio_base64 = base64.b64encode(audio_bytes)
|
186
|
+
return audio_base64
|
cutted/core/gemini.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import os
|
2
2
|
import sys
|
3
|
+
import base64
|
3
4
|
from google import genai
|
4
5
|
from google.genai import types
|
5
6
|
|
@@ -15,13 +16,21 @@ class GeminiClient:
|
|
15
16
|
api_key=GEMINI_API_KEY,
|
16
17
|
)
|
17
18
|
|
18
|
-
def generate(self, prompt: str, model: str = "gemini-2.0-flash"):
|
19
|
+
def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
|
20
|
+
parts=[
|
21
|
+
types.Part.from_text(text=prompt),
|
22
|
+
]
|
23
|
+
|
24
|
+
if audio_base64:
|
25
|
+
parts.append(types.Part.from_bytes(
|
26
|
+
mime_type="audio/mpeg",
|
27
|
+
data=base64.b64decode(audio_base64)
|
28
|
+
))
|
29
|
+
|
19
30
|
contents = [
|
20
31
|
types.Content(
|
21
32
|
role="user",
|
22
|
-
parts=[
|
23
|
-
types.Part.from_text(text=prompt),
|
24
|
-
],
|
33
|
+
parts=parts
|
25
34
|
),
|
26
35
|
]
|
27
36
|
tools = [
|
@@ -0,0 +1,55 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: cutted
|
3
|
+
Version: 0.3.3
|
4
|
+
Summary: AI-powered audio editor controllable via natural language.
|
5
|
+
Author-email: simon0302010 <simon0302010@gmail.com>
|
6
|
+
License-Expression: GPL-3.0
|
7
|
+
Project-URL: Homepage, https://github.com/simon0302010/Cutted
|
8
|
+
Requires-Python: <=3.13,>=3.9
|
9
|
+
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: customtkinter
|
12
|
+
Requires-Dist: matplotlib
|
13
|
+
Requires-Dist: numpy
|
14
|
+
Requires-Dist: pydub
|
15
|
+
Requires-Dist: pygame
|
16
|
+
Requires-Dist: google-genai
|
17
|
+
Requires-Dist: python-dotenv
|
18
|
+
Provides-Extra: whisper
|
19
|
+
Requires-Dist: whisper-timestamped; extra == "whisper"
|
20
|
+
Dynamic: license-file
|
21
|
+
|
22
|
+

|
23
|
+

|
24
|
+

|
25
|
+
|
26
|
+
# Cutted
|
27
|
+
|
28
|
+
AI-powered audio editor controlled by natural language 🚀
|
29
|
+
|
30
|
+
Let AI handle your audio editing with simple commands:
|
31
|
+
- Automatically detect quiet or loud parts
|
32
|
+
- Transcribe audio (if Whisper is installed)
|
33
|
+
- Cut, trim, or adjust volume for specific segments
|
34
|
+
|
35
|
+
## Installation
|
36
|
+
|
37
|
+
1. **Install system dependencies (e.g., FFmpeg).**
|
38
|
+
2. **Install Cutted**
|
39
|
+
```bash
|
40
|
+
pip install cutted
|
41
|
+
```
|
42
|
+
**With Whisper support**
|
43
|
+
```bash
|
44
|
+
pip install cutted[whisper]
|
45
|
+
```
|
46
|
+
|
47
|
+
## Usage
|
48
|
+
|
49
|
+
1. **Launch the app:**
|
50
|
+
```bash
|
51
|
+
python -m cutted
|
52
|
+
```
|
53
|
+
2. **Load an audio file** (MP3, WAV, etc.)
|
54
|
+
3. **Play, Cut, Undo** – all from the GUI or with text commands ✂️
|
55
|
+
4. **Export** as MP3 or WAV
|
@@ -0,0 +1,12 @@
|
|
1
|
+
cutted/__init__.py,sha256=F1mzO6qI2gD6d_DQsjZItLuIfNfa1Te5KwCMThWDQT4,49
|
2
|
+
cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
|
3
|
+
cutted/app.py,sha256=HaJ3yu8-WVDGKZmFcTVG4CK8q13eHyUQaIyOWMZWV68,10764
|
4
|
+
cutted/core/audio_processor.py,sha256=gKBJ1wpdrX2IozmXDzW7MoFVp9uq0Pb3ezz9R3Ahmnw,6691
|
5
|
+
cutted/core/gemini.py,sha256=yHsQXk4tDHcW5qJBuL2LpPvdChimxlmEbu76BvsoeY4,3108
|
6
|
+
cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
|
7
|
+
cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
|
8
|
+
cutted-0.3.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
+
cutted-0.3.3.dist-info/METADATA,sha256=ehEIEm5qz3wVqBY74Itpwooe4SYOFpXISXUMZO7LVh4,1503
|
10
|
+
cutted-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
11
|
+
cutted-0.3.3.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
|
12
|
+
cutted-0.3.3.dist-info/RECORD,,
|
cutted-0.3.1.dist-info/METADATA
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: cutted
|
3
|
-
Version: 0.3.1
|
4
|
-
Summary: AI-powered audio editor controllable via natural language.
|
5
|
-
Author-email: simon0302010 <simon0302010@gmail.com>
|
6
|
-
License-Expression: GPL-3.0
|
7
|
-
Project-URL: Homepage, https://github.com/simon0302010/Cutted
|
8
|
-
Requires-Python: <=3.13,>=3.9
|
9
|
-
Description-Content-Type: text/markdown
|
10
|
-
License-File: LICENSE
|
11
|
-
Requires-Dist: customtkinter
|
12
|
-
Requires-Dist: matplotlib
|
13
|
-
Requires-Dist: numpy
|
14
|
-
Requires-Dist: pydub
|
15
|
-
Requires-Dist: pygame
|
16
|
-
Requires-Dist: google-genai
|
17
|
-
Requires-Dist: python-dotenv
|
18
|
-
Provides-Extra: whisper
|
19
|
-
Requires-Dist: whisper-timestamped; extra == "whisper"
|
20
|
-
Dynamic: license-file
|
21
|
-
|
22
|
-
# Cutted
|
23
|
-
AI-powered audio editor controllable via natural language
|
24
|
-
|
25
|
-
Note: This app is currently not functional.
|
26
|
-
|
27
|
-
# Installation
|
28
|
-
|
29
|
-
Install dependencies:
|
30
|
-
|
31
|
-
**Debian:**
|
32
|
-
```bash
|
33
|
-
sudo apt update
|
34
|
-
sudo apt install ffmpeg git
|
35
|
-
```
|
36
|
-
|
37
|
-
**Arch Linux:**
|
38
|
-
```bash
|
39
|
-
sudo pacman -Syu ffmpeg git
|
40
|
-
```
|
41
|
-
|
42
|
-
Clone the repository:
|
43
|
-
|
44
|
-
```bash
|
45
|
-
git clone https://github.com/simon0302010/Cutted.git
|
46
|
-
cd Cutted
|
47
|
-
```
|
48
|
-
|
49
|
-
Create a virtual environment:
|
50
|
-
|
51
|
-
```bash
|
52
|
-
python -m venv venv
|
53
|
-
source venv/bin/activate
|
54
|
-
```
|
55
|
-
|
56
|
-
Install the package:
|
57
|
-
|
58
|
-
```bash
|
59
|
-
pip install .
|
60
|
-
```
|
61
|
-
|
62
|
-
# Usage
|
63
|
-
|
64
|
-
Run the package:
|
65
|
-
|
66
|
-
```bash
|
67
|
-
python -m cutted
|
68
|
-
```
|
cutted-0.3.1.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
cutted/__init__.py,sha256=Q7ZgbkmUOc-Gzy42zzZcKnzKxdZ023pITyXAC0BDBlM,49
|
2
|
-
cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
|
3
|
-
cutted/app.py,sha256=xf5LFHdPLz1WNP05eRruHX2hKnaei8CUrqzVSODweAM,8952
|
4
|
-
cutted/core/audio_processor.py,sha256=7-XCuPPTlozeuaD2LqyzwRGinu0NvowTLbAh2X4XJ98,5182
|
5
|
-
cutted/core/gemini.py,sha256=Ts_EbC1-rO9jIsdSlzKcmjLVS1o663GmfTdzmix12kE,2872
|
6
|
-
cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
|
7
|
-
cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
|
8
|
-
cutted-0.3.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
-
cutted-0.3.1.dist-info/METADATA,sha256=6plv-IaqUXQeeAnChYaMBjYSLh8FBTAtvyQqZkRTuuA,1201
|
10
|
-
cutted-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
11
|
-
cutted-0.3.1.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
|
12
|
-
cutted-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|