cutted 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cutted/__init__.py +1 -1
- cutted/app.py +44 -16
- cutted/core/audio_processor.py +56 -8
- cutted/core/gemini.py +75 -16
- {cutted-0.3.2.dist-info → cutted-0.4.0.dist-info}/METADATA +1 -1
- cutted-0.4.0.dist-info/RECORD +12 -0
- cutted-0.3.2.dist-info/RECORD +0 -12
- {cutted-0.3.2.dist-info → cutted-0.4.0.dist-info}/WHEEL +0 -0
- {cutted-0.3.2.dist-info → cutted-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {cutted-0.3.2.dist-info → cutted-0.4.0.dist-info}/top_level.txt +0 -0
cutted/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
__version__ = "0.
|
1
|
+
__version__ = "0.4.0"
|
2
2
|
__author__ = "simon0302010"
|
cutted/app.py
CHANGED
@@ -25,6 +25,8 @@ class CuttedApp:
|
|
25
25
|
self.canvas = None
|
26
26
|
self.cursor_line = None
|
27
27
|
self.last_slider_update = 0
|
28
|
+
self.slider_value = 0
|
29
|
+
self.playback_start_time = 0
|
28
30
|
self.is_playing = False
|
29
31
|
self.last_states = []
|
30
32
|
self.setup_ui()
|
@@ -107,45 +109,54 @@ class CuttedApp:
|
|
107
109
|
def update_plot(self):
|
108
110
|
if self.canvas:
|
109
111
|
self.canvas.get_tk_widget().destroy()
|
110
|
-
|
112
|
+
if hasattr(self, "slider") and self.slider is not None:
|
113
|
+
self.slider.destroy()
|
114
|
+
|
111
115
|
fig = self.AudioProcessor.plot_audio()
|
112
116
|
self.ax = fig.axes[0]
|
113
117
|
self.canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
|
114
118
|
self.canvas.draw()
|
115
|
-
|
116
|
-
self.
|
117
|
-
|
119
|
+
|
120
|
+
self.audio_length = float(self.AudioProcessor.get_length())
|
121
|
+
|
118
122
|
slider_width = self.root.winfo_width() - 40
|
123
|
+
if self.slider_value > self.audio_length:
|
124
|
+
self.slider_value = self.audio_length
|
125
|
+
if self.slider_value < 0:
|
126
|
+
self.slider_value = 0
|
127
|
+
|
119
128
|
self.slider = customtkinter.CTkSlider(
|
120
|
-
self.root, from_=0, to=self.
|
129
|
+
self.root, from_=0, to=self.audio_length, command=self.set_cursor, width=slider_width
|
121
130
|
)
|
122
|
-
self.slider.set(
|
123
|
-
|
131
|
+
self.slider.set(self.slider_value)
|
124
132
|
self.slider.place(relx=0.5, rely=1.0, anchor="s", y=-130)
|
125
|
-
|
133
|
+
self.set_cursor(self.slider_value)
|
134
|
+
|
126
135
|
self.canvas.get_tk_widget().pack(
|
127
136
|
fill=customtkinter.BOTH,
|
128
137
|
expand=True,
|
129
138
|
padx=10,
|
130
139
|
pady=10,
|
131
140
|
)
|
132
|
-
|
133
|
-
self.cursor_line = self.ax.axvline(x=
|
141
|
+
|
142
|
+
self.cursor_line = self.ax.axvline(x=self.slider_value, color="red", linewidth=2)
|
134
143
|
self.canvas.draw_idle()
|
135
144
|
|
136
145
|
def set_cursor(self, value):
|
137
146
|
now = time.time()
|
138
|
-
if now - self.last_slider_update < 0.
|
147
|
+
if now - self.last_slider_update < 0.05: # 100ms
|
139
148
|
return
|
140
149
|
self.last_slider_update = now
|
141
150
|
|
142
|
-
self.slider_value = round(value)
|
151
|
+
self.slider_value = round(value, 2)
|
143
152
|
|
144
153
|
if self.cursor_line:
|
145
154
|
self.cursor_line.set_xdata([self.slider_value, self.slider_value])
|
146
155
|
self.canvas.draw_idle()
|
156
|
+
self.slider.set(self.slider_value)
|
157
|
+
self.set_cursor(self.slider_value)
|
147
158
|
|
148
|
-
|
159
|
+
print_info(f"Slider Value: {self.slider_value}")
|
149
160
|
|
150
161
|
def play_audio(self):
|
151
162
|
if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
|
@@ -153,11 +164,16 @@ class CuttedApp:
|
|
153
164
|
return
|
154
165
|
|
155
166
|
start_time = self.slider.get() if hasattr(self, 'slider') else 0
|
167
|
+
self.playback_start_time = start_time
|
156
168
|
self.AudioProcessor.play_audio(start_time)
|
157
169
|
|
158
170
|
def stop_audio(self):
|
159
|
-
self.AudioProcessor.stop_audio()
|
171
|
+
rel_pos = self.AudioProcessor.stop_audio()
|
160
172
|
self.is_playing = False
|
173
|
+
abs_pos = self.playback_start_time + rel_pos
|
174
|
+
self.slider.set(abs_pos)
|
175
|
+
self.set_cursor(abs_pos)
|
176
|
+
print_info(f"Absolute position in audio: {abs_pos:.2f}s")
|
161
177
|
|
162
178
|
def export_audio(self):
|
163
179
|
if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
|
@@ -184,6 +200,7 @@ class CuttedApp:
|
|
184
200
|
print_success(f"Audio exported to {save_path}")
|
185
201
|
|
186
202
|
def send_prompt(self):
|
203
|
+
print(self.AudioProcessor.get_waveform_summary())
|
187
204
|
self.save_state()
|
188
205
|
|
189
206
|
if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
|
@@ -192,14 +209,18 @@ class CuttedApp:
|
|
192
209
|
|
193
210
|
text = self.entry.get()
|
194
211
|
if text.strip():
|
195
|
-
full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.
|
212
|
+
full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length(), 2)}s long. The cursor of the user is currently at {self.slider_value}s."
|
213
|
+
full_prompt += "\nHere is a the waveform samples of the audio. You can use them to determine silent parts, loud parts, silences, beats and much more.\nYou are forced to used these if the user requires you to cut out silent of quiet parts for example."
|
214
|
+
full_prompt += "\nAll of your tools should be enough to fullfill almost every task.\nNEVER ASK FOR CONFIRMATION FROM THE USER. DO EVERYTHING!"
|
215
|
+
full_prompt += f"\n{self.AudioProcessor.get_waveform_summary()}\n"
|
196
216
|
if whisper_support:
|
197
217
|
if self.use_transcript_checkbox.get():
|
198
218
|
if not self.whisper:
|
199
|
-
messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running.")
|
219
|
+
messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running. Press OK to continue.")
|
200
220
|
self.whisper = transcribe.Whisper()
|
201
221
|
transcript = self.whisper.transcribe(self.AudioProcessor.audio_path)
|
202
222
|
full_prompt += f"\nThis is a transcript with per word timestamps of the audio:\n{transcript}"
|
223
|
+
full_prompt += "\nThe transcript likely has issues. If you need infos about some words they might just be misspelled in the audio."
|
203
224
|
full_prompt += f"\n\nUser Prompt: {text}"
|
204
225
|
self.entry.delete(0, "end")
|
205
226
|
|
@@ -216,10 +237,17 @@ class CuttedApp:
|
|
216
237
|
result = self.AudioProcessor.cut(args["start"], args["end"])
|
217
238
|
if not result:
|
218
239
|
messagebox.showerror("Error", "Please try again.")
|
240
|
+
if function_call.name == "change_volume":
|
241
|
+
print_info("Change Volume function called")
|
242
|
+
args = function_call.args
|
243
|
+
result = self.AudioProcessor.change_volume(args["start"], args["end"], args["volume"])
|
244
|
+
if not result:
|
245
|
+
messagebox.showerror("Error", "Please try again.")
|
219
246
|
self.update_plot()
|
220
247
|
elif text_result:
|
221
248
|
messagebox.showerror("Error", text_result.strip())
|
222
249
|
else:
|
250
|
+
messagebox.showerror("Error", "Gemini returned no data")
|
223
251
|
print_fail("Gemini returned no data")
|
224
252
|
|
225
253
|
def save_state(self):
|
cutted/core/audio_processor.py
CHANGED
@@ -62,7 +62,21 @@ class AudioProcessor:
|
|
62
62
|
|
63
63
|
return fig
|
64
64
|
|
65
|
-
def
|
65
|
+
def get_waveform_summary(self):
|
66
|
+
num_samples = round(self.get_length())
|
67
|
+
if self.audio is None:
|
68
|
+
return "No audio loaded."
|
69
|
+
samples = np.array(self.audio.get_array_of_samples())
|
70
|
+
if self.audio.channels == 2:
|
71
|
+
samples = samples.reshape((-1, 2))
|
72
|
+
samples = samples.mean(axis=1)
|
73
|
+
samples = samples / np.max(np.abs(samples))
|
74
|
+
indices = np.linspace(0, len(samples)-1, num_samples).astype(int)
|
75
|
+
summary = samples[indices]
|
76
|
+
return f"Waveform samples (normalized, {num_samples} points):\n" + \
|
77
|
+
" ".join(f"{x:.2f}" for x in summary)
|
78
|
+
|
79
|
+
def get_length(self):
|
66
80
|
self.duration = self.audio.duration_seconds
|
67
81
|
self.duration = round(self.duration, 2)
|
68
82
|
return self.duration
|
@@ -70,17 +84,25 @@ class AudioProcessor:
|
|
70
84
|
def cut(self, start, end):
|
71
85
|
if len(start) == len(end):
|
72
86
|
if len(start) == 1:
|
73
|
-
|
74
|
-
|
75
|
-
|
87
|
+
single_start = max(0, start[0])
|
88
|
+
single_end = max(0, end[0])
|
89
|
+
if single_end <= single_start:
|
90
|
+
print_fail("End time must be greater than start time.")
|
91
|
+
return False
|
92
|
+
print_info(f"Cutting from {single_start} to {single_end}")
|
93
|
+
start_ms = round(single_start * 1000)
|
94
|
+
end_ms = round(single_end * 1000)
|
76
95
|
self.audio = self.audio[:start_ms] + self.audio[end_ms:]
|
77
96
|
return True
|
78
97
|
else:
|
79
98
|
time_sets = list(zip(start, end))
|
80
99
|
subtract_time = 0
|
81
100
|
for single_start, single_end in time_sets:
|
82
|
-
single_start = single_start - subtract_time
|
83
|
-
single_end = single_end - subtract_time
|
101
|
+
single_start = max(0, single_start - subtract_time)
|
102
|
+
single_end = max(0, single_end - subtract_time)
|
103
|
+
if single_end <= single_start:
|
104
|
+
print_fail("End time must be greater than start time.")
|
105
|
+
continue
|
84
106
|
print_info(f"Cutting from {single_start} to {single_end}")
|
85
107
|
start_ms = round(single_start * 1000)
|
86
108
|
end_ms = round(single_end * 1000)
|
@@ -90,6 +112,28 @@ class AudioProcessor:
|
|
90
112
|
else:
|
91
113
|
return False
|
92
114
|
|
115
|
+
def change_volume(self, start, end, volume):
|
116
|
+
if len(start) == len(end) == len(volume):
|
117
|
+
time_sets = list(zip(start, end, volume))
|
118
|
+
for single_start, single_end, single_volume in time_sets:
|
119
|
+
if single_end <= single_start:
|
120
|
+
print_fail("End time must be greater than start time.")
|
121
|
+
continue
|
122
|
+
print_info(f"Changing volume of {single_start} - {single_end} to {str(single_volume)}")
|
123
|
+
|
124
|
+
start_ms = round(single_start * 1000)
|
125
|
+
end_ms = round(single_end * 1000)
|
126
|
+
part1 = self.audio[:start_ms]
|
127
|
+
part2 = self.audio[start_ms:end_ms]
|
128
|
+
part3 = self.audio[end_ms:]
|
129
|
+
|
130
|
+
part2 = part2.apply_gain(ratio_to_db(single_volume))
|
131
|
+
|
132
|
+
self.audio = part1 + part2 + part3
|
133
|
+
return True
|
134
|
+
else:
|
135
|
+
return False
|
136
|
+
|
93
137
|
def play_audio(self, start_time=0):
|
94
138
|
if self.audio is None:
|
95
139
|
print_fail("No audio loaded.")
|
@@ -123,11 +167,15 @@ class AudioProcessor:
|
|
123
167
|
def stop_audio(self):
|
124
168
|
try:
|
125
169
|
if pygame.mixer.get_init():
|
170
|
+
pos_ms = pygame.mixer.music.get_pos()
|
171
|
+
pos_sec = pos_ms / 1000 if pos_ms >= 0 else 0
|
126
172
|
pygame.mixer.music.stop()
|
127
173
|
self.is_playing_var = False
|
128
|
-
print_info("Audio playback stopped")
|
174
|
+
print_info(f"Audio playback stopped at {pos_sec:.2f}s")
|
175
|
+
return pos_sec
|
129
176
|
except Exception as e:
|
130
177
|
print_warn(f"Error stopping audio: {e}")
|
178
|
+
return 0
|
131
179
|
|
132
180
|
def is_playing(self):
|
133
181
|
try:
|
@@ -142,7 +190,7 @@ class AudioProcessor:
|
|
142
190
|
return None
|
143
191
|
|
144
192
|
return {
|
145
|
-
"duration": self.
|
193
|
+
"duration": self.get_length(),
|
146
194
|
"channels": self.audio.channels,
|
147
195
|
"frame_rate": self.audio.frame_rate,
|
148
196
|
"sample_width": self.audio.sample_width
|
cutted/core/gemini.py
CHANGED
@@ -15,6 +15,7 @@ class GeminiClient:
|
|
15
15
|
self.client = genai.Client(
|
16
16
|
api_key=GEMINI_API_KEY,
|
17
17
|
)
|
18
|
+
self.contents = []
|
18
19
|
|
19
20
|
def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
|
20
21
|
parts=[
|
@@ -27,38 +28,76 @@ class GeminiClient:
|
|
27
28
|
data=base64.b64decode(audio_base64)
|
28
29
|
))
|
29
30
|
|
30
|
-
contents
|
31
|
+
self.contents.append(
|
31
32
|
types.Content(
|
32
33
|
role="user",
|
33
34
|
parts=parts
|
34
|
-
)
|
35
|
-
|
35
|
+
)
|
36
|
+
)
|
36
37
|
tools = [
|
37
38
|
types.Tool(
|
38
39
|
function_declarations=[
|
39
40
|
types.FunctionDeclaration(
|
40
41
|
name="cut_audio",
|
41
|
-
description=
|
42
|
+
description=(
|
43
|
+
"Remove one or more segments from the audio by specifying start and end times in seconds. "
|
44
|
+
"You can cut multiple segments at once by providing lists of start and end values. "
|
45
|
+
"Each segment defined by a start and end pair will be removed from the audio."
|
46
|
+
),
|
47
|
+
parameters=genai.types.Schema(
|
48
|
+
type=genai.types.Type.OBJECT,
|
49
|
+
required=["start", "end"],
|
50
|
+
properties={
|
51
|
+
"start": genai.types.Schema(
|
52
|
+
type=genai.types.Type.ARRAY,
|
53
|
+
items=genai.types.Schema(
|
54
|
+
type=genai.types.Type.NUMBER,
|
55
|
+
),
|
56
|
+
),
|
57
|
+
"end": genai.types.Schema(
|
58
|
+
type=genai.types.Type.ARRAY,
|
59
|
+
items=genai.types.Schema(
|
60
|
+
type=genai.types.Type.NUMBER,
|
61
|
+
),
|
62
|
+
),
|
63
|
+
},
|
64
|
+
),
|
65
|
+
),
|
66
|
+
types.FunctionDeclaration(
|
67
|
+
name="change_volume",
|
68
|
+
description=(
|
69
|
+
"Adjust the volume of specific segments in the audio by specifying lists of start times, end times, "
|
70
|
+
"and volume factors. Each segment between a start and end time will have its volume changed by the "
|
71
|
+
"corresponding factor (e.g., 0.5 for half volume, 2.0 for double volume). Multiple segments can be "
|
72
|
+
"adjusted at once by providing lists of values."
|
73
|
+
),
|
42
74
|
parameters=genai.types.Schema(
|
43
|
-
type
|
44
|
-
required
|
45
|
-
properties
|
75
|
+
type=genai.types.Type.OBJECT,
|
76
|
+
required=["start", "end", "volume"],
|
77
|
+
properties={
|
46
78
|
"start": genai.types.Schema(
|
47
|
-
type
|
48
|
-
items
|
49
|
-
type
|
79
|
+
type=genai.types.Type.ARRAY,
|
80
|
+
items=genai.types.Schema(
|
81
|
+
type=genai.types.Type.NUMBER,
|
50
82
|
),
|
51
83
|
),
|
52
84
|
"end": genai.types.Schema(
|
53
|
-
type
|
54
|
-
items
|
55
|
-
type
|
85
|
+
type=genai.types.Type.ARRAY,
|
86
|
+
items=genai.types.Schema(
|
87
|
+
type=genai.types.Type.NUMBER,
|
56
88
|
),
|
89
|
+
),
|
90
|
+
"volume": genai.types.Schema(
|
91
|
+
type=genai.types.Type.ARRAY,
|
92
|
+
items=genai.types.Schema(
|
93
|
+
type=genai.types.Type.NUMBER,
|
57
94
|
),
|
95
|
+
),
|
58
96
|
},
|
59
97
|
),
|
60
98
|
),
|
61
|
-
]
|
99
|
+
]
|
100
|
+
)
|
62
101
|
]
|
63
102
|
generate_content_config = types.GenerateContentConfig(
|
64
103
|
tools=tools,
|
@@ -67,7 +106,7 @@ class GeminiClient:
|
|
67
106
|
|
68
107
|
response = self.client.models.generate_content(
|
69
108
|
model=model,
|
70
|
-
contents=contents,
|
109
|
+
contents=self.contents,
|
71
110
|
config=generate_content_config,
|
72
111
|
)
|
73
112
|
|
@@ -81,7 +120,27 @@ class GeminiClient:
|
|
81
120
|
if part.text:
|
82
121
|
text_response = part.text
|
83
122
|
except TypeError:
|
84
|
-
|
123
|
+
pass
|
124
|
+
|
125
|
+
model_parts = []
|
126
|
+
if text_response:
|
127
|
+
model_parts.append(
|
128
|
+
types.Part.from_text(text=text_response)
|
129
|
+
)
|
130
|
+
if function_call:
|
131
|
+
model_parts.append(
|
132
|
+
types.Part.from_function_call(
|
133
|
+
name=function_call.name,
|
134
|
+
args=function_call.args
|
135
|
+
)
|
136
|
+
)
|
137
|
+
|
138
|
+
self.contents.append(
|
139
|
+
types.Content(
|
140
|
+
role="model",
|
141
|
+
parts=model_parts
|
142
|
+
)
|
143
|
+
)
|
85
144
|
|
86
145
|
return function_call, text_response
|
87
146
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
cutted/__init__.py,sha256=7eWSPFhMOE6u-f5s_QomBlTjCze86s3xKt-v0bLJAAs,49
|
2
|
+
cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
|
3
|
+
cutted/app.py,sha256=LyVv3RHiAemtcEgLiZCcyenAD_QliaFIZ08JfCNwxq4,11543
|
4
|
+
cutted/core/audio_processor.py,sha256=OBxNAKs67zqIfnJIsSgaMDa0UbB0R0mccyWcgNs5Bk0,7663
|
5
|
+
cutted/core/gemini.py,sha256=0ATm5gHZWmu4k_9SZOsOx5XYY7nEer5LOzt1SMfQlIk,5836
|
6
|
+
cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
|
7
|
+
cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
|
8
|
+
cutted-0.4.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
+
cutted-0.4.0.dist-info/METADATA,sha256=wEeg-ive3z6zwUmLvZ_3Lh8W0xhsj12mMgSMASxWdUg,1503
|
10
|
+
cutted-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
11
|
+
cutted-0.4.0.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
|
12
|
+
cutted-0.4.0.dist-info/RECORD,,
|
cutted-0.3.2.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
cutted/__init__.py,sha256=ttFLenYuOpXQTaR9nB0dF-3zFB_PeksXf9R4r_TB8S8,49
|
2
|
-
cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
|
3
|
-
cutted/app.py,sha256=Ay_yVrPt1TQAE3lqmrII88lFoGZb5Mh5cDwediM9ZG8,9592
|
4
|
-
cutted/core/audio_processor.py,sha256=JdPeWO_jAIn_uZFeZYQJX3RC0Vy8GClKrX7xGk4pXR4,5426
|
5
|
-
cutted/core/gemini.py,sha256=yHsQXk4tDHcW5qJBuL2LpPvdChimxlmEbu76BvsoeY4,3108
|
6
|
-
cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
|
7
|
-
cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
|
8
|
-
cutted-0.3.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
-
cutted-0.3.2.dist-info/METADATA,sha256=MPuSW4_LjNch0PXeFrGtRW1C4CuJ7unNv1ZZhvFHmSM,1503
|
10
|
-
cutted-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
11
|
-
cutted-0.3.2.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
|
12
|
-
cutted-0.3.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|