cutted 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cutted/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.3.2"
1
+ __version__ = "0.4.0"
2
2
  __author__ = "simon0302010"
cutted/app.py CHANGED
@@ -25,6 +25,8 @@ class CuttedApp:
25
25
  self.canvas = None
26
26
  self.cursor_line = None
27
27
  self.last_slider_update = 0
28
+ self.slider_value = 0
29
+ self.playback_start_time = 0
28
30
  self.is_playing = False
29
31
  self.last_states = []
30
32
  self.setup_ui()
@@ -107,45 +109,54 @@ class CuttedApp:
107
109
  def update_plot(self):
108
110
  if self.canvas:
109
111
  self.canvas.get_tk_widget().destroy()
110
-
112
+ if hasattr(self, "slider") and self.slider is not None:
113
+ self.slider.destroy()
114
+
111
115
  fig = self.AudioProcessor.plot_audio()
112
116
  self.ax = fig.axes[0]
113
117
  self.canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
114
118
  self.canvas.draw()
115
-
116
- self.audio_lenght = int(round(self.AudioProcessor.get_lenght()))
117
-
119
+
120
+ self.audio_length = float(self.AudioProcessor.get_length())
121
+
118
122
  slider_width = self.root.winfo_width() - 40
123
+ if self.slider_value > self.audio_length:
124
+ self.slider_value = self.audio_length
125
+ if self.slider_value < 0:
126
+ self.slider_value = 0
127
+
119
128
  self.slider = customtkinter.CTkSlider(
120
- self.root, from_=0, to=self.audio_lenght, command=self.set_cursor, width=slider_width
129
+ self.root, from_=0, to=self.audio_length, command=self.set_cursor, width=slider_width
121
130
  )
122
- self.slider.set(0)
123
-
131
+ self.slider.set(self.slider_value)
124
132
  self.slider.place(relx=0.5, rely=1.0, anchor="s", y=-130)
125
-
133
+ self.set_cursor(self.slider_value)
134
+
126
135
  self.canvas.get_tk_widget().pack(
127
136
  fill=customtkinter.BOTH,
128
137
  expand=True,
129
138
  padx=10,
130
139
  pady=10,
131
140
  )
132
-
133
- self.cursor_line = self.ax.axvline(x=0, color="red", linewidth=2)
141
+
142
+ self.cursor_line = self.ax.axvline(x=self.slider_value, color="red", linewidth=2)
134
143
  self.canvas.draw_idle()
135
144
 
136
145
  def set_cursor(self, value):
137
146
  now = time.time()
138
- if now - self.last_slider_update < 0.1: # 100ms
147
+ if now - self.last_slider_update < 0.05: # 100ms
139
148
  return
140
149
  self.last_slider_update = now
141
150
 
142
- self.slider_value = round(value)
151
+ self.slider_value = round(value, 2)
143
152
 
144
153
  if self.cursor_line:
145
154
  self.cursor_line.set_xdata([self.slider_value, self.slider_value])
146
155
  self.canvas.draw_idle()
156
+ self.slider.set(self.slider_value)
157
+ self.set_cursor(self.slider_value)
147
158
 
148
- print(f"Slider Value: {self.slider_value}")
159
+ print_info(f"Slider Value: {self.slider_value}")
149
160
 
150
161
  def play_audio(self):
151
162
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -153,11 +164,16 @@ class CuttedApp:
153
164
  return
154
165
 
155
166
  start_time = self.slider.get() if hasattr(self, 'slider') else 0
167
+ self.playback_start_time = start_time
156
168
  self.AudioProcessor.play_audio(start_time)
157
169
 
158
170
  def stop_audio(self):
159
- self.AudioProcessor.stop_audio()
171
+ rel_pos = self.AudioProcessor.stop_audio()
160
172
  self.is_playing = False
173
+ abs_pos = self.playback_start_time + rel_pos
174
+ self.slider.set(abs_pos)
175
+ self.set_cursor(abs_pos)
176
+ print_info(f"Absolute position in audio: {abs_pos:.2f}s")
161
177
 
162
178
  def export_audio(self):
163
179
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -184,6 +200,7 @@ class CuttedApp:
184
200
  print_success(f"Audio exported to {save_path}")
185
201
 
186
202
  def send_prompt(self):
203
+ print(self.AudioProcessor.get_waveform_summary())
187
204
  self.save_state()
188
205
 
189
206
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -192,14 +209,18 @@ class CuttedApp:
192
209
 
193
210
  text = self.entry.get()
194
211
  if text.strip():
195
- full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_lenght())}s long."
212
+ full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length(), 2)}s long. The cursor of the user is currently at {self.slider_value}s."
213
+ full_prompt += "\nHere is a the waveform samples of the audio. You can use them to determine silent parts, loud parts, silences, beats and much more.\nYou are forced to used these if the user requires you to cut out silent of quiet parts for example."
214
+ full_prompt += "\nAll of your tools should be enough to fullfill almost every task.\nNEVER ASK FOR CONFIRMATION FROM THE USER. DO EVERYTHING!"
215
+ full_prompt += f"\n{self.AudioProcessor.get_waveform_summary()}\n"
196
216
  if whisper_support:
197
217
  if self.use_transcript_checkbox.get():
198
218
  if not self.whisper:
199
- messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running.")
219
+ messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running. Press OK to continue.")
200
220
  self.whisper = transcribe.Whisper()
201
221
  transcript = self.whisper.transcribe(self.AudioProcessor.audio_path)
202
222
  full_prompt += f"\nThis is a transcript with per word timestamps of the audio:\n{transcript}"
223
+ full_prompt += "\nThe transcript likely has issues. If you need infos about some words they might just be misspelled in the audio."
203
224
  full_prompt += f"\n\nUser Prompt: {text}"
204
225
  self.entry.delete(0, "end")
205
226
 
@@ -216,10 +237,17 @@ class CuttedApp:
216
237
  result = self.AudioProcessor.cut(args["start"], args["end"])
217
238
  if not result:
218
239
  messagebox.showerror("Error", "Please try again.")
240
+ if function_call.name == "change_volume":
241
+ print_info("Change Volume function called")
242
+ args = function_call.args
243
+ result = self.AudioProcessor.change_volume(args["start"], args["end"], args["volume"])
244
+ if not result:
245
+ messagebox.showerror("Error", "Please try again.")
219
246
  self.update_plot()
220
247
  elif text_result:
221
248
  messagebox.showerror("Error", text_result.strip())
222
249
  else:
250
+ messagebox.showerror("Error", "Gemini returned no data")
223
251
  print_fail("Gemini returned no data")
224
252
 
225
253
  def save_state(self):
@@ -62,7 +62,21 @@ class AudioProcessor:
62
62
 
63
63
  return fig
64
64
 
65
- def get_lenght(self):
65
+ def get_waveform_summary(self):
66
+ num_samples = round(self.get_length())
67
+ if self.audio is None:
68
+ return "No audio loaded."
69
+ samples = np.array(self.audio.get_array_of_samples())
70
+ if self.audio.channels == 2:
71
+ samples = samples.reshape((-1, 2))
72
+ samples = samples.mean(axis=1)
73
+ samples = samples / np.max(np.abs(samples))
74
+ indices = np.linspace(0, len(samples)-1, num_samples).astype(int)
75
+ summary = samples[indices]
76
+ return f"Waveform samples (normalized, {num_samples} points):\n" + \
77
+ " ".join(f"{x:.2f}" for x in summary)
78
+
79
+ def get_length(self):
66
80
  self.duration = self.audio.duration_seconds
67
81
  self.duration = round(self.duration, 2)
68
82
  return self.duration
@@ -70,17 +84,25 @@ class AudioProcessor:
70
84
  def cut(self, start, end):
71
85
  if len(start) == len(end):
72
86
  if len(start) == 1:
73
- print_info(f"Cutting from {start[0]} to {end[0]}")
74
- start_ms = round(start[0] * 1000)
75
- end_ms = round(end[0] * 1000)
87
+ single_start = max(0, start[0])
88
+ single_end = max(0, end[0])
89
+ if single_end <= single_start:
90
+ print_fail("End time must be greater than start time.")
91
+ return False
92
+ print_info(f"Cutting from {single_start} to {single_end}")
93
+ start_ms = round(single_start * 1000)
94
+ end_ms = round(single_end * 1000)
76
95
  self.audio = self.audio[:start_ms] + self.audio[end_ms:]
77
96
  return True
78
97
  else:
79
98
  time_sets = list(zip(start, end))
80
99
  subtract_time = 0
81
100
  for single_start, single_end in time_sets:
82
- single_start = single_start - subtract_time
83
- single_end = single_end - subtract_time
101
+ single_start = max(0, single_start - subtract_time)
102
+ single_end = max(0, single_end - subtract_time)
103
+ if single_end <= single_start:
104
+ print_fail("End time must be greater than start time.")
105
+ continue
84
106
  print_info(f"Cutting from {single_start} to {single_end}")
85
107
  start_ms = round(single_start * 1000)
86
108
  end_ms = round(single_end * 1000)
@@ -90,6 +112,28 @@ class AudioProcessor:
90
112
  else:
91
113
  return False
92
114
 
115
+ def change_volume(self, start, end, volume):
116
+ if len(start) == len(end) == len(volume):
117
+ time_sets = list(zip(start, end, volume))
118
+ for single_start, single_end, single_volume in time_sets:
119
+ if single_end <= single_start:
120
+ print_fail("End time must be greater than start time.")
121
+ continue
122
+ print_info(f"Changing volume of {single_start} - {single_end} to {str(single_volume)}")
123
+
124
+ start_ms = round(single_start * 1000)
125
+ end_ms = round(single_end * 1000)
126
+ part1 = self.audio[:start_ms]
127
+ part2 = self.audio[start_ms:end_ms]
128
+ part3 = self.audio[end_ms:]
129
+
130
+ part2 = part2.apply_gain(ratio_to_db(single_volume))
131
+
132
+ self.audio = part1 + part2 + part3
133
+ return True
134
+ else:
135
+ return False
136
+
93
137
  def play_audio(self, start_time=0):
94
138
  if self.audio is None:
95
139
  print_fail("No audio loaded.")
@@ -123,11 +167,15 @@ class AudioProcessor:
123
167
  def stop_audio(self):
124
168
  try:
125
169
  if pygame.mixer.get_init():
170
+ pos_ms = pygame.mixer.music.get_pos()
171
+ pos_sec = pos_ms / 1000 if pos_ms >= 0 else 0
126
172
  pygame.mixer.music.stop()
127
173
  self.is_playing_var = False
128
- print_info("Audio playback stopped")
174
+ print_info(f"Audio playback stopped at {pos_sec:.2f}s")
175
+ return pos_sec
129
176
  except Exception as e:
130
177
  print_warn(f"Error stopping audio: {e}")
178
+ return 0
131
179
 
132
180
  def is_playing(self):
133
181
  try:
@@ -142,7 +190,7 @@ class AudioProcessor:
142
190
  return None
143
191
 
144
192
  return {
145
- "duration": self.get_lenght(),
193
+ "duration": self.get_length(),
146
194
  "channels": self.audio.channels,
147
195
  "frame_rate": self.audio.frame_rate,
148
196
  "sample_width": self.audio.sample_width
cutted/core/gemini.py CHANGED
@@ -15,6 +15,7 @@ class GeminiClient:
15
15
  self.client = genai.Client(
16
16
  api_key=GEMINI_API_KEY,
17
17
  )
18
+ self.contents = []
18
19
 
19
20
  def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
20
21
  parts=[
@@ -27,38 +28,76 @@ class GeminiClient:
27
28
  data=base64.b64decode(audio_base64)
28
29
  ))
29
30
 
30
- contents = [
31
+ self.contents.append(
31
32
  types.Content(
32
33
  role="user",
33
34
  parts=parts
34
- ),
35
- ]
35
+ )
36
+ )
36
37
  tools = [
37
38
  types.Tool(
38
39
  function_declarations=[
39
40
  types.FunctionDeclaration(
40
41
  name="cut_audio",
41
- description="Cuts specified parts out of audio. Multiple parts can be cut if a list of both start and end values is used as property.",
42
+ description=(
43
+ "Remove one or more segments from the audio by specifying start and end times in seconds. "
44
+ "You can cut multiple segments at once by providing lists of start and end values. "
45
+ "Each segment defined by a start and end pair will be removed from the audio."
46
+ ),
47
+ parameters=genai.types.Schema(
48
+ type=genai.types.Type.OBJECT,
49
+ required=["start", "end"],
50
+ properties={
51
+ "start": genai.types.Schema(
52
+ type=genai.types.Type.ARRAY,
53
+ items=genai.types.Schema(
54
+ type=genai.types.Type.NUMBER,
55
+ ),
56
+ ),
57
+ "end": genai.types.Schema(
58
+ type=genai.types.Type.ARRAY,
59
+ items=genai.types.Schema(
60
+ type=genai.types.Type.NUMBER,
61
+ ),
62
+ ),
63
+ },
64
+ ),
65
+ ),
66
+ types.FunctionDeclaration(
67
+ name="change_volume",
68
+ description=(
69
+ "Adjust the volume of specific segments in the audio by specifying lists of start times, end times, "
70
+ "and volume factors. Each segment between a start and end time will have its volume changed by the "
71
+ "corresponding factor (e.g., 0.5 for half volume, 2.0 for double volume). Multiple segments can be "
72
+ "adjusted at once by providing lists of values."
73
+ ),
42
74
  parameters=genai.types.Schema(
43
- type = genai.types.Type.OBJECT,
44
- required = ["start", "end"],
45
- properties = {
75
+ type=genai.types.Type.OBJECT,
76
+ required=["start", "end", "volume"],
77
+ properties={
46
78
  "start": genai.types.Schema(
47
- type = genai.types.Type.ARRAY,
48
- items = genai.types.Schema(
49
- type = genai.types.Type.NUMBER,
79
+ type=genai.types.Type.ARRAY,
80
+ items=genai.types.Schema(
81
+ type=genai.types.Type.NUMBER,
50
82
  ),
51
83
  ),
52
84
  "end": genai.types.Schema(
53
- type = genai.types.Type.ARRAY,
54
- items = genai.types.Schema(
55
- type = genai.types.Type.NUMBER,
85
+ type=genai.types.Type.ARRAY,
86
+ items=genai.types.Schema(
87
+ type=genai.types.Type.NUMBER,
56
88
  ),
89
+ ),
90
+ "volume": genai.types.Schema(
91
+ type=genai.types.Type.ARRAY,
92
+ items=genai.types.Schema(
93
+ type=genai.types.Type.NUMBER,
57
94
  ),
95
+ ),
58
96
  },
59
97
  ),
60
98
  ),
61
- ])
99
+ ]
100
+ )
62
101
  ]
63
102
  generate_content_config = types.GenerateContentConfig(
64
103
  tools=tools,
@@ -67,7 +106,7 @@ class GeminiClient:
67
106
 
68
107
  response = self.client.models.generate_content(
69
108
  model=model,
70
- contents=contents,
109
+ contents=self.contents,
71
110
  config=generate_content_config,
72
111
  )
73
112
 
@@ -81,7 +120,27 @@ class GeminiClient:
81
120
  if part.text:
82
121
  text_response = part.text
83
122
  except TypeError:
84
- return None, None
123
+ pass
124
+
125
+ model_parts = []
126
+ if text_response:
127
+ model_parts.append(
128
+ types.Part.from_text(text=text_response)
129
+ )
130
+ if function_call:
131
+ model_parts.append(
132
+ types.Part.from_function_call(
133
+ name=function_call.name,
134
+ args=function_call.args
135
+ )
136
+ )
137
+
138
+ self.contents.append(
139
+ types.Content(
140
+ role="model",
141
+ parts=model_parts
142
+ )
143
+ )
85
144
 
86
145
  return function_call, text_response
87
146
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cutted
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Summary: AI-powered audio editor controllable via natural language.
5
5
  Author-email: simon0302010 <simon0302010@gmail.com>
6
6
  License-Expression: GPL-3.0
@@ -0,0 +1,12 @@
1
+ cutted/__init__.py,sha256=7eWSPFhMOE6u-f5s_QomBlTjCze86s3xKt-v0bLJAAs,49
2
+ cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
+ cutted/app.py,sha256=LyVv3RHiAemtcEgLiZCcyenAD_QliaFIZ08JfCNwxq4,11543
4
+ cutted/core/audio_processor.py,sha256=OBxNAKs67zqIfnJIsSgaMDa0UbB0R0mccyWcgNs5Bk0,7663
5
+ cutted/core/gemini.py,sha256=0ATm5gHZWmu4k_9SZOsOx5XYY7nEer5LOzt1SMfQlIk,5836
6
+ cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
+ cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
8
+ cutted-0.4.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
+ cutted-0.4.0.dist-info/METADATA,sha256=wEeg-ive3z6zwUmLvZ_3Lh8W0xhsj12mMgSMASxWdUg,1503
10
+ cutted-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ cutted-0.4.0.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
+ cutted-0.4.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- cutted/__init__.py,sha256=ttFLenYuOpXQTaR9nB0dF-3zFB_PeksXf9R4r_TB8S8,49
2
- cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
- cutted/app.py,sha256=Ay_yVrPt1TQAE3lqmrII88lFoGZb5Mh5cDwediM9ZG8,9592
4
- cutted/core/audio_processor.py,sha256=JdPeWO_jAIn_uZFeZYQJX3RC0Vy8GClKrX7xGk4pXR4,5426
5
- cutted/core/gemini.py,sha256=yHsQXk4tDHcW5qJBuL2LpPvdChimxlmEbu76BvsoeY4,3108
6
- cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
- cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
8
- cutted-0.3.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
- cutted-0.3.2.dist-info/METADATA,sha256=MPuSW4_LjNch0PXeFrGtRW1C4CuJ7unNv1ZZhvFHmSM,1503
10
- cutted-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- cutted-0.3.2.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
- cutted-0.3.2.dist-info/RECORD,,
File without changes