cutted 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cutted/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.3.1"
1
+ __version__ = "0.3.3"
2
2
  __author__ = "simon0302010"
cutted/app.py CHANGED
@@ -25,6 +25,8 @@ class CuttedApp:
25
25
  self.canvas = None
26
26
  self.cursor_line = None
27
27
  self.last_slider_update = 0
28
+ self.slider_value = 0
29
+ self.playback_start_time = 0
28
30
  self.is_playing = False
29
31
  self.last_states = []
30
32
  self.setup_ui()
@@ -58,11 +60,19 @@ class CuttedApp:
58
60
  if whisper_support:
59
61
  self.use_transcript_checkbox = customtkinter.CTkCheckBox(
60
62
  self.root,
61
- text="Give Gemini a transcript (very slow)",
63
+ text="Send transcript to Gemini (slower, more accurate)",
62
64
  text_color="#888888",
63
65
  font=("Arial", 12)
64
66
  )
65
67
  self.use_transcript_checkbox.place(relx=0.0, rely=1.0, anchor="w", y=-12)
68
+
69
+ self.use_audio_checkbox = customtkinter.CTkCheckBox(
70
+ self.root,
71
+ text="Send audio to Gemini (buggy)",
72
+ text_color="#888888",
73
+ font=("Arial", 12)
74
+ )
75
+ self.use_audio_checkbox.place(relx=1.0, rely=1.0, anchor="e", y=-12)
66
76
 
67
77
  self.play_button = customtkinter.CTkButton(self.root, text="Play", command=self.play_audio, width=50)
68
78
  self.play_button.place(relx=0.3, rely=1.0, anchor="s", y=-30)
@@ -105,7 +115,7 @@ class CuttedApp:
105
115
  self.canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
106
116
  self.canvas.draw()
107
117
 
108
- self.audio_lenght = int(round(self.AudioProcessor.get_lenght()))
118
+ self.audio_lenght = int(round(self.AudioProcessor.get_length()))
109
119
 
110
120
  slider_width = self.root.winfo_width() - 40
111
121
  self.slider = customtkinter.CTkSlider(
@@ -145,11 +155,16 @@ class CuttedApp:
145
155
  return
146
156
 
147
157
  start_time = self.slider.get() if hasattr(self, 'slider') else 0
158
+ self.playback_start_time = start_time
148
159
  self.AudioProcessor.play_audio(start_time)
149
160
 
150
161
  def stop_audio(self):
151
- self.AudioProcessor.stop_audio()
162
+ rel_pos = self.AudioProcessor.stop_audio()
152
163
  self.is_playing = False
164
+ abs_pos = self.playback_start_time + rel_pos
165
+ self.slider.set(abs_pos)
166
+ self.set_cursor(abs_pos)
167
+ print_info(f"Absolute position in audio: {abs_pos:.2f}s")
153
168
 
154
169
  def export_audio(self):
155
170
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -176,6 +191,7 @@ class CuttedApp:
176
191
  print_success(f"Audio exported to {save_path}")
177
192
 
178
193
  def send_prompt(self):
194
+ print(self.AudioProcessor.get_waveform_summary())
179
195
  self.save_state()
180
196
 
181
197
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -183,32 +199,41 @@ class CuttedApp:
183
199
  return
184
200
 
185
201
  text = self.entry.get()
186
- full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_lenght())}s long."
187
- if whisper_support:
188
- if self.use_transcript_checkbox.get():
189
- if not self.whisper:
190
- messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running.")
191
- self.whisper = transcribe.Whisper()
192
- transcript = self.whisper.transcribe(self.AudioProcessor.audio_path)
193
- full_prompt += f"\nThis is a transcript with per word timestamps of the audio:\n{transcript}"
194
- full_prompt += f"\n\nUser Prompt: {text}"
195
- self.entry.delete(0, "end")
196
-
197
- function_call, text_result = self.gemini.generate(full_prompt)
198
-
199
- if function_call:
200
- print_info(f"Gemini called {function_call.name}")
201
- if function_call.name == "cut_audio":
202
- print_info("Cut function called")
203
- args = function_call.args
204
- result = self.AudioProcessor.cut(args["start"], args["end"])
205
- if not result:
206
- messagebox.showerror("Error", "Please try again.")
207
- self.update_plot()
208
- elif text_result:
209
- messagebox.showerror("Error", text_result.strip())
210
- else:
211
- print_fail("Gemini returned no data")
202
+ if text.strip():
203
+ full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length())}s long. The cursor of the user is currently at {self.slider_value}s."
204
+ full_prompt += "\nHere is a the waveform samples of the audio. You can use them to determine silent parts, loud parts, silences, beats and much more.\nYou are forced to used these if the user requires you to cut out silent of quiet parts for example."
205
+ full_prompt += "\nAll of your tools should be enough to fullfill almost every task.\nNEVER ASK FOR CONFIRMATION FROM THE USER. DO EVERYTHING!"
206
+ full_prompt += f"\n{self.AudioProcessor.get_waveform_summary()}\n"
207
+ if whisper_support:
208
+ if self.use_transcript_checkbox.get():
209
+ if not self.whisper:
210
+ messagebox.showinfo("Info", "Loading Whisper model. This may take a few minutes depending on your internet connection. See the progress in your command line. If this window appears to be frozen, the transcription is running. Press OK to continue.")
211
+ self.whisper = transcribe.Whisper()
212
+ transcript = self.whisper.transcribe(self.AudioProcessor.audio_path)
213
+ full_prompt += f"\nThis is a transcript with per word timestamps of the audio:\n{transcript}"
214
+ full_prompt += "\nThe transcript likely has issues. If you need infos about some words they might just be misspelled in the audio."
215
+ full_prompt += f"\n\nUser Prompt: {text}"
216
+ self.entry.delete(0, "end")
217
+
218
+ if self.use_audio_checkbox.get():
219
+ function_call, text_result = self.gemini.generate(full_prompt, audio_base64=self.AudioProcessor.get_audio_base64())
220
+ else:
221
+ function_call, text_result = self.gemini.generate(full_prompt)
222
+
223
+ if function_call:
224
+ print_info(f"Gemini called {function_call.name}")
225
+ if function_call.name == "cut_audio":
226
+ print_info("Cut function called")
227
+ args = function_call.args
228
+ result = self.AudioProcessor.cut(args["start"], args["end"])
229
+ if not result:
230
+ messagebox.showerror("Error", "Please try again.")
231
+ self.update_plot()
232
+ elif text_result:
233
+ messagebox.showerror("Error", text_result.strip())
234
+ else:
235
+ messagebox.showerror("Error", "Gemini returned no data")
236
+ print_fail("Gemini returned no data")
212
237
 
213
238
  def save_state(self):
214
239
  if hasattr(self.AudioProcessor, "audio") and self.AudioProcessor.audio is not None:
@@ -4,9 +4,8 @@ from .logger import *
4
4
  import numpy as np
5
5
  from matplotlib.figure import Figure
6
6
  import pygame
7
+ import base64
7
8
  import io
8
- import threading
9
- import time
10
9
 
11
10
  class AudioProcessor:
12
11
  def __init__(self):
@@ -63,7 +62,21 @@ class AudioProcessor:
63
62
 
64
63
  return fig
65
64
 
66
- def get_lenght(self):
65
def get_waveform_summary(self):
    """Return a short text summary of the waveform for use in a prompt.

    The audio is mixed down to one channel, scaled by its peak absolute
    value, and sampled at evenly spaced indices. The number of points is
    the rounded result of ``self.get_length()``.

    Returns:
        A header line followed by the space-separated values formatted
        with two decimals, or ``"No audio loaded."`` when there is no
        audio (or it contains no samples).
    """
    # Check for audio first: get_length() dereferences self.audio.
    audio = getattr(self, "audio", None)
    if audio is None:
        return "No audio loaded."

    samples = np.array(audio.get_array_of_samples(), dtype=float)
    if samples.size == 0:
        return "No audio loaded."

    # Samples are interleaved per frame; average the channels of each frame.
    channels = audio.channels
    if channels > 1:
        samples = samples.reshape((-1, channels)).mean(axis=1)

    # Skip normalisation for pure silence so the values stay 0 instead of NaN.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples = samples / peak

    num_samples = round(self.get_length())
    indices = np.linspace(0, len(samples) - 1, num_samples).astype(int)
    summary = samples[indices]
    return f"Waveform samples (normalized, {num_samples} points):\n" + \
        " ".join(f"{x:.2f}" for x in summary)
78
+
79
def get_length(self):
    """Return the audio duration in seconds, rounded to two decimals.

    The result is also stored on ``self.duration``. When no audio is
    loaded (``self.audio`` is missing or ``None``) the duration is 0.0
    instead of raising ``AttributeError``.
    """
    audio = getattr(self, "audio", None)
    if audio is None:
        self.duration = 0.0
    else:
        self.duration = round(audio.duration_seconds, 2)
    return self.duration
@@ -71,17 +84,25 @@ class AudioProcessor:
71
84
  def cut(self, start, end):
72
85
  if len(start) == len(end):
73
86
  if len(start) == 1:
74
- print_info(f"Cutting from {start[0]} to {end[0]}")
75
- start_ms = round(start[0] * 1000)
76
- end_ms = round(end[0] * 1000)
87
+ single_start = max(0, start[0])
88
+ single_end = max(0, end[0])
89
+ if single_end <= single_start:
90
+ print_fail("End time must be greater than start time.")
91
+ return False
92
+ print_info(f"Cutting from {single_start} to {single_end}")
93
+ start_ms = round(single_start * 1000)
94
+ end_ms = round(single_end * 1000)
77
95
  self.audio = self.audio[:start_ms] + self.audio[end_ms:]
78
96
  return True
79
97
  else:
80
98
  time_sets = list(zip(start, end))
81
99
  subtract_time = 0
82
100
  for single_start, single_end in time_sets:
83
- single_start = single_start - subtract_time
84
- single_end = single_end - subtract_time
101
+ single_start = max(0, single_start - subtract_time)
102
+ single_end = max(0, single_end - subtract_time)
103
+ if single_end <= single_start:
104
+ print_fail("End time must be greater than start time.")
105
+ continue
85
106
  print_info(f"Cutting from {single_start} to {single_end}")
86
107
  start_ms = round(single_start * 1000)
87
108
  end_ms = round(single_end * 1000)
@@ -124,11 +145,15 @@ class AudioProcessor:
124
145
  def stop_audio(self):
125
146
  try:
126
147
  if pygame.mixer.get_init():
148
+ pos_ms = pygame.mixer.music.get_pos()
149
+ pos_sec = pos_ms / 1000 if pos_ms >= 0 else 0
127
150
  pygame.mixer.music.stop()
128
151
  self.is_playing_var = False
129
- print_info("Audio playback stopped")
152
+ print_info(f"Audio playback stopped at {pos_sec:.2f}s")
153
+ return pos_sec
130
154
  except Exception as e:
131
155
  print_warn(f"Error stopping audio: {e}")
156
+ return 0
132
157
 
133
158
  def is_playing(self):
134
159
  try:
@@ -143,11 +168,19 @@ class AudioProcessor:
143
168
  return None
144
169
 
145
170
  return {
146
- "duration": self.get_lenght(),
171
+ "duration": self.get_length(),
147
172
  "channels": self.audio.channels,
148
173
  "frame_rate": self.audio.frame_rate,
149
174
  "sample_width": self.audio.sample_width
150
175
  }
151
176
 
152
177
  def export_audio(self, path, format: str = "mp3"):
153
- self.audio.export(path, format=format)
178
+ self.audio.export(path, format=format)
179
+
180
def get_audio_base64(self):
    """Return the current audio, exported as MP3, base64-encoded as bytes."""
    mp3_stream = io.BytesIO()
    self.audio.export(mp3_stream, format="mp3")
    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(mp3_stream.getvalue())
cutted/core/gemini.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import sys
3
+ import base64
3
4
  from google import genai
4
5
  from google.genai import types
5
6
 
@@ -15,13 +16,21 @@ class GeminiClient:
15
16
  api_key=GEMINI_API_KEY,
16
17
  )
17
18
 
18
- def generate(self, prompt: str, model: str = "gemini-2.0-flash"):
19
+ def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
20
+ parts=[
21
+ types.Part.from_text(text=prompt),
22
+ ]
23
+
24
+ if audio_base64:
25
+ parts.append(types.Part.from_bytes(
26
+ mime_type="audio/mpeg",
27
+ data=base64.b64decode(audio_base64)
28
+ ))
29
+
19
30
  contents = [
20
31
  types.Content(
21
32
  role="user",
22
- parts=[
23
- types.Part.from_text(text=prompt),
24
- ],
33
+ parts=parts
25
34
  ),
26
35
  ]
27
36
  tools = [
@@ -0,0 +1,55 @@
1
+ Metadata-Version: 2.4
2
+ Name: cutted
3
+ Version: 0.3.3
4
+ Summary: AI-powered audio editor controllable via natural language.
5
+ Author-email: simon0302010 <simon0302010@gmail.com>
6
+ License-Expression: GPL-3.0
7
+ Project-URL: Homepage, https://github.com/simon0302010/Cutted
8
+ Requires-Python: <=3.13,>=3.9
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: customtkinter
12
+ Requires-Dist: matplotlib
13
+ Requires-Dist: numpy
14
+ Requires-Dist: pydub
15
+ Requires-Dist: pygame
16
+ Requires-Dist: google-genai
17
+ Requires-Dist: python-dotenv
18
+ Provides-Extra: whisper
19
+ Requires-Dist: whisper-timestamped; extra == "whisper"
20
+ Dynamic: license-file
21
+
22
+ ![PyPI](https://img.shields.io/pypi/v/cutted?color=blue)
23
+ ![PyPI - License](https://img.shields.io/pypi/l/cutted)
24
+ ![Hackatime](https://hackatime-badge.hackclub.com/U08HC7N4JJW/Cutted)
25
+
26
+ # Cutted
27
+
28
+ AI-powered audio editor controlled by natural language 🚀
29
+
30
+ Let AI handle your audio editing with simple commands:
31
+ - Automatically detect quiet or loud parts
32
+ - Transcribe audio (if Whisper is installed)
33
+ - Cut, trim, or adjust volume for specific segments
34
+
35
+ ## Installation
36
+
37
+ 1. **Install system dependencies (e.g., FFmpeg).**
38
+ 2. **Install Cutted**
39
+ ```bash
40
+ pip install cutted
41
+ ```
42
+ **With Whisper support**
43
+ ```bash
44
+ pip install cutted[whisper]
45
+ ```
46
+
47
+ ## Usage
48
+
49
+ 1. **Launch the app:**
50
+ ```bash
51
+ python -m cutted
52
+ ```
53
+ 2. **Load an audio file** (MP3, WAV, etc.)
54
+ 3. **Play, Cut, Undo** – all from the GUI or with text commands ✂️
55
+ 4. **Export** as MP3 or WAV
@@ -0,0 +1,12 @@
1
+ cutted/__init__.py,sha256=F1mzO6qI2gD6d_DQsjZItLuIfNfa1Te5KwCMThWDQT4,49
2
+ cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
+ cutted/app.py,sha256=HaJ3yu8-WVDGKZmFcTVG4CK8q13eHyUQaIyOWMZWV68,10764
4
+ cutted/core/audio_processor.py,sha256=gKBJ1wpdrX2IozmXDzW7MoFVp9uq0Pb3ezz9R3Ahmnw,6691
5
+ cutted/core/gemini.py,sha256=yHsQXk4tDHcW5qJBuL2LpPvdChimxlmEbu76BvsoeY4,3108
6
+ cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
+ cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
8
+ cutted-0.3.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
+ cutted-0.3.3.dist-info/METADATA,sha256=ehEIEm5qz3wVqBY74Itpwooe4SYOFpXISXUMZO7LVh4,1503
10
+ cutted-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ cutted-0.3.3.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
+ cutted-0.3.3.dist-info/RECORD,,
@@ -1,68 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: cutted
3
- Version: 0.3.1
4
- Summary: AI-powered audio editor controllable via natural language.
5
- Author-email: simon0302010 <simon0302010@gmail.com>
6
- License-Expression: GPL-3.0
7
- Project-URL: Homepage, https://github.com/simon0302010/Cutted
8
- Requires-Python: <=3.13,>=3.9
9
- Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: customtkinter
12
- Requires-Dist: matplotlib
13
- Requires-Dist: numpy
14
- Requires-Dist: pydub
15
- Requires-Dist: pygame
16
- Requires-Dist: google-genai
17
- Requires-Dist: python-dotenv
18
- Provides-Extra: whisper
19
- Requires-Dist: whisper-timestamped; extra == "whisper"
20
- Dynamic: license-file
21
-
22
- # Cutted
23
- AI-powered audio editor controllable via natural language
24
-
25
- Note: This app is currently not functional.
26
-
27
- # Installation
28
-
29
- Install dependencies:
30
-
31
- **Debian:**
32
- ```bash
33
- sudo apt update
34
- sudo apt install ffmpeg git
35
- ```
36
-
37
- **Arch Linux:**
38
- ```bash
39
- sudo pacman -Syu ffmpeg git
40
- ```
41
-
42
- Clone the repository:
43
-
44
- ```bash
45
- git clone https://github.com/simon0302010/Cutted.git
46
- cd Cutted
47
- ```
48
-
49
- Create a virtual environment:
50
-
51
- ```bash
52
- python -m venv venv
53
- source venv/bin/activate
54
- ```
55
-
56
- Install the package:
57
-
58
- ```bash
59
- pip install .
60
- ```
61
-
62
- # Usage
63
-
64
- Run the package:
65
-
66
- ```bash
67
- python -m cutted
68
- ```
@@ -1,12 +0,0 @@
1
- cutted/__init__.py,sha256=Q7ZgbkmUOc-Gzy42zzZcKnzKxdZ023pITyXAC0BDBlM,49
2
- cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
- cutted/app.py,sha256=xf5LFHdPLz1WNP05eRruHX2hKnaei8CUrqzVSODweAM,8952
4
- cutted/core/audio_processor.py,sha256=7-XCuPPTlozeuaD2LqyzwRGinu0NvowTLbAh2X4XJ98,5182
5
- cutted/core/gemini.py,sha256=Ts_EbC1-rO9jIsdSlzKcmjLVS1o663GmfTdzmix12kE,2872
6
- cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
- cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
8
- cutted-0.3.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
- cutted-0.3.1.dist-info/METADATA,sha256=6plv-IaqUXQeeAnChYaMBjYSLh8FBTAtvyQqZkRTuuA,1201
10
- cutted-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- cutted-0.3.1.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
- cutted-0.3.1.dist-info/RECORD,,
File without changes