cutted 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cutted/app.py CHANGED
@@ -1,22 +1,20 @@
1
1
  import time
2
- import threading
3
2
  import customtkinter
3
+ import tkinter.messagebox as messagebox
4
+ from .core import gemini
4
5
  from .core.logger import *
5
6
  from .core import audio_processor
6
7
  from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
7
8
 
8
- import simpleaudio as sa # Add this import
9
-
10
9
  customtkinter.set_appearance_mode("Dark")
11
10
 
12
11
  class CuttedApp:
13
12
  def __init__(self):
14
13
  self.AudioProcessor = audio_processor.AudioProcessor()
14
+ self.gemini = gemini.GeminiClient()
15
15
  self.canvas = None
16
16
  self.cursor_line = None
17
17
  self.last_slider_update = 0
18
- self.play_obj = None
19
- self.play_thread = None
20
18
  self.is_playing = False
21
19
  self.setup_ui()
22
20
 
@@ -39,6 +37,9 @@ class CuttedApp:
39
37
 
40
38
  button = customtkinter.CTkButton(self.root, text="Load audio", command=self.select_file)
41
39
  button.place(relx=0.5, rely=1.0, anchor="s", y=-30)
40
+
41
+ export_button = customtkinter.CTkButton(self.root, text="Export", command=self.export_audio, width=70)
42
+ export_button.place(relx=0.9, rely=1.0, anchor="s", y=-30)
42
43
 
43
44
  self.play_button = customtkinter.CTkButton(self.root, text="Play", command=self.play_audio, width=50)
44
45
  self.play_button.place(relx=0.3, rely=1.0, anchor="s", y=-30)
@@ -120,33 +121,62 @@ class CuttedApp:
120
121
  print_fail("No audio loaded.")
121
122
  return
122
123
 
123
- self.stop_audio()
124
-
125
- start_ms = int(self.slider.get() * 1000)
126
- audio = self.AudioProcessor.audio[start_ms:]
127
- raw_data = audio.raw_data
128
- num_channels = audio.channels
129
- bytes_per_sample = audio.sample_width
130
- sample_rate = audio.frame_rate
131
-
132
- def playback():
133
- self.is_playing = True
134
- self.play_obj = sa.play_buffer(raw_data, num_channels, bytes_per_sample, sample_rate)
135
- self.play_obj.wait_done()
136
- self.is_playing = False
137
-
138
- self.play_thread = threading.Thread(target=playback, daemon=True)
139
- self.play_thread.start()
124
+ start_time = self.slider.get() if hasattr(self, 'slider') else 0
125
+ self.AudioProcessor.play_audio(start_time)
140
126
 
141
127
  def stop_audio(self):
142
- if self.play_obj is not None and self.is_playing:
143
- self.play_obj.stop()
144
- self.is_playing = False
128
+ self.AudioProcessor.stop_audio()
129
+ self.is_playing = False
130
+
131
+ def export_audio(self):
132
+ if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
133
+ print_fail("No audio loaded.")
134
+ return
135
+
136
+ save_path = customtkinter.filedialog.asksaveasfilename(
137
+ defaultextension=".mp3",
138
+ filetypes=[
139
+ ("MP3 files", "*.mp3"),
140
+ ("WAV files", "*.wav"),
141
+ ]
142
+ )
143
+
144
+ if save_path:
145
+ if save_path.lower().endswith(".wav"):
146
+ format = "wav"
147
+ elif save_path.lower().endswith(".mp3"):
148
+ format = "mp3"
149
+ else:
150
+ format = "mp3"
151
+
152
+ self.AudioProcessor.export_audio(save_path, format)
153
+ print_success(f"Audio exported to {save_path}")
145
154
 
146
155
  def send_prompt(self):
156
+ if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
157
+ print_fail("No audio loaded.")
158
+ return
159
+
147
160
  text = self.entry.get()
148
- print(f"Prompt: {text}")
161
+ full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_lenght())}s long."
162
+ full_prompt += f"\n\nUser Prompt: {text}"
149
163
  self.entry.delete(0, "end")
164
+
165
+ function_call, text_result = self.gemini.generate(full_prompt)
166
+
167
+ if function_call:
168
+ print_info(f"Gemini called {function_call.name}")
169
+ if function_call.name == "cut_audio":
170
+ print_info("Cut function called")
171
+ args = function_call.args
172
+ result = self.AudioProcessor.cut(args["start"], args["end"])
173
+ if not result:
174
+ messagebox.showerror("Error", "Please try again.")
175
+ self.update_plot()
176
+ elif text_result:
177
+ messagebox.showerror("Error", text_result.strip())
178
+ else:
179
+ print_fail("Gemini returned no data")
150
180
 
151
181
  def run(self):
152
182
  self.root.mainloop()
@@ -3,10 +3,26 @@ from pydub.utils import ratio_to_db
3
3
  from .logger import *
4
4
  import numpy as np
5
5
  from matplotlib.figure import Figure
6
+ import pygame
7
+ import io
8
+ import threading
9
+ import time
6
10
 
7
11
  class AudioProcessor:
8
12
  def __init__(self):
9
13
  self.audio_path = None
14
+ self.audio = None
15
+ self.is_playing_var = False
16
+ self.play_thread = None
17
+ self._init_pygame()
18
+
19
+ def _init_pygame(self):
20
+ try:
21
+ pygame.mixer.pre_init(frequency=44100, size=-16, channels=2, buffer=1024)
22
+ pygame.mixer.init()
23
+ print_success("Pygame initialized")
24
+ except pygame.error as e:
25
+ print_warn(f"Pygame initialization warning: {e}")
10
26
 
11
27
  def load_audio(self, audio_path: str, volume: float = 1.0):
12
28
  self.audio_path = audio_path
@@ -27,7 +43,6 @@ class AudioProcessor:
27
43
  samples = samples / np.max(np.abs(samples))
28
44
 
29
45
  times = np.linspace(0, len(samples) / self.audio.frame_rate, num=len(samples))
30
- print(times)
31
46
 
32
47
  fig = Figure(figsize=(5, 4), facecolor="#242424")
33
48
  ax = fig.add_subplot()
@@ -54,10 +69,85 @@ class AudioProcessor:
54
69
  return self.duration
55
70
 
56
71
  def cut(self, start, end):
57
- if type(start) == list and type(end) == list:
58
- print("Cutting multiple segments")
72
+ if len(start) == len(end):
73
+ if len(start) == 1:
74
+ print_info(f"Cutting from {start[0]} to {end[0]}")
75
+ start_ms = round(start[0] * 1000)
76
+ end_ms = round(end[0] * 1000)
77
+ self.audio = self.audio[:start_ms] + self.audio[end_ms:]
78
+ return True
79
+ else:
80
+ time_sets = list(zip(start, end))
81
+ subtract_time = 0
82
+ for single_start, single_end in time_sets:
83
+ single_start = single_start - subtract_time
84
+ single_end = single_end - subtract_time
85
+ print_info(f"Cutting from {single_start} to {single_end}")
86
+ start_ms = round(single_start * 1000)
87
+ end_ms = round(single_end * 1000)
88
+ self.audio = self.audio[:start_ms] + self.audio[end_ms:]
89
+ subtract_time += single_end - single_start
90
+ return True
91
+ else:
92
+ return False
93
+
94
+ def play_audio(self, start_time=0):
95
+ if self.audio is None:
96
+ print_fail("No audio loaded.")
97
+ return False
98
+
99
+ try:
100
+ self.stop_audio()
101
+
102
+ start_ms = int(start_time * 1000)
103
+ audio_segment = self.audio[start_ms:]
104
+
105
+ audio_segment = audio_segment.set_frame_rate(22050)
106
+ audio_segment = audio_segment.set_channels(2)
107
+ audio_segment = audio_segment.set_sample_width(2)
108
+
109
+ audio_data = io.BytesIO()
110
+ audio_segment.export(audio_data, format="wav")
111
+ audio_data.seek(0)
112
+
113
+ pygame.mixer.music.load(audio_data)
114
+ pygame.mixer.music.play()
115
+ self.is_playing_var = True
116
+
117
+ print_success(f"Playing audio from {start_time}s")
118
+ return True
119
+
120
+ except Exception as e:
121
+ print_fail(f"Error playing audio: {e}")
122
+ return False
123
+
124
+ def stop_audio(self):
125
+ try:
126
+ if pygame.mixer.get_init():
127
+ pygame.mixer.music.stop()
128
+ self.is_playing_var = False
129
+ print_info("Audio playback stopped")
130
+ except Exception as e:
131
+ print_warn(f"Error stopping audio: {e}")
132
+
133
+ def is_playing(self):
134
+ try:
135
+ if pygame.mixer.get_init():
136
+ return pygame.mixer.music.get_busy()
137
+ return False
138
+ except:
139
+ return False
140
+
141
+ def get_audio_info(self):
142
+ if self.audio is None:
143
+ return None
59
144
 
60
- start_ms = round(start * 1000)
61
- end_ms = round(end * 1000)
145
+ return {
146
+ "duration": self.get_lenght(),
147
+ "channels": self.audio.channels,
148
+ "frame_rate": self.audio.frame_rate,
149
+ "sample_width": self.audio.sample_width
150
+ }
62
151
 
63
- self.audio = self.audio[:start_ms] + self.audio[end_ms:]
152
+ def export_audio(self, path, format: str = "mp3"):
153
+ self.audio.export(path, format=format)
cutted/core/gemini.py CHANGED
@@ -1,8 +1,81 @@
1
1
  import os
2
+ import sys
2
3
  from google import genai
3
4
  from google.genai import types
4
5
 
5
6
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
6
7
 
7
8
  if not GEMINI_API_KEY:
8
- print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
9
+ print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
10
+ sys.exit(0)
11
+
12
+ class GeminiClient:
13
+ def __init__(self):
14
+ self.client = genai.Client(
15
+ api_key=GEMINI_API_KEY,
16
+ )
17
+
18
+ def generate(self, prompt: str, model: str = "gemini-2.0-flash"):
19
+ contents = [
20
+ types.Content(
21
+ role="user",
22
+ parts=[
23
+ types.Part.from_text(text=prompt),
24
+ ],
25
+ ),
26
+ ]
27
+ tools = [
28
+ types.Tool(
29
+ function_declarations=[
30
+ types.FunctionDeclaration(
31
+ name="cut_audio",
32
+ description="Cuts specified parts out of audio. Multiple parts can be cut if a list of both start and end values is used as property.",
33
+ parameters=genai.types.Schema(
34
+ type = genai.types.Type.OBJECT,
35
+ required = ["start", "end"],
36
+ properties = {
37
+ "start": genai.types.Schema(
38
+ type = genai.types.Type.ARRAY,
39
+ items = genai.types.Schema(
40
+ type = genai.types.Type.NUMBER,
41
+ ),
42
+ ),
43
+ "end": genai.types.Schema(
44
+ type = genai.types.Type.ARRAY,
45
+ items = genai.types.Schema(
46
+ type = genai.types.Type.NUMBER,
47
+ ),
48
+ ),
49
+ },
50
+ ),
51
+ ),
52
+ ])
53
+ ]
54
+ generate_content_config = types.GenerateContentConfig(
55
+ tools=tools,
56
+ response_mime_type="text/plain",
57
+ )
58
+
59
+ response = self.client.models.generate_content(
60
+ model=model,
61
+ contents=contents,
62
+ config=generate_content_config,
63
+ )
64
+
65
+ function_call = None
66
+ text_response = None
67
+ try:
68
+ for candidate in response.candidates:
69
+ for part in candidate.content.parts:
70
+ if part.function_call:
71
+ function_call = part.function_call
72
+ if part.text:
73
+ text_response = part.text
74
+ except TypeError:
75
+ return None, None
76
+
77
+ return function_call, text_response
78
+
79
+ if __name__ == "__main__":
80
+ gemini = GeminiClient()
81
+ print(gemini.generate("cut from 10 to 20.5"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cutted
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: AI-powered audio editor controllable via natural language.
5
5
  Author-email: simon0302010 <simon0302010@gmail.com>
6
6
  License-Expression: GPL-3.0
@@ -12,9 +12,9 @@ Requires-Dist: customtkinter
12
12
  Requires-Dist: matplotlib
13
13
  Requires-Dist: numpy
14
14
  Requires-Dist: pydub
15
- Requires-Dist: simpleaudio
15
+ Requires-Dist: pygame
16
16
  Requires-Dist: google-genai
17
- Requires-Dist: dotenv
17
+ Requires-Dist: python-dotenv
18
18
  Dynamic: license-file
19
19
 
20
20
  # Cutted
@@ -0,0 +1,12 @@
1
+ cutted/__init__.py,sha256=CP0x3JIScNbFVSOoF3eIQTKD5gDRfWXcCFE46rlZCio,49
2
+ cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
+ cutted/app.py,sha256=qZ6idEqEmdtCLOahBN4803u-t12oNRhTAgbN3XiIiKY,6803
4
+ cutted/core/audio_processor.py,sha256=7-XCuPPTlozeuaD2LqyzwRGinu0NvowTLbAh2X4XJ98,5182
5
+ cutted/core/gemini.py,sha256=Ts_EbC1-rO9jIsdSlzKcmjLVS1o663GmfTdzmix12kE,2872
6
+ cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
+ cutted/core/transcribe.py,sha256=cm6ziM3_grXKpUCFHiAU7-6lFK_SVsf7-6n14vMYQng,992
8
+ cutted-0.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
+ cutted-0.2.0.dist-info/METADATA,sha256=4hl_l2XcGg74HKYMcbAlklxduA8nw066deSAVBBuvOs,1122
10
+ cutted-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ cutted-0.2.0.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
+ cutted-0.2.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- cutted/__init__.py,sha256=CP0x3JIScNbFVSOoF3eIQTKD5gDRfWXcCFE46rlZCio,49
2
- cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
- cutted/app.py,sha256=aTkYl-wTX6NqiGNuMXa6MvztK9D8m7H6Q39HHy5RB7w,5322
4
- cutted/core/audio_processor.py,sha256=-RXq9iCOhr0Eq6odl9piZqrwVHh0RX5LB9A-Ip4xyOY,2010
5
- cutted/core/gemini.py,sha256=BF9TMkPLzzR-cDPOFMGPnVzaZIzY00GpEIQkvBkCL9E,223
6
- cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
- cutted/core/transcribe.py,sha256=cm6ziM3_grXKpUCFHiAU7-6lFK_SVsf7-6n14vMYQng,992
8
- cutted-0.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
- cutted-0.1.1.dist-info/METADATA,sha256=z5O4yFd7SYWaVrYZ4WkBpB8bXmkwC3sStARpIpiMGOs,1120
10
- cutted-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- cutted-0.1.1.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
- cutted-0.1.1.dist-info/RECORD,,
File without changes