cutted 0.3.3__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cutted
3
- Version: 0.3.3
3
+ Version: 0.4.0
4
4
  Summary: AI-powered audio editor controllable via natural language.
5
5
  Author-email: simon0302010 <simon0302010@gmail.com>
6
6
  License-Expression: GPL-3.0
@@ -0,0 +1,2 @@
1
+ __version__ = "0.4.0"
2
+ __author__ = "simon0302010"
@@ -109,45 +109,54 @@ class CuttedApp:
109
109
  def update_plot(self):
110
110
  if self.canvas:
111
111
  self.canvas.get_tk_widget().destroy()
112
-
112
+ if hasattr(self, "slider") and self.slider is not None:
113
+ self.slider.destroy()
114
+
113
115
  fig = self.AudioProcessor.plot_audio()
114
116
  self.ax = fig.axes[0]
115
117
  self.canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
116
118
  self.canvas.draw()
117
-
118
- self.audio_lenght = int(round(self.AudioProcessor.get_length()))
119
-
119
+
120
+ self.audio_length = float(self.AudioProcessor.get_length())
121
+
120
122
  slider_width = self.root.winfo_width() - 40
123
+ if self.slider_value > self.audio_length:
124
+ self.slider_value = self.audio_length
125
+ if self.slider_value < 0:
126
+ self.slider_value = 0
127
+
121
128
  self.slider = customtkinter.CTkSlider(
122
- self.root, from_=0, to=self.audio_lenght, command=self.set_cursor, width=slider_width
129
+ self.root, from_=0, to=self.audio_length, command=self.set_cursor, width=slider_width
123
130
  )
124
- self.slider.set(0)
125
-
131
+ self.slider.set(self.slider_value)
126
132
  self.slider.place(relx=0.5, rely=1.0, anchor="s", y=-130)
127
-
133
+ self.set_cursor(self.slider_value)
134
+
128
135
  self.canvas.get_tk_widget().pack(
129
136
  fill=customtkinter.BOTH,
130
137
  expand=True,
131
138
  padx=10,
132
139
  pady=10,
133
140
  )
134
-
135
- self.cursor_line = self.ax.axvline(x=0, color="red", linewidth=2)
141
+
142
+ self.cursor_line = self.ax.axvline(x=self.slider_value, color="red", linewidth=2)
136
143
  self.canvas.draw_idle()
137
144
 
138
145
  def set_cursor(self, value):
139
146
  now = time.time()
140
- if now - self.last_slider_update < 0.1: # 100ms
147
+ if now - self.last_slider_update < 0.05: # 100ms
141
148
  return
142
149
  self.last_slider_update = now
143
150
 
144
- self.slider_value = round(value)
151
+ self.slider_value = round(value, 2)
145
152
 
146
153
  if self.cursor_line:
147
154
  self.cursor_line.set_xdata([self.slider_value, self.slider_value])
148
155
  self.canvas.draw_idle()
156
+ self.slider.set(self.slider_value)
157
+ self.set_cursor(self.slider_value)
149
158
 
150
- print(f"Slider Value: {self.slider_value}")
159
+ print_info(f"Slider Value: {self.slider_value}")
151
160
 
152
161
  def play_audio(self):
153
162
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -200,7 +209,7 @@ class CuttedApp:
200
209
 
201
210
  text = self.entry.get()
202
211
  if text.strip():
203
- full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length())}s long. The cursor of the user is currently at {self.slider_value}s."
212
+ full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length(), 2)}s long. The cursor of the user is currently at {self.slider_value}s."
204
213
  full_prompt += "\nHere is a the waveform samples of the audio. You can use them to determine silent parts, loud parts, silences, beats and much more.\nYou are forced to used these if the user requires you to cut out silent of quiet parts for example."
205
214
  full_prompt += "\nAll of your tools should be enough to fullfill almost every task.\nNEVER ASK FOR CONFIRMATION FROM THE USER. DO EVERYTHING!"
206
215
  full_prompt += f"\n{self.AudioProcessor.get_waveform_summary()}\n"
@@ -228,6 +237,12 @@ class CuttedApp:
228
237
  result = self.AudioProcessor.cut(args["start"], args["end"])
229
238
  if not result:
230
239
  messagebox.showerror("Error", "Please try again.")
240
+ if function_call.name == "change_volume":
241
+ print_info("Change Volume function called")
242
+ args = function_call.args
243
+ result = self.AudioProcessor.change_volume(args["start"], args["end"], args["volume"])
244
+ if not result:
245
+ messagebox.showerror("Error", "Please try again.")
231
246
  self.update_plot()
232
247
  elif text_result:
233
248
  messagebox.showerror("Error", text_result.strip())
@@ -112,6 +112,28 @@ class AudioProcessor:
112
112
  else:
113
113
  return False
114
114
 
115
+ def change_volume(self, start, end, volume):
116
+ if len(start) == len(end) == len(volume):
117
+ time_sets = list(zip(start, end, volume))
118
+ for single_start, single_end, single_volume in time_sets:
119
+ if single_end <= single_start:
120
+ print_fail("End time must be greater than start time.")
121
+ continue
122
+ print_info(f"Changing volume of {single_start} - {single_end} to {str(single_volume)}")
123
+
124
+ start_ms = round(single_start * 1000)
125
+ end_ms = round(single_end * 1000)
126
+ part1 = self.audio[:start_ms]
127
+ part2 = self.audio[start_ms:end_ms]
128
+ part3 = self.audio[end_ms:]
129
+
130
+ part2 = part2.apply_gain(ratio_to_db(single_volume))
131
+
132
+ self.audio = part1 + part2 + part3
133
+ return True
134
+ else:
135
+ return False
136
+
115
137
  def play_audio(self, start_time=0):
116
138
  if self.audio is None:
117
139
  print_fail("No audio loaded.")
@@ -0,0 +1,149 @@
1
+ import os
2
+ import sys
3
+ import base64
4
+ from google import genai
5
+ from google.genai import types
6
+
7
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
8
+
9
+ if not GEMINI_API_KEY:
10
+ print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
11
+ sys.exit(0)
12
+
13
+ class GeminiClient:
14
+ def __init__(self):
15
+ self.client = genai.Client(
16
+ api_key=GEMINI_API_KEY,
17
+ )
18
+ self.contents = []
19
+
20
+ def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
21
+ parts=[
22
+ types.Part.from_text(text=prompt),
23
+ ]
24
+
25
+ if audio_base64:
26
+ parts.append(types.Part.from_bytes(
27
+ mime_type="audio/mpeg",
28
+ data=base64.b64decode(audio_base64)
29
+ ))
30
+
31
+ self.contents.append(
32
+ types.Content(
33
+ role="user",
34
+ parts=parts
35
+ )
36
+ )
37
+ tools = [
38
+ types.Tool(
39
+ function_declarations=[
40
+ types.FunctionDeclaration(
41
+ name="cut_audio",
42
+ description=(
43
+ "Remove one or more segments from the audio by specifying start and end times in seconds. "
44
+ "You can cut multiple segments at once by providing lists of start and end values. "
45
+ "Each segment defined by a start and end pair will be removed from the audio."
46
+ ),
47
+ parameters=genai.types.Schema(
48
+ type=genai.types.Type.OBJECT,
49
+ required=["start", "end"],
50
+ properties={
51
+ "start": genai.types.Schema(
52
+ type=genai.types.Type.ARRAY,
53
+ items=genai.types.Schema(
54
+ type=genai.types.Type.NUMBER,
55
+ ),
56
+ ),
57
+ "end": genai.types.Schema(
58
+ type=genai.types.Type.ARRAY,
59
+ items=genai.types.Schema(
60
+ type=genai.types.Type.NUMBER,
61
+ ),
62
+ ),
63
+ },
64
+ ),
65
+ ),
66
+ types.FunctionDeclaration(
67
+ name="change_volume",
68
+ description=(
69
+ "Adjust the volume of specific segments in the audio by specifying lists of start times, end times, "
70
+ "and volume factors. Each segment between a start and end time will have its volume changed by the "
71
+ "corresponding factor (e.g., 0.5 for half volume, 2.0 for double volume). Multiple segments can be "
72
+ "adjusted at once by providing lists of values."
73
+ ),
74
+ parameters=genai.types.Schema(
75
+ type=genai.types.Type.OBJECT,
76
+ required=["start", "end", "volume"],
77
+ properties={
78
+ "start": genai.types.Schema(
79
+ type=genai.types.Type.ARRAY,
80
+ items=genai.types.Schema(
81
+ type=genai.types.Type.NUMBER,
82
+ ),
83
+ ),
84
+ "end": genai.types.Schema(
85
+ type=genai.types.Type.ARRAY,
86
+ items=genai.types.Schema(
87
+ type=genai.types.Type.NUMBER,
88
+ ),
89
+ ),
90
+ "volume": genai.types.Schema(
91
+ type=genai.types.Type.ARRAY,
92
+ items=genai.types.Schema(
93
+ type=genai.types.Type.NUMBER,
94
+ ),
95
+ ),
96
+ },
97
+ ),
98
+ ),
99
+ ]
100
+ )
101
+ ]
102
+ generate_content_config = types.GenerateContentConfig(
103
+ tools=tools,
104
+ response_mime_type="text/plain",
105
+ )
106
+
107
+ response = self.client.models.generate_content(
108
+ model=model,
109
+ contents=self.contents,
110
+ config=generate_content_config,
111
+ )
112
+
113
+ function_call = None
114
+ text_response = None
115
+ try:
116
+ for candidate in response.candidates:
117
+ for part in candidate.content.parts:
118
+ if part.function_call:
119
+ function_call = part.function_call
120
+ if part.text:
121
+ text_response = part.text
122
+ except TypeError:
123
+ pass
124
+
125
+ model_parts = []
126
+ if text_response:
127
+ model_parts.append(
128
+ types.Part.from_text(text=text_response)
129
+ )
130
+ if function_call:
131
+ model_parts.append(
132
+ types.Part.from_function_call(
133
+ name=function_call.name,
134
+ args=function_call.args
135
+ )
136
+ )
137
+
138
+ self.contents.append(
139
+ types.Content(
140
+ role="model",
141
+ parts=model_parts
142
+ )
143
+ )
144
+
145
+ return function_call, text_response
146
+
147
+ if __name__ == "__main__":
148
+ gemini = GeminiClient()
149
+ print(gemini.generate("cut from 10 to 20.5"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cutted
3
- Version: 0.3.3
3
+ Version: 0.4.0
4
4
  Summary: AI-powered audio editor controllable via natural language.
5
5
  Author-email: simon0302010 <simon0302010@gmail.com>
6
6
  License-Expression: GPL-3.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cutted"
7
- version = "0.3.3"
7
+ version = "0.4.0"
8
8
  description = "AI-powered audio editor controllable via natural language."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9, <=3.13"
@@ -1,2 +0,0 @@
1
- __version__ = "0.3.3"
2
- __author__ = "simon0302010"
@@ -1,90 +0,0 @@
1
- import os
2
- import sys
3
- import base64
4
- from google import genai
5
- from google.genai import types
6
-
7
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
8
-
9
- if not GEMINI_API_KEY:
10
- print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
11
- sys.exit(0)
12
-
13
- class GeminiClient:
14
- def __init__(self):
15
- self.client = genai.Client(
16
- api_key=GEMINI_API_KEY,
17
- )
18
-
19
- def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
20
- parts=[
21
- types.Part.from_text(text=prompt),
22
- ]
23
-
24
- if audio_base64:
25
- parts.append(types.Part.from_bytes(
26
- mime_type="audio/mpeg",
27
- data=base64.b64decode(audio_base64)
28
- ))
29
-
30
- contents = [
31
- types.Content(
32
- role="user",
33
- parts=parts
34
- ),
35
- ]
36
- tools = [
37
- types.Tool(
38
- function_declarations=[
39
- types.FunctionDeclaration(
40
- name="cut_audio",
41
- description="Cuts specified parts out of audio. Multiple parts can be cut if a list of both start and end values is used as property.",
42
- parameters=genai.types.Schema(
43
- type = genai.types.Type.OBJECT,
44
- required = ["start", "end"],
45
- properties = {
46
- "start": genai.types.Schema(
47
- type = genai.types.Type.ARRAY,
48
- items = genai.types.Schema(
49
- type = genai.types.Type.NUMBER,
50
- ),
51
- ),
52
- "end": genai.types.Schema(
53
- type = genai.types.Type.ARRAY,
54
- items = genai.types.Schema(
55
- type = genai.types.Type.NUMBER,
56
- ),
57
- ),
58
- },
59
- ),
60
- ),
61
- ])
62
- ]
63
- generate_content_config = types.GenerateContentConfig(
64
- tools=tools,
65
- response_mime_type="text/plain",
66
- )
67
-
68
- response = self.client.models.generate_content(
69
- model=model,
70
- contents=contents,
71
- config=generate_content_config,
72
- )
73
-
74
- function_call = None
75
- text_response = None
76
- try:
77
- for candidate in response.candidates:
78
- for part in candidate.content.parts:
79
- if part.function_call:
80
- function_call = part.function_call
81
- if part.text:
82
- text_response = part.text
83
- except TypeError:
84
- return None, None
85
-
86
- return function_call, text_response
87
-
88
- if __name__ == "__main__":
89
- gemini = GeminiClient()
90
- print(gemini.generate("cut from 10 to 20.5"))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes