cutted 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cutted/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.3.3"
1
+ __version__ = "0.4.0"
2
2
  __author__ = "simon0302010"
cutted/app.py CHANGED
@@ -109,45 +109,54 @@ class CuttedApp:
109
109
  def update_plot(self):
110
110
  if self.canvas:
111
111
  self.canvas.get_tk_widget().destroy()
112
-
112
+ if hasattr(self, "slider") and self.slider is not None:
113
+ self.slider.destroy()
114
+
113
115
  fig = self.AudioProcessor.plot_audio()
114
116
  self.ax = fig.axes[0]
115
117
  self.canvas = FigureCanvasTkAgg(fig, master=self.plot_frame)
116
118
  self.canvas.draw()
117
-
118
- self.audio_lenght = int(round(self.AudioProcessor.get_length()))
119
-
119
+
120
+ self.audio_length = float(self.AudioProcessor.get_length())
121
+
120
122
  slider_width = self.root.winfo_width() - 40
123
+ if self.slider_value > self.audio_length:
124
+ self.slider_value = self.audio_length
125
+ if self.slider_value < 0:
126
+ self.slider_value = 0
127
+
121
128
  self.slider = customtkinter.CTkSlider(
122
- self.root, from_=0, to=self.audio_lenght, command=self.set_cursor, width=slider_width
129
+ self.root, from_=0, to=self.audio_length, command=self.set_cursor, width=slider_width
123
130
  )
124
- self.slider.set(0)
125
-
131
+ self.slider.set(self.slider_value)
126
132
  self.slider.place(relx=0.5, rely=1.0, anchor="s", y=-130)
127
-
133
+ self.set_cursor(self.slider_value)
134
+
128
135
  self.canvas.get_tk_widget().pack(
129
136
  fill=customtkinter.BOTH,
130
137
  expand=True,
131
138
  padx=10,
132
139
  pady=10,
133
140
  )
134
-
135
- self.cursor_line = self.ax.axvline(x=0, color="red", linewidth=2)
141
+
142
+ self.cursor_line = self.ax.axvline(x=self.slider_value, color="red", linewidth=2)
136
143
  self.canvas.draw_idle()
137
144
 
138
145
  def set_cursor(self, value):
139
146
  now = time.time()
140
- if now - self.last_slider_update < 0.1: # 100ms
147
+ if now - self.last_slider_update < 0.05: # 100ms
141
148
  return
142
149
  self.last_slider_update = now
143
150
 
144
- self.slider_value = round(value)
151
+ self.slider_value = round(value, 2)
145
152
 
146
153
  if self.cursor_line:
147
154
  self.cursor_line.set_xdata([self.slider_value, self.slider_value])
148
155
  self.canvas.draw_idle()
156
+ self.slider.set(self.slider_value)
157
+ self.set_cursor(self.slider_value)
149
158
 
150
- print(f"Slider Value: {self.slider_value}")
159
+ print_info(f"Slider Value: {self.slider_value}")
151
160
 
152
161
  def play_audio(self):
153
162
  if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
@@ -200,7 +209,7 @@ class CuttedApp:
200
209
 
201
210
  text = self.entry.get()
202
211
  if text.strip():
203
- full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length())}s long. The cursor of the user is currently at {self.slider_value}s."
212
+ full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_length(), 2)}s long. The cursor of the user is currently at {self.slider_value}s."
204
213
  full_prompt += "\nHere is a the waveform samples of the audio. You can use them to determine silent parts, loud parts, silences, beats and much more.\nYou are forced to used these if the user requires you to cut out silent of quiet parts for example."
205
214
  full_prompt += "\nAll of your tools should be enough to fullfill almost every task.\nNEVER ASK FOR CONFIRMATION FROM THE USER. DO EVERYTHING!"
206
215
  full_prompt += f"\n{self.AudioProcessor.get_waveform_summary()}\n"
@@ -228,6 +237,12 @@ class CuttedApp:
228
237
  result = self.AudioProcessor.cut(args["start"], args["end"])
229
238
  if not result:
230
239
  messagebox.showerror("Error", "Please try again.")
240
+ if function_call.name == "change_volume":
241
+ print_info("Change Volume function called")
242
+ args = function_call.args
243
+ result = self.AudioProcessor.change_volume(args["start"], args["end"], args["volume"])
244
+ if not result:
245
+ messagebox.showerror("Error", "Please try again.")
231
246
  self.update_plot()
232
247
  elif text_result:
233
248
  messagebox.showerror("Error", text_result.strip())
@@ -112,6 +112,28 @@ class AudioProcessor:
112
112
  else:
113
113
  return False
114
114
 
115
+ def change_volume(self, start, end, volume):
116
+ if len(start) == len(end) == len(volume):
117
+ time_sets = list(zip(start, end, volume))
118
+ for single_start, single_end, single_volume in time_sets:
119
+ if single_end <= single_start:
120
+ print_fail("End time must be greater than start time.")
121
+ continue
122
+ print_info(f"Changing volume of {single_start} - {single_end} to {str(single_volume)}")
123
+
124
+ start_ms = round(single_start * 1000)
125
+ end_ms = round(single_end * 1000)
126
+ part1 = self.audio[:start_ms]
127
+ part2 = self.audio[start_ms:end_ms]
128
+ part3 = self.audio[end_ms:]
129
+
130
+ part2 = part2.apply_gain(ratio_to_db(single_volume))
131
+
132
+ self.audio = part1 + part2 + part3
133
+ return True
134
+ else:
135
+ return False
136
+
115
137
  def play_audio(self, start_time=0):
116
138
  if self.audio is None:
117
139
  print_fail("No audio loaded.")
cutted/core/gemini.py CHANGED
@@ -15,6 +15,7 @@ class GeminiClient:
15
15
  self.client = genai.Client(
16
16
  api_key=GEMINI_API_KEY,
17
17
  )
18
+ self.contents = []
18
19
 
19
20
  def generate(self, prompt: str, model: str = "gemini-2.0-flash", audio_base64 = None):
20
21
  parts=[
@@ -27,38 +28,76 @@ class GeminiClient:
27
28
  data=base64.b64decode(audio_base64)
28
29
  ))
29
30
 
30
- contents = [
31
+ self.contents.append(
31
32
  types.Content(
32
33
  role="user",
33
34
  parts=parts
34
- ),
35
- ]
35
+ )
36
+ )
36
37
  tools = [
37
38
  types.Tool(
38
39
  function_declarations=[
39
40
  types.FunctionDeclaration(
40
41
  name="cut_audio",
41
- description="Cuts specified parts out of audio. Multiple parts can be cut if a list of both start and end values is used as property.",
42
+ description=(
43
+ "Remove one or more segments from the audio by specifying start and end times in seconds. "
44
+ "You can cut multiple segments at once by providing lists of start and end values. "
45
+ "Each segment defined by a start and end pair will be removed from the audio."
46
+ ),
47
+ parameters=genai.types.Schema(
48
+ type=genai.types.Type.OBJECT,
49
+ required=["start", "end"],
50
+ properties={
51
+ "start": genai.types.Schema(
52
+ type=genai.types.Type.ARRAY,
53
+ items=genai.types.Schema(
54
+ type=genai.types.Type.NUMBER,
55
+ ),
56
+ ),
57
+ "end": genai.types.Schema(
58
+ type=genai.types.Type.ARRAY,
59
+ items=genai.types.Schema(
60
+ type=genai.types.Type.NUMBER,
61
+ ),
62
+ ),
63
+ },
64
+ ),
65
+ ),
66
+ types.FunctionDeclaration(
67
+ name="change_volume",
68
+ description=(
69
+ "Adjust the volume of specific segments in the audio by specifying lists of start times, end times, "
70
+ "and volume factors. Each segment between a start and end time will have its volume changed by the "
71
+ "corresponding factor (e.g., 0.5 for half volume, 2.0 for double volume). Multiple segments can be "
72
+ "adjusted at once by providing lists of values."
73
+ ),
42
74
  parameters=genai.types.Schema(
43
- type = genai.types.Type.OBJECT,
44
- required = ["start", "end"],
45
- properties = {
75
+ type=genai.types.Type.OBJECT,
76
+ required=["start", "end", "volume"],
77
+ properties={
46
78
  "start": genai.types.Schema(
47
- type = genai.types.Type.ARRAY,
48
- items = genai.types.Schema(
49
- type = genai.types.Type.NUMBER,
79
+ type=genai.types.Type.ARRAY,
80
+ items=genai.types.Schema(
81
+ type=genai.types.Type.NUMBER,
50
82
  ),
51
83
  ),
52
84
  "end": genai.types.Schema(
53
- type = genai.types.Type.ARRAY,
54
- items = genai.types.Schema(
55
- type = genai.types.Type.NUMBER,
85
+ type=genai.types.Type.ARRAY,
86
+ items=genai.types.Schema(
87
+ type=genai.types.Type.NUMBER,
56
88
  ),
89
+ ),
90
+ "volume": genai.types.Schema(
91
+ type=genai.types.Type.ARRAY,
92
+ items=genai.types.Schema(
93
+ type=genai.types.Type.NUMBER,
57
94
  ),
95
+ ),
58
96
  },
59
97
  ),
60
98
  ),
61
- ])
99
+ ]
100
+ )
62
101
  ]
63
102
  generate_content_config = types.GenerateContentConfig(
64
103
  tools=tools,
@@ -67,7 +106,7 @@ class GeminiClient:
67
106
 
68
107
  response = self.client.models.generate_content(
69
108
  model=model,
70
- contents=contents,
109
+ contents=self.contents,
71
110
  config=generate_content_config,
72
111
  )
73
112
 
@@ -81,7 +120,27 @@ class GeminiClient:
81
120
  if part.text:
82
121
  text_response = part.text
83
122
  except TypeError:
84
- return None, None
123
+ pass
124
+
125
+ model_parts = []
126
+ if text_response:
127
+ model_parts.append(
128
+ types.Part.from_text(text=text_response)
129
+ )
130
+ if function_call:
131
+ model_parts.append(
132
+ types.Part.from_function_call(
133
+ name=function_call.name,
134
+ args=function_call.args
135
+ )
136
+ )
137
+
138
+ self.contents.append(
139
+ types.Content(
140
+ role="model",
141
+ parts=model_parts
142
+ )
143
+ )
85
144
 
86
145
  return function_call, text_response
87
146
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cutted
3
- Version: 0.3.3
3
+ Version: 0.4.0
4
4
  Summary: AI-powered audio editor controllable via natural language.
5
5
  Author-email: simon0302010 <simon0302010@gmail.com>
6
6
  License-Expression: GPL-3.0
@@ -0,0 +1,12 @@
1
+ cutted/__init__.py,sha256=7eWSPFhMOE6u-f5s_QomBlTjCze86s3xKt-v0bLJAAs,49
2
+ cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
+ cutted/app.py,sha256=LyVv3RHiAemtcEgLiZCcyenAD_QliaFIZ08JfCNwxq4,11543
4
+ cutted/core/audio_processor.py,sha256=OBxNAKs67zqIfnJIsSgaMDa0UbB0R0mccyWcgNs5Bk0,7663
5
+ cutted/core/gemini.py,sha256=0ATm5gHZWmu4k_9SZOsOx5XYY7nEer5LOzt1SMfQlIk,5836
6
+ cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
+ cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
8
+ cutted-0.4.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
+ cutted-0.4.0.dist-info/METADATA,sha256=wEeg-ive3z6zwUmLvZ_3Lh8W0xhsj12mMgSMASxWdUg,1503
10
+ cutted-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ cutted-0.4.0.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
+ cutted-0.4.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- cutted/__init__.py,sha256=F1mzO6qI2gD6d_DQsjZItLuIfNfa1Te5KwCMThWDQT4,49
2
- cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
3
- cutted/app.py,sha256=HaJ3yu8-WVDGKZmFcTVG4CK8q13eHyUQaIyOWMZWV68,10764
4
- cutted/core/audio_processor.py,sha256=gKBJ1wpdrX2IozmXDzW7MoFVp9uq0Pb3ezz9R3Ahmnw,6691
5
- cutted/core/gemini.py,sha256=yHsQXk4tDHcW5qJBuL2LpPvdChimxlmEbu76BvsoeY4,3108
6
- cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
7
- cutted/core/transcribe.py,sha256=0e7aCva4y6D-gKe1xw5HT9VoFgbvHGgV6utn12r8wXA,986
8
- cutted-0.3.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
9
- cutted-0.3.3.dist-info/METADATA,sha256=ehEIEm5qz3wVqBY74Itpwooe4SYOFpXISXUMZO7LVh4,1503
10
- cutted-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- cutted-0.3.3.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
12
- cutted-0.3.3.dist-info/RECORD,,
File without changes