cutted 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cutted/app.py +30 -26
- cutted/core/audio_processor.py +96 -6
- cutted/core/gemini.py +74 -1
- {cutted-0.1.1.dist-info → cutted-0.1.2.dist-info}/METADATA +3 -3
- cutted-0.1.2.dist-info/RECORD +12 -0
- cutted-0.1.1.dist-info/RECORD +0 -12
- {cutted-0.1.1.dist-info → cutted-0.1.2.dist-info}/WHEEL +0 -0
- {cutted-0.1.1.dist-info → cutted-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {cutted-0.1.1.dist-info → cutted-0.1.2.dist-info}/top_level.txt +0 -0
cutted/app.py
CHANGED
@@ -1,22 +1,20 @@
|
|
1
1
|
import time
|
2
|
-
import threading
|
3
2
|
import customtkinter
|
3
|
+
import tkinter.messagebox as messagebox
|
4
|
+
from .core import gemini
|
4
5
|
from .core.logger import *
|
5
6
|
from .core import audio_processor
|
6
7
|
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
7
8
|
|
8
|
-
import simpleaudio as sa # Add this import
|
9
|
-
|
10
9
|
customtkinter.set_appearance_mode("Dark")
|
11
10
|
|
12
11
|
class CuttedApp:
|
13
12
|
def __init__(self):
|
14
13
|
self.AudioProcessor = audio_processor.AudioProcessor()
|
14
|
+
self.gemini = gemini.GeminiClient()
|
15
15
|
self.canvas = None
|
16
16
|
self.cursor_line = None
|
17
17
|
self.last_slider_update = 0
|
18
|
-
self.play_obj = None
|
19
|
-
self.play_thread = None
|
20
18
|
self.is_playing = False
|
21
19
|
self.setup_ui()
|
22
20
|
|
@@ -120,33 +118,39 @@ class CuttedApp:
|
|
120
118
|
print_fail("No audio loaded.")
|
121
119
|
return
|
122
120
|
|
123
|
-
self.
|
124
|
-
|
125
|
-
start_ms = int(self.slider.get() * 1000)
|
126
|
-
audio = self.AudioProcessor.audio[start_ms:]
|
127
|
-
raw_data = audio.raw_data
|
128
|
-
num_channels = audio.channels
|
129
|
-
bytes_per_sample = audio.sample_width
|
130
|
-
sample_rate = audio.frame_rate
|
131
|
-
|
132
|
-
def playback():
|
133
|
-
self.is_playing = True
|
134
|
-
self.play_obj = sa.play_buffer(raw_data, num_channels, bytes_per_sample, sample_rate)
|
135
|
-
self.play_obj.wait_done()
|
136
|
-
self.is_playing = False
|
137
|
-
|
138
|
-
self.play_thread = threading.Thread(target=playback, daemon=True)
|
139
|
-
self.play_thread.start()
|
121
|
+
start_time = self.slider.get() if hasattr(self, 'slider') else 0
|
122
|
+
self.AudioProcessor.play_audio(start_time)
|
140
123
|
|
141
124
|
def stop_audio(self):
|
142
|
-
|
143
|
-
|
144
|
-
|
125
|
+
self.AudioProcessor.stop_audio()
|
126
|
+
self.is_playing = False
|
127
|
+
self.play_button.configure(text="Play")
|
145
128
|
|
146
129
|
def send_prompt(self):
|
130
|
+
if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
|
131
|
+
print_fail("No audio loaded.")
|
132
|
+
return
|
133
|
+
|
147
134
|
text = self.entry.get()
|
148
|
-
|
135
|
+
full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_lenght())}s long."
|
136
|
+
full_prompt += f"\n\nUser Prompt: {text}"
|
149
137
|
self.entry.delete(0, "end")
|
138
|
+
|
139
|
+
function_call, text_result = self.gemini.generate(full_prompt)
|
140
|
+
|
141
|
+
if function_call:
|
142
|
+
print_info(f"Gemini called {function_call.name}")
|
143
|
+
if function_call.name == "cut_audio":
|
144
|
+
print_info("Cut function called")
|
145
|
+
args = function_call.args
|
146
|
+
result = self.AudioProcessor.cut(args["start"], args["end"])
|
147
|
+
if not result:
|
148
|
+
messagebox.showerror("Error", "Please try again.")
|
149
|
+
self.update_plot()
|
150
|
+
elif text_result:
|
151
|
+
messagebox.showerror("Error", text_result.strip())
|
152
|
+
else:
|
153
|
+
print_fail("Gemini returned no data")
|
150
154
|
|
151
155
|
def run(self):
|
152
156
|
self.root.mainloop()
|
cutted/core/audio_processor.py
CHANGED
@@ -3,10 +3,26 @@ from pydub.utils import ratio_to_db
|
|
3
3
|
from .logger import *
|
4
4
|
import numpy as np
|
5
5
|
from matplotlib.figure import Figure
|
6
|
+
import pygame
|
7
|
+
import io
|
8
|
+
import threading
|
9
|
+
import time
|
6
10
|
|
7
11
|
class AudioProcessor:
|
8
12
|
def __init__(self):
|
9
13
|
self.audio_path = None
|
14
|
+
self.audio = None
|
15
|
+
self.is_playing_var = False
|
16
|
+
self.play_thread = None
|
17
|
+
self._init_pygame()
|
18
|
+
|
19
|
+
def _init_pygame(self):
|
20
|
+
try:
|
21
|
+
pygame.mixer.pre_init(frequency=44100, size=-16, channels=2, buffer=1024)
|
22
|
+
pygame.mixer.init()
|
23
|
+
print_success("Pygame initialized")
|
24
|
+
except pygame.error as e:
|
25
|
+
print_warn(f"Pygame initialization warning: {e}")
|
10
26
|
|
11
27
|
def load_audio(self, audio_path: str, volume: float = 1.0):
|
12
28
|
self.audio_path = audio_path
|
@@ -27,7 +43,6 @@ class AudioProcessor:
|
|
27
43
|
samples = samples / np.max(np.abs(samples))
|
28
44
|
|
29
45
|
times = np.linspace(0, len(samples) / self.audio.frame_rate, num=len(samples))
|
30
|
-
print(times)
|
31
46
|
|
32
47
|
fig = Figure(figsize=(5, 4), facecolor="#242424")
|
33
48
|
ax = fig.add_subplot()
|
@@ -54,10 +69,85 @@ class AudioProcessor:
|
|
54
69
|
return self.duration
|
55
70
|
|
56
71
|
def cut(self, start, end):
|
57
|
-
if
|
58
|
-
|
72
|
+
if len(start) == len(end):
|
73
|
+
if len(start) == 1:
|
74
|
+
print_info(f"Cutting from {start[0]} to {end[0]}")
|
75
|
+
start_ms = round(start[0] * 1000)
|
76
|
+
end_ms = round(end[0] * 1000)
|
77
|
+
self.audio = self.audio[:start_ms] + self.audio[end_ms:]
|
78
|
+
return True
|
79
|
+
else:
|
80
|
+
time_sets = list(zip(start, end))
|
81
|
+
subtract_time = 0
|
82
|
+
for single_start, single_end in time_sets:
|
83
|
+
single_start = single_start - subtract_time
|
84
|
+
single_end = single_end - subtract_time
|
85
|
+
print_info(f"Cutting from {single_start} to {single_end}")
|
86
|
+
start_ms = round(single_start * 1000)
|
87
|
+
end_ms = round(single_end * 1000)
|
88
|
+
self.audio = self.audio[:start_ms] + self.audio[end_ms:]
|
89
|
+
subtract_time += single_end - single_start
|
90
|
+
return True
|
91
|
+
else:
|
92
|
+
return False
|
93
|
+
|
94
|
+
def play_audio(self, start_time=0):
|
95
|
+
if self.audio is None:
|
96
|
+
print_fail("No audio loaded.")
|
97
|
+
return False
|
98
|
+
|
99
|
+
try:
|
100
|
+
self.stop_audio()
|
101
|
+
|
102
|
+
start_ms = int(start_time * 1000)
|
103
|
+
audio_segment = self.audio[start_ms:]
|
104
|
+
|
105
|
+
audio_segment = audio_segment.set_frame_rate(22050)
|
106
|
+
audio_segment = audio_segment.set_channels(2)
|
107
|
+
audio_segment = audio_segment.set_sample_width(2)
|
108
|
+
|
109
|
+
audio_data = io.BytesIO()
|
110
|
+
audio_segment.export(audio_data, format="wav")
|
111
|
+
audio_data.seek(0)
|
112
|
+
|
113
|
+
pygame.mixer.music.load(audio_data)
|
114
|
+
pygame.mixer.music.play()
|
115
|
+
self.is_playing_var = True
|
116
|
+
|
117
|
+
print_success(f"Playing audio from {start_time}s")
|
118
|
+
return True
|
119
|
+
|
120
|
+
except Exception as e:
|
121
|
+
print_fail(f"Error playing audio: {e}")
|
122
|
+
return False
|
123
|
+
|
124
|
+
def stop_audio(self):
|
125
|
+
try:
|
126
|
+
if pygame.mixer.get_init():
|
127
|
+
pygame.mixer.music.stop()
|
128
|
+
self.is_playing_var = False
|
129
|
+
print_info("Audio playback stopped")
|
130
|
+
except Exception as e:
|
131
|
+
print_warn(f"Error stopping audio: {e}")
|
132
|
+
|
133
|
+
def is_playing(self):
|
134
|
+
try:
|
135
|
+
if pygame.mixer.get_init():
|
136
|
+
return pygame.mixer.music.get_busy()
|
137
|
+
return False
|
138
|
+
except:
|
139
|
+
return False
|
140
|
+
|
141
|
+
def get_audio_info(self):
|
142
|
+
if self.audio is None:
|
143
|
+
return None
|
59
144
|
|
60
|
-
|
61
|
-
|
145
|
+
return {
|
146
|
+
"duration": self.get_lenght(),
|
147
|
+
"channels": self.audio.channels,
|
148
|
+
"frame_rate": self.audio.frame_rate,
|
149
|
+
"sample_width": self.audio.sample_width
|
150
|
+
}
|
62
151
|
|
63
|
-
|
152
|
+
def export_audio(self, path, format: str = "mp3"):
|
153
|
+
self.audio.export(path, format=format)
|
cutted/core/gemini.py
CHANGED
@@ -1,8 +1,81 @@
|
|
1
1
|
import os
|
2
|
+
import sys
|
2
3
|
from google import genai
|
3
4
|
from google.genai import types
|
4
5
|
|
5
6
|
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
6
7
|
|
7
8
|
if not GEMINI_API_KEY:
|
8
|
-
print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
|
9
|
+
print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
|
10
|
+
sys.exit(0)
|
11
|
+
|
12
|
+
class GeminiClient:
|
13
|
+
def __init__(self):
|
14
|
+
self.client = genai.Client(
|
15
|
+
api_key=GEMINI_API_KEY,
|
16
|
+
)
|
17
|
+
|
18
|
+
def generate(self, prompt: str, model: str = "gemini-2.0-flash"):
|
19
|
+
contents = [
|
20
|
+
types.Content(
|
21
|
+
role="user",
|
22
|
+
parts=[
|
23
|
+
types.Part.from_text(text=prompt),
|
24
|
+
],
|
25
|
+
),
|
26
|
+
]
|
27
|
+
tools = [
|
28
|
+
types.Tool(
|
29
|
+
function_declarations=[
|
30
|
+
types.FunctionDeclaration(
|
31
|
+
name="cut_audio",
|
32
|
+
description="Cuts specified parts out of audio. Multiple parts can be cut if a list of both start and end values is used as property.",
|
33
|
+
parameters=genai.types.Schema(
|
34
|
+
type = genai.types.Type.OBJECT,
|
35
|
+
required = ["start", "end"],
|
36
|
+
properties = {
|
37
|
+
"start": genai.types.Schema(
|
38
|
+
type = genai.types.Type.ARRAY,
|
39
|
+
items = genai.types.Schema(
|
40
|
+
type = genai.types.Type.NUMBER,
|
41
|
+
),
|
42
|
+
),
|
43
|
+
"end": genai.types.Schema(
|
44
|
+
type = genai.types.Type.ARRAY,
|
45
|
+
items = genai.types.Schema(
|
46
|
+
type = genai.types.Type.NUMBER,
|
47
|
+
),
|
48
|
+
),
|
49
|
+
},
|
50
|
+
),
|
51
|
+
),
|
52
|
+
])
|
53
|
+
]
|
54
|
+
generate_content_config = types.GenerateContentConfig(
|
55
|
+
tools=tools,
|
56
|
+
response_mime_type="text/plain",
|
57
|
+
)
|
58
|
+
|
59
|
+
response = self.client.models.generate_content(
|
60
|
+
model=model,
|
61
|
+
contents=contents,
|
62
|
+
config=generate_content_config,
|
63
|
+
)
|
64
|
+
|
65
|
+
function_call = None
|
66
|
+
text_response = None
|
67
|
+
try:
|
68
|
+
for candidate in response.candidates:
|
69
|
+
for part in candidate.content.parts:
|
70
|
+
if part.function_call:
|
71
|
+
function_call = part.function_call
|
72
|
+
if part.text:
|
73
|
+
text_response = part.text
|
74
|
+
except TypeError:
|
75
|
+
return None, None
|
76
|
+
|
77
|
+
return function_call, text_response
|
78
|
+
|
79
|
+
if __name__ == "__main__":
|
80
|
+
gemini = GeminiClient()
|
81
|
+
print(gemini.generate("cut from 10 to 20.5"))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cutted
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.2
|
4
4
|
Summary: AI-powered audio editor controllable via natural language.
|
5
5
|
Author-email: simon0302010 <simon0302010@gmail.com>
|
6
6
|
License-Expression: GPL-3.0
|
@@ -12,9 +12,9 @@ Requires-Dist: customtkinter
|
|
12
12
|
Requires-Dist: matplotlib
|
13
13
|
Requires-Dist: numpy
|
14
14
|
Requires-Dist: pydub
|
15
|
-
Requires-Dist:
|
15
|
+
Requires-Dist: pygame
|
16
16
|
Requires-Dist: google-genai
|
17
|
-
Requires-Dist: dotenv
|
17
|
+
Requires-Dist: python-dotenv
|
18
18
|
Dynamic: license-file
|
19
19
|
|
20
20
|
# Cutted
|
@@ -0,0 +1,12 @@
|
|
1
|
+
cutted/__init__.py,sha256=CP0x3JIScNbFVSOoF3eIQTKD5gDRfWXcCFE46rlZCio,49
|
2
|
+
cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
|
3
|
+
cutted/app.py,sha256=xGnPZXkHfPOID91joER5Z6hTbsnjlMj_V_JlDvDOqpM,5849
|
4
|
+
cutted/core/audio_processor.py,sha256=7-XCuPPTlozeuaD2LqyzwRGinu0NvowTLbAh2X4XJ98,5182
|
5
|
+
cutted/core/gemini.py,sha256=Ts_EbC1-rO9jIsdSlzKcmjLVS1o663GmfTdzmix12kE,2872
|
6
|
+
cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
|
7
|
+
cutted/core/transcribe.py,sha256=cm6ziM3_grXKpUCFHiAU7-6lFK_SVsf7-6n14vMYQng,992
|
8
|
+
cutted-0.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
+
cutted-0.1.2.dist-info/METADATA,sha256=dhNLoSMQ1aov1tLCxJBWMibPoUSpGhxN8TtIOzAUUpQ,1122
|
10
|
+
cutted-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
11
|
+
cutted-0.1.2.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
|
12
|
+
cutted-0.1.2.dist-info/RECORD,,
|
cutted-0.1.1.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
cutted/__init__.py,sha256=CP0x3JIScNbFVSOoF3eIQTKD5gDRfWXcCFE46rlZCio,49
|
2
|
-
cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
|
3
|
-
cutted/app.py,sha256=aTkYl-wTX6NqiGNuMXa6MvztK9D8m7H6Q39HHy5RB7w,5322
|
4
|
-
cutted/core/audio_processor.py,sha256=-RXq9iCOhr0Eq6odl9piZqrwVHh0RX5LB9A-Ip4xyOY,2010
|
5
|
-
cutted/core/gemini.py,sha256=BF9TMkPLzzR-cDPOFMGPnVzaZIzY00GpEIQkvBkCL9E,223
|
6
|
-
cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
|
7
|
-
cutted/core/transcribe.py,sha256=cm6ziM3_grXKpUCFHiAU7-6lFK_SVsf7-6n14vMYQng,992
|
8
|
-
cutted-0.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
9
|
-
cutted-0.1.1.dist-info/METADATA,sha256=z5O4yFd7SYWaVrYZ4WkBpB8bXmkwC3sStARpIpiMGOs,1120
|
10
|
-
cutted-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
11
|
-
cutted-0.1.1.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
|
12
|
-
cutted-0.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|