PyPI - cutted - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

cutted 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

cutted/app.py +56 -26
cutted/core/audio_processor.py +96 -6
cutted/core/gemini.py +74 -1
{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/METADATA +3 -3
cutted-0.2.0.dist-info/RECORD +12 -0
cutted-0.1.1.dist-info/RECORD +0 -12
{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/WHEEL +0 -0
{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/licenses/LICENSE +0 -0
{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/top_level.txt +0 -0

cutted/app.py CHANGED Viewed

@@ -1,22 +1,20 @@
 import time
-import threading
 import customtkinter
+import tkinter.messagebox as messagebox
+from .core import gemini
 from .core.logger import *
 from .core import audio_processor
 from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
-import simpleaudio as sa  # Add this import
 customtkinter.set_appearance_mode("Dark")
 class CuttedApp:
     def __init__(self):
         self.AudioProcessor = audio_processor.AudioProcessor()
+        self.gemini = gemini.GeminiClient()
         self.canvas = None
         self.cursor_line = None
         self.last_slider_update = 0
-        self.play_obj = None
-        self.play_thread = None
         self.is_playing = False
         self.setup_ui()
@@ -39,6 +37,9 @@ class CuttedApp:
         button = customtkinter.CTkButton(self.root, text="Load audio", command=self.select_file)
         button.place(relx=0.5, rely=1.0, anchor="s", y=-30)
+        export_button = customtkinter.CTkButton(self.root, text="Export", command=self.export_audio, width=70)
+        export_button.place(relx=0.9, rely=1.0, anchor="s", y=-30)
         self.play_button = customtkinter.CTkButton(self.root, text="Play", command=self.play_audio, width=50)
         self.play_button.place(relx=0.3, rely=1.0, anchor="s", y=-30)
@@ -120,33 +121,62 @@ class CuttedApp:
             print_fail("No audio loaded.")
             return
-        self.stop_audio()
-        start_ms = int(self.slider.get() * 1000)
-        audio = self.AudioProcessor.audio[start_ms:]
-        raw_data = audio.raw_data
-        num_channels = audio.channels
-        bytes_per_sample = audio.sample_width
-        sample_rate = audio.frame_rate
-        def playback():
-            self.is_playing = True
-            self.play_obj = sa.play_buffer(raw_data, num_channels, bytes_per_sample, sample_rate)
-            self.play_obj.wait_done()
-            self.is_playing = False
-        self.play_thread = threading.Thread(target=playback, daemon=True)
-        self.play_thread.start()
+        start_time = self.slider.get() if hasattr(self, 'slider') else 0
+        self.AudioProcessor.play_audio(start_time)
     def stop_audio(self):
-        if self.play_obj is not None and self.is_playing:
-            self.play_obj.stop()
-            self.is_playing = False
+        self.AudioProcessor.stop_audio()
+        self.is_playing = False
+    def export_audio(self):
+        if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
+            print_fail("No audio loaded.")
+            return
+        save_path = customtkinter.filedialog.asksaveasfilename(
+            defaultextension=".mp3",
+            filetypes=[
+                ("MP3 files", "*.mp3"),
+                ("WAV files", "*.wav"),
+            ]
+        )
+        if save_path:
+            if save_path.lower().endswith(".wav"):
+                format = "wav"
+            elif save_path.lower().endswith(".mp3"):
+                format = "mp3"
+            else:
+                format = "mp3"
+            self.AudioProcessor.export_audio(save_path, format)
+            print_success(f"Audio exported to {save_path}")
     def send_prompt(self):
+        if not hasattr(self.AudioProcessor, "audio") or self.AudioProcessor.audio is None:
+            print_fail("No audio loaded.")
+            return
         text = self.entry.get()
-        print(f"Prompt: {text}")
+        full_prompt = f"You are a audio editing AI. You are controllable via natural language and editing a audio file. The audio file is {round(self.AudioProcessor.get_lenght())}s long."
+        full_prompt += f"\n\nUser Prompt: {text}"
         self.entry.delete(0, "end")
+        function_call, text_result = self.gemini.generate(full_prompt)
+        if function_call:
+            print_info(f"Gemini called {function_call.name}")
+            if function_call.name == "cut_audio":
+                print_info("Cut function called")
+                args = function_call.args
+                result = self.AudioProcessor.cut(args["start"], args["end"])
+                if not result:
+                    messagebox.showerror("Error", "Please try again.")
+            self.update_plot()
+        elif text_result:
+            messagebox.showerror("Error", text_result.strip())
+        else:
+            print_fail("Gemini returned no data")
     def run(self):
         self.root.mainloop()

cutted/core/audio_processor.py CHANGED Viewed

@@ -3,10 +3,26 @@ from pydub.utils import ratio_to_db
 from .logger import *
 import numpy as np
 from matplotlib.figure import Figure
+import pygame
+import io
+import threading
+import time
 class AudioProcessor:
     def __init__(self):
         self.audio_path = None
+        self.audio = None
+        self.is_playing_var = False
+        self.play_thread = None
+        self._init_pygame()
+    def _init_pygame(self):
+        try:
+            pygame.mixer.pre_init(frequency=44100, size=-16, channels=2, buffer=1024)
+            pygame.mixer.init()
+            print_success("Pygame initialized")
+        except pygame.error as e:
+            print_warn(f"Pygame initialization warning: {e}")
     def load_audio(self, audio_path: str, volume: float = 1.0):
         self.audio_path = audio_path
@@ -27,7 +43,6 @@ class AudioProcessor:
         samples = samples / np.max(np.abs(samples))
         times = np.linspace(0, len(samples) / self.audio.frame_rate, num=len(samples))
-        print(times)
         fig = Figure(figsize=(5, 4), facecolor="#242424")
         ax = fig.add_subplot()
@@ -54,10 +69,85 @@ class AudioProcessor:
         return self.duration
     def cut(self, start, end):
-        if type(start) == list and type(end) == list:
-            print("Cutting multiple segments")
+        if len(start) == len(end):
+            if len(start) == 1:
+                print_info(f"Cutting from {start[0]} to {end[0]}")
+                start_ms = round(start[0] * 1000)
+                end_ms = round(end[0] * 1000)
+                self.audio = self.audio[:start_ms] + self.audio[end_ms:]
+                return True
+            else:
+                time_sets = list(zip(start, end))
+                subtract_time = 0
+                for single_start, single_end in time_sets:
+                    single_start = single_start - subtract_time
+                    single_end = single_end - subtract_time
+                    print_info(f"Cutting from {single_start} to {single_end}")
+                    start_ms = round(single_start * 1000)
+                    end_ms = round(single_end * 1000)
+                    self.audio = self.audio[:start_ms] + self.audio[end_ms:]
+                    subtract_time += single_end - single_start
+                return True
+        else:
+            return False
+    def play_audio(self, start_time=0):
+        if self.audio is None:
+            print_fail("No audio loaded.")
+            return False
+        try:
+            self.stop_audio()
+            start_ms = int(start_time * 1000)
+            audio_segment = self.audio[start_ms:]
+            audio_segment = audio_segment.set_frame_rate(22050)
+            audio_segment = audio_segment.set_channels(2)
+            audio_segment = audio_segment.set_sample_width(2)
+            audio_data = io.BytesIO()
+            audio_segment.export(audio_data, format="wav")
+            audio_data.seek(0)
+            pygame.mixer.music.load(audio_data)
+            pygame.mixer.music.play()
+            self.is_playing_var = True
+            print_success(f"Playing audio from {start_time}s")
+            return True
+        except Exception as e:
+            print_fail(f"Error playing audio: {e}")
+            return False
+    def stop_audio(self):
+        try:
+            if pygame.mixer.get_init():
+                pygame.mixer.music.stop()
+                self.is_playing_var = False
+                print_info("Audio playback stopped")
+        except Exception as e:
+            print_warn(f"Error stopping audio: {e}")
+    def is_playing(self):
+        try:
+            if pygame.mixer.get_init():
+                return pygame.mixer.music.get_busy()
+            return False
+        except:
+            return False
+    def get_audio_info(self):
+        if self.audio is None:
+            return None
-        start_ms = round(start * 1000)
-        end_ms = round(end * 1000)
+        return {
+            "duration": self.get_lenght(),
+            "channels": self.audio.channels,
+            "frame_rate": self.audio.frame_rate,
+            "sample_width": self.audio.sample_width
+        }
-        self.audio = self.audio[:start_ms] + self.audio[end_ms:]
+    def export_audio(self, path, format: str = "mp3"):
+        self.audio.export(path, format=format)

cutted/core/gemini.py CHANGED Viewed

@@ -1,8 +1,81 @@
 import os
+import sys
 from google import genai
 from google.genai import types
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
-    print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
+    print("Please set the environment variable GEMINI_API_KEY to your Gemini API Key.")
+    sys.exit(0)
+class GeminiClient:
+    def __init__(self):
+        self.client = genai.Client(
+            api_key=GEMINI_API_KEY,
+        )
+    def generate(self, prompt: str, model: str = "gemini-2.0-flash"):
+        contents = [
+            types.Content(
+                role="user",
+                parts=[
+                    types.Part.from_text(text=prompt),
+                ],
+            ),
+        ]
+        tools = [
+            types.Tool(
+                function_declarations=[
+                    types.FunctionDeclaration(
+                        name="cut_audio",
+                        description="Cuts specified parts out of audio. Multiple parts can be cut if a list of both start and end values is used as property.",
+                        parameters=genai.types.Schema(
+                            type = genai.types.Type.OBJECT,
+                            required = ["start", "end"],
+                            properties = {
+                                "start": genai.types.Schema(
+                                    type = genai.types.Type.ARRAY,
+                                    items = genai.types.Schema(
+                                        type = genai.types.Type.NUMBER,
+                                    ),
+                                ),
+                                "end": genai.types.Schema(
+                                    type = genai.types.Type.ARRAY,
+                                    items = genai.types.Schema(
+                                        type = genai.types.Type.NUMBER,
+                                    ),
+                                    ),
+                            },
+                        ),
+                    ),
+                ])
+        ]
+        generate_content_config = types.GenerateContentConfig(
+            tools=tools,
+            response_mime_type="text/plain",
+        )
+        response = self.client.models.generate_content(
+            model=model,
+            contents=contents,
+            config=generate_content_config,
+        )
+        function_call = None
+        text_response = None
+        try:
+            for candidate in response.candidates:
+                for part in candidate.content.parts:
+                    if part.function_call:
+                        function_call = part.function_call
+                    if part.text:
+                        text_response = part.text
+        except TypeError:
+            return None, None
+        return function_call, text_response
+if __name__ == "__main__":
+    gemini = GeminiClient()
+    print(gemini.generate("cut from 10 to 20.5"))

{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cutted
-Version: 0.1.1
+Version: 0.2.0
 Summary: AI-powered audio editor controllable via natural language.
 Author-email: simon0302010 <simon0302010@gmail.com>
 License-Expression: GPL-3.0
@@ -12,9 +12,9 @@ Requires-Dist: customtkinter
 Requires-Dist: matplotlib
 Requires-Dist: numpy
 Requires-Dist: pydub
-Requires-Dist: simpleaudio
+Requires-Dist: pygame
 Requires-Dist: google-genai
-Requires-Dist: dotenv
+Requires-Dist: python-dotenv
 Dynamic: license-file
 # Cutted

cutted-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+cutted/__init__.py,sha256=CP0x3JIScNbFVSOoF3eIQTKD5gDRfWXcCFE46rlZCio,49
+cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
+cutted/app.py,sha256=qZ6idEqEmdtCLOahBN4803u-t12oNRhTAgbN3XiIiKY,6803
+cutted/core/audio_processor.py,sha256=7-XCuPPTlozeuaD2LqyzwRGinu0NvowTLbAh2X4XJ98,5182
+cutted/core/gemini.py,sha256=Ts_EbC1-rO9jIsdSlzKcmjLVS1o663GmfTdzmix12kE,2872
+cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
+cutted/core/transcribe.py,sha256=cm6ziM3_grXKpUCFHiAU7-6lFK_SVsf7-6n14vMYQng,992
+cutted-0.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+cutted-0.2.0.dist-info/METADATA,sha256=4hl_l2XcGg74HKYMcbAlklxduA8nw066deSAVBBuvOs,1122
+cutted-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cutted-0.2.0.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
+cutted-0.2.0.dist-info/RECORD,,

cutted-0.1.1.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-cutted/__init__.py,sha256=CP0x3JIScNbFVSOoF3eIQTKD5gDRfWXcCFE46rlZCio,49
-cutted/__main__.py,sha256=lYGLgtIZ_vGZIJmWG6ZQoqOdyOJnaWEA4NBn5Rc7Q8E,61
-cutted/app.py,sha256=aTkYl-wTX6NqiGNuMXa6MvztK9D8m7H6Q39HHy5RB7w,5322
-cutted/core/audio_processor.py,sha256=-RXq9iCOhr0Eq6odl9piZqrwVHh0RX5LB9A-Ip4xyOY,2010
-cutted/core/gemini.py,sha256=BF9TMkPLzzR-cDPOFMGPnVzaZIzY00GpEIQkvBkCL9E,223
-cutted/core/logger.py,sha256=AjqrgW2LV9HdPkPQ8oOmyd9lWzVSIg46r74ILR7mVHo,585
-cutted/core/transcribe.py,sha256=cm6ziM3_grXKpUCFHiAU7-6lFK_SVsf7-6n14vMYQng,992
-cutted-0.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-cutted-0.1.1.dist-info/METADATA,sha256=z5O4yFd7SYWaVrYZ4WkBpB8bXmkwC3sStARpIpiMGOs,1120
-cutted-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cutted-0.1.1.dist-info/top_level.txt,sha256=PL6glZvzRyKWCDn5aoYI9uH8HlEA5Qd_XFJowJKARYI,7
-cutted-0.1.1.dist-info/RECORD,,

{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{cutted-0.1.1.dist-info → cutted-0.2.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

cutted 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

cutted 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl