PyPI - pygpt-net - Versions diffs - 2.4.49__py3-none-any.whl → 2.4.51__py3-none-any.whl - Mend

pygpt-net 2.4.49__py3-none-any.whl → 2.4.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

CHANGELOG.md +10 -0
README.md +12 -62
pygpt_net/CHANGELOG.txt +10 -0
pygpt_net/__init__.py +3 -3
pygpt_net/controller/access/voice.py +19 -36
pygpt_net/controller/audio/__init__.py +15 -1
pygpt_net/controller/lang/custom.py +2 -1
pygpt_net/controller/ui/tabs.py +7 -1
pygpt_net/core/audio/__init__.py +10 -17
pygpt_net/core/audio/capture.py +349 -0
pygpt_net/data/config/config.json +5 -3
pygpt_net/data/config/models.json +3 -3
pygpt_net/data/config/modes.json +3 -3
pygpt_net/data/config/settings.json +13 -0
pygpt_net/data/locale/locale.de.ini +3 -0
pygpt_net/data/locale/locale.en.ini +3 -0
pygpt_net/data/locale/locale.es.ini +3 -0
pygpt_net/data/locale/locale.fr.ini +3 -0
pygpt_net/data/locale/locale.it.ini +3 -0
pygpt_net/data/locale/locale.pl.ini +3 -0
pygpt_net/data/locale/locale.uk.ini +3 -0
pygpt_net/data/locale/locale.zh.ini +3 -0
pygpt_net/plugin/audio_input/simple.py +45 -55
pygpt_net/provider/core/config/patch.py +9 -1
pygpt_net/ui/layout/chat/input.py +0 -12
pygpt_net/ui/menu/__init__.py +4 -3
pygpt_net/ui/widget/audio/input_button.py +84 -24
pygpt_net/ui/widget/dialog/snap.py +2 -2
pygpt_net/ui/widget/dialog/update.py +3 -2
{pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/METADATA +13 -63
{pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/RECORD +34 -33
{pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/LICENSE +0 -0
{pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/WHEEL +0 -0
{pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/entry_points.txt +0 -0

CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,15 @@
 # CHANGELOG
+## 2.4.51 (2025-01-17)
+- Added a "Continuous recording" mode under Audio Input in the Notepad tab, allowing for recording long voice notes and real-time auto-transcription. (beta)
+- A new option has been added in Settings -> Audio -> Continuous recording auto-transcribe interval.
+## 2.4.50 (2025-01-16)
+- Refactored audio input core.
+- Added audio input volume progress bar.
 ## 2.4.49 (2025-01-16)
 - Fix: stream render in Assistants mode.

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 [![pygpt](https://snapcraft.io/pygpt/badge.svg)](https://snapcraft.io/pygpt)
-Release: **2.4.49** | build: **2025.01.16** | Python: **>=3.10, <3.13**
+Release: **2.4.51** | build: **2025.01.17** | Python: **>=3.10, <3.13**
 > Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
 >
@@ -118,6 +118,7 @@ sudo snap connect pygpt:camera
 ```commandline
 sudo snap connect pygpt:audio-record :audio-record
+sudo snap connect pygpt:alsa
 ```
 **Connecting IPython in Docker in Snap version**:
@@ -3952,6 +3953,16 @@ may consume additional tokens that are not displayed in the main window.
 ## Recent changes:
+**2.4.51 (2025-01-17)**
+- Added a "Continuous recording" mode under Audio Input in the Notepad tab, allowing for recording long voice notes and real-time auto-transcription. (beta)
+- A new option has been added in Settings -> Audio -> Continuous recording auto-transcribe interval.
+**2.4.50 (2025-01-16)**
+- Refactored audio input core.
+- Added audio input volume progress bar.
 **2.4.49 (2025-01-16)**
 - Fix: stream render in Assistants mode.
@@ -3977,67 +3988,6 @@ may consume additional tokens that are not displayed in the main window.
 - Introduced a new mode in "Chat with Files": "Retrieve Only", which allows for retrieving raw documents from the index.
 - Fixed a bug related to tool calls in the Gemini provider when using Chat with Files mode.
-**2.4.45 (2024-12-16)**
-- Enhanced web data loaders UI.
-**2.4.44 (2024-12-16)**
-- Enhanced web data loaders.
-- Web loaders have been added to attachments, allowing external web content to be attached to context via the "+Web" button in the Attachments tab.
-- Improved handling of attachments in groups and added an attachment icon when a group contains attachments.
-**2.4.43 (2024-12-15)**
-- Fix: Bug on attachment upload.
-- Added: Attachments uploaded in groups are now available for all contexts in the group (beta).
-**2.4.42 (2024-12-15)**
-- Added Mailer plugin, which allows sending and retrieving emails from the server, and reading them. It currently supports only SMTP.
-- Added 'web_request' command to the Web Search plugin, enabling GET/POST/PUT and other connections to any address and API endpoint. It also supports sending POST data, files, headers, cookies, and more.
-- Improved audio output.
-- Enhanced visibility of the Video menu.
-- Other fixes.
-**2.4.41 (2024-12-14)**
-- Improved switching between columns on a split screen.
-- Added visual identification of the active column.
-**2.4.40 (2024-12-13)**
-- Enhanced Split Screen mode, now promoted from beta to stable.
-- Python Code Interpreter tool added to the Tabs.
-- HTML/JS Canvas tool added to the Tabs.
-- Added attachment icon to the context list if context has attachments.
-- Improved audio playback.
-- Improved web search.
-- Added a thumbnail image to web search results.
-- Added a new commands to web search: "extract_images" and "extract_links".
-- Added the option "Use raw content (without summarization)" to the web search plugin, which provides a more detailed result to the main model.
-- Extended the default maximum result characters to 50,000 in the web search plugin.
-**2.4.39 (2024-12-09)**
-- Added "Split Screen" mode (accessible via the switch in the bottom-right corner of the screen), which allows you to work in two windows simultaneously. It is currently experimental (beta). Future updates will include Code Interpreter and Canvas running in tabs.
-- Fixed: Language switch.
-**2.4.38 (2024-12-08)**
-- Added the ability to select a style for chat display between: Blocks, ChatGPT-like, and ChatGPT-like Wide. New option in the menu: Config -> Theme -> Style...
-- Added configuration options for audio input in Settings -> Audio -> Audio Input Device, Channels, and Sampling rate.
-**2.4.37 (2024-11-30)**
-- The `Query only` mode in `Uploaded` tab has been renamed to `RAG`.
-- New options have been added under `Settings -> Files and Attachments`:
-  - `Use history in RAG query`: When enabled, the content of the entire conversation will be used when preparing a query if the mode is set to RAG or Summary.
-  - `RAG limit`: This option is applicable only if 'Use history in RAG query' is enabled. It specifies the limit on how many recent entries in the conversation will be used when generating a query for RAG. A value of 0 indicates no limit.
-- Cache: dynamic parts of the system prompt (from plugins) have been moved to the very end of the prompt stack to enable the use of prompt cache mechanisms in OpenAI.
 # Credits and links
 **Official website:** <https://pygpt.net>

pygpt_net/CHANGELOG.txt CHANGED Viewed

@@ -1,3 +1,13 @@
+2.4.51 (2025-01-17)
+- Added a "Continuous recording" mode under Audio Input in the Notepad tab, allowing for recording long voice notes and real-time auto-transcription. (beta)
+- A new option has been added in Settings -> Audio -> Continuous recording auto-transcribe interval.
+2.4.50 (2025-01-16)
+- Refactored audio input core.
+- Added audio input volume progress bar.
 2.4.49 (2025-01-16)
 - Fix: stream render in Assistants mode.

pygpt_net/__init__.py CHANGED Viewed

@@ -6,15 +6,15 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.01.16 01:00:00                  #
+# Updated Date: 2025.01.17 03:00:00                  #
 # ================================================== #
 __author__ = "Marcin Szczygliński"
 __copyright__ = "Copyright 2025, Marcin Szczygliński"
 __credits__ = ["Marcin Szczygliński"]
 __license__ = "MIT"
-__version__ = "2.4.49"
-__build__ = "2025.01.16"
+__version__ = "2.4.51"
+__build__ = "2025.01.17"
 __maintainer__ = "Marcin Szczygliński"
 __github__ = "https://github.com/szczyglis-dev/py-gpt"
 __report__ = "https://github.com/szczyglis-dev/py-gpt/issues"

pygpt_net/controller/access/voice.py CHANGED Viewed

@@ -6,13 +6,11 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.12.14 08:00:00                  #
+# Updated Date: 2025.01.16 17:00:00                  #
 # ================================================== #
 from typing import Optional, List, Dict, Any
-import pyaudio
-import wave
 import os
 from PySide6.QtCore import QTimer, Slot
@@ -38,9 +36,6 @@ class Voice:
         """
         self.window = window
         self.is_recording = False
-        self.frames = []
-        self.p = None
-        self.stream = None
         self.timer = None
         self.input_file = "voice_control.wav"
         self.thread_started = False
@@ -218,15 +213,6 @@ class Voice:
     def start_recording(self):
         """Start recording"""
-        self.frames = []  # clear audio frames
-        def callback(in_data, frame_count, time_info, status):
-            self.frames.append(in_data)
-            if self.is_recording:
-                return (in_data, pyaudio.paContinue)
-            else:
-                return (in_data, pyaudio.paComplete)
         try:
             self.is_recording = True
             self.switch_btn_stop()
@@ -235,23 +221,26 @@ class Voice:
             if self.window.controller.audio.is_playing():
                 self.window.controller.audio.stop_output()
+            # set audio volume bar
+            self.window.core.audio.capture.set_bar(
+                self.window.ui.nodes['voice.control.btn'].bar
+            )
             # start timeout timer to prevent infinite recording
             if self.timer is None:
                 self.timer = QTimer()
                 self.timer.timeout.connect(self.stop_timeout)
                 self.timer.start(self.TIMEOUT_SECONDS * 1000)
-            self.p = pyaudio.PyAudio()
-            self.stream = self.p.open(format=pyaudio.paInt16,
-                                      channels=1,
-                                      rate=44100,
-                                      input=True,
-                                      frames_per_buffer=1024,
-                                      stream_callback=callback)
+            if not self.window.core.audio.capture.check_audio_input():
+                raise Exception("Audio input not working.")
+                # IMPORTANT!!!!
+                # Stop here if audio input not working!
+                # This prevents the app from freezing when audio input is not working!
+            self.window.core.audio.capture.start()  # start recording if audio is OK
             self.window.update_status(trans('audio.speak.now'))
             self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STARTED))  # app event
-            self.stream.start_stream()
         except Exception as e:
             self.is_recording = False
             self.window.core.debug.log(e)
@@ -270,35 +259,29 @@ class Voice:
         :param timeout: True if stopped due to timeout
         """
+        self.window.core.audio.capture.reset_audio_level()
         self.is_recording = False
         if self.timer:
             self.timer.stop()
             self.timer = None
         self.switch_btn_start()  # switch button to start
         path = os.path.join(self.window.core.config.path, self.input_file)
+        self.window.core.audio.capture.set_path(path)
-        if self.stream is not None:
-            self.stream.stop_stream()
-            self.stream.close()
-            self.p.terminate()
+        if self.window.core.audio.capture.has_source():
+            self.window.core.audio.capture.stop()  # stop recording
             # abort if timeout
             if timeout:
                 self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED))  # app event
                 self.window.update_status("Aborted.".format(self.TIMEOUT_SECONDS))
                 return
-            if self.frames:
-                if len(self.frames) < self.MIN_FRAMES:
+            if self.window.core.audio.capture.has_frames():
+                frames = self.window.core.audio.capture.get_frames()
+                if len(frames) < self.MIN_FRAMES:
                     self.window.update_status(trans("status.audio.too_short"))
                     self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED))  # app event
                     return
-                wf = wave.open(path, 'wb')
-                wf.setnchannels(1)
-                wf.setsampwidth(self.p.get_sample_size(pyaudio.paInt16))
-                wf.setframerate(44100)
-                wf.writeframes(b''.join(self.frames))
-                wf.close()
                 self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_SENT))  # app event
                 self.handle_thread(True)  # handle transcription in simple mode
         else:

pygpt_net/controller/audio/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.12.14 18:00:00                  #
+# Updated Date: 2025.01.17 02:00:00                  #
 # ================================================== #
 import os
@@ -29,6 +29,8 @@ class Audio:
     def setup(self):
         """Setup controller"""
         self.update()
+        if self.window.core.config.get("audio.input.continuous", False):
+            self.window.ui.plugin_addon['audio.input.btn'].continuous.setChecked(True)
     def toggle_input(
             self,
@@ -54,6 +56,18 @@ class Audio:
         else:
             self.enable_output()
+    def toggle_continuous(self, state: bool):
+        """
+        Toggle continuous audio input
+        :param state: True to enable, False to disable
+        """
+        if state:
+            self.window.core.config.set("audio.input.continuous", True)
+        else:
+            self.window.core.config.set("audio.input.continuous", False)
+        self.window.core.config.save()
     def enable_output(self):
         """Enable audio output"""
         self.toggle_output_icon(True)

pygpt_net/controller/lang/custom.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.12.09 00:00:00                  #
+# Updated Date: 2025.01.17 02:00:00                  #
 # ================================================== #
 from PySide6.QtCore import Qt
@@ -46,6 +46,7 @@ class Custom:
         # checkboxes
         self.window.ui.plugin_addon['audio.input'].btn_toggle.setText(trans('audio.speak.btn'))
+        self.window.ui.plugin_addon['audio.input.btn'].continuous.setText(trans('audio.speak.btn.continuous'))
         self.window.ui.config['assistant']['tool.file_search'].box.setText(trans('assistant.tool.file_search'))
         self.window.ui.config['assistant']['tool.code_interpreter'].box.setText(
             trans('assistant.tool.code_interpreter')

pygpt_net/controller/ui/tabs.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.12.14 07:00:00                  #
+# Updated Date: 2025.01.17 02:00:00                  #
 # ================================================== #
 from typing import Any, Optional
@@ -182,6 +182,12 @@ class Tabs:
         if prev_tab != idx or prev_column != column_idx:
             self.window.dispatch(AppEvent(AppEvent.TAB_SELECTED))  # app event
+        # show/hide audio record footer
+        if tab.type == Tab.TAB_NOTEPAD:
+            self.window.ui.plugin_addon['audio.input.btn'].notepad_footer.setVisible(True)
+        else:
+            self.window.ui.plugin_addon['audio.input.btn'].notepad_footer.setVisible(False)
         self.window.controller.ui.update()
         self.update_current()

pygpt_net/core/audio/__init__.py CHANGED Viewed

@@ -6,17 +6,19 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.12.14 18:00:00                  #
+# Updated Date: 2025.01.16 17:00:00                  #
 # ================================================== #
 import re
 from typing import Union, Optional, Tuple, List
+from PySide6.QtMultimedia import QMediaDevices
 from bs4 import UnicodeDammit
 from pygpt_net.provider.audio_input.base import BaseProvider as InputBaseProvider
 from pygpt_net.provider.audio_output.base import BaseProvider as OutputBaseProvider
+from .capture import Capture
 from .whisper import Whisper
@@ -28,6 +30,7 @@ class Audio:
         :param window: Window instance
         """
         self.window = window
+        self.capture = Capture(window)
         self.whisper = Whisper(window)
         self.providers = {
             "input": {},
@@ -41,21 +44,12 @@ class Audio:
         :return devices list: [(id, name)]
         """
-        import pyaudio
-        devices = []
-        try:
-            p = pyaudio.PyAudio()
-            num_devices = p.get_device_count()
-            for i in range(num_devices):
-                info = p.get_device_info_by_index(i)
-                if info["maxInputChannels"] > 0:
-                    dammit = UnicodeDammit(info["name"])
-                    devices.append((i, dammit.unicode_markup))
-                    # print(f"Device ID {i}: {info['name']}")
-            p.terminate()
-        except Exception as e:
-            print(f"Audio input devices receive error: {e}")
-        return devices
+        devices = QMediaDevices.audioInputs()
+        devices_list = []
+        for index, device in enumerate(devices):
+            dammit = UnicodeDammit(device.description())
+            devices_list.append((index, dammit.unicode_markup))
+        return devices_list
     def is_device_compatible(self, device_index: int) -> bool:
         """
@@ -69,7 +63,6 @@ class Audio:
         channels = int(self.window.core.config.get('audio.input.channels', 1))
         p = pyaudio.PyAudio()
         info = p.get_device_info_by_index(device_index)
-        supported = False
         try:
             p.is_format_supported(
                 rate=rate,

pygpt-net 2.4.49__py3-none-any.whl → 2.4.51__py3-none-any.whl

pygpt-net 2.4.49__py3-none-any.whl → 2.4.51__py3-none-any.whl