pygpt-net 2.4.49__py3-none-any.whl → 2.4.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. CHANGELOG.md +10 -0
  2. README.md +12 -62
  3. pygpt_net/CHANGELOG.txt +10 -0
  4. pygpt_net/__init__.py +3 -3
  5. pygpt_net/controller/access/voice.py +19 -36
  6. pygpt_net/controller/audio/__init__.py +15 -1
  7. pygpt_net/controller/lang/custom.py +2 -1
  8. pygpt_net/controller/ui/tabs.py +7 -1
  9. pygpt_net/core/audio/__init__.py +10 -17
  10. pygpt_net/core/audio/capture.py +349 -0
  11. pygpt_net/data/config/config.json +5 -3
  12. pygpt_net/data/config/models.json +3 -3
  13. pygpt_net/data/config/modes.json +3 -3
  14. pygpt_net/data/config/settings.json +13 -0
  15. pygpt_net/data/locale/locale.de.ini +3 -0
  16. pygpt_net/data/locale/locale.en.ini +3 -0
  17. pygpt_net/data/locale/locale.es.ini +3 -0
  18. pygpt_net/data/locale/locale.fr.ini +3 -0
  19. pygpt_net/data/locale/locale.it.ini +3 -0
  20. pygpt_net/data/locale/locale.pl.ini +3 -0
  21. pygpt_net/data/locale/locale.uk.ini +3 -0
  22. pygpt_net/data/locale/locale.zh.ini +3 -0
  23. pygpt_net/plugin/audio_input/simple.py +45 -55
  24. pygpt_net/provider/core/config/patch.py +9 -1
  25. pygpt_net/ui/layout/chat/input.py +0 -12
  26. pygpt_net/ui/menu/__init__.py +4 -3
  27. pygpt_net/ui/widget/audio/input_button.py +84 -24
  28. pygpt_net/ui/widget/dialog/snap.py +2 -2
  29. pygpt_net/ui/widget/dialog/update.py +3 -2
  30. {pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/METADATA +13 -63
  31. {pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/RECORD +34 -33
  32. {pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/LICENSE +0 -0
  33. {pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/WHEEL +0 -0
  34. {pygpt_net-2.4.49.dist-info → pygpt_net-2.4.51.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 2.4.51 (2025-01-17)
4
+
5
+ - Added a "Continuous recording" mode under Audio Input in the Notepad tab, allowing for recording long voice notes and real-time auto-transcription. (beta)
6
+ - A new option has been added in Settings -> Audio -> Continuous recording auto-transcribe interval.
7
+
8
+ ## 2.4.50 (2025-01-16)
9
+
10
+ - Refactored audio input core.
11
+ - Added audio input volume progress bar.
12
+
3
13
  ## 2.4.49 (2025-01-16)
4
14
 
5
15
  - Fix: stream render in Assistants mode.
README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![pygpt](https://snapcraft.io/pygpt/badge.svg)](https://snapcraft.io/pygpt)
4
4
 
5
- Release: **2.4.49** | build: **2025.01.16** | Python: **>=3.10, <3.13**
5
+ Release: **2.4.51** | build: **2025.01.17** | Python: **>=3.10, <3.13**
6
6
 
7
7
  > Official website: https://pygpt.net | Documentation: https://pygpt.readthedocs.io
8
8
  >
@@ -118,6 +118,7 @@ sudo snap connect pygpt:camera
118
118
 
119
119
  ```commandline
120
120
  sudo snap connect pygpt:audio-record :audio-record
121
+ sudo snap connect pygpt:alsa
121
122
  ```
122
123
 
123
124
  **Connecting IPython in Docker in Snap version**:
@@ -3952,6 +3953,16 @@ may consume additional tokens that are not displayed in the main window.
3952
3953
 
3953
3954
  ## Recent changes:
3954
3955
 
3956
+ **2.4.51 (2025-01-17)**
3957
+
3958
+ - Added a "Continuous recording" mode under Audio Input in the Notepad tab, allowing for recording long voice notes and real-time auto-transcription. (beta)
3959
+ - A new option has been added in Settings -> Audio -> Continuous recording auto-transcribe interval.
3960
+
3961
+ **2.4.50 (2025-01-16)**
3962
+
3963
+ - Refactored audio input core.
3964
+ - Added audio input volume progress bar.
3965
+
3955
3966
  **2.4.49 (2025-01-16)**
3956
3967
 
3957
3968
  - Fix: stream render in Assistants mode.
@@ -3977,67 +3988,6 @@ may consume additional tokens that are not displayed in the main window.
3977
3988
  - Introduced a new mode in "Chat with Files": "Retrieve Only", which allows for retrieving raw documents from the index.
3978
3989
  - Fixed a bug related to tool calls in the Gemini provider when using Chat with Files mode.
3979
3990
 
3980
- **2.4.45 (2024-12-16)**
3981
-
3982
- - Enhanced web data loaders UI.
3983
-
3984
- **2.4.44 (2024-12-16)**
3985
-
3986
- - Enhanced web data loaders.
3987
- - Web loaders have been added to attachments, allowing external web content to be attached to context via the "+Web" button in the Attachments tab.
3988
- - Improved handling of attachments in groups and added an attachment icon when a group contains attachments.
3989
-
3990
- **2.4.43 (2024-12-15)**
3991
-
3992
- - Fix: Bug on attachment upload.
3993
- - Added: Attachments uploaded in groups are now available for all contexts in the group (beta).
3994
-
3995
- **2.4.42 (2024-12-15)**
3996
-
3997
- - Added Mailer plugin, which allows sending and retrieving emails from the server, and reading them. It currently supports only SMTP.
3998
- - Added 'web_request' command to the Web Search plugin, enabling GET/POST/PUT and other connections to any address and API endpoint. It also supports sending POST data, files, headers, cookies, and more.
3999
- - Improved audio output.
4000
- - Enhanced visibility of the Video menu.
4001
- - Other fixes.
4002
-
4003
- **2.4.41 (2024-12-14)**
4004
-
4005
- - Improved switching between columns on a split screen.
4006
- - Added visual identification of the active column.
4007
-
4008
- **2.4.40 (2024-12-13)**
4009
-
4010
- - Enhanced Split Screen mode, now promoted from beta to stable.
4011
- - Python Code Interpreter tool added to the Tabs.
4012
- - HTML/JS Canvas tool added to the Tabs.
4013
- - Added attachment icon to the context list if context has attachments.
4014
- - Improved audio playback.
4015
- - Improved web search.
4016
- - Added a thumbnail image to web search results.
4017
- - Added a new commands to web search: "extract_images" and "extract_links".
4018
- - Added the option "Use raw content (without summarization)" to the web search plugin, which provides a more detailed result to the main model.
4019
- - Extended the default maximum result characters to 50,000 in the web search plugin.
4020
-
4021
- **2.4.39 (2024-12-09)**
4022
-
4023
- - Added "Split Screen" mode (accessible via the switch in the bottom-right corner of the screen), which allows you to work in two windows simultaneously. It is currently experimental (beta). Future updates will include Code Interpreter and Canvas running in tabs.
4024
-
4025
- - Fixed: Language switch.
4026
-
4027
- **2.4.38 (2024-12-08)**
4028
-
4029
- - Added the ability to select a style for chat display between: Blocks, ChatGPT-like, and ChatGPT-like Wide. New option in the menu: Config -> Theme -> Style...
4030
- - Added configuration options for audio input in Settings -> Audio -> Audio Input Device, Channels, and Sampling rate.
4031
-
4032
- **2.4.37 (2024-11-30)**
4033
-
4034
- - The `Query only` mode in `Uploaded` tab has been renamed to `RAG`.
4035
- - New options have been added under `Settings -> Files and Attachments`:
4036
- - `Use history in RAG query`: When enabled, the content of the entire conversation will be used when preparing a query if the mode is set to RAG or Summary.
4037
- - `RAG limit`: This option is applicable only if 'Use history in RAG query' is enabled. It specifies the limit on how many recent entries in the conversation will be used when generating a query for RAG. A value of 0 indicates no limit.
4038
- - Cache: dynamic parts of the system prompt (from plugins) have been moved to the very end of the prompt stack to enable the use of prompt cache mechanisms in OpenAI.
4039
-
4040
-
4041
3991
  # Credits and links
4042
3992
 
4043
3993
  **Official website:** <https://pygpt.net>
pygpt_net/CHANGELOG.txt CHANGED
@@ -1,3 +1,13 @@
1
+ 2.4.51 (2025-01-17)
2
+
3
+ - Added a "Continuous recording" mode under Audio Input in the Notepad tab, allowing for recording long voice notes and real-time auto-transcription. (beta)
4
+ - A new option has been added in Settings -> Audio -> Continuous recording auto-transcribe interval.
5
+
6
+ 2.4.50 (2025-01-16)
7
+
8
+ - Refactored audio input core.
9
+ - Added audio input volume progress bar.
10
+
1
11
  2.4.49 (2025-01-16)
2
12
 
3
13
  - Fix: stream render in Assistants mode.
pygpt_net/__init__.py CHANGED
@@ -6,15 +6,15 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.01.16 01:00:00 #
9
+ # Updated Date: 2025.01.17 03:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  __author__ = "Marcin Szczygliński"
13
13
  __copyright__ = "Copyright 2025, Marcin Szczygliński"
14
14
  __credits__ = ["Marcin Szczygliński"]
15
15
  __license__ = "MIT"
16
- __version__ = "2.4.49"
17
- __build__ = "2025.01.16"
16
+ __version__ = "2.4.51"
17
+ __build__ = "2025.01.17"
18
18
  __maintainer__ = "Marcin Szczygliński"
19
19
  __github__ = "https://github.com/szczyglis-dev/py-gpt"
20
20
  __report__ = "https://github.com/szczyglis-dev/py-gpt/issues"
pygpt_net/controller/access/voice.py CHANGED
@@ -6,13 +6,11 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.14 08:00:00 #
9
+ # Updated Date: 2025.01.16 17:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import Optional, List, Dict, Any
13
13
 
14
- import pyaudio
15
- import wave
16
14
  import os
17
15
 
18
16
  from PySide6.QtCore import QTimer, Slot
@@ -38,9 +36,6 @@ class Voice:
38
36
  """
39
37
  self.window = window
40
38
  self.is_recording = False
41
- self.frames = []
42
- self.p = None
43
- self.stream = None
44
39
  self.timer = None
45
40
  self.input_file = "voice_control.wav"
46
41
  self.thread_started = False
@@ -218,15 +213,6 @@ class Voice:
218
213
 
219
214
  def start_recording(self):
220
215
  """Start recording"""
221
- self.frames = [] # clear audio frames
222
-
223
- def callback(in_data, frame_count, time_info, status):
224
- self.frames.append(in_data)
225
- if self.is_recording:
226
- return (in_data, pyaudio.paContinue)
227
- else:
228
- return (in_data, pyaudio.paComplete)
229
-
230
216
  try:
231
217
  self.is_recording = True
232
218
  self.switch_btn_stop()
@@ -235,23 +221,26 @@ class Voice:
235
221
  if self.window.controller.audio.is_playing():
236
222
  self.window.controller.audio.stop_output()
237
223
 
224
+ # set audio volume bar
225
+ self.window.core.audio.capture.set_bar(
226
+ self.window.ui.nodes['voice.control.btn'].bar
227
+ )
228
+
238
229
  # start timeout timer to prevent infinite recording
239
230
  if self.timer is None:
240
231
  self.timer = QTimer()
241
232
  self.timer.timeout.connect(self.stop_timeout)
242
233
  self.timer.start(self.TIMEOUT_SECONDS * 1000)
243
234
 
244
- self.p = pyaudio.PyAudio()
245
- self.stream = self.p.open(format=pyaudio.paInt16,
246
- channels=1,
247
- rate=44100,
248
- input=True,
249
- frames_per_buffer=1024,
250
- stream_callback=callback)
235
+ if not self.window.core.audio.capture.check_audio_input():
236
+ raise Exception("Audio input not working.")
237
+ # IMPORTANT!!!!
238
+ # Stop here if audio input not working!
239
+ # This prevents the app from freezing when audio input is not working!
251
240
 
241
+ self.window.core.audio.capture.start() # start recording if audio is OK
252
242
  self.window.update_status(trans('audio.speak.now'))
253
243
  self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STARTED)) # app event
254
- self.stream.start_stream()
255
244
  except Exception as e:
256
245
  self.is_recording = False
257
246
  self.window.core.debug.log(e)
@@ -270,35 +259,29 @@ class Voice:
270
259
 
271
260
  :param timeout: True if stopped due to timeout
272
261
  """
262
+ self.window.core.audio.capture.reset_audio_level()
273
263
  self.is_recording = False
274
264
  if self.timer:
275
265
  self.timer.stop()
276
266
  self.timer = None
277
267
  self.switch_btn_start() # switch button to start
278
268
  path = os.path.join(self.window.core.config.path, self.input_file)
269
+ self.window.core.audio.capture.set_path(path)
279
270
 
280
- if self.stream is not None:
281
- self.stream.stop_stream()
282
- self.stream.close()
283
- self.p.terminate()
284
-
271
+ if self.window.core.audio.capture.has_source():
272
+ self.window.core.audio.capture.stop() # stop recording
285
273
  # abort if timeout
286
274
  if timeout:
287
275
  self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED)) # app event
288
276
  self.window.update_status("Aborted.".format(self.TIMEOUT_SECONDS))
289
277
  return
290
278
 
291
- if self.frames:
292
- if len(self.frames) < self.MIN_FRAMES:
279
+ if self.window.core.audio.capture.has_frames():
280
+ frames = self.window.core.audio.capture.get_frames()
281
+ if len(frames) < self.MIN_FRAMES:
293
282
  self.window.update_status(trans("status.audio.too_short"))
294
283
  self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED)) # app event
295
284
  return
296
- wf = wave.open(path, 'wb')
297
- wf.setnchannels(1)
298
- wf.setsampwidth(self.p.get_sample_size(pyaudio.paInt16))
299
- wf.setframerate(44100)
300
- wf.writeframes(b''.join(self.frames))
301
- wf.close()
302
285
  self.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_SENT)) # app event
303
286
  self.handle_thread(True) # handle transcription in simple mode
304
287
  else:
pygpt_net/controller/audio/__init__.py CHANGED
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.14 18:00:00 #
9
+ # Updated Date: 2025.01.17 02:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import os
@@ -29,6 +29,8 @@ class Audio:
29
29
  def setup(self):
30
30
  """Setup controller"""
31
31
  self.update()
32
+ if self.window.core.config.get("audio.input.continuous", False):
33
+ self.window.ui.plugin_addon['audio.input.btn'].continuous.setChecked(True)
32
34
 
33
35
  def toggle_input(
34
36
  self,
@@ -54,6 +56,18 @@ class Audio:
54
56
  else:
55
57
  self.enable_output()
56
58
 
59
+ def toggle_continuous(self, state: bool):
60
+ """
61
+ Toggle continuous audio input
62
+
63
+ :param state: True to enable, False to disable
64
+ """
65
+ if state:
66
+ self.window.core.config.set("audio.input.continuous", True)
67
+ else:
68
+ self.window.core.config.set("audio.input.continuous", False)
69
+ self.window.core.config.save()
70
+
57
71
  def enable_output(self):
58
72
  """Enable audio output"""
59
73
  self.toggle_output_icon(True)
pygpt_net/controller/lang/custom.py CHANGED
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.09 00:00:00 #
9
+ # Updated Date: 2025.01.17 02:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from PySide6.QtCore import Qt
@@ -46,6 +46,7 @@ class Custom:
46
46
 
47
47
  # checkboxes
48
48
  self.window.ui.plugin_addon['audio.input'].btn_toggle.setText(trans('audio.speak.btn'))
49
+ self.window.ui.plugin_addon['audio.input.btn'].continuous.setText(trans('audio.speak.btn.continuous'))
49
50
  self.window.ui.config['assistant']['tool.file_search'].box.setText(trans('assistant.tool.file_search'))
50
51
  self.window.ui.config['assistant']['tool.code_interpreter'].box.setText(
51
52
  trans('assistant.tool.code_interpreter')
pygpt_net/controller/ui/tabs.py CHANGED
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.14 07:00:00 #
9
+ # Updated Date: 2025.01.17 02:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import Any, Optional
@@ -182,6 +182,12 @@ class Tabs:
182
182
  if prev_tab != idx or prev_column != column_idx:
183
183
  self.window.dispatch(AppEvent(AppEvent.TAB_SELECTED)) # app event
184
184
 
185
+ # show/hide audio record footer
186
+ if tab.type == Tab.TAB_NOTEPAD:
187
+ self.window.ui.plugin_addon['audio.input.btn'].notepad_footer.setVisible(True)
188
+ else:
189
+ self.window.ui.plugin_addon['audio.input.btn'].notepad_footer.setVisible(False)
190
+
185
191
  self.window.controller.ui.update()
186
192
  self.update_current()
187
193
 
pygpt_net/core/audio/__init__.py CHANGED
@@ -6,17 +6,19 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.12.14 18:00:00 #
9
+ # Updated Date: 2025.01.16 17:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import re
13
13
  from typing import Union, Optional, Tuple, List
14
14
 
15
+ from PySide6.QtMultimedia import QMediaDevices
15
16
  from bs4 import UnicodeDammit
16
17
 
17
18
  from pygpt_net.provider.audio_input.base import BaseProvider as InputBaseProvider
18
19
  from pygpt_net.provider.audio_output.base import BaseProvider as OutputBaseProvider
19
20
 
21
+ from .capture import Capture
20
22
  from .whisper import Whisper
21
23
 
22
24
 
@@ -28,6 +30,7 @@ class Audio:
28
30
  :param window: Window instance
29
31
  """
30
32
  self.window = window
33
+ self.capture = Capture(window)
31
34
  self.whisper = Whisper(window)
32
35
  self.providers = {
33
36
  "input": {},
@@ -41,21 +44,12 @@ class Audio:
41
44
 
42
45
  :return devices list: [(id, name)]
43
46
  """
44
- import pyaudio
45
- devices = []
46
- try:
47
- p = pyaudio.PyAudio()
48
- num_devices = p.get_device_count()
49
- for i in range(num_devices):
50
- info = p.get_device_info_by_index(i)
51
- if info["maxInputChannels"] > 0:
52
- dammit = UnicodeDammit(info["name"])
53
- devices.append((i, dammit.unicode_markup))
54
- # print(f"Device ID {i}: {info['name']}")
55
- p.terminate()
56
- except Exception as e:
57
- print(f"Audio input devices receive error: {e}")
58
- return devices
47
+ devices = QMediaDevices.audioInputs()
48
+ devices_list = []
49
+ for index, device in enumerate(devices):
50
+ dammit = UnicodeDammit(device.description())
51
+ devices_list.append((index, dammit.unicode_markup))
52
+ return devices_list
59
53
 
60
54
  def is_device_compatible(self, device_index: int) -> bool:
61
55
  """
@@ -69,7 +63,6 @@ class Audio:
69
63
  channels = int(self.window.core.config.get('audio.input.channels', 1))
70
64
  p = pyaudio.PyAudio()
71
65
  info = p.get_device_info_by_index(device_index)
72
- supported = False
73
66
  try:
74
67
  p.is_format_supported(
75
68
  rate=rate,