pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +7 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +9 -2
  7. pygpt_net/controller/audio/audio.py +38 -1
  8. pygpt_net/controller/audio/ui.py +2 -2
  9. pygpt_net/controller/chat/audio.py +1 -8
  10. pygpt_net/controller/chat/common.py +23 -62
  11. pygpt_net/controller/chat/handler/__init__.py +0 -0
  12. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  13. pygpt_net/controller/chat/output.py +8 -3
  14. pygpt_net/controller/chat/stream.py +3 -1071
  15. pygpt_net/controller/chat/text.py +3 -2
  16. pygpt_net/controller/kernel/kernel.py +11 -3
  17. pygpt_net/controller/kernel/reply.py +5 -1
  18. pygpt_net/controller/lang/custom.py +2 -2
  19. pygpt_net/controller/media/__init__.py +12 -0
  20. pygpt_net/controller/media/media.py +115 -0
  21. pygpt_net/controller/realtime/__init__.py +12 -0
  22. pygpt_net/controller/realtime/manager.py +53 -0
  23. pygpt_net/controller/realtime/realtime.py +293 -0
  24. pygpt_net/controller/ui/mode.py +23 -2
  25. pygpt_net/controller/ui/ui.py +19 -1
  26. pygpt_net/core/audio/audio.py +6 -1
  27. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  28. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  29. pygpt_net/core/audio/backend/native/player.py +139 -0
  30. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  31. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  32. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  33. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  34. pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
  35. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  36. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  37. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  38. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  39. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  40. pygpt_net/core/audio/backend/shared/player.py +137 -0
  41. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  42. pygpt_net/core/audio/capture.py +5 -0
  43. pygpt_net/core/audio/output.py +14 -2
  44. pygpt_net/core/audio/whisper.py +6 -2
  45. pygpt_net/core/bridge/bridge.py +2 -1
  46. pygpt_net/core/bridge/worker.py +4 -1
  47. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  48. pygpt_net/core/events/__init__.py +2 -1
  49. pygpt_net/core/events/realtime.py +55 -0
  50. pygpt_net/core/image/image.py +56 -5
  51. pygpt_net/core/realtime/__init__.py +0 -0
  52. pygpt_net/core/realtime/options.py +87 -0
  53. pygpt_net/core/realtime/shared/__init__.py +0 -0
  54. pygpt_net/core/realtime/shared/audio.py +213 -0
  55. pygpt_net/core/realtime/shared/loop.py +64 -0
  56. pygpt_net/core/realtime/shared/session.py +59 -0
  57. pygpt_net/core/realtime/shared/text.py +37 -0
  58. pygpt_net/core/realtime/shared/tools.py +276 -0
  59. pygpt_net/core/realtime/shared/turn.py +38 -0
  60. pygpt_net/core/realtime/shared/types.py +16 -0
  61. pygpt_net/core/realtime/worker.py +160 -0
  62. pygpt_net/core/render/web/body.py +24 -3
  63. pygpt_net/core/text/utils.py +54 -2
  64. pygpt_net/core/types/__init__.py +1 -0
  65. pygpt_net/core/types/image.py +54 -0
  66. pygpt_net/core/video/__init__.py +12 -0
  67. pygpt_net/core/video/video.py +290 -0
  68. pygpt_net/data/config/config.json +26 -5
  69. pygpt_net/data/config/models.json +221 -103
  70. pygpt_net/data/config/settings.json +244 -6
  71. pygpt_net/data/css/web-blocks.css +6 -0
  72. pygpt_net/data/css/web-chatgpt.css +6 -0
  73. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  74. pygpt_net/data/locale/locale.de.ini +35 -7
  75. pygpt_net/data/locale/locale.en.ini +56 -17
  76. pygpt_net/data/locale/locale.es.ini +35 -7
  77. pygpt_net/data/locale/locale.fr.ini +35 -7
  78. pygpt_net/data/locale/locale.it.ini +35 -7
  79. pygpt_net/data/locale/locale.pl.ini +38 -7
  80. pygpt_net/data/locale/locale.uk.ini +35 -7
  81. pygpt_net/data/locale/locale.zh.ini +31 -3
  82. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  83. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  84. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  85. pygpt_net/item/model.py +22 -1
  86. pygpt_net/plugin/audio_input/plugin.py +37 -4
  87. pygpt_net/plugin/audio_input/simple.py +57 -8
  88. pygpt_net/plugin/cmd_files/worker.py +3 -0
  89. pygpt_net/provider/api/google/__init__.py +76 -7
  90. pygpt_net/provider/api/google/audio.py +8 -1
  91. pygpt_net/provider/api/google/chat.py +45 -6
  92. pygpt_net/provider/api/google/image.py +226 -86
  93. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  94. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  95. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  96. pygpt_net/provider/api/google/video.py +364 -0
  97. pygpt_net/provider/api/openai/__init__.py +22 -2
  98. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  99. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  100. pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
  101. pygpt_net/provider/audio_input/google_genai.py +103 -0
  102. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  103. pygpt_net/provider/audio_output/google_tts.py +0 -12
  104. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  105. pygpt_net/provider/core/config/patch.py +241 -178
  106. pygpt_net/provider/core/model/patch.py +28 -2
  107. pygpt_net/provider/llms/google.py +8 -9
  108. pygpt_net/provider/web/duckduck_search.py +212 -0
  109. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  110. pygpt_net/ui/layout/toolbox/footer.py +14 -42
  111. pygpt_net/ui/layout/toolbox/image.py +7 -13
  112. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  113. pygpt_net/ui/layout/toolbox/split.py +48 -0
  114. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  115. pygpt_net/ui/layout/toolbox/video.py +49 -0
  116. pygpt_net/ui/widget/option/combo.py +15 -1
  117. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
  118. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
  119. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  120. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  121. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  122. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0

pygpt_net/controller/chat/text.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.30 06:00:00 #
  # ================================================== #

  from typing import Optional
@@ -111,6 +111,7 @@ class Text:
  # if prev ctx is not empty, then copy input name to current ctx
  if prev_ctx is not None and prev_ctx.sub_call is True: # sub_call = sent from expert
  ctx.input_name = prev_ctx.input_name
+
  if reply:
  ctx.extra["sub_reply"] = True # mark as sub reply in extra data

@@ -238,7 +239,7 @@ class Text:
  """
  core = self.window.core
  stream = core.config.get("stream")
- if mode in (MODE_AGENT_LLAMA, MODE_AUDIO):
+ if mode in (MODE_AGENT_LLAMA):
  return False # TODO: check if this is correct in agent
  elif mode == MODE_LLAMA_INDEX:
  if core.config.get("llama.idx.mode") == "retrieval":

pygpt_net/controller/kernel/kernel.py
@@ -6,13 +6,13 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.06 19:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import threading
  from typing import Any, Dict, Optional, Union, List

- from PySide6.QtCore import QObject, Slot
+ from PySide6.QtCore import Slot
  from PySide6.QtWidgets import QApplication

  from pygpt_net.core.types import (
@@ -23,7 +23,7 @@ from pygpt_net.core.types import (
  MODE_EXPERT,
  MODE_LLAMA_INDEX,
  )
- from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent
+ from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent, RealtimeEvent, Event
  from pygpt_net.core.bridge.context import BridgeContext
  from pygpt_net.item.ctx import CtxItem
  from pygpt_net.utils import trans
@@ -95,6 +95,13 @@ class Kernel:
  KernelEvent.INPUT_USER,
  KernelEvent.FORCE_CALL,
  KernelEvent.STATUS,
+ Event.AUDIO_INPUT_RECORD_TOGGLE,
+ RealtimeEvent.RT_INPUT_AUDIO_DELTA,
+ RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP,
+ RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START,
+ RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT,
+ RealtimeEvent.RT_OUTPUT_TURN_END,
+ RealtimeEvent.RT_OUTPUT_READY,
  ]

  def init(self):
@@ -281,6 +288,7 @@
  self.window.dispatch(KernelEvent(KernelEvent.TERMINATE))
  self.stop(exit=True)
  self.window.controller.plugins.destroy()
+ self.window.controller.realtime.shutdown()

  def stop(self, exit: bool = False):
  """

pygpt_net/controller/kernel/reply.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #

  import json
@@ -109,6 +109,10 @@ class Reply:
  core.ctx.update_item(self.reply_ctx) # update context in db
  self.window.update_status('...')

+ # append tool calls from previous context (used for tool results handling)
+ if self.reply_ctx.tool_calls:
+ prev_ctx.extra["prev_tool_calls"] = self.reply_ctx.tool_calls
+
  # tool output append
  dispatch(RenderEvent(RenderEvent.TOOL_UPDATE, {
  "meta": self.reply_ctx.meta,

pygpt_net/controller/lang/custom.py
@@ -55,8 +55,8 @@ class Custom:
  self.window.ui.config['preset'][MODE_CHAT].box.setText(trans("preset.chat"))
  self.window.ui.config['preset'][MODE_COMPLETION].box.setText(trans("preset.completion"))
  self.window.ui.config['preset'][MODE_IMAGE].box.setText(trans("preset.img"))
- self.window.ui.config['preset'][MODE_VISION].box.setText(trans("preset.vision"))
- #self.window.ui.config['preset'][MODE_LANGCHAIN].box.setText(trans("preset.langchain"))
+ # self.window.ui.config['preset'][MODE_VISION].box.setText(trans("preset.vision"))
+ # self.window.ui.config['preset'][MODE_LANGCHAIN].box.setText(trans("preset.langchain"))
  self.window.ui.config['preset'][MODE_LLAMA_INDEX].box.setText(trans("preset.llama_index"))
  self.window.ui.config['preset'][MODE_AGENT].box.setText(trans("preset.agent"))
  self.window.ui.config['preset'][MODE_AGENT_LLAMA].box.setText(trans("preset.agent_llama"))

pygpt_net/controller/media/__init__.py
@@ -0,0 +1,12 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.09.01 23:00:00 #
+ # ================================================== #
+
+ from .media import Media

pygpt_net/controller/media/media.py
@@ -0,0 +1,115 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.09.01 23:00:00 #
+ # ================================================== #
+
+ from typing import Any
+
+
+ class Media:
+     def __init__(self, window=None):
+         """
+         Media (video, image, music) controller
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.initialized = False
+
+     def setup(self):
+         """Setup UI"""
+         # raw mode for images/video
+         if self.window.core.config.get('img_raw'):
+             self.window.ui.config['global']['img_raw'].setChecked(True)
+         else:
+             self.window.ui.config['global']['img_raw'].setChecked(False)
+
+         # image: resolution
+         resolution = self.window.core.config.get('img_resolution', '1024x1024')
+         self.window.controller.config.apply_value(
+             parent_id="global",
+             key="img_resolution",
+             option=self.window.core.image.get_resolution_option(),
+             value=resolution,
+         )
+
+         # video: aspect ratio
+         aspect_ratio = self.window.core.config.get('video.aspect_ratio', '16:9')
+         self.window.controller.config.apply_value(
+             parent_id="global",
+             key="video.aspect_ratio",
+             option=self.window.core.video.get_aspect_ratio_option(),
+             value=aspect_ratio,
+         )
+
+         # -- add hooks --
+         if not self.initialized:
+             self.window.ui.add_hook("update.global.img_resolution", self.hook_update)
+             self.window.ui.add_hook("update.global.video.aspect_ratio", self.hook_update)
+
+     def reload(self):
+         """Reload UI"""
+         self.setup()
+
+     def hook_update(self, key: str, value: Any, caller, *args, **kwargs):
+         """
+         Hook for updating media options
+
+         :param key: config key
+         :param value: new value
+         :param caller: caller object
+         """
+         if key == "img_resolution":
+             if not value:
+                 return
+             self.window.core.config.set('img_resolution', value)
+         elif key == "video.aspect_ratio":
+             if not value:
+                 return
+             self.window.core.config.set('video.aspect_ratio', value)
+
+     def enable_raw(self):
+         """Enable prompt enhancement for images"""
+         self.window.core.config.set('img_raw', True)
+         self.window.core.config.save()
+
+     def disable_raw(self):
+         """Disable prompt enhancement for images"""
+         self.window.core.config.set('img_raw', False)
+         self.window.core.config.save()
+
+     def toggle_raw(self):
+         """Save prompt enhancement option for images"""
+         state = self.window.ui.config['global']['img_raw'].isChecked()
+         if not state:
+             self.disable_raw()
+         else:
+             self.enable_raw()
+
+     def is_image_model(self) -> bool:
+         """
+         Check if the model is an image generation model
+
+         :return: True if the model is an image generation model
+         """
+         current = self.window.core.config.get("model")
+         model_data = self.window.core.models.get(current)
+         if model_data:
+             return model_data.is_image_output()
+
+     def is_video_model(self) -> bool:
+         """
+         Check if the model is a video generation model
+
+         :return: True if the model is a video generation model
+         """
+         current = self.window.core.config.get("model")
+         model_data = self.window.core.models.get(current)
+         if model_data:
+             return model_data.is_video_output()
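
The new Media controller ties the image/video toolbox widgets to the stored configuration: setup() pushes img_raw, img_resolution and video.aspect_ratio from the config into the global UI options, and hook_update() writes edits back when the "update.global.*" hooks fire. A minimal sketch of that round trip, assuming the hook system delivers the key without its "update.global." prefix (as the key checks above suggest) and using throwaway stubs in place of the real Window object:

    # Illustrative sketch only: FakeConfig/FakeCore/FakeWindow are stand-ins,
    # not part of pygpt-net; only the attributes hook_update() touches are stubbed.
    from pygpt_net.controller.media import Media

    class FakeConfig:
        def __init__(self):
            self.data = {"img_resolution": "1024x1024", "video.aspect_ratio": "16:9"}
        def get(self, key, default=None):
            return self.data.get(key, default)
        def set(self, key, value):
            self.data[key] = value
        def save(self):
            pass

    class FakeCore:
        def __init__(self):
            self.config = FakeConfig()

    class FakeWindow:
        def __init__(self):
            self.core = FakeCore()

    media = Media(window=FakeWindow())
    # simulate the "update.global.video.aspect_ratio" hook firing after a combo change
    media.hook_update("video.aspect_ratio", "9:16", caller=None)
    assert media.window.core.config.get("video.aspect_ratio") == "9:16"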

pygpt_net/controller/realtime/__init__.py
@@ -0,0 +1,12 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.30 06:00:00 #
+ # ================================================== #
+
+ from .realtime import Realtime

pygpt_net/controller/realtime/manager.py
@@ -0,0 +1,53 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.31 23:00:00 #
+ # ================================================== #
+
+ from typing import Optional
+
+ from pygpt_net.core.realtime.worker import RealtimeWorker, RealtimeOptions
+ from pygpt_net.item.ctx import CtxItem
+
+ class Manager:
+     """
+     Manager that mirrors chat.stream controller shape.
+
+     Starts a RealtimeWorker and routes text events and lifecycle to the UI.
+     Audio is forwarded by the main-thread via RT_OUTPUT_AUDIO_DELTA events.
+     """
+     def __init__(self, window=None):
+         self.window = window
+         self.worker: Optional[RealtimeWorker] = None
+         self.ctx: Optional[CtxItem] = None
+         self.provider: Optional[str] = None
+         self.opts: Optional[RealtimeOptions] = None
+
+     def start(
+             self,
+             ctx: CtxItem,
+             opts: RealtimeOptions
+     ):
+         """
+         Start realtime worker
+
+         :param ctx: CtxItem
+         :param opts: RealtimeOptions
+         """
+         self.ctx = ctx
+         self.opts = opts
+         self.provider = opts.provider
+
+         worker = RealtimeWorker(self.window, ctx, opts)
+         self.worker = worker
+         self.window.core.debug.info(f"[realtime] Begin: provider={opts.provider}, model={opts.model}")
+         self.window.threadpool.start(worker)
+
+     def shutdown(self):
+         """Shutdown realtime worker"""
+         self.worker = None
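
Manager.start() wraps a realtime session in a RealtimeWorker and hands it to the application's Qt thread pool, so a caller only supplies a context item and a RealtimeOptions object. A hedged usage sketch: the RealtimeOptions keyword arguments below are assumptions (this hunk only shows opts.provider and opts.model being read), and the controller path comes from the Realtime controller shown next in this diff:

    from pygpt_net.core.realtime.worker import RealtimeOptions  # import path as used above
    from pygpt_net.item.ctx import CtxItem

    def start_realtime_turn(window, ctx: CtxItem):
        # assumed constructor fields: Manager only reads opts.provider and opts.model
        opts = RealtimeOptions(
            provider="openai",                      # or "google"
            model=window.core.config.get("model"),  # currently selected model
        )
        # window.controller.realtime.manager is the Manager instance owned by
        # the Realtime controller (see realtime.py below)
        window.controller.realtime.manager.start(ctx, opts)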

pygpt_net/controller/realtime/realtime.py
@@ -0,0 +1,293 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.31 23:00:00 #
+ # ================================================== #
+
+ from PySide6.QtCore import Slot, QTimer
+
+ from pygpt_net.core.events import (
+     RealtimeEvent,
+     RenderEvent,
+     BaseEvent,
+     AppEvent,
+     KernelEvent,
+     Event,
+ )
+ from pygpt_net.core.realtime.worker import RealtimeSignals
+ from pygpt_net.core.types import MODE_AUDIO
+ from pygpt_net.utils import trans
+ from pygpt_net.core.tabs import Tab
+
+ from .manager import Manager
+
+ class Realtime:
+     def __init__(self, window=None):
+         """
+         Realtime controller
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.manager = Manager(window)
+         self.signals = RealtimeSignals()
+         self.signals.response.connect(self.handle_response)
+         self.current_active = None # openai | google
+         self.allowed_modes = [MODE_AUDIO]
+         self.manual_commit_sent = False
+
+     def setup(self):
+         """Setup realtime core, signals, etc. in main thread"""
+         self.window.core.audio.setup() # setup RT signals in audio input/output core
+
+     def is_enabled(self) -> bool:
+         """
+         Check if realtime is enabled in settings
+
+         :return: True if enabled, False otherwise
+         """
+         mode = self.window.core.config.get("mode")
+         if mode == MODE_AUDIO:
+             if self.window.controller.ui.tabs.get_current_type() != Tab.TAB_NOTEPAD:
+                 return True
+         return False
+
+     @Slot(object)
+     def handle(self, event: BaseEvent):
+         """
+         Handle realtime event (returned from dispatcher)
+
+         :param event: RealtimeEvent instance
+         """
+         # check if mode is supported
+         if not self.is_supported() and isinstance(event, RealtimeEvent):
+             event.stop = True # stop further propagation
+             return # ignore if not in realtime mode
+
+         # ----------------------------------------------------
+
+         # audio output chunk: send to audio output handler
+         if event.name == RealtimeEvent.RT_OUTPUT_AUDIO_DELTA:
+             self.set_idle()
+             payload = event.data.get("payload", None)
+             if payload:
+                 self.window.core.audio.output.handle_realtime(payload, self.signals)
+
+         # audio input chunk: send to the active realtime client
+         elif event.name == RealtimeEvent.RT_INPUT_AUDIO_DELTA:
+             self.set_idle()
+             if self.current_active == "google":
+                 self.window.core.api.google.realtime.handle_audio_input(event)
+             elif self.current_active == "openai":
+                 self.window.core.api.openai.realtime.handle_audio_input(event)
+
+         # begin: first text chunk or audio chunk received, start rendering
+         elif event.name == RealtimeEvent.RT_OUTPUT_READY:
+             ctx = event.data.get('ctx', None)
+             if ctx:
+                 self.window.dispatch(RenderEvent(RenderEvent.STREAM_BEGIN, {
+                     "meta": ctx.meta,
+                     "ctx": ctx,
+                 }))
+             self.set_busy()
+
+         # commit: audio buffer sent, stop audio input and finalize the response
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT:
+             self.set_busy()
+             if self.manual_commit_sent:
+                 self.manual_commit_sent = False
+                 return # abort if manual commit was already sent
+             self.window.controller.audio.execute_input_stop()
+
+         elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP:
+             self.manual_commit_sent = True
+             self.set_busy()
+             QTimer.singleShot(0, lambda: self.manual_commit())
+
+         elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START:
+             self.set_idle()
+             self.window.controller.chat.input.execute("...", force=True)
+             self.window.dispatch(KernelEvent(KernelEvent.STATUS, {
+                 'status': trans("speech.listening"),
+             }))
+
+         # text delta: append text chunk to the response
+         elif event.name == RealtimeEvent.RT_OUTPUT_TEXT_DELTA:
+             self.set_idle()
+             ctx = event.data.get('ctx', None)
+             chunk = event.data.get('chunk', "")
+             if chunk and ctx:
+                 self.window.dispatch(RenderEvent(RenderEvent.STREAM_APPEND, {
+                     "meta": ctx.meta,
+                     "ctx": ctx,
+                     "chunk": chunk,
+                     "begin": False,
+                 }))
+
+         # audio end: on stop audio playback
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_END:
+             self.set_idle()
+             self.window.controller.chat.common.unlock_input()
+             if self.is_loop():
+                 QTimer.singleShot(500, lambda: self.next_turn()) # wait a bit before next turn
+
+         # end of turn: finalize the response
+         elif event.name == RealtimeEvent.RT_OUTPUT_TURN_END:
+             self.set_idle()
+             ctx = event.data.get('ctx', None)
+             if ctx:
+                 self.end_turn(ctx)
+             if self.window.controller.audio.is_recording():
+                 self.window.update_status(trans("speech.listening"))
+             self.window.controller.chat.common.unlock_input()
+
+         # volume change: update volume in audio output handler
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_VOLUME_CHANGED:
+             volume = event.data.get("volume", 1.0)
+             self.window.controller.audio.ui.on_output_volume_change(volume)
+
+         # error: audio output error
+         elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_ERROR:
+             self.set_idle()
+             error = event.data.get("error")
+             self.window.core.debug.log(error)
+             self.window.controller.chat.common.unlock_input()
+
+         # -----------------------------------
+
+         # app events, always handled
+         elif event.name == AppEvent.MODE_SELECTED:
+             mode = self.window.core.config.get("mode")
+             if mode != MODE_AUDIO:
+                 QTimer.singleShot(0, lambda: self.reset())
+
+         elif event.name == AppEvent.CTX_CREATED:
+             QTimer.singleShot(0, lambda: self.reset())
+
+         elif event.name == AppEvent.CTX_SELECTED:
+             QTimer.singleShot(0, lambda: self.reset())
+
+     def next_turn(self):
+         """Start next turn in loop mode (if enabled)"""
+         self.window.dispatch(Event(Event.AUDIO_INPUT_RECORD_TOGGLE))
+         if self.window.controller.audio.is_recording():
+             QTimer.singleShot(100, lambda: self.window.update_status(trans("speech.listening")))
+
+     def is_loop(self) -> bool:
+         """
+         Check if loop recording is enabled
+
+         :return: True if loop recording is enabled, False otherwise
+         """
+         if self.window.controller.kernel.stopped():
+             return False
+         return self.window.core.config.get("audio.input.loop", False)
+
+     @Slot(object)
+     def handle_response(self, event: RealtimeEvent):
+         """
+         Handle response event (send to kernel -> dispatcher)
+
+         :param event: RealtimeEvent instance
+         """
+         self.window.controller.kernel.listener(event)
+
+     def is_auto_turn(self) -> bool:
+         """
+         Check if auto-turn is enabled
+
+         :return: True if auto-turn is enabled, False otherwise
+         """
+         return self.window.core.config.get("audio.input.auto_turn", True)
+
+     def manual_commit(self):
+         """Manually commit the response (end of turn)"""
+         if self.current_active == "google":
+             self.window.core.api.google.realtime.manual_commit()
+         elif self.current_active == "openai":
+             self.window.core.api.openai.realtime.manual_commit()
+
+     def end_turn(self, ctx):
+         """
+         End of realtime turn - finalize the response
+
+         :param ctx: Context instance
+         """
+         self.set_idle()
+         if not ctx:
+             return
+         self.window.controller.chat.output.handle_after(
+             ctx=ctx,
+             mode=MODE_AUDIO,
+             stream=True,
+         )
+         self.window.controller.chat.output.post_handle(
+             ctx=ctx,
+             mode=MODE_AUDIO,
+             stream=True,
+         )
+         self.window.controller.chat.output.handle_end(
+             ctx=ctx,
+             mode=MODE_AUDIO,
+         )
+         self.window.controller.chat.common.show_response_tokens(ctx)
+
+     def shutdown(self):
+         """Shutdown all realtime threads and async loops"""
+         try:
+             self.window.core.api.openai.realtime.shutdown()
+         except Exception as e:
+             self.window.core.debug.log(f"[openai] Realtime shutdown error: {e}")
+         try:
+             self.window.core.api.google.realtime.shutdown()
+         except Exception as e:
+             self.window.core.debug.log(f"[google] Realtime shutdown error: {e}")
+         try:
+             self.manager.shutdown()
+         except Exception as e:
+             self.window.core.debug.log(f"[manager] Realtime shutdown error: {e}")
+
+     def reset(self):
+         """Reset realtime session"""
+         try:
+             self.window.core.api.openai.realtime.reset()
+         except Exception as e:
+             self.window.core.debug.log(f"[openai] Realtime reset error: {e}")
+         try:
+             self.window.core.api.google.realtime.reset()
+         except Exception as e:
+             self.window.core.debug.log(f"[google] Realtime reset error: {e}")
+
+     def is_supported(self) -> bool:
+         """
+         Check if current mode supports realtime
+
+         :return: True if mode supports realtime, False otherwise
+         """
+         mode = self.window.core.config.get("mode")
+         return mode in self.allowed_modes
+
+     def set_current_active(self, provider: str):
+         """
+         Set the current active realtime provider
+
+         :param provider: Provider name (openai, google)
+         """
+         self.current_active = provider.lower() if provider else None
+
+     def set_idle(self):
+         """Set kernel state to IDLE"""
+         QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_IDLE, {
+             "id": "realtime",
+         })))
+
+     def set_busy(self):
+         """Set kernel state to BUSY"""
+         QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {
+             "id": "realtime",
+         })))
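
Provider clients never touch the UI directly: they emit RealtimeEvent objects on RealtimeSignals.response, handle_response() relays them to the kernel listener, and the dispatcher routes them back into handle() on the main thread. A sketch of how a worker-side client could report a text delta, assuming RealtimeEvent uses the same (name, data) constructor as the KernelEvent/RenderEvent calls above; the payload keys "ctx" and "chunk" match what the RT_OUTPUT_TEXT_DELTA branch of handle() reads:

    from pygpt_net.core.events import RealtimeEvent

    def emit_text_delta(signals, ctx, chunk: str):
        # `signals` is the RealtimeSignals instance owned by the Realtime controller;
        # its `response` signal is connected to handle_response() in __init__ above
        event = RealtimeEvent(RealtimeEvent.RT_OUTPUT_TEXT_DELTA, {
            "ctx": ctx,      # CtxItem for the current turn
            "chunk": chunk,  # text fragment to append to the rendered response
        })
        signals.response.emit(event)  # queued back to the main thread by Qt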

pygpt_net/controller/ui/mode.py
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.15 23:00:00 #
+ # Updated Date: 2025.09.01 23:00:00 #
  # ================================================== #

  from pygpt_net.core.types import (
@@ -20,6 +20,7 @@ from pygpt_net.core.types import (
  MODE_COMPUTER,
  MODE_AGENT_OPENAI,
  MODE_COMPLETION,
+ MODE_AUDIO,
  )
  from pygpt_net.core.tabs.tab import Tab
  from pygpt_net.core.events import Event
@@ -55,6 +56,14 @@ class Mode:
  is_image = mode == MODE_IMAGE
  is_llama_index = mode == MODE_LLAMA_INDEX
  is_completion = mode == MODE_COMPLETION
+ is_audio = mode == MODE_AUDIO
+
+ if not is_audio:
+ self.window.ui.nodes['audio.auto_turn'].setVisible(False)
+ self.window.ui.nodes["audio.loop"].setVisible(False)
+ else:
+ self.window.ui.nodes['audio.auto_turn'].setVisible(True)
+ self.window.ui.nodes["audio.loop"].setVisible(True)

  if not is_assistant:
  ui_nodes['presets.widget'].setVisible(True)
@@ -131,9 +140,21 @@ class Mode:
  ui_tabs['preset.editor.extra'].setTabText(0, trans("preset.prompt"))

  if is_image:
- ui_nodes['dalle.options'].setVisible(True)
+ ui_nodes['media.raw'].setVisible(True)
+ if ctrl.media.is_video_model():
+ ui_nodes['video.options'].setVisible(True)
+ ui_nodes['dalle.options'].setVisible(False)
+ elif ctrl.media.is_image_model():
+ ui_nodes['dalle.options'].setVisible(True)
+ ui_nodes['video.options'].setVisible(False)
+ else:
+ ui_nodes['media.raw'].setVisible(False)
+ ui_nodes['dalle.options'].setVisible(False)
+ ui_nodes['video.options'].setVisible(False)
  else:
+ ui_nodes['media.raw'].setVisible(False)
  ui_nodes['dalle.options'].setVisible(False)
+ ui_nodes['video.options'].setVisible(False)

  if is_agent:
  ui_nodes['agent.options'].setVisible(True)

pygpt_net/controller/ui/ui.py
@@ -13,6 +13,7 @@ from typing import Optional

  from PySide6.QtGui import QColor

+ from pygpt_net.core.types import MODE_IMAGE
  from pygpt_net.core.events import BaseEvent, Event
  from pygpt_net.utils import trans

@@ -64,6 +65,7 @@ class UI:
  self.update_tokens()
  self.vision.update()
  self.window.controller.agent.legacy.update()
+ self.img_update_available_resolutions()

  def handle(self, event: BaseEvent):
  """
@@ -215,4 +217,20 @@
  def on_global_stop(self):
  """Global stop button action"""
  if self.stop_action == "idx":
- self.window.controller.idx.force_stop()
+ self.window.controller.idx.force_stop()
+
+ def img_update_available_resolutions(self):
+ """Update available resolutions for images"""
+ mode = self.window.core.config.get('mode')
+ if mode != MODE_IMAGE:
+ return
+ model = self.window.core.config.get('model')
+ keys = self.window.core.image.get_available_resolutions(model)
+ current = self.window.core.config.get('img_resolution', '1024x1024')
+ self.window.ui.config['global']['img_resolution'].set_keys(keys, lock=False)
+ self.window.controller.config.apply_value(
+ parent_id="global",
+ key="img_resolution",
+ option=self.window.core.image.get_resolution_option(),
+ value=current,
+ )