pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff covers publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +8 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +29 -3
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/data/config/config.json +10 -4
- pygpt_net/data/config/models.json +149 -103
- pygpt_net/data/config/settings.json +50 -0
- pygpt_net/data/locale/locale.de.ini +5 -5
- pygpt_net/data/locale/locale.en.ini +19 -13
- pygpt_net/data/locale/locale.es.ini +5 -5
- pygpt_net/data/locale/locale.fr.ini +5 -5
- pygpt_net/data/locale/locale.it.ini +5 -5
- pygpt_net/data/locale/locale.pl.ini +5 -5
- pygpt_net/data/locale/locale.uk.ini +5 -5
- pygpt_net/data/locale/locale.zh.ini +1 -1
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +39 -6
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +11 -0
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0

pygpt_net/controller/chat/text.py
CHANGED

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.30 06:00:00 #
 # ================================================== #

 from typing import Optional

@@ -111,6 +111,7 @@ class Text:
         # if prev ctx is not empty, then copy input name to current ctx
         if prev_ctx is not None and prev_ctx.sub_call is True:  # sub_call = sent from expert
             ctx.input_name = prev_ctx.input_name
+
         if reply:
             ctx.extra["sub_reply"] = True  # mark as sub reply in extra data

@@ -238,7 +239,7 @@ class Text:
         """
         core = self.window.core
         stream = core.config.get("stream")
-        if mode in (MODE_AGENT_LLAMA
+        if mode in (MODE_AGENT_LLAMA):
             return False  # TODO: check if this is correct in agent
         elif mode == MODE_LLAMA_INDEX:
             if core.config.get("llama.idx.mode") == "retrieval":

pygpt_net/controller/kernel/kernel.py
CHANGED

@@ -6,13 +6,13 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.31 23:00:00 #
 # ================================================== #

 import threading
 from typing import Any, Dict, Optional, Union, List

-from PySide6.QtCore import
+from PySide6.QtCore import Slot
 from PySide6.QtWidgets import QApplication

 from pygpt_net.core.types import (

@@ -23,7 +23,7 @@ from pygpt_net.core.types import (
     MODE_EXPERT,
     MODE_LLAMA_INDEX,
 )
-from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent
+from pygpt_net.core.events import KernelEvent, RenderEvent, BaseEvent, RealtimeEvent, Event
 from pygpt_net.core.bridge.context import BridgeContext
 from pygpt_net.item.ctx import CtxItem
 from pygpt_net.utils import trans

@@ -95,6 +95,13 @@ class Kernel:
             KernelEvent.INPUT_USER,
             KernelEvent.FORCE_CALL,
             KernelEvent.STATUS,
+            Event.AUDIO_INPUT_RECORD_TOGGLE,
+            RealtimeEvent.RT_INPUT_AUDIO_DELTA,
+            RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP,
+            RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START,
+            RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT,
+            RealtimeEvent.RT_OUTPUT_TURN_END,
+            RealtimeEvent.RT_OUTPUT_READY,
         ]

     def init(self):

@@ -281,6 +288,7 @@ class Kernel:
         self.window.dispatch(KernelEvent(KernelEvent.TERMINATE))
         self.stop(exit=True)
         self.window.controller.plugins.destroy()
+        self.window.controller.realtime.shutdown()

     def stop(self, exit: bool = False):
         """

pygpt_net/controller/kernel/reply.py
CHANGED

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.23
+# Updated Date: 2025.08.31 23:00:00 #
 # ================================================== #

 import json

@@ -109,6 +109,10 @@ class Reply:
         core.ctx.update_item(self.reply_ctx)  # update context in db
         self.window.update_status('...')

+        # append tool calls from previous context (used for tool results handling)
+        if self.reply_ctx.tool_calls:
+            prev_ctx.extra["prev_tool_calls"] = self.reply_ctx.tool_calls
+
         # tool output append
         dispatch(RenderEvent(RenderEvent.TOOL_UPDATE, {
             "meta": self.reply_ctx.meta,

pygpt_net/controller/realtime/__init__.py
ADDED

@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.30 06:00:00 #
+# ================================================== #
+
+from .realtime import Realtime

pygpt_net/controller/realtime/manager.py
ADDED

@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+from typing import Optional
+
+from pygpt_net.core.realtime.worker import RealtimeWorker, RealtimeOptions
+from pygpt_net.item.ctx import CtxItem
+
+class Manager:
+    """
+    Manager that mirrors chat.stream controller shape.
+
+    Starts a RealtimeWorker and routes text events and lifecycle to the UI.
+    Audio is forwarded by the main-thread via RT_OUTPUT_AUDIO_DELTA events.
+    """
+    def __init__(self, window=None):
+        self.window = window
+        self.worker: Optional[RealtimeWorker] = None
+        self.ctx: Optional[CtxItem] = None
+        self.provider: Optional[str] = None
+        self.opts: Optional[RealtimeOptions] = None
+
+    def start(
+        self,
+        ctx: CtxItem,
+        opts: RealtimeOptions
+    ):
+        """
+        Start realtime worker
+
+        :param ctx: CtxItem
+        :param opts: RealtimeOptions
+        """
+        self.ctx = ctx
+        self.opts = opts
+        self.provider = opts.provider
+
+        worker = RealtimeWorker(self.window, ctx, opts)
+        self.worker = worker
+        self.window.core.debug.info(f"[realtime] Begin: provider={opts.provider}, model={opts.model}")
+        self.window.threadpool.start(worker)
+
+    def shutdown(self):
+        """Shutdown realtime worker"""
+        self.worker = None
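
For orientation, a minimal usage sketch of the manager above. RealtimeOptions lives in pygpt_net/core/realtime/options.py (new in this release); only its provider and model attributes are visible in this diff, so the constructor arguments below are assumptions rather than the real signature.

    from pygpt_net.core.realtime.worker import RealtimeOptions

    # hypothetical call site; ctx is the CtxItem of the current turn
    opts = RealtimeOptions(provider="openai", model="<model-id>")  # field names assumed from the debug log above
    window.controller.realtime.manager.start(ctx, opts)            # runs a RealtimeWorker on the Qt thread pool
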
pygpt_net/controller/realtime/realtime.py
ADDED

@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+from PySide6.QtCore import Slot, QTimer
+
+from pygpt_net.core.events import RealtimeEvent, RenderEvent, BaseEvent, AppEvent, KernelEvent
+from pygpt_net.core.realtime.worker import RealtimeSignals
+from pygpt_net.core.types import MODE_AUDIO
+from pygpt_net.utils import trans
+from pygpt_net.core.tabs import Tab
+
+from .manager import Manager
+
+class Realtime:
+    def __init__(self, window=None):
+        """
+        Realtime controller
+
+        :param window: Window instance
+        """
+        self.window = window
+        self.manager = Manager(window)
+        self.signals = RealtimeSignals()
+        self.signals.response.connect(self.handle_response)
+        self.current_active = None  # openai | google
+        self.allowed_modes = [MODE_AUDIO]
+        self.manual_commit_sent = False
+
+    def setup(self):
+        """Setup realtime core, signals, etc. in main thread"""
+        self.window.core.audio.setup()  # setup RT signals in audio input/output core
+
+    def is_enabled(self) -> bool:
+        """
+        Check if realtime is enabled in settings
+
+        :return: True if enabled, False otherwise
+        """
+        mode = self.window.core.config.get("mode")
+        if mode == MODE_AUDIO:
+            if self.window.controller.ui.tabs.get_current_type() != Tab.TAB_NOTEPAD:
+                return True
+        return False
+
+    @Slot(object)
+    def handle(self, event: BaseEvent):
+        """
+        Handle realtime event (returned from dispatcher)
+
+        :param event: RealtimeEvent instance
+        """
+        # check if mode is supported
+        if not self.is_supported() and isinstance(event, RealtimeEvent):
+            event.stop = True  # stop further propagation
+            return  # ignore if not in realtime mode
+
+        # ----------------------------------------------------
+
+        # audio output chunk: send to audio output handler
+        if event.name == RealtimeEvent.RT_OUTPUT_AUDIO_DELTA:
+            self.set_idle()
+            payload = event.data.get("payload", None)
+            if payload:
+                self.window.core.audio.output.handle_realtime(payload, self.signals)
+
+        # audio input chunk: send to the active realtime client
+        elif event.name == RealtimeEvent.RT_INPUT_AUDIO_DELTA:
+            self.set_idle()
+            if self.current_active == "google":
+                self.window.core.api.google.realtime.handle_audio_input(event)
+            elif self.current_active == "openai":
+                self.window.core.api.openai.realtime.handle_audio_input(event)
+
+        # begin: first text chunk or audio chunk received, start rendering
+        elif event.name == RealtimeEvent.RT_OUTPUT_READY:
+            ctx = event.data.get('ctx', None)
+            if ctx:
+                self.window.dispatch(RenderEvent(RenderEvent.STREAM_BEGIN, {
+                    "meta": ctx.meta,
+                    "ctx": ctx,
+                }))
+                self.set_busy()
+
+        # commit: audio buffer sent, stop audio input and finalize the response
+        elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_COMMIT:
+            self.set_busy()
+            if self.manual_commit_sent:
+                self.manual_commit_sent = False
+                return  # abort if manual commit was already sent
+            self.window.controller.audio.execute_input_stop()
+
+        elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP:
+            self.manual_commit_sent = True
+            self.set_busy()
+            QTimer.singleShot(0, lambda: self.manual_commit())
+
+        elif event.name == RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START:
+            self.set_idle()
+            self.window.controller.chat.input.execute("...", force=True)
+            self.window.dispatch(KernelEvent(KernelEvent.STATUS, {
+                'status': trans("speech.listening"),
+            }))
+
+        # text delta: append text chunk to the response
+        elif event.name == RealtimeEvent.RT_OUTPUT_TEXT_DELTA:
+            self.set_idle()
+            ctx = event.data.get('ctx', None)
+            chunk = event.data.get('chunk', "")
+            if chunk and ctx:
+                self.window.dispatch(RenderEvent(RenderEvent.STREAM_APPEND, {
+                    "meta": ctx.meta,
+                    "ctx": ctx,
+                    "chunk": chunk,
+                    "begin": False,
+                }))
+
+        # audio end: stop audio playback
+        elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_END:
+            self.set_idle()
+            self.window.controller.chat.common.unlock_input()
+
+        # end of turn: finalize the response
+        elif event.name == RealtimeEvent.RT_OUTPUT_TURN_END:
+            self.set_idle()
+            ctx = event.data.get('ctx', None)
+            if ctx:
+                self.end_turn(ctx)
+            if self.window.controller.audio.is_recording():
+                self.window.update_status(trans("speech.listening"))
+            self.window.controller.chat.common.unlock_input()
+
+        # volume change: update volume in audio output handler
+        elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_VOLUME_CHANGED:
+            volume = event.data.get("volume", 1.0)
+            self.window.controller.audio.ui.on_output_volume_change(volume)
+
+        # error: audio output error
+        elif event.name == RealtimeEvent.RT_OUTPUT_AUDIO_ERROR:
+            self.set_idle()
+            error = event.data.get("error")
+            self.window.core.debug.log(error)
+            self.window.controller.chat.common.unlock_input()
+
+        # -----------------------------------
+
+        # app events, always handled
+        elif event.name == AppEvent.MODE_SELECTED:
+            mode = self.window.core.config.get("mode")
+            if mode != MODE_AUDIO:
+                QTimer.singleShot(0, lambda: self.reset())
+
+        elif event.name == AppEvent.CTX_CREATED:
+            QTimer.singleShot(0, lambda: self.reset())
+
+        elif event.name == AppEvent.CTX_SELECTED:
+            QTimer.singleShot(0, lambda: self.reset())
+
+    @Slot(object)
+    def handle_response(self, event: RealtimeEvent):
+        """
+        Handle response event (send to kernel -> dispatcher)
+
+        :param event: RealtimeEvent instance
+        """
+        self.window.controller.kernel.listener(event)
+
+    def is_auto_turn(self) -> bool:
+        """
+        Check if auto-turn is enabled
+
+        :return: True if auto-turn is enabled, False otherwise
+        """
+        return self.window.core.config.get("audio.input.auto_turn", True)
+
+    def manual_commit(self):
+        """Manually commit the response (end of turn)"""
+        if self.current_active == "google":
+            self.window.core.api.google.realtime.manual_commit()
+        elif self.current_active == "openai":
+            self.window.core.api.openai.realtime.manual_commit()
+
+    def end_turn(self, ctx):
+        """
+        End of realtime turn - finalize the response
+
+        :param ctx: Context instance
+        """
+        self.set_idle()
+        if not ctx:
+            return
+        self.window.controller.chat.output.handle_after(
+            ctx=ctx,
+            mode=MODE_AUDIO,
+            stream=True,
+        )
+        self.window.controller.chat.output.post_handle(
+            ctx=ctx,
+            mode=MODE_AUDIO,
+            stream=True,
+        )
+        self.window.controller.chat.output.handle_end(
+            ctx=ctx,
+            mode=MODE_AUDIO,
+        )
+        self.window.controller.chat.common.show_response_tokens(ctx)
+
+    def shutdown(self):
+        """Shutdown all realtime threads and async loops"""
+        try:
+            self.window.core.api.openai.realtime.shutdown()
+        except Exception as e:
+            self.window.core.debug.log(f"[openai] Realtime shutdown error: {e}")
+        try:
+            self.window.core.api.google.realtime.shutdown()
+        except Exception as e:
+            self.window.core.debug.log(f"[google] Realtime shutdown error: {e}")
+        try:
+            self.manager.shutdown()
+        except Exception as e:
+            self.window.core.debug.log(f"[manager] Realtime shutdown error: {e}")
+
+    def reset(self):
+        """Reset realtime session"""
+        try:
+            self.window.core.api.openai.realtime.reset()
+        except Exception as e:
+            self.window.core.debug.log(f"[openai] Realtime reset error: {e}")
+        try:
+            self.window.core.api.google.realtime.reset()
+        except Exception as e:
+            self.window.core.debug.log(f"[google] Realtime reset error: {e}")
+
+    def is_supported(self) -> bool:
+        """
+        Check if current mode supports realtime
+
+        :return: True if mode supports realtime, False otherwise
+        """
+        mode = self.window.core.config.get("mode")
+        return mode in self.allowed_modes
+
+    def set_current_active(self, provider: str):
+        """
+        Set the current active realtime provider
+
+        :param provider: Provider name (openai, google)
+        """
+        self.current_active = provider.lower() if provider else None
+
+    def set_idle(self):
+        """Set kernel state to IDLE"""
+        QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_IDLE, {
+            "id": "realtime",
+        })))
+
+    def set_busy(self):
+        """Set kernel state to BUSY"""
+        QTimer.singleShot(0, lambda: self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {
+            "id": "realtime",
+        })))
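
To illustrate the flow above, a minimal sketch of pushing one text delta into the controller. It assumes RealtimeEvent takes the same (name, data) constructor as the KernelEvent and RenderEvent calls in this file; in the application the worker emits such events through signals.response and the kernel/dispatcher rather than calling handle() directly.

    from pygpt_net.core.events import RealtimeEvent

    # hypothetical direct call; normally the dispatcher routes the event here
    event = RealtimeEvent(RealtimeEvent.RT_OUTPUT_TEXT_DELTA, {
        "ctx": ctx,            # CtxItem of the current turn
        "chunk": "partial ",   # text fragment received from the provider
    })
    window.controller.realtime.handle(event)  # appended to the chat via RenderEvent.STREAM_APPEND
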
pygpt_net/controller/ui/mode.py
CHANGED

@@ -20,6 +20,7 @@ from pygpt_net.core.types import (
     MODE_COMPUTER,
     MODE_AGENT_OPENAI,
     MODE_COMPLETION,
+    MODE_AUDIO,
 )
 from pygpt_net.core.tabs.tab import Tab
 from pygpt_net.core.events import Event

@@ -55,6 +56,12 @@ class Mode:
         is_image = mode == MODE_IMAGE
         is_llama_index = mode == MODE_LLAMA_INDEX
         is_completion = mode == MODE_COMPLETION
+        is_audio = mode == MODE_AUDIO
+
+        if not is_audio:
+            self.window.ui.nodes['audio.auto_turn'].setVisible(False)
+        else:
+            self.window.ui.nodes['audio.auto_turn'].setVisible(True)

         if not is_assistant:
             ui_nodes['presets.widget'].setVisible(True)

pygpt_net/controller/ui/ui.py
CHANGED

@@ -13,6 +13,7 @@ from typing import Optional

 from PySide6.QtGui import QColor

+from pygpt_net.core.types import MODE_IMAGE
 from pygpt_net.core.events import BaseEvent, Event
 from pygpt_net.utils import trans

@@ -64,6 +65,7 @@ class UI:
         self.update_tokens()
         self.vision.update()
         self.window.controller.agent.legacy.update()
+        self.img_update_available_resolutions()

     def handle(self, event: BaseEvent):
         """

@@ -215,4 +217,20 @@ class UI:
     def on_global_stop(self):
         """Global stop button action"""
         if self.stop_action == "idx":
-            self.window.controller.idx.force_stop()
+            self.window.controller.idx.force_stop()
+
+    def img_update_available_resolutions(self):
+        """Update available resolutions for images"""
+        mode = self.window.core.config.get('mode')
+        if mode != MODE_IMAGE:
+            return
+        model = self.window.core.config.get('model')
+        keys = self.window.core.image.get_available_resolutions(model)
+        current = self.window.core.config.get('img_resolution', '1024x1024')
+        self.window.ui.config['global']['img_resolution'].set_keys(keys, lock=False)
+        self.window.controller.config.apply_value(
+            parent_id="global",
+            key="img_resolution",
+            option=self.window.core.image.get_resolution_option(),
+            value=current,
+        )

pygpt_net/core/audio/audio.py
CHANGED

@@ -6,7 +6,7 @@
 # GitHub: https://github.com/szczyglis-dev/py-gpt #
 # MIT License #
 # Created By : Marcin Szczygliński #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.31 23:00:00 #
 # ================================================== #

 import hashlib

@@ -42,6 +42,11 @@ class Audio:
         }
         self.last_error = None

+    def setup(self):
+        """Initialize audio core"""
+        self.capture.setup()
+        self.output.setup()
+
     def get_input_devices(self) -> List[Tuple[int, str]]:
         """
         Get input devices

pygpt_net/core/audio/backend/native/__init__.py
ADDED

@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package #
+# Website: https://pygpt.net #
+# GitHub: https://github.com/szczyglis-dev/py-gpt #
+# MIT License #
+# Created By : Marcin Szczygliński #
+# Updated Date: 2025.08.31 23:00:00 #
+# ================================================== #
+
+from .native import NativeBackend
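
Since native.py moved into a package (native/native.py) in this release, this __init__ re-exports NativeBackend, which should keep package-level imports of the form below working; the exact call sites are not shown in this diff.

    # assumed import path, based on the re-export above
    from pygpt_net.core.audio.backend.native import NativeBackend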