pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +8 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +29 -3
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/data/config/config.json +10 -4
- pygpt_net/data/config/models.json +149 -103
- pygpt_net/data/config/settings.json +50 -0
- pygpt_net/data/locale/locale.de.ini +5 -5
- pygpt_net/data/locale/locale.en.ini +19 -13
- pygpt_net/data/locale/locale.es.ini +5 -5
- pygpt_net/data/locale/locale.fr.ini +5 -5
- pygpt_net/data/locale/locale.it.ini +5 -5
- pygpt_net/data/locale/locale.pl.ini +5 -5
- pygpt_net/data/locale/locale.uk.ini +5 -5
- pygpt_net/data/locale/locale.zh.ini +1 -1
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +39 -6
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +11 -0
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
from typing import Optional, Dict, Any
|
|
14
|
+
|
|
15
|
+
from pygpt_net.core.events import RealtimeEvent
|
|
16
|
+
from pygpt_net.core.realtime.options import RealtimeOptions
|
|
17
|
+
from pygpt_net.core.bridge.context import BridgeContext
|
|
18
|
+
from pygpt_net.core.realtime.shared.session import extract_last_session_id
|
|
19
|
+
from pygpt_net.item.model import ModelItem
|
|
20
|
+
|
|
21
|
+
from .client import GoogleLiveClient
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Realtime:
    """
    Google GenAI realtime controller.

    Wraps a ``GoogleLiveClient`` handler and decides, per request, whether to
    forward tool results, refresh an already active session, or start a new
    realtime session through the realtime manager.
    """

    PROVIDER = "google"

    def __init__(self, window=None):
        """
        Google GenAI API realtime controller

        :param window: Window instance
        """
        self.window = window
        self.handler = GoogleLiveClient(window)
        # auto-turn / VAD settings last applied to the session;
        # begin() compares the current config against these values
        self.prev_auto_turn = False
        self.prev_vad_silence = 2000
        self.prev_vad_prefix = 300

    def begin(
            self,
            context: BridgeContext,
            model: Optional[ModelItem] = None,
            extra: Optional[Dict[str, Any]] = None,
            rt_signals=None
    ) -> bool:
        """
        Begin realtime session if applicable

        NOTE: ``model`` is typed as optional, but ``model.id`` is read as soon
        as a new session has to be started, so a model is required on that path.

        :param context: BridgeContext
        :param model: Optional[ModelItem]
        :param extra: Optional dict with extra parameters
        :param rt_signals: Optional RealtimeSignals
        :return: bool - True if realtime session started, False otherwise
        """
        # Build realtime options
        mm = context.multimodal_ctx
        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
        audio_format = getattr(mm, "audio_format", None) if mm else None
        audio_rate = getattr(mm, "audio_rate", None) if mm else None

        config = self.window.core.config
        is_debug = config.get("log.realtime", False)
        auto_turn = config.get("audio.input.auto_turn", True)
        opt_vad_silence = config.get("audio.input.vad.silence", 2000)
        opt_vad_prefix = config.get("audio.input.vad.prefix", 300)

        # setup manager
        rt_controller = self.window.controller.realtime
        rt_controller.set_current_active(self.PROVIDER)
        rt_controller.set_busy()
        self.handler.set_debug(is_debug)

        # handle sub-reply (tool results from tool calls)
        if context.ctx.internal and self._send_tool_results(context):
            return True  # do not start new session, just send tool results

        # update auto-turn in active session
        settings_changed = (
            auto_turn != self.prev_auto_turn
            or opt_vad_silence != self.prev_vad_silence
            or opt_vad_prefix != self.prev_vad_prefix
        )
        if self.handler.is_session_active() and settings_changed:
            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
            # remember what was just applied, otherwise the same update would be
            # re-sent on every call until a new session is started
            self._remember_turn_settings(auto_turn, opt_vad_silence, opt_vad_prefix)

        # Tools
        tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
        remote_tools = self.window.core.api.google.build_remote_tools(model)
        if tools:
            remote_tools = []  # in Google, remote tools are not allowed if function calling is used

        # if auto-turn is enabled and prompt is empty, update session and context only
        prompt_text = context.prompt or ""  # tolerate a missing (None) prompt
        is_empty_prompt = prompt_text.strip() == "" or prompt_text == "..."
        if auto_turn and self.handler.is_session_active() and is_empty_prompt:
            self.handler.update_session_tools_sync(tools, remote_tools)
            self.handler.update_ctx(context.ctx)
            return True  # do not send new request if session is active

        # Last session ID
        last_session_id = extract_last_session_id(context.history)
        if is_debug:
            print("[realtime session] Last ID", last_session_id)

        # Options
        opts = RealtimeOptions(
            provider=self.PROVIDER,
            model=model.id,
            system_prompt=context.system_prompt,
            prompt=context.prompt,
            voice=self._resolve_voice(),
            audio_data=audio_bytes,
            audio_format=audio_format,
            audio_rate=audio_rate,
            vad=None,
            extra=extra or {},
            tools=tools,
            remote_tools=remote_tools,
            rt_signals=rt_signals,
            rt_session_id=last_session_id,
            auto_turn=auto_turn,
            vad_end_silence_ms=opt_vad_silence,
            vad_prefix_padding_ms=opt_vad_prefix,
        )

        # Start or append to realtime session via manager
        try:
            if is_debug:
                print("[realtime] Starting session with options:", opts.to_dict())
            rt = self.window.controller.realtime.manager
            rt.start(context.ctx, opts)
            self._remember_turn_settings(auto_turn, opt_vad_silence, opt_vad_prefix)
            return True
        except Exception as e:
            self.window.core.debug.log(e)
            return False  # fallback to non-live path

    def _send_tool_results(self, context: BridgeContext) -> bool:
        """
        Forward tool results of an internal (sub-reply) context to the handler

        The call ID is taken from the first entry of ``prev_tool_calls`` stored
        in the previous context ("call_id", falling back to "id").

        :param context: BridgeContext
        :return: bool - True if tool results were sent, False otherwise
        """
        prev_ctx = context.ctx.prev_ctx
        if not prev_ctx:
            return False
        tool_calls = prev_ctx.extra.get("prev_tool_calls")
        if not tool_calls or not isinstance(tool_calls, list):
            return False

        first_call = tool_calls[0]
        tool_call_id = first_call.get("call_id", "") or first_call.get("id", "")
        if not tool_call_id:
            return False

        tool_results = context.ctx.input
        try:
            tool_results = json.loads(tool_results)
        except (TypeError, ValueError):
            pass  # not JSON: send the raw input unchanged
        self.handler.send_tool_results_sync({
            tool_call_id: tool_results
        })
        return True

    def _resolve_voice(self) -> str:
        """
        Resolve voice name from the audio output plugin option

        :return: voice name, "Kore" if the option is empty or cannot be read
        """
        voice_name = "Kore"
        try:
            v = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
            if v:
                mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse",
                           "legend": "Legend"}
                voice_name = mapping.get(str(v).strip().lower(), str(v))
        except Exception:
            pass  # best-effort: keep the default voice if the option lookup fails
        return voice_name

    def _remember_turn_settings(self, auto_turn, vad_silence, vad_prefix):
        """
        Store auto-turn / VAD settings that were just applied to the session

        :param auto_turn: auto-turn flag
        :param vad_silence: VAD end-of-speech silence option value
        :param vad_prefix: VAD prefix padding option value
        """
        self.prev_auto_turn = auto_turn
        self.prev_vad_silence = vad_silence
        self.prev_vad_prefix = vad_prefix

    def handle_audio_input(self, event: RealtimeEvent):
        """
        Handle Realtime audio input event

        :param event: RealtimeEvent
        """
        self.handler.rt_handle_audio_input_sync(event)

    def manual_commit(self):
        """Manually commit audio input to realtime session"""
        self.handler.force_response_now_sync()

    def shutdown(self):
        """Shutdown realtime loops"""
        try:
            if self.handler.is_session_active():
                self.handler.close_session_sync()
        finally:
            # always try to stop the loop, even if closing the session failed
            try:
                self.handler.stop_loop_sync()
            except Exception:
                pass

    def reset(self):
        """Close realtime session"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.30 06:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
from openai import OpenAI
|
|
@@ -33,6 +33,7 @@ from .container import Container
|
|
|
33
33
|
from .image import Image
|
|
34
34
|
from .remote_tools import RemoteTools
|
|
35
35
|
from .responses import Responses
|
|
36
|
+
from .realtime import Realtime
|
|
36
37
|
from .store import Store
|
|
37
38
|
from .summarizer import Summarizer
|
|
38
39
|
from .tools import Tools
|
|
@@ -57,6 +58,7 @@ class ApiOpenAI:
|
|
|
57
58
|
self.image = Image(window)
|
|
58
59
|
self.remote_tools = RemoteTools(window)
|
|
59
60
|
self.responses = Responses(window)
|
|
61
|
+
self.realtime = Realtime(window)
|
|
60
62
|
self.store = Store(window)
|
|
61
63
|
self.summarizer = Summarizer(window)
|
|
62
64
|
self.tools = Tools(window)
|
|
@@ -90,12 +92,18 @@ class ApiOpenAI:
|
|
|
90
92
|
self.last_client_args = args
|
|
91
93
|
return self.client
|
|
92
94
|
|
|
93
|
-
def call(
|
|
95
|
+
def call(
|
|
96
|
+
self,
|
|
97
|
+
context: BridgeContext,
|
|
98
|
+
extra: dict = None,
|
|
99
|
+
rt_signals = None
|
|
100
|
+
) -> bool:
|
|
94
101
|
"""
|
|
95
102
|
Call OpenAI API
|
|
96
103
|
|
|
97
104
|
:param context: Bridge context
|
|
98
105
|
:param extra: Extra arguments
|
|
106
|
+
:param rt_signals: Realtime signals for audio streaming
|
|
99
107
|
:return: result
|
|
100
108
|
"""
|
|
101
109
|
mode = context.mode
|
|
@@ -145,6 +153,18 @@ class ApiOpenAI:
|
|
|
145
153
|
MODE_RESEARCH,
|
|
146
154
|
MODE_COMPUTER,
|
|
147
155
|
]:
|
|
156
|
+
if mode == MODE_AUDIO and stream:
|
|
157
|
+
|
|
158
|
+
# Realtime API for audio streaming
|
|
159
|
+
is_realtime = self.realtime.begin(
|
|
160
|
+
context=context,
|
|
161
|
+
model=model,
|
|
162
|
+
extra=extra or {},
|
|
163
|
+
rt_signals=rt_signals
|
|
164
|
+
)
|
|
165
|
+
if is_realtime:
|
|
166
|
+
return True
|
|
167
|
+
|
|
148
168
|
# responses API
|
|
149
169
|
if use_responses_api:
|
|
150
170
|
response = self.responses.send(
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ================================================== #
|
|
4
|
+
# This file is a part of PYGPT package #
|
|
5
|
+
# Website: https://pygpt.net #
|
|
6
|
+
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
|
+
# MIT License #
|
|
8
|
+
# Created By : Marcin Szczygliński #
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
|
+
# ================================================== #
|
|
11
|
+
|
|
12
|
+
from .realtime import Realtime
|