pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +8 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +29 -3
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/data/config/config.json +10 -4
- pygpt_net/data/config/models.json +149 -103
- pygpt_net/data/config/settings.json +50 -0
- pygpt_net/data/locale/locale.de.ini +5 -5
- pygpt_net/data/locale/locale.en.ini +19 -13
- pygpt_net/data/locale/locale.es.ini +5 -5
- pygpt_net/data/locale/locale.fr.ini +5 -5
- pygpt_net/data/locale/locale.it.ini +5 -5
- pygpt_net/data/locale/locale.pl.ini +5 -5
- pygpt_net/data/locale/locale.uk.ini +5 -5
- pygpt_net/data/locale/locale.zh.ini +1 -1
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +39 -6
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +15 -0
- pygpt_net/provider/core/model/patch.py +11 -0
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
|
@@ -15,6 +15,10 @@ eleven_labs_voice.description = Specify the Voice ID.
|
|
|
15
15
|
eleven_labs_voice.label = Voice ID
|
|
16
16
|
google_api_key.description = You can obtain your own API key here: https://console.cloud.google.com/apis/library/texttospeech.googleapis.com
|
|
17
17
|
google_api_key.label = Google Cloud Text-to-speech API Key
|
|
18
|
+
google_genai_tts_model.description = Specify Gemini TTS model, e.g.: gemini-2.5-flash-preview-tts or gemini-2.5-pro-preview-tts
|
|
19
|
+
google_genai_tts_model.label = Model
|
|
20
|
+
google_genai_tts_voice.description = Specify voice, e.g.: Puck, Kore, Charon, Leda, Zephyr... (case-sensitive)
|
|
21
|
+
google_genai_tts_voice.label = Voice
|
|
18
22
|
google_lang.description = Specify the language code.
|
|
19
23
|
google_lang.label = Language code
|
|
20
24
|
google_voice.description = Specify the voice.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date:
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
@@ -23,6 +23,7 @@ from pygpt_net.utils import trans
|
|
|
23
23
|
from .config import Config
|
|
24
24
|
from .worker import Worker
|
|
25
25
|
from .simple import Simple
|
|
26
|
+
from ...core.types import MODE_AUDIO
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
class Plugin(BasePlugin):
|
|
@@ -124,13 +125,31 @@ class Plugin(BasePlugin):
|
|
|
124
125
|
words = [x.strip() for x in words] # remove white-spaces
|
|
125
126
|
return words
|
|
126
127
|
|
|
127
|
-
def toggle_recording_simple(
|
|
128
|
+
def toggle_recording_simple(
|
|
129
|
+
self,
|
|
130
|
+
state: bool = None,
|
|
131
|
+
auto: bool = False
|
|
132
|
+
):
|
|
128
133
|
"""
|
|
129
134
|
Event: AUDIO_INPUT_RECORD_TOGGLE
|
|
130
135
|
|
|
131
136
|
Toggle recording
|
|
137
|
+
|
|
138
|
+
:param state: state to set
|
|
139
|
+
:param auto: True if called automatically (not by user)
|
|
140
|
+
"""
|
|
141
|
+
if self.window.controller.realtime.is_enabled():
|
|
142
|
+
self.handler_simple.toggle_realtime(state=state, auto=auto)
|
|
143
|
+
return
|
|
144
|
+
self.handler_simple.toggle_recording(state=state)
|
|
145
|
+
|
|
146
|
+
def is_recording(self) -> bool:
|
|
132
147
|
"""
|
|
133
|
-
|
|
148
|
+
Check if is recording (simple mode)
|
|
149
|
+
|
|
150
|
+
:return: True if is recording
|
|
151
|
+
"""
|
|
152
|
+
return self.handler_simple.is_recording
|
|
134
153
|
|
|
135
154
|
def toggle_speech(self, state: bool):
|
|
136
155
|
"""
|
|
@@ -214,7 +233,9 @@ class Plugin(BasePlugin):
|
|
|
214
233
|
self.toggle_speech(data['value'])
|
|
215
234
|
|
|
216
235
|
elif name == Event.AUDIO_INPUT_RECORD_TOGGLE:
|
|
217
|
-
|
|
236
|
+
state = data['state'] if 'value' in data else None
|
|
237
|
+
auto = data['auto'] if 'auto' in data else False
|
|
238
|
+
self.toggle_recording_simple(state=state, auto=auto)
|
|
218
239
|
|
|
219
240
|
elif name == Event.AUDIO_INPUT_STOP:
|
|
220
241
|
self.on_stop()
|
|
@@ -492,6 +513,18 @@ class Plugin(BasePlugin):
|
|
|
492
513
|
self.window.dispatch(event) # send text, input clear in send method
|
|
493
514
|
self.set_status('')
|
|
494
515
|
|
|
516
|
+
def handle_realtime_stopped(self):
|
|
517
|
+
"""Handle realtime stopped"""
|
|
518
|
+
context = BridgeContext()
|
|
519
|
+
context.prompt = "..."
|
|
520
|
+
extra = {}
|
|
521
|
+
event = KernelEvent(KernelEvent.INPUT_SYSTEM, {
|
|
522
|
+
'context': context,
|
|
523
|
+
'extra': extra,
|
|
524
|
+
})
|
|
525
|
+
self.window.dispatch(event) # send text, input clear in send method
|
|
526
|
+
self.set_status('')
|
|
527
|
+
|
|
495
528
|
@Slot(object)
|
|
496
529
|
def handle_status(self, data: str):
|
|
497
530
|
"""
|
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.31 23:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
13
13
|
|
|
14
14
|
from PySide6.QtCore import QTimer
|
|
15
15
|
|
|
16
|
-
from pygpt_net.core.events import AppEvent
|
|
16
|
+
from pygpt_net.core.events import AppEvent, RealtimeEvent
|
|
17
17
|
from pygpt_net.core.tabs.tab import Tab
|
|
18
18
|
from pygpt_net.utils import trans
|
|
19
19
|
|
|
@@ -32,8 +32,46 @@ class Simple:
|
|
|
32
32
|
self.is_recording = False
|
|
33
33
|
self.timer = None
|
|
34
34
|
|
|
35
|
-
def
|
|
36
|
-
|
|
35
|
+
def toggle_realtime(
|
|
36
|
+
self,
|
|
37
|
+
state: bool = None,
|
|
38
|
+
auto: bool = False
|
|
39
|
+
):
|
|
40
|
+
"""
|
|
41
|
+
Toggle recording
|
|
42
|
+
|
|
43
|
+
:param state: True to start recording, False to stop recording, None to toggle
|
|
44
|
+
:param auto: True if called automatically (not by user)
|
|
45
|
+
"""
|
|
46
|
+
if state is not None:
|
|
47
|
+
if state and not self.is_recording:
|
|
48
|
+
self.start_recording(realtime=True)
|
|
49
|
+
elif not state:
|
|
50
|
+
self.force_stop()
|
|
51
|
+
else:
|
|
52
|
+
self.force_stop()
|
|
53
|
+
return
|
|
54
|
+
if self.is_recording:
|
|
55
|
+
self.stop_recording(realtime=True)
|
|
56
|
+
if not auto:
|
|
57
|
+
self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP))
|
|
58
|
+
else:
|
|
59
|
+
self.start_recording(realtime=True)
|
|
60
|
+
if not auto:
|
|
61
|
+
self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START))
|
|
62
|
+
|
|
63
|
+
def toggle_recording(self, state: bool = None):
|
|
64
|
+
"""
|
|
65
|
+
Toggle recording
|
|
66
|
+
|
|
67
|
+
:param state: True to start recording, False to stop recording, None to toggle
|
|
68
|
+
"""
|
|
69
|
+
if state is not None:
|
|
70
|
+
if state and not self.is_recording:
|
|
71
|
+
self.start_recording()
|
|
72
|
+
elif not state:
|
|
73
|
+
self.force_stop()
|
|
74
|
+
return
|
|
37
75
|
if self.is_recording:
|
|
38
76
|
self.stop_recording()
|
|
39
77
|
else:
|
|
@@ -51,11 +89,12 @@ class Simple:
|
|
|
51
89
|
"""Stop timeout"""
|
|
52
90
|
self.stop_recording(timeout=True)
|
|
53
91
|
|
|
54
|
-
def start_recording(self, force: bool = False):
|
|
92
|
+
def start_recording(self, force: bool = False, realtime: bool = False):
|
|
55
93
|
"""
|
|
56
94
|
Start recording
|
|
57
95
|
|
|
58
96
|
:param force: True to force recording
|
|
97
|
+
:param realtime: True if called from realtime callback
|
|
59
98
|
"""
|
|
60
99
|
# display snap warning if not displayed yet
|
|
61
100
|
if (not self.plugin.window.core.config.get("audio.input.snap", False)
|
|
@@ -89,7 +128,7 @@ class Simple:
|
|
|
89
128
|
# disable in continuous mode
|
|
90
129
|
timeout = int(self.plugin.window.core.config.get('audio.input.timeout', 120) or 0) # get timeout
|
|
91
130
|
timeout_continuous = self.plugin.window.core.config.get('audio.input.timeout.continuous', False) # enable continuous timeout
|
|
92
|
-
if timeout > 0:
|
|
131
|
+
if timeout > 0 and not realtime:
|
|
93
132
|
if self.timer is None and (not continuous_enabled or timeout_continuous):
|
|
94
133
|
self.timer = QTimer()
|
|
95
134
|
self.timer.timeout.connect(self.stop_timeout)
|
|
@@ -119,11 +158,12 @@ class Simple:
|
|
|
119
158
|
)
|
|
120
159
|
self.switch_btn_start() # switch button to start
|
|
121
160
|
|
|
122
|
-
def stop_recording(self, timeout: bool = False):
|
|
161
|
+
def stop_recording(self, timeout: bool = False, realtime: bool = False):
|
|
123
162
|
"""
|
|
124
163
|
Stop recording
|
|
125
164
|
|
|
126
165
|
:param timeout: True if stopped due to timeout
|
|
166
|
+
:param realtime: True if called from realtime callback
|
|
127
167
|
"""
|
|
128
168
|
self.plugin.window.core.audio.capture.reset_audio_level()
|
|
129
169
|
self.is_recording = False
|
|
@@ -143,7 +183,7 @@ class Simple:
|
|
|
143
183
|
return
|
|
144
184
|
|
|
145
185
|
if self.plugin.window.core.audio.capture.has_frames():
|
|
146
|
-
if not self.plugin.window.core.audio.capture.has_min_frames():
|
|
186
|
+
if not self.plugin.window.core.audio.capture.has_min_frames() and not realtime:
|
|
147
187
|
self.plugin.window.update_status(trans("status.audio.too_short"))
|
|
148
188
|
self.plugin.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED)) # app event
|
|
149
189
|
return
|
|
@@ -152,6 +192,15 @@ class Simple:
|
|
|
152
192
|
else:
|
|
153
193
|
self.plugin.window.update_status("")
|
|
154
194
|
|
|
195
|
+
def force_stop(self):
|
|
196
|
+
"""Stop recording"""
|
|
197
|
+
self.is_recording = False
|
|
198
|
+
self.plugin.window.dispatch(AppEvent(AppEvent.INPUT_VOICE_LISTEN_STOPPED)) # app event
|
|
199
|
+
self.switch_btn_start() # switch button to start
|
|
200
|
+
if self.plugin.window.core.audio.capture.has_source():
|
|
201
|
+
self.plugin.window.core.audio.capture.stop() # stop recording
|
|
202
|
+
return
|
|
203
|
+
|
|
155
204
|
def on_stop(self):
|
|
156
205
|
"""Handle auto-transcribe"""
|
|
157
206
|
path = os.path.join(self.plugin.window.core.config.path, self.plugin.input_file)
|
|
@@ -6,13 +6,14 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.30 06:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
from typing import Optional, Dict, Any
|
|
13
13
|
|
|
14
14
|
from google.genai import types as gtypes
|
|
15
15
|
from google import genai
|
|
16
|
+
|
|
16
17
|
from pygpt_net.core.types import (
|
|
17
18
|
MODE_ASSISTANT,
|
|
18
19
|
MODE_AUDIO,
|
|
@@ -29,7 +30,7 @@ from .vision import Vision
|
|
|
29
30
|
from .tools import Tools
|
|
30
31
|
from .audio import Audio
|
|
31
32
|
from .image import Image
|
|
32
|
-
|
|
33
|
+
from .realtime import Realtime
|
|
33
34
|
|
|
34
35
|
class ApiGoogle:
|
|
35
36
|
def __init__(self, window=None):
|
|
@@ -44,6 +45,7 @@ class ApiGoogle:
|
|
|
44
45
|
self.tools = Tools(window)
|
|
45
46
|
self.audio = Audio(window)
|
|
46
47
|
self.image = Image(window)
|
|
48
|
+
self.realtime = Realtime(window)
|
|
47
49
|
self.client: Optional[genai.Client] = None
|
|
48
50
|
self.locked = False
|
|
49
51
|
self.last_client_args: Optional[Dict[str, Any]] = None
|
|
@@ -72,12 +74,18 @@ class ApiGoogle:
|
|
|
72
74
|
self.last_client_args = filtered
|
|
73
75
|
return self.client
|
|
74
76
|
|
|
75
|
-
def call(
|
|
77
|
+
def call(
|
|
78
|
+
self,
|
|
79
|
+
context: BridgeContext,
|
|
80
|
+
extra: dict = None,
|
|
81
|
+
rt_signals = None
|
|
82
|
+
) -> bool:
|
|
76
83
|
"""
|
|
77
84
|
Make an API call to Google GenAI
|
|
78
85
|
|
|
79
86
|
:param context: BridgeContext
|
|
80
87
|
:param extra: Extra parameters
|
|
88
|
+
:param rt_signals: Realtime signals for audio streaming
|
|
81
89
|
:return: True if successful, False otherwise
|
|
82
90
|
"""
|
|
83
91
|
mode = context.mode
|
|
@@ -94,6 +102,18 @@ class ApiGoogle:
|
|
|
94
102
|
response = None
|
|
95
103
|
|
|
96
104
|
if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
|
|
105
|
+
|
|
106
|
+
# Live API for audio streaming
|
|
107
|
+
if mode == MODE_AUDIO and stream:
|
|
108
|
+
is_realtime = self.realtime.begin(
|
|
109
|
+
context=context,
|
|
110
|
+
model=model,
|
|
111
|
+
extra=extra or {},
|
|
112
|
+
rt_signals=rt_signals
|
|
113
|
+
)
|
|
114
|
+
if is_realtime:
|
|
115
|
+
return True
|
|
116
|
+
|
|
97
117
|
response = self.chat.send(context=context, extra=extra)
|
|
98
118
|
used_tokens = self.chat.get_used_tokens()
|
|
99
119
|
if ctx:
|
|
@@ -135,7 +155,11 @@ class ApiGoogle:
|
|
|
135
155
|
pass
|
|
136
156
|
return True
|
|
137
157
|
|
|
138
|
-
def quick_call(
|
|
158
|
+
def quick_call(
|
|
159
|
+
self,
|
|
160
|
+
context: BridgeContext,
|
|
161
|
+
extra: dict = None
|
|
162
|
+
) -> str:
|
|
139
163
|
"""
|
|
140
164
|
Make a quick API call to Google GenAI and return the output text
|
|
141
165
|
|
|
@@ -206,9 +230,9 @@ class ApiGoogle:
|
|
|
206
230
|
def build_remote_tools(self, model: ModelItem = None) -> list:
|
|
207
231
|
"""
|
|
208
232
|
Build Google GenAI remote tools based on config flags.
|
|
209
|
-
-
|
|
233
|
+
- remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
|
|
210
234
|
or GoogleSearchRetrieval (Gemini 1.5 fallback).
|
|
211
|
-
-
|
|
235
|
+
- remote_tools.google.code_interpreter: enables code execution tool.
|
|
212
236
|
|
|
213
237
|
Returns a list of gtypes.Tool objects (can be empty).
|
|
214
238
|
|
|
@@ -242,6 +266,15 @@ class ApiGoogle:
|
|
|
242
266
|
except Exception as e:
|
|
243
267
|
self.window.core.debug.log(e)
|
|
244
268
|
|
|
269
|
+
# URL Context tool
|
|
270
|
+
if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
|
|
271
|
+
try:
|
|
272
|
+
# Supported on Gemini 2.x+ models (not on 1.5)
|
|
273
|
+
if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
|
|
274
|
+
tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
|
|
275
|
+
except Exception as e:
|
|
276
|
+
self.window.core.debug.log(e)
|
|
277
|
+
|
|
245
278
|
return tools
|
|
246
279
|
|
|
247
280
|
|
|
@@ -24,6 +24,8 @@ class Audio:
|
|
|
24
24
|
Audio helpers for Google GenAI.
|
|
25
25
|
- Build audio input parts for requests
|
|
26
26
|
- Convert Google PCM output to WAV (base64) for UI compatibility
|
|
27
|
+
|
|
28
|
+
:param window: Window instance
|
|
27
29
|
"""
|
|
28
30
|
self.window = window
|
|
29
31
|
|
|
@@ -103,7 +105,12 @@ class Audio:
|
|
|
103
105
|
|
|
104
106
|
@staticmethod
|
|
105
107
|
def _ensure_bytes(data) -> Optional[bytes]:
|
|
106
|
-
"""
|
|
108
|
+
"""
|
|
109
|
+
Return raw bytes from inline_data.data (bytes or base64 string).
|
|
110
|
+
|
|
111
|
+
:param data: bytes or base64 string
|
|
112
|
+
:return: bytes or None
|
|
113
|
+
"""
|
|
107
114
|
try:
|
|
108
115
|
if isinstance(data, (bytes, bytearray)):
|
|
109
116
|
return bytes(data)
|
|
@@ -29,9 +29,17 @@ class Chat:
|
|
|
29
29
|
self.window = window
|
|
30
30
|
self.input_tokens = 0
|
|
31
31
|
|
|
32
|
-
def send(
|
|
32
|
+
def send(
|
|
33
|
+
self,
|
|
34
|
+
context: BridgeContext,
|
|
35
|
+
extra: Optional[Dict[str, Any]] = None
|
|
36
|
+
):
|
|
33
37
|
"""
|
|
34
38
|
Call Google GenAI for chat / multimodal / audio.
|
|
39
|
+
|
|
40
|
+
:param context: BridgeContext with prompt, model, history, mode, etc.
|
|
41
|
+
:param extra: Extra parameters (not used currently)
|
|
42
|
+
:return: Response object or generator (if streaming)
|
|
35
43
|
"""
|
|
36
44
|
prompt = context.prompt
|
|
37
45
|
stream = context.stream
|
|
@@ -110,9 +118,13 @@ class Chat:
|
|
|
110
118
|
# Tools -> merge app-defined tools with remote tools
|
|
111
119
|
base_tools = self.window.core.api.google.tools.prepare(model, functions)
|
|
112
120
|
remote_tools = self.window.core.api.google.build_remote_tools(model)
|
|
121
|
+
|
|
122
|
+
# Check tools compatibility
|
|
113
123
|
if base_tools:
|
|
114
|
-
remote_tools = [] #
|
|
124
|
+
remote_tools = [] # remote tools are not allowed if function calling is used
|
|
115
125
|
tools = (base_tools or []) + (remote_tools or [])
|
|
126
|
+
if "-image" in model.id:
|
|
127
|
+
tools = None # function calling is not supported for image models
|
|
116
128
|
|
|
117
129
|
# Sampling
|
|
118
130
|
temperature = self.window.core.config.get('temperature')
|
|
@@ -144,7 +156,7 @@ class Chat:
|
|
|
144
156
|
# Voice selection (case-sensitive name)
|
|
145
157
|
voice_name = "Kore"
|
|
146
158
|
try:
|
|
147
|
-
tmp = self.window.core.plugins.get_option("audio_output", "
|
|
159
|
+
tmp = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
|
|
148
160
|
if tmp:
|
|
149
161
|
name = str(tmp).strip()
|
|
150
162
|
mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse", "legend": "Legend"}
|
|
@@ -169,9 +181,17 @@ class Chat:
|
|
|
169
181
|
else:
|
|
170
182
|
return client.models.generate_content(**params)
|
|
171
183
|
|
|
172
|
-
def unpack_response(
|
|
184
|
+
def unpack_response(
|
|
185
|
+
self,
|
|
186
|
+
mode: str,
|
|
187
|
+
response, ctx: CtxItem
|
|
188
|
+
):
|
|
173
189
|
"""
|
|
174
190
|
Unpack non-streaming response from Google GenAI and set context.
|
|
191
|
+
|
|
192
|
+
:param mode: MODE_CHAT or MODE_AUDIO
|
|
193
|
+
:param response: Response object
|
|
194
|
+
:param ctx: CtxItem to set output, audio_output, tokens, tool_calls
|
|
175
195
|
"""
|
|
176
196
|
if mode == MODE_AUDIO:
|
|
177
197
|
# Prefer audio if present
|
|
@@ -229,6 +249,11 @@ class Chat:
|
|
|
229
249
|
def extract_text(self, response) -> str:
|
|
230
250
|
"""
|
|
231
251
|
Extract output text.
|
|
252
|
+
|
|
253
|
+
Prefer response.text (Python SDK), then fallback to parts[].text.
|
|
254
|
+
|
|
255
|
+
:param response: Response object
|
|
256
|
+
:return: Extracted text
|
|
232
257
|
"""
|
|
233
258
|
txt = getattr(response, "text", None) or getattr(response, "output_text", None)
|
|
234
259
|
if txt:
|
|
@@ -332,11 +357,17 @@ class Chat:
|
|
|
332
357
|
|
|
333
358
|
return out
|
|
334
359
|
|
|
335
|
-
def _extract_inline_images_and_links(
|
|
360
|
+
def _extract_inline_images_and_links(
|
|
361
|
+
self,
|
|
362
|
+
response, ctx: CtxItem
|
|
363
|
+
) -> None:
|
|
336
364
|
"""
|
|
337
365
|
Extract inline image parts (Gemini image output) and file links.
|
|
338
366
|
- Saves inline_data (image/*) bytes to files and appends paths to ctx.images.
|
|
339
367
|
- Appends HTTP(S) image URIs from file_data to ctx.urls.
|
|
368
|
+
|
|
369
|
+
:param response: Response object
|
|
370
|
+
:param ctx: CtxItem to set images and urls
|
|
340
371
|
"""
|
|
341
372
|
images: list[str] = []
|
|
342
373
|
urls: list[str] = []
|
|
@@ -386,7 +417,12 @@ class Chat:
|
|
|
386
417
|
|
|
387
418
|
@staticmethod
|
|
388
419
|
def _ensure_bytes(data) -> bytes | None:
|
|
389
|
-
"""
|
|
420
|
+
"""
|
|
421
|
+
Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string.
|
|
422
|
+
|
|
423
|
+
:param data: bytes or str
|
|
424
|
+
:return: bytes or None
|
|
425
|
+
"""
|
|
390
426
|
try:
|
|
391
427
|
if isinstance(data, (bytes, bytearray)):
|
|
392
428
|
return bytes(data)
|
|
@@ -545,6 +581,9 @@ class Chat:
|
|
|
545
581
|
Heuristic check if the model supports native TTS.
|
|
546
582
|
- Official TTS models contain '-tts' in id (e.g. 'gemini-2.5-flash-preview-tts').
|
|
547
583
|
- Future/preview names may contain 'native-audio'.
|
|
584
|
+
|
|
585
|
+
:param model_id: Model ID
|
|
586
|
+
:return: True if supports TTS, False otherwise
|
|
548
587
|
"""
|
|
549
588
|
if not model_id:
|
|
550
589
|
return False
|