pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +7 -1
- pygpt_net/app_core.py +3 -1
- pygpt_net/config.py +3 -1
- pygpt_net/controller/__init__.py +9 -2
- pygpt_net/controller/audio/audio.py +38 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +23 -62
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/lang/custom.py +2 -2
- pygpt_net/controller/media/__init__.py +12 -0
- pygpt_net/controller/media/media.py +115 -0
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +293 -0
- pygpt_net/controller/ui/mode.py +23 -2
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +14 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +56 -5
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +160 -0
- pygpt_net/core/render/web/body.py +24 -3
- pygpt_net/core/text/utils.py +54 -2
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +54 -0
- pygpt_net/core/video/__init__.py +12 -0
- pygpt_net/core/video/video.py +290 -0
- pygpt_net/data/config/config.json +26 -5
- pygpt_net/data/config/models.json +221 -103
- pygpt_net/data/config/settings.json +244 -6
- pygpt_net/data/css/web-blocks.css +6 -0
- pygpt_net/data/css/web-chatgpt.css +6 -0
- pygpt_net/data/css/web-chatgpt_wide.css +6 -0
- pygpt_net/data/locale/locale.de.ini +35 -7
- pygpt_net/data/locale/locale.en.ini +56 -17
- pygpt_net/data/locale/locale.es.ini +35 -7
- pygpt_net/data/locale/locale.fr.ini +35 -7
- pygpt_net/data/locale/locale.it.ini +35 -7
- pygpt_net/data/locale/locale.pl.ini +38 -7
- pygpt_net/data/locale/locale.uk.ini +35 -7
- pygpt_net/data/locale/locale.zh.ini +31 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
- pygpt_net/item/model.py +22 -1
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +76 -7
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/video.py +364 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +241 -178
- pygpt_net/provider/core/model/patch.py +28 -2
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/web/duckduck_search.py +212 -0
- pygpt_net/ui/layout/toolbox/audio.py +55 -0
- pygpt_net/ui/layout/toolbox/footer.py +14 -42
- pygpt_net/ui/layout/toolbox/image.py +7 -13
- pygpt_net/ui/layout/toolbox/raw.py +52 -0
- pygpt_net/ui/layout/toolbox/split.py +48 -0
- pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
- pygpt_net/ui/layout/toolbox/video.py +49 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
|
@@ -6,13 +6,15 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.
|
|
9
|
+
# Updated Date: 2025.09.01 23:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
|
+
import os
|
|
12
13
|
from typing import Optional, Dict, Any
|
|
13
14
|
|
|
14
15
|
from google.genai import types as gtypes
|
|
15
16
|
from google import genai
|
|
17
|
+
|
|
16
18
|
from pygpt_net.core.types import (
|
|
17
19
|
MODE_ASSISTANT,
|
|
18
20
|
MODE_AUDIO,
|
|
@@ -29,7 +31,8 @@ from .vision import Vision
|
|
|
29
31
|
from .tools import Tools
|
|
30
32
|
from .audio import Audio
|
|
31
33
|
from .image import Image
|
|
32
|
-
|
|
34
|
+
from .realtime import Realtime
|
|
35
|
+
from .video import Video
|
|
33
36
|
|
|
34
37
|
class ApiGoogle:
|
|
35
38
|
def __init__(self, window=None):
|
|
@@ -44,6 +47,8 @@ class ApiGoogle:
|
|
|
44
47
|
self.tools = Tools(window)
|
|
45
48
|
self.audio = Audio(window)
|
|
46
49
|
self.image = Image(window)
|
|
50
|
+
self.realtime = Realtime(window)
|
|
51
|
+
self.video = Video(window)
|
|
47
52
|
self.client: Optional[genai.Client] = None
|
|
48
53
|
self.locked = False
|
|
49
54
|
self.last_client_args: Optional[Dict[str, Any]] = None
|
|
@@ -64,20 +69,56 @@ class ApiGoogle:
|
|
|
64
69
|
model = ModelItem()
|
|
65
70
|
model.provider = "google"
|
|
66
71
|
args = self.window.core.models.prepare_client_args(mode, model)
|
|
72
|
+
config = self.window.core.config
|
|
73
|
+
|
|
67
74
|
filtered = {}
|
|
68
75
|
if args.get("api_key"):
|
|
69
76
|
filtered["api_key"] = args["api_key"]
|
|
77
|
+
|
|
78
|
+
# setup VertexAI
|
|
79
|
+
use_vertex = False
|
|
80
|
+
if config.get("api_native_google.use_vertex", False):
|
|
81
|
+
use_vertex = True
|
|
82
|
+
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "1"
|
|
83
|
+
os.environ["GOOGLE_CLOUD_PROJECT"] = config.get("api_native_google.cloud_project", "")
|
|
84
|
+
os.environ["GOOGLE_CLOUD_LOCATION"] = config.get("api_native_google.cloud_location", "us-central1")
|
|
85
|
+
if config.get("api_native_google.app_credentials", ""):
|
|
86
|
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = config.get("api_native_google.app_credentials", "")
|
|
87
|
+
else:
|
|
88
|
+
if os.environ.get("GOOGLE_GENAI_USE_VERTEXAI"):
|
|
89
|
+
del os.environ["GOOGLE_GENAI_USE_VERTEXAI"]
|
|
90
|
+
if os.environ.get("GOOGLE_CLOUD_PROJECT"):
|
|
91
|
+
del os.environ["GOOGLE_CLOUD_PROJECT"]
|
|
92
|
+
if os.environ.get("GOOGLE_CLOUD_LOCATION"):
|
|
93
|
+
del os.environ["GOOGLE_CLOUD_LOCATION"]
|
|
94
|
+
if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
|
|
95
|
+
del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
|
|
96
|
+
|
|
97
|
+
# append VertexAI params to client args
|
|
98
|
+
if use_vertex:
|
|
99
|
+
filtered["vertexai"] = True
|
|
100
|
+
filtered["project"] = os.environ.get("GOOGLE_CLOUD_PROJECT")
|
|
101
|
+
filtered["location"] = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
|
|
102
|
+
# filtered["http_options"] = gtypes.HttpOptions(api_version="v1")
|
|
103
|
+
|
|
70
104
|
if self.client is None or self.last_client_args != filtered:
|
|
71
105
|
self.client = genai.Client(**filtered)
|
|
72
106
|
self.last_client_args = filtered
|
|
107
|
+
|
|
73
108
|
return self.client
|
|
74
109
|
|
|
75
|
-
def call(
|
|
110
|
+
def call(
|
|
111
|
+
self,
|
|
112
|
+
context: BridgeContext,
|
|
113
|
+
extra: dict = None,
|
|
114
|
+
rt_signals = None
|
|
115
|
+
) -> bool:
|
|
76
116
|
"""
|
|
77
117
|
Make an API call to Google GenAI
|
|
78
118
|
|
|
79
119
|
:param context: BridgeContext
|
|
80
120
|
:param extra: Extra parameters
|
|
121
|
+
:param rt_signals: Realtime signals for audio streaming
|
|
81
122
|
:return: True if successful, False otherwise
|
|
82
123
|
"""
|
|
83
124
|
mode = context.mode
|
|
@@ -94,13 +135,28 @@ class ApiGoogle:
|
|
|
94
135
|
response = None
|
|
95
136
|
|
|
96
137
|
if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
|
|
138
|
+
|
|
139
|
+
# Live API for audio streaming
|
|
140
|
+
if mode == MODE_AUDIO and stream:
|
|
141
|
+
is_realtime = self.realtime.begin(
|
|
142
|
+
context=context,
|
|
143
|
+
model=model,
|
|
144
|
+
extra=extra or {},
|
|
145
|
+
rt_signals=rt_signals
|
|
146
|
+
)
|
|
147
|
+
if is_realtime:
|
|
148
|
+
return True
|
|
149
|
+
|
|
97
150
|
response = self.chat.send(context=context, extra=extra)
|
|
98
151
|
used_tokens = self.chat.get_used_tokens()
|
|
99
152
|
if ctx:
|
|
100
153
|
self.vision.append_images(ctx)
|
|
101
154
|
|
|
102
155
|
elif mode == MODE_IMAGE:
|
|
103
|
-
|
|
156
|
+
if context.model.is_video_output():
|
|
157
|
+
return self.video.generate(context=context, extra=extra) # veo, etc.
|
|
158
|
+
else:
|
|
159
|
+
return self.image.generate(context=context, extra=extra) # imagen, etc.
|
|
104
160
|
|
|
105
161
|
elif mode == MODE_ASSISTANT:
|
|
106
162
|
return False # not implemented for Google
|
|
@@ -135,7 +191,11 @@ class ApiGoogle:
|
|
|
135
191
|
pass
|
|
136
192
|
return True
|
|
137
193
|
|
|
138
|
-
def quick_call(
|
|
194
|
+
def quick_call(
|
|
195
|
+
self,
|
|
196
|
+
context: BridgeContext,
|
|
197
|
+
extra: dict = None
|
|
198
|
+
) -> str:
|
|
139
199
|
"""
|
|
140
200
|
Make a quick API call to Google GenAI and return the output text
|
|
141
201
|
|
|
@@ -206,9 +266,9 @@ class ApiGoogle:
|
|
|
206
266
|
def build_remote_tools(self, model: ModelItem = None) -> list:
|
|
207
267
|
"""
|
|
208
268
|
Build Google GenAI remote tools based on config flags.
|
|
209
|
-
-
|
|
269
|
+
- remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
|
|
210
270
|
or GoogleSearchRetrieval (Gemini 1.5 fallback).
|
|
211
|
-
-
|
|
271
|
+
- remote_tools.google.code_interpreter: enables code execution tool.
|
|
212
272
|
|
|
213
273
|
Returns a list of gtypes.Tool objects (can be empty).
|
|
214
274
|
|
|
@@ -242,6 +302,15 @@ class ApiGoogle:
|
|
|
242
302
|
except Exception as e:
|
|
243
303
|
self.window.core.debug.log(e)
|
|
244
304
|
|
|
305
|
+
# URL Context tool
|
|
306
|
+
if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
|
|
307
|
+
try:
|
|
308
|
+
# Supported on Gemini 2.x+ models (not on 1.5)
|
|
309
|
+
if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
|
|
310
|
+
tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
|
|
311
|
+
except Exception as e:
|
|
312
|
+
self.window.core.debug.log(e)
|
|
313
|
+
|
|
245
314
|
return tools
|
|
246
315
|
|
|
247
316
|
|
|
@@ -24,6 +24,8 @@ class Audio:
|
|
|
24
24
|
Audio helpers for Google GenAI.
|
|
25
25
|
- Build audio input parts for requests
|
|
26
26
|
- Convert Google PCM output to WAV (base64) for UI compatibility
|
|
27
|
+
|
|
28
|
+
:param window: Window instance
|
|
27
29
|
"""
|
|
28
30
|
self.window = window
|
|
29
31
|
|
|
@@ -103,7 +105,12 @@ class Audio:
|
|
|
103
105
|
|
|
104
106
|
@staticmethod
|
|
105
107
|
def _ensure_bytes(data) -> Optional[bytes]:
|
|
106
|
-
"""
|
|
108
|
+
"""
|
|
109
|
+
Return raw bytes from inline_data.data (bytes or base64 string).
|
|
110
|
+
|
|
111
|
+
:param data: bytes or base64 string
|
|
112
|
+
:return: bytes or None
|
|
113
|
+
"""
|
|
107
114
|
try:
|
|
108
115
|
if isinstance(data, (bytes, bytearray)):
|
|
109
116
|
return bytes(data)
|
|
@@ -29,9 +29,17 @@ class Chat:
|
|
|
29
29
|
self.window = window
|
|
30
30
|
self.input_tokens = 0
|
|
31
31
|
|
|
32
|
-
def send(
|
|
32
|
+
def send(
|
|
33
|
+
self,
|
|
34
|
+
context: BridgeContext,
|
|
35
|
+
extra: Optional[Dict[str, Any]] = None
|
|
36
|
+
):
|
|
33
37
|
"""
|
|
34
38
|
Call Google GenAI for chat / multimodal / audio.
|
|
39
|
+
|
|
40
|
+
:param context: BridgeContext with prompt, model, history, mode, etc.
|
|
41
|
+
:param extra: Extra parameters (not used currently)
|
|
42
|
+
:return: Response object or generator (if streaming)
|
|
35
43
|
"""
|
|
36
44
|
prompt = context.prompt
|
|
37
45
|
stream = context.stream
|
|
@@ -110,9 +118,13 @@ class Chat:
|
|
|
110
118
|
# Tools -> merge app-defined tools with remote tools
|
|
111
119
|
base_tools = self.window.core.api.google.tools.prepare(model, functions)
|
|
112
120
|
remote_tools = self.window.core.api.google.build_remote_tools(model)
|
|
121
|
+
|
|
122
|
+
# Check tools compatibility
|
|
113
123
|
if base_tools:
|
|
114
|
-
remote_tools = [] #
|
|
124
|
+
remote_tools = [] # remote tools are not allowed if function calling is used
|
|
115
125
|
tools = (base_tools or []) + (remote_tools or [])
|
|
126
|
+
if "-image" in model.id:
|
|
127
|
+
tools = None # function calling is not supported for image models
|
|
116
128
|
|
|
117
129
|
# Sampling
|
|
118
130
|
temperature = self.window.core.config.get('temperature')
|
|
@@ -144,7 +156,7 @@ class Chat:
|
|
|
144
156
|
# Voice selection (case-sensitive name)
|
|
145
157
|
voice_name = "Kore"
|
|
146
158
|
try:
|
|
147
|
-
tmp = self.window.core.plugins.get_option("audio_output", "
|
|
159
|
+
tmp = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
|
|
148
160
|
if tmp:
|
|
149
161
|
name = str(tmp).strip()
|
|
150
162
|
mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse", "legend": "Legend"}
|
|
@@ -169,9 +181,17 @@ class Chat:
|
|
|
169
181
|
else:
|
|
170
182
|
return client.models.generate_content(**params)
|
|
171
183
|
|
|
172
|
-
def unpack_response(
|
|
184
|
+
def unpack_response(
|
|
185
|
+
self,
|
|
186
|
+
mode: str,
|
|
187
|
+
response, ctx: CtxItem
|
|
188
|
+
):
|
|
173
189
|
"""
|
|
174
190
|
Unpack non-streaming response from Google GenAI and set context.
|
|
191
|
+
|
|
192
|
+
:param mode: MODE_CHAT or MODE_AUDIO
|
|
193
|
+
:param response: Response object
|
|
194
|
+
:param ctx: CtxItem to set output, audio_output, tokens, tool_calls
|
|
175
195
|
"""
|
|
176
196
|
if mode == MODE_AUDIO:
|
|
177
197
|
# Prefer audio if present
|
|
@@ -229,6 +249,11 @@ class Chat:
|
|
|
229
249
|
def extract_text(self, response) -> str:
|
|
230
250
|
"""
|
|
231
251
|
Extract output text.
|
|
252
|
+
|
|
253
|
+
Prefer response.text (Python SDK), then fallback to parts[].text.
|
|
254
|
+
|
|
255
|
+
:param response: Response object
|
|
256
|
+
:return: Extracted text
|
|
232
257
|
"""
|
|
233
258
|
txt = getattr(response, "text", None) or getattr(response, "output_text", None)
|
|
234
259
|
if txt:
|
|
@@ -332,11 +357,17 @@ class Chat:
|
|
|
332
357
|
|
|
333
358
|
return out
|
|
334
359
|
|
|
335
|
-
def _extract_inline_images_and_links(
|
|
360
|
+
def _extract_inline_images_and_links(
|
|
361
|
+
self,
|
|
362
|
+
response, ctx: CtxItem
|
|
363
|
+
) -> None:
|
|
336
364
|
"""
|
|
337
365
|
Extract inline image parts (Gemini image output) and file links.
|
|
338
366
|
- Saves inline_data (image/*) bytes to files and appends paths to ctx.images.
|
|
339
367
|
- Appends HTTP(S) image URIs from file_data to ctx.urls.
|
|
368
|
+
|
|
369
|
+
:param response: Response object
|
|
370
|
+
:param ctx: CtxItem to set images and urls
|
|
340
371
|
"""
|
|
341
372
|
images: list[str] = []
|
|
342
373
|
urls: list[str] = []
|
|
@@ -386,7 +417,12 @@ class Chat:
|
|
|
386
417
|
|
|
387
418
|
@staticmethod
|
|
388
419
|
def _ensure_bytes(data) -> bytes | None:
|
|
389
|
-
"""
|
|
420
|
+
"""
|
|
421
|
+
Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string.
|
|
422
|
+
|
|
423
|
+
:param data: bytes or str
|
|
424
|
+
:return: bytes or None
|
|
425
|
+
"""
|
|
390
426
|
try:
|
|
391
427
|
if isinstance(data, (bytes, bytearray)):
|
|
392
428
|
return bytes(data)
|
|
@@ -545,6 +581,9 @@ class Chat:
|
|
|
545
581
|
Heuristic check if the model supports native TTS.
|
|
546
582
|
- Official TTS models contain '-tts' in id (e.g. 'gemini-2.5-flash-preview-tts').
|
|
547
583
|
- Future/preview names may contain 'native-audio'.
|
|
584
|
+
|
|
585
|
+
:param model_id: Model ID
|
|
586
|
+
:return: True if supports TTS, False otherwise
|
|
548
587
|
"""
|
|
549
588
|
if not model_id:
|
|
550
589
|
return False
|