pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +7 -1
  4. pygpt_net/app_core.py +3 -1
  5. pygpt_net/config.py +3 -1
  6. pygpt_net/controller/__init__.py +9 -2
  7. pygpt_net/controller/audio/audio.py +38 -1
  8. pygpt_net/controller/audio/ui.py +2 -2
  9. pygpt_net/controller/chat/audio.py +1 -8
  10. pygpt_net/controller/chat/common.py +23 -62
  11. pygpt_net/controller/chat/handler/__init__.py +0 -0
  12. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  13. pygpt_net/controller/chat/output.py +8 -3
  14. pygpt_net/controller/chat/stream.py +3 -1071
  15. pygpt_net/controller/chat/text.py +3 -2
  16. pygpt_net/controller/kernel/kernel.py +11 -3
  17. pygpt_net/controller/kernel/reply.py +5 -1
  18. pygpt_net/controller/lang/custom.py +2 -2
  19. pygpt_net/controller/media/__init__.py +12 -0
  20. pygpt_net/controller/media/media.py +115 -0
  21. pygpt_net/controller/realtime/__init__.py +12 -0
  22. pygpt_net/controller/realtime/manager.py +53 -0
  23. pygpt_net/controller/realtime/realtime.py +293 -0
  24. pygpt_net/controller/ui/mode.py +23 -2
  25. pygpt_net/controller/ui/ui.py +19 -1
  26. pygpt_net/core/audio/audio.py +6 -1
  27. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  28. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  29. pygpt_net/core/audio/backend/native/player.py +139 -0
  30. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  31. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  32. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  33. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  34. pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
  35. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  36. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  37. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  38. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  39. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  40. pygpt_net/core/audio/backend/shared/player.py +137 -0
  41. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  42. pygpt_net/core/audio/capture.py +5 -0
  43. pygpt_net/core/audio/output.py +14 -2
  44. pygpt_net/core/audio/whisper.py +6 -2
  45. pygpt_net/core/bridge/bridge.py +2 -1
  46. pygpt_net/core/bridge/worker.py +4 -1
  47. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  48. pygpt_net/core/events/__init__.py +2 -1
  49. pygpt_net/core/events/realtime.py +55 -0
  50. pygpt_net/core/image/image.py +56 -5
  51. pygpt_net/core/realtime/__init__.py +0 -0
  52. pygpt_net/core/realtime/options.py +87 -0
  53. pygpt_net/core/realtime/shared/__init__.py +0 -0
  54. pygpt_net/core/realtime/shared/audio.py +213 -0
  55. pygpt_net/core/realtime/shared/loop.py +64 -0
  56. pygpt_net/core/realtime/shared/session.py +59 -0
  57. pygpt_net/core/realtime/shared/text.py +37 -0
  58. pygpt_net/core/realtime/shared/tools.py +276 -0
  59. pygpt_net/core/realtime/shared/turn.py +38 -0
  60. pygpt_net/core/realtime/shared/types.py +16 -0
  61. pygpt_net/core/realtime/worker.py +160 -0
  62. pygpt_net/core/render/web/body.py +24 -3
  63. pygpt_net/core/text/utils.py +54 -2
  64. pygpt_net/core/types/__init__.py +1 -0
  65. pygpt_net/core/types/image.py +54 -0
  66. pygpt_net/core/video/__init__.py +12 -0
  67. pygpt_net/core/video/video.py +290 -0
  68. pygpt_net/data/config/config.json +26 -5
  69. pygpt_net/data/config/models.json +221 -103
  70. pygpt_net/data/config/settings.json +244 -6
  71. pygpt_net/data/css/web-blocks.css +6 -0
  72. pygpt_net/data/css/web-chatgpt.css +6 -0
  73. pygpt_net/data/css/web-chatgpt_wide.css +6 -0
  74. pygpt_net/data/locale/locale.de.ini +35 -7
  75. pygpt_net/data/locale/locale.en.ini +56 -17
  76. pygpt_net/data/locale/locale.es.ini +35 -7
  77. pygpt_net/data/locale/locale.fr.ini +35 -7
  78. pygpt_net/data/locale/locale.it.ini +35 -7
  79. pygpt_net/data/locale/locale.pl.ini +38 -7
  80. pygpt_net/data/locale/locale.uk.ini +35 -7
  81. pygpt_net/data/locale/locale.zh.ini +31 -3
  82. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  83. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  84. pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
  85. pygpt_net/item/model.py +22 -1
  86. pygpt_net/plugin/audio_input/plugin.py +37 -4
  87. pygpt_net/plugin/audio_input/simple.py +57 -8
  88. pygpt_net/plugin/cmd_files/worker.py +3 -0
  89. pygpt_net/provider/api/google/__init__.py +76 -7
  90. pygpt_net/provider/api/google/audio.py +8 -1
  91. pygpt_net/provider/api/google/chat.py +45 -6
  92. pygpt_net/provider/api/google/image.py +226 -86
  93. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  94. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  95. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  96. pygpt_net/provider/api/google/video.py +364 -0
  97. pygpt_net/provider/api/openai/__init__.py +22 -2
  98. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  99. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  100. pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
  101. pygpt_net/provider/audio_input/google_genai.py +103 -0
  102. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  103. pygpt_net/provider/audio_output/google_tts.py +0 -12
  104. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  105. pygpt_net/provider/core/config/patch.py +241 -178
  106. pygpt_net/provider/core/model/patch.py +28 -2
  107. pygpt_net/provider/llms/google.py +8 -9
  108. pygpt_net/provider/web/duckduck_search.py +212 -0
  109. pygpt_net/ui/layout/toolbox/audio.py +55 -0
  110. pygpt_net/ui/layout/toolbox/footer.py +14 -42
  111. pygpt_net/ui/layout/toolbox/image.py +7 -13
  112. pygpt_net/ui/layout/toolbox/raw.py +52 -0
  113. pygpt_net/ui/layout/toolbox/split.py +48 -0
  114. pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
  115. pygpt_net/ui/layout/toolbox/video.py +49 -0
  116. pygpt_net/ui/widget/option/combo.py +15 -1
  117. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
  118. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
  119. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  120. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
  121. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
  122. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
@@ -6,13 +6,15 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.28 20:00:00 #
9
+ # Updated Date: 2025.09.01 23:00:00 #
10
10
  # ================================================== #
11
11
 
12
+ import os
12
13
  from typing import Optional, Dict, Any
13
14
 
14
15
  from google.genai import types as gtypes
15
16
  from google import genai
17
+
16
18
  from pygpt_net.core.types import (
17
19
  MODE_ASSISTANT,
18
20
  MODE_AUDIO,
@@ -29,7 +31,8 @@ from .vision import Vision
29
31
  from .tools import Tools
30
32
  from .audio import Audio
31
33
  from .image import Image
32
-
34
+ from .realtime import Realtime
35
+ from .video import Video
33
36
 
34
37
  class ApiGoogle:
35
38
  def __init__(self, window=None):
@@ -44,6 +47,8 @@ class ApiGoogle:
44
47
  self.tools = Tools(window)
45
48
  self.audio = Audio(window)
46
49
  self.image = Image(window)
50
+ self.realtime = Realtime(window)
51
+ self.video = Video(window)
47
52
  self.client: Optional[genai.Client] = None
48
53
  self.locked = False
49
54
  self.last_client_args: Optional[Dict[str, Any]] = None
@@ -64,20 +69,56 @@ class ApiGoogle:
64
69
  model = ModelItem()
65
70
  model.provider = "google"
66
71
  args = self.window.core.models.prepare_client_args(mode, model)
72
+ config = self.window.core.config
73
+
67
74
  filtered = {}
68
75
  if args.get("api_key"):
69
76
  filtered["api_key"] = args["api_key"]
77
+
78
+ # setup VertexAI
79
+ use_vertex = False
80
+ if config.get("api_native_google.use_vertex", False):
81
+ use_vertex = True
82
+ os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "1"
83
+ os.environ["GOOGLE_CLOUD_PROJECT"] = config.get("api_native_google.cloud_project", "")
84
+ os.environ["GOOGLE_CLOUD_LOCATION"] = config.get("api_native_google.cloud_location", "us-central1")
85
+ if config.get("api_native_google.app_credentials", ""):
86
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = config.get("api_native_google.app_credentials", "")
87
+ else:
88
+ if os.environ.get("GOOGLE_GENAI_USE_VERTEXAI"):
89
+ del os.environ["GOOGLE_GENAI_USE_VERTEXAI"]
90
+ if os.environ.get("GOOGLE_CLOUD_PROJECT"):
91
+ del os.environ["GOOGLE_CLOUD_PROJECT"]
92
+ if os.environ.get("GOOGLE_CLOUD_LOCATION"):
93
+ del os.environ["GOOGLE_CLOUD_LOCATION"]
94
+ if os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
95
+ del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
96
+
97
+ # append VertexAI params to client args
98
+ if use_vertex:
99
+ filtered["vertexai"] = True
100
+ filtered["project"] = os.environ.get("GOOGLE_CLOUD_PROJECT")
101
+ filtered["location"] = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1")
102
+ # filtered["http_options"] = gtypes.HttpOptions(api_version="v1")
103
+
70
104
  if self.client is None or self.last_client_args != filtered:
71
105
  self.client = genai.Client(**filtered)
72
106
  self.last_client_args = filtered
107
+
73
108
  return self.client
74
109
 
75
- def call(self, context: BridgeContext, extra: dict = None) -> bool:
110
+ def call(
111
+ self,
112
+ context: BridgeContext,
113
+ extra: dict = None,
114
+ rt_signals = None
115
+ ) -> bool:
76
116
  """
77
117
  Make an API call to Google GenAI
78
118
 
79
119
  :param context: BridgeContext
80
120
  :param extra: Extra parameters
121
+ :param rt_signals: Realtime signals for audio streaming
81
122
  :return: True if successful, False otherwise
82
123
  """
83
124
  mode = context.mode
@@ -94,13 +135,28 @@ class ApiGoogle:
94
135
  response = None
95
136
 
96
137
  if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
138
+
139
+ # Live API for audio streaming
140
+ if mode == MODE_AUDIO and stream:
141
+ is_realtime = self.realtime.begin(
142
+ context=context,
143
+ model=model,
144
+ extra=extra or {},
145
+ rt_signals=rt_signals
146
+ )
147
+ if is_realtime:
148
+ return True
149
+
97
150
  response = self.chat.send(context=context, extra=extra)
98
151
  used_tokens = self.chat.get_used_tokens()
99
152
  if ctx:
100
153
  self.vision.append_images(ctx)
101
154
 
102
155
  elif mode == MODE_IMAGE:
103
- return self.image.generate(context=context, extra=extra)
156
+ if context.model.is_video_output():
157
+ return self.video.generate(context=context, extra=extra) # veo, etc.
158
+ else:
159
+ return self.image.generate(context=context, extra=extra) # imagen, etc.
104
160
 
105
161
  elif mode == MODE_ASSISTANT:
106
162
  return False # not implemented for Google
@@ -135,7 +191,11 @@ class ApiGoogle:
135
191
  pass
136
192
  return True
137
193
 
138
- def quick_call(self, context: BridgeContext, extra: dict = None) -> str:
194
+ def quick_call(
195
+ self,
196
+ context: BridgeContext,
197
+ extra: dict = None
198
+ ) -> str:
139
199
  """
140
200
  Make a quick API call to Google GenAI and return the output text
141
201
 
@@ -206,9 +266,9 @@ class ApiGoogle:
206
266
  def build_remote_tools(self, model: ModelItem = None) -> list:
207
267
  """
208
268
  Build Google GenAI remote tools based on config flags.
209
- - google_tool_search: enables grounding via Google Search (Gemini 2.x)
269
+ - remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
210
270
  or GoogleSearchRetrieval (Gemini 1.5 fallback).
211
- - google_tool_code_execution: enables code execution tool.
271
+ - remote_tools.google.code_interpreter: enables code execution tool.
212
272
 
213
273
  Returns a list of gtypes.Tool objects (can be empty).
214
274
 
@@ -242,6 +302,15 @@ class ApiGoogle:
242
302
  except Exception as e:
243
303
  self.window.core.debug.log(e)
244
304
 
305
+ # URL Context tool
306
+ if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
307
+ try:
308
+ # Supported on Gemini 2.x+ models (not on 1.5)
309
+ if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
310
+ tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
311
+ except Exception as e:
312
+ self.window.core.debug.log(e)
313
+
245
314
  return tools
246
315
 
247
316
 
@@ -24,6 +24,8 @@ class Audio:
24
24
  Audio helpers for Google GenAI.
25
25
  - Build audio input parts for requests
26
26
  - Convert Google PCM output to WAV (base64) for UI compatibility
27
+
28
+ :param window: Window instance
27
29
  """
28
30
  self.window = window
29
31
 
@@ -103,7 +105,12 @@ class Audio:
103
105
 
104
106
  @staticmethod
105
107
  def _ensure_bytes(data) -> Optional[bytes]:
106
- """Return raw bytes from inline_data.data (bytes or base64 string)."""
108
+ """
109
+ Return raw bytes from inline_data.data (bytes or base64 string).
110
+
111
+ :param data: bytes or base64 string
112
+ :return: bytes or None
113
+ """
107
114
  try:
108
115
  if isinstance(data, (bytes, bytearray)):
109
116
  return bytes(data)
@@ -29,9 +29,17 @@ class Chat:
29
29
  self.window = window
30
30
  self.input_tokens = 0
31
31
 
32
- def send(self, context: BridgeContext, extra: Optional[Dict[str, Any]] = None):
32
+ def send(
33
+ self,
34
+ context: BridgeContext,
35
+ extra: Optional[Dict[str, Any]] = None
36
+ ):
33
37
  """
34
38
  Call Google GenAI for chat / multimodal / audio.
39
+
40
+ :param context: BridgeContext with prompt, model, history, mode, etc.
41
+ :param extra: Extra parameters (not used currently)
42
+ :return: Response object or generator (if streaming)
35
43
  """
36
44
  prompt = context.prompt
37
45
  stream = context.stream
@@ -110,9 +118,13 @@ class Chat:
110
118
  # Tools -> merge app-defined tools with remote tools
111
119
  base_tools = self.window.core.api.google.tools.prepare(model, functions)
112
120
  remote_tools = self.window.core.api.google.build_remote_tools(model)
121
+
122
+ # Check tools compatibility
113
123
  if base_tools:
114
- remote_tools = [] # do not mix local and remote tools
124
+ remote_tools = [] # remote tools are not allowed if function calling is used
115
125
  tools = (base_tools or []) + (remote_tools or [])
126
+ if "-image" in model.id:
127
+ tools = None # function calling is not supported for image models
116
128
 
117
129
  # Sampling
118
130
  temperature = self.window.core.config.get('temperature')
@@ -144,7 +156,7 @@ class Chat:
144
156
  # Voice selection (case-sensitive name)
145
157
  voice_name = "Kore"
146
158
  try:
147
- tmp = self.window.core.plugins.get_option("audio_output", "google_voice_native")
159
+ tmp = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
148
160
  if tmp:
149
161
  name = str(tmp).strip()
150
162
  mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse", "legend": "Legend"}
@@ -169,9 +181,17 @@ class Chat:
169
181
  else:
170
182
  return client.models.generate_content(**params)
171
183
 
172
- def unpack_response(self, mode: str, response, ctx: CtxItem):
184
+ def unpack_response(
185
+ self,
186
+ mode: str,
187
+ response, ctx: CtxItem
188
+ ):
173
189
  """
174
190
  Unpack non-streaming response from Google GenAI and set context.
191
+
192
+ :param mode: MODE_CHAT or MODE_AUDIO
193
+ :param response: Response object
194
+ :param ctx: CtxItem to set output, audio_output, tokens, tool_calls
175
195
  """
176
196
  if mode == MODE_AUDIO:
177
197
  # Prefer audio if present
@@ -229,6 +249,11 @@ class Chat:
229
249
  def extract_text(self, response) -> str:
230
250
  """
231
251
  Extract output text.
252
+
253
+ Prefer response.text (Python SDK), then fallback to parts[].text.
254
+
255
+ :param response: Response object
256
+ :return: Extracted text
232
257
  """
233
258
  txt = getattr(response, "text", None) or getattr(response, "output_text", None)
234
259
  if txt:
@@ -332,11 +357,17 @@ class Chat:
332
357
 
333
358
  return out
334
359
 
335
- def _extract_inline_images_and_links(self, response, ctx: CtxItem) -> None:
360
+ def _extract_inline_images_and_links(
361
+ self,
362
+ response, ctx: CtxItem
363
+ ) -> None:
336
364
  """
337
365
  Extract inline image parts (Gemini image output) and file links.
338
366
  - Saves inline_data (image/*) bytes to files and appends paths to ctx.images.
339
367
  - Appends HTTP(S) image URIs from file_data to ctx.urls.
368
+
369
+ :param response: Response object
370
+ :param ctx: CtxItem to set images and urls
340
371
  """
341
372
  images: list[str] = []
342
373
  urls: list[str] = []
@@ -386,7 +417,12 @@ class Chat:
386
417
 
387
418
  @staticmethod
388
419
  def _ensure_bytes(data) -> bytes | None:
389
- """Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string."""
420
+ """
421
+ Return raw bytes from SDK part.inline_data.data which can be bytes or base64 string.
422
+
423
+ :param data: bytes or str
424
+ :return: bytes or None
425
+ """
390
426
  try:
391
427
  if isinstance(data, (bytes, bytearray)):
392
428
  return bytes(data)
@@ -545,6 +581,9 @@ class Chat:
545
581
  Heuristic check if the model supports native TTS.
546
582
  - Official TTS models contain '-tts' in id (e.g. 'gemini-2.5-flash-preview-tts').
547
583
  - Future/preview names may contain 'native-audio'.
584
+
585
+ :param model_id: Model ID
586
+ :return: True if supports TTS, False otherwise
548
587
  """
549
588
  if not model_id:
550
589
  return False