pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -25,12 +25,12 @@ from pygpt_net.core.types import (
  from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem

- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans

  from ..base import BaseAgent
  from .bots.research_bot.manager import ResearchManager
- from ...gpt.agents.experts import get_experts


  class Agent(BaseAgent):
@@ -15,7 +15,7 @@ from pydantic import BaseModel
  from agents import Agent

  from pygpt_net.item.preset import PresetItem
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools


  class WebSearchItem(BaseModel):
@@ -14,7 +14,7 @@ from agents.model_settings import ModelSettings

  from pygpt_net.core.types import OPENAI_REMOTE_TOOL_DISABLE_WEB_SEARCH
  from pygpt_net.item.preset import PresetItem
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools


  def get_search_agent(
@@ -15,7 +15,7 @@ from pydantic import BaseModel
  from agents import Agent

  from pygpt_net.item.preset import PresetItem
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools


  class ReportData(BaseModel):
@@ -30,12 +30,12 @@ from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
  from pygpt_net.item.preset import PresetItem

- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans

  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts


  @dataclass
@@ -340,7 +340,7 @@ class Agent(BaseAgent):

  print("Winner: agent ", choose)

- final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(results[choose], ctx)
+ final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(results[choose], ctx)
  input_items = results[choose].to_input_list()

  if bridge.stopped():
@@ -437,7 +437,7 @@ class Agent(BaseAgent):
  bridge.on_stop(ctx)
  break

- window.core.gpt.responses.unpack_agent_response(results[choose], ctx)
+ window.core.api.openai.responses.unpack_agent_response(results[choose], ctx)
  input_items = results[choose].to_input_list()

  evaluator_result = await Runner.run(evaluator, input_items)
@@ -31,9 +31,9 @@ from pygpt_net.core.types import (
  from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem

- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
- from pygpt_net.provider.gpt.agents.experts import get_experts
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans

  from ..base import BaseAgent
@@ -218,7 +218,7 @@ class Agent(BaseAgent):
  agent,
  **kwargs
  )
- final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+ final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
  response_id = result.last_response_id
  if verbose:
  print("Final response:", result)
@@ -0,0 +1,27 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.28 20:00:00 #
+ # ================================================== #
+
+ from .anthropic import ApiAnthropic
+ from .google import ApiGoogle
+ from .openai import ApiOpenAI
+
+ class Api:
+
+     def __init__(self, window=None):
+         """
+         API wrappers core
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.anthropic = ApiAnthropic(window)
+         self.google = ApiGoogle(window)
+         self.openai = ApiOpenAI(window)
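This new aggregate is what the earlier hunks switch to: call sites that previously used window.core.gpt.responses now go through window.core.api.openai.responses. A minimal access sketch, assuming window.core.api is wired to this Api class (as those renamed call sites suggest):

    # Sketch only: per-provider wrappers reachable through the new aggregate.
    api = window.core.api                           # Api instance
    openai_api = api.openai                         # ApiOpenAI (previously window.core.gpt)
    google_client = api.google.get_client()         # google-genai Client
    anthropic_client = api.anthropic.get_client()   # anthropic.Anthropic client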
@@ -0,0 +1,68 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.28 09:00:00 #
+ # ================================================== #
+
+ from anthropic import Anthropic
+
+ from pygpt_net.core.types import (
+     MODE_CHAT,
+ )
+ from pygpt_net.item.model import ModelItem
+
+ class ApiAnthropic:
+
+     def __init__(self, window=None):
+         """
+         Anthropic API wrapper core
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.client = None
+         self.locked = False
+
+     def get_client(
+             self,
+             mode: str = MODE_CHAT,
+             model: ModelItem = None
+     ) -> Anthropic:
+         """
+         Return Anthropic client
+
+         :param mode: Mode
+         :param model: Model
+         :return: Anthropic client
+         """
+         if self.client is not None:
+             try:
+                 self.client.close()  # close previous client if exists
+             except Exception as e:
+                 self.window.core.debug.log(e)
+                 print("Error closing previous Anthropic client:", e)
+         self.client = Anthropic(
+             api_key=self.window.core.config.get('api_key_anthropic', "")
+         )
+         return self.client
+
+     def stop(self):
+         """On global event stop"""
+         pass
+
+     def close(self):
+         """Close Anthropic client"""
+         if self.locked:
+             return
+         if self.client is not None:
+             try:
+                 pass
+                 # self.client.close()
+             except Exception as e:
+                 self.window.core.debug.log(e)
+                 print("Error closing Anthropic client:", e)
@@ -0,0 +1,295 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.30 06:00:00 #
+ # ================================================== #
+
+ from typing import Optional, Dict, Any
+
+ from google.genai import types as gtypes
+ from google import genai
+
+ from pygpt_net.core.types import (
+     MODE_ASSISTANT,
+     MODE_AUDIO,
+     MODE_CHAT,
+     MODE_COMPLETION,
+     MODE_IMAGE,
+     MODE_RESEARCH,
+ )
+ from pygpt_net.core.bridge.context import BridgeContext
+ from pygpt_net.item.model import ModelItem
+
+ from .chat import Chat
+ from .vision import Vision
+ from .tools import Tools
+ from .audio import Audio
+ from .image import Image
+ from .realtime import Realtime
+
+ class ApiGoogle:
+     def __init__(self, window=None):
+         """
+         Google GenAI API SDK wrapper
+
+         :param window: Window instance
+         """
+         self.window = window
+         self.chat = Chat(window)
+         self.vision = Vision(window)
+         self.tools = Tools(window)
+         self.audio = Audio(window)
+         self.image = Image(window)
+         self.realtime = Realtime(window)
+         self.client: Optional[genai.Client] = None
+         self.locked = False
+         self.last_client_args: Optional[Dict[str, Any]] = None
+
+     def get_client(
+             self,
+             mode: str = MODE_CHAT,
+             model: ModelItem = None
+     ) -> genai.Client:
+         """
+         Get or create Google GenAI client
+
+         :param mode: Mode (chat, completion, image, etc.)
+         :param model: ModelItem
+         :return: genai.Client instance
+         """
+         if not model:
+             model = ModelItem()
+             model.provider = "google"
+         args = self.window.core.models.prepare_client_args(mode, model)
+         filtered = {}
+         if args.get("api_key"):
+             filtered["api_key"] = args["api_key"]
+         if self.client is None or self.last_client_args != filtered:
+             self.client = genai.Client(**filtered)
+             self.last_client_args = filtered
+         return self.client
+
+     def call(
+             self,
+             context: BridgeContext,
+             extra: dict = None,
+             rt_signals = None
+     ) -> bool:
+         """
+         Make an API call to Google GenAI
+
+         :param context: BridgeContext
+         :param extra: Extra parameters
+         :param rt_signals: Realtime signals for audio streaming
+         :return: True if successful, False otherwise
+         """
+         mode = context.mode
+         model = context.model
+         stream = context.stream
+         ctx = context.ctx
+         ai_name = ctx.output_name if ctx else "assistant"
+
+         # No Responses API in google-genai
+         if ctx:
+             ctx.use_responses_api = False
+
+         used_tokens = 0
+         response = None
+
+         if mode in [MODE_COMPLETION, MODE_CHAT, MODE_AUDIO, MODE_RESEARCH]:
+
+             # Live API for audio streaming
+             if mode == MODE_AUDIO and stream:
+                 is_realtime = self.realtime.begin(
+                     context=context,
+                     model=model,
+                     extra=extra or {},
+                     rt_signals=rt_signals
+                 )
+                 if is_realtime:
+                     return True
+
+             response = self.chat.send(context=context, extra=extra)
+             used_tokens = self.chat.get_used_tokens()
+             if ctx:
+                 self.vision.append_images(ctx)
+
+         elif mode == MODE_IMAGE:
+             return self.image.generate(context=context, extra=extra)
+
+         elif mode == MODE_ASSISTANT:
+             return False  # not implemented for Google
+
+         if stream:
+             if ctx:
+                 ctx.stream = response
+                 ctx.set_output("", ai_name)
+                 ctx.input_tokens = used_tokens
+             return True
+
+         if response is None:
+             return False
+
+         if isinstance(response, dict) and "error" in response:
+             return False
+
+         if ctx:
+             ctx.ai_name = ai_name
+             self.chat.unpack_response(mode, response, ctx)
+             try:
+                 import json
+                 for tc in getattr(ctx, "tool_calls", []) or []:
+                     fn = tc.get("function") or {}
+                     args = fn.get("arguments")
+                     if isinstance(args, str):
+                         try:
+                             fn["arguments"] = json.loads(args)
+                         except Exception:
+                             fn["arguments"] = {}
+             except Exception:
+                 pass
+         return True
+
+     def quick_call(
+             self,
+             context: BridgeContext,
+             extra: dict = None
+     ) -> str:
+         """
+         Make a quick API call to Google GenAI and return the output text
+
+         :param context: BridgeContext
+         :param extra: Extra parameters
+         :return: Output text
+         """
+         if context.request:
+             context.stream = False
+             context.mode = MODE_CHAT
+             self.locked = True
+             self.call(context, extra)
+             self.locked = False
+             return context.ctx.output
+
+         self.locked = True
+         try:
+             ctx = context.ctx
+             prompt = context.prompt
+             system_prompt = context.system_prompt
+             temperature = context.temperature
+             history = context.history
+             functions = context.external_functions
+             model = context.model or self.window.core.models.from_defaults()
+
+             client = self.get_client(MODE_CHAT, model)
+             tools = self.tools.prepare(model, functions)
+
+             """
+             # with remote tools
+             base_tools = self.tools.prepare(model, functions)
+             remote_tools = self.build_remote_tools(model)
+             tools = (base_tools or []) + (remote_tools or [])
+             """
+
+             inputs = self.chat.build_input(
+                 prompt=prompt,
+                 system_prompt=system_prompt,
+                 model=model,
+                 history=history,
+                 attachments=context.attachments,
+                 multimodal_ctx=context.multimodal_ctx,
+             )
+             cfg = genai.types.GenerateContentConfig(
+                 temperature=temperature if temperature is not None else self.window.core.config.get('temperature'),
+                 top_p=self.window.core.config.get('top_p'),
+                 max_output_tokens=context.max_tokens if context.max_tokens else None,
+                 system_instruction=system_prompt if system_prompt else None,
+                 tools=tools if tools else None,
+             )
+             resp = client.models.generate_content(
+                 model=model.id,
+                 contents=inputs,
+                 config=cfg,
+             )
+
+             if ctx:
+                 calls = self.chat.extract_tool_calls(resp)
+                 if calls:
+                     ctx.tool_calls = calls
+             return self.chat.extract_text(resp)
+         except Exception as e:
+             self.window.core.debug.log(e)
+             return ""
+         finally:
+             self.locked = False
+
+     def build_remote_tools(self, model: ModelItem = None) -> list:
+         """
+         Build Google GenAI remote tools based on config flags.
+         - remote_tools.google.web_search: enables grounding via Google Search (Gemini 2.x)
+           or GoogleSearchRetrieval (Gemini 1.5 fallback).
+         - remote_tools.google.code_interpreter: enables code execution tool.
+
+         Returns a list of gtypes.Tool objects (can be empty).
+
+         :param model: ModelItem
+         :return: list of gtypes.Tool
+         """
+         tools: list = []
+         cfg = self.window.core.config
+         model_id = (model.id if model and getattr(model, "id", None) else "").lower()
+
+         # Google Search tool
+         if cfg.get("remote_tools.google.web_search") and "image" not in model.id:
+             try:
+                 if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
+                     # Gemini 2.x uses GoogleSearch
+                     tools.append(gtypes.Tool(google_search=gtypes.GoogleSearch()))
+                 else:
+                     # Gemini 1.5 fallback uses GoogleSearchRetrieval
+                     # Note: Supported only for 1.5 models.
+                     tools.append(gtypes.Tool(
+                         google_search_retrieval=gtypes.GoogleSearchRetrieval()
+                     ))
+             except Exception as e:
+                 # Do not break the request if tool construction fails
+                 self.window.core.debug.log(e)
+
+         # Code Execution tool
+         if cfg.get("remote_tools.google.code_interpreter") and "image" not in model.id:
+             try:
+                 tools.append(gtypes.Tool(code_execution=gtypes.ToolCodeExecution))
+             except Exception as e:
+                 self.window.core.debug.log(e)
+
+         # URL Context tool
+         if cfg.get("remote_tools.google.url_ctx") and "image" not in model.id:
+             try:
+                 # Supported on Gemini 2.x+ models (not on 1.5)
+                 if not model_id.startswith("gemini-1.5") and not model_id.startswith("models/gemini-1.5"):
+                     tools.append(gtypes.Tool(url_context=gtypes.UrlContext))
+             except Exception as e:
+                 self.window.core.debug.log(e)
+
+         return tools
+
+
+     def stop(self):
+         """On global event stop"""
+         pass
+
+     def close(self):
+         """Close Google client"""
+         if self.locked:
+             return
+         if self.client is not None:
+             try:
+                 pass
+                 # self.client.close()
+             except Exception as e:
+                 self.window.core.debug.log(e)
+                 print("Error closing Google client:", e)
@@ -0,0 +1,121 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # ================================================== #
+ # This file is a part of PYGPT package #
+ # Website: https://pygpt.net #
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
+ # MIT License #
+ # Created By : Marcin Szczygliński #
+ # Updated Date: 2025.08.28 20:00:00 #
+ # ================================================== #
+
+ import base64
+ import io
+ import wave
+ from typing import Optional, Tuple
+
+ from google.genai.types import Part
+ from pygpt_net.core.bridge.context import MultimodalContext
+
+
+ class Audio:
+     def __init__(self, window=None):
+         """
+         Audio helpers for Google GenAI.
+         - Build audio input parts for requests
+         - Convert Google PCM output to WAV (base64) for UI compatibility
+
+         :param window: Window instance
+         """
+         self.window = window
+
+     # ---------- INPUT (user -> model) ----------
+
+     def build_part(
+             self,
+             multimodal_ctx: Optional[MultimodalContext]
+     ) -> Optional[Part]:
+         """
+         Build audio Part from multimodal context (inline bytes).
+
+         :param multimodal_ctx: MultimodalContext
+         :return: Part or None
+         """
+         if not multimodal_ctx or not multimodal_ctx.is_audio_input or not multimodal_ctx.audio_data:
+             return None
+         audio_format = (multimodal_ctx.audio_format or "wav").lower()
+         mime = f"audio/{audio_format}"
+         return Part.from_bytes(data=multimodal_ctx.audio_data, mime_type=mime)
+
+     # ---------- OUTPUT (model -> UI) ----------
+
+     def extract_first_audio_part(
+             self,
+             response
+     ) -> Tuple[Optional[bytes], Optional[str]]:
+         """
+         Extract first audio inline_data from a non-streaming response.
+
+         :param response: Google response object
+         :return: (audio_bytes, mime_type) or (None, None)
+         """
+         try:
+             candidates = getattr(response, "candidates", None) or []
+             for cand in candidates:
+                 content = getattr(cand, "content", None)
+                 parts = getattr(content, "parts", None) or []
+                 for p in parts:
+                     inline = getattr(p, "inline_data", None)
+                     if not inline:
+                         continue
+                     mime = (getattr(inline, "mime_type", "") or "").lower()
+                     if not mime.startswith("audio/"):
+                         continue
+                     data = getattr(inline, "data", None)
+                     audio_bytes = self._ensure_bytes(data)
+                     if audio_bytes:
+                         return audio_bytes, mime
+         except Exception:
+             pass
+         return None, None
+
+     def pcm16_to_wav_base64(
+             self,
+             pcm_bytes: bytes,
+             rate: int = 24000,
+             channels: int = 1,
+             sample_width: int = 2
+     ) -> str:
+         """
+         Wrap raw PCM16 mono @ 24kHz into WAV and return base64-encoded payload.
+
+         :param pcm_bytes: Raw PCM16 bytes
+         :param rate: Sample rate (Hz), default 24000 for Google TTS
+         :param channels: Channels, default 1
+         :param sample_width: Bytes per sample, default 2 for PCM16
+         :return: Base64-encoded WAV
+         """
+         buf = io.BytesIO()
+         with wave.open(buf, "wb") as wf:
+             wf.setnchannels(channels)
+             wf.setsampwidth(sample_width)
+             wf.setframerate(rate)
+             wf.writeframes(pcm_bytes)
+         return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+     @staticmethod
+     def _ensure_bytes(data) -> Optional[bytes]:
+         """
+         Return raw bytes from inline_data.data (bytes or base64 string).
+
+         :param data: bytes or base64 string
+         :return: bytes or None
+         """
+         try:
+             if isinstance(data, (bytes, bytearray)):
+                 return bytes(data)
+             if isinstance(data, str):
+                 return base64.b64decode(data)
+         except Exception:
+             return None
+         return None
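Together, extract_first_audio_part() and pcm16_to_wav_base64() turn a non-streaming Gemini audio response into the base64 WAV payload the UI expects. A short composition sketch, assuming audio is the Audio helper above and response is a google-genai response object:

    # Sketch only: non-streaming Gemini audio -> base64 WAV for playback in the UI.
    pcm, mime = audio.extract_first_audio_part(response)
    if pcm and mime and mime.startswith("audio/"):
        wav_b64 = audio.pcm16_to_wav_base64(pcm)  # 24 kHz mono PCM16 wrapped into WAV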