pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ================================================== #
# This file is a part of PYGPT package               #
# Website: https://pygpt.net                         #
# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
# MIT License                                        #
# Created By  : Marcin Szczygliński                  #
# Updated Date: 2025.08.31 23:00:00                  #
# ================================================== #

import json
from typing import Optional, Dict, Any

from pygpt_net.core.bridge import BridgeContext
from pygpt_net.core.events import RealtimeEvent
from pygpt_net.core.realtime.options import RealtimeOptions
from pygpt_net.core.realtime.shared.session import extract_last_session_id
from pygpt_net.item.model import ModelItem
from pygpt_net.utils import trans

from .client import OpenAIRealtimeClient


class Realtime:

    PROVIDER = "openai"

    def __init__(self, window=None):
        """
        OpenAI API realtime controller

        :param window: Window instance
        """
        self.window = window
        self.handler = OpenAIRealtimeClient(window)
        # Cached auto-turn / VAD options from the previous request, used to
        # detect configuration changes so an active session can be updated
        # in place instead of being restarted.
        self.prev_auto_turn = False
        self.prev_vad_silence = 2000
        self.prev_vad_prefix = 300

    def begin(
            self,
            context: BridgeContext,
            model: Optional[ModelItem] = None,
            extra: Optional[Dict[str, Any]] = None,
            rt_signals=None
    ) -> bool:
        """
        Begin realtime session if applicable

        Depending on the current state this either: forwards tool results to
        an active session (internal sub-reply), updates an active session's
        tools/options, or starts a new realtime session via the manager.

        :param context: BridgeContext
        :param model: Optional[ModelItem]
        :param extra: Optional dict with extra parameters
        :param rt_signals: RealtimeSignals
        :return: True if realtime session started, False otherwise
        """
        # multimodal (audio) input, if provided by the caller
        mm = context.multimodal_ctx
        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
        audio_format = getattr(mm, "audio_format", None) if mm else None
        audio_rate = getattr(mm, "audio_rate", None) if mm else None
        is_debug = self.window.core.config.get("log.realtime", False)
        auto_turn = self.window.core.config.get("audio.input.auto_turn", True)
        opt_vad_silence = self.window.core.config.get("audio.input.vad.silence", 2000)
        opt_vad_prefix = self.window.core.config.get("audio.input.vad.prefix", 300)

        # setup manager
        self.window.controller.realtime.set_current_active(self.PROVIDER)
        self.window.controller.realtime.set_busy()
        self.handler.set_debug(is_debug)

        # tools
        tools = self.window.core.api.openai.tools.prepare(model, context.external_functions)

        # remote tools (start from an empty list, extended by provider)
        remote_tools = []
        remote_tools = self.window.core.api.openai.remote_tools.append_to_tools(
            mode=context.mode,
            model=model,
            stream=context.stream,
            is_expert_call=context.is_expert_call,
            tools=remote_tools,
            preset=context.preset,
        )

        # handle sub-reply (tool results from tool calls)
        if context.ctx.internal:
            if context.ctx.prev_ctx and context.ctx.prev_ctx.extra.get("prev_tool_calls"):
                tool_calls = context.ctx.prev_ctx.extra.get("prev_tool_calls", [])
                tool_call_id = None
                if isinstance(tool_calls, list) and len(tool_calls) > 0:
                    tool_call_id = tool_calls[0].get("call_id", "")  # get first call_id
                    if not tool_call_id:
                        tool_call_id = tool_calls[0].get("id", "")  # fallback to id
                if tool_call_id:
                    tool_results = context.ctx.input
                    try:
                        # tool results may be JSON-encoded; fall back to raw string
                        tool_results = json.loads(tool_results)
                    except Exception:
                        pass
                    self.handler.send_tool_results_sync({
                        tool_call_id: tool_results
                    })
                    self.handler.update_ctx(context.ctx)
                    return True  # do not start new session, just send tool results

        # update auto-turn in active session when options changed since last request
        if (self.handler.is_session_active()
                and (auto_turn != self.prev_auto_turn
                     or opt_vad_silence != self.prev_vad_silence
                     or opt_vad_prefix != self.prev_vad_prefix)):
            # FIX: was an unconditional leftover debug print ("updating");
            # gate diagnostics behind the realtime debug flag like the rest of this method
            if is_debug:
                print("[realtime] Updating session auto-turn / VAD options")
            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)

        # if auto-turn is enabled and prompt is empty, update session and context only
        if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
            self.handler.update_session_tools_sync(tools, remote_tools)
            self.handler.update_ctx(context.ctx)
            self.window.update_status(trans("speech.listening"))
            return True  # do not send new request if session is active

        # Last session ID
        last_session_id = extract_last_session_id(context.history)
        if is_debug:
            print("[realtime session] Last ID", last_session_id)

        # Voice (from audio output plugin option, fallback to "alloy")
        voice = "alloy"
        try:
            v = self.window.core.plugins.get_option("audio_output", "openai_voice")
            if v:
                voice = str(v)
        except Exception:
            pass

        # Options
        opts = RealtimeOptions(
            provider=self.PROVIDER,
            model=context.model.id,
            system_prompt=context.system_prompt,
            prompt=context.prompt,
            voice=voice,
            audio_data=audio_bytes,
            audio_format=audio_format,
            audio_rate=audio_rate,
            vad="server_vad",
            extra=extra or {},
            tools=tools,
            remote_tools=remote_tools,
            rt_signals=rt_signals,
            rt_session_id=last_session_id,
            auto_turn=auto_turn,
            vad_end_silence_ms=opt_vad_silence,
            vad_prefix_padding_ms=opt_vad_prefix,
        )

        # Start or append to realtime session via manager
        try:
            if is_debug:
                print("[realtime] Starting session with options:", opts.to_dict())
            rt = self.window.controller.realtime.manager
            rt.start(context.ctx, opts)

            # remember options only after a successful start
            self.prev_auto_turn = auto_turn
            self.prev_vad_silence = opt_vad_silence
            self.prev_vad_prefix = opt_vad_prefix
            return True
        except Exception as e:
            self.window.core.debug.log(e)
            return False  # fallback to non-live path

    def handle_audio_input(self, event: RealtimeEvent):
        """
        Handle Realtime audio input event

        :param event: RealtimeEvent
        """
        self.handler.rt_handle_audio_input_sync(event)

    def manual_commit(self):
        """Manually commit audio input to realtime session"""
        self.handler.force_response_now_sync()

    def shutdown(self):
        """Shutdown realtime loops"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
        try:
            self.handler.stop_loop_sync()
        except Exception:
            pass

    def reset(self):
        """Close realtime session"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
@@ -113,7 +113,7 @@ class RemoteTools:
113
113
  # extend local tools with remote tools
114
114
  if enabled["computer_use"]:
115
115
  if not model.id in OPENAI_REMOTE_TOOL_DISABLE_COMPUTER_USE:
116
- tools.append(self.window.core.gpt.computer.get_tool())
116
+ tools.append(self.window.core.api.openai.computer.get_tool())
117
117
  else:
118
118
  if not model.id in OPENAI_REMOTE_TOOL_DISABLE_WEB_SEARCH:
119
119
  if enabled["web_search"]:
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.05 00:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import base64
@@ -92,7 +92,7 @@ class Responses:
92
92
  user_name = ctx.input_name # from ctx
93
93
  ai_name = ctx.output_name # from ctx
94
94
 
95
- client = self.window.core.gpt.get_client(mode, model)
95
+ client = self.window.core.api.openai.get_client(mode, model)
96
96
 
97
97
  # build chat messages
98
98
  messages = self.build(
@@ -122,7 +122,7 @@ class Responses:
122
122
  response_kwargs = {}
123
123
 
124
124
  # tools / functions
125
- tools = self.window.core.gpt.tools.prepare_responses_api(model, functions)
125
+ tools = self.window.core.api.openai.tools.prepare_responses_api(model, functions)
126
126
 
127
127
  # extra arguments, o3 only
128
128
  if model.extra and "reasoning_effort" in model.extra:
@@ -130,7 +130,7 @@ class Responses:
130
130
  response_kwargs['reasoning']['effort'] = model.extra["reasoning_effort"]
131
131
 
132
132
  # append remote tools
133
- tools = self.window.core.gpt.remote_tools.append_to_tools(
133
+ tools = self.window.core.api.openai.remote_tools.append_to_tools(
134
134
  mode=mode,
135
135
  model=model,
136
136
  stream=stream,
@@ -250,20 +250,31 @@ class Responses:
250
250
  used_tokens,
251
251
  max_ctx_tokens,
252
252
  )
253
+
254
+ has_response_id_in_last_item = False
255
+ if items and len(items) > 0:
256
+ last_item = items[-1]
257
+ if last_item and last_item.msg_id:
258
+ has_response_id_in_last_item = True
259
+
253
260
  for item in items:
254
261
  # input
255
262
  if item.final_input is not None and item.final_input != "":
256
- messages.append({
257
- "role": "user",
258
- "content": item.final_input,
259
- })
263
+ if not has_response_id_in_last_item:
264
+ messages.append({
265
+ "role": "user",
266
+ "content": item.final_input,
267
+ })
260
268
 
261
269
  # output
262
270
  if item.final_output is not None and item.final_output != "":
263
- msg = {
264
- "role": "assistant",
265
- "content": item.final_output,
266
- }
271
+ if not has_response_id_in_last_item:
272
+ msg = {
273
+ "role": "assistant",
274
+ "content": item.final_output,
275
+ }
276
+ else:
277
+ msg = {}
267
278
  # append previous audio ID
268
279
  if MODE_AUDIO in model.mode:
269
280
  if item.audio_id:
@@ -281,7 +292,9 @@ class Responses:
281
292
  msg["audio"] = {
282
293
  "id": self.audio_prev_id
283
294
  }
284
- messages.append(msg)
295
+
296
+ if msg:
297
+ messages.append(msg)
285
298
 
286
299
  # ---- tool output ----
287
300
  is_tool_output = False # reset tool output flag
@@ -340,7 +353,7 @@ class Responses:
340
353
 
341
354
  # computer call output
342
355
  elif output_type == "computer_call":
343
- base64img = self.window.core.gpt.vision.get_attachment(attachments)
356
+ base64img = self.window.core.api.openai.vision.get_attachment(attachments)
344
357
  if base64img and "call_id" in tool_call:
345
358
  if tool_call["call_id"]:
346
359
  # tool output
@@ -382,13 +395,13 @@ class Responses:
382
395
  if (model.is_image_input()
383
396
  and mode != MODE_COMPUTER
384
397
  and not model.id.startswith("computer-use")):
385
- content = self.window.core.gpt.vision.build_content(
398
+ content = self.window.core.api.openai.vision.build_content(
386
399
  content=content,
387
400
  attachments=attachments,
388
401
  responses_api=True,
389
402
  )
390
403
  if model.is_audio_input():
391
- content = self.window.core.gpt.audio.build_content(
404
+ content = self.window.core.api.openai.audio.build_content(
392
405
  content=content,
393
406
  multimodal_ctx=multimodal_ctx,
394
407
  )
@@ -404,6 +417,7 @@ class Responses:
404
417
  messages,
405
418
  model.id,
406
419
  )
420
+
407
421
  return messages
408
422
 
409
423
  def reset_tokens(self):
@@ -431,7 +445,7 @@ class Responses:
431
445
 
432
446
  if mode in [
433
447
  MODE_CHAT,
434
- MODE_VISION,
448
+ # MODE_VISION,
435
449
  MODE_RESEARCH,
436
450
  MODE_COMPUTER,
437
451
  ]:
@@ -499,7 +513,7 @@ class Responses:
499
513
  id = output.id
500
514
  call_id = output.call_id
501
515
  action = output.action
502
- tool_calls, is_call = self.window.core.gpt.computer.handle_action(
516
+ tool_calls, is_call = self.window.core.api.openai.computer.handle_action(
503
517
  id=id,
504
518
  call_id=call_id,
505
519
  action=action,
@@ -567,7 +581,7 @@ class Responses:
567
581
  if files:
568
582
  self.window.core.debug.info("[chat] Container files found, downloading...")
569
583
  try:
570
- self.window.core.gpt.container.download_files(ctx, files)
584
+ self.window.core.api.openai.container.download_files(ctx, files)
571
585
  except Exception as e:
572
586
  self.window.core.debug.error(f"[chat] Error downloading container files: {e}")
573
587
 
@@ -631,7 +645,7 @@ class Responses:
631
645
  if files:
632
646
  self.window.core.debug.info("[chat] Container files found, downloading...")
633
647
  try:
634
- self.window.core.gpt.container.download_files(ctx, files)
648
+ self.window.core.api.openai.container.download_files(ctx, files)
635
649
  except Exception as e:
636
650
  self.window.core.debug.error(f"[chat] Error downloading container files: {e}")
637
651
 
@@ -30,7 +30,7 @@ class Store:
30
30
 
31
31
  :return: OpenAI client
32
32
  """
33
- return self.window.core.gpt.get_client()
33
+ return self.window.core.api.openai.get_client()
34
34
 
35
35
  def log(
36
36
  self,
@@ -92,7 +92,7 @@ class Store:
92
92
  :param file_id: file ID
93
93
  :param path: path to save file
94
94
  """
95
- client = self.window.core.gpt.get_client()
95
+ client = self.window.core.api.openai.get_client()
96
96
  content = client.files.content(file_id)
97
97
  data = content.read()
98
98
  with open(path, 'wb', ) as f:
@@ -54,7 +54,7 @@ class Vision:
54
54
  attachments = context.attachments
55
55
  model = context.model
56
56
  model_id = model.id
57
- client = self.window.core.gpt.get_client()
57
+ client = self.window.core.api.openai.get_client()
58
58
 
59
59
  # extra API kwargs
60
60
  response_kwargs = {}
File without changes
@@ -565,7 +565,7 @@ class Worker(QRunnable):
565
565
  """
566
566
  try:
567
567
  if self.stream: # stream mode
568
- run = self.window.core.gpt.assistants.run_create_stream(
568
+ run = self.window.core.api.openai.assistants.run_create_stream(
569
569
  self.signals,
570
570
  self.ctx,
571
571
  self.thread_id,
@@ -575,7 +575,7 @@ class Worker(QRunnable):
575
575
  )
576
576
  else:
577
577
  # not stream mode
578
- run = self.window.core.gpt.assistants.run_create(
578
+ run = self.window.core.api.openai.assistants.run_create(
579
579
  self.thread_id,
580
580
  self.assistant_id,
581
581
  self.model,
@@ -596,7 +596,7 @@ class Worker(QRunnable):
596
596
  :return: result
597
597
  """
598
598
  try:
599
- response = self.window.core.gpt.assistants.msg_send(
599
+ response = self.window.core.api.openai.assistants.msg_send(
600
600
  self.thread_id,
601
601
  self.prompt,
602
602
  self.file_ids,
@@ -615,7 +615,7 @@ class Worker(QRunnable):
615
615
  :return: result
616
616
  """
617
617
  try:
618
- run = self.window.core.gpt.assistants.run_submit_tool(self.ctx, self.tools_outputs)
618
+ run = self.window.core.api.openai.assistants.run_submit_tool(self.ctx, self.tools_outputs)
619
619
  if run is not None:
620
620
  self.ctx.run_id = run.id # update run id
621
621
  self.signals.finished.emit(self.ctx, run, False) # continue status check
@@ -237,7 +237,7 @@ class ImportWorker(QRunnable):
237
237
  self.log("Importing assistants...")
238
238
  self.window.core.assistants.clear()
239
239
  items = self.window.core.assistants.get_all()
240
- self.window.core.gpt.assistants.import_all(items, callback=self.callback)
240
+ self.window.core.api.openai.assistants.import_all(items, callback=self.callback)
241
241
  self.window.core.assistants.items = items
242
242
  self.window.core.assistants.save()
243
243
 
@@ -266,7 +266,7 @@ class ImportWorker(QRunnable):
266
266
  self.log("Importing vector stores...")
267
267
  self.window.core.assistants.store.clear()
268
268
  items = {}
269
- self.window.core.gpt.store.import_stores(items, callback=self.callback)
269
+ self.window.core.api.openai.store.import_stores(items, callback=self.callback)
270
270
  self.window.core.assistants.store.import_items(items)
271
271
  if not silent:
272
272
  self.signals.finished.emit("vector_stores", self.store_id, len(items))
@@ -285,7 +285,7 @@ class ImportWorker(QRunnable):
285
285
  """
286
286
  try:
287
287
  self.log("Truncating stores...")
288
- num = self.window.core.gpt.store.remove_all(callback=self.callback)
288
+ num = self.window.core.api.openai.store.remove_all(callback=self.callback)
289
289
  self.window.core.assistants.store.items = {}
290
290
  self.window.core.assistants.store.save()
291
291
  if not silent:
@@ -336,12 +336,12 @@ class ImportWorker(QRunnable):
336
336
  self.log("Truncating all files...")
337
337
  self.window.core.assistants.files.truncate() # clear all files
338
338
  # remove all files in API
339
- num = self.window.core.gpt.store.remove_files(callback=self.callback)
339
+ num = self.window.core.api.openai.store.remove_files(callback=self.callback)
340
340
  else:
341
341
  self.log("Truncating files for store: {}".format(self.store_id))
342
342
  self.window.core.assistants.files.truncate(self.store_id) # clear store files, remove from stores / DB
343
343
  # remove store files in API
344
- num = self.window.core.gpt.store.remove_store_files(
344
+ num = self.window.core.api.openai.store.remove_store_files(
345
345
  self.store_id,
346
346
  callback=self.callback,
347
347
  )
@@ -365,14 +365,14 @@ class ImportWorker(QRunnable):
365
365
  self.log("Uploading files...")
366
366
  for file in self.files:
367
367
  try:
368
- file_id = self.window.core.gpt.store.upload(file)
368
+ file_id = self.window.core.api.openai.store.upload(file)
369
369
  if file_id is not None:
370
- stored_file = self.window.core.gpt.store.add_file(
370
+ stored_file = self.window.core.api.openai.store.add_file(
371
371
  self.store_id,
372
372
  file_id,
373
373
  )
374
374
  if stored_file is not None:
375
- data = self.window.core.gpt.store.get_file(file_id)
375
+ data = self.window.core.api.openai.store.get_file(file_id)
376
376
  self.window.core.assistants.files.insert(self.store_id, data) # insert to DB
377
377
  msg = "Uploaded file: {}/{}".format((num + 1), len(self.files))
378
378
  self.signals.status.emit("upload_files", msg)
@@ -403,11 +403,11 @@ class ImportWorker(QRunnable):
403
403
  if self.store_id is None:
404
404
  self.log("Importing all files...")
405
405
  self.window.core.assistants.files.truncate_local() # clear local DB (all)
406
- num = self.window.core.gpt.store.import_stores_files(self.callback) # import all files
406
+ num = self.window.core.api.openai.store.import_stores_files(self.callback) # import all files
407
407
  else:
408
408
  self.log("Importing files for store: {}".format(self.store_id))
409
409
  self.window.core.assistants.files.truncate_local(self.store_id) # clear local DB (all)
410
- items = self.window.core.gpt.store.import_store_files(
410
+ items = self.window.core.api.openai.store.import_store_files(
411
411
  self.store_id,
412
412
  [],
413
413
  callback=self.callback,
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ================================================== #
# This file is a part of PYGPT package               #
# Website: https://pygpt.net                         #
# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
# MIT License                                        #
# Created By  : Marcin Szczygliński                  #
# Updated Date: 2025.08.29 18:00:00                  #
# ================================================== #

from .base import BaseProvider


class GoogleGenAIAudioInput(BaseProvider):

    # Default system instruction: force a plain, verbatim transcript
    PROMPT_TRANSCRIBE = (
        "You are a speech-to-text transcriber. "
        "Return only the verbatim transcript as plain text. "
        "Do not add any explanations, timestamps, labels or formatting."
    )

    def __init__(self, *args, **kwargs):
        """
        Google GenAI (Gemini) audio provider for transcription (via API).

        :param args: args
        :param kwargs: kwargs
        """
        super().__init__(*args, **kwargs)
        self.plugin = kwargs.get("plugin")
        self.id = "google_genai"
        self.name = "Google GenAI"

    def init_options(self):
        """Initialize plugin options (shape kept consistent with the Whisper provider)"""
        add_option = self.plugin.add_option
        add_option(
            "google_genai_audio_model",
            type="text",
            value="gemini-2.5-flash",
            label="Model",
            tab="google_genai",
            description="Specify Gemini model supporting audio, e.g., gemini-2.5-flash",
        )
        add_option(
            "google_genai_audio_prompt",
            type="textarea",
            value=self.PROMPT_TRANSCRIBE,
            label="System Prompt",
            tab="google_genai",
            description="System prompt to guide the transcription output",
            tooltip="System prompt for transcription",
            persist=True,
        )

    def transcribe(self, path: str) -> str:
        """
        Audio to text transcription using Google GenAI (Gemini).

        :param path: path to audio file to transcribe
        :return: transcribed text
        """
        # pre-configured GenAI client from the app core
        client = self.plugin.window.core.api.google.get_client()

        # push the audio file through the Files API
        uploaded = client.files.upload(file=path)

        # system instruction keeps the output a bare transcript; user-configured
        # prompt wins, default prompt is the fallback
        system_prompt = self.plugin.get_option_value("google_genai_audio_prompt") or self.PROMPT_TRANSCRIBE
        request_config = {
            "system_instruction": system_prompt,
            "temperature": 0.0,
        }

        # run the transcription with the configured Gemini model
        response = client.models.generate_content(
            model=self.plugin.get_option_value("google_genai_audio_model"),
            contents=[uploaded],
            config=request_config,
        )

        # unified .text property holds the plain transcript
        return response.text or ""

    def is_configured(self) -> bool:
        """
        Check if provider is configured

        :return: True if configured, False otherwise
        """
        api_key = self.plugin.window.core.config.get("api_key_google")
        if api_key is None:
            return False
        return api_key != ""

    def get_config_message(self) -> str:
        """
        Return message to display when provider is not configured

        :return: message
        """
        return "Google GenAI API key is not set yet. Please configure it in settings."
@@ -43,7 +43,7 @@ class OpenAIWhisper(BaseProvider):
43
43
  :param path: path to audio file to transcribe
44
44
  :return: transcribed text
45
45
  """
46
- client = self.plugin.window.core.gpt.get_client()
46
+ client = self.plugin.window.core.api.openai.get_client()
47
47
  with open(path, "rb") as audio_file:
48
48
  return client.audio.transcriptions.create(
49
49
  model=self.plugin.get_option_value('whisper_model'),