pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182):
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import json
13
+ from typing import Optional, Dict, Any
14
+
15
+ from pygpt_net.core.events import RealtimeEvent
16
+ from pygpt_net.core.realtime.options import RealtimeOptions
17
+ from pygpt_net.core.bridge.context import BridgeContext
18
+ from pygpt_net.core.realtime.shared.session import extract_last_session_id
19
+ from pygpt_net.item.model import ModelItem
20
+
21
+ from .client import GoogleLiveClient
22
+
23
+
24
class Realtime:
    """Controller for Google GenAI realtime sessions, delegating to GoogleLiveClient."""

    PROVIDER = "google"

    def __init__(self, window=None):
        """
        Google GenAI API realtime controller

        :param window: Window instance
        """
        self.window = window
        self.handler = GoogleLiveClient(window)
        # Auto-turn / VAD values last applied to the session; begin() compares
        # the current config against them to decide whether an update is needed.
        self.prev_auto_turn = False
        self.prev_vad_silence = 2000
        self.prev_vad_prefix = 300

    def begin(
            self,
            context: BridgeContext,
            model: Optional[ModelItem] = None,
            extra: Optional[Dict[str, Any]] = None,
            rt_signals=None
    ) -> bool:
        """
        Begin realtime session if applicable

        Depending on the state this either forwards tool results to the handler,
        updates the already active session, or starts a session via the manager.

        :param context: BridgeContext
        :param model: Optional[ModelItem] (its id is needed to start a new session)
        :param extra: Optional dict with extra parameters
        :param rt_signals: Optional RealtimeSignals
        :return: bool - True if handled by the realtime path, False otherwise
        """
        # Build realtime options
        mm = context.multimodal_ctx
        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
        audio_format = getattr(mm, "audio_format", None) if mm else None
        audio_rate = getattr(mm, "audio_rate", None) if mm else None

        config = self.window.core.config
        is_debug = config.get("log.realtime", False)
        auto_turn = config.get("audio.input.auto_turn", True)
        opt_vad_silence = config.get("audio.input.vad.silence", 2000)
        opt_vad_prefix = config.get("audio.input.vad.prefix", 300)

        # setup manager
        self.window.controller.realtime.set_current_active(self.PROVIDER)
        self.window.controller.realtime.set_busy()
        self.handler.set_debug(is_debug)

        # handle sub-reply (tool results from tool calls)
        if context.ctx.internal and self._send_tool_results(context):
            return True  # do not start new session, just send tool results

        # update auto-turn in active session
        if (self.handler.is_session_active()
                and (auto_turn != self.prev_auto_turn
                     or opt_vad_silence != self.prev_vad_silence
                     or opt_vad_prefix != self.prev_vad_prefix)):
            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
            # Remember what was just applied; otherwise the early return below
            # would leave the old values and the same update would be re-sent
            # on every following call.
            self._remember_turn_settings(auto_turn, opt_vad_silence, opt_vad_prefix)

        # Tools
        tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
        remote_tools = self.window.core.api.google.build_remote_tools(model)
        if tools:
            remote_tools = []  # in Google, remote tools are not allowed if function calling is used

        # if auto-turn is enabled and prompt is empty, update session and context only
        prompt = context.prompt or ""  # a missing prompt is treated like an empty one
        if auto_turn and self.handler.is_session_active() and (prompt.strip() == "" or prompt == "..."):
            self.handler.update_session_tools_sync(tools, remote_tools)
            self.handler.update_ctx(context.ctx)
            return True  # do not send new request if session is active

        # Last session ID
        last_session_id = extract_last_session_id(context.history)
        if is_debug:
            print("[realtime session] Last ID", last_session_id)

        # Voice
        voice_name = self._resolve_voice()

        # Start or append to realtime session via manager.
        # The options are built inside the try block so that a failure while
        # preparing them (e.g. model is None) also ends in the False fallback.
        try:
            opts = RealtimeOptions(
                provider=self.PROVIDER,
                model=model.id,
                system_prompt=context.system_prompt,
                prompt=context.prompt,
                voice=voice_name,
                audio_data=audio_bytes,
                audio_format=audio_format,
                audio_rate=audio_rate,
                vad=None,
                extra=extra or {},
                tools=tools,
                remote_tools=remote_tools,
                rt_signals=rt_signals,
                rt_session_id=last_session_id,
                auto_turn=auto_turn,
                vad_end_silence_ms=opt_vad_silence,
                vad_prefix_padding_ms=opt_vad_prefix,
            )
            if is_debug:
                print("[realtime] Starting session with options:", opts.to_dict())
            rt = self.window.controller.realtime.manager
            rt.start(context.ctx, opts)

            self._remember_turn_settings(auto_turn, opt_vad_silence, opt_vad_prefix)
            return True
        except Exception as e:
            self.window.core.debug.log(e)
            return False  # fallback to non-live path

    def _remember_turn_settings(self, auto_turn, vad_silence, vad_prefix):
        """
        Store the auto-turn / VAD values most recently applied to the session

        :param auto_turn: auto-turn flag
        :param vad_silence: VAD silence value
        :param vad_prefix: VAD prefix value
        """
        self.prev_auto_turn = auto_turn
        self.prev_vad_silence = vad_silence
        self.prev_vad_prefix = vad_prefix

    def _send_tool_results(self, context: BridgeContext) -> bool:
        """
        Send the context input as the result of the first previous tool call

        :param context: BridgeContext
        :return: bool - True if tool results were sent, False if there was no usable tool call
        """
        prev_ctx = context.ctx.prev_ctx
        if not prev_ctx:
            return False
        tool_calls = prev_ctx.extra.get("prev_tool_calls")
        if not isinstance(tool_calls, list) or not tool_calls:
            return False
        first_call = tool_calls[0]
        if not isinstance(first_call, dict):
            return False
        tool_call_id = first_call.get("call_id") or first_call.get("id")  # fallback to id
        if not tool_call_id:
            return False

        tool_results = context.ctx.input
        try:
            tool_results = json.loads(tool_results)
        except (TypeError, ValueError):
            pass  # not JSON: send the input unchanged
        self.handler.send_tool_results_sync({
            tool_call_id: tool_results
        })
        return True

    def _resolve_voice(self) -> str:
        """
        Read the voice from the audio_output plugin option, defaulting to "Kore"

        :return: voice name
        """
        voice_name = "Kore"
        try:
            v = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
            if v:
                mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse",
                           "legend": "Legend"}
                voice_name = mapping.get(str(v).strip().lower(), str(v))
        except Exception:
            pass  # best effort: keep the default voice if the option cannot be read
        return voice_name

    def handle_audio_input(self, event: RealtimeEvent):
        """
        Handle Realtime audio input event

        :param event: RealtimeEvent
        """
        self.handler.rt_handle_audio_input_sync(event)

    def manual_commit(self):
        """Manually commit audio input to realtime session"""
        self.handler.force_response_now_sync()

    def shutdown(self):
        """Shutdown realtime loops"""
        try:
            if self.handler.is_session_active():
                self.handler.close_session_sync()
        finally:
            # always try to stop the loop, even if closing the session failed
            try:
                self.handler.stop_loop_sync()
            except Exception:
                pass

    def reset(self):
        """Close realtime session"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
@@ -0,0 +1,222 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.28 20:00:00 #
10
+ # ================================================== #
11
+
12
+ import json
13
+ from typing import List, Any, Dict, Optional
14
+
15
+ from google.genai import types as gtypes
16
+ from pygpt_net.item.model import ModelItem
17
+
18
+
19
class Tools:
    """Converts function definitions (JSON Schema params) into google-genai tool declarations."""

    # JSON Schema keywords removed from every schema before conversion
    _BANNED_KEYS = frozenset({
        "additionalProperties",
        "additional_properties",
        "unevaluatedProperties",
        "patternProperties",
        "dependencies",
        "dependentSchemas",
        "dependentRequired",
        "oneOf",
        "anyOf",
        "allOf",
        "$defs",
        "$ref",
        "$schema",
        "$id",
        "examples",
        "readOnly",
        "writeOnly",
        "nullable",
    })

    # Upper-cased type name -> type name passed to gtypes.Schema.
    # Lookups always upper-case the key first, so only upper-case keys are needed.
    _TYPE_MAP = {
        "ENUM": "STRING",
        "OBJECT": "OBJECT",
        "DICT": "OBJECT",
        "ARRAY": "ARRAY",
        "LIST": "ARRAY",
        "STRING": "STRING",
        "NUMBER": "NUMBER",
        "FLOAT": "NUMBER",
        "INTEGER": "INTEGER",
        "BOOLEAN": "BOOLEAN",
        "INT": "INTEGER",
        "BOOL": "BOOLEAN",
    }

    def __init__(self, window=None):
        """
        Tools mapper for Google GenAI

        :param window: Window instance
        """
        self.window = window

    # -------- SANITIZER --------
    def _sanitize_schema(self, schema: Any) -> Any:
        """
        Sanitize JSON Schema dict by removing unsupported keywords and normalizing types.

        1. Remove unsupported keywords like additionalProperties, patternProperties,
           dependencies, oneOf, anyOf, allOf, $ref, $defs, examples, readOnly, writeOnly.
        2. Normalize 'type' to a single value (e.g., if it's a list, take the first non-null type).
        3. Ensure 'enum' is only present for string types.
        4. Recursively sanitize nested schemas in 'properties' and 'items'.
        5. Handle arrays by ensuring 'items' is a single schema.
        6. Handle objects by ensuring 'properties' is a dict and 'required' is a list of strings.

        The given dict is modified in place and returned.

        :param schema: Any JSON Schema as dict or list
        :return: Sanitized schema dict
        """
        if isinstance(schema, list):
            return self._sanitize_schema(schema[0]) if schema else {}

        if not isinstance(schema, dict):
            return schema

        for key in self._BANNED_KEYS.intersection(schema):
            schema.pop(key, None)

        # Union -> first non-null type
        raw_type = schema.get("type")
        if isinstance(raw_type, list):
            non_null = [x for x in raw_type if x != "null"]
            schema["type"] = non_null[0] if non_null else "string"

        # Only a string can name a type; anything else is treated as "no type"
        # instead of failing on .lower().
        raw_type = schema.get("type")
        type_name = raw_type.lower() if isinstance(raw_type, str) else ""

        # enum only for string
        if "enum" in schema and type_name != "string":
            schema.pop("enum", None)

        # properties: a mapping of parameter NAME -> schema. Each value is
        # sanitized on its own; the mapping itself must never be sanitized as
        # a schema, because its keys are user-chosen names (a parameter may
        # legitimately be called "type", "enum", "examples", ...).
        props = schema.get("properties")
        if type_name == "object" or isinstance(props, dict):
            if not isinstance(props, dict):
                props = {}
            schema["properties"] = {
                pname: self._sanitize_schema(pval) for pname, pval in props.items()
            }

        # object
        if type_name == "object":
            req = schema.get("required")
            if not isinstance(req, list) or not req or not all(isinstance(x, str) for x in req):
                schema.pop("required", None)

        # array
        if type_name == "array":
            items = schema.get("items")
            if isinstance(items, list) and items:
                items = items[0]
            if not isinstance(items, dict):
                items = {"type": "string"}
            schema["items"] = self._sanitize_schema(items)

        # recursive sanitize of the remaining nested values
        for key, value in list(schema.items()):
            if key == "properties":
                continue  # already handled per property above
            if isinstance(value, dict):
                schema[key] = self._sanitize_schema(value)
            elif isinstance(value, list):
                schema[key] = [self._sanitize_schema(x) for x in value]

        return schema

    # -------- CONVERTER to gtypes.Schema (UPPERCASE) --------
    def _to_gschema(self, schema: Any) -> gtypes.Schema:
        """
        Convert sanitized dict -> google.genai.types.Schema.
        Enforces UPPERCASE type names (OBJECT, ARRAY, STRING, NUMBER, INTEGER, BOOLEAN).

        Non-dict input becomes a STRING schema; unknown or missing types become OBJECT.

        :param schema: Sanitized JSON Schema as dict
        :return: gtypes.Schema
        """
        if isinstance(schema, gtypes.Schema):
            return schema

        if not isinstance(schema, dict):
            return gtypes.Schema(type="STRING")

        type_name = self._TYPE_MAP.get(str(schema.get("type", "OBJECT")).upper(), "OBJECT")
        enum = schema.get("enum") if isinstance(schema.get("enum"), list) else None
        req = schema.get("required") if isinstance(schema.get("required"), list) else None

        gs = gtypes.Schema(
            type=type_name,
            description=schema.get("description"),
            format=schema.get("format"),
            enum=enum,
            required=[x for x in (req or []) if isinstance(x, str)] or None,
        )

        props = schema.get("properties")
        if isinstance(props, dict):
            gs.properties = {k: self._to_gschema(v) for k, v in props.items()}

        items = schema.get("items")
        if isinstance(items, dict):
            gs.items = self._to_gschema(items)

        return gs

    def prepare(self, model: ModelItem, functions: list) -> List[gtypes.Tool]:
        """
        Prepare Google Function Declarations (types.Tool) for google-genai.

        Functions without a name are skipped; params that are missing, not valid
        JSON, or not a JSON object are replaced by an empty object schema.

        :param model: ModelItem
        :param functions: List of function definitions as dicts with 'name', 'desc', 'params' (JSON Schema)
        :return: List of gtypes.Tool
        """
        if not functions or not isinstance(functions, list):
            return []

        fds: List[gtypes.FunctionDeclaration] = []
        for function in functions:
            name = str(function.get("name") or "").strip()
            if not name:
                continue

            desc = function.get("desc") or ""
            params: Optional[dict] = {}
            if function.get("params"):
                try:
                    params = json.loads(function["params"])
                except Exception:
                    params = {}

            params = self._sanitize_schema(params or {})
            if not isinstance(params, dict):
                params = {}  # JSON decoded to a scalar: no usable schema
            if not params.get("type"):
                params["type"] = "object"

            fds.append(gtypes.FunctionDeclaration(
                name=name,
                description=desc,
                parameters=self._to_gschema(params),
            ))

        return [gtypes.Tool(function_declarations=fds)] if fds else []
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.28 20:00:00 #
10
+ # ================================================== #
11
+
12
+ import os
13
+ from typing import Optional, Dict, List, Union
14
+
15
+ from google.genai.types import Part
16
+
17
+ from pygpt_net.item.attachment import AttachmentItem
18
+ from pygpt_net.item.ctx import CtxItem
19
+
20
+
21
class Vision:
    """Builds inline image parts from local attachments and tracks what was sent."""

    def __init__(self, window=None):
        """
        Vision helpers for Google GenAI

        :param window: Window instance
        """
        self.window = window
        self.attachments: Dict[str, str] = {}  # attachment id -> local path
        self.urls: List[str] = []
        self.input_tokens = 0

    def build_parts(
            self,
            content: Union[str, list],
            attachments: Optional[Dict[str, AttachmentItem]] = None,
    ) -> List[Part]:
        """
        Create one inline-bytes Part per existing local image attachment

        Resets the stored attachments and urls first; every attachment that is
        used gets recorded and marked as consumed.

        :param content: Message content (str or list)
        :param attachments: Attachments dict (id -> AttachmentItem)
        :return: List of Parts
        """
        self.attachments = {}
        self.urls = []
        result: List[Part] = []

        for key, item in (attachments or {}).items():
            file_path = item.path
            if not file_path or not os.path.exists(file_path):
                continue
            if not self.is_image(file_path):
                continue
            mime_type = self._guess_mime(file_path)
            with open(file_path, "rb") as fh:
                raw = fh.read()
            result.append(Part.from_bytes(data=raw, mime_type=mime_type))
            self.attachments[key] = file_path
            item.consumed = True

        return result

    def is_image(self, path: str) -> bool:
        """
        Tell whether the path ends with a known image extension (case-insensitive)

        :param path: File path
        :return: True if image, False otherwise
        """
        image_suffixes = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif', '.webp')
        return path.lower().endswith(image_suffixes)

    def _guess_mime(self, path: str) -> str:
        """
        Map the file extension to a mime type, defaulting to image/jpeg

        :param path: File path
        :return: Mime type string
        """
        by_extension = {
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "png": "image/png",
            "gif": "image/gif",
            "bmp": "image/bmp",
            "webp": "image/webp",
            "tiff": "image/tiff",
        }
        _, suffix = os.path.splitext(path)
        return by_extension.get(suffix.lower().lstrip("."), "image/jpeg")

    def append_images(self, ctx: CtxItem):
        """
        Store the paths of the sent images in the context item

        :param ctx: CtxItem
        """
        sent = self.get_attachments()
        if sent:
            ctx.images = self.window.core.filesystem.make_local_list(list(sent.values()))

    def get_attachments(self) -> Dict[str, str]:
        """
        Get the attachments recorded by the last build_parts call (id -> path)

        :return: Dict of attachments
        """
        return self.attachments

    def get_urls(self) -> List[str]:
        """
        Get the image urls list (unused here)

        :return: List of URLs
        """
        return self.urls

    def reset_tokens(self):
        """Set the input tokens counter back to zero"""
        self.input_tokens = 0

    def get_used_tokens(self) -> int:
        """
        Get the input tokens counter

        :return: Number of input tokens
        """
        return self.input_tokens
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.19 07:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from openai import OpenAI
@@ -33,13 +33,14 @@ from .container import Container
33
33
  from .image import Image
34
34
  from .remote_tools import RemoteTools
35
35
  from .responses import Responses
36
+ from .realtime import Realtime
36
37
  from .store import Store
37
38
  from .summarizer import Summarizer
38
39
  from .tools import Tools
39
40
  from .vision import Vision
40
41
 
41
42
 
42
- class Gpt:
43
+ class ApiOpenAI:
43
44
 
44
45
  def __init__(self, window=None):
45
46
  """
@@ -57,6 +58,7 @@ class Gpt:
57
58
  self.image = Image(window)
58
59
  self.remote_tools = RemoteTools(window)
59
60
  self.responses = Responses(window)
61
+ self.realtime = Realtime(window)
60
62
  self.store = Store(window)
61
63
  self.summarizer = Summarizer(window)
62
64
  self.tools = Tools(window)
@@ -90,12 +92,18 @@ class Gpt:
90
92
  self.last_client_args = args
91
93
  return self.client
92
94
 
93
- def call(self, context: BridgeContext, extra: dict = None) -> bool:
95
+ def call(
96
+ self,
97
+ context: BridgeContext,
98
+ extra: dict = None,
99
+ rt_signals = None
100
+ ) -> bool:
94
101
  """
95
102
  Call OpenAI API
96
103
 
97
104
  :param context: Bridge context
98
105
  :param extra: Extra arguments
106
+ :param rt_signals: Realtime signals for audio streaming
99
107
  :return: result
100
108
  """
101
109
  mode = context.mode
@@ -145,6 +153,18 @@ class Gpt:
145
153
  MODE_RESEARCH,
146
154
  MODE_COMPUTER,
147
155
  ]:
156
+ if mode == MODE_AUDIO and stream:
157
+
158
+ # Realtime API for audio streaming
159
+ is_realtime = self.realtime.begin(
160
+ context=context,
161
+ model=model,
162
+ extra=extra or {},
163
+ rt_signals=rt_signals
164
+ )
165
+ if is_realtime:
166
+ return True
167
+
148
168
  # responses API
149
169
  if use_responses_api:
150
170
  response = self.responses.send(
@@ -281,7 +301,7 @@ class Gpt:
281
301
  # additional_kwargs["max_tokens"] = max_tokens
282
302
 
283
303
  # tools / functions
284
- tools = self.window.core.gpt.tools.prepare(model, functions)
304
+ tools = self.window.core.api.openai.tools.prepare(model, functions)
285
305
  if len(tools) > 0 and "disable_tools" not in extra:
286
306
  additional_kwargs["tools"] = tools
287
307
 
File without changes
@@ -48,7 +48,7 @@ class LocalComputer(Computer):
48
48
 
49
49
  :return: Environment of the computer, such as "mac", "windows", "ubuntu", or "browser".
50
50
  """
51
- return self.window.core.gpt.computer.get_current_env()
51
+ return self.window.core.api.openai.computer.get_current_env()
52
52
 
53
53
  @property
54
54
  def dimensions(self) -> tuple[int, int]:
@@ -13,7 +13,7 @@ from agents import (
13
13
  from pygpt_net.item.model import ModelItem
14
14
  from pygpt_net.item.preset import PresetItem
15
15
 
16
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
16
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
17
17
 
18
18
 
19
19
  def get_experts(
@@ -169,7 +169,7 @@ class StreamHandler:
169
169
  self.files_handled = True
170
170
  self.window.core.debug.info("[chat] Container files found, downloading...")
171
171
  try:
172
- self.window.core.gpt.container.download_files(ctx, self.files)
172
+ self.window.core.api.openai.container.download_files(ctx, self.files)
173
173
  except Exception as e:
174
174
  self.window.core.debug.error(f"[chat] Error downloading container files: {e}")
175
175
 
@@ -36,7 +36,7 @@ class Assistants:
36
36
 
37
37
  :return: OpenAI client
38
38
  """
39
- return self.window.core.gpt.get_client()
39
+ return self.window.core.api.openai.get_client()
40
40
 
41
41
  def log(
42
42
  self,