pygpt-net 2.6.30__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. pygpt_net/CHANGELOG.txt +8 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/controller/__init__.py +5 -2
  5. pygpt_net/controller/audio/audio.py +25 -1
  6. pygpt_net/controller/audio/ui.py +2 -2
  7. pygpt_net/controller/chat/audio.py +1 -8
  8. pygpt_net/controller/chat/common.py +29 -3
  9. pygpt_net/controller/chat/handler/__init__.py +0 -0
  10. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  11. pygpt_net/controller/chat/output.py +8 -3
  12. pygpt_net/controller/chat/stream.py +3 -1071
  13. pygpt_net/controller/chat/text.py +3 -2
  14. pygpt_net/controller/kernel/kernel.py +11 -3
  15. pygpt_net/controller/kernel/reply.py +5 -1
  16. pygpt_net/controller/realtime/__init__.py +12 -0
  17. pygpt_net/controller/realtime/manager.py +53 -0
  18. pygpt_net/controller/realtime/realtime.py +268 -0
  19. pygpt_net/controller/ui/mode.py +7 -0
  20. pygpt_net/controller/ui/ui.py +19 -1
  21. pygpt_net/core/audio/audio.py +6 -1
  22. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  23. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  24. pygpt_net/core/audio/backend/native/player.py +139 -0
  25. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  26. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  27. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  28. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  29. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  30. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  31. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  32. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  33. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  34. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  35. pygpt_net/core/audio/backend/shared/player.py +137 -0
  36. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  37. pygpt_net/core/audio/capture.py +5 -0
  38. pygpt_net/core/audio/output.py +13 -2
  39. pygpt_net/core/audio/whisper.py +6 -2
  40. pygpt_net/core/bridge/bridge.py +2 -1
  41. pygpt_net/core/bridge/worker.py +4 -1
  42. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  43. pygpt_net/core/events/__init__.py +2 -1
  44. pygpt_net/core/events/realtime.py +55 -0
  45. pygpt_net/core/image/image.py +51 -1
  46. pygpt_net/core/realtime/__init__.py +0 -0
  47. pygpt_net/core/realtime/options.py +87 -0
  48. pygpt_net/core/realtime/shared/__init__.py +0 -0
  49. pygpt_net/core/realtime/shared/audio.py +213 -0
  50. pygpt_net/core/realtime/shared/loop.py +64 -0
  51. pygpt_net/core/realtime/shared/session.py +59 -0
  52. pygpt_net/core/realtime/shared/text.py +37 -0
  53. pygpt_net/core/realtime/shared/tools.py +276 -0
  54. pygpt_net/core/realtime/shared/turn.py +38 -0
  55. pygpt_net/core/realtime/shared/types.py +16 -0
  56. pygpt_net/core/realtime/worker.py +164 -0
  57. pygpt_net/core/types/__init__.py +1 -0
  58. pygpt_net/core/types/image.py +48 -0
  59. pygpt_net/data/config/config.json +10 -4
  60. pygpt_net/data/config/models.json +149 -103
  61. pygpt_net/data/config/settings.json +50 -0
  62. pygpt_net/data/locale/locale.de.ini +5 -5
  63. pygpt_net/data/locale/locale.en.ini +19 -13
  64. pygpt_net/data/locale/locale.es.ini +5 -5
  65. pygpt_net/data/locale/locale.fr.ini +5 -5
  66. pygpt_net/data/locale/locale.it.ini +5 -5
  67. pygpt_net/data/locale/locale.pl.ini +5 -5
  68. pygpt_net/data/locale/locale.uk.ini +5 -5
  69. pygpt_net/data/locale/locale.zh.ini +1 -1
  70. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  71. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  72. pygpt_net/plugin/audio_input/plugin.py +37 -4
  73. pygpt_net/plugin/audio_input/simple.py +57 -8
  74. pygpt_net/plugin/cmd_files/worker.py +3 -0
  75. pygpt_net/provider/api/google/__init__.py +39 -6
  76. pygpt_net/provider/api/google/audio.py +8 -1
  77. pygpt_net/provider/api/google/chat.py +45 -6
  78. pygpt_net/provider/api/google/image.py +226 -86
  79. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  80. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  81. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  82. pygpt_net/provider/api/openai/__init__.py +22 -2
  83. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  84. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  85. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  86. pygpt_net/provider/audio_input/google_genai.py +103 -0
  87. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  88. pygpt_net/provider/audio_output/google_tts.py +0 -12
  89. pygpt_net/provider/audio_output/openai_tts.py +8 -5
  90. pygpt_net/provider/core/config/patch.py +15 -0
  91. pygpt_net/provider/core/model/patch.py +11 -0
  92. pygpt_net/provider/llms/google.py +8 -9
  93. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  94. pygpt_net/ui/layout/toolbox/image.py +5 -0
  95. pygpt_net/ui/widget/option/combo.py +15 -1
  96. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +26 -14
  97. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +100 -62
  98. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  99. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  100. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  101. {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import json
13
+ from typing import Optional, Dict, Any
14
+
15
+ from pygpt_net.core.events import RealtimeEvent
16
+ from pygpt_net.core.realtime.options import RealtimeOptions
17
+ from pygpt_net.core.bridge.context import BridgeContext
18
+ from pygpt_net.core.realtime.shared.session import extract_last_session_id
19
+ from pygpt_net.item.model import ModelItem
20
+
21
+ from .client import GoogleLiveClient
22
+
23
+
24
class Realtime:
    """
    Google GenAI API realtime controller.

    Delegates all session work to a ``GoogleLiveClient`` handler and remembers
    the auto-turn / VAD settings that were last applied, so that a changed
    configuration is pushed to an already active session only once.
    """

    PROVIDER = "google"

    # lower-cased option value -> voice name passed to the realtime options
    _VOICE_NAMES = {
        "kore": "Kore",
        "puck": "Puck",
        "charon": "Charon",
        "verse": "Verse",
        "legend": "Legend",
    }

    def __init__(self, window=None):
        """
        Google GenAI API realtime controller

        :param window: Window instance
        """
        self.window = window
        self.handler = GoogleLiveClient(window)
        # auto-turn / VAD settings last applied to a session (see begin())
        self.prev_auto_turn = False
        self.prev_vad_silence = 2000
        self.prev_vad_prefix = 300

    def begin(
            self,
            context: BridgeContext,
            model: Optional[ModelItem] = None,
            extra: Optional[Dict[str, Any]] = None,
            rt_signals=None
    ) -> bool:
        """
        Begin realtime session if applicable

        Returns early with True (without starting a new session) when:
        - the context is an internal sub-reply carrying tool results, which
          are sent to the handler instead,
        - auto-turn is enabled, a session is already active and the prompt is
          empty or "..." - only tools and the context item are refreshed.

        :param context: BridgeContext
        :param model: Optional[ModelItem]; its ``id`` is read when a new
                      session has to be started
        :param extra: Optional dict with extra parameters
        :param rt_signals: Optional RealtimeSignals
        :return: bool - True if realtime session started, False otherwise
        """
        # Build realtime options
        mm = context.multimodal_ctx
        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
        audio_format = getattr(mm, "audio_format", None) if mm else None
        audio_rate = getattr(mm, "audio_rate", None) if mm else None
        config = self.window.core.config
        is_debug = config.get("log.realtime", False)
        auto_turn = config.get("audio.input.auto_turn", True)
        opt_vad_silence = config.get("audio.input.vad.silence", 2000)
        opt_vad_prefix = config.get("audio.input.vad.prefix", 300)

        # setup manager
        self.window.controller.realtime.set_current_active(self.PROVIDER)
        self.window.controller.realtime.set_busy()
        self.handler.set_debug(is_debug)

        # handle sub-reply (tool results from tool calls)
        ctx = context.ctx
        if ctx.internal and ctx.prev_ctx and ctx.prev_ctx.extra.get("prev_tool_calls"):
            tool_call_id = self._first_tool_call_id(
                ctx.prev_ctx.extra.get("prev_tool_calls", [])
            )
            if tool_call_id:
                tool_results = ctx.input
                try:
                    tool_results = json.loads(tool_results)
                except Exception:
                    pass  # best-effort: input that is not JSON is sent as-is
                self.handler.send_tool_results_sync({
                    tool_call_id: tool_results
                })
                return True  # do not start new session, just send tool results

        # update auto-turn in active session
        if (self.handler.is_session_active()
                and (auto_turn != self.prev_auto_turn
                     or opt_vad_silence != self.prev_vad_silence
                     or opt_vad_prefix != self.prev_vad_prefix)):
            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
            # remember what was just applied; otherwise the early return below
            # would make every following call push the same update again
            self._remember_turn_settings(auto_turn, opt_vad_silence, opt_vad_prefix)

        # Tools
        tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
        remote_tools = self.window.core.api.google.build_remote_tools(model)
        if tools:
            remote_tools = []  # in Google, remote tools are not allowed if function calling is used

        # if auto-turn is enabled and prompt is empty, update session and context only
        # (a missing prompt is treated like an empty one instead of raising)
        prompt_is_idle = (context.prompt or "").strip() == "" or context.prompt == "..."
        if auto_turn and self.handler.is_session_active() and prompt_is_idle:
            self.handler.update_session_tools_sync(tools, remote_tools)
            self.handler.update_ctx(context.ctx)
            return True  # do not send new request if session is active

        # Last session ID
        last_session_id = extract_last_session_id(context.history)
        if is_debug:
            print("[realtime session] Last ID", last_session_id)

        # Options
        opts = RealtimeOptions(
            provider=self.PROVIDER,
            model=model.id,
            system_prompt=context.system_prompt,
            prompt=context.prompt,
            voice=self._resolve_voice(),
            audio_data=audio_bytes,
            audio_format=audio_format,
            audio_rate=audio_rate,
            vad=None,
            extra=extra or {},
            tools=tools,
            remote_tools=remote_tools,
            rt_signals=rt_signals,
            rt_session_id=last_session_id,
            auto_turn=auto_turn,
            vad_end_silence_ms=opt_vad_silence,
            vad_prefix_padding_ms=opt_vad_prefix,
        )

        # Start or append to realtime session via manager
        try:
            if is_debug:
                print("[realtime] Starting session with options:", opts.to_dict())
            rt = self.window.controller.realtime.manager
            rt.start(context.ctx, opts)

            self._remember_turn_settings(auto_turn, opt_vad_silence, opt_vad_prefix)
            return True
        except Exception as e:
            self.window.core.debug.log(e)
            return False  # fallback to non-live path

    @staticmethod
    def _first_tool_call_id(tool_calls) -> Optional[str]:
        """
        Get ID of the first tool call

        :param tool_calls: value stored under "prev_tool_calls"
        :return: "call_id" of the first call, falling back to its "id";
                 None if tool_calls is not a non-empty list
        """
        if not isinstance(tool_calls, list) or len(tool_calls) == 0:
            return None
        first = tool_calls[0]
        return first.get("call_id", "") or first.get("id", "")

    def _resolve_voice(self) -> str:
        """
        Get voice name from the audio output plugin option

        :return: voice name, "Kore" if the option is empty or cannot be read
        """
        voice_name = "Kore"
        try:
            v = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
            if v:
                # unknown names are passed through unchanged
                voice_name = self._VOICE_NAMES.get(str(v).strip().lower(), str(v))
        except Exception:
            pass  # best-effort: keep the default voice
        return voice_name

    def _remember_turn_settings(self, auto_turn, vad_silence, vad_prefix):
        """
        Store auto-turn / VAD settings as the ones currently applied

        :param auto_turn: auto-turn flag
        :param vad_silence: VAD end-of-speech silence value
        :param vad_prefix: VAD prefix padding value
        """
        self.prev_auto_turn = auto_turn
        self.prev_vad_silence = vad_silence
        self.prev_vad_prefix = vad_prefix

    def handle_audio_input(self, event: RealtimeEvent):
        """
        Handle Realtime audio input event

        :param event: RealtimeEvent
        """
        self.handler.rt_handle_audio_input_sync(event)

    def manual_commit(self):
        """Manually commit audio input to realtime session"""
        self.handler.force_response_now_sync()

    def shutdown(self):
        """
        Shutdown realtime loops

        The loop is always asked to stop, even if closing the active session
        raises; an error from closing the session still propagates.
        """
        try:
            if self.handler.is_session_active():
                self.handler.close_session_sync()
        finally:
            try:
                self.handler.stop_loop_sync()
            except Exception:
                pass  # best-effort: nothing more can be done during shutdown

    def reset(self):
        """Close realtime session"""
        if self.handler.is_session_active():
            self.handler.close_session_sync()
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.19 07:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from openai import OpenAI
@@ -33,6 +33,7 @@ from .container import Container
33
33
  from .image import Image
34
34
  from .remote_tools import RemoteTools
35
35
  from .responses import Responses
36
+ from .realtime import Realtime
36
37
  from .store import Store
37
38
  from .summarizer import Summarizer
38
39
  from .tools import Tools
@@ -57,6 +58,7 @@ class ApiOpenAI:
57
58
  self.image = Image(window)
58
59
  self.remote_tools = RemoteTools(window)
59
60
  self.responses = Responses(window)
61
+ self.realtime = Realtime(window)
60
62
  self.store = Store(window)
61
63
  self.summarizer = Summarizer(window)
62
64
  self.tools = Tools(window)
@@ -90,12 +92,18 @@ class ApiOpenAI:
90
92
  self.last_client_args = args
91
93
  return self.client
92
94
 
93
- def call(self, context: BridgeContext, extra: dict = None) -> bool:
95
+ def call(
96
+ self,
97
+ context: BridgeContext,
98
+ extra: dict = None,
99
+ rt_signals = None
100
+ ) -> bool:
94
101
  """
95
102
  Call OpenAI API
96
103
 
97
104
  :param context: Bridge context
98
105
  :param extra: Extra arguments
106
+ :param rt_signals: Realtime signals for audio streaming
99
107
  :return: result
100
108
  """
101
109
  mode = context.mode
@@ -145,6 +153,18 @@ class ApiOpenAI:
145
153
  MODE_RESEARCH,
146
154
  MODE_COMPUTER,
147
155
  ]:
156
+ if mode == MODE_AUDIO and stream:
157
+
158
+ # Realtime API for audio streaming
159
+ is_realtime = self.realtime.begin(
160
+ context=context,
161
+ model=model,
162
+ extra=extra or {},
163
+ rt_signals=rt_signals
164
+ )
165
+ if is_realtime:
166
+ return True
167
+
148
168
  # responses API
149
169
  if use_responses_api:
150
170
  response = self.responses.send(
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from .realtime import Realtime