pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.23 15:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import List, Tuple
@@ -17,6 +17,7 @@ from pygpt_net.core.events import (
17
17
  ControlEvent,
18
18
  AppEvent,
19
19
  RenderEvent,
20
+ RealtimeEvent,
20
21
  )
21
22
 
22
23
 
@@ -71,6 +72,14 @@ class Dispatcher:
71
72
 
72
73
  handled = False
73
74
 
75
+ # realtime first, if it's a realtime event
76
+ if isinstance(event, RealtimeEvent):
77
+ controller.realtime.handle(event)
78
+ if log_event:
79
+ debug.info(f"[event] Dispatch end: {event.full_name} ({event.call_id})")
80
+ self.call_id += 1
81
+ return [], event
82
+
74
83
  # kernel
75
84
  if isinstance(event, KernelEvent):
76
85
  kernel_auto = (KernelEvent.INIT, KernelEvent.RESTART, KernelEvent.STOP, KernelEvent.TERMINATE)
@@ -96,20 +105,47 @@ class Dispatcher:
96
105
  if handled:
97
106
  return [], event
98
107
 
108
+ # realtime
109
+ controller.realtime.handle(event)
110
+ if event.stop:
111
+ if log_event:
112
+ debug.info(f"[event] Skipping... (stopped): {event.name}")
113
+ return [], event
114
+
99
115
  # agents
100
116
  controller.agent.handle(event)
117
+ if event.stop:
118
+ if log_event:
119
+ debug.info(f"[event] Skipping... (stopped): {event.name}")
120
+ return [], event
101
121
 
102
122
  # ctx
103
123
  controller.ctx.handle(event)
124
+ if event.stop:
125
+ if log_event:
126
+ debug.info(f"[event] Skipping... (stopped): {event.name}")
127
+ return [], event
104
128
 
105
129
  # model
106
130
  controller.model.handle(event)
131
+ if event.stop:
132
+ if log_event:
133
+ debug.info(f"[event] Skipping... (stopped): {event.name}")
134
+ return [], event
107
135
 
108
136
  # idx
109
137
  controller.idx.handle(event)
138
+ if event.stop:
139
+ if log_event:
140
+ debug.info(f"[event] Skipping... (stopped): {event.name}")
141
+ return [], event
110
142
 
111
143
  # ui
112
144
  controller.ui.handle(event)
145
+ if event.stop:
146
+ if log_event:
147
+ debug.info(f"[event] Skipping... (stopped): {event.name}")
148
+ return [], event
113
149
 
114
150
  # access
115
151
  if isinstance(event, (ControlEvent, AppEvent)):
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2024.11.20 03:00:00 #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from .base import BaseEvent
@@ -14,4 +14,5 @@ from .app import AppEvent
14
14
  from .control import ControlEvent
15
15
  from .event import Event
16
16
  from .kernel import KernelEvent
17
+ from .realtime import RealtimeEvent
17
18
  from .render import RenderEvent
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.30 06:00:00 #
10
+ # ================================================== #
11
+
12
+ from typing import Optional
13
+
14
+ from .base import BaseEvent
15
+
16
+
17
class RealtimeEvent(BaseEvent):
    """
    Events exchanged with the realtime controller

    Output side (descriptions as documented by the package):

    - RT_OUTPUT_READY - audio output is ready (STREAM_BEGIN)
    - RT_OUTPUT_AUDIO_DELTA - audio output chunk (with payload)
    - RT_OUTPUT_TEXT_DELTA - text chunk (delta)
    - RT_OUTPUT_AUDIO_END - audio output ended (STREAM_END)
    - RT_OUTPUT_TURN_END - audio output turn ended (TURN_END)
    - RT_OUTPUT_AUDIO_ERROR - audio output error (STREAM_ERROR)
    - RT_OUTPUT_AUDIO_VOLUME_CHANGED - audio output volume changed (volume level)

    Declared here without a documented description:

    - RT_OUTPUT_AUDIO_COMMIT
    - RT_INPUT_AUDIO_DELTA
    - RT_INPUT_AUDIO_MANUAL_START
    - RT_INPUT_AUDIO_MANUAL_STOP
    """

    # output events
    RT_OUTPUT_READY = "rt.output.audio.ready"
    RT_OUTPUT_AUDIO_DELTA = "rt.output.audio.delta"
    RT_OUTPUT_AUDIO_COMMIT = "rt.output.audio.commit"
    RT_OUTPUT_AUDIO_END = "rt.output.audio.end"
    RT_OUTPUT_AUDIO_ERROR = "rt.output.audio.error"
    RT_OUTPUT_AUDIO_VOLUME_CHANGED = "rt.output.audio.volume.changed"
    RT_OUTPUT_TEXT_DELTA = "rt.output.text.delta"
    RT_OUTPUT_TURN_END = "rt.output.turn.end"

    # input events
    RT_INPUT_AUDIO_DELTA = "rt.input.audio.delta"
    RT_INPUT_AUDIO_MANUAL_START = "rt.input.audio.manual.start"
    RT_INPUT_AUDIO_MANUAL_STOP = "rt.input.audio.manual.stop"

    def __init__(self, name: Optional[str] = None, data: Optional[dict] = None):
        """
        Realtime event object

        :param name: event name (one of the RT_* constants)
        :param data: event data
        """
        super().__init__(name, data)
        # identifier of the event family, set after the base initializer runs
        self.id = "RealtimeEvent"
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.23 15:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import json
@@ -49,7 +49,7 @@ class Experts:
49
49
  self.allowed_modes = [
50
50
  MODE_CHAT,
51
51
  MODE_COMPLETION,
52
- MODE_VISION,
52
+ # MODE_VISION,
53
53
  # MODE_LANGCHAIN,
54
54
  MODE_LLAMA_INDEX,
55
55
  MODE_AUDIO,
@@ -12,10 +12,11 @@
12
12
  import os
13
13
  import uuid
14
14
  from time import strftime
15
- from typing import List
15
+ from typing import List, Dict
16
16
 
17
17
  from PySide6.QtCore import Slot, QObject
18
18
 
19
+ from pygpt_net.core.types import IMAGE_AVAILABLE_RESOLUTIONS
19
20
  from pygpt_net.item.ctx import CtxItem
20
21
  from pygpt_net.utils import trans
21
22
 
@@ -141,3 +142,52 @@ class Image(QObject):
141
142
  img_dir = self.window.core.config.get_user_dir("img")
142
143
  filename = f"{dt_prefix}_{img_id}.png"
143
144
  return os.path.join(img_dir, filename)
145
+
146
def get_resolution_option(self) -> dict:
    """
    Build the option definition describing the image resolution selector

    The selectable keys come from get_available_resolutions() called
    without a model.

    :return: option dict (type, slider, label, value, keys)
    """
    option = {
        "type": "combo",
        "slider": True,
        "label": "img_resolution",
        "value": "1024x1024",
    }
    option["keys"] = self.get_available_resolutions()
    return option
159
+
160
def get_available_resolutions(self, model: str = None) -> Dict[str, str]:
    """
    Look up the image resolutions available for a model

    When a model is given, its normalized name is matched against the keys
    of IMAGE_AVAILABLE_RESOLUTIONS by prefix and the first matching entry is
    returned. Without a model (or without a match) all entries are merged
    into a single dict, so duplicated resolutions appear only once.

    :param model: model name
    :return: dict of available resolutions
    """
    catalog = IMAGE_AVAILABLE_RESOLUTIONS

    if model:
        normalized = self._normalize_model_name(model)
        for prefix, resolutions in catalog.items():
            if normalized.startswith(prefix):
                return resolutions

    # no model or no prefix match: union of every known resolution
    merged = {}
    for resolutions in catalog.values():
        merged.update(resolutions)
    return merged
181
+
182
+
183
def _normalize_model_name(self, model: str) -> str:
    """
    Reduce a model id to its last path segment (e.g. drops a 'models/' prefix).

    :param model: model id
    :return: text after the last '/', or the input unchanged if it cannot be split
    """
    try:
        *_, tail = model.split("/")
    except Exception:
        # best effort: anything that cannot be split is returned as given
        return model
    return tail
193
+
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.15 23:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  from typing import Dict, List
@@ -53,7 +53,7 @@ class Modes:
53
53
  MODE_IMAGE,
54
54
  # MODE_LANGCHAIN,
55
55
  MODE_LLAMA_INDEX,
56
- MODE_VISION,
56
+ # MODE_VISION,
57
57
  MODE_RESEARCH,
58
58
  MODE_COMPUTER,
59
59
  )
@@ -6,7 +6,7 @@
6
6
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
7
  # MIT License #
8
8
  # Created By : Marcin Szczygliński #
9
- # Updated Date: 2025.08.15 23:00:00 #
9
+ # Updated Date: 2025.08.28 09:00:00 #
10
10
  # ================================================== #
11
11
 
12
12
  import copy
@@ -165,8 +165,8 @@ class Presets:
165
165
  return MODE_COMPLETION
166
166
  if preset.img:
167
167
  return MODE_IMAGE
168
- if preset.vision:
169
- return MODE_VISION
168
+ # if preset.vision:
169
+ # return MODE_VISION
170
170
  # if preset.langchain:
171
171
  # return MODE_LANGCHAIN
172
172
  if preset.assistant:
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ from dataclasses import dataclass, field
13
+ from typing import Optional
14
+
15
@dataclass
class RealtimeOptions:
    """
    Options for starting a realtime session.

    :param provider: Provider name ("google" or "openai")
    :param model: Model name
    :param system_prompt: System prompt text
    :param prompt: User prompt text
    :param voice: Voice name for TTS
    :param audio_data: Optional input audio data (bytes)
    :param audio_format: Format of the input audio (e.g., "pcm16", "wav")
    :param audio_rate: Sample rate of the input audio (e.g., 16000)
    :param vad: Voice Activity Detection mode (e.g., "server_vad" or None for manual)
    :param vad_end_silence_ms: VAD end silence in ms
    :param vad_prefix_padding_ms: VAD prefix padding in ms
    :param rt_signals: Real-time signals for event handling
    :param tools: Tools list
    :param remote_tools: Remote tools list
    :param auto_turn: Auto-turn enable/disable
    :param transcribe: Transcript enable/disable
    :param rt_session_id: Last session ID
    :param extra: Free-form dictionary for extra parameters
    """
    provider: str = "openai"  # "google" | "openai"
    model: Optional[str] = None
    system_prompt: Optional[str] = None
    prompt: Optional[str] = None
    voice: Optional[str] = None

    # Optional input audio
    audio_data: Optional[bytes] = None
    audio_format: Optional[str] = None  # e.g., "pcm16", "wav"
    audio_rate: Optional[int] = None  # e.g., 16000

    # Provider-specific VAD flag (use None for manual mode)
    vad: Optional[str] = None  # e.g., "server_vad"

    vad_end_silence_ms: Optional[int] = 2000  # VAD end silence in ms
    vad_prefix_padding_ms: Optional[int] = 300  # VAD prefix padding in ms

    # Real-time signals; the annotation used to be a `field()` call, i.e. a
    # Field instance instead of a type. The concrete signals class is not
    # visible from this module, so the type is kept generic.
    rt_signals: Optional[object] = None

    # Tools and remote tools
    tools: Optional[list] = None
    remote_tools: Optional[list] = None

    # Auto-turn enable/disable
    auto_turn: Optional[bool] = False

    # Transcript enable/disable
    transcribe: Optional[bool] = True

    # Last session ID
    rt_session_id: Optional[str] = None

    # Extra parameters
    extra: dict = field(default_factory=dict)

    def to_dict(self):
        """
        Return the options as a plain dict.

        The raw audio payload is replaced by its length in bytes and
        rt_signals is not included.

        :return: dict with option values
        """
        return {
            "provider": self.provider,
            "model": self.model,
            "system_prompt": self.system_prompt,
            "prompt": self.prompt,
            "voice": self.voice,
            "audio_data (len)": len(self.audio_data) if self.audio_data else 0,
            "audio_format": self.audio_format,
            "audio_rate": self.audio_rate,
            "vad": self.vad,
            "vad_end_silence_ms": self.vad_end_silence_ms,
            "vad_prefix_padding_ms": self.vad_prefix_padding_ms,
            "tools": self.tools,
            "remote_tools": self.remote_tools,
            "auto_turn": self.auto_turn,
            "transcribe": self.transcribe,
            "rt_session_id": self.rt_session_id,
            "extra": self.extra,
        }
File without changes
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import io
13
+ import math
14
+ import os
15
+ import wave
16
+ import audioop
17
+ from array import array
18
+ import struct
19
+ from typing import Optional, Tuple, List
20
+
21
DEFAULT_24K = 24000  # sample rate (Hz) assumed when neither the data nor the caller provides one


def coerce_to_pcm16_mono(data: bytes, fmt: Optional[str], rate_hint: Optional[int], fallback_rate: int = DEFAULT_24K) -> Tuple[int, int, bytes]:
    """
    Convert input audio (PCM16 raw or WAV) to PCM16 mono bytes. Float WAV is treated as raw (best effort).

    No resampling is done here: the returned sample rate is the source rate.

    :param data: input audio bytes
    :param fmt: "pcm16" / "pcm" / "raw" for headerless PCM16; anything else is parsed as WAV
    :param rate_hint: sample rate of raw input (also used when WAV parsing fails)
    :param fallback_rate: sample rate used when no other rate is known
    :return: (sample_rate, channels=1, pcm16_bytes)
    """
    if not data:
        return fallback_rate, 1, b""
    fmt = (fmt or "").lower().strip()
    if fmt in ("pcm16", "pcm", "raw"):
        sr = int(rate_hint) if rate_hint else fallback_rate
        return sr, 1, data

    # WAV path
    try:
        with wave.open(io.BytesIO(data), "rb") as wf:
            sr = wf.getframerate() or fallback_rate
            ch = wf.getnchannels() or 1
            sw = wf.getsampwidth() or 2
            frames = wf.readframes(wf.getnframes())

        if sw != 2:
            if sw == 1:
                # 8-bit WAV samples are unsigned while audioop works on signed
                # values; shift them to signed before widening
                frames = audioop.bias(frames, 1, 128)
            frames = audioop.lin2lin(frames, sw, 2)
        if ch == 2:
            frames = audioop.tomono(frames, 2, 0.5, 0.5)
        elif ch > 2:
            # audioop.tomono only understands stereo; for wider layouts keep
            # the first channel by taking one 16-bit sample per frame
            frame_size = 2 * ch
            frames = b"".join(
                frames[i:i + 2]
                for i in range(0, len(frames) - frame_size + 1, frame_size)
            )

        return sr, 1, frames
    except Exception:
        # not a readable WAV: hand the bytes back untouched (best effort)
        sr = int(rate_hint) if rate_hint else fallback_rate
        return sr, 1, data
54
+
55
def float32_to_int16_bytes(b: bytes) -> bytes:
    """
    Convert little-endian float32 PCM [-1.0, 1.0] to int16 PCM.

    Values outside the range are clamped to the int16 limits. NaN becomes
    silence (0) and +/-Inf saturates, so one bad sample no longer discards
    the whole buffer. Trailing bytes that do not form a full float32 are
    ignored.

    :param b: float32 sample bytes
    :return: int16 sample bytes (empty on failure or empty input)
    """
    if not b:
        return b""

    def _to_int16(x: float) -> int:
        v = x * 32767.0
        if v != v:  # NaN is the only value that differs from itself
            return 0
        if v >= 32767.0:
            return 32767
        if v <= -32768.0:
            return -32768
        return int(round(v))

    usable = len(b) - (len(b) % 4)  # whole float32 samples only
    payload = b[:usable]
    try:
        arr = array("f")
        arr.frombytes(payload)
        if struct.unpack('<I', struct.pack('=I', 1))[0] != 1:  # fallback if non-little
            arr.byteswap()
        return array("h", map(_to_int16, arr)).tobytes()
    except Exception:
        try:
            n = usable // 4
            vals = struct.unpack("<" + "f" * n, payload)
            return array("h", map(_to_int16, vals)).tobytes()
        except Exception:
            return b""
74
+
75
def parse_wav_fmt(data: bytes) -> Optional[dict]:
    """
    Minimal WAV fmt chunk parser to detect float/int format.

    Walks the RIFF chunks (each padded to an even size) until the "fmt "
    chunk is found and decodes its leading fields.

    :param data: complete WAV file content
    :return: dict with format_tag, channels, sample_rate, bits_per_sample and
             subformat_tag (only read for WAVE_FORMAT_EXTENSIBLE, else None),
             or None when the data is not RIFF/WAVE, has no usable fmt chunk,
             or parsing fails
    """
    try:
        if len(data) < 12:
            return None
        if data[0:4] != b"RIFF" or data[8:12] != b"WAVE":
            return None

        total = len(data)
        offset = 12  # first chunk header follows the 12-byte RIFF/WAVE preamble
        while offset + 8 <= total:
            chunk_id, chunk_size = struct.unpack_from("<4sI", data, offset)
            body_start = offset + 8
            if chunk_id != b"fmt ":
                # skip the chunk body, rounded up to an even length
                offset = body_start + ((chunk_size + 1) & ~1)
                continue

            body = data[body_start:body_start + chunk_size]
            if len(body) < 16:
                return None
            tag, n_channels, rate = struct.unpack_from("<HHI", body, 0)
            (bits,) = struct.unpack_from("<H", body, 14)
            sub = None
            if tag == 65534 and chunk_size >= 40:  # WAVE_FORMAT_EXTENSIBLE
                sub = int.from_bytes(body[24:26], "little", signed=False)
            return {
                "format_tag": tag,
                "channels": n_channels,
                "sample_rate": rate,
                "bits_per_sample": bits,
                "subformat_tag": sub,
            }
        return None
    except Exception:
        return None
107
+
108
def to_pcm16_mono(data: bytes, fmt: Optional[str], rate_hint: Optional[int], target_rate: int) -> Tuple[bytes, int]:
    """
    Normalize any input audio (RAW/WAV, int/float) to PCM16 mono at target_rate.

    Any failure yields empty audio instead of raising.

    :param data: input audio bytes
    :param fmt: "pcm16" / "pcm" / "raw" for headerless PCM16; anything else is parsed as WAV
    :param rate_hint: sample rate of raw input (target_rate is assumed when missing)
    :param target_rate: output sample rate
    :return: (pcm16_bytes, target_rate)
    """
    if not data:
        return b"", target_rate

    def _first_channel(pcm16: bytes, channels: int) -> bytes:
        # keep one 16-bit sample (channel 0) out of every interleaved frame
        frame_size = 2 * channels
        return b"".join(
            pcm16[i:i + 2]
            for i in range(0, len(pcm16) - frame_size + 1, frame_size)
        )

    fmt = (fmt or "").lower().strip()
    if fmt in ("pcm16", "pcm", "raw"):
        src_rate = int(rate_hint) if rate_hint else target_rate
        pcm16 = data
        if src_rate != target_rate:
            try:
                pcm16, _ = audioop.ratecv(pcm16, 2, 1, src_rate, target_rate, None)
            except Exception:
                return b"", target_rate
        return pcm16, target_rate

    # WAV path with float support
    # NOTE(review): depending on the Python version, wave.open() may reject
    # non-PCM (IEEE float) files outright, in which case the outer handler
    # returns empty audio before the float branch is reached - confirm on
    # the supported interpreters.
    try:
        fmt_info = parse_wav_fmt(data) or {}
        with wave.open(io.BytesIO(data), "rb") as wf:
            sr = wf.getframerate() or target_rate
            ch = wf.getnchannels() or 1
            sw = wf.getsampwidth() or 2
            frames = wf.readframes(wf.getnframes())

        format_tag = fmt_info.get("format_tag", 1)
        is_float = format_tag == 3 or (format_tag == 65534 and fmt_info.get("subformat_tag") == 3)

        # float32 -> int16
        if is_float:
            frames16 = float32_to_int16_bytes(frames)
        elif sw == 2:
            frames16 = frames
        else:
            if sw == 1:
                # 8-bit WAV samples are unsigned while audioop works on signed
                # values; shift them to signed before widening
                frames = audioop.bias(frames, 1, 128)
            frames16 = audioop.lin2lin(frames, sw, 2)

        # mixdown to mono
        if ch == 2:
            try:
                frames16 = audioop.tomono(frames16, 2, 0.5, 0.5)
            except Exception:
                # fallback keeps the left channel; slice whole 16-bit samples,
                # not every other byte
                frames16 = _first_channel(frames16, 2)
        elif ch > 2:
            # audioop.tomono only understands stereo; keep the first channel
            frames16 = _first_channel(frames16, ch)

        # resample
        if sr != target_rate:
            try:
                frames16, _ = audioop.ratecv(frames16, 2, 1, sr, target_rate, None)
            except Exception:
                return b"", target_rate

        return frames16, target_rate
    except Exception:
        return b"", target_rate
170
+
171
def resample_pcm16_mono(pcm: bytes, src_rate: int, dst_rate: int) -> bytes:
    """
    Resample PCM16 mono audio from src_rate to dst_rate.

    The input is returned unchanged when it is empty, when both rates are
    equal, or when the conversion fails.

    :param pcm: PCM16 mono bytes
    :param src_rate: source sample rate
    :param dst_rate: destination sample rate
    :return: resampled PCM16 mono bytes
    """
    needs_conversion = bool(pcm) and src_rate != dst_rate
    if not needs_conversion:
        return pcm
    try:
        converted, _state = audioop.ratecv(pcm, 2, 1, src_rate, dst_rate, None)
    except Exception:
        return pcm
    return converted
179
+
180
def iter_pcm_chunks(pcm: bytes, sr: int, ms: int = 50) -> List[bytes]:
    """
    Split PCM16 mono stream into ~ms byte chunks.

    The chunk size is always a whole number of 16-bit samples (at least one),
    so a sample is never split across two chunks.

    :param pcm: PCM16 mono bytes
    :param sr: sample rate
    :param ms: approximate chunk duration in milliseconds
    :return: list of chunks; the last one may be shorter
    """
    b_per_ms = int(sr * 2 / 1000)
    n = b_per_ms * ms
    n -= n % 2  # align to the 2-byte sample size
    n = max(n, 2)  # never less than one sample, also guards sr/ms <= 0
    return [pcm[i:i + n] for i in range(0, len(pcm), n)]
185
+
186
def dump_wav(path: str, sample_rate: int, pcm16_mono: bytes):
    """
    Write PCM16 mono audio to a WAV file (best effort).

    The parent directory is created when missing. Errors while creating the
    directory or writing the file are ignored; nothing is returned.

    :param path: destination file path
    :param sample_rate: sample rate stored in the WAV header
    :param pcm16_mono: PCM16 mono bytes to write
    """
    parent_dir = os.path.dirname(path)
    try:
        os.makedirs(parent_dir, exist_ok=True)
    except Exception:
        pass

    try:
        writer = wave.open(path, "wb")
        with writer:
            writer.setnchannels(1)  # mono
            writer.setsampwidth(2)  # 16-bit samples
            writer.setframerate(int(sample_rate))
            writer.writeframes(pcm16_mono)
    except Exception:
        pass
199
+
200
def pcm16_stats(pcm16_mono: bytes, sample_rate: int) -> dict:
    """
    Compute basic statistics for a PCM16 mono buffer.

    :param pcm16_mono: PCM16 mono bytes
    :param sample_rate: sample rate (a falsy value is treated as 1)
    :return: dict with duration_s, samples, rms, peak, dc_offset and dbfs
             (-999.0 when rms is 0); empty dict when anything fails
    """
    try:
        sample_count = len(pcm16_mono) // 2
        rate = float(sample_rate or 1)
        duration = sample_count / rate

        rms = audioop.rms(pcm16_mono, 2)

        peak = 0
        if pcm16_mono:
            peak = audioop.max(pcm16_mono, 2)

        try:
            dc_offset = audioop.avg(pcm16_mono, 2)
        except Exception:
            dc_offset = 0

        if rms == 0:
            level = -999.0
        else:
            level = 20.0 * math.log10(rms / 32768.0)

        return {
            "duration_s": duration,
            "samples": sample_count,
            "rms": rms,
            "peak": peak,
            "dc_offset": dc_offset,
            "dbfs": level,
        }
    except Exception:
        return {}
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # ================================================== #
4
+ # This file is a part of PYGPT package #
5
+ # Website: https://pygpt.net #
6
+ # GitHub: https://github.com/szczyglis-dev/py-gpt #
7
+ # MIT License #
8
+ # Created By : Marcin Szczygliński #
9
+ # Updated Date: 2025.08.31 23:00:00 #
10
+ # ================================================== #
11
+
12
+ import asyncio
13
+ import threading
14
+ from typing import Optional
15
+
16
class BackgroundLoop:
    """
    Dedicated background asyncio loop running in its own thread.
    Safe cross-thread scheduling and sync wrappers.
    """
    def __init__(self, name: str = "RT-Loop"):
        """
        :param name: name given to the loop thread
        """
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        self._name = name

    @property
    def loop(self) -> Optional[asyncio.AbstractEventLoop]:
        """Current event loop, or None when none has been created (or after stop())."""
        return self._loop

    def ensure(self):
        """
        Start the background loop if it is not running yet.

        Waits (bounded) until the loop is actually running, so a run_sync()
        issued right after ensure() is not dropped and a second ensure() does
        not spawn another loop.
        """
        if self._loop and self._loop.is_running():
            return

        # release a previous loop that has already finished running
        stale, stale_thread = self._loop, self._thread
        if stale is not None and not stale.is_closed():
            if stale_thread is None or not stale_thread.is_alive():
                try:
                    stale.close()
                except Exception:
                    pass

        loop = asyncio.new_event_loop()
        started = threading.Event()
        # first callback executed once run_forever() is spinning
        loop.call_soon(started.set)

        def _runner(loop: asyncio.AbstractEventLoop):
            asyncio.set_event_loop(loop)
            loop.run_forever()

        thread = threading.Thread(target=_runner, args=(loop,), name=self._name, daemon=True)
        self._loop = loop
        self._thread = thread
        thread.start()
        started.wait(timeout=1.0)

    async def run(self, coro):
        """
        Run a coroutine on the background loop and await its result from
        another loop.

        :param coro: coroutine object
        :return: coroutine result
        :raises RuntimeError: when no background loop exists
        """
        if not self._loop:
            # close the coroutine so it is not reported as never awaited
            close = getattr(coro, "close", None)
            if close is not None:
                close()
            raise RuntimeError("Owner loop is not running")
        cfut = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return await asyncio.wrap_future(cfut)

    def run_sync(self, coro, timeout: float = 5.0):
        """
        Run a coroutine on the background loop and block for its result.

        :param coro: coroutine object
        :param timeout: maximum time to wait, in seconds
        :return: coroutine result; None when the loop is not running, the
                 wait times out, or the coroutine raises
        """
        if not self._loop or not self._loop.is_running():
            # nothing will ever run this coroutine: close it instead of
            # leaving it un-awaited
            close = getattr(coro, "close", None)
            if close is not None:
                close()
            return None
        fut = asyncio.run_coroutine_threadsafe(coro, self._loop)
        try:
            return fut.result(timeout=timeout)
        except Exception:
            return None

    def stop(self, timeout: float = 2.0):
        """
        Stop the loop, wait for its thread and release the loop's resources.

        :param timeout: maximum time to wait for the thread, in seconds
        """
        loop, thread = self._loop, self._thread
        if loop and loop.is_running():
            loop.call_soon_threadsafe(loop.stop)
        if thread and thread.is_alive():
            thread.join(timeout=timeout)
        # close only once the loop has really stopped (join may have timed out)
        if loop and not loop.is_running() and not loop.is_closed():
            try:
                loop.close()
            except Exception:
                pass
        self._loop = None
        self._thread = None