pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (182)
  1. pygpt_net/CHANGELOG.txt +15 -0
  2. pygpt_net/__init__.py +3 -3
  3. pygpt_net/app.py +4 -0
  4. pygpt_net/{container.py → app_core.py} +5 -6
  5. pygpt_net/controller/__init__.py +5 -2
  6. pygpt_net/controller/access/control.py +1 -9
  7. pygpt_net/controller/assistant/assistant.py +4 -4
  8. pygpt_net/controller/assistant/batch.py +7 -7
  9. pygpt_net/controller/assistant/files.py +4 -4
  10. pygpt_net/controller/assistant/threads.py +3 -3
  11. pygpt_net/controller/attachment/attachment.py +4 -7
  12. pygpt_net/controller/audio/audio.py +25 -1
  13. pygpt_net/controller/audio/ui.py +2 -2
  14. pygpt_net/controller/chat/audio.py +1 -8
  15. pygpt_net/controller/chat/common.py +30 -4
  16. pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
  17. pygpt_net/controller/chat/output.py +8 -3
  18. pygpt_net/controller/chat/stream.py +4 -405
  19. pygpt_net/controller/chat/text.py +3 -2
  20. pygpt_net/controller/chat/vision.py +11 -19
  21. pygpt_net/controller/config/placeholder.py +1 -1
  22. pygpt_net/controller/ctx/ctx.py +1 -1
  23. pygpt_net/controller/ctx/summarizer.py +1 -1
  24. pygpt_net/controller/kernel/kernel.py +11 -3
  25. pygpt_net/controller/kernel/reply.py +5 -1
  26. pygpt_net/controller/mode/mode.py +21 -12
  27. pygpt_net/controller/plugins/settings.py +3 -2
  28. pygpt_net/controller/presets/editor.py +112 -99
  29. pygpt_net/controller/realtime/__init__.py +12 -0
  30. pygpt_net/controller/realtime/manager.py +53 -0
  31. pygpt_net/controller/realtime/realtime.py +268 -0
  32. pygpt_net/controller/theme/theme.py +3 -2
  33. pygpt_net/controller/ui/mode.py +7 -0
  34. pygpt_net/controller/ui/ui.py +19 -1
  35. pygpt_net/controller/ui/vision.py +4 -4
  36. pygpt_net/core/agents/legacy.py +2 -2
  37. pygpt_net/core/agents/runners/openai_workflow.py +2 -2
  38. pygpt_net/core/assistants/files.py +5 -5
  39. pygpt_net/core/assistants/store.py +4 -4
  40. pygpt_net/core/audio/audio.py +6 -1
  41. pygpt_net/core/audio/backend/native/__init__.py +12 -0
  42. pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
  43. pygpt_net/core/audio/backend/native/player.py +139 -0
  44. pygpt_net/core/audio/backend/native/realtime.py +250 -0
  45. pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
  46. pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
  47. pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
  48. pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
  49. pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
  50. pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
  51. pygpt_net/core/audio/backend/shared/__init__.py +38 -0
  52. pygpt_net/core/audio/backend/shared/conversions.py +211 -0
  53. pygpt_net/core/audio/backend/shared/envelope.py +38 -0
  54. pygpt_net/core/audio/backend/shared/player.py +137 -0
  55. pygpt_net/core/audio/backend/shared/rt.py +52 -0
  56. pygpt_net/core/audio/capture.py +5 -0
  57. pygpt_net/core/audio/output.py +13 -2
  58. pygpt_net/core/audio/whisper.py +6 -2
  59. pygpt_net/core/bridge/bridge.py +4 -3
  60. pygpt_net/core/bridge/worker.py +31 -9
  61. pygpt_net/core/debug/console/console.py +2 -2
  62. pygpt_net/core/debug/presets.py +2 -2
  63. pygpt_net/core/dispatcher/dispatcher.py +37 -1
  64. pygpt_net/core/events/__init__.py +2 -1
  65. pygpt_net/core/events/realtime.py +55 -0
  66. pygpt_net/core/experts/experts.py +2 -2
  67. pygpt_net/core/image/image.py +51 -1
  68. pygpt_net/core/modes/modes.py +2 -2
  69. pygpt_net/core/presets/presets.py +3 -3
  70. pygpt_net/core/realtime/options.py +87 -0
  71. pygpt_net/core/realtime/shared/__init__.py +0 -0
  72. pygpt_net/core/realtime/shared/audio.py +213 -0
  73. pygpt_net/core/realtime/shared/loop.py +64 -0
  74. pygpt_net/core/realtime/shared/session.py +59 -0
  75. pygpt_net/core/realtime/shared/text.py +37 -0
  76. pygpt_net/core/realtime/shared/tools.py +276 -0
  77. pygpt_net/core/realtime/shared/turn.py +38 -0
  78. pygpt_net/core/realtime/shared/types.py +16 -0
  79. pygpt_net/core/realtime/worker.py +164 -0
  80. pygpt_net/core/tokens/tokens.py +4 -4
  81. pygpt_net/core/types/__init__.py +1 -0
  82. pygpt_net/core/types/image.py +48 -0
  83. pygpt_net/core/types/mode.py +5 -2
  84. pygpt_net/core/vision/analyzer.py +1 -1
  85. pygpt_net/data/config/config.json +13 -4
  86. pygpt_net/data/config/models.json +219 -101
  87. pygpt_net/data/config/modes.json +3 -9
  88. pygpt_net/data/config/settings.json +135 -27
  89. pygpt_net/data/config/settings_section.json +2 -2
  90. pygpt_net/data/locale/locale.de.ini +7 -7
  91. pygpt_net/data/locale/locale.en.ini +25 -12
  92. pygpt_net/data/locale/locale.es.ini +7 -7
  93. pygpt_net/data/locale/locale.fr.ini +7 -7
  94. pygpt_net/data/locale/locale.it.ini +7 -7
  95. pygpt_net/data/locale/locale.pl.ini +8 -8
  96. pygpt_net/data/locale/locale.uk.ini +7 -7
  97. pygpt_net/data/locale/locale.zh.ini +3 -3
  98. pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
  99. pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
  100. pygpt_net/item/model.py +23 -3
  101. pygpt_net/plugin/audio_input/plugin.py +37 -4
  102. pygpt_net/plugin/audio_input/simple.py +57 -8
  103. pygpt_net/plugin/cmd_files/worker.py +3 -0
  104. pygpt_net/plugin/openai_dalle/plugin.py +4 -4
  105. pygpt_net/plugin/openai_vision/plugin.py +12 -13
  106. pygpt_net/provider/agents/openai/agent.py +5 -5
  107. pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
  108. pygpt_net/provider/agents/openai/agent_planner.py +5 -6
  109. pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
  110. pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
  111. pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
  112. pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
  113. pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
  114. pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
  115. pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
  116. pygpt_net/provider/agents/openai/evolve.py +5 -5
  117. pygpt_net/provider/agents/openai/supervisor.py +4 -4
  118. pygpt_net/provider/api/__init__.py +27 -0
  119. pygpt_net/provider/api/anthropic/__init__.py +68 -0
  120. pygpt_net/provider/api/google/__init__.py +295 -0
  121. pygpt_net/provider/api/google/audio.py +121 -0
  122. pygpt_net/provider/api/google/chat.py +591 -0
  123. pygpt_net/provider/api/google/image.py +427 -0
  124. pygpt_net/provider/api/google/realtime/__init__.py +12 -0
  125. pygpt_net/provider/api/google/realtime/client.py +1945 -0
  126. pygpt_net/provider/api/google/realtime/realtime.py +186 -0
  127. pygpt_net/provider/api/google/tools.py +222 -0
  128. pygpt_net/provider/api/google/vision.py +129 -0
  129. pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
  130. pygpt_net/provider/api/openai/agents/__init__.py +0 -0
  131. pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
  132. pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
  133. pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
  134. pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
  135. pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
  136. pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
  137. pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
  138. pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
  139. pygpt_net/provider/api/openai/realtime/client.py +1828 -0
  140. pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
  141. pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
  142. pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
  143. pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
  144. pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
  145. pygpt_net/provider/api/openai/worker/__init__.py +0 -0
  146. pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
  147. pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
  148. pygpt_net/provider/audio_input/google_genai.py +103 -0
  149. pygpt_net/provider/audio_input/openai_whisper.py +1 -1
  150. pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
  151. pygpt_net/provider/audio_output/openai_tts.py +9 -6
  152. pygpt_net/provider/core/config/patch.py +26 -0
  153. pygpt_net/provider/core/model/patch.py +20 -0
  154. pygpt_net/provider/core/preset/json_file.py +2 -4
  155. pygpt_net/provider/llms/anthropic.py +2 -5
  156. pygpt_net/provider/llms/base.py +4 -3
  157. pygpt_net/provider/llms/google.py +8 -9
  158. pygpt_net/provider/llms/openai.py +1 -1
  159. pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
  160. pygpt_net/ui/dialog/preset.py +71 -55
  161. pygpt_net/ui/layout/toolbox/footer.py +16 -0
  162. pygpt_net/ui/layout/toolbox/image.py +5 -0
  163. pygpt_net/ui/main.py +6 -4
  164. pygpt_net/ui/widget/option/combo.py +15 -1
  165. pygpt_net/utils.py +9 -0
  166. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
  167. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
  168. pygpt_net/core/audio/backend/pyaudio.py +0 -554
  169. /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
  170. /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
  171. /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
  172. /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
  173. /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
  174. /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
  175. /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
  176. /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
  177. /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
  178. /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
  179. /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
  180. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
  181. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
  182. {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
pygpt_net/data/locale/locale.uk.ini CHANGED
@@ -845,7 +845,7 @@ mode.agent_openai.tooltip = Просунуті агенти (OpenAI)
  mode.agent.tooltip = Прості агенти (legacy)
  mode.assistant = Помічник
  mode.assistant.tooltip = Чат за допомогою API Асистентів
- mode.audio = Чат з аудіо
+ mode.audio = Realtime + audio
  mode.chat = Чат
  mode.chat.tooltip = Режим чату (за замовчуванням)
  mode.completion = Завершення
@@ -1174,9 +1174,9 @@ settings.ctx.sources = Показати джерела індексу Llama
  settings.ctx.sources.desc = Якщо включено, використані джерела будуть відображатися в відповіді (якщо доступно, не працюватиме в потоковому чаті)
  settings.ctx.use_extra = Використовувати додатковий контекст виводу
  settings.ctx.use_extra.desc = Якщо увімкнено, звичайний текстовий вивід (якщо доступний) з результатів команд буде відображений поруч з JSON виводом.
+ settings.debug.show_menu = Показати меню налагодження
  settings.defaults.app.confirm = Завантажити заводські налаштування додатку?
  settings.defaults.user.confirm = Відмінити поточні зміни?
- settings.developer.debug = Показати меню налагодження
  settings.dict.delete.confirm = Видалити елемент зі списку?
  settings.download.dir = Директорія для завантаження файлів
  settings.download.dir.desc = Піддиректорія для завантажених файлів, наприклад, у режимі помічників, всередині "data"
@@ -1197,9 +1197,9 @@ settings.frequency_penalty = Частотний штраф
  settings.func_call.native = Використовувати рідні виклики функцій API
  settings.func_call.native.desc = Якщо увімкнено, програма буде використовувати рідні виклики функцій API замість внутрішнього формату pygpt і нижченаведених запитів команд не використовуватимуться. Лише режими чату та асистентів.
  settings.img_dialog_open = Відкрити діалогове вікно зображення після генерації (Режим зображення)
- settings.img_prompt_model = DALL-E: модель генерації запиту
- settings.img_quality = DALL-E: якість зображення
- settings.img_resolution = DALL-E: розмір зображення
+ settings.img_prompt_model = Модель Генерації Запиту
+ settings.img_quality = Якість Зображення
+ settings.img_resolution = Розмір Зображення
  settings.layout.animation.disable = Вимкнути анімації
  settings.layout.animation.disable.desc = Вимикає анімації макета, як анімовані завантажувачі тощо.
  settings.layout.density = Щільність компонування
@@ -1294,7 +1294,7 @@ settings.prompt.ctx.auto_summary.user = Контекст: авто-резюме
  settings.prompt.ctx.auto_summary.user.desc = Заповнювачі: {input}, {output}
  settings.prompt.expert = Експерт: Основний запит
  settings.prompt.expert.desc = Інструкція (системний запит) для ведучого експерта, як керувати підеекспертами. Інструкції для підеекспертів даються з їхніх налаштувань.
- settings.prompt.img = DALL-E: генерація зображення
+ settings.prompt.img = Генерація зображення
  settings.prompt.img.desc = Підказка для генерації команддля DALL-E (якщо вимкнено сирівний режим). Тільки режим зображення.
  settings.remote_tools.code_interpreter = Інтерпретатор коду
  settings.remote_tools.code_interpreter.desc = Увімкніть віддалений інструмент `code_interpreter` у режимі Чат / через Responses API OpenAI.
@@ -1336,7 +1336,7 @@ settings.section.audio.cache = Кеш
  settings.section.audio.device = Пристрої
  settings.section.audio.options = Параметри
  settings.section.ctx = Контекст
- settings.section.developer = Розробник
+ settings.section.debug = Налагодження
  settings.section.files = Файли та вкладення
  settings.section.general = Загальні
  settings.section.images = Зображення
pygpt_net/data/locale/locale.zh.ini CHANGED
@@ -845,7 +845,7 @@ mode.agent_openai.tooltip = 高级代理 (OpenAI)
  mode.agent.tooltip = 简单代理(自主)
  mode.assistant = 助手
  mode.assistant.tooltip = 使用助手API進行聊天
- mode.audio = 语音聊天
+ mode.audio = Realtime + audio
  mode.chat = 聊天模式
  mode.chat.tooltip = 聊天模式(預設)
  mode.completion = 完成模式
@@ -1174,9 +1174,9 @@ settings.ctx.sources = 显示Llama索引源
  settings.ctx.sources.desc = 如果启用,使用的源将在回应中显示(如果可用,不适用于流式聊天)
  settings.ctx.use_extra = 使用额外的上下文输出
  settings.ctx.use_extra.desc = 如果启用,将在命令结果的 JSON 输出旁边显示纯文本输出(如果有)。
+ settings.debug.show_menu = 显示调试菜单
  settings.defaults.app.confirm = 加载出厂应用程序设置?
  settings.defaults.user.confirm = 撤销当前更改?
- settings.developer.debug = 显示调试菜单
  settings.dict.delete.confirm = 从列表中移除项目?
  settings.download.dir = 文件下载目录
  settings.download.dir.desc = 下载文件的子目录,例如在助手模式下,位于 "data" 内部
@@ -1336,7 +1336,7 @@ settings.section.audio.cache = 缓存
  settings.section.audio.device = 设备
  settings.section.audio.options = 选项
  settings.section.ctx = 上下文
- settings.section.developer = 開發者
+ settings.section.debug = 调试
  settings.section.files = 文件和附件
  settings.section.general = 一般
  settings.section.images = 圖像
pygpt_net/data/locale/plugin.audio_input.en.ini CHANGED
@@ -17,6 +17,10 @@ google_args.tooltip = Provide additional keyword arguments for recognize_google(
  google_cloud_args.description = Additional keyword arguments for r.recognize_google_cloud(audio, **kwargs).
  google_cloud_args.label = Additional keyword arguments
  google_cloud_args.tooltip = Provide additional keyword arguments for recognize_google_cloud()
+ google_genai_audio_model.description = Specify Gemini model supporting audio, e.g., gemini-2.5-flash
+ google_genai_audio_model.label = Model
+ google_genai_audio_prompt.description = System prompt for transcription
+ google_genai_audio_prompt.label = System Prompt
  magic_word.description = Activate listening only after the magic word is provided, like 'Hey GPT' or 'OK GPT'. Default: False.
  magic_word.label = Magic word
  magic_word_phrase_length.description = Magic word phrase length. Default: 2.
pygpt_net/data/locale/plugin.audio_output.en.ini CHANGED
@@ -15,6 +15,10 @@ eleven_labs_voice.description = Specify the Voice ID.
  eleven_labs_voice.label = Voice ID
  google_api_key.description = You can obtain your own API key here: https://console.cloud.google.com/apis/library/texttospeech.googleapis.com
  google_api_key.label = Google Cloud Text-to-speech API Key
+ google_genai_tts_model.description = Specify Gemini TTS model, e.g.: gemini-2.5-flash-preview-tts or gemini-2.5-pro-preview-tts
+ google_genai_tts_model.label = Model
+ google_genai_tts_voice.description = Specify voice, e.g.: Puck, Kore, Charon, Leda, Zephyr... (case-sensitive)
+ google_genai_tts_voice.label = Voice
  google_lang.description = Specify the language code.
  google_lang.label = Language code
  google_voice.description = Specify the voice.
pygpt_net/item/model.py CHANGED
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.23 15:00:00 #
+ # Updated Date: 2025.08.28 09:00:00 #
  # ================================================== #
 
  import json
@@ -253,7 +253,17 @@ class ModelItem:
 
          :return: True if supports image input
          """
-         if MODE_VISION in self.mode or MULTIMODAL_IMAGE in self.input:
+         if MULTIMODAL_IMAGE in self.input:
+             return True
+         return False
+
+     def is_image_output(self) -> bool:
+         """
+         Check if model supports image output
+
+         :return: True if supports image output
+         """
+         if "image" in self.output or MODE_VISION in self.mode:
              return True
          return False
 
@@ -263,7 +273,17 @@ class ModelItem:
 
          :return: True if supports audio input
          """
-         if MODE_AUDIO in self.mode or MULTIMODAL_AUDIO in self.input:
+         if MULTIMODAL_AUDIO in self.input:
+             return True
+         return False
+
+     def is_audio_output(self) -> bool:
+         """
+         Check if model supports audio output
+
+         :return: True if supports audio output
+         """
+         if MULTIMODAL_AUDIO in self.output:
              return True
          return False
 
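Note on the change above: input and output capabilities are now checked separately. Image and audio input support is read only from the model's declared multimodal inputs; the legacy MODE_VISION now counts toward image output instead, and MODE_AUDIO no longer implies audio input at all. A minimal standalone sketch of the same checks (the constants and the sample model below are simplified stand-ins, not the real ModelItem):

MULTIMODAL_IMAGE = "image"
MULTIMODAL_AUDIO = "audio"
MODE_VISION = "vision"

class Model:
    """Simplified stand-in for pygpt_net.item.model.ModelItem."""
    def __init__(self, mode, input, output):
        self.mode = mode      # chat modes, e.g. ["chat", "vision"]
        self.input = input    # multimodal inputs, e.g. ["text", "image"]
        self.output = output  # multimodal outputs, e.g. ["text"]

    def is_image_input(self):
        return MULTIMODAL_IMAGE in self.input

    def is_image_output(self):
        return "image" in self.output or MODE_VISION in self.mode

    def is_audio_input(self):
        return MULTIMODAL_AUDIO in self.input

    def is_audio_output(self):
        return MULTIMODAL_AUDIO in self.output

# a text+image-in, text-out model reports image input but no image output
m = Model(mode=["chat"], input=["text", "image"], output=["text"])
assert m.is_image_input() and not m.is_image_output()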

pygpt_net/plugin/audio_input/plugin.py CHANGED
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2024.11.26 19:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #
 
  import os
@@ -23,6 +23,7 @@ from pygpt_net.utils import trans
  from .config import Config
  from .worker import Worker
  from .simple import Simple
+ from ...core.types import MODE_AUDIO
 
 
  class Plugin(BasePlugin):
@@ -124,13 +125,31 @@ class Plugin(BasePlugin):
          words = [x.strip() for x in words]  # remove white-spaces
          return words
 
-     def toggle_recording_simple(self):
+     def toggle_recording_simple(
+             self,
+             state: bool = None,
+             auto: bool = False
+     ):
          """
          Event: AUDIO_INPUT_RECORD_TOGGLE
 
          Toggle recording
+
+         :param state: state to set
+         :param auto: True if called automatically (not by user)
+         """
+         if self.window.controller.realtime.is_enabled():
+             self.handler_simple.toggle_realtime(state=state, auto=auto)
+             return
+         self.handler_simple.toggle_recording(state=state)
+
+     def is_recording(self) -> bool:
          """
-         self.handler_simple.toggle_recording()
+         Check if is recording (simple mode)
+
+         :return: True if is recording
+         """
+         return self.handler_simple.is_recording
 
      def toggle_speech(self, state: bool):
          """
@@ -214,7 +233,9 @@ class Plugin(BasePlugin):
              self.toggle_speech(data['value'])
 
          elif name == Event.AUDIO_INPUT_RECORD_TOGGLE:
-             self.toggle_recording_simple()
+             state = data['state'] if 'value' in data else None
+             auto = data['auto'] if 'auto' in data else False
+             self.toggle_recording_simple(state=state, auto=auto)
 
          elif name == Event.AUDIO_INPUT_STOP:
              self.on_stop()
@@ -492,6 +513,18 @@ class Plugin(BasePlugin):
          self.window.dispatch(event)  # send text, input clear in send method
          self.set_status('')
 
+     def handle_realtime_stopped(self):
+         """Handle realtime stopped"""
+         context = BridgeContext()
+         context.prompt = "..."
+         extra = {}
+         event = KernelEvent(KernelEvent.INPUT_SYSTEM, {
+             'context': context,
+             'extra': extra,
+         })
+         self.window.dispatch(event)  # send text, input clear in send method
+         self.set_status('')
+
      @Slot(object)
      def handle_status(self, data: str):
          """

pygpt_net/plugin/audio_input/simple.py CHANGED
@@ -6,14 +6,14 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.27 07:00:00 #
+ # Updated Date: 2025.08.31 23:00:00 #
  # ================================================== #
 
  import os
 
  from PySide6.QtCore import QTimer
 
- from pygpt_net.core.events import AppEvent
+ from pygpt_net.core.events import AppEvent, RealtimeEvent
  from pygpt_net.core.tabs.tab import Tab
  from pygpt_net.utils import trans
 
@@ -32,8 +32,46 @@ class Simple:
          self.is_recording = False
          self.timer = None
 
-     def toggle_recording(self):
-         """Toggle recording"""
+     def toggle_realtime(
+             self,
+             state: bool = None,
+             auto: bool = False
+     ):
+         """
+         Toggle recording
+
+         :param state: True to start recording, False to stop recording, None to toggle
+         :param auto: True if called automatically (not by user)
+         """
+         if state is not None:
+             if state and not self.is_recording:
+                 self.start_recording(realtime=True)
+             elif not state:
+                 self.force_stop()
+             else:
+                 self.force_stop()
+             return
+         if self.is_recording:
+             self.stop_recording(realtime=True)
+             if not auto:
+                 self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP))
+         else:
+             self.start_recording(realtime=True)
+             if not auto:
+                 self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START))
+
+     def toggle_recording(self, state: bool = None):
+         """
+         Toggle recording
+
+         :param state: True to start recording, False to stop recording, None to toggle
+         """
+         if state is not None:
+             if state and not self.is_recording:
+                 self.start_recording()
+             elif not state:
+                 self.force_stop()
+             return
          if self.is_recording:
              self.stop_recording()
          else:
@@ -51,11 +89,12 @@ class Simple:
          """Stop timeout"""
          self.stop_recording(timeout=True)
 
-     def start_recording(self, force: bool = False):
+     def start_recording(self, force: bool = False, realtime: bool = False):
          """
          Start recording
 
          :param force: True to force recording
+         :param realtime: True if called from realtime callback
          """
          # display snap warning if not displayed yet
          if (not self.plugin.window.core.config.get("audio.input.snap", False)
@@ -89,7 +128,7 @@ class Simple:
          # disable in continuous mode
          timeout = int(self.plugin.window.core.config.get('audio.input.timeout', 120) or 0)  # get timeout
          timeout_continuous = self.plugin.window.core.config.get('audio.input.timeout.continuous', False)  # enable continuous timeout
-         if timeout > 0:
+         if timeout > 0 and not realtime:
              if self.timer is None and (not continuous_enabled or timeout_continuous):
                  self.timer = QTimer()
                  self.timer.timeout.connect(self.stop_timeout)
@@ -119,11 +158,12 @@ class Simple:
              )
              self.switch_btn_start()  # switch button to start
 
-     def stop_recording(self, timeout: bool = False):
+     def stop_recording(self, timeout: bool = False, realtime: bool = False):
          """
          Stop recording
 
          :param timeout: True if stopped due to timeout
+         :param realtime: True if called from realtime callback
          """
          self.plugin.window.core.audio.capture.reset_audio_level()
          self.is_recording = False
@@ -143,7 +183,7 @@ class Simple:
              return
 
          if self.plugin.window.core.audio.capture.has_frames():
-             if not self.plugin.window.core.audio.capture.has_min_frames():
+             if not self.plugin.window.core.audio.capture.has_min_frames() and not realtime:
                  self.plugin.window.update_status(trans("status.audio.too_short"))
                  self.plugin.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED))  # app event
                  return
@@ -152,6 +192,15 @@ class Simple:
          else:
              self.plugin.window.update_status("")
 
+     def force_stop(self):
+         """Stop recording"""
+         self.is_recording = False
+         self.plugin.window.dispatch(AppEvent(AppEvent.INPUT_VOICE_LISTEN_STOPPED))  # app event
+         self.switch_btn_start()  # switch button to start
+         if self.plugin.window.core.audio.capture.has_source():
+             self.plugin.window.core.audio.capture.stop()  # stop recording
+         return
+
      def on_stop(self):
          """Handle auto-transcribe"""
          path = os.path.join(self.plugin.window.core.config.path, self.plugin.input_file)
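The toggle methods above share a tri-state convention: state=None flips the current state, state=True starts only when not already recording, and state=False always forces a stop. A self-contained sketch of that pattern with the audio and UI plumbing replaced by prints (an illustration of the convention, not the real Simple class):

class Recorder:
    """Tri-state toggle: None = flip, True = ensure started, False = force stop."""
    def __init__(self):
        self.is_recording = False

    def toggle(self, state=None):
        if state is not None:
            if state and not self.is_recording:
                self.start()       # explicit start; no-op if already running
            elif not state:
                self.force_stop()  # explicit stop always wins
            return
        if self.is_recording:      # no explicit state: plain flip
            self.stop()
        else:
            self.start()

    def start(self):
        self.is_recording = True
        print("recording started")

    def stop(self):
        self.is_recording = False
        print("recording stopped")

    def force_stop(self):
        self.is_recording = False
        print("recording force-stopped")

r = Recorder()
r.toggle(True)   # starts
r.toggle(True)   # no-op, already recording
r.toggle(False)  # force stop
r.toggle()       # flips: starts again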

pygpt_net/plugin/cmd_files/worker.py CHANGED
@@ -920,6 +920,9 @@ class Worker(BaseWorker):
          :param context: context data
          :return: extra data
          """
+         # disabled in v2.6.31
+         # reason: do not duplicate context in chat
+         return {}
          cmd = item["cmd"]
          extra = {
              'plugin': "cmd_files",

pygpt_net/plugin/openai_dalle/plugin.py CHANGED
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.07.30 00:00:00 #
+ # Updated Date: 2025.08.28 09:00:00 #
  # ================================================== #
 
  from pygpt_net.core.types import (
@@ -42,8 +42,8 @@ class Plugin(BasePlugin):
          ]
          self.allowed_modes = [
              MODE_CHAT,
-             MODE_LANGCHAIN,
-             MODE_VISION,
+             # MODE_LANGCHAIN,
+             # MODE_VISION,
              MODE_LLAMA_INDEX,
              MODE_ASSISTANT,
              MODE_AGENT,
@@ -166,7 +166,7 @@ class Plugin(BasePlugin):
              sync = False
              if self.window.core.config.get("mode") in [MODE_AGENT_LLAMA, MODE_AGENT_OPENAI]:
                  sync = True
-             self.window.core.gpt.image.generate(bridge_context, extra, sync)  # force inline mode, async call
+             self.window.core.api.openai.image.generate(bridge_context, extra, sync)  # force inline mode, async call
          except Exception as e:
              self.log("Error: " + str(e))
              return
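This is the first of many call sites migrated from the old window.core.gpt namespace to window.core.api.openai, matching the provider/{gpt → api/openai} file moves listed above. Downstream code that reached into the old attribute could bridge both layouts with a small shim; a hedged sketch (the probe order is an assumption, not project-provided API):

def get_openai_image_api(window):
    # prefer the new layout, fall back to the legacy one
    core = window.core
    if hasattr(core, "api") and hasattr(core.api, "openai"):
        return core.api.openai.image
    return core.gpt.image

# usage: get_openai_image_api(window).generate(bridge_context, extra, sync)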

pygpt_net/plugin/openai_vision/plugin.py CHANGED
@@ -6,7 +6,7 @@
  # GitHub: https://github.com/szczyglis-dev/py-gpt #
  # MIT License #
  # Created By : Marcin Szczygliński #
- # Updated Date: 2025.08.15 23:00:00 #
+ # Updated Date: 2025.08.28 09:00:00 #
  # ================================================== #
 
  from pygpt_net.core.types import (
@@ -102,7 +102,7 @@ class Plugin(BasePlugin):
              )  # mode change
 
          elif name == Event.MODEL_BEFORE:
-             if "mode" in data and data["mode"] == MODE_VISION:
+             if data.get("mode") == MODE_CHAT:
                  key = self.get_option_value("model")
                  if self.window.core.models.has(key):
                      data['model'] = self.window.core.models.get(key)
@@ -119,7 +119,7 @@ class Plugin(BasePlugin):
              data['value'] = self.on_system_prompt(data['value'])
 
          elif name == Event.UI_ATTACHMENTS:
-             mode = data["mode"]
+             mode = data.get("mode")
              if mode in [MODE_AGENT, MODE_AGENT_LLAMA, MODE_AGENT_OPENAI] and not self.window.core.config.get("cmd"):
                  pass
              else:
@@ -263,8 +263,7 @@ class Plugin(BasePlugin):
          # append vision prompt only if vision is provided or enabled
          if not self.is_vision_provided():
              return prompt
-         prompt = "Image attachment has been already sent.\n\n" + prompt
-         return prompt
+         return "Image attachment has been already sent.\n\n" + prompt
 
      def on_pre_prompt(self, prompt: str) -> str:
          """
@@ -294,12 +293,12 @@ class Plugin(BasePlugin):
          """
          mode = self.window.core.config.get('mode')
          attachments = self.window.core.attachments.get_all(mode)
-         self.window.core.gpt.vision.build_content(
+         self.window.core.api.openai.vision.build_content(
              str(self.prompt),
              attachments,
          )  # tmp build content, provide attachments from global mode
 
-         built_attachments = self.window.core.gpt.vision.attachments
+         built_attachments = self.window.core.api.openai.vision.attachments
          if len(built_attachments) > 0:
              return True
          return False
@@ -313,13 +312,13 @@ class Plugin(BasePlugin):
          result = False
          mode = self.window.core.config.get('mode')
          attachments = self.window.core.attachments.get_all(mode)  # from global mode
-         self.window.core.gpt.vision.build_content(
+         self.window.core.api.openai.vision.build_content(
              str(self.prompt),
              attachments,
          )  # tmp build content, provide attachments from global mode
 
-         built_attachments = self.window.core.gpt.vision.attachments
-         built_urls = self.window.core.gpt.vision.urls
+         built_attachments = self.window.core.api.openai.vision.attachments
+         built_urls = self.window.core.api.openai.vision.urls
 
          # check for images in URLs found in prompt
          img_urls = []
@@ -343,13 +342,13 @@ class Plugin(BasePlugin):
          :return: updated mode
          """
          # abort if already in vision mode or command enabled
-         if mode == MODE_VISION or mode in self.disabled_mode_switch:
+         if mode in self.disabled_mode_switch:
              return mode  # keep current mode
 
-         # if already used in this ctx then keep vision mode
+         # if already used in this ctx then keep vision (in CHAT) mode
          if self.is_vision_provided():
              ctx.is_vision = True
-             return MODE_VISION
+             return MODE_CHAT
 
          return mode  # keep current mode
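Two behavioral details in this file are easy to miss: event payloads are now read defensively (data.get("mode") instead of data["mode"], so a missing key yields None rather than a KeyError), and the automatic mode switch no longer jumps to the retired vision mode but stays in chat with the context flagged for vision. A minimal sketch of the updated switch logic (simplified stand-ins for the real constants and context object):

MODE_CHAT = "chat"

def switch_mode(mode, ctx, vision_provided, disabled_mode_switch=()):
    # abort if switching is disabled for this mode
    if mode in disabled_mode_switch:
        return mode
    # vision content keeps the session in chat mode; only the ctx is marked
    if vision_provided:
        ctx.is_vision = True
        return MODE_CHAT
    return mode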

pygpt_net/provider/agents/openai/agent.py CHANGED
@@ -26,12 +26,12 @@ from pygpt_net.core.types import (
  from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
 
- from pygpt_net.provider.gpt.agents.remote_tools import is_computer_tool, append_tools
- from pygpt_net.provider.gpt.agents.computer import Agent as ComputerAgent, LocalComputer
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.remote_tools import is_computer_tool, append_tools
+ from pygpt_net.provider.api.openai.agents.computer import Agent as ComputerAgent, LocalComputer
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
 
  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts
 
  class Agent(BaseAgent):
      def __init__(self, *args, **kwargs):
@@ -159,7 +159,7 @@ class Agent(BaseAgent):
              agent,
              **kwargs
          )
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
          response_id = result.last_response_id
          if verbose:
              print("Final response:", result)

pygpt_net/provider/agents/openai/agent_b2b.py CHANGED
@@ -29,12 +29,12 @@ from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
  from pygpt_net.item.preset import PresetItem
 
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans
 
  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts
 
 
  class Agent(BaseAgent):
@@ -274,7 +274,7 @@ class Agent(BaseAgent):
          if verbose:
              print("Final response:", result)
 
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
 
          if bridge.stopped():
              bridge.on_stop(ctx)
@@ -305,7 +305,7 @@ class Agent(BaseAgent):
          if verbose:
              print("Final response:", result)
 
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
          if bridge.stopped():
              bridge.on_stop(ctx)
              break

pygpt_net/provider/agents/openai/agent_planner.py CHANGED
@@ -30,14 +30,13 @@ from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
  from pygpt_net.item.preset import PresetItem
 
- from pygpt_net.provider.gpt.agents.client import get_custom_model_provider, set_openai_env
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.client import get_custom_model_provider, set_openai_env
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans
 
  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts
-
 
  @dataclass
  class EvaluationFeedback:
@@ -327,7 +326,7 @@ class Agent(BaseAgent):
              print("Final response:", result)
 
          input_items = result.to_input_list()
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
 
          if bridge.stopped():
              bridge.on_stop(ctx)

pygpt_net/provider/agents/openai/agent_with_experts.py CHANGED
@@ -29,12 +29,12 @@ from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
  from pygpt_net.item.preset import PresetItem
 
- from pygpt_net.provider.gpt.agents.client import get_custom_model_provider, set_openai_env
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.client import get_custom_model_provider, set_openai_env
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
 
  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts
 
 
  class Agent(BaseAgent):
@@ -137,7 +137,7 @@ class Agent(BaseAgent):
              agent,
              **kwargs
          )
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
          response_id = result.last_response_id
          if verbose:
              print("Final response:", result)

pygpt_net/provider/agents/openai/agent_with_experts_feedback.py CHANGED
@@ -29,12 +29,12 @@ from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
  from pygpt_net.item.preset import PresetItem
 
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans
 
  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts
 
 
  @dataclass
@@ -221,7 +221,7 @@ class Agent(BaseAgent):
              print("Final response:", result)
 
          input_items = result.to_input_list()
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
 
          if bridge.stopped():
              bridge.on_stop(ctx)

pygpt_net/provider/agents/openai/agent_with_feedback.py CHANGED
@@ -29,12 +29,12 @@ from pygpt_net.item.ctx import CtxItem
  from pygpt_net.item.model import ModelItem
  from pygpt_net.item.preset import PresetItem
 
- from pygpt_net.provider.gpt.agents.remote_tools import append_tools
- from pygpt_net.provider.gpt.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.remote_tools import append_tools
+ from pygpt_net.provider.api.openai.agents.response import StreamHandler
+ from pygpt_net.provider.api.openai.agents.experts import get_experts
  from pygpt_net.utils import trans
 
  from ..base import BaseAgent
- from ...gpt.agents.experts import get_experts
 
 
  @dataclass
@@ -221,7 +221,7 @@ class Agent(BaseAgent):
              print("Final response:", result)
 
          input_items = result.to_input_list()
-         final_output, last_response_id = window.core.gpt.responses.unpack_agent_response(result, ctx)
+         final_output, last_response_id = window.core.api.openai.responses.unpack_agent_response(result, ctx)
 
          if bridge.stopped():
              bridge.on_stop(ctx)