pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +7 -1
- pygpt_net/app_core.py +3 -1
- pygpt_net/config.py +3 -1
- pygpt_net/controller/__init__.py +9 -2
- pygpt_net/controller/audio/audio.py +38 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +23 -62
- pygpt_net/controller/chat/handler/__init__.py +0 -0
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +3 -1071
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/lang/custom.py +2 -2
- pygpt_net/controller/media/__init__.py +12 -0
- pygpt_net/controller/media/media.py +115 -0
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +293 -0
- pygpt_net/controller/ui/mode.py +23 -2
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +14 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +2 -1
- pygpt_net/core/bridge/worker.py +4 -1
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/image/image.py +56 -5
- pygpt_net/core/realtime/__init__.py +0 -0
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +160 -0
- pygpt_net/core/render/web/body.py +24 -3
- pygpt_net/core/text/utils.py +54 -2
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +54 -0
- pygpt_net/core/video/__init__.py +12 -0
- pygpt_net/core/video/video.py +290 -0
- pygpt_net/data/config/config.json +26 -5
- pygpt_net/data/config/models.json +221 -103
- pygpt_net/data/config/settings.json +244 -6
- pygpt_net/data/css/web-blocks.css +6 -0
- pygpt_net/data/css/web-chatgpt.css +6 -0
- pygpt_net/data/css/web-chatgpt_wide.css +6 -0
- pygpt_net/data/locale/locale.de.ini +35 -7
- pygpt_net/data/locale/locale.en.ini +56 -17
- pygpt_net/data/locale/locale.es.ini +35 -7
- pygpt_net/data/locale/locale.fr.ini +35 -7
- pygpt_net/data/locale/locale.it.ini +35 -7
- pygpt_net/data/locale/locale.pl.ini +38 -7
- pygpt_net/data/locale/locale.uk.ini +35 -7
- pygpt_net/data/locale/locale.zh.ini +31 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
- pygpt_net/item/model.py +22 -1
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/provider/api/google/__init__.py +76 -7
- pygpt_net/provider/api/google/audio.py +8 -1
- pygpt_net/provider/api/google/chat.py +45 -6
- pygpt_net/provider/api/google/image.py +226 -86
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/video.py +364 -0
- pygpt_net/provider/api/openai/__init__.py +22 -2
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/google_tts.py +0 -12
- pygpt_net/provider/audio_output/openai_tts.py +8 -5
- pygpt_net/provider/core/config/patch.py +241 -178
- pygpt_net/provider/core/model/patch.py +28 -2
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/web/duckduck_search.py +212 -0
- pygpt_net/ui/layout/toolbox/audio.py +55 -0
- pygpt_net/ui/layout/toolbox/footer.py +14 -42
- pygpt_net/ui/layout/toolbox/image.py +7 -13
- pygpt_net/ui/layout/toolbox/raw.py +52 -0
- pygpt_net/ui/layout/toolbox/split.py +48 -0
- pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
- pygpt_net/ui/layout/toolbox/video.py +49 -0
- pygpt_net/ui/widget/option/combo.py +15 -1
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0
pygpt_net/provider/api/google/realtime/realtime.py (new file)

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+
+import json
+from typing import Optional, Dict, Any
+
+from pygpt_net.core.events import RealtimeEvent
+from pygpt_net.core.realtime.options import RealtimeOptions
+from pygpt_net.core.bridge.context import BridgeContext
+from pygpt_net.core.realtime.shared.session import extract_last_session_id
+from pygpt_net.item.model import ModelItem
+
+from .client import GoogleLiveClient
+
+
+class Realtime:
+
+    PROVIDER = "google"
+
+    def __init__(self, window=None):
+        """
+        Google GenAI API realtime controller
+
+        :param window: Window instance
+        """
+        self.window = window
+        self.handler = GoogleLiveClient(window)
+        self.prev_auto_turn = False
+        self.prev_vad_silence = 2000
+        self.prev_vad_prefix = 300
+
+    def begin(
+            self,
+            context: BridgeContext,
+            model: Optional[ModelItem] = None,
+            extra: Optional[Dict[str, Any]] = None,
+            rt_signals=None
+    ) -> bool:
+        """
+        Begin realtime session if applicable
+
+        :param context: BridgeContext
+        :param model: Optional[ModelItem]
+        :param extra: Optional dict with extra parameters
+        :param rt_signals: Optional RealtimeSignals
+        :return: bool - True if realtime session started, False otherwise
+        """
+        # Build realtime options
+        mm = context.multimodal_ctx
+        audio_bytes = getattr(mm, "audio_data", None) if mm and getattr(mm, "is_audio_input", False) else None
+        audio_format = getattr(mm, "audio_format", None) if mm else None
+        audio_rate = getattr(mm, "audio_rate", None) if mm else None
+        is_debug = self.window.core.config.get("log.realtime", False)
+        auto_turn = self.window.core.config.get("audio.input.auto_turn", True)
+        opt_vad_silence = self.window.core.config.get("audio.input.vad.silence", 2000)
+        opt_vad_prefix = self.window.core.config.get("audio.input.vad.prefix", 300)
+
+        # setup manager
+        self.window.controller.realtime.set_current_active(self.PROVIDER)
+        self.window.controller.realtime.set_busy()
+        self.handler.set_debug(is_debug)
+
+        # handle sub-reply (tool results from tool calls)
+        if context.ctx.internal:
+            if context.ctx.prev_ctx and context.ctx.prev_ctx.extra.get("prev_tool_calls"):
+                tool_calls = context.ctx.prev_ctx.extra.get("prev_tool_calls", [])
+                tool_call_id = None
+                if isinstance(tool_calls, list) and len(tool_calls) > 0:
+                    tool_call_id = tool_calls[0].get("call_id", "")  # get first call_id
+                    if not tool_call_id:
+                        tool_call_id = tool_calls[0].get("id", "")  # fallback to id
+                if tool_call_id:
+                    tool_results = context.ctx.input
+                    try:
+                        tool_results = json.loads(tool_results)
+                    except Exception:
+                        pass
+                    self.handler.send_tool_results_sync({
+                        tool_call_id: tool_results
+                    })
+                    return True  # do not start new session, just send tool results
+
+        # update auto-turn in active session
+        if (self.handler.is_session_active()
+                and (auto_turn != self.prev_auto_turn
+                     or opt_vad_silence != self.prev_vad_silence
+                     or opt_vad_prefix != self.prev_vad_prefix)):
+            self.handler.update_session_autoturn_sync(auto_turn, opt_vad_silence, opt_vad_prefix)
+
+        # Tools
+        tools = self.window.core.api.google.tools.prepare(model, context.external_functions)
+        remote_tools = self.window.core.api.google.build_remote_tools(model)
+        if tools:
+            remote_tools = []  # in Google, remote tools are not allowed if function calling is used
+
+        # if auto-turn is enabled and prompt is empty, update session and context only
+        if auto_turn and self.handler.is_session_active() and (context.prompt.strip() == "" or context.prompt == "..."):
+            self.handler.update_session_tools_sync(tools, remote_tools)
+            self.handler.update_ctx(context.ctx)
+            return True  # do not send new request if session is active
+
+        # Last session ID
+        last_session_id = extract_last_session_id(context.history)
+        if is_debug:
+            print("[realtime session] Last ID", last_session_id)
+
+        # Voice
+        voice_name = "Kore"
+        try:
+            v = self.window.core.plugins.get_option("audio_output", "google_genai_tts_voice")
+            if v:
+                mapping = {"kore": "Kore", "puck": "Puck", "charon": "Charon", "verse": "Verse",
+                           "legend": "Legend"}
+                voice_name = mapping.get(str(v).strip().lower(), str(v))
+        except Exception:
+            pass
+
+        # Options
+        opts = RealtimeOptions(
+            provider=self.PROVIDER,
+            model=model.id,
+            system_prompt=context.system_prompt,
+            prompt=context.prompt,
+            voice=voice_name,
+            audio_data=audio_bytes,
+            audio_format=audio_format,
+            audio_rate=audio_rate,
+            vad=None,
+            extra=extra or {},
+            tools=tools,
+            remote_tools=remote_tools,
+            rt_signals=rt_signals,
+            rt_session_id=last_session_id,
+            auto_turn=auto_turn,
+            vad_end_silence_ms=opt_vad_silence,
+            vad_prefix_padding_ms=opt_vad_prefix,
+        )
+
+        # Start or append to realtime session via manager
+        try:
+            if is_debug:
+                print("[realtime] Starting session with options:", opts.to_dict())
+            rt = self.window.controller.realtime.manager
+            rt.start(context.ctx, opts)
+
+            self.prev_auto_turn = auto_turn
+            self.prev_vad_silence = opt_vad_silence
+            self.prev_vad_prefix = opt_vad_prefix
+            return True
+        except Exception as e:
+            self.window.core.debug.log(e)
+            return False  # fallback to non-live path
+
+    def handle_audio_input(self, event: RealtimeEvent):
+        """
+        Handle Realtime audio input event
+
+        :param event: RealtimeEvent
+        """
+        self.handler.rt_handle_audio_input_sync(event)
+
+    def manual_commit(self):
+        """Manually commit audio input to realtime session"""
+        self.handler.force_response_now_sync()
+
+    def shutdown(self):
+        """Shutdown realtime loops"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
+        try:
+            self.handler.stop_loop_sync()
+        except Exception:
+            pass
+
+    def reset(self):
+        """Close realtime session"""
+        if self.handler.is_session_active():
+            self.handler.close_session_sync()
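The sub-reply branch of `begin()` reduces to two small, reusable steps: pick the first usable tool-call id (preferring `call_id`, falling back to `id`), then decode the tool results as JSON, passing the raw string through if decoding fails. A minimal standalone sketch of that logic; the helper names and the demo harness here are illustrative, not part of the package:

```python
import json
from typing import Any, Dict, List, Optional

def first_tool_call_id(tool_calls: List[Dict[str, Any]]) -> Optional[str]:
    """Return the first usable tool-call id, preferring 'call_id' over 'id'."""
    if not isinstance(tool_calls, list) or not tool_calls:
        return None
    first = tool_calls[0]
    return first.get("call_id") or first.get("id") or None

def parse_tool_results(raw: str) -> Any:
    """Decode JSON tool results; fall back to the raw string on failure."""
    try:
        return json.loads(raw)
    except Exception:
        return raw

# Demo: mirrors handler.send_tool_results_sync({tool_call_id: tool_results})
calls = [{"id": "call_abc123", "name": "get_weather"}]
payload = {first_tool_call_id(calls): parse_tool_results('{"temp_c": 21}')}
print(payload)  # {'call_abc123': {'temp_c': 21}}
```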
pygpt_net/provider/api/google/video.py (new file)

@@ -0,0 +1,364 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.09.01 23:00:00                  #
+# ================================================== #
+
+import base64, datetime, os, requests
+import mimetypes
+import time
+
+from typing import Optional, Dict, Any, List
+from google import genai
+from google.genai import types as gtypes
+
+from PySide6.QtCore import QObject, Signal, QRunnable, Slot
+
+from pygpt_net.core.events import KernelEvent
+from pygpt_net.core.bridge.context import BridgeContext
+from pygpt_net.item.ctx import CtxItem
+from pygpt_net.utils import trans
+
+
+class Video:
+
+    MODE_GENERATE = "generate"
+    MODE_IMAGE_TO_VIDEO = "image2video"
+
+    def __init__(self, window=None):
+        self.window = window
+        self.worker = None
+
+    def generate(
+            self,
+            context: BridgeContext,
+            extra: Optional[Dict[str, Any]] = None,
+            sync: bool = True
+    ) -> bool:
+        """
+        Generate video(s) using Google GenAI Veo.
+
+        :param context: BridgeContext with prompt, model, attachments
+        :param extra: extra parameters (num, inline, duration, aspect_ratio)
+        :param sync: run synchronously (blocking) if True
+        :return: True if started
+        """
+        extra = extra or {}
+        ctx = context.ctx or CtxItem()
+        model = context.model
+        prompt = context.prompt
+        num = int(extra.get("num", 1))
+        inline = bool(extra.get("inline", False))
+
+        # decide sub-mode based on attachments (image-to-video when image is attached)
+        sub_mode = self.MODE_GENERATE
+        attachments = context.attachments or {}
+        if self._has_image_attachment(attachments):
+            sub_mode = self.MODE_IMAGE_TO_VIDEO
+
+        # model used to improve the prompt (not video model)
+        prompt_model = self.window.core.models.from_defaults()
+        tmp = self.window.core.config.get('video.prompt_model')
+        if self.window.core.models.has(tmp):
+            prompt_model = self.window.core.models.get(tmp)
+
+        worker = VideoWorker()
+        worker.window = self.window
+        worker.client = self.window.core.api.google.get_client()
+        worker.ctx = ctx
+        worker.mode = sub_mode
+        worker.attachments = attachments
+        worker.model = model.id  # Veo model id
+        worker.input_prompt = prompt
+        worker.model_prompt = prompt_model  # LLM for prompt rewriting
+        worker.system_prompt = self.window.core.prompt.get('video')
+        worker.raw = self.window.core.config.get('img_raw')
+        worker.num = num
+        worker.inline = inline
+
+        # optional params
+        worker.aspect_ratio = str(extra.get("aspect_ratio") or self.window.core.config.get('video.aspect_ratio') or "16:9")
+        worker.duration_seconds = int(extra.get("duration") or self.window.core.config.get('video.duration') or 8)
+        worker.fps = int(extra.get("fps") or self.window.core.config.get('video.fps') or 24)
+        worker.seed = extra.get("seed") or self.window.core.config.get('video.seed') or None
+        worker.negative_prompt = extra.get("negative_prompt") or self.window.core.config.get('video.negative_prompt') or None
+        worker.generate_audio = bool(extra.get("generate_audio", self.window.core.config.get('video.generate_audio') or False))
+        worker.resolution = (extra.get("resolution") or self.window.core.config.get('video.resolution') or "720p")
+
+        self.worker = worker
+        self.worker.signals.finished.connect(self.window.core.video.handle_finished)
+        self.worker.signals.finished_inline.connect(self.window.core.video.handle_finished_inline)
+        self.worker.signals.status.connect(self.window.core.video.handle_status)
+        self.worker.signals.error.connect(self.window.core.video.handle_error)
+
+        if sync or not self.window.controller.kernel.async_allowed(ctx):
+            self.worker.run()
+            return True
+
+        self.window.dispatch(KernelEvent(KernelEvent.STATE_BUSY, {"id": "video"}))
+        self.window.threadpool.start(self.worker)
+        return True
+
+    def _has_image_attachment(self, attachments: Dict[str, Any]) -> bool:
+        """Check if at least one image attachment is present."""
+        for _, att in (attachments or {}).items():
+            try:
+                p = getattr(att, "path", None)
+                if p and os.path.exists(p):
+                    mt, _ = mimetypes.guess_type(p)
+                    if mt and mt.startswith("image/"):
+                        return True
+            except Exception:
+                continue
+        return False
+
+
+class VideoSignals(QObject):
+    finished = Signal(object, list, str)  # ctx, paths, prompt
+    finished_inline = Signal(object, list, str)  # ctx, paths, prompt
+    status = Signal(object)  # message
+    error = Signal(object)  # exception
+
+
+class VideoWorker(QRunnable):
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self.signals = VideoSignals()
+        self.window = None
+        self.client: Optional[genai.Client] = None
+        self.ctx: Optional[CtxItem] = None
+
+        # params
+        self.mode = Video.MODE_GENERATE
+        self.attachments: Dict[str, Any] = {}
+        self.model = "veo-3.0-generate-001"
+        self.model_prompt = None
+        self.input_prompt = ""
+        self.system_prompt = ""
+        self.inline = False
+        self.raw = False
+        self.num = 1
+
+        # video generation params
+        self.aspect_ratio = "16:9"
+        self.duration_seconds = 8
+        self.fps = 24
+        self.seed: Optional[int] = None
+        self.negative_prompt: Optional[str] = None
+        self.generate_audio: bool = False  # Veo 3 only
+        self.resolution: str = "720p"  # Veo 3 supports 720p/1080p
+
+        # limits / capabilities
+        # self.veo_max_num = 4  # Veo returns up to 4 videos
+        self.veo_max_num = 1  # limit to 1 in Gemini API
+
+        # fallbacks
+        self.DEFAULT_VEO_MODEL = "veo-3.0-generate-001"
+
+    @Slot()
+    def run(self):
+        try:
+            # optional prompt enhancement
+            if not self.raw and not self.inline and self.input_prompt:
+                try:
+                    self.signals.status.emit(trans('vid.status.prompt.wait'))
+                    bridge_context = BridgeContext(
+                        prompt=self.input_prompt,
+                        system_prompt=self.system_prompt,
+                        model=self.model_prompt,
+                        max_tokens=200,
+                        temperature=1.0,
+                    )
+                    ev = KernelEvent(KernelEvent.CALL, {'context': bridge_context, 'extra': {}})
+                    self.window.dispatch(ev)
+                    resp = ev.data.get('response')
+                    if resp:
+                        self.input_prompt = resp
+                except Exception as e:
+                    self.signals.error.emit(e)
+                    self.signals.status.emit(trans('vid.status.prompt.error') + ": " + str(e))
+
+            # prepare config
+            num = min(self.num, self.veo_max_num)
+            cfg_kwargs = {
+                "number_of_videos": num,
+                #"duration_seconds": self._duration_for_model(self.model, self.duration_seconds),
+            }
+            if self.aspect_ratio:
+                cfg_kwargs["aspect_ratio"] = self.aspect_ratio
+            if self.seed is not None:
+                cfg_kwargs["seed"] = int(self.seed)
+            if self.negative_prompt:
+                cfg_kwargs["negative_prompt"] = self.negative_prompt
+            if self._is_veo3(self.model):
+                # Veo 3 supports audio and resolution
+                # WARN: but not Gemini API:
+                pass
+                """
+                cfg_kwargs["generate_audio"] = bool(self.generate_audio)
+                if self.resolution:
+                    cfg_kwargs["resolution"] = self.resolution
+                """
+
+            config = gtypes.GenerateVideosConfig(**cfg_kwargs)
+
+            # build request
+            req_kwargs = {
+                "model": self.model or self.DEFAULT_VEO_MODEL,
+                "prompt": self.input_prompt or "",
+                "config": config,
+            }
+
+            # image-to-video if an image attachment is present and supported
+            base_img = self._first_image_attachment(self.attachments)
+            if self.mode == Video.MODE_IMAGE_TO_VIDEO and base_img is not None and self._supports_image_to_video(self.model):
+                req_kwargs["image"] = gtypes.Image.from_file(location=base_img)
+
+            self.signals.status.emit(trans('vid.status.generating') + f": {self.input_prompt}...")
+
+            # start long-running operation
+            operation = self.client.models.generate_videos(**req_kwargs)
+
+            # poll until done
+            while not getattr(operation, "done", False):
+                time.sleep(10)
+                operation = self.client.operations.get(operation)
+
+            # extract response payload
+            op_resp = getattr(operation, "response", None) or getattr(operation, "result", None)
+            if not op_resp:
+                raise RuntimeError("Empty operation response.")
+
+            gen_list = getattr(op_resp, "generated_videos", None) or []
+            if not gen_list:
+                raise RuntimeError("No videos generated.")
+
+            # download and save all outputs up to num
+            paths: List[str] = []
+            for idx, gv in enumerate(gen_list[:num]):
+                data = self._download_video_bytes(getattr(gv, "video", None))
+                p = self._save(idx, data)
+                if p:
+                    paths.append(p)
+
+            if self.inline:
+                self.signals.finished_inline.emit(self.ctx, paths, self.input_prompt)
+            else:
+                self.signals.finished.emit(self.ctx, paths, self.input_prompt)
+
+        except Exception as e:
+            self.signals.error.emit(e)
+        finally:
+            self._cleanup()
+
+    # ---------- helpers ----------
+
+    def _is_veo3(self, model_id: str) -> bool:
+        mid = str(model_id or "").lower()
+        return mid.startswith("veo-3.")
+
+    def _supports_image_to_video(self, model_id: str) -> bool:
+        """Return True if the model supports image->video."""
+        mid = str(model_id or "").lower()
+        # Official support for image-to-video on veo-2 and veo-3 preview; keep extendable.
+        return ("veo-2.0" in mid) or ("veo-3.0-generate-preview" in mid) or ("veo-3.0-fast-generate-preview" in mid)
+
+    def _duration_for_model(self, model_id: str, requested: int) -> int:
+        """Adjust duration constraints to model-specific limits."""
+        mid = str(model_id or "").lower()
+        if "veo-2.0" in mid:
+            # Veo 2 supports 5–8s, default 8s.
+            return max(5, min(8, int(requested or 8)))
+        if "veo-3.0" in mid:
+            # Veo 3 commonly uses 8s clips; honor request if provided, otherwise 8s.
+            return int(requested or 8)
+        return int(requested or 8)
+
+    def _first_image_attachment(self, attachments: Dict[str, Any]) -> Optional[str]:
+        """Return path of the first image attachment, if any."""
+        for _, att in (attachments or {}).items():
+            try:
+                p = getattr(att, "path", None)
+                if p and os.path.exists(p):
+                    mt, _ = mimetypes.guess_type(p)
+                    if mt and mt.startswith("image/"):
+                        return p
+            except Exception:
+                continue
+        return None
+
+    def _download_video_bytes(self, file_ref) -> Optional[bytes]:
+        """
+        Download video bytes using the Files service.
+        Falls back to direct URL download if necessary.
+        """
+        if not file_ref:
+            return None
+
+        # Preferred: SDK-managed download (handles URIs and sets video_bytes).
+        try:
+            data = self.client.files.download(file=file_ref)
+            if isinstance(data, (bytes, bytearray)):
+                return bytes(data)
+        except Exception:
+            pass
+
+        # Fallback: try to fetch by uri or url.
+        uri = getattr(file_ref, "uri", None) or getattr(file_ref, "url", None) or getattr(file_ref, "download_uri", None)
+        if uri:
+            try:
+                r = requests.get(uri, timeout=120)
+                if r.status_code == 200:
+                    return r.content
+            except Exception:
+                pass
+
+        # Last resort: try inline/base64 if present.
+        b64 = getattr(file_ref, "video_bytes", None)
+        if isinstance(b64, (bytes, bytearray)):
+            return bytes(b64)
+        if isinstance(b64, str):
+            try:
+                return base64.b64decode(b64)
+            except Exception:
+                return None
+        return None
+
+    def _save(self, idx: int, data: Optional[bytes]) -> Optional[str]:
+        """Save video bytes to file and return path."""
+        if not data:
+            return None
+        name = (
+            datetime.date.today().strftime("%Y-%m-%d") + "_" +
+            datetime.datetime.now().strftime("%H-%M-%S") + "-" +
+            self.window.core.video.make_safe_filename(self.input_prompt) + "-" +
+            str(idx + 1) + ".mp4"
+        )
+        path = os.path.join(self.window.core.config.get_user_dir("video"), name)
+        self.signals.status.emit(trans('vid.status.downloading') + f" ({idx + 1} / {self.num}) -> {path}")
+
+        if self.window.core.video.save_video(path, data):
+            return str(path)
+
+        try:
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            with open(path, "wb") as f:
+                f.write(data)
+            return str(path)
+        except Exception:
+            return None
+
+    def _cleanup(self):
+        """Cleanup resources."""
+        sig = self.signals
+        self.signals = None
+        if sig is not None:
+            try:
+                sig.deleteLater()
+            except RuntimeError:
+                pass
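Stripped of the Qt worker plumbing, `VideoWorker.run()` follows the standard google-genai long-running-operation flow for Veo: start the generation, poll `client.operations.get()` until `operation.done`, then fetch the bytes via the Files service. A minimal sketch of that flow, assuming the google-genai SDK is installed and an API key is present in the environment (model id and 10-second poll interval taken from the diff):

```python
import time
from google import genai
from google.genai import types as gtypes

client = genai.Client()  # assumes GOOGLE_API_KEY / GEMINI_API_KEY in the environment

# Same config shape the worker builds in cfg_kwargs
config = gtypes.GenerateVideosConfig(number_of_videos=1, aspect_ratio="16:9")

operation = client.models.generate_videos(
    model="veo-3.0-generate-001",
    prompt="A timelapse of clouds rolling over a mountain ridge",
    config=config,
)

# Poll the long-running operation every 10 s, as the worker does
while not operation.done:
    time.sleep(10)
    operation = client.operations.get(operation)

# Preferred download path: SDK-managed Files service, returns raw bytes
video = operation.response.generated_videos[0].video
data = client.files.download(file=video)
with open("veo_clip-1.mp4", "wb") as f:
    f.write(data)
```

On top of this happy path, the worker's `_download_video_bytes()` adds two fallbacks: a plain HTTP GET against the file's `uri`/`url`, and finally any inline base64 `video_bytes` payload.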
pygpt_net/provider/api/openai/__init__.py

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.
+# Updated Date: 2025.08.30 06:00:00                  #
 # ================================================== #
 
 from openai import OpenAI
@@ -33,6 +33,7 @@ from .container import Container
 from .image import Image
 from .remote_tools import RemoteTools
 from .responses import Responses
+from .realtime import Realtime
 from .store import Store
 from .summarizer import Summarizer
 from .tools import Tools
@@ -57,6 +58,7 @@ class ApiOpenAI:
         self.image = Image(window)
         self.remote_tools = RemoteTools(window)
         self.responses = Responses(window)
+        self.realtime = Realtime(window)
         self.store = Store(window)
         self.summarizer = Summarizer(window)
         self.tools = Tools(window)
@@ -90,12 +92,18 @@ class ApiOpenAI:
         self.last_client_args = args
         return self.client
 
-    def call(
+    def call(
+            self,
+            context: BridgeContext,
+            extra: dict = None,
+            rt_signals = None
+    ) -> bool:
         """
         Call OpenAI API
 
         :param context: Bridge context
         :param extra: Extra arguments
+        :param rt_signals: Realtime signals for audio streaming
         :return: result
         """
         mode = context.mode
@@ -145,6 +153,18 @@
             MODE_RESEARCH,
             MODE_COMPUTER,
         ]:
+            if mode == MODE_AUDIO and stream:
+
+                # Realtime API for audio streaming
+                is_realtime = self.realtime.begin(
+                    context=context,
+                    model=model,
+                    extra=extra or {},
+                    rt_signals=rt_signals
+                )
+                if is_realtime:
+                    return True
+
             # responses API
             if use_responses_api:
                 response = self.responses.send(
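Taken together, these hunks give `ApiOpenAI.call()` a new early exit: an audio-mode streaming turn is offered to the realtime client first, and the regular responses/chat path only runs when `begin()` declines by returning False. A reduced, illustrative sketch of that control flow (the function shape here is a stand-in, not the real signature):

```python
MODE_AUDIO = "audio"  # stand-in for the constant imported in the real module

def call(mode: str, stream: bool, realtime_begin, non_live_call) -> bool:
    """Route audio+stream turns to the realtime path, else fall through."""
    if mode == MODE_AUDIO and stream:
        if realtime_begin():  # True -> a live session handled the turn
            return True
    return non_live_call()  # regular responses / chat completions path

# begin() returning False means "fall back to the non-live path"
assert call(MODE_AUDIO, True, lambda: True, lambda: False) is True
assert call(MODE_AUDIO, True, lambda: False, lambda: True) is True
assert call("chat", True, lambda: True, lambda: False) is False
```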
pygpt_net/provider/api/openai/realtime/__init__.py (new file)

@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ================================================== #
+# This file is a part of PYGPT package               #
+# Website: https://pygpt.net                         #
+# GitHub:  https://github.com/szczyglis-dev/py-gpt   #
+# MIT License                                        #
+# Created By  : Marcin Szczygliński                  #
+# Updated Date: 2025.08.31 23:00:00                  #
+# ================================================== #
+
+from .realtime import Realtime
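As on the Google side, the new package re-exports the provider-scoped client, so call sites import it without reaching into the module:

```python
# Illustrative import enabled by this re-export; in the app itself,
# ApiOpenAI wires it up as self.realtime = Realtime(window).
from pygpt_net.provider.api.openai.realtime import Realtime
```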