PyPI - pygpt-net - Versions diffs - 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl - Mend

pygpt-net: 2.6.30 (py3-none-any.whl) → 2.6.32 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122)

pygpt_net/CHANGELOG.txt +15 -0
pygpt_net/__init__.py +3 -3
pygpt_net/app.py +7 -1
pygpt_net/app_core.py +3 -1
pygpt_net/config.py +3 -1
pygpt_net/controller/__init__.py +9 -2
pygpt_net/controller/audio/audio.py +38 -1
pygpt_net/controller/audio/ui.py +2 -2
pygpt_net/controller/chat/audio.py +1 -8
pygpt_net/controller/chat/common.py +23 -62
pygpt_net/controller/chat/handler/__init__.py +0 -0
pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
pygpt_net/controller/chat/output.py +8 -3
pygpt_net/controller/chat/stream.py +3 -1071
pygpt_net/controller/chat/text.py +3 -2
pygpt_net/controller/kernel/kernel.py +11 -3
pygpt_net/controller/kernel/reply.py +5 -1
pygpt_net/controller/lang/custom.py +2 -2
pygpt_net/controller/media/__init__.py +12 -0
pygpt_net/controller/media/media.py +115 -0
pygpt_net/controller/realtime/__init__.py +12 -0
pygpt_net/controller/realtime/manager.py +53 -0
pygpt_net/controller/realtime/realtime.py +293 -0
pygpt_net/controller/ui/mode.py +23 -2
pygpt_net/controller/ui/ui.py +19 -1
pygpt_net/core/audio/audio.py +6 -1
pygpt_net/core/audio/backend/native/__init__.py +12 -0
pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
pygpt_net/core/audio/backend/native/player.py +139 -0
pygpt_net/core/audio/backend/native/realtime.py +250 -0
pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
pygpt_net/core/audio/backend/pyaudio/realtime.py +312 -0
pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
pygpt_net/core/audio/backend/shared/__init__.py +38 -0
pygpt_net/core/audio/backend/shared/conversions.py +211 -0
pygpt_net/core/audio/backend/shared/envelope.py +38 -0
pygpt_net/core/audio/backend/shared/player.py +137 -0
pygpt_net/core/audio/backend/shared/rt.py +52 -0
pygpt_net/core/audio/capture.py +5 -0
pygpt_net/core/audio/output.py +14 -2
pygpt_net/core/audio/whisper.py +6 -2
pygpt_net/core/bridge/bridge.py +2 -1
pygpt_net/core/bridge/worker.py +4 -1
pygpt_net/core/dispatcher/dispatcher.py +37 -1
pygpt_net/core/events/__init__.py +2 -1
pygpt_net/core/events/realtime.py +55 -0
pygpt_net/core/image/image.py +56 -5
pygpt_net/core/realtime/__init__.py +0 -0
pygpt_net/core/realtime/options.py +87 -0
pygpt_net/core/realtime/shared/__init__.py +0 -0
pygpt_net/core/realtime/shared/audio.py +213 -0
pygpt_net/core/realtime/shared/loop.py +64 -0
pygpt_net/core/realtime/shared/session.py +59 -0
pygpt_net/core/realtime/shared/text.py +37 -0
pygpt_net/core/realtime/shared/tools.py +276 -0
pygpt_net/core/realtime/shared/turn.py +38 -0
pygpt_net/core/realtime/shared/types.py +16 -0
pygpt_net/core/realtime/worker.py +160 -0
pygpt_net/core/render/web/body.py +24 -3
pygpt_net/core/text/utils.py +54 -2
pygpt_net/core/types/__init__.py +1 -0
pygpt_net/core/types/image.py +54 -0
pygpt_net/core/video/__init__.py +12 -0
pygpt_net/core/video/video.py +290 -0
pygpt_net/data/config/config.json +26 -5
pygpt_net/data/config/models.json +221 -103
pygpt_net/data/config/settings.json +244 -6
pygpt_net/data/css/web-blocks.css +6 -0
pygpt_net/data/css/web-chatgpt.css +6 -0
pygpt_net/data/css/web-chatgpt_wide.css +6 -0
pygpt_net/data/locale/locale.de.ini +35 -7
pygpt_net/data/locale/locale.en.ini +56 -17
pygpt_net/data/locale/locale.es.ini +35 -7
pygpt_net/data/locale/locale.fr.ini +35 -7
pygpt_net/data/locale/locale.it.ini +35 -7
pygpt_net/data/locale/locale.pl.ini +38 -7
pygpt_net/data/locale/locale.uk.ini +35 -7
pygpt_net/data/locale/locale.zh.ini +31 -3
pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
pygpt_net/data/locale/plugin.cmd_web.en.ini +8 -0
pygpt_net/item/model.py +22 -1
pygpt_net/plugin/audio_input/plugin.py +37 -4
pygpt_net/plugin/audio_input/simple.py +57 -8
pygpt_net/plugin/cmd_files/worker.py +3 -0
pygpt_net/provider/api/google/__init__.py +76 -7
pygpt_net/provider/api/google/audio.py +8 -1
pygpt_net/provider/api/google/chat.py +45 -6
pygpt_net/provider/api/google/image.py +226 -86
pygpt_net/provider/api/google/realtime/__init__.py +12 -0
pygpt_net/provider/api/google/realtime/client.py +1945 -0
pygpt_net/provider/api/google/realtime/realtime.py +186 -0
pygpt_net/provider/api/google/video.py +364 -0
pygpt_net/provider/api/openai/__init__.py +22 -2
pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
pygpt_net/provider/api/openai/realtime/client.py +1828 -0
pygpt_net/provider/api/openai/realtime/realtime.py +193 -0
pygpt_net/provider/audio_input/google_genai.py +103 -0
pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
pygpt_net/provider/audio_output/google_tts.py +0 -12
pygpt_net/provider/audio_output/openai_tts.py +8 -5
pygpt_net/provider/core/config/patch.py +241 -178
pygpt_net/provider/core/model/patch.py +28 -2
pygpt_net/provider/llms/google.py +8 -9
pygpt_net/provider/web/duckduck_search.py +212 -0
pygpt_net/ui/layout/toolbox/audio.py +55 -0
pygpt_net/ui/layout/toolbox/footer.py +14 -42
pygpt_net/ui/layout/toolbox/image.py +7 -13
pygpt_net/ui/layout/toolbox/raw.py +52 -0
pygpt_net/ui/layout/toolbox/split.py +48 -0
pygpt_net/ui/layout/toolbox/toolbox.py +8 -8
pygpt_net/ui/layout/toolbox/video.py +49 -0
pygpt_net/ui/widget/option/combo.py +15 -1
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/METADATA +46 -22
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/RECORD +121 -73
pygpt_net/core/audio/backend/pyaudio.py +0 -554
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/LICENSE +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/WHEEL +0 -0
{pygpt_net-2.6.30.dist-info → pygpt_net-2.6.32.dist-info}/entry_points.txt +0 -0

pygpt_net/data/locale/locale.pl.ini CHANGED Viewed

@@ -1,4 +1,7 @@
 [LOCALE]
 about.btn.github = GitHub
 about.btn.support = Wsparcie
 about.btn.website = WWW
@@ -191,6 +194,7 @@ attachments_uploaded.tab = Wgrane pliki
 audio.cache.clear.confirm = Czy na pewno chcesz usunąć wszystkie pliki w pamięci podręcznej audio?
 audio.cache.clear.success = OK. Wszystkie pliki pamięci podręcznej audio zostały wyczyszczone.
 audio.control.btn = Kontrola głosowa
+audio.loop = Pętla
 audio.magic_word.detected = Wykryto magiczne słowo!
 audio.magic_word.invalid = To nie magiczne słowo :(
 audio.magic_word.please = Podaj magiczne słowo...
@@ -846,7 +850,7 @@ mode.agent_openai.tooltip = Zaawansowani agenci (OpenAI)
 mode.agent.tooltip = Prości agenci (legacy)
 mode.assistant = Asystent
 mode.assistant.tooltip = Czat przy użyciu API Asystentów
-mode.audio = Czat Audio
+mode.audio = Realtime + audio
 mode.chat = Czat
 mode.chat.tooltip = Tryb czatu (domyślny)
 mode.completion = Uzupełnianie
@@ -855,7 +859,7 @@ mode.computer = Kontrola komputera
 mode.computer.tooltip = Kontrola komputera (mysz, klawiatura)
 mode.expert = Eksperci (współpraca)
 mode.expert.tooltip = Eksperci do wezwania w tle
-mode.img = Obraz
+mode.img = Obraz i wideo
 mode.img.tooltip = Generowanie obrazów przy użyciu DALL-E
 mode.langchain = Langchain
 mode.langchain.tooltip = Czat z modelami dostarczonymi przez Langchain
@@ -1128,6 +1132,8 @@ settings.audio.input.stop_interval.desc = Interwał w sekundach dla automatyczne
 settings.audio.input.timeout = Limit czasu nagrywania
 settings.audio.input.timeout.continuous = Włącz limit czasu w trybie ciągłym
 settings.audio.input.timeout.desc = Limit czasu (w sekundach) dla automatycznego zatrzymania nagrywania, 0 aby wyłączyć, domyślnie: 120
+settings.audio.input.vad.prefix = VAD prefix padding (in ms)
+settings.audio.input.vad.silence = VAD end silence (in ms)
 settings.audio.output.backend = Backend dla wyjścia audio
 settings.audio.output.backend.desc = Wybierz backend dla wyjścia audio.
 settings.audio.output.device = Urządzenie do wyjścia audio
@@ -1198,9 +1204,10 @@ settings.frequency_penalty = Frequency Penalty
 settings.func_call.native = Używaj natywnych wywołań funkcji API
 settings.func_call.native.desc = Jeśli włączone, aplikacja będzie używać natywnych wywołań funkcji API zamiast wewnętrznego formatu pygpt i poniższych promptów poleceń. Tylko tryby czatu i asystentów.
 settings.img_dialog_open = Otwórz okno dialogowe obrazu po wygenerowaniu (Tryb obrazu)
-settings.img_prompt_model = DALL-E: model do generowania promptów
-settings.img_quality = DALL-E: jakość obrazu
-settings.img_resolution = DALL-E: rozmiar obrazu
+settings.img_prompt_model = Model do generowania promptów
+settings.img_prompt_model.desc = LLM used to refine your prompt before image generation (not the image model)
+settings.img_quality = Jakość obrazu
+settings.img_resolution = Rozmiar obrazu
 settings.layout.animation.disable = Wyłącz animacje
 settings.layout.animation.disable.desc = Wyłącza animacje układu, jak animowane ładowarki, itp.
 settings.layout.density = Rozmiar layoutu
@@ -1295,8 +1302,10 @@ settings.prompt.ctx.auto_summary.user = Kontekst: auto-podsumowanie (wiadomość
 settings.prompt.ctx.auto_summary.user.desc = Placeholdery: {input}, {output}
 settings.prompt.expert = Ekspert: Główna wskazówka
 settings.prompt.expert.desc = Instrukcja (systemowa wskazówka) dla głównego eksperta, jak obsługiwać ekspertów pomocniczych. Instrukcje dla ekspertów pomocniczych są podawane z ich ustawień.
-settings.prompt.img = DALL-E: generowanie obrazu
+settings.prompt.img = Generowanie obrazu
 settings.prompt.img.desc = Prompt do generowania poleceń dla DALL-E (jeśli surowy tryb jest wyłączony). Tylko tryb obrazu.
+settings.prompt.video = Video generation
+settings.prompt.video.desc = Prompt for generating prompts for video model (if raw-mode is disabled). Image / Videos mode only.
 settings.remote_tools.code_interpreter = Interpreter kodu
 settings.remote_tools.code_interpreter.desc = Włącz narzędzie `code_interpreter` w trybie Czat / za pośrednictwem OpenAI Responses API.
 settings.remote_tools.file_search = File search
@@ -1340,7 +1349,9 @@ settings.section.ctx = Kontekst
 settings.section.debug = Debugowanie
 settings.section.files = Pliki i załączniki
 settings.section.general = Ogólne
-settings.section.images = Obrazy
+settings.section.images = Obrazy i wideo
+settings.section.images.image = Image
+settings.section.images.video = Video
 settings.section.layout = Wygląd
 settings.section.llama_index = Indeksy / LlamaIndex
 settings.section.llama-index.chat = Czat
@@ -1368,6 +1379,22 @@ settings.upload.data_dir.desc = Włącz, aby przechowywać wszystko w jednym kat
 settings.upload.store = Przechowuj załączniki w katalogu upload w katalogu roboczym
 settings.upload.store.desc = Włącz przechowywanie lokalnej kopii przesłanych załączników do przyszłego użytku
 settings.use_context = Włącz kontekst (pamięć)
+settings.video.aspect_ratio = Aspect ratio
+settings.video.aspect_ratio.desc = Frame aspect ratio (e.g., 16:9, 9:16, 1:1); availability depends on the selected model
+settings.video.duration = Video duration
+settings.video.duration.desc = Clip length in seconds; limits may vary by model
+settings.video.fps = FPS
+settings.video.fps.desc = Frames per second (e.g., 24, 25, 30); may be rounded or ignored by the model
+settings.video.generate_audio = Generate audio
+settings.video.generate_audio.desc = Include synthesized background audio if supported by the model
+settings.video.negative_prompt = Negative prompt
+settings.video.negative_prompt.desc = Words or phrases to avoid in the output (comma-separated)
+settings.video.prompt_model = Prompt enhancement model
+settings.video.prompt_model.desc = LLM used to refine your prompt before video generation (not the video model)
+settings.video.resolution = Video resolution
+settings.video.resolution.desc = Target output resolution (e.g., 720p, 1080p); availability depends on the model
+settings.video.seed = Seed
+settings.video.seed.desc = Optional random seed for reproducible results; leave empty for random
 settings.vision.capture.auto = Auto przechwyt.
 settings.vision.capture.enabled = Kamera
 settings.vision.capture.height = Kamera - obraz szerokość (px)
@@ -1527,6 +1554,10 @@ updater.check.launch = Sprawdzaj przy uruchamianiu
 update.released = wydanie
 update.snap = Pobierz ze Snap Store
 update.title = Sprawdzanie dostępności aktualizacji
+vid.status.downloading = Downloading video... please wait...
+vid.status.generating = Generating video from
+vid.status.prompt.error = Enhancement prompt error occured
+vid.status.prompt.wait = Preparing prompt... please wait...
 vision.capture.auto = Auto przechwyt.
 vision.capture.auto.click = Auto przechwyt. jest włączone!
 vision.capture.auto.label = Auto przechwytywanie wł.

pygpt_net/data/locale/locale.uk.ini CHANGED Viewed

@@ -191,6 +191,7 @@ attachments_uploaded.tab = Завантажені файли
 audio.cache.clear.confirm = Ви впевнені, що хочете видалити всі кешовані аудіофайли?
 audio.cache.clear.success = OK. Усі файли аудіокешу очищено.
 audio.control.btn = Керування голосом
+audio.loop = Петля
 audio.magic_word.detected = Магічне слово виявлено!
 audio.magic_word.invalid = Не магічне слово :(
 audio.magic_word.please = Магічне слово, будь ласка...
@@ -845,7 +846,7 @@ mode.agent_openai.tooltip = Просунуті агенти (OpenAI)
 mode.agent.tooltip = Прості агенти (legacy)
 mode.assistant = Помічник
 mode.assistant.tooltip = Чат за допомогою API Асистентів
-mode.audio = Чат з аудіо
+mode.audio = Realtime + audio
 mode.chat = Чат
 mode.chat.tooltip = Режим чату (за замовчуванням)
 mode.completion = Завершення
@@ -854,7 +855,7 @@ mode.computer = Використання комп'ютера
 mode.computer.tooltip = Використання комп'ютера (миша, клавіатура, навігація)
 mode.expert = Експерти (співпраця)
 mode.expert.tooltip = Виклик експертів на задній план
-mode.img = Зображення
+mode.img = Зображення та відео
 mode.img.tooltip = Генерація зображень за допомогою DALL-E
 mode.langchain = Langchain
 mode.langchain.tooltip = Чат з моделями, наданими Langchain
@@ -1127,6 +1128,8 @@ settings.audio.input.stop_interval.desc = Інтервал у секундах
 settings.audio.input.timeout = Тайм-аут запису
 settings.audio.input.timeout.continuous = Увімкнути тайм-аут у безперервному режимі
 settings.audio.input.timeout.desc = Тайм-аут (в секундах) для автоматичної зупинки запису, 0 для відключення, за замовчуванням: 120
+settings.audio.input.vad.prefix = Заповнення префікса VAD (в мс)
+settings.audio.input.vad.silence = Кінцева тиша VAD (в мс)
 settings.audio.output.backend = Бекенд для аудіовиходу
 settings.audio.output.backend.desc = Виберіть бекенд для аудіовиходу.
 settings.audio.output.device = Пристрій для аудіовиходу
@@ -1197,9 +1200,10 @@ settings.frequency_penalty = Частотний штраф
 settings.func_call.native = Використовувати рідні виклики функцій API
 settings.func_call.native.desc = Якщо увімкнено, програма буде використовувати рідні виклики функцій API замість внутрішнього формату pygpt і нижченаведених запитів команд не використовуватимуться. Лише режими чату та асистентів.
 settings.img_dialog_open = Відкрити діалогове вікно зображення після генерації (Режим зображення)
-settings.img_prompt_model = DALL-E: модель генерації запиту
-settings.img_quality = DALL-E: якість зображення
-settings.img_resolution = DALL-E: розмір зображення
+settings.img_prompt_model = Модель Генерації Запиту
+settings.img_prompt_model.desc = LLM, що використовується для уточнення вашого запиту перед генерацією зображень (не модель зображень)
+settings.img_quality = Якість Зображення
+settings.img_resolution = Розмір Зображення
 settings.layout.animation.disable = Вимкнути анімації
 settings.layout.animation.disable.desc = Вимикає анімації макета, як анімовані завантажувачі тощо.
 settings.layout.density = Щільність компонування
@@ -1294,8 +1298,10 @@ settings.prompt.ctx.auto_summary.user = Контекст: авто-резюме
 settings.prompt.ctx.auto_summary.user.desc = Заповнювачі: {input}, {output}
 settings.prompt.expert = Експерт: Основний запит
 settings.prompt.expert.desc = Інструкція (системний запит) для ведучого експерта, як керувати підеекспертами. Інструкції для підеекспертів даються з їхніх налаштувань.
-settings.prompt.img = DALL-E: генерація зображення
+settings.prompt.img = Генерація зображення
 settings.prompt.img.desc = Підказка для генерації команддля DALL-E (якщо вимкнено сирівний режим). Тільки режим зображення.
+settings.prompt.video = Генерація відео
+settings.prompt.video.desc = Запит для генерації запитів для відеомоделі (якщо режим без обробки вимкнено). Тільки режим Зображення/Відео.
 settings.remote_tools.code_interpreter = Інтерпретатор коду
 settings.remote_tools.code_interpreter.desc = Увімкніть віддалений інструмент `code_interpreter` у режимі Чат / через Responses API OpenAI.
 settings.remote_tools.file_search = File search
@@ -1339,7 +1345,9 @@ settings.section.ctx = Контекст
 settings.section.debug = Налагодження
 settings.section.files = Файли та вкладення
 settings.section.general = Загальні
-settings.section.images = Зображення
+settings.section.images = Зображення і відео
+settings.section.images.image = Зображення
+settings.section.images.video = Відео
 settings.section.layout = Макет
 settings.section.llama_index = Індекси / LlamaIndex
 settings.section.llama-index.chat = Чат
@@ -1367,6 +1375,22 @@ settings.upload.data_dir.desc = Увімкніть, щоб зберігати в
 settings.upload.store = Зберігати вкладення у директорії завантаження робочого каталогу
 settings.upload.store.desc = Увімкнути зберігання локальної копії завантажених вкладень для майбутнього використання
 settings.use_context = Використовувати контекст (пам'ять)
+settings.video.aspect_ratio = Співвідношення сторін
+settings.video.aspect_ratio.desc = Співвідношення сторін кадру (наприклад, 16:9, 9:16, 1:1); доступність залежить від вибраної моделі
+settings.video.duration = Тривалість відео
+settings.video.duration.desc = Довжина кліпу в секундах; межі можуть відрізнятися залежно від моделі
+settings.video.fps = FPS
+settings.video.fps.desc = Кількість кадрів на секунду (наприклад, 24, 25, 30); може бути округлена або проігнорована моделлю
+settings.video.generate_audio = Генерувати аудіо
+settings.video.generate_audio.desc = Включити синтезоване фонове аудіо, якщо модель його підтримує
+settings.video.negative_prompt = Негативний запит
+settings.video.negative_prompt.desc = Слова чи фрази для уникнення в результаті (через кому)
+settings.video.prompt_model = Модель покращення запиту
+settings.video.prompt_model.desc = LLM, що використовується для уточнення вашого запиту перед генерацією відео (не відеомодель)
+settings.video.resolution = Роздільна здатність відео
+settings.video.resolution.desc = Цільова вихідна роздільна здатність (наприклад, 720p, 1080p); доступність залежить від моделі
+settings.video.seed = Seed
+settings.video.seed.desc = Опціональний випадковий seed для відтворюваних результатів; залиште порожнім для випадкового
 settings.vision.capture.auto = Автоматичне захоплення
 settings.vision.capture.enabled = Камера
 settings.vision.capture.height = Висота захоплення з камери (пікселі)
@@ -1526,6 +1550,10 @@ updater.check.launch = Перевіряти при запуску
 update.released = збірка
 update.snap = Перейти до Snap Store
 update.title = Перевірка оновлень
+vid.status.downloading = Завантаження відео... будь ласка, зачекайте...
+vid.status.generating = Генерація відео з
+vid.status.prompt.error = Виникла помилка під час покращення запиту
+vid.status.prompt.wait = Підготовка запиту... будь ласка, зачекайте...
 vision.capture.auto = Автоматичне захоплення
 vision.capture.auto.click = Увімкнуто автоматичне захоплення!
 vision.capture.auto.label = Автоматичне захоплення увімкнуто

pygpt_net/data/locale/locale.zh.ini CHANGED Viewed

@@ -191,6 +191,7 @@ attachments_uploaded.tab = 已上傳的文件
 audio.cache.clear.confirm = 您确定要删除所有缓存的音频文件吗？
 audio.cache.clear.success = OK. 所有音频缓存文件已清除。
 audio.control.btn = 语音控制
+audio.loop = 循环
 audio.magic_word.detected = 檢測到魔法詞！
 audio.magic_word.invalid = 不是魔法词 :(
 audio.magic_word.please = 请说魔法词...
@@ -845,7 +846,7 @@ mode.agent_openai.tooltip = 高级代理 (OpenAI)
 mode.agent.tooltip = 简单代理（自主）
 mode.assistant = 助手
 mode.assistant.tooltip = 使用助手API進行聊天
-mode.audio = 语音聊天
+mode.audio = Realtime + audio
 mode.chat = 聊天模式
 mode.chat.tooltip = 聊天模式（預設）
 mode.completion = 完成模式
@@ -854,7 +855,7 @@ mode.computer = 使用计算机
 mode.computer.tooltip = 使用计算机（鼠标、键盘、导航）
 mode.expert = 专家 (合作)
 mode.expert.tooltip = 背景中呼叫专家
-mode.img = 圖像
+mode.img = 图像和视频
 mode.img.tooltip = 使用DALL-E生成圖像
 mode.langchain = Langchain模式
 mode.langchain.tooltip = 使用Langchain提供的模型進行聊天
@@ -1127,6 +1128,8 @@ settings.audio.input.stop_interval.desc = 自动转录音频片段的间隔（
 settings.audio.input.timeout = 录音超时
 settings.audio.input.timeout.continuous = 在连续模式下启用超时
 settings.audio.input.timeout.desc = 自动停止录音的超时时间（秒），0为禁用，默认：120
+settings.audio.input.vad.prefix = VAD 前缀填充（以毫秒为单位）
+settings.audio.input.vad.silence = VAD 结束静音（以毫秒为单位）
 settings.audio.output.backend = 音频输出的后端
 settings.audio.output.backend.desc = 选择音频输出的后端。
 settings.audio.output.device = 音频输出的设备
@@ -1198,6 +1201,7 @@ settings.func_call.native = 使用本机 API 函数调用
 settings.func_call.native.desc = 如果启用，应用程序将使用本机 API 函数调用，而不是内部 pygpt 格式，并且下面的命令提示将不被使用。仅聊天和助手模式。
 settings.img_dialog_open = 生成后打开图像对话框（图像模式）
 settings.img_prompt_model = DALL-E：提示生成模型
+settings.img_prompt_model.desc = 用于在图像生成前优化您的提示的LLM（不是图像模型）
 settings.img_quality = DALL-E：图像质量
 settings.img_resolution = DALL-E：图像尺寸
 settings.layout.animation.disable = 禁用动画
@@ -1296,6 +1300,8 @@ settings.prompt.expert = 专家：主提示
 settings.prompt.expert.desc = 对主专家如何处理奴隶专家的指令（系统提示）。奴隶专家的指令根据他们的预设给出。
 settings.prompt.img = DALL-E：生成图像
 settings.prompt.img.desc = 提示用于生成DALL-E的命令（如果原始模式被禁用）。仅图像模式。
+settings.prompt.video = 视频生成
+settings.prompt.video.desc = 为视频模型生成提示（如果未禁用原始模式）。仅限图像/视频模式。
 settings.remote_tools.code_interpreter = 代码解释器
 settings.remote_tools.code_interpreter.desc = 在聊天模式/通过 OpenAI Responses API 启用“code_interpreter”远程工具。
 settings.remote_tools.file_search = File search
@@ -1339,7 +1345,9 @@ settings.section.ctx = 上下文
 settings.section.debug = 调试
 settings.section.files = 文件和附件
 settings.section.general = 一般
-settings.section.images = 圖像
+settings.section.images = 图像和视频
+settings.section.images.image = 图像
+settings.section.images.video = 视频
 settings.section.layout = 布局
 settings.section.llama_index = 索引 / LlamaIndex
 settings.section.llama-index.chat = 聊天
@@ -1367,6 +1375,22 @@ settings.upload.data_dir.desc = 启用以将所有内容存储在一个数据目
 settings.upload.store = 在工作目录上传目录中存储附件
 settings.upload.store.desc = 启用以存储上传附件的本地副本以供将来使用
 settings.use_context = 使用上下文（记忆）
+settings.video.aspect_ratio = 纵横比
+settings.video.aspect_ratio.desc = 帧的纵横比（例如，16:9、9:16、1:1）；可用性取决于所选模型
+settings.video.duration = 视频时长
+settings.video.duration.desc = 剪辑长度（以秒为单位）；限制可能因模型而异
+settings.video.fps = FPS
+settings.video.fps.desc = 每秒帧数（例如，24、25、30）；可能会被模型四舍五入或忽略
+settings.video.generate_audio = 生成音频
+settings.video.generate_audio.desc = 如果模型支持，包含合成背景音频
+settings.video.negative_prompt = 负面提示
+settings.video.negative_prompt.desc = 在输出中要避免的词语或短语（用逗号分隔）
+settings.video.prompt_model = 提示增强模型
+settings.video.prompt_model.desc = 用于在视频生成前优化您的提示的LLM（不是视频模型）
+settings.video.resolution = 视频分辨率
+settings.video.resolution.desc = 目标输出分辨率（例如，720p，1080p）；可用性取决于模型
+settings.video.seed = 随机种子
+settings.video.seed.desc = 可选的随机种子以获得可重复的结果；留空为随机
 settings.vision.capture.auto = 自动捕获
 settings.vision.capture.enabled = 相机
 settings.vision.capture.height = 相机高度（px）
@@ -1526,6 +1550,10 @@ updater.check.launch = 啟動時檢查
 update.released = 發布時間
 update.snap = 前往Snap商店
 update.title = 檢查更新中
+vid.status.downloading = 正在下载视频...请稍候...
+vid.status.generating = 正在从中生成视频
+vid.status.prompt.error = 提示增强出错
+vid.status.prompt.wait = 正在准备提示...请稍候...
 vision.capture.auto = 自動捕獲
 vision.capture.auto.click = 已啟用自動捕獲！
 vision.capture.auto.label = 啟用自動捕獲

pygpt_net/data/locale/plugin.audio_input.en.ini CHANGED Viewed

@@ -17,6 +17,10 @@ google_args.tooltip = Provide additional keyword arguments for recognize_google(
 google_cloud_args.description = Additional keyword arguments for r.recognize_google_cloud(audio, **kwargs).
 google_cloud_args.label = Additional keyword arguments
 google_cloud_args.tooltip = Provide additional keyword arguments for recognize_google_cloud()
+google_genai_audio_model.description = Specify Gemini model supporting audio, e.g., gemini-2.5-flash
+google_genai_audio_model.label = Model
+google_genai_audio_prompt.description = System prompt for transcription
+google_genai_audio_prompt.label = System Prompt
 magic_word.description = Activate listening only after the magic word is provided, like 'Hey GPT' or 'OK GPT'. Default: False.
 magic_word.label = Magic word
 magic_word_phrase_length.description = Magic word phrase length. Default: 2.

pygpt_net/data/locale/plugin.audio_output.en.ini CHANGED Viewed

@@ -15,6 +15,10 @@ eleven_labs_voice.description = Specify the Voice ID.
 eleven_labs_voice.label = Voice ID
 google_api_key.description = You can obtain your own API key here: https://console.cloud.google.com/apis/library/texttospeech.googleapis.com
 google_api_key.label = Google Cloud Text-to-speech API Key
+google_genai_tts_model.description = Specify Gemini TTS model, e.g.: gemini-2.5-flash-preview-tts or gemini-2.5-pro-preview-tts
+google_genai_tts_model.label = Model
+google_genai_tts_voice.description = Specify voice, e.g.: Puck, Kore, Charon, Leda, Zephyr... (case-sensitive)
+google_genai_tts_voice.label = Voice
 google_lang.description = Specify the language code.
 google_lang.label = Language code
 google_voice.description = Specify the voice.

pygpt_net/data/locale/plugin.cmd_web.en.ini CHANGED Viewed

@@ -31,6 +31,14 @@ cmd.web_url_raw.tooltip = Example prompt: Give me the raw content of http://exam
 cmd.web_urls.description = Enable `web_urls` command execution.\nIf enabled, the model will be able to search the Web and get a list of found URLs.
 cmd.web_urls.label = Enable: getting a list of URLs from search results
 cmd.web_urls.tooltip = Example prompt: Give me the list of URLs for query (question).
+ddg_backend.description = Engine backend: auto, html, lite.
+ddg_backend.label = Backend
+ddg_region.description = DuckDuckGo region, e.g. us-en, pl-pl, wt-wt
+ddg_region.label = Region (kl)
+ddg_safesearch.description = Allowed values: on, moderate, off
+ddg_safesearch.label = SafeSearch
+ddg_timelimit.description = Use: d, w, m, y or leave empty for any time.
+ddg_timelimit.label = Time limit (df)
 disable_ssl.description = Disables SSL verification when crawling web pages.
 disable_ssl.label = Disable SSL verification
 google_api_cx.description = You can find your CX ID at https://programmablesearchengine.google.com/controlpanel/all\nRemember to enable the "Search on ALL internet pages" option in project settings.

pygpt_net/item/model.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.28 09:00:00                  #
+# Updated Date: 2025.09.01 23:00:00                  #
 # ================================================== #
 import json
@@ -19,6 +19,7 @@ from pygpt_net.core.types import (
     MODE_AUDIO,
     MULTIMODAL_AUDIO,
     OPENAI_COMPATIBLE_PROVIDERS,
+    MULTIMODAL_VIDEO,
 )
 class ModelItem:
@@ -287,6 +288,26 @@ class ModelItem:
             return True
         return False
+    def is_video_input(self) -> bool:
+        """
+        Check if model supports video input
+        :return: True if supports video input
+        """
+        if MULTIMODAL_VIDEO in self.input:
+            return True
+        return False
+    def is_video_output(self) -> bool:
+        """
+        Check if model supports video output
+        :return: True if supports video output
+        """
+        if MULTIMODAL_VIDEO in self.output:
+            return True
+        return False
     def dump(self) -> str:
         """
         Dump event to json string

pygpt_net/plugin/audio_input/plugin.py CHANGED Viewed

@@ -6,7 +6,7 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2024.11.26 19:00:00                  #
+# Updated Date: 2025.08.31 23:00:00                  #
 # ================================================== #
 import os
@@ -23,6 +23,7 @@ from pygpt_net.utils import trans
 from .config import Config
 from .worker import Worker
 from .simple import Simple
+from ...core.types import MODE_AUDIO
 class Plugin(BasePlugin):
@@ -124,13 +125,31 @@ class Plugin(BasePlugin):
             words = [x.strip() for x in words]  # remove white-spaces
         return words
-    def toggle_recording_simple(self):
+    def toggle_recording_simple(
+            self,
+            state: bool = None,
+            auto: bool = False
+    ):
         """
         Event: AUDIO_INPUT_RECORD_TOGGLE
         Toggle recording
+        :param state: state to set
+        :param auto: True if called automatically (not by user)
+        """
+        if self.window.controller.realtime.is_enabled():
+            self.handler_simple.toggle_realtime(state=state, auto=auto)
+            return
+        self.handler_simple.toggle_recording(state=state)
+    def is_recording(self) -> bool:
         """
-        self.handler_simple.toggle_recording()
+        Check if is recording (simple mode)
+        :return: True if is recording
+        """
+        return self.handler_simple.is_recording
     def toggle_speech(self, state: bool):
         """
@@ -214,7 +233,9 @@ class Plugin(BasePlugin):
             self.toggle_speech(data['value'])
         elif name == Event.AUDIO_INPUT_RECORD_TOGGLE:
-            self.toggle_recording_simple()
+            state = data['state'] if 'value' in data else None
+            auto = data['auto'] if 'auto' in data else False
+            self.toggle_recording_simple(state=state, auto=auto)
         elif name == Event.AUDIO_INPUT_STOP:
             self.on_stop()
@@ -492,6 +513,18 @@ class Plugin(BasePlugin):
                 self.window.dispatch(event)  # send text, input clear in send method
                 self.set_status('')
+    def handle_realtime_stopped(self):
+        """Handle realtime stopped"""
+        context = BridgeContext()
+        context.prompt = "..."
+        extra = {}
+        event = KernelEvent(KernelEvent.INPUT_SYSTEM, {
+            'context': context,
+            'extra': extra,
+        })
+        self.window.dispatch(event)  # send text, input clear in send method
+        self.set_status('')
     @Slot(object)
     def handle_status(self, data: str):
         """

pygpt_net/plugin/audio_input/simple.py CHANGED Viewed

@@ -6,14 +6,14 @@
 # GitHub:  https://github.com/szczyglis-dev/py-gpt   #
 # MIT License                                        #
 # Created By  : Marcin Szczygliński                  #
-# Updated Date: 2025.08.27 07:00:00                  #
+# Updated Date: 2025.08.31 23:00:00                  #
 # ================================================== #
 import os
 from PySide6.QtCore import QTimer
-from pygpt_net.core.events import AppEvent
+from pygpt_net.core.events import AppEvent, RealtimeEvent
 from pygpt_net.core.tabs.tab import Tab
 from pygpt_net.utils import trans
@@ -32,8 +32,46 @@ class Simple:
         self.is_recording = False
         self.timer = None
-    def toggle_recording(self):
-        """Toggle recording"""
+    def toggle_realtime(
+            self,
+            state: bool = None,
+            auto: bool = False
+    ):
+        """
+        Toggle recording
+        :param state: True to start recording, False to stop recording, None to toggle
+        :param auto: True if called automatically (not by user)
+        """
+        if state is not None:
+            if state and not self.is_recording:
+                self.start_recording(realtime=True)
+            elif not state:
+                self.force_stop()
+            else:
+                self.force_stop()
+            return
+        if self.is_recording:
+            self.stop_recording(realtime=True)
+            if not auto:
+                self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_STOP))
+        else:
+            self.start_recording(realtime=True)
+            if not auto:
+                self.plugin.window.dispatch(RealtimeEvent(RealtimeEvent.RT_INPUT_AUDIO_MANUAL_START))
+    def toggle_recording(self, state: bool = None):
+        """
+        Toggle recording
+        :param state: True to start recording, False to stop recording, None to toggle
+        """
+        if state is not None:
+            if state and not self.is_recording:
+                self.start_recording()
+            elif not state:
+                self.force_stop()
+            return
         if self.is_recording:
             self.stop_recording()
         else:
@@ -51,11 +89,12 @@ class Simple:
         """Stop timeout"""
         self.stop_recording(timeout=True)
-    def start_recording(self, force: bool = False):
+    def start_recording(self, force: bool = False, realtime: bool = False):
         """
         Start recording
         :param force: True to force recording
+        :param realtime: True if called from realtime callback
         """
         # display snap warning if not displayed yet
         if (not self.plugin.window.core.config.get("audio.input.snap", False)
@@ -89,7 +128,7 @@ class Simple:
             # disable in continuous mode
             timeout = int(self.plugin.window.core.config.get('audio.input.timeout', 120) or 0) # get timeout
             timeout_continuous = self.plugin.window.core.config.get('audio.input.timeout.continuous', False) # enable continuous timeout
-            if timeout > 0:
+            if timeout > 0 and not realtime:
                 if self.timer is None and (not continuous_enabled or timeout_continuous):
                     self.timer = QTimer()
                     self.timer.timeout.connect(self.stop_timeout)
@@ -119,11 +158,12 @@ class Simple:
                 )
             self.switch_btn_start()  # switch button to start
-    def stop_recording(self, timeout: bool = False):
+    def stop_recording(self, timeout: bool = False, realtime: bool = False):
         """
         Stop recording
         :param timeout: True if stopped due to timeout
+        :param realtime: True if called from realtime callback
         """
         self.plugin.window.core.audio.capture.reset_audio_level()
         self.is_recording = False
@@ -143,7 +183,7 @@ class Simple:
                 return
             if self.plugin.window.core.audio.capture.has_frames():
-                if not self.plugin.window.core.audio.capture.has_min_frames():
+                if not self.plugin.window.core.audio.capture.has_min_frames() and not realtime:
                     self.plugin.window.update_status(trans("status.audio.too_short"))
                     self.plugin.window.dispatch(AppEvent(AppEvent.VOICE_CONTROL_STOPPED))  # app event
                     return
@@ -152,6 +192,15 @@ class Simple:
         else:
             self.plugin.window.update_status("")
+    def force_stop(self):
+        """Stop recording"""
+        self.is_recording = False
+        self.plugin.window.dispatch(AppEvent(AppEvent.INPUT_VOICE_LISTEN_STOPPED))  # app event
+        self.switch_btn_start()  # switch button to start
+        if self.plugin.window.core.audio.capture.has_source():
+            self.plugin.window.core.audio.capture.stop()  # stop recording
+            return
     def on_stop(self):
         """Handle auto-transcribe"""
         path = os.path.join(self.plugin.window.core.config.path, self.plugin.input_file)

pygpt_net/plugin/cmd_files/worker.py CHANGED Viewed

@@ -920,6 +920,9 @@ class Worker(BaseWorker):
         :param context: context data
         :return: extra data
         """
+        # disabled in v2.6.31
+        # reason: do not duplicate context in chat
+        return {}
         cmd = item["cmd"]
         extra = {
             'plugin': "cmd_files",

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl

pygpt-net 2.6.30__py3-none-any.whl → 2.6.32__py3-none-any.whl