pygpt-net 2.6.29__py3-none-any.whl → 2.6.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygpt_net/CHANGELOG.txt +15 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/app.py +4 -0
- pygpt_net/{container.py → app_core.py} +5 -6
- pygpt_net/controller/__init__.py +5 -2
- pygpt_net/controller/access/control.py +1 -9
- pygpt_net/controller/assistant/assistant.py +4 -4
- pygpt_net/controller/assistant/batch.py +7 -7
- pygpt_net/controller/assistant/files.py +4 -4
- pygpt_net/controller/assistant/threads.py +3 -3
- pygpt_net/controller/attachment/attachment.py +4 -7
- pygpt_net/controller/audio/audio.py +25 -1
- pygpt_net/controller/audio/ui.py +2 -2
- pygpt_net/controller/chat/audio.py +1 -8
- pygpt_net/controller/chat/common.py +30 -4
- pygpt_net/controller/chat/handler/stream_worker.py +1124 -0
- pygpt_net/controller/chat/output.py +8 -3
- pygpt_net/controller/chat/stream.py +4 -405
- pygpt_net/controller/chat/text.py +3 -2
- pygpt_net/controller/chat/vision.py +11 -19
- pygpt_net/controller/config/placeholder.py +1 -1
- pygpt_net/controller/ctx/ctx.py +1 -1
- pygpt_net/controller/ctx/summarizer.py +1 -1
- pygpt_net/controller/kernel/kernel.py +11 -3
- pygpt_net/controller/kernel/reply.py +5 -1
- pygpt_net/controller/mode/mode.py +21 -12
- pygpt_net/controller/plugins/settings.py +3 -2
- pygpt_net/controller/presets/editor.py +112 -99
- pygpt_net/controller/realtime/__init__.py +12 -0
- pygpt_net/controller/realtime/manager.py +53 -0
- pygpt_net/controller/realtime/realtime.py +268 -0
- pygpt_net/controller/theme/theme.py +3 -2
- pygpt_net/controller/ui/mode.py +7 -0
- pygpt_net/controller/ui/ui.py +19 -1
- pygpt_net/controller/ui/vision.py +4 -4
- pygpt_net/core/agents/legacy.py +2 -2
- pygpt_net/core/agents/runners/openai_workflow.py +2 -2
- pygpt_net/core/assistants/files.py +5 -5
- pygpt_net/core/assistants/store.py +4 -4
- pygpt_net/core/audio/audio.py +6 -1
- pygpt_net/core/audio/backend/native/__init__.py +12 -0
- pygpt_net/core/audio/backend/{native.py → native/native.py} +426 -127
- pygpt_net/core/audio/backend/native/player.py +139 -0
- pygpt_net/core/audio/backend/native/realtime.py +250 -0
- pygpt_net/core/audio/backend/pyaudio/__init__.py +12 -0
- pygpt_net/core/audio/backend/pyaudio/playback.py +194 -0
- pygpt_net/core/audio/backend/pyaudio/pyaudio.py +923 -0
- pygpt_net/core/audio/backend/pyaudio/realtime.py +275 -0
- pygpt_net/core/audio/backend/pygame/__init__.py +12 -0
- pygpt_net/core/audio/backend/{pygame.py → pygame/pygame.py} +130 -19
- pygpt_net/core/audio/backend/shared/__init__.py +38 -0
- pygpt_net/core/audio/backend/shared/conversions.py +211 -0
- pygpt_net/core/audio/backend/shared/envelope.py +38 -0
- pygpt_net/core/audio/backend/shared/player.py +137 -0
- pygpt_net/core/audio/backend/shared/rt.py +52 -0
- pygpt_net/core/audio/capture.py +5 -0
- pygpt_net/core/audio/output.py +13 -2
- pygpt_net/core/audio/whisper.py +6 -2
- pygpt_net/core/bridge/bridge.py +4 -3
- pygpt_net/core/bridge/worker.py +31 -9
- pygpt_net/core/debug/console/console.py +2 -2
- pygpt_net/core/debug/presets.py +2 -2
- pygpt_net/core/dispatcher/dispatcher.py +37 -1
- pygpt_net/core/events/__init__.py +2 -1
- pygpt_net/core/events/realtime.py +55 -0
- pygpt_net/core/experts/experts.py +2 -2
- pygpt_net/core/image/image.py +51 -1
- pygpt_net/core/modes/modes.py +2 -2
- pygpt_net/core/presets/presets.py +3 -3
- pygpt_net/core/realtime/options.py +87 -0
- pygpt_net/core/realtime/shared/__init__.py +0 -0
- pygpt_net/core/realtime/shared/audio.py +213 -0
- pygpt_net/core/realtime/shared/loop.py +64 -0
- pygpt_net/core/realtime/shared/session.py +59 -0
- pygpt_net/core/realtime/shared/text.py +37 -0
- pygpt_net/core/realtime/shared/tools.py +276 -0
- pygpt_net/core/realtime/shared/turn.py +38 -0
- pygpt_net/core/realtime/shared/types.py +16 -0
- pygpt_net/core/realtime/worker.py +164 -0
- pygpt_net/core/tokens/tokens.py +4 -4
- pygpt_net/core/types/__init__.py +1 -0
- pygpt_net/core/types/image.py +48 -0
- pygpt_net/core/types/mode.py +5 -2
- pygpt_net/core/vision/analyzer.py +1 -1
- pygpt_net/data/config/config.json +13 -4
- pygpt_net/data/config/models.json +219 -101
- pygpt_net/data/config/modes.json +3 -9
- pygpt_net/data/config/settings.json +135 -27
- pygpt_net/data/config/settings_section.json +2 -2
- pygpt_net/data/locale/locale.de.ini +7 -7
- pygpt_net/data/locale/locale.en.ini +25 -12
- pygpt_net/data/locale/locale.es.ini +7 -7
- pygpt_net/data/locale/locale.fr.ini +7 -7
- pygpt_net/data/locale/locale.it.ini +7 -7
- pygpt_net/data/locale/locale.pl.ini +8 -8
- pygpt_net/data/locale/locale.uk.ini +7 -7
- pygpt_net/data/locale/locale.zh.ini +3 -3
- pygpt_net/data/locale/plugin.audio_input.en.ini +4 -0
- pygpt_net/data/locale/plugin.audio_output.en.ini +4 -0
- pygpt_net/item/model.py +23 -3
- pygpt_net/plugin/audio_input/plugin.py +37 -4
- pygpt_net/plugin/audio_input/simple.py +57 -8
- pygpt_net/plugin/cmd_files/worker.py +3 -0
- pygpt_net/plugin/openai_dalle/plugin.py +4 -4
- pygpt_net/plugin/openai_vision/plugin.py +12 -13
- pygpt_net/provider/agents/openai/agent.py +5 -5
- pygpt_net/provider/agents/openai/agent_b2b.py +5 -5
- pygpt_net/provider/agents/openai/agent_planner.py +5 -6
- pygpt_net/provider/agents/openai/agent_with_experts.py +5 -5
- pygpt_net/provider/agents/openai/agent_with_experts_feedback.py +4 -4
- pygpt_net/provider/agents/openai/agent_with_feedback.py +4 -4
- pygpt_net/provider/agents/openai/bot_researcher.py +2 -2
- pygpt_net/provider/agents/openai/bots/research_bot/agents/planner_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/search_agent.py +1 -1
- pygpt_net/provider/agents/openai/bots/research_bot/agents/writer_agent.py +1 -1
- pygpt_net/provider/agents/openai/evolve.py +5 -5
- pygpt_net/provider/agents/openai/supervisor.py +4 -4
- pygpt_net/provider/api/__init__.py +27 -0
- pygpt_net/provider/api/anthropic/__init__.py +68 -0
- pygpt_net/provider/api/google/__init__.py +295 -0
- pygpt_net/provider/api/google/audio.py +121 -0
- pygpt_net/provider/api/google/chat.py +591 -0
- pygpt_net/provider/api/google/image.py +427 -0
- pygpt_net/provider/api/google/realtime/__init__.py +12 -0
- pygpt_net/provider/api/google/realtime/client.py +1945 -0
- pygpt_net/provider/api/google/realtime/realtime.py +186 -0
- pygpt_net/provider/api/google/tools.py +222 -0
- pygpt_net/provider/api/google/vision.py +129 -0
- pygpt_net/provider/{gpt → api/openai}/__init__.py +24 -4
- pygpt_net/provider/api/openai/agents/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/agents/computer.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/experts.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/agents/response.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/assistants.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/chat.py +15 -8
- pygpt_net/provider/{gpt → api/openai}/completion.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/image.py +1 -1
- pygpt_net/provider/api/openai/realtime/__init__.py +12 -0
- pygpt_net/provider/api/openai/realtime/client.py +1828 -0
- pygpt_net/provider/api/openai/realtime/realtime.py +194 -0
- pygpt_net/provider/{gpt → api/openai}/remote_tools.py +1 -1
- pygpt_net/provider/{gpt → api/openai}/responses.py +34 -20
- pygpt_net/provider/{gpt → api/openai}/store.py +2 -2
- pygpt_net/provider/{gpt → api/openai}/vision.py +1 -1
- pygpt_net/provider/api/openai/worker/__init__.py +0 -0
- pygpt_net/provider/{gpt → api/openai}/worker/assistants.py +4 -4
- pygpt_net/provider/{gpt → api/openai}/worker/importer.py +10 -10
- pygpt_net/provider/audio_input/google_genai.py +103 -0
- pygpt_net/provider/audio_input/openai_whisper.py +1 -1
- pygpt_net/provider/audio_output/google_genai_tts.py +229 -0
- pygpt_net/provider/audio_output/openai_tts.py +9 -6
- pygpt_net/provider/core/config/patch.py +26 -0
- pygpt_net/provider/core/model/patch.py +20 -0
- pygpt_net/provider/core/preset/json_file.py +2 -4
- pygpt_net/provider/llms/anthropic.py +2 -5
- pygpt_net/provider/llms/base.py +4 -3
- pygpt_net/provider/llms/google.py +8 -9
- pygpt_net/provider/llms/openai.py +1 -1
- pygpt_net/provider/loaders/hub/image_vision/base.py +1 -1
- pygpt_net/ui/dialog/preset.py +71 -55
- pygpt_net/ui/layout/toolbox/footer.py +16 -0
- pygpt_net/ui/layout/toolbox/image.py +5 -0
- pygpt_net/ui/main.py +6 -4
- pygpt_net/ui/widget/option/combo.py +15 -1
- pygpt_net/utils.py +9 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/METADATA +55 -55
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/RECORD +181 -135
- pygpt_net/core/audio/backend/pyaudio.py +0 -554
- /pygpt_net/{provider/gpt/agents → controller/chat/handler}/__init__.py +0 -0
- /pygpt_net/{provider/gpt/worker → core/realtime}/__init__.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/client.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/remote_tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/agents/utils.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/audio.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/computer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/container.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/summarizer.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/tools.py +0 -0
- /pygpt_net/provider/{gpt → api/openai}/utils.py +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/LICENSE +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/WHEEL +0 -0
- {pygpt_net-2.6.29.dist-info → pygpt_net-2.6.31.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ================================================== #
# This file is a part of PYGPT package               #
# Website: https://pygpt.net                         #
# GitHub: https://github.com/szczyglis-dev/py-gpt    #
# MIT License                                        #
# Created By  : Marcin Szczygliński                  #
# Updated Date: 2025.08.29 18:00:00                  #
# ================================================== #

import os
import wave
import base64

from .base import BaseProvider


class GoogleGenAITextToSpeech(BaseProvider):
    def __init__(self, *args, **kwargs):
        """
        Google GenAI Text-to-Speech provider (Gemini TTS via API).

        :param args: args
        :param kwargs: kwargs
        """
        super(GoogleGenAITextToSpeech, self).__init__(*args, **kwargs)
        self.plugin = kwargs.get("plugin")
        self.id = "google_genai_tts"
        self.name = "Google GenAI TTS"

        # Supported preview TTS models (fallback to flash if invalid)
        self.allowed_models = [
            "gemini-2.5-flash-preview-tts",
            "gemini-2.5-pro-preview-tts",
        ]

        # Prebuilt voice names exposed by Gemini TTS.
        # Keep list in sync with official docs; default voice is "Kore".
        self.allowed_voices = [
            "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus",
            "Aoede", "Callirrhoe", "Autonoe", "Enceladus", "Iapetus",
            "Umbriel", "Algieba", "Despina", "Erinome", "Algenib",
            "Rasalgethi", "Laomedeia", "Achernar", "Alnilam", "Schedar",
            "Gacrux", "Pulcherrima", "Achird", "Zubenelgenubi",
            "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
        ]

    def init_options(self):
        """Initialize plugin options (model + voice text fields)."""
        # Keep option names consistent with the app style; simple text fields are enough.
        self.plugin.add_option(
            "google_genai_tts_model",
            type="text",
            value="gemini-2.5-flash-preview-tts",
            label="Model",
            tab="google_genai_tts",
            description="Specify Gemini TTS model, e.g.: gemini-2.5-flash-preview-tts or gemini-2.5-pro-preview-tts",
        )
        self.plugin.add_option(
            "google_genai_tts_voice",
            type="text",
            value="Kore",
            label="Voice",
            tab="google_genai_tts",
            description="Specify voice, e.g.: Puck, Kore, Charon, Leda, Zephyr... (case-sensitive)",
            urls={
                "Voices": "https://ai.google.dev/gemini-api/docs/speech-generation"
            },
        )

    def speech(self, text: str) -> str:
        """
        Text to speech synthesis using Google GenAI (Gemini TTS).

        :param text: text to synthesize
        :return: path to generated audio file
        :raises RuntimeError: if the SDK is missing or no audio data is returned
        """
        # Get pre-configured GenAI client
        client = self.plugin.window.core.api.google.get_client()

        # Resolve path where audio should be written
        output_file = self.plugin.output_file
        path = os.path.join(self.plugin.window.core.config.path, output_file)

        # Validate/select model (strip optional "models/" prefix first)
        model = self.plugin.get_option_value("google_genai_tts_model") or "gemini-2.5-flash-preview-tts"
        model = self._normalize_model_name(model)
        if model not in self.allowed_models:
            model = "gemini-2.5-flash-preview-tts"

        # Validate/select voice
        # NOTE: voice validation intentionally disabled; the API rejects
        # unknown voices itself, and the allowed list may lag behind docs.
        voice = self.plugin.get_option_value("google_genai_tts_voice") or "Kore"
        # if voice not in self.allowed_voices:
        #     voice = "Kore"

        # Build generation config for audio modality + voice
        # Using explicit types for clarity and forward-compatibility
        try:
            from google.genai import types
        except Exception as ex:
            # Fail fast if SDK is missing or incompatible
            raise RuntimeError("google.genai SDK is not available. Please install/update Google GenAI SDK.") from ex

        gen_config = types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name=voice
                    )
                )
            ),
            temperature=0.8,  # balanced default; keep configurable later if needed
        )

        # Perform TTS request
        response = client.models.generate_content(
            model=model,
            contents=text,
            config=gen_config,
        )

        # Extract PCM bytes from the first candidate
        pcm = self._extract_pcm_bytes(response)

        # Persist as standard WAV (PCM 16-bit, mono, 24 kHz — Gemini TTS output format)
        self._save_wav(path, pcm, channels=1, rate=24000, sample_width=2)

        return str(path)

    def _extract_pcm_bytes(self, response) -> bytes:
        """
        Extract PCM bytes from a generate_content response.

        :param response: Google GenAI response object
        :return: raw PCM byte data
        :raises RuntimeError: if no audio payload is present or its type is unsupported
        """
        # Defensive extraction to support minor SDK variations:
        # scan all parts of the first candidate for inline audio data.
        data = None
        try:
            parts = response.candidates[0].content.parts
            for part in parts:
                inline = getattr(part, "inline_data", None)
                if inline is not None and getattr(inline, "data", None):
                    data = inline.data
                    break
        except Exception:
            pass

        if data is None:
            raise RuntimeError("No audio data returned by Gemini TTS response.")

        # Normalize to raw bytes
        if isinstance(data, (bytes, bytearray)):
            return bytes(data)
        if isinstance(data, str):
            # Some SDK builds return base64-encoded PCM as str
            return base64.b64decode(data)

        # Last resort: try bytes() cast
        try:
            return bytes(data)
        except Exception as ex:
            raise RuntimeError("Unsupported audio payload type returned by Gemini TTS.") from ex

    def _save_wav(
            self,
            filename: str,
            pcm_bytes: bytes,
            channels: int = 1,
            rate: int = 24000,
            sample_width: int = 2
    ):
        """
        Save raw PCM bytes to a WAV file.

        :param filename: output WAV file path
        :param pcm_bytes: raw PCM byte data
        :param channels: number of audio channels (1=mono, 2=stereo)
        :param rate: sample rate in Hz (e.g., 24000)
        :param sample_width: sample width in bytes (e.g., 2 for 16-bit)
        """
        # Ensure parent directory exists; dirname() is "" for a bare
        # filename and os.makedirs("") would raise, so guard it.
        parent = os.path.dirname(filename)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # Write PCM payload as WAV
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(sample_width)  # bytes per sample (2 -> 16-bit)
            wf.setframerate(rate)
            wf.writeframes(pcm_bytes)

    def _normalize_model_name(self, model: str) -> str:
        """
        Normalize model id (strip optional 'models/' prefix).

        :param model: model id
        :return: normalized model id
        """
        try:
            return model.split("/")[-1]
        except Exception:
            return model

    def is_configured(self) -> bool:
        """
        Check if provider is configured (Google API key present).

        :return: True if configured, False otherwise
        """
        api_key = self.plugin.window.core.config.get("api_key_google")
        return api_key is not None and api_key != ""

    def get_config_message(self) -> str:
        """
        Return message to display when provider is not configured.

        :return: message
        """
        return "Google GenAI API key is not set yet. Please configure it in settings."
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.29 18:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
@@ -51,6 +51,9 @@ class OpenAITextToSpeech(BaseProvider):
|
|
|
51
51
|
use="audio_tts_whisper_voices",
|
|
52
52
|
description="Specify voice, available voices: "
|
|
53
53
|
"alloy, echo, fable, onyx, nova, shimmer",
|
|
54
|
+
urls={
|
|
55
|
+
"Voices": "https://platform.openai.com/docs/guides/text-to-speech/voice-options"
|
|
56
|
+
},
|
|
54
57
|
)
|
|
55
58
|
|
|
56
59
|
def speech(self, text: str) -> str:
|
|
@@ -60,15 +63,15 @@ class OpenAITextToSpeech(BaseProvider):
|
|
|
60
63
|
:param text: text to speech
|
|
61
64
|
:return: path to generated audio file or None if audio playback is handled here
|
|
62
65
|
"""
|
|
63
|
-
client = self.plugin.window.core.
|
|
66
|
+
client = self.plugin.window.core.api.openai.get_client()
|
|
64
67
|
output_file = self.plugin.output_file
|
|
65
68
|
voice = self.plugin.get_option_value('openai_voice')
|
|
66
69
|
model = self.plugin.get_option_value('openai_model')
|
|
67
70
|
allowed_voices = self.plugin.window.core.audio.whisper.get_voices()
|
|
68
|
-
if model not in self.allowed_models:
|
|
69
|
-
model = 'tts-1'
|
|
70
|
-
if voice not in allowed_voices:
|
|
71
|
-
voice = 'alloy'
|
|
71
|
+
# if model not in self.allowed_models:
|
|
72
|
+
# model = 'tts-1'
|
|
73
|
+
# if voice not in allowed_voices:
|
|
74
|
+
# voice = 'alloy'
|
|
72
75
|
path = os.path.join(
|
|
73
76
|
self.plugin.window.core.config.path,
|
|
74
77
|
output_file,
|
|
@@ -2355,6 +2355,32 @@ class Patch:
|
|
|
2355
2355
|
self.window.core.updater.patch_css('web-chatgpt_wide.light.css', True)
|
|
2356
2356
|
updated = True
|
|
2357
2357
|
|
|
2358
|
+
# < 2.6.30
|
|
2359
|
+
if old < parse_version("2.6.30"):
|
|
2360
|
+
print("Migrating config from < 2.6.30...")
|
|
2361
|
+
if "api_native_google" not in data:
|
|
2362
|
+
data["api_native_google"] = True
|
|
2363
|
+
if "remote_tools.google.web_search" not in data:
|
|
2364
|
+
data["remote_tools.google.web_search"] = True
|
|
2365
|
+
if "remote_tools.google.code_interpreter" not in data:
|
|
2366
|
+
data["remote_tools.google.code_interpreter"] = False
|
|
2367
|
+
updated = True
|
|
2368
|
+
|
|
2369
|
+
# < 2.6.31
|
|
2370
|
+
if old < parse_version("2.6.31"):
|
|
2371
|
+
print("Migrating config from < 2.6.31...")
|
|
2372
|
+
if "log.realtime" not in data:
|
|
2373
|
+
data["log.realtime"] = False
|
|
2374
|
+
if "remote_tools.google.url_ctx" not in data:
|
|
2375
|
+
data["remote_tools.google.url_ctx"] = False
|
|
2376
|
+
if "audio.input.auto_turn" not in data:
|
|
2377
|
+
data["audio.input.auto_turn"] = False
|
|
2378
|
+
if "audio.input.vad.prefix" not in data:
|
|
2379
|
+
data["audio.input.vad.prefix"] = 300
|
|
2380
|
+
if "audio.input.vad.silence" not in data:
|
|
2381
|
+
data["audio.input.vad.silence"] = 2000
|
|
2382
|
+
updated = True
|
|
2383
|
+
|
|
2358
2384
|
# update file
|
|
2359
2385
|
migrated = False
|
|
2360
2386
|
if updated:
|
|
@@ -763,6 +763,26 @@ class Patch:
|
|
|
763
763
|
model.mode.append(MODE_AGENT_OPENAI)
|
|
764
764
|
updated = True
|
|
765
765
|
|
|
766
|
+
# < 2.6.30 <--- add Google Imagen models
|
|
767
|
+
if old < parse_version("2.6.30"):
|
|
768
|
+
print("Migrating models from < 2.6.30...")
|
|
769
|
+
if "imagen-3.0-generate-002" not in data:
|
|
770
|
+
data["imagen-3.0-generate-002"] = base_data["imagen-3.0-generate-002"]
|
|
771
|
+
if "imagen-4.0-generate-001" not in data:
|
|
772
|
+
data["imagen-4.0-generate-001"] = base_data["imagen-4.0-generate-001"]
|
|
773
|
+
updated = True
|
|
774
|
+
|
|
775
|
+
# < 2.6.31 <--- add realtime models
|
|
776
|
+
if old < parse_version("2.6.31"):
|
|
777
|
+
print("Migrating models from < 2.6.31...")
|
|
778
|
+
if "gemini-2.5-flash-preview-native-audio-dialog" not in data:
|
|
779
|
+
data["gemini-2.5-flash-preview-native-audio-dialog"] = base_data["gemini-2.5-flash-preview-native-audio-dialog"]
|
|
780
|
+
if "gpt-realtime" not in data:
|
|
781
|
+
data["gpt-realtime"] = base_data["gpt-realtime"]
|
|
782
|
+
if "gpt-4o-realtime-preview" not in data:
|
|
783
|
+
data["gpt-4o-realtime-preview"] = base_data["gpt-4o-realtime-preview"]
|
|
784
|
+
updated = True
|
|
785
|
+
|
|
766
786
|
# update file
|
|
767
787
|
if updated:
|
|
768
788
|
data = dict(sorted(data.items()))
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.28 09:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import json
|
|
@@ -192,7 +192,7 @@ class JsonFileProvider(BaseProvider):
|
|
|
192
192
|
MODE_CHAT: item.chat,
|
|
193
193
|
MODE_COMPLETION: item.completion,
|
|
194
194
|
MODE_IMAGE: item.img,
|
|
195
|
-
MODE_VISION: item.vision,
|
|
195
|
+
# MODE_VISION: item.vision,
|
|
196
196
|
# MODE_LANGCHAIN: item.langchain,
|
|
197
197
|
MODE_ASSISTANT: item.assistant,
|
|
198
198
|
MODE_LLAMA_INDEX: item.llama_index,
|
|
@@ -250,8 +250,6 @@ class JsonFileProvider(BaseProvider):
|
|
|
250
250
|
item.llama_index = data[MODE_LLAMA_INDEX]
|
|
251
251
|
if MODE_RESEARCH in data:
|
|
252
252
|
item.research = data[MODE_RESEARCH]
|
|
253
|
-
if MODE_VISION in data:
|
|
254
|
-
item.vision = data[MODE_VISION]
|
|
255
253
|
|
|
256
254
|
if 'agent_provider' in data:
|
|
257
255
|
item.agent_provider = data['agent_provider']
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.28 09:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
from typing import List, Dict, Optional
|
|
@@ -93,10 +93,7 @@ class AnthropicLLM(BaseLLM):
|
|
|
93
93
|
:param window: window instance
|
|
94
94
|
:return: list of models
|
|
95
95
|
"""
|
|
96
|
-
|
|
97
|
-
client = anthropic.Anthropic(
|
|
98
|
-
api_key=window.core.config.get('api_key_anthropic', "")
|
|
99
|
-
)
|
|
96
|
+
client = window.core.api.anthropic.get_client()
|
|
100
97
|
models_list = client.models.list()
|
|
101
98
|
items = []
|
|
102
99
|
if models_list.data:
|
pygpt_net/provider/llms/base.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.28 09:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
import os
|
|
@@ -18,7 +18,8 @@ from llama_index.core.multi_modal_llms import MultiModalLLM as LlamaMultiModalLL
|
|
|
18
18
|
|
|
19
19
|
from pygpt_net.core.types import (
|
|
20
20
|
MODE_LANGCHAIN,
|
|
21
|
-
MODE_LLAMA_INDEX,
|
|
21
|
+
MODE_LLAMA_INDEX,
|
|
22
|
+
MODE_CHAT,
|
|
22
23
|
)
|
|
23
24
|
from pygpt_net.item.model import ModelItem
|
|
24
25
|
from pygpt_net.utils import parse_args
|
|
@@ -221,7 +222,7 @@ class BaseLLM:
|
|
|
221
222
|
"""
|
|
222
223
|
model = ModelItem()
|
|
223
224
|
model.provider = self.id
|
|
224
|
-
return window.core.
|
|
225
|
+
return window.core.api.openai.get_client(
|
|
225
226
|
mode=MODE_CHAT,
|
|
226
227
|
model=model,
|
|
227
228
|
)
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
|
7
7
|
# MIT License #
|
|
8
8
|
# Created By : Marcin Szczygliński #
|
|
9
|
-
# Updated Date: 2025.08.
|
|
9
|
+
# Updated Date: 2025.08.30 06:00:00 #
|
|
10
10
|
# ================================================== #
|
|
11
11
|
|
|
12
12
|
from typing import Optional, List, Dict
|
|
@@ -92,13 +92,12 @@ class GoogleLLM(BaseLLM):
|
|
|
92
92
|
:return: list of models
|
|
93
93
|
"""
|
|
94
94
|
items = []
|
|
95
|
-
client =
|
|
95
|
+
client = window.core.api.google.get_client()
|
|
96
96
|
models_list = client.models.list()
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
})
|
|
97
|
+
for item in models_list:
|
|
98
|
+
id = item.name.replace("models/", "")
|
|
99
|
+
items.append({
|
|
100
|
+
"id": id,
|
|
101
|
+
"name": id, # TODO: token limit get from API
|
|
102
|
+
})
|
|
104
103
|
return items
|
|
@@ -99,7 +99,7 @@ class OpenAILLM(BaseLLM):
|
|
|
99
99
|
|
|
100
100
|
if window.core.config.get('api_use_responses_llama', False):
|
|
101
101
|
tools = []
|
|
102
|
-
tools = window.core.
|
|
102
|
+
tools = window.core.api.openai.remote_tools.append_to_tools(
|
|
103
103
|
mode=MODE_LLAMA_INDEX,
|
|
104
104
|
model=model,
|
|
105
105
|
stream=stream,
|
|
@@ -145,7 +145,7 @@ class ImageVisionLLMReader(BaseReader):
|
|
|
145
145
|
image = image.convert("RGB")
|
|
146
146
|
image_str = img_2_b64(image)
|
|
147
147
|
|
|
148
|
-
client = self._window.core.
|
|
148
|
+
client = self._window.core.api.openai.get_client()
|
|
149
149
|
encoded = self._encode_image(str(file))
|
|
150
150
|
content = [
|
|
151
151
|
{
|