msgmodel-3.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msgmodel/__init__.py +81 -0
- msgmodel/__main__.py +224 -0
- msgmodel/config.py +159 -0
- msgmodel/core.py +506 -0
- msgmodel/exceptions.py +93 -0
- msgmodel/providers/__init__.py +11 -0
- msgmodel/providers/gemini.py +325 -0
- msgmodel/providers/openai.py +350 -0
- msgmodel/py.typed +0 -0
- msgmodel/security.py +165 -0
- msgmodel-3.2.1.dist-info/METADATA +416 -0
- msgmodel-3.2.1.dist-info/RECORD +16 -0
- msgmodel-3.2.1.dist-info/WHEEL +5 -0
- msgmodel-3.2.1.dist-info/entry_points.txt +2 -0
- msgmodel-3.2.1.dist-info/licenses/LICENSE +21 -0
- msgmodel-3.2.1.dist-info/top_level.txt +1 -0

msgmodel/providers/gemini.py
ADDED
@@ -0,0 +1,325 @@
"""
msgmodel.providers.gemini
~~~~~~~~~~~~~~~~~~~~~~~~~

Google Gemini API provider implementation.
"""

import json
import base64
import logging
import mimetypes as mime_module
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List, Iterator, Callable

import requests

from ..config import GeminiConfig, GEMINI_URL
from ..exceptions import APIError, StreamingError, ConfigurationError

logger = logging.getLogger(__name__)

MIME_TYPE_JSON = "application/json"
MIME_TYPE_OCTET_STREAM = "application/octet-stream"


class GeminiProvider:
    """
    Google Gemini API provider for making LLM requests.

    Handles API calls and response parsing for Gemini models.
    """

    def __init__(self, api_key: str, config: Optional[GeminiConfig] = None):
        """
        Initialize the Gemini provider.

        Args:
            api_key: Google API key for Gemini
            config: Optional configuration (uses defaults if not provided)

        Raises:
            ConfigurationError: If billing verification fails or paid tier is not active
        """
        self.api_key = api_key
        self.config = config or GeminiConfig()
        self._billing_verified = False

        # Verify paid API access on initialization
        self._verify_paid_api_access()

    def _verify_paid_api_access(self) -> None:
        """
        Verify that the API key has access to paid Gemini services.

        Makes a minimal test request to confirm paid quota is active.
        Raises ConfigurationError if billing verification fails.

        Raises:
            ConfigurationError: If API access indicates unpaid tier or no billing
            APIError: If the verification request fails for other reasons
        """
        try:
            # Make a minimal test request to verify paid access
            test_payload = {
                "contents": [{"parts": [{"text": "[BILLING VERIFICATION]"}]}],
                "generationConfig": {"maxOutputTokens": 10}
            }

            url = (
                f"{GEMINI_URL}/{self.config.api_version}/models/"
                f"{self.config.model}:generateContent?key={self.api_key}"
            )
            headers = {"Content-Type": MIME_TYPE_JSON}

            response = requests.post(url, headers=headers, data=json.dumps(test_payload), timeout=5)

            # Check for specific error indicators of unpaid tier
            if response.status_code == 429:
                raise ConfigurationError(
                    "BILLING VERIFICATION FAILED: Rate limit exceeded.\n"
                    "This may indicate unpaid quota. Ensure your Google Cloud project has:\n"
                    " 1. Cloud Billing account linked\n"
                    " 2. PAID API quota enabled (not free tier)\n"
                    " 3. Sufficient billing credits\n"
                    "See: https://console.cloud.google.com/billing"
                )
            elif response.status_code == 403:
                raise ConfigurationError(
                    "BILLING VERIFICATION FAILED: Access denied (403).\n"
                    "Ensure your API key has paid quota access:\n"
                    " 1. Verify API key is valid\n"
                    " 2. Confirm Cloud Billing is enabled\n"
                    " 3. Check that paid quota is active (not free tier)\n"
                    "See: https://console.cloud.google.com/billing"
                )
            elif not response.ok:
                error_msg = response.text
                raise APIError(
                    f"Billing verification failed with status {response.status_code}: {error_msg}",
                    status_code=response.status_code,
                    response_text=error_msg
                )

            self._billing_verified = True
            logger.info("✓ Gemini paid API access verified. Data retention limited to abuse monitoring only.")

        except requests.RequestException as e:
            raise APIError(
                f"Billing verification request failed: {e}. "
                f"Ensure your Google Cloud project has paid API quota active."
            )
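
A minimal construction sketch: because billing verification runs in __init__, the constructor itself can raise. The key below is a hypothetical placeholder.

    from msgmodel.providers.gemini import GeminiProvider
    from msgmodel.exceptions import APIError, ConfigurationError

    try:
        # Construction triggers _verify_paid_api_access() immediately
        provider = GeminiProvider(api_key="AIza-EXAMPLE-KEY")  # hypothetical key
    except ConfigurationError as e:
        print(f"Paid-tier check failed: {e}")      # 429/403 from the probe request
    except APIError as e:
        print(f"Verification request error: {e}")  # network failure or other HTTP error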

    def _build_url(self, stream: bool = False) -> str:
        """Build the API endpoint URL."""
        action = "streamGenerateContent" if stream else "generateContent"
        url = (
            f"{GEMINI_URL}/{self.config.api_version}/models/"
            f"{self.config.model}:{action}?key={self.api_key}"
        )
        if stream:
            url += "&alt=sse"
        return url
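
For illustration, with hypothetical config values (api_version "v1beta", model "gemini-example") the builder yields:

    provider._build_url()             # .../v1beta/models/gemini-example:generateContent?key=<api_key>
    provider._build_url(stream=True)  # .../v1beta/models/gemini-example:streamGenerateContent?key=<api_key>&alt=sse

Since the key travels in the query string, these URLs should not be logged verbatim.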

    def _build_payload(
        self,
        prompt: str,
        system_instruction: Optional[str] = None,
        file_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Build the API request payload."""
        parts: List[Dict[str, Any]] = [{"text": prompt}]

        if file_data:
            filtered_data = {
                "mime_type": file_data["mime_type"],
                "data": file_data["data"]
            }
            parts.append({"inline_data": filtered_data})

        payload: Dict[str, Any] = {
            "contents": [{"parts": parts}],
            "generationConfig": {
                "maxOutputTokens": self.config.max_tokens,
                "temperature": self.config.temperature,
                "topP": self.config.top_p,
                "topK": self.config.top_k,
                "candidateCount": self.config.candidate_count
            },
            "safetySettings": [
                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": self.config.safety_threshold},
                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": self.config.safety_threshold},
                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": self.config.safety_threshold},
                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": self.config.safety_threshold}
            ]
        }

        if system_instruction:
            payload["systemInstruction"] = {"parts": [{"text": system_instruction}]}

        return payload
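
A sketch of the dict this returns for a prompt with an attached PNG (generation values come from the active GeminiConfig; base64 data abridged):

    payload = provider._build_payload(
        "Describe this image",
        file_data={"mime_type": "image/png", "data": "<base64 bytes>"},
    )
    # {"contents": [{"parts": [{"text": "Describe this image"},
    #                          {"inline_data": {"mime_type": "image/png", "data": "<base64 bytes>"}}]}],
    #  "generationConfig": {"maxOutputTokens": ..., "temperature": ..., "topP": ..., "topK": ..., "candidateCount": ...},
    #  "safetySettings": [...all four harm categories at config.safety_threshold...]}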

    def query(
        self,
        prompt: str,
        system_instruction: Optional[str] = None,
        file_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Make a non-streaming API call to Gemini.

        Args:
            prompt: The user prompt
            system_instruction: Optional system instruction
            file_data: Optional file data dict

        Returns:
            The API response as a dictionary

        Raises:
            APIError: If the API call fails
        """
        url = self._build_url()
        payload = self._build_payload(prompt, system_instruction, file_data)
        headers = {"Content-Type": MIME_TYPE_JSON}

        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload))
        except requests.RequestException as e:
            raise APIError(f"Request failed: {e}")

        if not response.ok:
            raise APIError(
                f"Gemini API error: {response.text}",
                status_code=response.status_code,
                response_text=response.text
            )

        return response.json()
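
Typical non-streaming use pairs query() with the static extract_text() defined below. Note that, unlike stream(), this call sets no explicit request timeout:

    raw = provider.query(
        "Summarize the attached notes",
        system_instruction="Answer in one paragraph.",
    )
    print(GeminiProvider.extract_text(raw))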

    def stream(
        self,
        prompt: str,
        system_instruction: Optional[str] = None,
        file_data: Optional[Dict[str, Any]] = None,
        timeout: float = 300,
        on_chunk: Optional[Callable[[str], bool]] = None
    ) -> Iterator[str]:
        """
        Make a streaming API call to Gemini.

        v3.2.1 Enhancement: Adds timeout support and optional abort callback.

        Args:
            prompt: The user prompt
            system_instruction: Optional system instruction
            file_data: Optional file data dict
            timeout: Timeout in seconds for the streaming connection (default: 300s/5min)
            on_chunk: Optional callback that receives each chunk. Return False to abort stream.

        Yields:
            Text chunks as they arrive

        Raises:
            APIError: If the API call fails
            StreamingError: If streaming fails or timeout occurs
        """
        url = self._build_url(stream=True)
        payload = self._build_payload(prompt, system_instruction, file_data)
        headers = {"Content-Type": MIME_TYPE_JSON}

        try:
            response = requests.post(
                url,
                headers=headers,
                data=json.dumps(payload),
                stream=True,
                timeout=timeout
            )
        except requests.Timeout:
            raise StreamingError(f"Gemini streaming request timed out after {timeout} seconds")
        except requests.RequestException as e:
            raise APIError(f"Request failed: {e}")

        if not response.ok:
            raise APIError(
                f"Gemini API error: {response.text}",
                status_code=response.status_code,
                response_text=response.text
            )

        try:
            for line in response.iter_lines():
                if line:
                    line_text = line.decode("utf-8")
                    if line_text.startswith("data: "):
                        data = line_text[6:]
                        try:
                            chunk = json.loads(data)
                            text = self.extract_text(chunk)
                            if text:
                                # v3.2.1: Support abort callback
                                if on_chunk is not None:
                                    should_continue = on_chunk(text)
                                    if should_continue is False:
                                        return
                                yield text
                        except json.JSONDecodeError:
                            continue
        except Exception as e:
            raise StreamingError(f"Streaming interrupted: {e}")
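
A streaming sketch using the v3.2.1 abort callback; returning False from the callback ends iteration cleanly after the current chunk:

    collected = []

    def stop_after_2kb(text):
        collected.append(text)
        return sum(len(t) for t in collected) < 2000  # False aborts the stream

    for piece in provider.stream("Write a long story", timeout=120, on_chunk=stop_after_2kb):
        print(piece, end="", flush=True)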

    @staticmethod
    def extract_text(response: Dict[str, Any]) -> str:
        """
        Extract text from a Gemini API response.

        Args:
            response: The raw API response

        Returns:
            Extracted text content
        """
        texts = []
        for candidate in response.get("candidates", []):
            content = candidate.get("content", {})
            for part in content.get("parts", []):
                if "text" in part:
                    texts.append(part["text"])
        return "".join(texts)

    @staticmethod
    def extract_binary_outputs(response: Dict[str, Any], output_dir: str = ".") -> List[str]:
        """
        Extract and save binary outputs from a Gemini response.

        Args:
            response: The raw API response
            output_dir: Directory to save output files

        Returns:
            List of saved file paths
        """
        saved_files = []
        output_path = Path(output_dir)

        for idx, candidate in enumerate(response.get("candidates", [])):
            content = candidate.get("content", {})
            for part_idx, part in enumerate(content.get("parts", [])):
                if "inline_data" in part:
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                    mime_type = part["inline_data"].get("mime_type", MIME_TYPE_OCTET_STREAM)
                    extension = mime_module.guess_extension(mime_type) or ".bin"
                    filename = f"output_{timestamp}_c{idx}_p{part_idx}{extension}"

                    filepath = output_path / filename
                    binary_data = base64.b64decode(part["inline_data"]["data"])

                    with open(filepath, "wb") as f:
                        f.write(binary_data)

                    saved_files.append(str(filepath))
                    logger.info(f"Binary output written to: {filepath}")

        return saved_files
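
End-to-end, a response that carries inline binary parts can be persisted as follows; note that output_dir must already exist, since the method does not create it:

    raw = provider.query("Generate a small diagram")  # assumes a model that emits inline_data parts
    paths = GeminiProvider.extract_binary_outputs(raw, output_dir="out")
    # Files are named output_<timestamp>_c<candidate>_p<part><ext>, with the
    # extension guessed from the MIME type (".bin" when unknown)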

msgmodel/providers/openai.py
ADDED
@@ -0,0 +1,350 @@
"""
msgmodel.providers.openai
~~~~~~~~~~~~~~~~~~~~~~~~~

OpenAI API provider implementation.

ZERO DATA RETENTION (ZDR) - ENFORCED:
- The X-OpenAI-No-Store header is ALWAYS sent with all requests.
- This header instructs OpenAI not to store inputs and outputs for service improvements.
- ZDR is non-negotiable and cannot be disabled.
- See: https://platform.openai.com/docs/guides/zero-data-retention

FILE UPLOADS:
- All file uploads are via inline base64-encoding in prompts (no Files API)
- Files are limited to practical API size constraints (~15-20MB)
- This approach provides better privacy and stateless operation
"""

import json
import base64
import logging
from typing import Optional, Dict, Any, Iterator, List, Callable

import requests

from ..config import OpenAIConfig, OPENAI_URL
from ..exceptions import APIError, ProviderError, StreamingError

logger = logging.getLogger(__name__)

# MIME type constants
MIME_TYPE_JSON = "application/json"


class OpenAIProvider:
    """
    OpenAI API provider for making LLM requests.

    Handles API calls and response parsing for OpenAI models.
    All file uploads use inline base64-encoding for privacy and statelessness.
    """

    def __init__(self, api_key: str, config: Optional[OpenAIConfig] = None):
        """
        Initialize the OpenAI provider.

        Args:
            api_key: OpenAI API key
            config: Optional configuration (uses defaults if not provided)
        """
        self.api_key = api_key
        self.config = config or OpenAIConfig()

    def _build_headers(self) -> Dict[str, str]:
        """
        Build HTTP headers for OpenAI API requests.

        ENFORCES Zero Data Retention (ZDR) by always including the X-OpenAI-No-Store header.
        This header instructs OpenAI not to store inputs and outputs for service improvements.

        ZDR is non-negotiable and cannot be disabled.

        Returns:
            Dictionary of HTTP headers with ZDR enforced
        """
        headers: Dict[str, str] = {
            "Content-Type": MIME_TYPE_JSON,
            "Authorization": f"Bearer {self.api_key}",
            "X-OpenAI-No-Store": "true"  # ← ALWAYS enforced, no option to disable
        }

        return headers
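
With a hypothetical key, the resulting headers are always exactly these three entries:

    from msgmodel.providers.openai import OpenAIProvider

    provider = OpenAIProvider(api_key="sk-EXAMPLE")  # hypothetical key
    provider._build_headers()
    # {"Content-Type": "application/json",
    #  "Authorization": "Bearer sk-EXAMPLE",
    #  "X-OpenAI-No-Store": "true"}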

    def _build_content(
        self,
        prompt: str,
        file_data: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """Build the content array for the API request."""
        content: List[Dict[str, Any]] = []

        if file_data:
            mime_type = file_data["mime_type"]
            encoded_data = file_data.get("data", "")
            filename = file_data.get("filename", "input.bin")

            if mime_type.startswith("image/"):
                content.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{encoded_data}"
                    }
                })
            elif mime_type.startswith("text/"):
                try:
                    decoded_text = base64.b64decode(encoded_data).decode("utf-8", errors="ignore")
                except Exception:
                    decoded_text = ""
                if decoded_text.strip():
                    content.append({
                        "type": "text",
                        "text": f"(Contents of {filename}):\n\n{decoded_text}"
                    })
            else:
                content.append({
                    "type": "text",
                    "text": (
                        f"[Note: A file named '{filename}' with MIME type '{mime_type}' "
                        f"was provided. You may not be able to read it directly, but you "
                        f"can still respond based on the description and prompt.]"
                    )
                })

        content.append({
            "type": "text",
            "text": prompt
        })

        return content
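
Sketched behavior for the image branch (hypothetical file_data values):

    provider._build_content("What is shown?", {"mime_type": "image/png", "data": "<base64>"})
    # [{"type": "image_url", "image_url": {"url": "data:image/png;base64,<base64>"}},
    #  {"type": "text", "text": "What is shown?"}]

    # text/* files are base64-decoded and inlined as "(Contents of <filename>): ...";
    # any other MIME type becomes a bracketed placeholder note, since the payload
    # carries no raw bytes for unsupported types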

    def _supports_max_completion_tokens(self, model_name: str) -> bool:
        """
        Check if a model supports max_completion_tokens (GPT-4o and later).

        Newer OpenAI models (GPT-4o, GPT-4 Turbo) use max_completion_tokens instead of max_tokens.
        Legacy models still use max_tokens.

        Args:
            model_name: The model identifier

        Returns:
            True if the model uses max_completion_tokens, False if it uses max_tokens
        """
        # Models that support max_completion_tokens (newer OpenAI models)
        new_models = [
            "gpt-4o",
            "gpt-4-turbo",
            "gpt-4-turbo-preview",
            "gpt-4-turbo-2024",
        ]

        # Models that only support max_tokens (legacy); listed for reference only —
        # anything not matching a new-model prefix falls through to max_tokens below
        legacy_models = [
            "gpt-3.5-turbo",
            "gpt-4",
            "gpt-4-0613",
            "gpt-4-0125-preview",
            "gpt-4-1106-preview",
        ]

        # Check for exact match or prefix match for new models
        for new_model in new_models:
            if model_name == new_model or model_name.startswith(new_model):
                return True

        # Default: use legacy max_tokens for safety
        return False
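
Because matching is by prefix, dated and size variants route with their family (illustrative model names):

    provider._supports_max_completion_tokens("gpt-4o-mini")       # True  (prefix "gpt-4o")
    provider._supports_max_completion_tokens("gpt-4-turbo-2024")  # True
    provider._supports_max_completion_tokens("gpt-4")             # False -> max_tokens
    provider._supports_max_completion_tokens("gpt-3.5-turbo")     # False -> max_tokens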

    def _build_payload(
        self,
        prompt: str,
        system_instruction: Optional[str] = None,
        file_data: Optional[Dict[str, Any]] = None,
        stream: bool = False
    ) -> Dict[str, Any]:
        """Build the API request payload for the OpenAI Chat Completions API."""
        content = self._build_content(prompt, file_data)

        # Build messages array with system message first (if provided)
        messages: List[Dict[str, Any]] = []
        if system_instruction:
            messages.append({"role": "system", "content": system_instruction})
        messages.append({"role": "user", "content": content})

        payload: Dict[str, Any] = {
            "model": self.config.model,
            "messages": messages,
            "temperature": self.config.temperature,
            "top_p": self.config.top_p,
        }

        # Use appropriate max tokens parameter based on model version
        # v3.2.1: Support both max_tokens (legacy) and max_completion_tokens (GPT-4o+)
        if self._supports_max_completion_tokens(self.config.model):
            payload["max_completion_tokens"] = self.config.max_tokens
        else:
            payload["max_tokens"] = self.config.max_tokens

        if stream:
            payload["stream"] = True

        return payload
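
A sketch of the payload for a model on the max_completion_tokens path, with a system instruction (config-derived values elided):

    provider._build_payload("Hello", system_instruction="Be terse.", stream=True)
    # {"model": "...",
    #  "messages": [{"role": "system", "content": "Be terse."},
    #               {"role": "user", "content": [{"type": "text", "text": "Hello"}]}],
    #  "temperature": ..., "top_p": ..., "max_completion_tokens": ..., "stream": True}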

    def query(
        self,
        prompt: str,
        system_instruction: Optional[str] = None,
        file_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Make a non-streaming API call to OpenAI.

        Zero Data Retention (ZDR) is always enforced: the X-OpenAI-No-Store header
        is sent with every request, ensuring OpenAI does not store this interaction
        for service improvements.

        Args:
            prompt: The user prompt
            system_instruction: Optional system instruction
            file_data: Optional file data dict

        Returns:
            The API response as a dictionary

        Raises:
            APIError: If the API call fails
        """
        payload = self._build_payload(prompt, system_instruction, file_data)
        headers = self._build_headers()

        try:
            response = requests.post(
                OPENAI_URL,
                headers=headers,
                data=json.dumps(payload)
            )
        except requests.RequestException as e:
            raise APIError(f"Request failed: {e}")

        if not response.ok:
            raise APIError(
                f"OpenAI API error: {response.text}",
                status_code=response.status_code,
                response_text=response.text
            )

        return response.json()
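
Non-streaming use mirrors the Gemini provider; as there, no explicit timeout is set on this request:

    raw = provider.query("Say hello")
    print(OpenAIProvider.extract_text(raw))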

    def stream(
        self,
        prompt: str,
        system_instruction: Optional[str] = None,
        file_data: Optional[Dict[str, Any]] = None,
        timeout: float = 300,
        on_chunk: Optional[Callable[[str], bool]] = None
    ) -> Iterator[str]:
        """
        Make a streaming API call to OpenAI.

        Zero Data Retention (ZDR) is always enforced: the X-OpenAI-No-Store header
        is sent with every request, ensuring OpenAI does not store this interaction
        for service improvements.

        v3.2.1 Enhancement: Adds timeout support and optional abort callback.

        Args:
            prompt: The user prompt
            system_instruction: Optional system instruction
            file_data: Optional file data dict
            timeout: Timeout in seconds for the streaming connection (default: 300s/5min)
            on_chunk: Optional callback that receives each chunk. Return False to abort stream.

        Yields:
            Text chunks as they arrive

        Raises:
            APIError: If the API call fails
            StreamingError: If streaming fails or timeout occurs
        """
        payload = self._build_payload(prompt, system_instruction, file_data, stream=True)
        headers = self._build_headers()

        try:
            response = requests.post(
                OPENAI_URL,
                headers=headers,
                data=json.dumps(payload),
                stream=True,
                timeout=timeout
            )
        except requests.Timeout:
            raise StreamingError(f"OpenAI streaming request timed out after {timeout} seconds")
        except requests.RequestException as e:
            raise APIError(f"Request failed: {e}")

        if not response.ok:
            raise APIError(
                f"OpenAI API error: {response.text}",
                status_code=response.status_code,
                response_text=response.text
            )

        chunks_received = 0
        try:
            for line in response.iter_lines():
                if line:
                    line_text = line.decode("utf-8")
                    if line_text.startswith("data: "):
                        data = line_text[6:]
                        if data == "[DONE]":
                            break
                        try:
                            chunk = json.loads(data)
                            # Extract text from OpenAI Chat Completions streaming response
                            # Format: {"choices": [{"delta": {"content": "..."}}], ...}
                            if "choices" in chunk and isinstance(chunk["choices"], list):
                                for choice in chunk["choices"]:
                                    if isinstance(choice, dict):
                                        delta = choice.get("delta", {})
                                        if isinstance(delta, dict):
                                            text = delta.get("content", "")
                                            if text:
                                                chunks_received += 1
                                                # v3.2.1: Support abort callback
                                                if on_chunk is not None:
                                                    should_continue = on_chunk(text)
                                                    if should_continue is False:
                                                        return
                                                yield text
                        except json.JSONDecodeError:
                            continue

            if chunks_received == 0:
                logger.error(
                    "No text chunks extracted from streaming response. Response format may not "
                    "match the OpenAI Chat Completions delta structure, or the stream may have "
                    "ended prematurely."
                )
        except Exception as e:
            raise StreamingError(f"Streaming interrupted: {e}")
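
Streaming matches the Gemini provider's interface, so callers can swap providers without changing their consumption loop:

    for piece in provider.stream("Stream a haiku", timeout=60):
        print(piece, end="", flush=True)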

    @staticmethod
    def extract_text(response: Dict[str, Any]) -> str:
        """
        Extract text from an OpenAI Chat Completions response.

        Args:
            response: The raw API response

        Returns:
            Extracted text content
        """
        # OpenAI Chat Completions response format:
        # {"choices": [{"message": {"content": "..."}}], ...}
        if "choices" in response and isinstance(response["choices"], list):
            for choice in response["choices"]:
                if isinstance(choice, dict):
                    message = choice.get("message", {})
                    if isinstance(message, dict):
                        content = message.get("content", "")
                        if content:
                            return content

        return ""

msgmodel/py.typed
ADDED
Empty file (no content).