msgmodel 3.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msgmodel/core.py ADDED
@@ -0,0 +1,506 @@
1
+ """
2
+ msgmodel.core
3
+ ~~~~~~~~~~~~~
4
+
5
+ Core API for the msgmodel library.
6
+
7
+ Provides a unified interface to query any supported LLM provider.
8
+ """
9
+
10
+ import os
11
+ import io
12
+ import base64
13
+ import mimetypes
14
+ import logging
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Optional, Dict, Any, Iterator, Union
18
+
19
+ from .config import (
20
+ Provider,
21
+ OpenAIConfig,
22
+ GeminiConfig,
23
+ ProviderConfig,
24
+ get_default_config,
25
+ OPENAI_API_KEY_ENV,
26
+ GEMINI_API_KEY_ENV,
27
+ OPENAI_API_KEY_FILE,
28
+ GEMINI_API_KEY_FILE,
29
+ )
30
+ from .exceptions import (
31
+ MsgModelError,
32
+ ConfigurationError,
33
+ AuthenticationError,
34
+ FileError,
35
+ APIError,
36
+ )
37
+ from .providers.openai import OpenAIProvider
38
+ from .providers.gemini import GeminiProvider
39
+
40
logger = logging.getLogger(__name__)

# MIME type constants
# NOTE(review): MIME_TYPE_PDF is not referenced in this module -- it may be
# used elsewhere in the package; confirm before removing.
MIME_TYPE_PDF = "application/pdf"
MIME_TYPE_OCTET_STREAM = "application/octet-stream"
# Text encoding used when reading API key files from disk.
FILE_ENCODING = "utf-8"
46
+
47
+
48
@dataclass
class LLMResponse:
    """
    Structured response from an LLM provider.

    Returned by :func:`query`; bundles the extracted text with the raw
    provider payload so callers can inspect provider-specific details.

    Attributes:
        text: The extracted text response
        raw_response: The complete raw API response
        model: The model that generated the response
        provider: The provider that was used
        usage: Token usage information (if available)
    """
    text: str                                  # extracted text of the reply
    raw_response: Dict[str, Any]               # full provider API payload
    model: str                                 # model that produced the reply
    provider: str                              # provider name (e.g. 'openai')
    usage: Optional[Dict[str, int]] = None     # token counts, when reported
65
+
66
+
67
def _get_api_key(
    provider: Provider,
    api_key: Optional[str] = None
) -> str:
    """
    Resolve the API key for a provider.

    Resolution priority:
        1. Directly provided ``api_key`` parameter
        2. Provider-specific environment variable
        3. Provider-specific key file in the current directory

    Args:
        provider: The LLM provider
        api_key: Optional directly provided API key

    Returns:
        The API key string

    Raises:
        AuthenticationError: If no API key can be found, or the key file
            exists but cannot be read
    """
    # 1. An explicit parameter always wins.
    if api_key:
        return api_key

    # Map providers to their env vars and files
    env_vars = {
        Provider.OPENAI: OPENAI_API_KEY_ENV,
        Provider.GEMINI: GEMINI_API_KEY_ENV,
    }

    key_files = {
        Provider.OPENAI: OPENAI_API_KEY_FILE,
        Provider.GEMINI: GEMINI_API_KEY_FILE,
    }

    # 2. Environment variable.
    env_var = env_vars[provider]
    key = os.environ.get(env_var)
    if key:
        return key

    # 3. Key file in the current working directory.
    key_file = key_files[provider]
    if Path(key_file).exists():
        try:
            with open(key_file, "r", encoding=FILE_ENCODING) as f:
                return f.read().strip()
        except IOError as e:
            # Chain the original error so the root cause is preserved.
            raise AuthenticationError(
                f"Failed to read API key file {key_file}: {e}"
            ) from e

    raise AuthenticationError(
        f"No API key found for {provider.value}. "
        f"Provide api_key parameter, set {env_var} environment variable, "
        f"or create {key_file} file."
    )
122
+
123
+
124
+ def _infer_mime_type(file_like: io.BytesIO, filename: Optional[str] = None) -> str:
125
+ """
126
+ Infer MIME type from filename or file content with fallback magic byte detection.
127
+
128
+ v3.2.1 Enhancement: Detects MIME type using multiple strategies:
129
+ 1. Filename-based detection (fastest, most reliable)
130
+ 2. Magic byte detection (fallback for files without extensions)
131
+ 3. Safe default (application/octet-stream)
132
+
133
+ Args:
134
+ file_like: BytesIO object to inspect
135
+ filename: Optional filename hint for MIME type detection
136
+
137
+ Returns:
138
+ MIME type string (e.g., 'image/png', 'application/pdf')
139
+ """
140
+ # Strategy 1: Try filename-based detection
141
+ if filename:
142
+ mime_type, _ = mimetypes.guess_type(filename)
143
+ if mime_type:
144
+ return mime_type
145
+
146
+ # Strategy 2: Magic byte detection for common file formats
147
+ try:
148
+ current_pos = file_like.tell()
149
+ file_like.seek(0)
150
+ magic_bytes = file_like.read(512)
151
+ file_like.seek(current_pos)
152
+
153
+ # Magic byte signatures for common formats
154
+ signatures = {
155
+ b'%PDF': 'application/pdf',
156
+ b'\x89PNG\r\n\x1a\n': 'image/png',
157
+ b'\xff\xd8\xff': 'image/jpeg',
158
+ b'GIF8': 'image/gif',
159
+ b'BM': 'image/bmp',
160
+ b'RIFF': 'audio/wav',
161
+ b'ID3': 'audio/mpeg',
162
+ b'PK\x03\x04': 'application/zip',
163
+ b'\x50\x4b\x03\x04': 'application/zip',
164
+ b'\xef\xbb\xbf<?xml': 'application/xml',
165
+ b'<?xml': 'application/xml',
166
+ }
167
+
168
+ for sig, mime_type in signatures.items():
169
+ if magic_bytes.startswith(sig):
170
+ return mime_type
171
+ except (AttributeError, IOError):
172
+ pass
173
+
174
+ # Strategy 3: Safe default
175
+ return MIME_TYPE_OCTET_STREAM
176
+
177
+
178
def _prepare_file_data(file_path: str) -> Dict[str, Any]:
    """
    Prepare file data from disk for API submission.

    Args:
        file_path: Path to the file on disk

    Returns:
        Dictionary with keys: 'mime_type', 'data' (base64 string),
        'filename', and 'is_file_like' (always False for disk files)

    Raises:
        FileError: If the file cannot be read
    """
    # Build the Path once and reuse it (the original re-parsed file_path
    # three times).
    path = Path(file_path)
    try:
        with open(path, "rb") as f:
            binary_content = f.read()
    except (FileNotFoundError, IOError, OSError) as e:
        # Chain the original error so the root cause is preserved.
        raise FileError(f"Failed to read file {file_path}: {e}") from e

    # MIME type inference: filename hint first, then magic bytes.
    mime_type = _infer_mime_type(io.BytesIO(binary_content), filename=path.name)

    encoded_data = base64.b64encode(binary_content).decode("utf-8")

    return {
        "mime_type": mime_type,
        "data": encoded_data,
        "filename": path.name,
        "is_file_like": False,  # Mark as disk file
    }
209
+
210
+
211
def _prepare_file_like_data(file_like: io.BytesIO, filename: Optional[str] = None) -> Dict[str, Any]:
    """
    Prepare file-like object data for API submission.

    Processes a BytesIO object entirely in memory (never touches disk).
    This is the only supported method for file upload in msgmodel v3.2.1+
    to ensure privacy and stateless operation.

    Args:
        file_like: An io.BytesIO object containing binary data
        filename: Optional filename hint (defaults to 'upload.bin')

    Returns:
        Dictionary with keys: 'mime_type', 'data' (base64 string),
        'filename', and 'is_file_like' (always True for in-memory uploads)

    Raises:
        FileError: If the file-like object cannot be read
    """
    try:
        # Read from the start so a partially-consumed stream still uploads
        # its full content.
        file_like.seek(0)
        binary_content = file_like.read()
        # Rewind so the caller can reuse the object afterwards.
        file_like.seek(0)
    except (AttributeError, IOError, OSError) as e:
        # Chain the original error so the root cause is preserved.
        raise FileError(f"Failed to read from file-like object: {e}") from e

    # MIME type inference: filename hint first, then magic bytes.
    mime_type = _infer_mime_type(file_like, filename)

    encoded_data = base64.b64encode(binary_content).decode("utf-8")

    return {
        "mime_type": mime_type,
        "data": encoded_data,
        "filename": filename or "upload.bin",
        "is_file_like": True,  # Mark as in-memory file
    }
251
+
252
+
253
+ def _validate_max_tokens(max_tokens: int) -> None:
254
+ """Validate max_tokens parameter."""
255
+ if max_tokens < 1:
256
+ raise ConfigurationError("max_tokens must be at least 1")
257
+ if max_tokens > 1000000:
258
+ logger.warning(f"max_tokens={max_tokens} is very large and may cause issues")
259
+
260
+
261
def query(
    provider: Union[str, Provider],
    prompt: str,
    api_key: Optional[str] = None,
    system_instruction: Optional[str] = None,
    file_like: Optional[io.BytesIO] = None,
    filename: Optional[str] = None,
    config: Optional[ProviderConfig] = None,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
) -> LLMResponse:
    """
    Query an LLM provider and return a structured response.

    This is the main entry point for the library. It provides a unified
    interface to all supported LLM providers.

    Args:
        provider: The LLM provider ('openai' or 'gemini', or 'o', 'g')
        prompt: The user prompt text
        api_key: API key (optional if set via env var or file)
        system_instruction: Optional system instruction/prompt
        file_like: Optional file-like object (io.BytesIO) - must be seekable
            This is the only method for file upload. Files are base64-encoded
            and embedded in prompts for privacy and stateless operation.
            Limited to practical API constraints (~15-20MB for OpenAI, ~22MB for Gemini).
        filename: Optional filename hint for MIME type detection when using file_like.
            If not provided, attempts to use file_like.name attribute. Defaults to 'upload.bin'
        config: Optional provider-specific configuration object
        max_tokens: Override for max tokens (convenience parameter)
        model: Override for model (convenience parameter)
        temperature: Override for temperature (convenience parameter)

    Returns:
        LLMResponse containing the text response and metadata

    Raises:
        ConfigurationError: For invalid configuration
        AuthenticationError: For API key issues
        FileError: For file-related issues
        APIError: For API call failures

    Examples:
        >>> # Simple query with env var API key
        >>> response = query("openai", "Hello, world!")
        >>> print(response.text)

        >>> # Query with in-memory file (privacy-focused, no disk access)
        >>> import io
        >>> file_obj = io.BytesIO(binary_content)
        >>> response = query(
        ...     "openai",
        ...     "Analyze this document",
        ...     file_like=file_obj,
        ...     filename="document.pdf",  # Enables proper MIME type detection
        ...     system_instruction="You are a document analyst"
        ... )

        >>> # Using .name attribute on BytesIO (alternative to filename param)
        >>> file_obj = io.BytesIO(binary_content)
        >>> file_obj.name = "image.png"  # Set name attribute for MIME detection
        >>> response = query("gemini", "Describe this image", file_like=file_obj)
    """
    # Accept both enum members and shorthand strings ('o', 'g', ...).
    if isinstance(provider, str):
        provider = Provider.from_string(provider)

    # Get API key
    key = _get_api_key(provider, api_key)

    # Get or create config
    if config is None:
        config = get_default_config(provider)

    # Apply convenience overrides.
    # NOTE(review): these mutate the caller-supplied config object in
    # place -- confirm callers do not reuse one config across calls.
    if max_tokens is not None:
        _validate_max_tokens(max_tokens)
        config.max_tokens = max_tokens
    if model is not None:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    # Encode the in-memory file (if any) for embedding in the request.
    file_data = None
    if file_like:
        # Use provided filename, fall back to .name attribute, then default
        file_hint = filename or getattr(file_like, 'name', 'upload.bin')
        file_data = _prepare_file_like_data(file_like, filename=file_hint)

    # Dispatch to the concrete provider. Only construction differs per
    # branch, so the identical query/extract calls are hoisted below.
    if provider == Provider.OPENAI:
        assert isinstance(config, OpenAIConfig)
        prov = OpenAIProvider(key, config)
    elif provider == Provider.GEMINI:
        assert isinstance(config, GeminiConfig)
        prov = GeminiProvider(key, config)
    else:
        # Should never reach here due to Provider enum, but maintain type safety
        raise ConfigurationError(f"Unsupported provider: {provider}")

    raw_response = prov.query(prompt, system_instruction, file_data)
    text = prov.extract_text(raw_response)

    return LLMResponse(
        text=text,
        raw_response=raw_response,
        model=config.model,
        provider=provider.value,
        usage=raw_response.get("usage"),  # None when the provider omits usage
    )
381
+
382
+
383
def stream(
    provider: Union[str, Provider],
    prompt: str,
    api_key: Optional[str] = None,
    system_instruction: Optional[str] = None,
    file_path: Optional[str] = None,
    file_like: Optional[io.BytesIO] = None,
    filename: Optional[str] = None,
    config: Optional[ProviderConfig] = None,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    timeout: float = 300,
    on_chunk: Optional[Any] = None,
) -> Iterator[str]:
    """
    Stream a response from an LLM provider.

    Similar to query(), but yields text chunks as they arrive instead
    of waiting for the complete response.

    Args:
        provider: The LLM provider ('openai' or 'gemini', or 'o', 'g').
            (Fix: the previous docstring also listed 'claude'/'c', which
            this function does not support -- any other provider raises
            ConfigurationError below.)
        prompt: The user prompt text
        api_key: API key (optional if set via env var or file)
        system_instruction: Optional system instruction/prompt
        file_path: Optional path to a file (image, PDF, etc.)
        file_like: Optional file-like object (io.BytesIO) - must be seekable
        filename: Optional filename hint for MIME type detection when using file_like.
            If not provided, attempts to use file_like.name attribute. Defaults to 'upload.bin'
        config: Optional provider-specific configuration object
        max_tokens: Override for max tokens (convenience parameter)
        model: Override for model (convenience parameter)
        temperature: Override for temperature (convenience parameter)
        timeout: Timeout in seconds for streaming connection (default: 300s/5min). v3.2.1+
        on_chunk: Optional callback(chunk) -> bool. Return False to abort stream. v3.2.1+

    Yields:
        Text chunks as they arrive from the API

    Raises:
        ConfigurationError: For invalid configuration or file conflicts
        AuthenticationError: For API key issues
        FileError: For file-related issues
        APIError: For API call failures
        StreamingError: For streaming-specific issues

    Examples:
        >>> # Stream response to prompt
        >>> for chunk in stream("openai", "Tell me a story"):
        ...     print(chunk, end="", flush=True)

        >>> # Stream with file attachment from disk
        >>> for chunk in stream("gemini", "Summarize this PDF", file_path="document.pdf"):
        ...     print(chunk, end="", flush=True)

        >>> # Stream with in-memory file (privacy-focused, no disk access)
        >>> import io
        >>> file_obj = io.BytesIO(uploaded_file_bytes)
        >>> for chunk in stream(
        ...     "openai",
        ...     "Analyze this uploaded file",
        ...     file_like=file_obj,
        ...     filename="document.pdf",  # Enables proper MIME type detection
        ...     system_instruction="Provide detailed analysis"
        ... ):
        ...     print(chunk, end="", flush=True)
    """
    # Accept both enum members and shorthand strings ('o', 'g', ...).
    if isinstance(provider, str):
        provider = Provider.from_string(provider)

    # Check for mutually exclusive file parameters
    if file_path is not None and file_like is not None:
        raise ConfigurationError(
            "Cannot specify both file_path and file_like. "
            "Use file_path for disk files or file_like for in-memory BytesIO objects, not both."
        )

    # Get API key
    key = _get_api_key(provider, api_key)

    # Get or create config
    if config is None:
        config = get_default_config(provider)

    # Apply convenience overrides.
    # NOTE(review): these mutate the caller-supplied config object in
    # place -- confirm callers do not reuse one config across calls.
    if max_tokens is not None:
        _validate_max_tokens(max_tokens)
        config.max_tokens = max_tokens
    if model is not None:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    # Prepare file data from disk or from the in-memory object.
    file_data = None
    if file_path:
        file_data = _prepare_file_data(file_path)
    elif file_like:
        # Use provided filename, fall back to .name attribute, then default
        file_hint = filename or getattr(file_like, 'name', 'upload.bin')
        file_data = _prepare_file_like_data(file_like, filename=file_hint)

    # Dispatch to the concrete provider. Only construction differs per
    # branch, so the identical streaming call is hoisted below.
    if provider == Provider.OPENAI:
        assert isinstance(config, OpenAIConfig)
        prov = OpenAIProvider(key, config)
    elif provider == Provider.GEMINI:
        assert isinstance(config, GeminiConfig)
        prov = GeminiProvider(key, config)
    else:
        # Should never reach here due to Provider enum, but maintain type safety
        raise ConfigurationError(f"Unsupported provider: {provider}")

    yield from prov.stream(prompt, system_instruction, file_data, timeout=timeout, on_chunk=on_chunk)
msgmodel/exceptions.py ADDED
@@ -0,0 +1,93 @@
1
+ """
2
+ msgmodel.exceptions
3
+ ~~~~~~~~~~~~~~~~~~~
4
+
5
+ Custom exceptions for the msgmodel library.
6
+
7
+ All exceptions inherit from MsgModelError, allowing callers to catch
8
+ all library-specific errors with a single except clause.
9
+ """
10
+
11
+
12
class MsgModelError(Exception):
    """Root of the msgmodel exception hierarchy.

    Catching this single type handles every error the library raises.
    """
15
+
16
+
17
class ConfigurationError(MsgModelError):
    """Invalid or incomplete configuration.

    Typical triggers include an unknown provider name, an out-of-range
    max_tokens value, or a missing required parameter.
    """
27
+
28
+
29
class AuthenticationError(MsgModelError):
    """API authentication failure.

    Typical triggers include a missing or invalid API key, or an API key
    file that cannot be found or read.
    """
39
+
40
+
41
class FileError(MsgModelError):
    """File operation failure.

    Typical triggers include a missing file, an unreadable file, or an
    invalid file format.
    """
51
+
52
+
53
class APIError(MsgModelError):
    """Failure reported by (or while calling) a provider API.

    Attributes:
        status_code: HTTP status code of the failed response, if known.
        response_text: Raw body of the failed response, if captured.
    """

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        response_text: str | None = None
    ):
        # Keep the extra context on the instance so callers can branch on
        # the status code; the message itself lives on the base Exception.
        self.status_code = status_code
        self.response_text = response_text
        super().__init__(message)
71
+
72
+
73
class ProviderError(MsgModelError):
    """Provider-specific failure.

    Typical triggers include a file type the provider cannot accept, a
    provider-side validation failure, or a missing provider dependency
    (e.g. the anthropic package).
    """
83
+
84
+
85
class StreamingError(MsgModelError):
    """Streaming-specific failure.

    Typical triggers include a connection interrupted mid-stream or an
    invalid streaming response format.
    """
@@ -0,0 +1,11 @@
1
+ """
2
+ msgmodel.providers
3
+ ~~~~~~~~~~~~~~~~~~
4
+
5
+ Provider-specific implementations for LLM API calls.
6
+ """
7
+
8
+ from .openai import OpenAIProvider
9
+ from .gemini import GeminiProvider
10
+
11
+ __all__ = ["OpenAIProvider", "GeminiProvider"]