ai-synapse 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_engine/__init__.py +25 -0
- ai_engine/_engine.py +121 -0
- ai_engine/_exceptions.py +89 -0
- ai_engine/anthropic.py +33 -0
- ai_engine/openai.py +120 -0
- ai_engine/py.typed +0 -0
- ai_engine/resources/__init__.py +2 -0
- ai_engine/resources/chat.py +135 -0
- ai_engine/resources/models.py +48 -0
- ai_engine/types/__init__.py +171 -0
- ai_synapse-4.0.0.dist-info/METADATA +257 -0
- ai_synapse-4.0.0.dist-info/RECORD +46 -0
- ai_synapse-4.0.0.dist-info/WHEEL +5 -0
- ai_synapse-4.0.0.dist-info/licenses/LICENSE +21 -0
- ai_synapse-4.0.0.dist-info/top_level.txt +2 -0
- core/__init__.py +1 -0
- core/advanced_features.py +217 -0
- core/ai_engine.py +2044 -0
- core/api_versioning.py +133 -0
- core/batch.py +134 -0
- core/billing.py +262 -0
- core/caching.py +215 -0
- core/capabilities.py +305 -0
- core/chat_intelligence.py +279 -0
- core/config_sync.py +300 -0
- core/enterprise.py +336 -0
- core/error_codes.py +224 -0
- core/health_monitor.py +198 -0
- core/infrastructure.py +286 -0
- core/intelligent_router.py +440 -0
- core/latency_tracker.py +158 -0
- core/load_test.py +186 -0
- core/logging_sla.py +293 -0
- core/middleware.py +199 -0
- core/model_cache.py +202 -0
- core/plugin_system.py +189 -0
- core/provider_requests.py +454 -0
- core/py.typed +0 -0
- core/rate_limit_manager.py +128 -0
- core/request_queue.py +105 -0
- core/response_cache.py +200 -0
- core/session_backup.py +243 -0
- core/statistics_manager.py +253 -0
- core/stress_test.py +313 -0
- core/usage_tracker.py +125 -0
- core/workflow_engine.py +322 -0
ai_engine/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""AI Synapse SDK — Drop-in OpenAI & Anthropic compatibility with free multi-provider routing."""
|
|
2
|
+
|
|
3
|
+
__version__ = "4.0.0"
|
|
4
|
+
|
|
5
|
+
from .openai import OpenAI, AsyncOpenAI
|
|
6
|
+
from ._engine import AIEngine, get_engine, set_engine, _global_config
|
|
7
|
+
from ._exceptions import (
|
|
8
|
+
AIEngineError,
|
|
9
|
+
OpenAIError,
|
|
10
|
+
BadRequestError,
|
|
11
|
+
AuthenticationError,
|
|
12
|
+
RateLimitError,
|
|
13
|
+
InternalServerError,
|
|
14
|
+
NotFoundError,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Anthropic import is guarded; the classes are placeholders that raise NotImplementedError when constructed
|
|
18
|
+
try:
|
|
19
|
+
from .anthropic import Anthropic, AsyncAnthropic
|
|
20
|
+
except ImportError:
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
def use(**kwargs):
    """Store process-wide SDK settings in the shared global config mapping.

    Each keyword argument is written into ``_global_config`` under its own
    name, replacing any value previously stored under that key.
    """
    for option, value in kwargs.items():
        _global_config[option] = value
|
ai_engine/_engine.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Shared AI Synapse engine singleton — initialized once, used by all SDK classes."""
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Any, Optional
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger("ai_engine")
|
|
9
|
+
|
|
10
|
+
_global_config: Dict[str, Any] = {}
|
|
11
|
+
_engine_instance = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _load_config_json(config_path: Optional[str] = None) -> Dict[str, Any]:
    """Load a JSON configuration file.

    Args:
        config_path: Path of the file to read. When falsy, ``config.json``
            located in the same directory as this module is used.

    Returns:
        The parsed JSON document, or an empty dict when the path does not
        point at a regular file.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if config_path:
        path = Path(config_path)
    else:
        path = Path(__file__).parent / "config.json"

    # is_file() rather than exists(): a directory path must fall through to
    # the empty default instead of failing inside open().
    if not path.is_file():
        return {}

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _resolve_config(config=None, cdn_config=None, **kwargs) -> Dict[str, Any]:
    """Build the effective configuration dict for one client or engine.

    Layers are applied in this order, each overriding the previous one:

    1. Base: ``config`` as a dict (shallow-copied), as a path to a JSON file,
       or the default ``config.json`` when ``config`` is neither.
    2. The module-level ``_global_config``.
    3. Keyword arguments: ``api_keys`` (merged per provider), ``timeout``,
       ``max_retries``, ``default_provider``, plus ``cdn_config`` which is
       stored under ``"cdn_config_url"``.
    4. Environment variables ``AI_ENGINE_CDN_CONFIG``, ``AI_ENGINE_TIMEOUT``
       (converted to ``int``) and ``AI_ENGINE_DEFAULT_PROVIDER``.
    5. Environment variables ``AI_ENGINE_API_KEY_<PROVIDER>``; the provider
       name is lower-cased.

    Neither the ``config`` dict passed in nor ``_global_config`` is modified.

    Raises:
        ValueError: If ``AI_ENGINE_TIMEOUT`` is set but is not an integer.
    """
    # 1. Base layer
    if isinstance(config, str):
        base = _load_config_json(config)
    elif isinstance(config, dict):
        base = config.copy()
    else:
        base = _load_config_json()

    # 2. Global settings
    base.update(_global_config)

    # After the shallow copy/update above, base["api_keys"] can be the very
    # dict owned by the caller or by _global_config. Merge into a private
    # copy so keys never leak from one resolved config into another.
    has_api_keys = "api_keys" in base
    api_keys = dict(base.get("api_keys") or {})

    # 3. Constructor keyword arguments
    if "api_keys" in kwargs:
        has_api_keys = True
        api_keys.update(kwargs["api_keys"] or {})
    for key in ("timeout", "max_retries", "default_provider"):
        if key in kwargs:
            base[key] = kwargs[key]
    if cdn_config is not None:
        base["cdn_config_url"] = cdn_config

    # 4. Environment variable overrides
    env_prefix = "AI_ENGINE_"
    env_map = {
        env_prefix + "CDN_CONFIG": "cdn_config_url",
        env_prefix + "TIMEOUT": "timeout",
        env_prefix + "DEFAULT_PROVIDER": "default_provider",
    }
    for env_var, config_key in env_map.items():
        val = os.environ.get(env_var)
        if val is None:
            continue
        if config_key == "timeout":
            try:
                val = int(val)
            except ValueError:
                raise ValueError(
                    f"{env_var} must be an integer number of seconds, got {val!r}"
                ) from None
        base[config_key] = val

    # 5. Per-provider API keys: AI_ENGINE_API_KEY_{PROVIDER}
    key_prefix = env_prefix + "API_KEY_"
    for env_key, env_val in os.environ.items():
        if env_key.startswith(key_prefix):
            api_keys[env_key[len(key_prefix):].lower()] = env_val

    # Only emit the key when some layer supplied it, as before.
    if has_api_keys or api_keys:
        base["api_keys"] = api_keys

    return base
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _init_engine(config: Dict[str, Any]):
    """Create an ``AI_engine`` and apply settings from a resolved config.

    Args:
        config: Resolved configuration. ``config["api_keys"]`` maps provider
            name to a key; ``config["providers"]`` maps provider name to a
            dict of fields to overwrite on that provider's entry.

    Returns:
        The new engine instance. Providers named in ``config`` that the
        engine does not know are skipped silently.
    """
    import sys

    # ``core`` is imported as a top-level package living next to this one, so
    # the directory containing both must be importable. Keep it at the front
    # of sys.path, but do not stack a duplicate entry on every call.
    package_root = str(Path(__file__).parent.parent)
    if not sys.path or sys.path[0] != package_root:
        sys.path.insert(0, package_root)
    from core.ai_engine import AI_engine

    engine = AI_engine(verbose=False)

    # Set API keys from config; supplying a key also enables the provider.
    api_keys = config.get("api_keys") or {}
    for provider_name, key in api_keys.items():
        if provider_name in engine.providers:
            engine.providers[provider_name]["api_keys"] = [key]
            engine.providers[provider_name]["enabled"] = True

    # Apply provider priority/enable overrides. These run after the key
    # handling above, so an explicit override of "enabled" or "api_keys" wins.
    provider_overrides = config.get("providers") or {}
    for provider_name, overrides in provider_overrides.items():
        if provider_name in engine.providers:
            for k, v in overrides.items():
                engine.providers[provider_name][k] = v

    return engine
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_engine(config=None, **kwargs):
    """Return the shared engine, building it on first use.

    ``config`` and ``kwargs`` are only consulted when no engine exists yet;
    once an instance is stored they are ignored and that instance is returned.
    """
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = _init_engine(_resolve_config(config, **kwargs))
    return _engine_instance
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def set_engine(engine):
    """Install ``engine`` as the shared instance that ``get_engine`` returns."""
    globals()["_engine_instance"] = engine
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def reset_engine():
    """Drop the shared engine so the next lookup builds a new one (for testing)."""
    globals()["_engine_instance"] = None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Placeholder client class: it currently defines no attributes or methods
|
|
119
|
+
class AIEngine:
    """Advanced AI Engine client (placeholder).

    The class body is empty: instances carry no state and no methods beyond
    those inherited from ``object``.
    """
|
ai_engine/_exceptions.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Exception hierarchy for AI Synapse SDK."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AIEngineError(Exception):
    """Base exception for AI Engine SDK.

    Attributes:
        message: Text of the error; ``"An error occurred"`` when no message
            (or an empty one) is supplied.
        status_code: HTTP-style status code, or ``None``.
        error_type: Error category string, or ``None``.
        param: Name of the offending request parameter, or ``None``.
        code: Error code, or ``None``.
    """

    def __init__(self, message=None, status_code=None, error_type=None, param=None, code=None):
        # Resolve the fallback once so ``str(err)`` and ``err.message`` agree.
        self.message = message or "An error occurred"
        self.status_code = status_code
        self.error_type = error_type
        self.param = param
        self.code = code
        super().__init__(self.message)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OpenAIError(AIEngineError):
    """Root of the OpenAI-style error family; adds nothing to ``AIEngineError``."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BadRequestError(OpenAIError):
    """Error with status code 400 and type ``invalid_request_error``.

    Attributes:
        response: The ``response`` argument as passed in, or ``None``.
        body: The ``body`` argument as passed in, or ``None``.
    """

    def __init__(self, message=None, response=None, body=None, **kwargs):
        super().__init__(message, status_code=400, error_type="invalid_request_error", **kwargs)
        # Keep the raw response/body available to handlers instead of
        # discarding them.
        self.response = response
        self.body = body
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AuthenticationError(OpenAIError):
    """Error with status code 401 and type ``authentication_error``.

    Attributes:
        response: The ``response`` argument as passed in, or ``None``.
        body: The ``body`` argument as passed in, or ``None``.
    """

    def __init__(self, message=None, response=None, body=None, **kwargs):
        super().__init__(message, status_code=401, error_type="authentication_error", **kwargs)
        # Keep the raw response/body available to handlers instead of
        # discarding them.
        self.response = response
        self.body = body
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PermissionDeniedError(OpenAIError):
    """Error with status code 403 and type ``permission_error``.

    Attributes:
        response: The ``response`` argument as passed in, or ``None``.
        body: The ``body`` argument as passed in, or ``None``.
    """

    def __init__(self, message=None, response=None, body=None, **kwargs):
        super().__init__(message, status_code=403, error_type="permission_error", **kwargs)
        # Keep the raw response/body available to handlers instead of
        # discarding them.
        self.response = response
        self.body = body
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class NotFoundError(OpenAIError):
    """Error with status code 404 and type ``not_found_error``.

    Attributes:
        response: The ``response`` argument as passed in, or ``None``.
        body: The ``body`` argument as passed in, or ``None``.
    """

    def __init__(self, message=None, response=None, body=None, **kwargs):
        super().__init__(message, status_code=404, error_type="not_found_error", **kwargs)
        # Keep the raw response/body available to handlers instead of
        # discarding them.
        self.response = response
        self.body = body
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class RateLimitError(OpenAIError):
    """Error with status code 429 and type ``rate_limit_error``.

    Attributes:
        response: The ``response`` argument as passed in, or ``None``.
        body: The ``body`` argument as passed in, or ``None``.
    """

    def __init__(self, message=None, response=None, body=None, **kwargs):
        super().__init__(message, status_code=429, error_type="rate_limit_error", **kwargs)
        # Keep the raw response/body available to handlers instead of
        # discarding them.
        self.response = response
        self.body = body
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class InternalServerError(OpenAIError):
    """Error with status code 500 and type ``server_error``.

    Attributes:
        response: The ``response`` argument as passed in, or ``None``.
        body: The ``body`` argument as passed in, or ``None``.
    """

    def __init__(self, message=None, response=None, body=None, **kwargs):
        super().__init__(message, status_code=500, error_type="server_error", **kwargs)
        # Keep the raw response/body available to handlers instead of
        # discarding them.
        self.response = response
        self.body = body
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class AnthropicError(AIEngineError):
    """Root of the Anthropic-style error family; adds nothing to ``AIEngineError``."""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class AnthropicBadRequestError(AnthropicError):
    """Anthropic-style error fixed to status 400 / ``invalid_request_error``."""

    def __init__(self, message=None):
        fixed_fields = {"status_code": 400, "error_type": "invalid_request_error"}
        super().__init__(message, **fixed_fields)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AnthropicAuthenticationError(AnthropicError):
    """Anthropic-style error fixed to status 401 / ``authentication_error``."""

    def __init__(self, message=None):
        fixed_fields = {"status_code": 401, "error_type": "authentication_error"}
        super().__init__(message, **fixed_fields)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AnthropicRateLimitError(AnthropicError):
    """Anthropic-style error fixed to status 429 / ``rate_limit_error``."""

    def __init__(self, message=None):
        fixed_fields = {"status_code": 429, "error_type": "rate_limit_error"}
        super().__init__(message, **fixed_fields)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def raise_for_status(status_code, error_body):
    """Raise the exception class mapped to ``status_code``.

    This function always raises; it performs no success check on
    ``status_code``. Codes 400, 401, 403, 404, 429 and 500 map to their
    dedicated classes, every other code raises a plain ``OpenAIError``
    carrying the given ``status_code`` and the ``type`` found in the body.

    Args:
        status_code: Status code of the failed call.
        error_body: Parsed error payload. Expected shape is
            ``{"error": {"message", "type", "param", "code"}}``; a missing or
            non-dict payload, or an ``"error"`` value that is a plain string,
            is tolerated.

    Raises:
        OpenAIError: Or one of its status-specific subclasses.
    """
    # Normalise the payload first so a malformed body still produces the
    # intended SDK exception rather than an AttributeError.
    error_dict = error_body.get("error") if isinstance(error_body, dict) else None
    if isinstance(error_dict, str):
        error_dict = {"message": error_dict}
    elif not isinstance(error_dict, dict):
        error_dict = {}

    message = error_dict.get("message", "Unknown error")
    error_type = error_dict.get("type", "unknown")
    param = error_dict.get("param")
    code = error_dict.get("code")

    exc_map = {
        400: BadRequestError,
        401: AuthenticationError,
        403: PermissionDeniedError,
        404: NotFoundError,
        429: RateLimitError,
        500: InternalServerError,
    }

    exc_cls = exc_map.get(status_code, OpenAIError)
    if exc_cls is OpenAIError:
        raise OpenAIError(
            message=message,
            status_code=status_code,
            error_type=error_type,
            param=param,
            code=code,
        )
    # The mapped subclasses fix status_code and error_type themselves, so
    # those two must not be passed again.
    raise exc_cls(message=message, param=param, code=code)
|
ai_engine/anthropic.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Anthropic SDK compatibility — placeholder for future implementation."""
|
|
2
|
+
from ._engine import get_engine
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Anthropic:
    """Placeholder for an ``anthropic.Anthropic``-compatible client.

    Not implemented: constructing an instance always raises
    ``NotImplementedError``. The intended future call shape is::

        from ai_engine import Anthropic
        client = Anthropic(api_key="dummy")
        response = client.messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=100,
            messages=[{"role": "user", "content": "Hello!"}]
        )
    """

    def __init__(self, *, api_key: str = "dummy", **kwargs):
        """Record ``api_key`` and then raise ``NotImplementedError``."""
        self._api_key = api_key
        reason = (
            "Anthropic compatibility is coming in a future release. "
            "Use OpenAI compatibility instead: from ai_engine import OpenAI"
        )
        raise NotImplementedError(reason)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AsyncAnthropic:
    """Placeholder for an async Anthropic-compatible client.

    Not implemented: constructing an instance always raises
    ``NotImplementedError``.
    """

    def __init__(self, **kwargs):
        """Raise ``NotImplementedError``; all keyword arguments are ignored."""
        reason = (
            "AsyncAnthropic compatibility is coming in a future release. "
            "Use AsyncOpenAI instead: from ai_engine import AsyncOpenAI"
        )
        raise NotImplementedError(reason)
|
ai_engine/openai.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Drop-in replacement for openai.OpenAI — routes through AI Synapse core."""
|
|
2
|
+
from functools import cached_property
|
|
3
|
+
from typing import Dict, Any, Optional
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from ._engine import get_engine, _resolve_config, _init_engine
|
|
7
|
+
from .resources.chat import Completions
|
|
8
|
+
from .resources.models import Models
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger("ai_engine")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _ChatNamespace:
    """Object returned by ``client.chat``; its only member is ``completions``."""

    def __init__(self, engine):
        # One Completions resource is built up front and bound to ``engine``.
        resource = Completions(engine)
        self._completions = resource

    @property
    def completions(self) -> Completions:
        """The ``Completions`` resource created for this namespace."""
        return self._completions
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class OpenAI:
    """Synchronous client with an ``openai.OpenAI``-shaped surface.

    Each instance resolves its own configuration, builds its own engine from
    it, and exposes ``chat.completions`` and ``models`` bound to that engine.

    Example:
        from ai_engine import OpenAI

        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": "Hello!"}]
        )
        print(response.choices[0].message.content)
    """

    def __init__(
        self,
        *,
        api_key: str = "dummy",
        base_url: str = None,
        config=None,
        cdn_config: str = None,
        timeout: int = 30,
        max_retries: int = 2,
        api_keys: Dict[str, str] = None,
        **kwargs,
    ):
        """Resolve configuration and create the engine and resources.

        Args:
            api_key: Accepted for signature compatibility; not read here.
            base_url: Accepted for signature compatibility; not read here.
            config: Dict or JSON file path forwarded to ``_resolve_config``.
            cdn_config: Forwarded to ``_resolve_config``.
            timeout: Forwarded to ``_resolve_config``.
            max_retries: Forwarded to ``_resolve_config``.
            api_keys: Provider-name to key mapping; ``None`` becomes ``{}``.
            **kwargs: Forwarded unchanged to ``_resolve_config``.
        """
        resolver_options = dict(
            kwargs,
            api_keys=api_keys or {},
            timeout=timeout,
            max_retries=max_retries,
        )
        self._config = _resolve_config(
            config=config,
            cdn_config=cdn_config,
            **resolver_options,
        )
        engine = _init_engine(self._config)
        self._engine = engine
        self._chat = _ChatNamespace(engine)
        self._models = Models(engine)

    @property
    def chat(self) -> _ChatNamespace:
        """Namespace holding the chat completions resource."""
        return self._chat

    @property
    def models(self) -> Models:
        """Resource used for listing and retrieving models."""
        return self._models

    def config_status(self):
        """Return the CDN config fetcher's status.

        Falls back to ``{"enabled": False}`` when an ``ImportError`` is raised.
        """
        try:
            from core.config_sync import config_fetcher as fetcher
            status = fetcher.get_status()
        except ImportError:
            status = {"enabled": False}
        return status

    def refresh_config(self):
        """Delete the cached CDN config files and fetch the config again.

        Best effort: any exception is logged as a warning and not re-raised.
        """
        try:
            from core.config_sync import config_fetcher, CACHE_META, CACHE_FILE
            for cached_path in (CACHE_META, CACHE_FILE):
                cached_path.unlink(missing_ok=True)
            config_fetcher.fetch_and_apply()
        except Exception as e:
            logger.warning(f"CDN refresh failed: {e}")

    def check_image_compatibility(self, provider: str, model: str = None):
        """Delegate an image-support query to the capability manager."""
        from core.capabilities import capability_manager as manager
        return manager.check_image_compatibility(provider, model)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def _run_in_worker(func, *args, **kwargs):
    """Run blocking ``func`` in the loop's default executor and await its result."""
    import asyncio
    import functools

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))


async def _replay_chunks(chunks):
    """Expose an already-collected sequence of chunks as an async iterator."""
    for chunk in chunks:
        yield chunk


class _AsyncCompletions:
    """Awaitable facade over a synchronous completions resource."""

    def __init__(self, completions):
        self._completions = completions

    async def create(self, **kwargs):
        """Await a chat completion.

        Returns the synchronous resource's result, or, when ``stream=True``,
        an async iterator over the chunks it produced.
        """
        if not kwargs.get("stream"):
            return await _run_in_worker(self._completions.create, **kwargs)
        # Drain the synchronous chunk iterable inside the worker so no
        # blocking call runs on the event loop thread.
        chunks = await _run_in_worker(lambda: list(self._completions.create(**kwargs)))
        return _replay_chunks(chunks)


class _AsyncChatNamespace:
    """Async counterpart of ``client.chat``; exposes ``completions``."""

    def __init__(self, chat):
        self._completions = _AsyncCompletions(chat.completions)

    @property
    def completions(self):
        return self._completions


class _AsyncModels:
    """Awaitable facade over a synchronous models resource."""

    def __init__(self, models):
        self._models = models

    async def list(self, **kwargs):
        """Await the synchronous ``list`` call."""
        return await _run_in_worker(self._models.list, **kwargs)

    async def retrieve(self, model, **kwargs):
        """Await the synchronous ``retrieve`` call."""
        return await _run_in_worker(self._models.retrieve, model, **kwargs)


class AsyncOpenAI:
    """Async client with an ``openai.AsyncOpenAI``-shaped surface.

    Wraps a synchronous ``OpenAI`` client; each resource call runs in the
    event loop's default executor, so results must be awaited.

    Usage:
        from ai_engine import AsyncOpenAI

        async def main():
            client = AsyncOpenAI()
            response = await client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": "Hello!"}]
            )
            print(response.choices[0].message.content)

    With ``stream=True`` the awaited value is an async iterator, consumed
    with ``async for``.
    """

    def __init__(self, **kwargs):
        """Create the wrapped synchronous client; ``kwargs`` are passed to ``OpenAI``."""
        self._sync_client = OpenAI(**kwargs)

    @property
    def chat(self):
        """Async chat namespace bound to the wrapped client's ``chat``."""
        return _AsyncChatNamespace(self._sync_client.chat)

    @property
    def models(self):
        """Async models resource bound to the wrapped client's ``models``."""
        return _AsyncModels(self._sync_client.models)
|
ai_engine/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Chat completions resource — wraps AI_engine.chat_completion()."""
|
|
2
|
+
import time
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import List, Dict, Any, Optional, Iterator, Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Completions:
    """Chat.Completions resource — client.chat.completions.create(...)

    Wraps the engine's ``chat_completion()`` call and shapes its result as a
    ``ChatCompletion``, or as a series of ``ChatCompletionChunk`` objects
    when streaming.
    """

    def __init__(self, engine):
        self._engine = engine

    def create(
        self,
        *,
        model: str = "auto",
        messages: List[Dict[str, str]],
        stream: bool = False,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        stop: Optional[List[str]] = None,
        n: int = 1,
        user: Optional[str] = None,
        **kwargs,
    ):
        """Create a chat completion.

        Only ``messages``, ``model`` and the ``preferred_provider`` /
        ``force_provider`` entries of ``kwargs`` are passed to the engine;
        the other parameters are accepted but not forwarded by this method.

        Args:
            model: Model name; ``"auto"`` is sent to the engine as ``None``.
            messages: Chat messages as dicts.
            stream: When true, return a generator of chunks instead.

        Returns:
            A ``ChatCompletion``, or a generator of ``ChatCompletionChunk``
            when ``stream`` is true. Token counts in ``usage`` are
            whitespace-separated word counts, not tokenizer output.

        Raises:
            InternalServerError: If the engine returns nothing or a result
                whose ``success`` is falsy. In streaming mode this surfaces
                on first iteration of the returned generator.
        """
        if stream:
            return self._stream(model, messages, temperature=temperature,
                                max_tokens=max_tokens, **kwargs)

        from ..types import ChatCompletion, ChatCompletionChoice, ChatCompletionMessage, Usage

        result = self._request(model, messages, kwargs)

        # Build OpenAI-compatible response. Each count is clamped to at
        # least 1 and the total is derived from the clamped values so the
        # three numbers always add up.
        prompt_tokens = max(1, sum(self._count_words(m.get("content", "")) for m in messages))
        completion_tokens = max(1, self._count_words(result.content))

        return ChatCompletion(
            id=f"chatcmpl-{uuid.uuid4().hex[:24]}",
            object="chat.completion",
            created=int(time.time()),
            model=result.model_used or model,
            choices=[ChatCompletionChoice(
                index=0,
                message=ChatCompletionMessage(
                    role="assistant",
                    content=result.content,
                ),
                finish_reason="stop",
            )],
            usage=Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )

    def _stream(self, model, messages, temperature=None, max_tokens=None, **kwargs):
        """Yield ChatCompletionChunk objects for streaming.

        The full completion is obtained from the engine first and then
        re-emitted piece by piece: a role chunk, one chunk per
        space-separated word, and a final chunk with ``finish_reason="stop"``.
        """
        from ..types import ChatCompletionChunk, ChatCompletionChunkChoice, ChatCompletionChunkDelta

        completion_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
        created = int(time.time())

        result = self._request(model, messages, kwargs)

        actual_model = result.model_used or model
        content = getattr(result, "content", "") or ""

        def make_chunk(delta, finish_reason=None):
            # All chunks of one stream share id, timestamp and model.
            return ChatCompletionChunk(
                id=completion_id,
                object="chat.completion.chunk",
                created=created,
                model=actual_model,
                choices=[ChatCompletionChunkChoice(
                    index=0,
                    delta=delta,
                    finish_reason=finish_reason,
                )],
            )

        # First chunk: role
        yield make_chunk(ChatCompletionChunkDelta(role="assistant", content=""))

        # Content chunks (word by word)
        for piece in self._split_stream_content(content):
            yield make_chunk(ChatCompletionChunkDelta(content=piece))

        # Final chunk
        yield make_chunk(ChatCompletionChunkDelta(), finish_reason="stop")

    def _request(self, model, messages, options):
        """Call the engine and return its result, raising on failure."""
        result = self._engine.chat_completion(
            messages=messages,
            model=model if model != "auto" else None,
            preferred_provider=options.get("preferred_provider"),
            force_provider=options.get("force_provider", False),
        )
        if not result or not getattr(result, "success", False):
            from .._exceptions import InternalServerError
            error_msg = getattr(result, "error_message", "Unknown error") if result else "No response"
            raise InternalServerError(message=error_msg)
        return result

    @staticmethod
    def _count_words(content) -> int:
        """Count whitespace-separated words in a message's content.

        Accepts a string, ``None``, or a list of part dicts, in which case
        only string ``"text"`` entries are counted. Anything else counts as 0.
        """
        if isinstance(content, str):
            return len(content.split())
        if isinstance(content, (list, tuple)):
            total = 0
            for part in content:
                text = part.get("text") if isinstance(part, dict) else None
                if isinstance(text, str):
                    total += len(text.split())
            return total
        return 0

    @staticmethod
    def _split_stream_content(content: str) -> List[str]:
        """Split ``content`` into pieces whose concatenation equals ``content``.

        The separating space is attached to the front of every piece after
        the first, and to nothing else, so no space is emitted twice.
        """
        words = content.split(" ")
        return [word if i == 0 else " " + word for i, word in enumerate(words)]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Models resource — wraps AI_engine model discovery."""
|
|
2
|
+
import time
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Models:
    """Models resource — client.models.list(), client.models.retrieve()"""

    def __init__(self, engine):
        self._engine = engine

    @staticmethod
    def _owner_of(model_id: str) -> str:
        """Return the text before the first ``/`` in ``model_id``.

        Returns ``"unknown"`` when the id contains no ``/``.
        """
        owner, slash, _ = model_id.partition("/")
        return owner if slash else "unknown"

    def list(self, **kwargs):
        """List all available models.

        Model ids come from the shared model cache. The result is empty when
        the cache reports itself invalid or the cache module cannot be
        imported. Every entry of one listing carries the same ``created``
        timestamp, taken when the listing is built.
        """
        from ..types import ModelList, Model

        # Use shared model cache for fast listing
        try:
            from core.model_cache import shared_model_cache
            if shared_model_cache.is_cache_valid():
                model_ids = shared_model_cache.get_models()
            else:
                model_ids = []
        except ImportError:
            model_ids = []

        created = int(time.time())
        models = [
            Model(
                id=model_id,
                object="model",
                created=created,
                owned_by=self._owner_of(model_id),
            )
            for model_id in model_ids
        ]

        return ModelList(object="list", data=models)

    def retrieve(self, model: str, **kwargs):
        """Retrieve a single model by ID.

        The ``Model`` is built directly from the given id; this method does
        not check whether that id is known to the cache or the engine.
        """
        from ..types import Model

        return Model(
            id=model,
            object="model",
            created=int(time.time()),
            owned_by=self._owner_of(model),
        )
|