neuralbridge-sdk 0.1.0 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neuralbridge_sdk-0.1.0/LICENSE +21 -0
- neuralbridge_sdk-0.1.0/PKG-INFO +63 -0
- neuralbridge_sdk-0.1.0/README.md +40 -0
- neuralbridge_sdk-0.1.0/neuralbridge/__init__.py +7 -0
- neuralbridge_sdk-0.1.0/neuralbridge/adapters/__init__.py +4 -0
- neuralbridge_sdk-0.1.0/neuralbridge/adapters/openai_adapter.py +135 -0
- neuralbridge_sdk-0.1.0/neuralbridge/core/__init__.py +8 -0
- neuralbridge_sdk-0.1.0/neuralbridge/core/engine.py +362 -0
- neuralbridge_sdk-0.1.0/neuralbridge/core/strategies.py +34 -0
- neuralbridge_sdk-0.1.0/neuralbridge/core/types.py +171 -0
- neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/PKG-INFO +63 -0
- neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/SOURCES.txt +15 -0
- neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/dependency_links.txt +1 -0
- neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/requires.txt +2 -0
- neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/top_level.txt +1 -0
- neuralbridge_sdk-0.1.0/pyproject.toml +29 -0
- neuralbridge_sdk-0.1.0/setup.cfg +4 -0
+++ neuralbridge_sdk-0.1.0/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 NeuralBridge Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+++ neuralbridge_sdk-0.1.0/PKG-INFO
@@ -0,0 +1,63 @@
+Metadata-Version: 2.4
+Name: neuralbridge-sdk
+Version: 0.1.0
+Summary: Self-healing decision engine for AI API calls - diagnose, strategize, recover automatically
+Author-email: NeuralBridge Team <team@neuralbridge.dev>
+License: MIT
+Keywords: ai,api,self-healing,recovery,openai,llm,fallback
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai>=1.0.0
+Requires-Dist: requests>=2.28.0
+Dynamic: license-file
+
+# NeuralBridge SDK
+
+Self-healing decision engine for AI API calls. Drop-in replacement for `openai.OpenAI` with automatic fault diagnosis, cascade recovery, and transparent model switching.
+
+## Install
+
+```bash
+pip install neuralbridge-sdk
+```
+
+## Quick Start
+
+```python
+from neuralbridge import NeuralBridge
+
+client = NeuralBridge(
+    api_key="sk-xxx",
+    primary_model="gpt-4o",
+    fallback_models=["gpt-4o-mini"]
+)
+
+response = client.chat.completions.create(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+print(response.choices[0].message.content)
+```
+
+## 4-Layer Cascade Recovery
+
+| Layer | Strategy | When |
+|-------|----------|------|
+| LAYER 0 | Predictive model switch | Same fault pattern seen 3+ times |
+| LAYER 1 | Smart retry + immediate model switch | Timeout, rate limit |
+| LAYER 2 | Fault-model mapping switch | Model not found, internal error |
+| LAYER 3 | Request degradation | Context too long, content blocked |
+
+## License
+
+MIT
+++ neuralbridge_sdk-0.1.0/README.md
@@ -0,0 +1,40 @@
+# NeuralBridge SDK
+
+Self-healing decision engine for AI API calls. Drop-in replacement for `openai.OpenAI` with automatic fault diagnosis, cascade recovery, and transparent model switching.
+
+## Install
+
+```bash
+pip install neuralbridge-sdk
+```
+
+## Quick Start
+
+```python
+from neuralbridge import NeuralBridge
+
+client = NeuralBridge(
+    api_key="sk-xxx",
+    primary_model="gpt-4o",
+    fallback_models=["gpt-4o-mini"]
+)
+
+response = client.chat.completions.create(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+print(response.choices[0].message.content)
+```
+
+## 4-Layer Cascade Recovery
+
+| Layer | Strategy | When |
+|-------|----------|------|
+| LAYER 0 | Predictive model switch | Same fault pattern seen 3+ times |
+| LAYER 1 | Smart retry + immediate model switch | Timeout, rate limit |
+| LAYER 2 | Fault-model mapping switch | Model not found, internal error |
+| LAYER 3 | Request degradation | Context too long, content blocked |
+
+## License
+
+MIT
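Editor's note: the README above demonstrates only the happy path. The adapter (`neuralbridge/adapters/openai_adapter.py`, shown further down) also exposes `get_recovery_stats()` and `clear_history()`. A minimal illustrative sketch of reading the recovery telemetry, reusing the quick-start `client` and assuming some traffic has already flowed; it is not part of the package:

```python
# Illustrative sketch, not part of the package. Reuses the quick-start
# `client`; the keys below match the dict built by get_recovery_stats().
stats = client.get_recovery_stats()
print(stats["total_recovery_attempts"], "recoveries attempted,",
      stats["successful_recoveries"], "succeeded")
for model, perf in stats["model_performance"].items():
    print(f"{model}: {perf['success_rate']:.0%} success, "
          f"{perf['avg_latency_ms']:.0f} ms avg over {perf['total_calls']} calls")
client.clear_history()  # reset counters between experiments
```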
+++ neuralbridge_sdk-0.1.0/neuralbridge/__init__.py
@@ -0,0 +1,7 @@
+"""NeuralBridge - Self-healing decision engine for AI API calls."""
+from .adapters.openai_adapter import NeuralBridge
+from .core.engine import FlywheelEngine
+from .core.types import RecoveryRecord, ModelPerformance, CascadeLevel
+
+__version__ = "0.1.0"
+__all__ = ["NeuralBridge", "FlywheelEngine", "RecoveryRecord", "ModelPerformance", "CascadeLevel"]
+++ neuralbridge_sdk-0.1.0/neuralbridge/adapters/openai_adapter.py
@@ -0,0 +1,135 @@
+"""NeuralBridge OpenAI Adapter - Drop-in replacement for openai.OpenAI"""
+import time
+from typing import List, Dict, Any, Optional
+
+from ..core.engine import FlywheelEngine
+
+
+class ChatCompletions:
+    """Chat completions interface compatible with OpenAI API."""
+
+    def __init__(self, engine: FlywheelEngine):
+        self.engine = engine
+
+    def create(self, model: str = None, messages: List[Dict[str, str]] = None, **kwargs):
+        """Create chat completion with automatic fault recovery."""
+        if messages is None:
+            raise ValueError("messages parameter is required")
+
+        model = model or self.engine.primary_model
+
+        # First attempt with original model
+        response, latency = self.engine._execute_request(messages, model, **kwargs)
+
+        if self.engine._check_success(response):
+            self.engine.record_model_result(model, True, latency)
+            return self._format_response(response["response"])
+
+        # Cascade recovery
+        self.engine.record_model_result(model, False, latency)
+        recovered_response, record = self.engine.cascade_recover(
+            initial_error=response,
+            messages=messages,
+            original_model=model,
+            **kwargs
+        )
+
+        if record.recovered:
+            return self._format_response(recovered_response["response"])
+        else:
+            raise Exception(f"Recovery failed after {record.total_attempts} attempts. "
+                            f"Fault: {record.fault_type}, Strategy: {record.strategy_used}")
+
+    def _format_response(self, response_data: Dict) -> Any:
+        """Format response to match OpenAI API structure."""
+        class Choice:
+            def __init__(self, index, message, finish_reason):
+                self.index = index
+                self.message = message
+                self.finish_reason = finish_reason
+
+        class Message:
+            def __init__(self, role, content):
+                self.role = role
+                self.content = content
+
+        class Usage:
+            def __init__(self, prompt_tokens, completion_tokens, total_tokens):
+                self.prompt_tokens = prompt_tokens
+                self.completion_tokens = completion_tokens
+                self.total_tokens = total_tokens
+
+        class ChatCompletion:
+            def __init__(self, data):
+                self.id = data.get("id", "")
+                self.object = "chat.completion"
+                self.created = int(time.time())
+                self.model = data.get("model", "")
+                choices_data = data.get("choices", [])
+                self.choices = []
+                for c in choices_data:
+                    msg = c.get("message", {})
+                    message_obj = Message(msg.get("role", "assistant"), msg.get("content", ""))
+                    choice_obj = Choice(c.get("index", 0), message_obj, c.get("finish_reason", "stop"))
+                    self.choices.append(choice_obj)
+                usage_data = data.get("usage", {})
+                self.usage = Usage(
+                    usage_data.get("prompt_tokens", 0),
+                    usage_data.get("completion_tokens", 0),
+                    usage_data.get("total_tokens", 0)
+                )
+
+        return ChatCompletion(response_data)
+
+
+class NeuralBridge:
+    """
+    Self-healing decision engine for AI API calls.
+    Drop-in replacement for openai.OpenAI with automatic fault diagnosis and cascade recovery.
+    """
+
+    def __init__(self, api_key: str, base_url: str = "https://api.openai.com/v1",
+                 primary_model: str = "gpt-4o", fallback_models: Optional[List[str]] = None):
+        """
+        Initialize NeuralBridge client.
+
+        Args:
+            api_key: API key for authentication
+            base_url: Base URL for API endpoint
+            primary_model: Primary model to use (default: gpt-4o)
+            fallback_models: List of fallback models (default: ["gpt-4o-mini"])
+        """
+        self.engine = FlywheelEngine(
+            api_key=api_key,
+            base_url=base_url,
+            primary_model=primary_model,
+            fallback_models=fallback_models
+        )
+        self.chat = type('Chat', (), {'completions': ChatCompletions(self.engine)})()
+
+    def get_recovery_stats(self) -> Dict[str, Any]:
+        """Get statistics about recovery operations."""
+        return {
+            "total_recovery_attempts": len(self.engine.recovery_records),
+            "successful_recoveries": sum(1 for r in self.engine.recovery_records if r.recovered),
+            "diagnosis_stats": dict(self.engine.diagnosis_stats),
+            "model_performance": {
+                model: {
+                    "success_rate": perf.success_rate,
+                    "avg_latency_ms": perf.avg_latency_ms,
+                    "total_calls": perf.success_count + perf.failure_count
+                }
+                for model, perf in self.engine.model_performance.items()
+            }
+        }
+
+    def clear_history(self):
+        """Clear recovery history and reset performance metrics."""
+        self.engine.recovery_records.clear()
+        self.engine.diagnosis_stats.clear()
+        for perf in self.engine.model_performance.values():
+            perf.success_count = 0
+            perf.failure_count = 0
+            perf.total_latency_ms = 0.0
+            perf.timeout_count = 0
+            perf.rate_limit_count = 0
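Editor's note: when every cascade layer fails, `ChatCompletions.create` raises a plain `Exception` rather than an SDK-specific error type, so callers who want to distinguish recovery exhaustion from other failures must catch broadly. An illustrative sketch of the calling pattern this implies, reusing the quick-start `client` (not part of the package):

```python
# Illustrative sketch, not part of the package. create() above raises a
# bare Exception on unrecoverable faults, so Exception is the only hook.
try:
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)
except Exception as exc:
    # Message format per create() above:
    # "Recovery failed after N attempts. Fault: ..., Strategy: ..."
    print(f"all cascade layers exhausted: {exc}")
```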
+++ neuralbridge_sdk-0.1.0/neuralbridge/core/__init__.py
@@ -0,0 +1,8 @@
+"""NeuralBridge Core Package"""
+from .engine import FlywheelEngine
+from .types import (FaultCategory, FaultSeverity, CascadeLevel, ModelPerformance,
+                    RecoveryRecord, AdaptiveTimeout, DynamicBackoff, FaultPredictor, RequestProfile)
+
+__all__ = ["FlywheelEngine", "FaultCategory", "FaultSeverity", "CascadeLevel",
+           "ModelPerformance", "RecoveryRecord", "AdaptiveTimeout",
+           "DynamicBackoff", "FaultPredictor", "RequestProfile"]
+++ neuralbridge_sdk-0.1.0/neuralbridge/core/engine.py
@@ -0,0 +1,362 @@
+"""NeuralBridge Core - Self-healing Engine (V8.2)"""
+import hashlib
+import re
+import time
+import threading
+import requests
+from collections import defaultdict, deque
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple, Any
+
+from .types import (ModelPerformance, RecoveryRecord, AdaptiveTimeout, DynamicBackoff, FaultPredictor, RequestProfile)
+from .strategies import CASCADE_STRATEGIES, DEFAULT_CASCADE, FAULT_MODEL_MAPPING
+
+
+class FlywheelEngine:
+    """NeuralBridge self-healing decision engine - 4-layer cascade recovery."""
+
+    def __init__(self, api_key: str, base_url: str = "https://api.openai.com/v1",
+                 models: Optional[List[str]] = None, primary_model: str = "gpt-4o",
+                 fallback_models: Optional[List[str]] = None):
+        self.api_key = api_key
+        self.base_url = base_url.rstrip("/")
+        self.primary_model = primary_model
+        self.fallback_models = fallback_models or ["gpt-4o-mini"]
+        self.all_models = [primary_model] + self.fallback_models
+        self.model_performance: Dict[str, ModelPerformance] = {m: ModelPerformance(model_name=m) for m in self.all_models}
+        self.response_time_history: deque = deque(maxlen=50)
+        self.dynamic_backoff = DynamicBackoff()
+        self.fault_predictor = FaultPredictor()
+        self.adaptive_timeout = AdaptiveTimeout()
+        self.recovery_records: List[RecoveryRecord] = []
+        self.lock = threading.Lock()
+        self.diagnosis_stats: Dict[str, int] = defaultdict(int)
+        self._timeout_consecutive_failures = 0
+
+    def diagnose(self, error_msg=None, status_code=None, response_data=None):
+        fault_type, confidence = "internal_error", 0.5
+
+        if error_msg:
+            el = str(error_msg).lower()
+            if any(p in el for p in ["timeout", "timed out", "timed_out", "connection timeout"]):
+                fault_type, confidence = "connection_timeout", 0.95
+            elif any(p in el for p in ["model", "invalid", "not found", "does not exist"]):
+                fault_type, confidence = "model_not_found", 0.90
+            elif any(p in el for p in ["rate limit", "rate_limit", "429", "too many requests"]):
+                fault_type, confidence = "rate_limit_exceeded", 0.95
+            elif any(p in el for p in ["context", "length", "too long", "token limit", "exceeded"]):
+                fault_type, confidence = "context_length_exceeded", 0.85
+            elif any(p in el for p in ["overload", "503", "unavailable"]):
+                fault_type, confidence = "service_overloaded", 0.90
+            elif any(p in el for p in ["safety", "blocked", "content", "filter"]):
+                fault_type, confidence = "content_safety_blocked", 0.80
+            elif any(p in el for p in ["401", "unauthorized", "api key", "invalid key"]):
+                fault_type, confidence = "invalid_api_key", 0.95
+            elif any(p in el for p in ["500", "internal"]):
+                fault_type, confidence = "internal_error", 0.85
+
+        if status_code:
+            sc_map = {408: ("connection_timeout", 0.90), 429: ("rate_limit_exceeded", 0.95),
+                      503: ("service_overloaded", 0.90), 401: ("invalid_api_key", 0.95)}
+            if status_code in sc_map:
+                ft, c = sc_map[status_code]
+                fault_type, confidence = ft, max(confidence, c)
+            elif status_code == 400:
+                if "model" in str(error_msg or "").lower():
+                    fault_type, confidence = "model_not_found", max(confidence, 0.85)
+                elif "context" in str(error_msg or "").lower():
+                    fault_type, confidence = "context_length_exceeded", max(confidence, 0.85)
+
+        self.diagnosis_stats[fault_type] += 1
+        return fault_type, confidence
+
+    def select_best_model(self, fault_type=None, prefer_speed=False, prefer_reliability=False):
+        if fault_type and fault_type in FAULT_MODEL_MAPPING:
+            return self._select_by_strategy(FAULT_MODEL_MAPPING[fault_type]["select_strategy"])
+
+        best_model, best_score = self.primary_model, -1
+        for model in self.all_models:
+            perf = self.model_performance.get(model, ModelPerformance(model_name=model))
+            if prefer_speed:
+                score = max(0, 1 - perf.avg_latency_ms / 10000) * 0.7 + perf.success_rate * 0.3
+            elif prefer_reliability:
+                score = perf.success_rate * 0.7 + max(0, 1 - perf.avg_latency_ms / 10000) * 0.3
+            else:
+                score = perf.get_score()
+
+            if fault_type == "connection_timeout":
+                score *= 1 - (perf.timeout_count / max(1, perf.success_count + perf.timeout_count)) * 0.5
+
+            if score > best_score:
+                best_score, best_model = score, model
+
+        return best_model
+
+    def _select_by_strategy(self, strategy):
+        if strategy == "speed_priority":
+            return min(self.all_models, key=lambda m: self.model_performance.get(m, ModelPerformance(model_name=m)).avg_latency_ms)
+        if strategy == "load_balance":
+            return self.fallback_models[0] if self.primary_model == self.all_models[0] else self.primary_model
+        if strategy == "reliability_priority":
+            return max(self.all_models, key=lambda m: self.model_performance.get(m, ModelPerformance(model_name=m)).success_rate)
+        return self.primary_model
+
+    def generate_fault_fingerprint(self, fault_type, error_msg, model):
+        pattern_hash = hashlib.md5(str(error_msg)[:50].encode()).hexdigest()[:8]
+        timestamp = datetime.now().strftime("%Y%m%d%H")
+        return hashlib.sha256(f"{fault_type}:{pattern_hash}:{model}:{timestamp}".encode()).hexdigest()[:16]
+
+    def analyze_request(self, messages, kwargs):
+        total_tokens = sum(len(str(m.get("content", "")).split()) * 1.3 for m in messages)
+        complexity = "high" if total_tokens > 2000 or kwargs.get("max_tokens", 2048) > 2000 else ("medium" if total_tokens > 500 else "low")
+        return RequestProfile(
+            complexity=complexity,
+            estimated_tokens=int(total_tokens),
+            streaming=kwargs.get("stream", False),
+            has_system_prompt=any(m.get("role") == "system" for m in messages),
+            max_tokens_requested=kwargs.get("max_tokens", 2048),
+            temperature=kwargs.get("temperature", 0.7)
+        )
+
+    def degrade_request(self, messages, fault_type, profile=None):
+        info = {"action": "no_degradation", "changes": []}
+
+        if fault_type == "context_length_exceeded":
+            preserved = [m for m in messages if m.get("role") in ("system", "user")]
+            other = [m for m in messages if m.get("role") not in ("system", "user")]
+            return preserved + other[-2:], {"action": "smart_truncate"}
+
+        if fault_type == "content_safety_blocked":
+            triggers = ["暴力", "色情", "政治", "敏感", "违法", "hack", "porn", "violent"]
+            degraded = []
+            for m in messages:
+                content = m.get("content", "")
+                for w in triggers:
+                    content = re.sub(w, "[redacted]", content, flags=re.IGNORECASE)
+                degraded.append({"role": m.get("role"), "content": content})
+            return degraded, {"action": "prompt_rewrite"}
+
+        return messages, info
+
+    def _execute_request(self, messages, model=None, timeout=60, **kwargs):
+        model = model or self.primary_model
+        start = time.time()
+        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+        payload = {"model": model, "messages": messages}
+        for key in ("max_tokens", "temperature", "top_p", "stream"):
+            if key in kwargs:
+                payload[key] = kwargs[key]
+
+        try:
+            resp = requests.post(f"{self.base_url}/chat/completions", headers=headers, json=payload, timeout=timeout)
+            latency_ms = (time.time() - start) * 1000
+            self.response_time_history.append(latency_ms)
+
+            if resp.status_code == 200:
+                return {"success": True, "response": resp.json(), "status_code": 200}, latency_ms
+
+            err = resp.json() if resp.content else {}
+            return {"success": False, "error": err.get("error", {}).get("message", f"HTTP {resp.status_code}"),
+                    "error_code": err.get("error", {}).get("code"), "status_code": resp.status_code}, latency_ms
+
+        except requests.exceptions.Timeout:
+            latency_ms = (time.time() - start) * 1000
+            self.dynamic_backoff.record_result(False, latency_ms, is_timeout=True)
+            return {"success": False, "error": "Request timeout", "error_code": "timeout"}, latency_ms
+
+        except Exception as e:
+            return {"success": False, "error": str(e)}, (time.time() - start) * 1000
+
+    def _check_success(self, response):
+        if not response or not isinstance(response, dict):
+            return False
+        if response.get("success") is True:
+            return True
+        rd = response.get("response", {})
+        return isinstance(rd, dict) and bool(rd.get("choices"))
+
+    def record_model_result(self, model, success, latency_ms, is_timeout=False, is_rate_limit=False):
+        with self.lock:
+            if model not in self.model_performance:
+                self.model_performance[model] = ModelPerformance(model_name=model)
+            perf = self.model_performance[model]
+            if success:
+                perf.record_success(latency_ms)
+                self._timeout_consecutive_failures = 0
+            else:
+                perf.record_failure(is_timeout, is_rate_limit)
+                if is_timeout:
+                    self._timeout_consecutive_failures += 1
+
+    def cascade_recover(self, initial_error, messages, original_model=None, **kwargs):
+        start = time.time()
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
+        model = original_model or self.primary_model
+
+        fault_type, confidence = self.diagnose(
+            error_msg=initial_error.get("error"),
+            status_code=initial_error.get("status_code")
+        )
+
+        if fault_type == "connection_timeout":
+            model = self.fallback_models[0] if model == self.primary_model else self.primary_model
+
+        fingerprint = self.generate_fault_fingerprint(fault_type, initial_error.get("error", ""), model)
+        need_predictive = self.fault_predictor.record_fault(fingerprint)
+        profile = self.analyze_request(messages, kwargs)
+
+        current_messages, current_model, current_kwargs = messages, model, kwargs.copy()
+        cascade_level, strategy_used, strategy_params = 0, "", {}
+        recovery_response, recovery_latency_ms = None, 0.0
+        total_attempts = 0
+
+        for strategy in CASCADE_STRATEGIES.get(fault_type, DEFAULT_CASCADE):
+            cascade_level = strategy["level"]
+            strategy_used = strategy.get("name", strategy["action"])
+            strategy_params = strategy.copy()
+            total_attempts += 1
+
+            action = strategy["action"]
+            timeout_override = strategy.get("timeout_override", 60)
+
+            if action == "predictive_switch":
+                if need_predictive:
+                    new_model = self.select_best_model(fault_type)
+                    if new_model != current_model:
+                        current_model = new_model
+                        strategy_params["switched_to"] = new_model
+                    current_kwargs["timeout"] = timeout_override
+                    response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                    recovery_response, recovery_latency_ms = response, latency
+                    if self._check_success(response):
+                        self.record_model_result(current_model, True, latency)
+                        break
+                    need_predictive = False
+                continue
+
+            if action == "immediate_model_switch":
+                new_model = self.fallback_models[0] if current_model == self.primary_model else self.primary_model
+                if new_model != current_model:
+                    current_model = new_model
+                    strategy_params["switched_to"] = new_model
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency, is_timeout="timeout" in str(response).lower())
+                continue
+
+            if action == "dynamic_backoff_switch_retry":
+                new_model = self.fallback_models[0] if current_model == self.primary_model else self.primary_model
+                if new_model != current_model:
+                    current_model = new_model
+                    strategy_params["switched_to"] = new_model
+                time.sleep(strategy.get("wait_before_retry", 3))
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                is_timeout = "timeout" in str(response).lower()
+                self.dynamic_backoff.record_result(self._check_success(response), latency, is_timeout)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency, is_timeout=is_timeout)
+                continue
+
+            if action == "dynamic_backoff_retry":
+                time.sleep(self.dynamic_backoff.calculate_delay(fault_type))
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency)
+                continue
+
+            if action == "fault_model_mapping_switch":
+                new_model = self.select_best_model(fault_type)
+                if new_model != current_model:
+                    current_model = new_model
+                    strategy_params["switched_to"] = new_model
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency)
+                continue
+
+            if action == "smart_truncate":
+                degraded, deg = self.degrade_request(current_messages, fault_type, profile)
+                current_messages = degraded
+                strategy_params["degradation"] = deg
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency)
+                continue
+
+            if action in ("basic_retry", "extended_wait_retry"):
+                if action == "extended_wait_retry":
+                    time.sleep(strategy.get("wait_seconds", 10))
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency)
+                continue
+
+            if action == "model_switch":
+                current_model = self.fallback_models[0] if current_model == self.primary_model else self.primary_model
+                strategy_params["switched_to"] = current_model
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency)
+                continue
+
+            if action == "request_degrade":
+                degraded, deg = self.degrade_request(current_messages, fault_type, profile)
+                current_messages = degraded
+                strategy_params["degradation"] = deg
+                current_kwargs["timeout"] = timeout_override
+                response, latency = self._execute_request(current_messages, current_model, **current_kwargs)
+                recovery_response, recovery_latency_ms = response, latency
+                if self._check_success(response):
+                    self.record_model_result(current_model, True, latency)
+                    break
+                self.record_model_result(current_model, False, latency)
+                continue
+
+        total_time_ms = (time.time() - start) * 1000
+        recovered = self._check_success(recovery_response) if recovery_response else False
+
+        record = RecoveryRecord(
+            timestamp=timestamp,
+            fault_type=fault_type,
+            diagnosis_confidence=confidence,
+            first_error=initial_error.get("error", ""),
+            cascade_level=cascade_level,
+            strategy_used=strategy_used,
+            strategy_params=strategy_params,
+            recovered=recovered,
+            total_attempts=total_attempts,
+            total_time_ms=total_time_ms,
+            fault_fingerprint=fingerprint,
+            recovery_latency_ms=recovery_latency_ms
+        )
+        self.recovery_records.append(record)
+
+        return recovery_response, record
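Editor's note: `diagnose()` is pure substring and status-code matching, so it can be exercised offline with no network traffic. Note that branch order matters: any message containing "model" or "invalid" is classified as `model_not_found` before the later patterns are tried. An illustrative sketch (the `api_key` is a dummy placeholder; nothing is sent):

```python
# Illustrative sketch, not part of the package: diagnose() offline.
from neuralbridge.core.engine import FlywheelEngine

engine = FlywheelEngine(api_key="dummy")
print(engine.diagnose(error_msg="Request timed out"))         # ('connection_timeout', 0.95)
print(engine.diagnose(error_msg="429 Too Many Requests"))     # ('rate_limit_exceeded', 0.95)
print(engine.diagnose(error_msg="model foo does not exist"))  # ('model_not_found', 0.9)
print(engine.diagnose(status_code=503))                       # ('service_overloaded', 0.9)
```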
+++ neuralbridge_sdk-0.1.0/neuralbridge/core/strategies.py
@@ -0,0 +1,34 @@
+"""NeuralBridge Core - Recovery Strategies"""
+from typing import Dict, List, Any
+
+FAULT_MODEL_MAPPING: Dict[str, Dict[str, Any]] = {
+    "connection_timeout": {"select_strategy": "speed_priority"},
+    "rate_limit_exceeded": {"select_strategy": "load_balance"},
+    "context_length_exceeded": {"select_strategy": "context_window"},
+    "internal_error": {"select_strategy": "reliability_priority"},
+}
+
+CASCADE_STRATEGIES: Dict[str, List[Dict[str, Any]]] = {
+    "connection_timeout": [
+        {"level": 0, "name": "predictive_model_switch", "action": "predictive_switch", "trigger_on_fingerprint_count": 2},
+        {"level": 1, "name": "immediate_model_switch", "action": "immediate_model_switch", "timeout_override": 60},
+        {"level": 1, "name": "dynamic_backoff_switch_retry", "action": "dynamic_backoff_switch_retry", "timeout_override": 60, "wait_before_retry": 3},
+        {"level": 2, "name": "fault_model_mapping_switch", "action": "fault_model_mapping_switch", "select_by": "fault_type_mapping", "timeout_override": 60},
+    ],
+    "rate_limit_exceeded": [
+        {"level": 0, "name": "predictive_load_balance", "action": "predictive_load_balance", "trigger_on_fingerprint_count": 1},
+        {"level": 1, "name": "dynamic_backoff_retry", "action": "dynamic_backoff_retry", "use_adaptive_backoff": True},
+    ],
+    "service_overloaded": [{"level": 1, "name": "extended_wait_retry", "action": "extended_wait_retry", "wait_seconds": 10}],
+    "model_not_found": [{"level": 2, "name": "fault_model_mapping_switch", "action": "fault_model_mapping_switch", "select_by": "fault_type_mapping"}],
+    "internal_error": [{"level": 2, "name": "fault_model_mapping_switch", "action": "fault_model_mapping_switch", "select_by": "fault_type_mapping"}],
+    "context_length_exceeded": [{"level": 3, "name": "smart_truncate", "action": "smart_truncate", "preserve_ratio": 0.8, "preserve_system": True, "preserve_recent": True}],
+    "content_safety_blocked": [{"level": 3, "name": "rewrite_prompt", "action": "rewrite_prompt", "remove_triggers": True}],
+}
+
+DEFAULT_CASCADE: List[Dict[str, Any]] = [
+    {"level": 0, "name": "predictive_switch", "action": "predictive_switch"},
+    {"level": 1, "name": "basic_retry", "action": "basic_retry"},
+    {"level": 2, "name": "model_switch", "action": "model_switch"},
+    {"level": 3, "name": "request_degrade", "action": "request_degrade"},
+]
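Editor's note: `cascade_recover` (in engine.py above) resolves its ladder with `CASCADE_STRATEGIES.get(fault_type, DEFAULT_CASCADE)`. Two actions declared here, `predictive_load_balance` and `rewrite_prompt`, have no matching branch in that loop, so those steps increment `total_attempts` without issuing a request; likewise the `context_window` select strategy has no handler in `_select_by_strategy` and falls back to the primary model. An illustrative sketch of the lookup (not part of the package):

```python
# Illustrative sketch, not part of the package: ladder resolution.
from neuralbridge.core.strategies import CASCADE_STRATEGIES, DEFAULT_CASCADE

for fault in ("connection_timeout", "invalid_api_key"):
    ladder = CASCADE_STRATEGIES.get(fault, DEFAULT_CASCADE)
    print(fault, [(s["level"], s["action"]) for s in ladder])
# connection_timeout has a tailored four-step ladder; invalid_api_key is
# not listed, so it falls back to the generic DEFAULT_CASCADE.
```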
+++ neuralbridge_sdk-0.1.0/neuralbridge/core/types.py
@@ -0,0 +1,171 @@
+"""NeuralBridge Core - Types and Data Structures"""
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, List, Optional, Any, Tuple
+
+
+class FaultCategory(Enum):
+    NETWORK = "network"
+    AUTH = "authentication"
+    REQUEST = "request"
+    RESPONSE = "response"
+    SYSTEM = "system"
+    BUSINESS = "business"
+    ROUTING = "routing"
+
+
+class FaultSeverity(Enum):
+    LOW = 1
+    MEDIUM = 2
+    HIGH = 3
+    CRITICAL = 4
+
+
+class CascadeLevel(Enum):
+    LAYER_0_PREDICTIVE = 0
+    LAYER_1_SMART_RETRY = 1
+    LAYER_2_SMART_SWITCH = 2
+    LAYER_3_REQUEST_DEGRADE = 3
+
+
+@dataclass
+class ModelPerformance:
+    model_name: str
+    success_count: int = 0
+    failure_count: int = 0
+    total_latency_ms: float = 0.0
+    timeout_count: int = 0
+    last_used: float = 0.0
+    last_success: float = 0.0
+    rate_limit_count: int = 0
+
+    @property
+    def success_rate(self) -> float:
+        total = self.success_count + self.failure_count
+        return self.success_count / total if total > 0 else 0.5
+
+    @property
+    def avg_latency_ms(self) -> float:
+        total_calls = self.success_count + self.timeout_count
+        return self.total_latency_ms / total_calls if total_calls > 0 else 5000.0
+
+    def record_success(self, latency_ms: float):
+        self.success_count += 1
+        self.total_latency_ms += latency_ms
+        self.last_success = self.last_used = _now()
+
+    def record_failure(self, is_timeout: bool = False, is_rate_limit: bool = False):
+        self.failure_count += 1
+        self.last_used = _now()
+        if is_timeout:
+            self.timeout_count += 1
+        if is_rate_limit:
+            self.rate_limit_count += 1
+
+    def get_score(self) -> float:
+        return self.success_rate * 0.6 + max(0, 1 - (self.avg_latency_ms / 10000)) * 0.4
+
+
+@dataclass
+class RecoveryRecord:
+    timestamp: str
+    fault_type: str
+    diagnosis_confidence: float
+    first_error: str
+    cascade_level: int
+    strategy_used: str
+    strategy_params: Dict
+    recovered: bool
+    total_attempts: int
+    total_time_ms: float
+    fault_fingerprint: str = ""
+    recovery_latency_ms: float = 0.0
+
+
+@dataclass
+class AdaptiveTimeout:
+    base_timeout_ms: int = 30000
+    history_window: int = 20
+    multiplier: float = 1.5
+    min_timeout_ms: int = 5000
+    max_timeout_ms: int = 180000
+
+    def calculate(self, avg_response_time: Optional[float] = None) -> int:
+        if avg_response_time:
+            calculated = int(avg_response_time * self.multiplier)
+        else:
+            calculated = self.base_timeout_ms
+        return max(self.min_timeout_ms, min(self.max_timeout_ms, calculated))
+
+
+@dataclass
+class DynamicBackoff:
+    base_delay_s: float = 1.0
+    max_delay_s: float = 60.0
+    min_delay_s: float = 1.0
+    timeout_min_delay_s: float = 3.0
+    last_response_time_ms: float = 0.0
+    last_timeout_ms: float = 0.0
+    consecutive_timeouts: int = 0
+
+    def calculate_delay(self, fault_type: Optional[str] = None) -> float:
+        if self.last_timeout_ms > 0 or (fault_type and "timeout" in fault_type.lower()):
+            base_delay = max(3.0, min(self.last_timeout_ms / 1000 * 0.5, 15.0)) if self.last_timeout_ms > 0 else self.timeout_min_delay_s
+            if self.consecutive_timeouts > 0:
+                base_delay = min(base_delay * (1 + self.consecutive_timeouts * 0.2), self.max_delay_s)
+            return base_delay
+
+        if self.last_response_time_ms > 0:
+            if self.last_response_time_ms < 1000:
+                delay = max(self.min_delay_s, 1.0)
+            elif self.last_response_time_ms < 5000:
+                delay = 1.5
+            elif self.last_response_time_ms < 10000:
+                delay = 2.0
+            else:
+                delay = max(3.0, self.last_response_time_ms / 1000 * 0.3)
+            return min(delay, self.max_delay_s)
+
+        return self.base_delay_s
+
+    def record_result(self, success: bool, latency_ms: float, is_timeout: bool = False):
+        self.last_response_time_ms = latency_ms
+        if is_timeout:
+            self.last_timeout_ms = latency_ms
+            self.consecutive_timeouts += 1
+        else:
+            self.last_timeout_ms = 0.0
+            self.consecutive_timeouts = 0
+
+
+@dataclass
+class FaultPredictor:
+    fingerprint_history: Dict[str, List[float]] = field(default_factory=dict)
+    prediction_threshold: int = 3
+
+    def record_fault(self, fingerprint: str) -> bool:
+        import time as _time
+        now = _time.time()
+        if fingerprint not in self.fingerprint_history:
+            self.fingerprint_history[fingerprint] = []
+        self.fingerprint_history[fingerprint].append(now)
+        self.fingerprint_history[fingerprint] = [t for t in self.fingerprint_history[fingerprint] if now - t < 3600]
+        return len(self.fingerprint_history[fingerprint]) >= self.prediction_threshold
+
+    def should_predictive_switch(self, fingerprint: str) -> bool:
+        return fingerprint in self.fingerprint_history and len(self.fingerprint_history[fingerprint]) >= self.prediction_threshold
+
+
+@dataclass
+class RequestProfile:
+    complexity: str
+    estimated_tokens: int
+    streaming: bool
+    has_system_prompt: bool
+    max_tokens_requested: int
+    temperature: float
+
+
+def _now() -> float:
+    import time as _time
+    return _time.time()
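Editor's note: the scoring defaults are easiest to see with numbers. An unused model starts neutral (0.5 success rate, 5000 ms assumed latency), timeouts inflate the backoff delay multiplicatively, and because `total_latency_ms` only accumulates on success while `avg_latency_ms` divides by successes plus timeouts, the average drifts low after timeouts. An illustrative sketch (not part of the package):

```python
# Illustrative sketch, not part of the package.
from neuralbridge.core.types import ModelPerformance, DynamicBackoff

perf = ModelPerformance(model_name="gpt-4o")
print(perf.get_score())            # 0.5   (0.5*0.6 + (1 - 5000/10000)*0.4)

perf.record_success(800.0)         # one fast success
print(round(perf.get_score(), 3))  # 0.968 (1.0*0.6 + (1 - 800/10000)*0.4)

backoff = DynamicBackoff()
backoff.record_result(False, 30000.0, is_timeout=True)
print(backoff.calculate_delay())   # 18.0 = min(15.0 * (1 + 1*0.2), 60.0)
```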
+++ neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/PKG-INFO
@@ -0,0 +1,63 @@
+Metadata-Version: 2.4
+Name: neuralbridge-sdk
+Version: 0.1.0
+Summary: Self-healing decision engine for AI API calls - diagnose, strategize, recover automatically
+Author-email: NeuralBridge Team <team@neuralbridge.dev>
+License: MIT
+Keywords: ai,api,self-healing,recovery,openai,llm,fallback
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai>=1.0.0
+Requires-Dist: requests>=2.28.0
+Dynamic: license-file
+
+# NeuralBridge SDK
+
+Self-healing decision engine for AI API calls. Drop-in replacement for `openai.OpenAI` with automatic fault diagnosis, cascade recovery, and transparent model switching.
+
+## Install
+
+```bash
+pip install neuralbridge-sdk
+```
+
+## Quick Start
+
+```python
+from neuralbridge import NeuralBridge
+
+client = NeuralBridge(
+    api_key="sk-xxx",
+    primary_model="gpt-4o",
+    fallback_models=["gpt-4o-mini"]
+)
+
+response = client.chat.completions.create(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+print(response.choices[0].message.content)
+```
+
+## 4-Layer Cascade Recovery
+
+| Layer | Strategy | When |
+|-------|----------|------|
+| LAYER 0 | Predictive model switch | Same fault pattern seen 3+ times |
+| LAYER 1 | Smart retry + immediate model switch | Timeout, rate limit |
+| LAYER 2 | Fault-model mapping switch | Model not found, internal error |
+| LAYER 3 | Request degradation | Context too long, content blocked |
+
+## License
+
+MIT
+++ neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/SOURCES.txt
@@ -0,0 +1,15 @@
+LICENSE
+README.md
+pyproject.toml
+neuralbridge/__init__.py
+neuralbridge/adapters/__init__.py
+neuralbridge/adapters/openai_adapter.py
+neuralbridge/core/__init__.py
+neuralbridge/core/engine.py
+neuralbridge/core/strategies.py
+neuralbridge/core/types.py
+neuralbridge_sdk.egg-info/PKG-INFO
+neuralbridge_sdk.egg-info/SOURCES.txt
+neuralbridge_sdk.egg-info/dependency_links.txt
+neuralbridge_sdk.egg-info/requires.txt
+neuralbridge_sdk.egg-info/top_level.txt
+++ neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
+++ neuralbridge_sdk-0.1.0/neuralbridge_sdk.egg-info/top_level.txt
@@ -0,0 +1 @@
+neuralbridge
+++ neuralbridge_sdk-0.1.0/pyproject.toml
@@ -0,0 +1,29 @@
+[build-system]
+requires = ["setuptools>=68.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "neuralbridge-sdk"
+version = "0.1.0"
+description = "Self-healing decision engine for AI API calls - diagnose, strategize, recover automatically"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+authors = [{name = "NeuralBridge Team", email = "team@neuralbridge.dev"}]
+keywords = ["ai", "api", "self-healing", "recovery", "openai", "llm", "fallback"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = ["openai>=1.0.0", "requests>=2.28.0"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["neuralbridge*"]