aisbf-0.2.2-py3-none-any.whl → aisbf-0.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisbf/config.py +57 -1
- aisbf/handlers.py +314 -33
- aisbf/providers.py +164 -9
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/config.py +57 -1
- aisbf-0.2.4.data/data/share/aisbf/aisbf/handlers.py +664 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/providers.py +164 -9
- aisbf-0.2.4.data/data/share/aisbf/main.py +421 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/METADATA +1 -1
- aisbf-0.2.4.dist-info/RECORD +24 -0
- aisbf-0.2.2.data/data/share/aisbf/aisbf/handlers.py +0 -383
- aisbf-0.2.2.data/data/share/aisbf/main.py +0 -214
- aisbf-0.2.2.dist-info/RECORD +0 -24
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/__init__.py +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/models.py +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf.sh +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/autoselect.json +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/autoselect.md +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/providers.json +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/requirements.txt +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/rotations.json +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/WHEEL +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/entry_points.txt +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/licenses/LICENSE.txt +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/top_level.txt +0 -0
aisbf/config.py
CHANGED
|
@@ -34,6 +34,7 @@ class ProviderConfig(BaseModel):
|
|
|
34
34
|
endpoint: str
|
|
35
35
|
type: str
|
|
36
36
|
api_key_required: bool
|
|
37
|
+
rate_limit: float = 0.0
|
|
37
38
|
|
|
38
39
|
class RotationConfig(BaseModel):
|
|
39
40
|
providers: List[Dict]
|
|
@@ -111,32 +112,78 @@ class Config:
|
|
|
111
112
|
print(f"Created default config file: {dst}")
|
|
112
113
|
|
|
113
114
|
def _load_providers(self):
|
|
115
|
+
import logging
|
|
116
|
+
logger = logging.getLogger(__name__)
|
|
117
|
+
logger.info(f"=== Config._load_providers START ===")
|
|
118
|
+
|
|
114
119
|
providers_path = Path.home() / '.aisbf' / 'providers.json'
|
|
120
|
+
logger.info(f"Looking for providers at: {providers_path}")
|
|
121
|
+
|
|
115
122
|
if not providers_path.exists():
|
|
123
|
+
logger.info(f"User config not found, falling back to source config")
|
|
116
124
|
# Fallback to source config if user config doesn't exist
|
|
117
125
|
try:
|
|
118
126
|
source_dir = self._get_config_source_dir()
|
|
119
127
|
providers_path = source_dir / 'providers.json'
|
|
128
|
+
logger.info(f"Using source config at: {providers_path}")
|
|
120
129
|
except FileNotFoundError:
|
|
130
|
+
logger.error("Could not find providers.json configuration file")
|
|
121
131
|
raise FileNotFoundError("Could not find providers.json configuration file")
|
|
122
132
|
|
|
133
|
+
logger.info(f"Loading providers from: {providers_path}")
|
|
123
134
|
with open(providers_path) as f:
|
|
124
135
|
data = json.load(f)
|
|
125
136
|
self.providers = {k: ProviderConfig(**v) for k, v in data['providers'].items()}
|
|
137
|
+
logger.info(f"Loaded {len(self.providers)} providers: {list(self.providers.keys())}")
|
|
138
|
+
for provider_id, provider_config in self.providers.items():
|
|
139
|
+
logger.info(f" - {provider_id}: type={provider_config.type}, endpoint={provider_config.endpoint}")
|
|
140
|
+
logger.info(f"=== Config._load_providers END ===")
|
|
126
141
|
|
|
127
142
|
def _load_rotations(self):
|
|
143
|
+
import logging
|
|
144
|
+
logger = logging.getLogger(__name__)
|
|
145
|
+
logger.info(f"=== Config._load_rotations START ===")
|
|
146
|
+
|
|
128
147
|
rotations_path = Path.home() / '.aisbf' / 'rotations.json'
|
|
148
|
+
logger.info(f"Looking for rotations at: {rotations_path}")
|
|
149
|
+
|
|
129
150
|
if not rotations_path.exists():
|
|
151
|
+
logger.info(f"User config not found, falling back to source config")
|
|
130
152
|
# Fallback to source config if user config doesn't exist
|
|
131
153
|
try:
|
|
132
154
|
source_dir = self._get_config_source_dir()
|
|
133
155
|
rotations_path = source_dir / 'rotations.json'
|
|
156
|
+
logger.info(f"Using source config at: {rotations_path}")
|
|
134
157
|
except FileNotFoundError:
|
|
158
|
+
logger.error("Could not find rotations.json configuration file")
|
|
135
159
|
raise FileNotFoundError("Could not find rotations.json configuration file")
|
|
136
160
|
|
|
161
|
+
logger.info(f"Loading rotations from: {rotations_path}")
|
|
137
162
|
with open(rotations_path) as f:
|
|
138
163
|
data = json.load(f)
|
|
139
164
|
self.rotations = {k: RotationConfig(**v) for k, v in data['rotations'].items()}
|
|
165
|
+
logger.info(f"Loaded {len(self.rotations)} rotations: {list(self.rotations.keys())}")
|
|
166
|
+
|
|
167
|
+
# Validate that all providers referenced in rotations exist
|
|
168
|
+
logger.info(f"=== VALIDATING ROTATION PROVIDERS ===")
|
|
169
|
+
available_providers = list(self.providers.keys())
|
|
170
|
+
logger.info(f"Available providers: {available_providers}")
|
|
171
|
+
|
|
172
|
+
for rotation_id, rotation_config in self.rotations.items():
|
|
173
|
+
logger.info(f"Validating rotation: {rotation_id}")
|
|
174
|
+
for provider in rotation_config.providers:
|
|
175
|
+
provider_id = provider['provider_id']
|
|
176
|
+
if provider_id not in self.providers:
|
|
177
|
+
logger.warning(f"!!! CONFIGURATION WARNING !!!")
|
|
178
|
+
logger.warning(f"Rotation '{rotation_id}' references provider '{provider_id}' which is NOT defined in providers.json")
|
|
179
|
+
logger.warning(f"Available providers: {available_providers}")
|
|
180
|
+
logger.warning(f"This provider will be SKIPPED during rotation requests")
|
|
181
|
+
logger.warning(f"Please add the provider to providers.json or remove it from the rotation configuration")
|
|
182
|
+
logger.warning(f"!!! END WARNING !!!")
|
|
183
|
+
else:
|
|
184
|
+
logger.info(f" ✓ Provider '{provider_id}' is available")
|
|
185
|
+
|
|
186
|
+
logger.info(f"=== Config._load_rotations END ===")
|
|
140
187
|
|
|
141
188
|
def _load_autoselect(self):
|
|
142
189
|
autoselect_path = Path.home() / '.aisbf' / 'autoselect.json'
|
|
@@ -162,7 +209,16 @@ class Config:
|
|
|
162
209
|
}
|
|
163
210
|
|
|
164
211
|
def get_provider(self, provider_id: str) -> ProviderConfig:
|
|
165
|
-
|
|
212
|
+
import logging
|
|
213
|
+
logger = logging.getLogger(__name__)
|
|
214
|
+
logger.info(f"Config.get_provider called with provider_id: {provider_id}")
|
|
215
|
+
logger.info(f"Available providers: {list(self.providers.keys())}")
|
|
216
|
+
result = self.providers.get(provider_id)
|
|
217
|
+
if result:
|
|
218
|
+
logger.info(f"Found provider: {result}")
|
|
219
|
+
else:
|
|
220
|
+
logger.warning(f"Provider {provider_id} not found!")
|
|
221
|
+
return result
|
|
166
222
|
|
|
167
223
|
def get_rotation(self, rotation_id: str) -> RotationConfig:
|
|
168
224
|
return self.rotations.get(rotation_id)
|
aisbf/handlers.py
CHANGED
|
@@ -37,24 +37,47 @@ class RequestHandler:
|
|
|
37
37
|
self.config = config
|
|
38
38
|
|
|
39
39
|
async def handle_chat_completion(self, request: Request, provider_id: str, request_data: Dict) -> Dict:
|
|
40
|
+
import logging
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
logger.info(f"=== RequestHandler.handle_chat_completion START ===")
|
|
43
|
+
logger.info(f"Provider ID: {provider_id}")
|
|
44
|
+
logger.info(f"Request data: {request_data}")
|
|
45
|
+
|
|
40
46
|
provider_config = self.config.get_provider(provider_id)
|
|
47
|
+
logger.info(f"Provider config: {provider_config}")
|
|
48
|
+
logger.info(f"Provider type: {provider_config.type}")
|
|
49
|
+
logger.info(f"Provider endpoint: {provider_config.endpoint}")
|
|
50
|
+
logger.info(f"API key required: {provider_config.api_key_required}")
|
|
41
51
|
|
|
42
52
|
if provider_config.api_key_required:
|
|
43
53
|
api_key = request_data.get('api_key') or request.headers.get('Authorization', '').replace('Bearer ', '')
|
|
54
|
+
logger.info(f"API key from request: {'***' if api_key else 'None'}")
|
|
44
55
|
if not api_key:
|
|
45
56
|
raise HTTPException(status_code=401, detail="API key required")
|
|
46
57
|
else:
|
|
47
58
|
api_key = None
|
|
59
|
+
logger.info("No API key required for this provider")
|
|
48
60
|
|
|
61
|
+
logger.info(f"Getting provider handler for {provider_id}")
|
|
49
62
|
handler = get_provider_handler(provider_id, api_key)
|
|
63
|
+
logger.info(f"Provider handler obtained: {handler.__class__.__name__}")
|
|
50
64
|
|
|
51
65
|
if handler.is_rate_limited():
|
|
52
66
|
raise HTTPException(status_code=503, detail="Provider temporarily unavailable")
|
|
53
67
|
|
|
54
68
|
try:
|
|
69
|
+
logger.info(f"Model requested: {request_data.get('model')}")
|
|
70
|
+
logger.info(f"Messages count: {len(request_data.get('messages', []))}")
|
|
71
|
+
logger.info(f"Max tokens: {request_data.get('max_tokens')}")
|
|
72
|
+
logger.info(f"Temperature: {request_data.get('temperature', 1.0)}")
|
|
73
|
+
logger.info(f"Stream: {request_data.get('stream', False)}")
|
|
74
|
+
|
|
55
75
|
# Apply rate limiting
|
|
76
|
+
logger.info("Applying rate limiting...")
|
|
56
77
|
await handler.apply_rate_limit()
|
|
78
|
+
logger.info("Rate limiting applied")
|
|
57
79
|
|
|
80
|
+
logger.info(f"Sending request to provider handler...")
|
|
58
81
|
response = await handler.handle_request(
|
|
59
82
|
model=request_data['model'],
|
|
60
83
|
messages=request_data['messages'],
|
|
@@ -62,7 +85,9 @@ class RequestHandler:
|
|
|
62
85
|
temperature=request_data.get('temperature', 1.0),
|
|
63
86
|
stream=request_data.get('stream', False)
|
|
64
87
|
)
|
|
88
|
+
logger.info(f"Response received from provider")
|
|
65
89
|
handler.record_success()
|
|
90
|
+
logger.info(f"=== RequestHandler.handle_chat_completion END ===")
|
|
66
91
|
return response
|
|
67
92
|
except Exception as e:
|
|
68
93
|
handler.record_failure()
|
|
@@ -129,49 +154,205 @@ class RotationHandler:
|
|
|
129
154
|
self.config = config
|
|
130
155
|
|
|
131
156
|
async def handle_rotation_request(self, rotation_id: str, request_data: Dict) -> Dict:
|
|
157
|
+
import logging
|
|
158
|
+
logger = logging.getLogger(__name__)
|
|
159
|
+
logger.info(f"=== RotationHandler.handle_rotation_request START ===")
|
|
160
|
+
logger.info(f"Rotation ID: {rotation_id}")
|
|
161
|
+
|
|
132
162
|
rotation_config = self.config.get_rotation(rotation_id)
|
|
133
163
|
if not rotation_config:
|
|
164
|
+
logger.error(f"Rotation {rotation_id} not found")
|
|
134
165
|
raise HTTPException(status_code=400, detail=f"Rotation {rotation_id} not found")
|
|
135
166
|
|
|
167
|
+
logger.info(f"Rotation config loaded successfully")
|
|
136
168
|
providers = rotation_config.providers
|
|
137
|
-
|
|
169
|
+
logger.info(f"Number of providers in rotation: {len(providers)}")
|
|
170
|
+
|
|
171
|
+
# Collect all available models with their weights
|
|
172
|
+
available_models = []
|
|
173
|
+
skipped_providers = []
|
|
174
|
+
total_models_considered = 0
|
|
138
175
|
|
|
176
|
+
logger.info(f"=== MODEL SELECTION PROCESS START ===")
|
|
177
|
+
logger.info(f"Scanning providers for available models...")
|
|
178
|
+
|
|
139
179
|
for provider in providers:
|
|
180
|
+
provider_id = provider['provider_id']
|
|
181
|
+
logger.info(f"")
|
|
182
|
+
logger.info(f"--- Processing provider: {provider_id} ---")
|
|
183
|
+
|
|
184
|
+
# Check if provider exists in configuration
|
|
185
|
+
provider_config = self.config.get_provider(provider_id)
|
|
186
|
+
if not provider_config:
|
|
187
|
+
logger.error(f" [ERROR] Provider {provider_id} not found in providers configuration")
|
|
188
|
+
logger.error(f" Available providers: {list(self.config.providers.keys())}")
|
|
189
|
+
logger.error(f" Skipping this provider")
|
|
190
|
+
skipped_providers.append(provider_id)
|
|
191
|
+
continue
|
|
192
|
+
|
|
193
|
+
# Check if provider is rate limited/deactivated
|
|
194
|
+
provider_handler = get_provider_handler(provider_id, provider.get('api_key'))
|
|
195
|
+
if provider_handler.is_rate_limited():
|
|
196
|
+
logger.warning(f" [SKIPPED] Provider {provider_id} is rate limited/deactivated")
|
|
197
|
+
logger.warning(f" Reason: Provider has exceeded failure threshold or is in cooldown period")
|
|
198
|
+
skipped_providers.append(provider_id)
|
|
199
|
+
continue
|
|
200
|
+
|
|
201
|
+
logger.info(f" [AVAILABLE] Provider {provider_id} is active and ready")
|
|
202
|
+
|
|
203
|
+
models_in_provider = len(provider['models'])
|
|
204
|
+
total_models_considered += models_in_provider
|
|
205
|
+
logger.info(f" Found {models_in_provider} model(s) in this provider")
|
|
206
|
+
|
|
140
207
|
for model in provider['models']:
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
208
|
+
model_name = model['name']
|
|
209
|
+
model_weight = model['weight']
|
|
210
|
+
model_rate_limit = model.get('rate_limit', 'N/A')
|
|
211
|
+
|
|
212
|
+
logger.info(f" - Model: {model_name}")
|
|
213
|
+
logger.info(f" Weight (Priority): {model_weight}")
|
|
214
|
+
logger.info(f" Rate Limit: {model_rate_limit}")
|
|
215
|
+
|
|
216
|
+
# Add provider_id and api_key to model for later use
|
|
217
|
+
model_with_provider = model.copy()
|
|
218
|
+
model_with_provider['provider_id'] = provider_id
|
|
219
|
+
model_with_provider['api_key'] = provider.get('api_key')
|
|
220
|
+
available_models.append(model_with_provider)
|
|
221
|
+
|
|
222
|
+
logger.info(f"")
|
|
223
|
+
logger.info(f"=== MODEL SELECTION SUMMARY ===")
|
|
224
|
+
logger.info(f"Total providers scanned: {len(providers)}")
|
|
225
|
+
logger.info(f"Providers skipped (rate limited): {len(skipped_providers)}")
|
|
226
|
+
if skipped_providers:
|
|
227
|
+
logger.info(f"Skipped providers: {', '.join(skipped_providers)}")
|
|
228
|
+
logger.info(f"Total models considered: {total_models_considered}")
|
|
229
|
+
logger.info(f"Total models available: {len(available_models)}")
|
|
230
|
+
|
|
231
|
+
if not available_models:
|
|
232
|
+
logger.error("No models available in rotation (all providers may be rate limited)")
|
|
233
|
+
logger.error("All providers in this rotation are currently deactivated")
|
|
234
|
+
raise HTTPException(status_code=503, detail="No models available in rotation (all providers may be rate limited)")
|
|
145
235
|
|
|
236
|
+
# Sort models by weight in descending order (higher weight = higher priority)
|
|
237
|
+
available_models.sort(key=lambda m: m['weight'], reverse=True)
|
|
238
|
+
|
|
239
|
+
logger.info(f"")
|
|
240
|
+
logger.info(f"=== PRIORITY-BASED SELECTION ===")
|
|
241
|
+
logger.info(f"Models sorted by weight (descending priority):")
|
|
242
|
+
for idx, model in enumerate(available_models, 1):
|
|
243
|
+
logger.info(f" {idx}. {model['name']} (provider: {model['provider_id']}, weight: {model['weight']})")
|
|
244
|
+
|
|
245
|
+
# Find the highest weight
|
|
246
|
+
highest_weight = available_models[0]['weight']
|
|
247
|
+
logger.info(f"")
|
|
248
|
+
logger.info(f"Highest priority weight: {highest_weight}")
|
|
249
|
+
|
|
250
|
+
# Filter models with the highest weight
|
|
251
|
+
highest_weight_models = [m for m in available_models if m['weight'] == highest_weight]
|
|
252
|
+
logger.info(f"Models with highest priority ({highest_weight}): {len(highest_weight_models)}")
|
|
253
|
+
for model in highest_weight_models:
|
|
254
|
+
logger.info(f" - {model['name']} (provider: {model['provider_id']})")
|
|
255
|
+
|
|
256
|
+
# If multiple models have the same highest weight, randomly select among them
|
|
146
257
|
import random
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
258
|
+
if len(highest_weight_models) > 1:
|
|
259
|
+
logger.info(f"Multiple models with same highest priority - performing random selection")
|
|
260
|
+
selected_model = random.choice(highest_weight_models)
|
|
261
|
+
logger.info(f"Randomly selected from {len(highest_weight_models)} candidates")
|
|
262
|
+
else:
|
|
263
|
+
selected_model = highest_weight_models[0]
|
|
264
|
+
logger.info(f"Single model with highest priority - deterministic selection")
|
|
265
|
+
|
|
266
|
+
logger.info(f"")
|
|
267
|
+
logger.info(f"=== FINAL SELECTION ===")
|
|
268
|
+
logger.info(f"Selected model: {selected_model['name']}")
|
|
269
|
+
logger.info(f"Selected provider: {selected_model['provider_id']}")
|
|
270
|
+
logger.info(f"Model weight (priority): {selected_model['weight']}")
|
|
271
|
+
logger.info(f"Model rate limit: {selected_model.get('rate_limit', 'N/A')}")
|
|
272
|
+
logger.info(f"=== MODEL SELECTION PROCESS END ===")
|
|
273
|
+
|
|
274
|
+
# Retry logic: Try up to 2 times with different models
|
|
275
|
+
max_retries = 2
|
|
276
|
+
tried_models = []
|
|
277
|
+
last_error = None
|
|
278
|
+
|
|
279
|
+
for attempt in range(max_retries):
|
|
280
|
+
logger.info(f"")
|
|
281
|
+
logger.info(f"=== ATTEMPT {attempt + 1}/{max_retries} ===")
|
|
282
|
+
|
|
283
|
+
# Select a model that hasn't been tried yet
|
|
284
|
+
remaining_models = [m for m in available_models if m not in tried_models]
|
|
285
|
+
|
|
286
|
+
if not remaining_models:
|
|
287
|
+
logger.error(f"No more models available to try")
|
|
288
|
+
logger.error(f"All {len(available_models)} models have been attempted")
|
|
289
|
+
break
|
|
290
|
+
|
|
291
|
+
# Sort remaining models by weight and select the best one
|
|
292
|
+
remaining_models.sort(key=lambda m: m['weight'], reverse=True)
|
|
293
|
+
current_model = remaining_models[0]
|
|
294
|
+
tried_models.append(current_model)
|
|
295
|
+
|
|
296
|
+
logger.info(f"Trying model: {current_model['name']} (provider: {current_model['provider_id']})")
|
|
297
|
+
logger.info(f"Attempt {attempt + 1} of {max_retries}")
|
|
298
|
+
|
|
299
|
+
provider_id = current_model['provider_id']
|
|
300
|
+
api_key = current_model.get('api_key')
|
|
301
|
+
model_name = current_model['name']
|
|
302
|
+
|
|
303
|
+
logger.info(f"Getting provider handler for {provider_id}")
|
|
304
|
+
handler = get_provider_handler(provider_id, api_key)
|
|
305
|
+
logger.info(f"Provider handler obtained: {handler.__class__.__name__}")
|
|
162
306
|
|
|
163
|
-
|
|
164
|
-
model
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
307
|
+
if handler.is_rate_limited():
|
|
308
|
+
logger.warning(f"Provider {provider_id} is rate limited, skipping to next model")
|
|
309
|
+
continue
|
|
310
|
+
|
|
311
|
+
try:
|
|
312
|
+
logger.info(f"Model requested: {model_name}")
|
|
313
|
+
logger.info(f"Messages count: {len(request_data.get('messages', []))}")
|
|
314
|
+
logger.info(f"Max tokens: {request_data.get('max_tokens')}")
|
|
315
|
+
logger.info(f"Temperature: {request_data.get('temperature', 1.0)}")
|
|
316
|
+
logger.info(f"Stream: {request_data.get('stream', False)}")
|
|
317
|
+
|
|
318
|
+
# Apply rate limiting with model-specific rate limit if available
|
|
319
|
+
rate_limit = current_model.get('rate_limit')
|
|
320
|
+
logger.info(f"Model-specific rate limit: {rate_limit}")
|
|
321
|
+
logger.info("Applying rate limiting...")
|
|
322
|
+
await handler.apply_rate_limit(rate_limit)
|
|
323
|
+
logger.info("Rate limiting applied")
|
|
324
|
+
|
|
325
|
+
logger.info(f"Sending request to provider handler...")
|
|
326
|
+
response = await handler.handle_request(
|
|
327
|
+
model=model_name,
|
|
328
|
+
messages=request_data['messages'],
|
|
329
|
+
max_tokens=request_data.get('max_tokens'),
|
|
330
|
+
temperature=request_data.get('temperature', 1.0),
|
|
331
|
+
stream=request_data.get('stream', False)
|
|
332
|
+
)
|
|
333
|
+
logger.info(f"Response received from provider")
|
|
334
|
+
handler.record_success()
|
|
335
|
+
logger.info(f"=== RotationHandler.handle_rotation_request END ===")
|
|
336
|
+
logger.info(f"Request succeeded on attempt {attempt + 1}")
|
|
337
|
+
return response
|
|
338
|
+
except Exception as e:
|
|
339
|
+
last_error = str(e)
|
|
340
|
+
handler.record_failure()
|
|
341
|
+
logger.error(f"Attempt {attempt + 1} failed: {str(e)}")
|
|
342
|
+
logger.error(f"Error type: {type(e).__name__}")
|
|
343
|
+
logger.error(f"Will try next model...")
|
|
344
|
+
continue
|
|
345
|
+
|
|
346
|
+
# All retries exhausted
|
|
347
|
+
logger.error(f"")
|
|
348
|
+
logger.error(f"=== ALL RETRIES EXHAUSTED ===")
|
|
349
|
+
logger.error(f"Attempted {len(tried_models)} different model(s): {[m['name'] for m in tried_models]}")
|
|
350
|
+
logger.error(f"Last error: {last_error}")
|
|
351
|
+
logger.error(f"Max retries ({max_retries}) reached without success")
|
|
352
|
+
raise HTTPException(
|
|
353
|
+
status_code=503,
|
|
354
|
+
detail=f"All providers in rotation failed after {max_retries} attempts. Last error: {last_error}"
|
|
355
|
+
)
|
|
175
356
|
|
|
176
357
|
async def handle_rotation_model_list(self, rotation_id: str) -> List[Dict]:
|
|
177
358
|
rotation_config = self.config.get_rotation(rotation_id)
|
|
@@ -253,6 +434,11 @@ class AutoselectHandler:
|
|
|
253
434
|
|
|
254
435
|
async def _get_model_selection(self, prompt: str) -> str:
|
|
255
436
|
"""Send the autoselect prompt to a model and get the selection"""
|
|
437
|
+
import logging
|
|
438
|
+
logger = logging.getLogger(__name__)
|
|
439
|
+
logger.info(f"=== AUTOSELECT MODEL SELECTION START ===")
|
|
440
|
+
logger.info(f"Using 'general' rotation for model selection")
|
|
441
|
+
|
|
256
442
|
# Use the first available provider/model for the selection
|
|
257
443
|
# This is a simple implementation - could be enhanced to use a specific selection model
|
|
258
444
|
rotation_handler = RotationHandler()
|
|
@@ -265,27 +451,64 @@ class AutoselectHandler:
|
|
|
265
451
|
"stream": False
|
|
266
452
|
}
|
|
267
453
|
|
|
454
|
+
logger.info(f"Selection request parameters:")
|
|
455
|
+
logger.info(f" Temperature: 0.1 (low for deterministic selection)")
|
|
456
|
+
logger.info(f" Max tokens: 100 (short response expected)")
|
|
457
|
+
logger.info(f" Stream: False")
|
|
458
|
+
|
|
268
459
|
# Use the fallback rotation for the selection
|
|
269
460
|
try:
|
|
461
|
+
logger.info(f"Sending selection request to rotation handler...")
|
|
270
462
|
response = await rotation_handler.handle_rotation_request("general", selection_request)
|
|
463
|
+
logger.info(f"Selection response received")
|
|
464
|
+
|
|
271
465
|
content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
|
|
466
|
+
logger.info(f"Raw response content: {content[:200]}..." if len(content) > 200 else f"Raw response content: {content}")
|
|
467
|
+
|
|
272
468
|
model_id = self._extract_model_selection(content)
|
|
469
|
+
|
|
470
|
+
if model_id:
|
|
471
|
+
logger.info(f"=== AUTOSELECT MODEL SELECTION SUCCESS ===")
|
|
472
|
+
logger.info(f"Selected model ID: {model_id}")
|
|
473
|
+
else:
|
|
474
|
+
logger.warning(f"=== AUTOSELECT MODEL SELECTION FAILED ===")
|
|
475
|
+
logger.warning(f"Could not extract model ID from response")
|
|
476
|
+
logger.warning(f"Response content: {content}")
|
|
477
|
+
|
|
273
478
|
return model_id
|
|
274
479
|
except Exception as e:
|
|
480
|
+
logger.error(f"=== AUTOSELECT MODEL SELECTION ERROR ===")
|
|
481
|
+
logger.error(f"Error during model selection: {str(e)}")
|
|
482
|
+
logger.error(f"Will use fallback model")
|
|
275
483
|
# If selection fails, we'll handle it in the main handler
|
|
276
484
|
return None
|
|
277
485
|
|
|
278
486
|
async def handle_autoselect_request(self, autoselect_id: str, request_data: Dict) -> Dict:
|
|
279
487
|
"""Handle an autoselect request"""
|
|
488
|
+
import logging
|
|
489
|
+
logger = logging.getLogger(__name__)
|
|
490
|
+
logger.info(f"=== AUTOSELECT REQUEST START ===")
|
|
491
|
+
logger.info(f"Autoselect ID: {autoselect_id}")
|
|
492
|
+
|
|
280
493
|
autoselect_config = self.config.get_autoselect(autoselect_id)
|
|
281
494
|
if not autoselect_config:
|
|
495
|
+
logger.error(f"Autoselect {autoselect_id} not found")
|
|
282
496
|
raise HTTPException(status_code=400, detail=f"Autoselect {autoselect_id} not found")
|
|
283
497
|
|
|
498
|
+
logger.info(f"Autoselect config loaded")
|
|
499
|
+
logger.info(f"Available models for selection: {len(autoselect_config.available_models)}")
|
|
500
|
+
for model_info in autoselect_config.available_models:
|
|
501
|
+
logger.info(f" - {model_info.model_id}: {model_info.description}")
|
|
502
|
+
logger.info(f"Fallback model: {autoselect_config.fallback}")
|
|
503
|
+
|
|
284
504
|
# Extract the user prompt from the request
|
|
285
505
|
user_messages = request_data.get('messages', [])
|
|
286
506
|
if not user_messages:
|
|
507
|
+
logger.error("No messages provided")
|
|
287
508
|
raise HTTPException(status_code=400, detail="No messages provided")
|
|
288
509
|
|
|
510
|
+
logger.info(f"User messages count: {len(user_messages)}")
|
|
511
|
+
|
|
289
512
|
# Build a string representation of the user prompt
|
|
290
513
|
user_prompt = ""
|
|
291
514
|
for msg in user_messages:
|
|
@@ -296,37 +519,73 @@ class AutoselectHandler:
|
|
|
296
519
|
content = str(content)
|
|
297
520
|
user_prompt += f"{role}: {content}\n"
|
|
298
521
|
|
|
522
|
+
logger.info(f"User prompt length: {len(user_prompt)} characters")
|
|
523
|
+
logger.info(f"User prompt preview: {user_prompt[:200]}..." if len(user_prompt) > 200 else f"User prompt: {user_prompt}")
|
|
524
|
+
|
|
299
525
|
# Build the autoselect prompt
|
|
526
|
+
logger.info(f"Building autoselect prompt...")
|
|
300
527
|
autoselect_prompt = self._build_autoselect_prompt(user_prompt, autoselect_config)
|
|
528
|
+
logger.info(f"Autoselect prompt built (length: {len(autoselect_prompt)} characters)")
|
|
301
529
|
|
|
302
530
|
# Get the model selection
|
|
531
|
+
logger.info(f"Requesting model selection from AI...")
|
|
303
532
|
selected_model_id = await self._get_model_selection(autoselect_prompt)
|
|
304
533
|
|
|
305
534
|
# Validate the selected model
|
|
535
|
+
logger.info(f"=== MODEL VALIDATION ===")
|
|
306
536
|
if not selected_model_id:
|
|
307
537
|
# Fallback to the configured fallback model
|
|
538
|
+
logger.warning(f"No model ID returned from selection")
|
|
539
|
+
logger.warning(f"Using fallback model: {autoselect_config.fallback}")
|
|
308
540
|
selected_model_id = autoselect_config.fallback
|
|
309
541
|
else:
|
|
310
542
|
# Check if the selected model is in the available models list
|
|
311
543
|
available_ids = [m.model_id for m in autoselect_config.available_models]
|
|
312
544
|
if selected_model_id not in available_ids:
|
|
545
|
+
logger.warning(f"Selected model '{selected_model_id}' not in available models list")
|
|
546
|
+
logger.warning(f"Available models: {available_ids}")
|
|
547
|
+
logger.warning(f"Using fallback model: {autoselect_config.fallback}")
|
|
313
548
|
selected_model_id = autoselect_config.fallback
|
|
549
|
+
else:
|
|
550
|
+
logger.info(f"Selected model '{selected_model_id}' is valid and available")
|
|
551
|
+
|
|
552
|
+
logger.info(f"=== FINAL MODEL CHOICE ===")
|
|
553
|
+
logger.info(f"Selected model ID: {selected_model_id}")
|
|
554
|
+
logger.info(f"Selection method: {'AI-selected' if selected_model_id != autoselect_config.fallback else 'Fallback'}")
|
|
314
555
|
|
|
315
556
|
# Now proxy the actual request to the selected rotation
|
|
557
|
+
logger.info(f"Proxying request to rotation: {selected_model_id}")
|
|
316
558
|
rotation_handler = RotationHandler()
|
|
317
|
-
|
|
559
|
+
response = await rotation_handler.handle_rotation_request(selected_model_id, request_data)
|
|
560
|
+
logger.info(f"=== AUTOSELECT REQUEST END ===")
|
|
561
|
+
return response
|
|
318
562
|
|
|
319
563
|
async def handle_autoselect_streaming_request(self, autoselect_id: str, request_data: Dict):
|
|
320
564
|
"""Handle an autoselect streaming request"""
|
|
565
|
+
import logging
|
|
566
|
+
logger = logging.getLogger(__name__)
|
|
567
|
+
logger.info(f"=== AUTOSELECT STREAMING REQUEST START ===")
|
|
568
|
+
logger.info(f"Autoselect ID: {autoselect_id}")
|
|
569
|
+
|
|
321
570
|
autoselect_config = self.config.get_autoselect(autoselect_id)
|
|
322
571
|
if not autoselect_config:
|
|
572
|
+
logger.error(f"Autoselect {autoselect_id} not found")
|
|
323
573
|
raise HTTPException(status_code=400, detail=f"Autoselect {autoselect_id} not found")
|
|
324
574
|
|
|
575
|
+
logger.info(f"Autoselect config loaded")
|
|
576
|
+
logger.info(f"Available models for selection: {len(autoselect_config.available_models)}")
|
|
577
|
+
for model_info in autoselect_config.available_models:
|
|
578
|
+
logger.info(f" - {model_info.model_id}: {model_info.description}")
|
|
579
|
+
logger.info(f"Fallback model: {autoselect_config.fallback}")
|
|
580
|
+
|
|
325
581
|
# Extract the user prompt from the request
|
|
326
582
|
user_messages = request_data.get('messages', [])
|
|
327
583
|
if not user_messages:
|
|
584
|
+
logger.error("No messages provided")
|
|
328
585
|
raise HTTPException(status_code=400, detail="No messages provided")
|
|
329
586
|
|
|
587
|
+
logger.info(f"User messages count: {len(user_messages)}")
|
|
588
|
+
|
|
330
589
|
# Build a string representation of the user prompt
|
|
331
590
|
user_prompt = ""
|
|
332
591
|
for msg in user_messages:
|
|
@@ -336,21 +595,41 @@ class AutoselectHandler:
|
|
|
336
595
|
content = str(content)
|
|
337
596
|
user_prompt += f"{role}: {content}\n"
|
|
338
597
|
|
|
598
|
+
logger.info(f"User prompt length: {len(user_prompt)} characters")
|
|
599
|
+
logger.info(f"User prompt preview: {user_prompt[:200]}..." if len(user_prompt) > 200 else f"User prompt: {user_prompt}")
|
|
600
|
+
|
|
339
601
|
# Build the autoselect prompt
|
|
602
|
+
logger.info(f"Building autoselect prompt...")
|
|
340
603
|
autoselect_prompt = self._build_autoselect_prompt(user_prompt, autoselect_config)
|
|
604
|
+
logger.info(f"Autoselect prompt built (length: {len(autoselect_prompt)} characters)")
|
|
341
605
|
|
|
342
606
|
# Get the model selection
|
|
607
|
+
logger.info(f"Requesting model selection from AI...")
|
|
343
608
|
selected_model_id = await self._get_model_selection(autoselect_prompt)
|
|
344
609
|
|
|
345
610
|
# Validate the selected model
|
|
611
|
+
logger.info(f"=== MODEL VALIDATION ===")
|
|
346
612
|
if not selected_model_id:
|
|
613
|
+
logger.warning(f"No model ID returned from selection")
|
|
614
|
+
logger.warning(f"Using fallback model: {autoselect_config.fallback}")
|
|
347
615
|
selected_model_id = autoselect_config.fallback
|
|
348
616
|
else:
|
|
349
617
|
available_ids = [m.model_id for m in autoselect_config.available_models]
|
|
350
618
|
if selected_model_id not in available_ids:
|
|
619
|
+
logger.warning(f"Selected model '{selected_model_id}' not in available models list")
|
|
620
|
+
logger.warning(f"Available models: {available_ids}")
|
|
621
|
+
logger.warning(f"Using fallback model: {autoselect_config.fallback}")
|
|
351
622
|
selected_model_id = autoselect_config.fallback
|
|
623
|
+
else:
|
|
624
|
+
logger.info(f"Selected model '{selected_model_id}' is valid and available")
|
|
625
|
+
|
|
626
|
+
logger.info(f"=== FINAL MODEL CHOICE ===")
|
|
627
|
+
logger.info(f"Selected model ID: {selected_model_id}")
|
|
628
|
+
logger.info(f"Selection method: {'AI-selected' if selected_model_id != autoselect_config.fallback else 'Fallback'}")
|
|
629
|
+
logger.info(f"Request mode: Streaming")
|
|
352
630
|
|
|
353
631
|
# Now proxy the actual streaming request to the selected rotation
|
|
632
|
+
logger.info(f"Proxying streaming request to rotation: {selected_model_id}")
|
|
354
633
|
rotation_handler = RotationHandler()
|
|
355
634
|
|
|
356
635
|
async def stream_generator():
|
|
@@ -362,8 +641,10 @@ class AutoselectHandler:
|
|
|
362
641
|
for chunk in response:
|
|
363
642
|
yield f"data: {chunk}\n\n".encode('utf-8')
|
|
364
643
|
except Exception as e:
|
|
644
|
+
logger.error(f"Error in streaming response: {str(e)}")
|
|
365
645
|
yield f"data: {str(e)}\n\n".encode('utf-8')
|
|
366
646
|
|
|
647
|
+
logger.info(f"=== AUTOSELECT STREAMING REQUEST END ===")
|
|
367
648
|
return StreamingResponse(stream_generator(), media_type="text/event-stream")
|
|
368
649
|
|
|
369
650
|
async def handle_autoselect_model_list(self, autoselect_id: str) -> List[Dict]:
|