aisbf 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. aisbf/config.py +57 -1
  2. aisbf/handlers.py +314 -33
  3. aisbf/providers.py +164 -9
  4. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/config.py +57 -1
  5. aisbf-0.2.4.data/data/share/aisbf/aisbf/handlers.py +664 -0
  6. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/providers.py +164 -9
  7. aisbf-0.2.4.data/data/share/aisbf/main.py +421 -0
  8. {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/METADATA +1 -1
  9. aisbf-0.2.4.dist-info/RECORD +24 -0
  10. aisbf-0.2.2.data/data/share/aisbf/aisbf/handlers.py +0 -383
  11. aisbf-0.2.2.data/data/share/aisbf/main.py +0 -214
  12. aisbf-0.2.2.dist-info/RECORD +0 -24
  13. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/__init__.py +0 -0
  14. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/models.py +0 -0
  15. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf.sh +0 -0
  16. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/autoselect.json +0 -0
  17. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/autoselect.md +0 -0
  18. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/providers.json +0 -0
  19. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/requirements.txt +0 -0
  20. {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/rotations.json +0 -0
  21. {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/WHEEL +0 -0
  22. {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/entry_points.txt +0 -0
  23. {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/licenses/LICENSE.txt +0 -0
  24. {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,664 @@
1
+ """
2
+ Copyleft (C) 2026 Stefy Lanza <stefy@nexlab.net>
3
+
4
+ AISBF - AI Service Broker Framework || AI Should Be Free
5
+
6
+ Request handlers for AISBF.
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or
11
+ (at your option) any later version.
12
+
13
+ This program is distributed in the hope that it will be useful,
14
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ GNU General Public License for more details.
17
+
18
+ You should have received a copy of the GNU General Public License
19
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
20
+
21
+ Why did the programmer quit his job? Because he didn't get arrays!
22
+
23
+ Request handlers for AISBF.
24
+ """
25
import asyncio
import logging
import random
import re
from pathlib import Path
from typing import Dict, List, Optional

from fastapi import HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse

from .config import config
from .models import ChatCompletionRequest, ChatCompletionResponse
from .providers import get_provider_handler
34
+
35
+ class RequestHandler:
36
+ def __init__(self):
37
+ self.config = config
38
+
39
+ async def handle_chat_completion(self, request: Request, provider_id: str, request_data: Dict) -> Dict:
40
+ import logging
41
+ logger = logging.getLogger(__name__)
42
+ logger.info(f"=== RequestHandler.handle_chat_completion START ===")
43
+ logger.info(f"Provider ID: {provider_id}")
44
+ logger.info(f"Request data: {request_data}")
45
+
46
+ provider_config = self.config.get_provider(provider_id)
47
+ logger.info(f"Provider config: {provider_config}")
48
+ logger.info(f"Provider type: {provider_config.type}")
49
+ logger.info(f"Provider endpoint: {provider_config.endpoint}")
50
+ logger.info(f"API key required: {provider_config.api_key_required}")
51
+
52
+ if provider_config.api_key_required:
53
+ api_key = request_data.get('api_key') or request.headers.get('Authorization', '').replace('Bearer ', '')
54
+ logger.info(f"API key from request: {'***' if api_key else 'None'}")
55
+ if not api_key:
56
+ raise HTTPException(status_code=401, detail="API key required")
57
+ else:
58
+ api_key = None
59
+ logger.info("No API key required for this provider")
60
+
61
+ logger.info(f"Getting provider handler for {provider_id}")
62
+ handler = get_provider_handler(provider_id, api_key)
63
+ logger.info(f"Provider handler obtained: {handler.__class__.__name__}")
64
+
65
+ if handler.is_rate_limited():
66
+ raise HTTPException(status_code=503, detail="Provider temporarily unavailable")
67
+
68
+ try:
69
+ logger.info(f"Model requested: {request_data.get('model')}")
70
+ logger.info(f"Messages count: {len(request_data.get('messages', []))}")
71
+ logger.info(f"Max tokens: {request_data.get('max_tokens')}")
72
+ logger.info(f"Temperature: {request_data.get('temperature', 1.0)}")
73
+ logger.info(f"Stream: {request_data.get('stream', False)}")
74
+
75
+ # Apply rate limiting
76
+ logger.info("Applying rate limiting...")
77
+ await handler.apply_rate_limit()
78
+ logger.info("Rate limiting applied")
79
+
80
+ logger.info(f"Sending request to provider handler...")
81
+ response = await handler.handle_request(
82
+ model=request_data['model'],
83
+ messages=request_data['messages'],
84
+ max_tokens=request_data.get('max_tokens'),
85
+ temperature=request_data.get('temperature', 1.0),
86
+ stream=request_data.get('stream', False)
87
+ )
88
+ logger.info(f"Response received from provider")
89
+ handler.record_success()
90
+ logger.info(f"=== RequestHandler.handle_chat_completion END ===")
91
+ return response
92
+ except Exception as e:
93
+ handler.record_failure()
94
+ raise HTTPException(status_code=500, detail=str(e))
95
+
96
+ async def handle_streaming_chat_completion(self, request: Request, provider_id: str, request_data: Dict):
97
+ provider_config = self.config.get_provider(provider_id)
98
+
99
+ if provider_config.api_key_required:
100
+ api_key = request_data.get('api_key') or request.headers.get('Authorization', '').replace('Bearer ', '')
101
+ if not api_key:
102
+ raise HTTPException(status_code=401, detail="API key required")
103
+ else:
104
+ api_key = None
105
+
106
+ handler = get_provider_handler(provider_id, api_key)
107
+
108
+ if handler.is_rate_limited():
109
+ raise HTTPException(status_code=503, detail="Provider temporarily unavailable")
110
+
111
+ async def stream_generator():
112
+ try:
113
+ # Apply rate limiting
114
+ await handler.apply_rate_limit()
115
+
116
+ response = await handler.handle_request(
117
+ model=request_data['model'],
118
+ messages=request_data['messages'],
119
+ max_tokens=request_data.get('max_tokens'),
120
+ temperature=request_data.get('temperature', 1.0),
121
+ stream=True
122
+ )
123
+ for chunk in response:
124
+ yield f"data: {chunk}\n\n".encode('utf-8')
125
+ handler.record_success()
126
+ except Exception as e:
127
+ handler.record_failure()
128
+ yield f"data: {str(e)}\n\n".encode('utf-8')
129
+
130
+ return StreamingResponse(stream_generator(), media_type="text/event-stream")
131
+
132
+ async def handle_model_list(self, request: Request, provider_id: str) -> List[Dict]:
133
+ provider_config = self.config.get_provider(provider_id)
134
+
135
+ if provider_config.api_key_required:
136
+ api_key = request.headers.get('Authorization', '').replace('Bearer ', '')
137
+ if not api_key:
138
+ raise HTTPException(status_code=401, detail="API key required")
139
+ else:
140
+ api_key = None
141
+
142
+ handler = get_provider_handler(provider_id, api_key)
143
+ try:
144
+ # Apply rate limiting
145
+ await handler.apply_rate_limit()
146
+
147
+ models = await handler.get_models()
148
+ return [model.dict() for model in models]
149
+ except Exception as e:
150
+ raise HTTPException(status_code=500, detail=str(e))
151
+
152
class RotationHandler:
    """Routes a request across a weighted rotation of providers with retry.

    Models are prioritised by weight (higher first); ties are broken
    uniformly at random. Up to two distinct models are attempted before the
    request is failed with a 503.
    """

    def __init__(self):
        # Shared application configuration (providers, rotations, autoselect).
        self.config = config

    @staticmethod
    def _pick_model(candidates: List[Dict]) -> Dict:
        """Return the highest-weight model, breaking ties uniformly at random."""
        top_weight = max(m['weight'] for m in candidates)
        best = [m for m in candidates if m['weight'] == top_weight]
        return random.choice(best)

    def _collect_available_models(self, rotation_config, logger) -> List[Dict]:
        """Gather models from providers that exist and are not rate limited.

        Each returned dict is a copy of the rotation's model entry augmented
        with ``provider_id`` and ``api_key`` for later dispatch.
        """
        available_models: List[Dict] = []
        skipped: List[str] = []
        for provider in rotation_config.providers:
            provider_id = provider['provider_id']
            logger.info("--- Processing provider: %s ---", provider_id)

            if not self.config.get_provider(provider_id):
                logger.error("Provider %s not found in configuration; available: %s",
                             provider_id, list(self.config.providers.keys()))
                skipped.append(provider_id)
                continue

            provider_handler = get_provider_handler(provider_id, provider.get('api_key'))
            if provider_handler.is_rate_limited():
                logger.warning("Provider %s is rate limited/deactivated; skipping",
                               provider_id)
                skipped.append(provider_id)
                continue

            for model in provider['models']:
                logger.info("  model %s (weight %s, rate limit %s)",
                            model['name'], model['weight'],
                            model.get('rate_limit', 'N/A'))
                entry = model.copy()
                entry['provider_id'] = provider_id
                entry['api_key'] = provider.get('api_key')
                available_models.append(entry)

        if skipped:
            logger.info("Skipped providers: %s", ', '.join(skipped))
        return available_models

    async def handle_rotation_request(self, rotation_id: str, request_data: Dict) -> Dict:
        """Dispatch ``request_data`` to the best available model of a rotation.

        Raises:
            HTTPException: 400 for an unknown rotation, 503 when no model is
                available or every attempt fails.
        """
        logger = logging.getLogger(__name__)
        logger.info("=== RotationHandler.handle_rotation_request START ===")
        logger.info("Rotation ID: %s", rotation_id)

        rotation_config = self.config.get_rotation(rotation_id)
        if not rotation_config:
            logger.error("Rotation %s not found", rotation_id)
            raise HTTPException(status_code=400, detail=f"Rotation {rotation_id} not found")

        available_models = self._collect_available_models(rotation_config, logger)
        logger.info("Total models available: %d", len(available_models))

        if not available_models:
            logger.error("No models available in rotation (all providers may be rate limited)")
            raise HTTPException(
                status_code=503,
                detail="No models available in rotation (all providers may be rate limited)")

        max_retries = 2
        tried_models: List[Dict] = []
        last_error = None

        for attempt in range(max_retries):
            remaining = [m for m in available_models if m not in tried_models]
            if not remaining:
                logger.error("All %d models have been attempted", len(available_models))
                break

            # FIX: the original computed a random tie-break among the
            # highest-weight models but then ignored it — the retry loop
            # always took remaining[0] after a deterministic sort. Apply the
            # weight-first / random-tie-break selection on every attempt.
            current_model = self._pick_model(remaining)
            tried_models.append(current_model)

            provider_id = current_model['provider_id']
            model_name = current_model['name']
            logger.info("Attempt %d/%d: model %s (provider %s, weight %s)",
                        attempt + 1, max_retries, model_name, provider_id,
                        current_model['weight'])

            handler = get_provider_handler(provider_id, current_model.get('api_key'))
            if handler.is_rate_limited():
                # NOTE(review): as in the original, a rate-limited skip still
                # consumes one retry slot.
                logger.warning("Provider %s is rate limited, skipping to next model",
                               provider_id)
                continue

            try:
                # Model-specific rate limit overrides the provider default.
                await handler.apply_rate_limit(current_model.get('rate_limit'))
                response = await handler.handle_request(
                    model=model_name,
                    messages=request_data['messages'],
                    max_tokens=request_data.get('max_tokens'),
                    temperature=request_data.get('temperature', 1.0),
                    stream=request_data.get('stream', False)
                )
                handler.record_success()
                logger.info("Request succeeded on attempt %d", attempt + 1)
                logger.info("=== RotationHandler.handle_rotation_request END ===")
                return response
            except Exception as e:
                last_error = str(e)
                handler.record_failure()
                logger.error("Attempt %d failed (%s): %s",
                             attempt + 1, type(e).__name__, e)
                continue

        logger.error("=== ALL RETRIES EXHAUSTED ===")
        logger.error("Attempted %d model(s): %s",
                     len(tried_models), [m['name'] for m in tried_models])
        logger.error("Last error: %s", last_error)
        raise HTTPException(
            status_code=503,
            detail=f"All providers in rotation failed after {max_retries} attempts. Last error: {last_error}"
        )

    async def handle_rotation_model_list(self, rotation_id: str) -> List[Dict]:
        """List every model configured in ``rotation_id``.

        Raises:
            HTTPException: 400 for an unknown rotation.
        """
        rotation_config = self.config.get_rotation(rotation_id)
        if not rotation_config:
            raise HTTPException(status_code=400, detail=f"Rotation {rotation_id} not found")

        return [
            {
                "id": f"{provider['provider_id']}/{model['name']}",
                "name": model['name'],
                "provider_id": provider['provider_id'],
                "weight": model['weight'],
                "rate_limit": model.get('rate_limit'),
            }
            for provider in rotation_config.providers
            for model in provider['models']
        ]
374
+
375
class AutoselectHandler:
    """Lets an AI model choose which rotation should serve a request.

    Builds a selection prompt from the ``autoselect.md`` skill file plus the
    user's messages, asks the ``general`` rotation to pick a model id, then
    proxies the original request to the chosen rotation (falling back to the
    configured fallback on any selection failure).
    """

    def __init__(self):
        self.config = config
        # Lazily loaded, cached content of the autoselect.md skill file.
        self._skill_file_content = None

    def _get_skill_file_content(self) -> str:
        """Load and cache autoselect.md from an installed or source location.

        Raises:
            FileNotFoundError: when no candidate location contains the file.
        """
        if self._skill_file_content is None:
            candidates = [
                Path('/usr/share/aisbf'),
                Path.home() / '.local' / 'share' / 'aisbf',
                # Fallback to the source-tree config directory.
                Path(__file__).parent.parent / 'config',
            ]
            for directory in candidates:
                skill_file = directory / 'autoselect.md'
                if skill_file.exists():
                    self._skill_file_content = skill_file.read_text()
                    break
            else:
                raise FileNotFoundError("Could not find autoselect.md skill file")
        return self._skill_file_content

    def _build_autoselect_prompt(self, user_prompt: str, autoselect_config) -> str:
        """Assemble the selection prompt: skill file + user prompt + catalogue."""
        skill_content = self._get_skill_file_content()
        models_list = "".join(
            f"<model><model_id>{m.model_id}</model_id>"
            f"<model_description>{m.description}</model_description></model>\n"
            for m in autoselect_config.available_models
        )
        return f"""{skill_content}

<aisbf_user_prompt>{user_prompt}</aisbf_user_prompt>
<aisbf_autoselect_list>
{models_list}
</aisbf_autoselect_list>
<aisbf_autoselect_fallback>{autoselect_config.fallback}</aisbf_autoselect_fallback>
"""

    def _extract_model_selection(self, response: str) -> Optional[str]:
        """Pull the model_id out of <aisbf_model_autoselection> tags, if any."""
        match = re.search(
            r'<aisbf_model_autoselection>(.*?)</aisbf_model_autoselection>',
            response, re.DOTALL)
        return match.group(1).strip() if match else None

    @staticmethod
    def _flatten_messages(messages: List[Dict]) -> str:
        """Render chat messages as ``role: content`` lines for the selector."""
        parts = []
        for msg in messages:
            content = msg.get('content', '')
            if isinstance(content, list):
                # Complex content (e.g. with images) is stringified wholesale.
                content = str(content)
            parts.append(f"{msg.get('role', 'user')}: {content}\n")
        return "".join(parts)

    async def _get_model_selection(self, prompt: str) -> Optional[str]:
        """Ask the 'general' rotation to pick a model; return None on failure.

        FIX: return type corrected to Optional[str] — the original was
        annotated ``-> str`` but returns None when selection fails.
        """
        logger = logging.getLogger(__name__)
        logger.info("=== AUTOSELECT MODEL SELECTION START ===")
        # NOTE(review): the selection rotation is hard-coded to "general";
        # this could be made configurable.
        selection_request = {
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.1,  # low temperature for deterministic selection
            "max_tokens": 100,   # only a short tagged answer is expected
            "stream": False
        }
        try:
            response = await RotationHandler().handle_rotation_request(
                "general", selection_request)
            content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
            model_id = self._extract_model_selection(content)
            if model_id:
                logger.info("Selected model ID: %s", model_id)
            else:
                logger.warning("Could not extract model ID from response: %s", content)
            return model_id
        except Exception as e:
            logger.error("Error during model selection: %s", e)
            # Caller falls back to the configured fallback model.
            return None

    async def _resolve_model(self, autoselect_id: str, request_data: Dict):
        """Validate the request and return (autoselect_config, model_id).

        Shared by the plain and streaming handlers (the original duplicated
        this logic verbatim in both). An AI-selected id outside the
        configured catalogue — or no id at all — resolves to the fallback.

        Raises:
            HTTPException: 400 for an unknown autoselect id or no messages.
        """
        logger = logging.getLogger(__name__)
        autoselect_config = self.config.get_autoselect(autoselect_id)
        if not autoselect_config:
            logger.error("Autoselect %s not found", autoselect_id)
            raise HTTPException(status_code=400,
                                detail=f"Autoselect {autoselect_id} not found")

        user_messages = request_data.get('messages', [])
        if not user_messages:
            logger.error("No messages provided")
            raise HTTPException(status_code=400, detail="No messages provided")

        user_prompt = self._flatten_messages(user_messages)
        prompt = self._build_autoselect_prompt(user_prompt, autoselect_config)
        selected_model_id = await self._get_model_selection(prompt)

        available_ids = [m.model_id for m in autoselect_config.available_models]
        if not selected_model_id or selected_model_id not in available_ids:
            logger.warning("Selection invalid or missing (%r); using fallback %s",
                           selected_model_id, autoselect_config.fallback)
            selected_model_id = autoselect_config.fallback
        else:
            logger.info("Selected model '%s' is valid and available", selected_model_id)
        return autoselect_config, selected_model_id

    async def handle_autoselect_request(self, autoselect_id: str, request_data: Dict) -> Dict:
        """Pick a rotation via AI selection, then proxy the request to it."""
        logger = logging.getLogger(__name__)
        logger.info("=== AUTOSELECT REQUEST START ===")
        _, selected_model_id = await self._resolve_model(autoselect_id, request_data)
        logger.info("Proxying request to rotation: %s", selected_model_id)
        response = await RotationHandler().handle_rotation_request(
            selected_model_id, request_data)
        logger.info("=== AUTOSELECT REQUEST END ===")
        return response

    async def handle_autoselect_streaming_request(self, autoselect_id: str, request_data: Dict):
        """Streaming variant: proxy the chosen rotation as an SSE stream."""
        logger = logging.getLogger(__name__)
        logger.info("=== AUTOSELECT STREAMING REQUEST START ===")
        _, selected_model_id = await self._resolve_model(autoselect_id, request_data)
        logger.info("Proxying streaming request to rotation: %s", selected_model_id)
        rotation_handler = RotationHandler()

        async def stream_generator():
            try:
                response = await rotation_handler.handle_rotation_request(
                    selected_model_id, {**request_data, "stream": True})
                # NOTE(review): assumes the rotation returns a *synchronous*
                # iterable when stream=True — confirm against providers.py.
                for chunk in response:
                    yield f"data: {chunk}\n\n".encode('utf-8')
            except Exception as e:
                logger.error("Error in streaming response: %s", e)
                # Errors are surfaced in-band as a final SSE data frame.
                yield f"data: {str(e)}\n\n".encode('utf-8')

        logger.info("=== AUTOSELECT STREAMING REQUEST END ===")
        return StreamingResponse(stream_generator(), media_type="text/event-stream")

    async def handle_autoselect_model_list(self, autoselect_id: str) -> List[Dict]:
        """List the models this autoselect endpoint can choose between.

        Raises:
            HTTPException: 400 for an unknown autoselect id.
        """
        autoselect_config = self.config.get_autoselect(autoselect_id)
        if not autoselect_config:
            raise HTTPException(status_code=400,
                                detail=f"Autoselect {autoselect_id} not found")
        return [
            {
                "id": m.model_id,
                "name": m.model_id,
                "description": m.description,
            }
            for m in autoselect_config.available_models
        ]