aisbf 0.2.2-py3-none-any.whl → 0.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aisbf/config.py +57 -1
- aisbf/handlers.py +314 -33
- aisbf/providers.py +164 -9
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/config.py +57 -1
- aisbf-0.2.4.data/data/share/aisbf/aisbf/handlers.py +664 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/providers.py +164 -9
- aisbf-0.2.4.data/data/share/aisbf/main.py +421 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/METADATA +1 -1
- aisbf-0.2.4.dist-info/RECORD +24 -0
- aisbf-0.2.2.data/data/share/aisbf/aisbf/handlers.py +0 -383
- aisbf-0.2.2.data/data/share/aisbf/main.py +0 -214
- aisbf-0.2.2.dist-info/RECORD +0 -24
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/__init__.py +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf/models.py +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/aisbf.sh +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/autoselect.json +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/autoselect.md +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/providers.json +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/requirements.txt +0 -0
- {aisbf-0.2.2.data → aisbf-0.2.4.data}/data/share/aisbf/rotations.json +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/WHEEL +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/entry_points.txt +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/licenses/LICENSE.txt +0 -0
- {aisbf-0.2.2.dist-info → aisbf-0.2.4.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ aisbf-0.2.4.data/data/share/aisbf/aisbf/handlers.py
@@ -0,0 +1,664 @@
+"""
+Copyleft (C) 2026 Stefy Lanza <stefy@nexlab.net>
+
+AISBF - AI Service Broker Framework || AI Should Be Free
+
+Request handlers for AISBF.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Why did the programmer quit his job? Because he didn't get arrays!
+
+Request handlers for AISBF.
+"""
+import asyncio
+import re
+from typing import Dict, List, Optional
+from pathlib import Path
+from fastapi import HTTPException, Request
+from fastapi.responses import JSONResponse, StreamingResponse
+from .models import ChatCompletionRequest, ChatCompletionResponse
+from .providers import get_provider_handler
+from .config import config
+
+class RequestHandler:
+    def __init__(self):
+        self.config = config
+
+    async def handle_chat_completion(self, request: Request, provider_id: str, request_data: Dict) -> Dict:
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"=== RequestHandler.handle_chat_completion START ===")
+        logger.info(f"Provider ID: {provider_id}")
+        logger.info(f"Request data: {request_data}")
+
+        provider_config = self.config.get_provider(provider_id)
+        logger.info(f"Provider config: {provider_config}")
+        logger.info(f"Provider type: {provider_config.type}")
+        logger.info(f"Provider endpoint: {provider_config.endpoint}")
+        logger.info(f"API key required: {provider_config.api_key_required}")
+
+        if provider_config.api_key_required:
+            api_key = request_data.get('api_key') or request.headers.get('Authorization', '').replace('Bearer ', '')
+            logger.info(f"API key from request: {'***' if api_key else 'None'}")
+            if not api_key:
+                raise HTTPException(status_code=401, detail="API key required")
+        else:
+            api_key = None
+            logger.info("No API key required for this provider")
+
+        logger.info(f"Getting provider handler for {provider_id}")
+        handler = get_provider_handler(provider_id, api_key)
+        logger.info(f"Provider handler obtained: {handler.__class__.__name__}")
+
+        if handler.is_rate_limited():
+            raise HTTPException(status_code=503, detail="Provider temporarily unavailable")
+
+        try:
+            logger.info(f"Model requested: {request_data.get('model')}")
+            logger.info(f"Messages count: {len(request_data.get('messages', []))}")
+            logger.info(f"Max tokens: {request_data.get('max_tokens')}")
+            logger.info(f"Temperature: {request_data.get('temperature', 1.0)}")
+            logger.info(f"Stream: {request_data.get('stream', False)}")
+
+            # Apply rate limiting
+            logger.info("Applying rate limiting...")
+            await handler.apply_rate_limit()
+            logger.info("Rate limiting applied")
+
+            logger.info(f"Sending request to provider handler...")
+            response = await handler.handle_request(
+                model=request_data['model'],
+                messages=request_data['messages'],
+                max_tokens=request_data.get('max_tokens'),
+                temperature=request_data.get('temperature', 1.0),
+                stream=request_data.get('stream', False)
+            )
+            logger.info(f"Response received from provider")
+            handler.record_success()
+            logger.info(f"=== RequestHandler.handle_chat_completion END ===")
+            return response
+        except Exception as e:
+            handler.record_failure()
+            raise HTTPException(status_code=500, detail=str(e))
+
+    async def handle_streaming_chat_completion(self, request: Request, provider_id: str, request_data: Dict):
+        provider_config = self.config.get_provider(provider_id)
+
+        if provider_config.api_key_required:
+            api_key = request_data.get('api_key') or request.headers.get('Authorization', '').replace('Bearer ', '')
+            if not api_key:
+                raise HTTPException(status_code=401, detail="API key required")
+        else:
+            api_key = None
+
+        handler = get_provider_handler(provider_id, api_key)
+
+        if handler.is_rate_limited():
+            raise HTTPException(status_code=503, detail="Provider temporarily unavailable")
+
+        async def stream_generator():
+            try:
+                # Apply rate limiting
+                await handler.apply_rate_limit()
+
+                response = await handler.handle_request(
+                    model=request_data['model'],
+                    messages=request_data['messages'],
+                    max_tokens=request_data.get('max_tokens'),
+                    temperature=request_data.get('temperature', 1.0),
+                    stream=True
+                )
+                for chunk in response:
+                    yield f"data: {chunk}\n\n".encode('utf-8')
+                handler.record_success()
+            except Exception as e:
+                handler.record_failure()
+                yield f"data: {str(e)}\n\n".encode('utf-8')
+
+        return StreamingResponse(stream_generator(), media_type="text/event-stream")
+
+    async def handle_model_list(self, request: Request, provider_id: str) -> List[Dict]:
+        provider_config = self.config.get_provider(provider_id)
+
+        if provider_config.api_key_required:
+            api_key = request.headers.get('Authorization', '').replace('Bearer ', '')
+            if not api_key:
+                raise HTTPException(status_code=401, detail="API key required")
+        else:
+            api_key = None
+
+        handler = get_provider_handler(provider_id, api_key)
+        try:
+            # Apply rate limiting
+            await handler.apply_rate_limit()
+
+            models = await handler.get_models()
+            return [model.dict() for model in models]
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+class RotationHandler:
+    def __init__(self):
+        self.config = config
+
+    async def handle_rotation_request(self, rotation_id: str, request_data: Dict) -> Dict:
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"=== RotationHandler.handle_rotation_request START ===")
+        logger.info(f"Rotation ID: {rotation_id}")
+
+        rotation_config = self.config.get_rotation(rotation_id)
+        if not rotation_config:
+            logger.error(f"Rotation {rotation_id} not found")
+            raise HTTPException(status_code=400, detail=f"Rotation {rotation_id} not found")
+
+        logger.info(f"Rotation config loaded successfully")
+        providers = rotation_config.providers
+        logger.info(f"Number of providers in rotation: {len(providers)}")
+
+        # Collect all available models with their weights
+        available_models = []
+        skipped_providers = []
+        total_models_considered = 0
+
+        logger.info(f"=== MODEL SELECTION PROCESS START ===")
+        logger.info(f"Scanning providers for available models...")
+
+        for provider in providers:
+            provider_id = provider['provider_id']
+            logger.info(f"")
+            logger.info(f"--- Processing provider: {provider_id} ---")
+
+            # Check if provider exists in configuration
+            provider_config = self.config.get_provider(provider_id)
+            if not provider_config:
+                logger.error(f"  [ERROR] Provider {provider_id} not found in providers configuration")
+                logger.error(f"  Available providers: {list(self.config.providers.keys())}")
+                logger.error(f"  Skipping this provider")
+                skipped_providers.append(provider_id)
+                continue
+
+            # Check if provider is rate limited/deactivated
+            provider_handler = get_provider_handler(provider_id, provider.get('api_key'))
+            if provider_handler.is_rate_limited():
+                logger.warning(f"  [SKIPPED] Provider {provider_id} is rate limited/deactivated")
+                logger.warning(f"  Reason: Provider has exceeded failure threshold or is in cooldown period")
+                skipped_providers.append(provider_id)
+                continue
+
+            logger.info(f"  [AVAILABLE] Provider {provider_id} is active and ready")
+
+            models_in_provider = len(provider['models'])
+            total_models_considered += models_in_provider
+            logger.info(f"  Found {models_in_provider} model(s) in this provider")
+
+            for model in provider['models']:
+                model_name = model['name']
+                model_weight = model['weight']
+                model_rate_limit = model.get('rate_limit', 'N/A')
+
+                logger.info(f"    - Model: {model_name}")
+                logger.info(f"      Weight (Priority): {model_weight}")
+                logger.info(f"      Rate Limit: {model_rate_limit}")
+
+                # Add provider_id and api_key to model for later use
+                model_with_provider = model.copy()
+                model_with_provider['provider_id'] = provider_id
+                model_with_provider['api_key'] = provider.get('api_key')
+                available_models.append(model_with_provider)
+
+        logger.info(f"")
+        logger.info(f"=== MODEL SELECTION SUMMARY ===")
+        logger.info(f"Total providers scanned: {len(providers)}")
+        logger.info(f"Providers skipped (rate limited): {len(skipped_providers)}")
+        if skipped_providers:
+            logger.info(f"Skipped providers: {', '.join(skipped_providers)}")
+        logger.info(f"Total models considered: {total_models_considered}")
+        logger.info(f"Total models available: {len(available_models)}")
+
+        if not available_models:
+            logger.error("No models available in rotation (all providers may be rate limited)")
+            logger.error("All providers in this rotation are currently deactivated")
+            raise HTTPException(status_code=503, detail="No models available in rotation (all providers may be rate limited)")
+
+        # Sort models by weight in descending order (higher weight = higher priority)
+        available_models.sort(key=lambda m: m['weight'], reverse=True)
+
+        logger.info(f"")
+        logger.info(f"=== PRIORITY-BASED SELECTION ===")
+        logger.info(f"Models sorted by weight (descending priority):")
+        for idx, model in enumerate(available_models, 1):
+            logger.info(f"  {idx}. {model['name']} (provider: {model['provider_id']}, weight: {model['weight']})")
+
+        # Find the highest weight
+        highest_weight = available_models[0]['weight']
+        logger.info(f"")
+        logger.info(f"Highest priority weight: {highest_weight}")
+
+        # Filter models with the highest weight
+        highest_weight_models = [m for m in available_models if m['weight'] == highest_weight]
+        logger.info(f"Models with highest priority ({highest_weight}): {len(highest_weight_models)}")
+        for model in highest_weight_models:
+            logger.info(f"  - {model['name']} (provider: {model['provider_id']})")
+
+        # If multiple models have the same highest weight, randomly select among them
+        import random
+        if len(highest_weight_models) > 1:
+            logger.info(f"Multiple models with same highest priority - performing random selection")
+            selected_model = random.choice(highest_weight_models)
+            logger.info(f"Randomly selected from {len(highest_weight_models)} candidates")
+        else:
+            selected_model = highest_weight_models[0]
+            logger.info(f"Single model with highest priority - deterministic selection")
+
+        logger.info(f"")
+        logger.info(f"=== FINAL SELECTION ===")
+        logger.info(f"Selected model: {selected_model['name']}")
+        logger.info(f"Selected provider: {selected_model['provider_id']}")
+        logger.info(f"Model weight (priority): {selected_model['weight']}")
+        logger.info(f"Model rate limit: {selected_model.get('rate_limit', 'N/A')}")
+        logger.info(f"=== MODEL SELECTION PROCESS END ===")
+
+        # Retry logic: Try up to 2 times with different models
+        max_retries = 2
+        tried_models = []
+        last_error = None
+
+        for attempt in range(max_retries):
+            logger.info(f"")
+            logger.info(f"=== ATTEMPT {attempt + 1}/{max_retries} ===")
+
+            # Select a model that hasn't been tried yet
+            remaining_models = [m for m in available_models if m not in tried_models]
+
+            if not remaining_models:
+                logger.error(f"No more models available to try")
+                logger.error(f"All {len(available_models)} models have been attempted")
+                break
+
+            # Sort remaining models by weight and select the best one
+            remaining_models.sort(key=lambda m: m['weight'], reverse=True)
+            current_model = remaining_models[0]
+            tried_models.append(current_model)
+
+            logger.info(f"Trying model: {current_model['name']} (provider: {current_model['provider_id']})")
+            logger.info(f"Attempt {attempt + 1} of {max_retries}")
+
+            provider_id = current_model['provider_id']
+            api_key = current_model.get('api_key')
+            model_name = current_model['name']
+
+            logger.info(f"Getting provider handler for {provider_id}")
+            handler = get_provider_handler(provider_id, api_key)
+            logger.info(f"Provider handler obtained: {handler.__class__.__name__}")
+
+            if handler.is_rate_limited():
+                logger.warning(f"Provider {provider_id} is rate limited, skipping to next model")
+                continue
+
+            try:
+                logger.info(f"Model requested: {model_name}")
+                logger.info(f"Messages count: {len(request_data.get('messages', []))}")
+                logger.info(f"Max tokens: {request_data.get('max_tokens')}")
+                logger.info(f"Temperature: {request_data.get('temperature', 1.0)}")
+                logger.info(f"Stream: {request_data.get('stream', False)}")
+
+                # Apply rate limiting with model-specific rate limit if available
+                rate_limit = current_model.get('rate_limit')
+                logger.info(f"Model-specific rate limit: {rate_limit}")
+                logger.info("Applying rate limiting...")
+                await handler.apply_rate_limit(rate_limit)
+                logger.info("Rate limiting applied")
+
+                logger.info(f"Sending request to provider handler...")
+                response = await handler.handle_request(
+                    model=model_name,
+                    messages=request_data['messages'],
+                    max_tokens=request_data.get('max_tokens'),
+                    temperature=request_data.get('temperature', 1.0),
+                    stream=request_data.get('stream', False)
+                )
+                logger.info(f"Response received from provider")
+                handler.record_success()
+                logger.info(f"=== RotationHandler.handle_rotation_request END ===")
+                logger.info(f"Request succeeded on attempt {attempt + 1}")
+                return response
+            except Exception as e:
+                last_error = str(e)
+                handler.record_failure()
+                logger.error(f"Attempt {attempt + 1} failed: {str(e)}")
+                logger.error(f"Error type: {type(e).__name__}")
+                logger.error(f"Will try next model...")
+                continue
+
+        # All retries exhausted
+        logger.error(f"")
+        logger.error(f"=== ALL RETRIES EXHAUSTED ===")
+        logger.error(f"Attempted {len(tried_models)} different model(s): {[m['name'] for m in tried_models]}")
+        logger.error(f"Last error: {last_error}")
+        logger.error(f"Max retries ({max_retries}) reached without success")
+        raise HTTPException(
+            status_code=503,
+            detail=f"All providers in rotation failed after {max_retries} attempts. Last error: {last_error}"
+        )
+
+    async def handle_rotation_model_list(self, rotation_id: str) -> List[Dict]:
+        rotation_config = self.config.get_rotation(rotation_id)
+        if not rotation_config:
+            raise HTTPException(status_code=400, detail=f"Rotation {rotation_id} not found")
+
+        all_models = []
+        for provider in rotation_config.providers:
+            for model in provider['models']:
+                all_models.append({
+                    "id": f"{provider['provider_id']}/{model['name']}",
+                    "name": model['name'],
+                    "provider_id": provider['provider_id'],
+                    "weight": model['weight'],
+                    "rate_limit": model.get('rate_limit')
+                })
+
+        return all_models
+
+class AutoselectHandler:
+    def __init__(self):
+        self.config = config
+        self._skill_file_content = None
+
+    def _get_skill_file_content(self) -> str:
+        """Load the autoselect.md skill file content"""
+        if self._skill_file_content is None:
+            # Try installed locations first
+            installed_dirs = [
+                Path('/usr/share/aisbf'),
+                Path.home() / '.local' / 'share' / 'aisbf',
+            ]
+
+            for installed_dir in installed_dirs:
+                skill_file = installed_dir / 'autoselect.md'
+                if skill_file.exists():
+                    with open(skill_file) as f:
+                        self._skill_file_content = f.read()
+                    return self._skill_file_content
+
+            # Fallback to source tree config directory
+            source_dir = Path(__file__).parent.parent / 'config'
+            skill_file = source_dir / 'autoselect.md'
+            if skill_file.exists():
+                with open(skill_file) as f:
+                    self._skill_file_content = f.read()
+                return self._skill_file_content
+
+            raise FileNotFoundError("Could not find autoselect.md skill file")
+
+        return self._skill_file_content
+
+    def _build_autoselect_prompt(self, user_prompt: str, autoselect_config) -> str:
+        """Build the prompt for model selection"""
+        skill_content = self._get_skill_file_content()
+
+        # Build the available models list
+        models_list = ""
+        for model_info in autoselect_config.available_models:
+            models_list += f"<model><model_id>{model_info.model_id}</model_id><model_description>{model_info.description}</model_description></model>\n"
+
+        # Build the complete prompt
+        prompt = f"""{skill_content}
+
+<aisbf_user_prompt>{user_prompt}</aisbf_user_prompt>
+<aisbf_autoselect_list>
+{models_list}
+</aisbf_autoselect_list>
+<aisbf_autoselect_fallback>{autoselect_config.fallback}</aisbf_autoselect_fallback>
+"""
+        return prompt
+
+    def _extract_model_selection(self, response: str) -> Optional[str]:
+        """Extract the model_id from the autoselection response"""
+        match = re.search(r'<aisbf_model_autoselection>(.*?)</aisbf_model_autoselection>', response, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        return None
+
+    async def _get_model_selection(self, prompt: str) -> str:
+        """Send the autoselect prompt to a model and get the selection"""
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"=== AUTOSELECT MODEL SELECTION START ===")
+        logger.info(f"Using 'general' rotation for model selection")
+
+        # Use the first available provider/model for the selection
+        # This is a simple implementation - could be enhanced to use a specific selection model
+        rotation_handler = RotationHandler()
+
+        # Create a minimal request for model selection
+        selection_request = {
+            "messages": [{"role": "user", "content": prompt}],
+            "temperature": 0.1,  # Low temperature for more deterministic selection
+            "max_tokens": 100,  # We only need a short response
+            "stream": False
+        }
+
+        logger.info(f"Selection request parameters:")
+        logger.info(f"  Temperature: 0.1 (low for deterministic selection)")
+        logger.info(f"  Max tokens: 100 (short response expected)")
+        logger.info(f"  Stream: False")
+
+        # Use the fallback rotation for the selection
+        try:
+            logger.info(f"Sending selection request to rotation handler...")
+            response = await rotation_handler.handle_rotation_request("general", selection_request)
+            logger.info(f"Selection response received")
+
+            content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
+            logger.info(f"Raw response content: {content[:200]}..." if len(content) > 200 else f"Raw response content: {content}")
+
+            model_id = self._extract_model_selection(content)
+
+            if model_id:
+                logger.info(f"=== AUTOSELECT MODEL SELECTION SUCCESS ===")
+                logger.info(f"Selected model ID: {model_id}")
+            else:
+                logger.warning(f"=== AUTOSELECT MODEL SELECTION FAILED ===")
+                logger.warning(f"Could not extract model ID from response")
+                logger.warning(f"Response content: {content}")
+
+            return model_id
+        except Exception as e:
+            logger.error(f"=== AUTOSELECT MODEL SELECTION ERROR ===")
+            logger.error(f"Error during model selection: {str(e)}")
+            logger.error(f"Will use fallback model")
+            # If selection fails, we'll handle it in the main handler
+            return None
+
+    async def handle_autoselect_request(self, autoselect_id: str, request_data: Dict) -> Dict:
+        """Handle an autoselect request"""
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"=== AUTOSELECT REQUEST START ===")
+        logger.info(f"Autoselect ID: {autoselect_id}")
+
+        autoselect_config = self.config.get_autoselect(autoselect_id)
+        if not autoselect_config:
+            logger.error(f"Autoselect {autoselect_id} not found")
+            raise HTTPException(status_code=400, detail=f"Autoselect {autoselect_id} not found")
+
+        logger.info(f"Autoselect config loaded")
+        logger.info(f"Available models for selection: {len(autoselect_config.available_models)}")
+        for model_info in autoselect_config.available_models:
+            logger.info(f"  - {model_info.model_id}: {model_info.description}")
+        logger.info(f"Fallback model: {autoselect_config.fallback}")
+
+        # Extract the user prompt from the request
+        user_messages = request_data.get('messages', [])
+        if not user_messages:
+            logger.error("No messages provided")
+            raise HTTPException(status_code=400, detail="No messages provided")
+
+        logger.info(f"User messages count: {len(user_messages)}")
+
+        # Build a string representation of the user prompt
+        user_prompt = ""
+        for msg in user_messages:
+            role = msg.get('role', 'user')
+            content = msg.get('content', '')
+            if isinstance(content, list):
+                # Handle complex content (e.g., with images)
+                content = str(content)
+            user_prompt += f"{role}: {content}\n"
+
+        logger.info(f"User prompt length: {len(user_prompt)} characters")
+        logger.info(f"User prompt preview: {user_prompt[:200]}..." if len(user_prompt) > 200 else f"User prompt: {user_prompt}")
+
+        # Build the autoselect prompt
+        logger.info(f"Building autoselect prompt...")
+        autoselect_prompt = self._build_autoselect_prompt(user_prompt, autoselect_config)
+        logger.info(f"Autoselect prompt built (length: {len(autoselect_prompt)} characters)")
+
+        # Get the model selection
+        logger.info(f"Requesting model selection from AI...")
+        selected_model_id = await self._get_model_selection(autoselect_prompt)
+
+        # Validate the selected model
+        logger.info(f"=== MODEL VALIDATION ===")
+        if not selected_model_id:
+            # Fallback to the configured fallback model
+            logger.warning(f"No model ID returned from selection")
+            logger.warning(f"Using fallback model: {autoselect_config.fallback}")
+            selected_model_id = autoselect_config.fallback
+        else:
+            # Check if the selected model is in the available models list
+            available_ids = [m.model_id for m in autoselect_config.available_models]
+            if selected_model_id not in available_ids:
+                logger.warning(f"Selected model '{selected_model_id}' not in available models list")
+                logger.warning(f"Available models: {available_ids}")
+                logger.warning(f"Using fallback model: {autoselect_config.fallback}")
+                selected_model_id = autoselect_config.fallback
+            else:
+                logger.info(f"Selected model '{selected_model_id}' is valid and available")
+
+        logger.info(f"=== FINAL MODEL CHOICE ===")
+        logger.info(f"Selected model ID: {selected_model_id}")
+        logger.info(f"Selection method: {'AI-selected' if selected_model_id != autoselect_config.fallback else 'Fallback'}")
+
+        # Now proxy the actual request to the selected rotation
+        logger.info(f"Proxying request to rotation: {selected_model_id}")
+        rotation_handler = RotationHandler()
+        response = await rotation_handler.handle_rotation_request(selected_model_id, request_data)
+        logger.info(f"=== AUTOSELECT REQUEST END ===")
+        return response
+
+    async def handle_autoselect_streaming_request(self, autoselect_id: str, request_data: Dict):
+        """Handle an autoselect streaming request"""
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"=== AUTOSELECT STREAMING REQUEST START ===")
+        logger.info(f"Autoselect ID: {autoselect_id}")
+
+        autoselect_config = self.config.get_autoselect(autoselect_id)
+        if not autoselect_config:
+            logger.error(f"Autoselect {autoselect_id} not found")
+            raise HTTPException(status_code=400, detail=f"Autoselect {autoselect_id} not found")
+
+        logger.info(f"Autoselect config loaded")
+        logger.info(f"Available models for selection: {len(autoselect_config.available_models)}")
+        for model_info in autoselect_config.available_models:
+            logger.info(f"  - {model_info.model_id}: {model_info.description}")
+        logger.info(f"Fallback model: {autoselect_config.fallback}")
+
+        # Extract the user prompt from the request
+        user_messages = request_data.get('messages', [])
+        if not user_messages:
+            logger.error("No messages provided")
+            raise HTTPException(status_code=400, detail="No messages provided")
+
+        logger.info(f"User messages count: {len(user_messages)}")
+
+        # Build a string representation of the user prompt
+        user_prompt = ""
+        for msg in user_messages:
+            role = msg.get('role', 'user')
+            content = msg.get('content', '')
+            if isinstance(content, list):
+                content = str(content)
+            user_prompt += f"{role}: {content}\n"
+
+        logger.info(f"User prompt length: {len(user_prompt)} characters")
+        logger.info(f"User prompt preview: {user_prompt[:200]}..." if len(user_prompt) > 200 else f"User prompt: {user_prompt}")
+
+        # Build the autoselect prompt
+        logger.info(f"Building autoselect prompt...")
+        autoselect_prompt = self._build_autoselect_prompt(user_prompt, autoselect_config)
+        logger.info(f"Autoselect prompt built (length: {len(autoselect_prompt)} characters)")
+
+        # Get the model selection
+        logger.info(f"Requesting model selection from AI...")
+        selected_model_id = await self._get_model_selection(autoselect_prompt)
+
+        # Validate the selected model
+        logger.info(f"=== MODEL VALIDATION ===")
+        if not selected_model_id:
+            logger.warning(f"No model ID returned from selection")
+            logger.warning(f"Using fallback model: {autoselect_config.fallback}")
+            selected_model_id = autoselect_config.fallback
+        else:
+            available_ids = [m.model_id for m in autoselect_config.available_models]
+            if selected_model_id not in available_ids:
+                logger.warning(f"Selected model '{selected_model_id}' not in available models list")
+                logger.warning(f"Available models: {available_ids}")
+                logger.warning(f"Using fallback model: {autoselect_config.fallback}")
+                selected_model_id = autoselect_config.fallback
+            else:
+                logger.info(f"Selected model '{selected_model_id}' is valid and available")
+
+        logger.info(f"=== FINAL MODEL CHOICE ===")
+        logger.info(f"Selected model ID: {selected_model_id}")
+        logger.info(f"Selection method: {'AI-selected' if selected_model_id != autoselect_config.fallback else 'Fallback'}")
+        logger.info(f"Request mode: Streaming")
+
+        # Now proxy the actual streaming request to the selected rotation
+        logger.info(f"Proxying streaming request to rotation: {selected_model_id}")
+        rotation_handler = RotationHandler()
+
+        async def stream_generator():
+            try:
+                response = await rotation_handler.handle_rotation_request(
+                    selected_model_id,
+                    {**request_data, "stream": True}
+                )
+                for chunk in response:
+                    yield f"data: {chunk}\n\n".encode('utf-8')
+            except Exception as e:
+                logger.error(f"Error in streaming response: {str(e)}")
+                yield f"data: {str(e)}\n\n".encode('utf-8')
+
+        logger.info(f"=== AUTOSELECT STREAMING REQUEST END ===")
+        return StreamingResponse(stream_generator(), media_type="text/event-stream")
+
+    async def handle_autoselect_model_list(self, autoselect_id: str) -> List[Dict]:
+        """List available models for an autoselect endpoint"""
+        autoselect_config = self.config.get_autoselect(autoselect_id)
+        if not autoselect_config:
+            raise HTTPException(status_code=400, detail=f"Autoselect {autoselect_id} not found")
+
+        # Return the available models that can be selected
+        return [
+            {
+                "id": model_info.model_id,
+                "name": model_info.model_id,
+                "description": model_info.description
+            }
+            for model_info in autoselect_config.available_models
+        ]