isa-model 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +30 -1
- isa_model/client.py +770 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/models/model_repo.py +343 -0
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/__init__.py +9 -0
- isa_model/deployment/cloud/modal/__init__.py +10 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +766 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +406 -0
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +537 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +187 -387
- isa_model/inference/providers/modal_provider.py +109 -0
- isa_model/inference/providers/yyds_provider.py +108 -0
- isa_model/inference/services/__init__.py +2 -1
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -55
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -35
- isa_model/inference/services/img/flux_professional_service.py +603 -0
- isa_model/inference/services/img/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +210 -69
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +519 -35
- isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +40 -0
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +150 -15
- isa_model/inference/services/llm/openai_llm_service.py +134 -31
- isa_model/inference/services/llm/yyds_llm_service.py +255 -0
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +241 -96
- isa_model/inference/services/vision/disabled/isA_vision_service.py +500 -0
- isa_model/inference/services/vision/doc_analysis_service.py +640 -0
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +109 -170
- isa_model/inference/services/vision/replicate_vision_service.py +508 -0
- isa_model/inference/services/vision/ui_analysis_service.py +823 -0
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/__init__.py +19 -0
- isa_model/serving/api/__init__.py +10 -0
- isa_model/serving/api/fastapi_server.py +89 -0
- isa_model/serving/api/middleware/__init__.py +9 -0
- isa_model/serving/api/middleware/request_logger.py +88 -0
- isa_model/serving/api/routes/__init__.py +5 -0
- isa_model/serving/api/routes/health.py +82 -0
- isa_model/serving/api/routes/llm.py +19 -0
- isa_model/serving/api/routes/ui_analysis.py +223 -0
- isa_model/serving/api/routes/unified.py +202 -0
- isa_model/serving/api/routes/vision.py +19 -0
- isa_model/serving/api/schemas/__init__.py +17 -0
- isa_model/serving/api/schemas/common.py +33 -0
- isa_model/serving/api/schemas/ui_analysis.py +78 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/METADATA +4 -1
- isa_model-0.3.6.dist-info/RECORD +147 -0
- isa_model/core/model_manager.py +0 -208
- isa_model/core/model_registry.py +0 -342
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- isa_model-0.3.4.dist-info/RECORD +0 -91
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/WHEEL +0 -0
- {isa_model-0.3.4.dist-info → isa_model-0.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,547 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
Intelligent Model Selector - Simple similarity-based model selection
|
6
|
+
Uses embedding similarity matching against model descriptions and metadata
|
7
|
+
"""
|
8
|
+
|
9
|
+
import logging
|
10
|
+
import asyncio
|
11
|
+
import json
|
12
|
+
from typing import Dict, List, Any, Optional, Tuple
|
13
|
+
from pathlib import Path
|
14
|
+
import yaml
|
15
|
+
|
16
|
+
# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)

# Optional dependency: asyncpg + pgvector enable the PostgreSQL-backed
# similarity search. When either import fails the selector keeps working
# with the in-memory search instead.
try:
    import asyncpg
    from pgvector.asyncpg import register_vector
    PGVECTOR_AVAILABLE = True
except ImportError:
    PGVECTOR_AVAILABLE = False
    logger.warning("pgvector not available, model selector will use in-memory fallback")

# Optional dependency: the Supabase client enables the Supabase RPC search.
# There is no SQLite code path in this module; without Supabase the selector
# tries PostgreSQL/pgvector next and finally the in-memory search, so the
# warning states exactly that.
try:
    from supabase import create_client, Client
    SUPABASE_AVAILABLE = True
except ImportError:
    SUPABASE_AVAILABLE = False
    logger.warning("Supabase not available, falling back to PostgreSQL/pgvector or in-memory search")
|
32
|
+
|
33
|
+
|
34
|
+
class IntelligentModelSelector:
    """
    Simple intelligent model selector using embedding similarity.

    Features:
    - Embeds model descriptions and metadata
    - Stores embeddings in pgvector for fast similarity search
    - Falls back to in-memory similarity if pgvector unavailable
    - Has default models for each service type

    Backend priority at query time is: Supabase client, then the asyncpg
    pool, then the in-memory dictionaries populated by ``_load_models``.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Create an un-initialized selector; call ``initialize()`` before use.

        Args:
            config: Optional settings. The keys read by this class are
                ``"supabase"`` (``url`` / ``key``) and ``"database"``
                (``host`` / ``port`` / ``database`` / ``user`` / ``password``).
        """
        self.config = config or {}
        self.db_pool = None
        self.supabase_client = None
        self.embedding_service = None
        # model_id -> embedding vector (only filled when no db pool is active)
        self.model_embeddings: Dict[str, List[float]] = {}
        # model_id -> provider / model_type / capabilities / metadata / search_text
        self.models_metadata: Dict[str, Dict[str, Any]] = {}

        # Default models for each service type
        self.default_models = {
            "vision": {"model_id": "gpt-4.1-mini", "provider": "openai"},
            "audio": {"model_id": "whisper-1", "provider": "openai"},
            "text": {"model_id": "gpt-4.1-mini", "provider": "openai"},
            "image": {"model_id": "flux-schnell", "provider": "replicate"},
            "embedding": {"model_id": "text-embedding-3-small", "provider": "openai"},
            "omni": {"model_id": "gpt-4.1", "provider": "openai"}
        }

        logger.info("Intelligent Model Selector initialized")

    async def initialize(self) -> None:
        """
        Initialize the embedding service, a storage backend and the model index.

        Never raises: any failure is logged and the selector keeps running in
        fallback mode (defaults and/or in-memory search).
        """
        try:
            # Initialize embedding service
            await self._init_embedding_service()

            # Initialize database - try Supabase first, then PostgreSQL
            if SUPABASE_AVAILABLE and self.config.get("supabase"):
                await self._init_supabase()
            elif PGVECTOR_AVAILABLE:
                await self._init_database()

            # Load and embed models
            await self._load_models()

            logger.info("Model selector fully initialized")

        except Exception as e:
            # Deliberate best-effort: continue with fallback mode.
            logger.error(f"Failed to initialize model selector: {e}")

    async def _init_embedding_service(self) -> None:
        """Obtain the embedding service from ``AIFactory``; on failure leave it ``None``."""
        try:
            # Imported lazily, presumably to avoid a circular import at module load.
            from isa_model.inference.ai_factory import AIFactory
            factory = AIFactory.get_instance()
            self.embedding_service = factory.get_embed("text-embedding-3-small", "openai")
            logger.info("Embedding service initialized")
        except Exception as e:
            logger.warning(f"Failed to initialize embedding service: {e}")

    async def _init_supabase(self) -> None:
        """
        Create the Supabase client used for vector search.

        URL and key come from ``config["supabase"]`` and fall back to the
        ``SUPABASE_URL`` / ``SUPABASE_ANON_KEY`` environment variables. On any
        failure ``self.supabase_client`` is reset to ``None``.
        """
        try:
            supabase_config = self.config.get("supabase", {})
            url = supabase_config.get("url")
            key = supabase_config.get("key")

            if not url or not key:
                # Try environment variables
                import os
                url = url or os.getenv("SUPABASE_URL")
                key = key or os.getenv("SUPABASE_ANON_KEY")

            if not url or not key:
                raise ValueError("Supabase URL and key are required")

            self.supabase_client = create_client(url, key)
            logger.info("Supabase client initialized successfully")

        except Exception as e:
            logger.warning(f"Supabase initialization failed: {e}, using in-memory fallback")
            self.supabase_client = None

    async def _init_database(self) -> None:
        """
        Prepare the pgvector schema and open the asyncpg connection pool.

        The schema is created on a short-lived bootstrap connection first,
        because ``register_vector`` needs the ``vector`` extension to exist.
        The pool then registers the vector codec on every connection it opens
        (via ``init=``), not just on the first one. On any failure
        ``self.db_pool`` is left as ``None`` so callers use the in-memory search.
        """
        try:
            # Get database configuration; missing keys fall back to the defaults
            # instead of raising KeyError on a partially specified config.
            defaults = {
                "host": "localhost",
                "port": 5432,
                "database": "isa_model",
                "user": "postgres",
                "password": "password"
            }
            db_config = {**defaults, **(self.config.get("database") or {})}
            connect_kwargs = {name: db_config[name] for name in defaults}

            # Bootstrap: create the extension, table and index if not exists
            bootstrap_conn = await asyncpg.connect(**connect_kwargs)
            try:
                await bootstrap_conn.execute("""
                    CREATE EXTENSION IF NOT EXISTS vector;

                    CREATE TABLE IF NOT EXISTS model_embeddings (
                        id SERIAL PRIMARY KEY,
                        model_id VARCHAR(255) UNIQUE NOT NULL,
                        provider VARCHAR(100) NOT NULL,
                        model_type VARCHAR(50) NOT NULL,
                        description TEXT,
                        metadata JSONB,
                        embedding vector(1536),
                        created_at TIMESTAMP DEFAULT NOW(),
                        updated_at TIMESTAMP DEFAULT NOW()
                    );

                    CREATE INDEX IF NOT EXISTS idx_model_embeddings_similarity
                    ON model_embeddings USING ivfflat (embedding vector_cosine_ops);
                """)
            finally:
                await bootstrap_conn.close()

            async def _register_vector_codec(connection):
                # Runs once for each new pooled connection.
                await register_vector(connection)

            # Create connection pool
            self.db_pool = await asyncpg.create_pool(
                min_size=1,
                max_size=5,
                init=_register_vector_codec,
                **connect_kwargs
            )

            logger.info("Database initialized successfully")

        except Exception as e:
            logger.warning(f"Database initialization failed: {e}, using in-memory fallback")
            self.db_pool = None

    async def _load_models(self) -> None:
        """Load every ``*.yaml`` file from the package's ``config/models`` directory."""
        try:
            # Get config directory (three levels above this file)
            config_dir = Path(__file__).parent.parent.parent / "config" / "models"

            if not config_dir.exists():
                logger.warning(f"Model config directory not found: {config_dir}")
                return

            # Load all YAML files
            for yaml_file in config_dir.glob("*.yaml"):
                await self._load_models_from_file(yaml_file)

            logger.info(f"Loaded {len(self.models_metadata)} models for similarity matching")

        except Exception as e:
            logger.error(f"Failed to load models: {e}")

    async def _load_models_from_file(self, yaml_file: Path) -> None:
        """
        Load the models declared in one YAML file.

        The file is expected to hold a mapping with an optional ``provider``
        string and a ``models`` list. Empty files and files with any other
        top-level shape are skipped with a warning.
        """
        try:
            with open(yaml_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)

            # safe_load returns None for an empty document.
            if not isinstance(data, dict):
                logger.warning(f"Skipping {yaml_file}: expected a mapping at the top level")
                return

            provider = data.get("provider", "unknown")

            # "models:" with no value parses to None, hence the "or []".
            for model in data.get("models") or []:
                await self._process_model(model, provider)

        except Exception as e:
            logger.error(f"Failed to load models from {yaml_file}: {e}")

    async def _process_model(self, model: Dict[str, Any], provider: str) -> None:
        """
        Embed one model entry and record it in the active store.

        Nothing is recorded when no embedding service is available or when the
        embedding call fails. Entries without a ``model_id`` are ignored.
        """
        # Guard first: the error handler below reads the id, so a non-dict
        # entry must never reach it.
        if not isinstance(model, dict):
            logger.warning(f"Skipping malformed model entry from provider {provider}: {model!r}")
            return

        model_id = model.get("model_id")
        try:
            if not model_id:
                return

            # Create searchable text from description and metadata
            # ("or" also covers keys that are present but null in the YAML).
            metadata = model.get("metadata") or {}
            description = metadata.get("description", "")
            specialized_tasks = metadata.get("specialized_tasks") or []
            capabilities = model.get("capabilities") or []

            # Combine all text for embedding; str() tolerates non-string items.
            search_text = f"{description} "
            search_text += f"Capabilities: {', '.join(str(c) for c in capabilities)} "
            search_text += f"Tasks: {', '.join(str(t) for t in specialized_tasks)}"

            # Create embedding
            if self.embedding_service:
                try:
                    embedding = await self.embedding_service.create_text_embedding(search_text)

                    # Store model metadata
                    self.models_metadata[model_id] = {
                        "provider": provider,
                        "model_type": model.get("model_type"),
                        "capabilities": capabilities,
                        "metadata": metadata,
                        "search_text": search_text
                    }

                    # Store embedding
                    if self.db_pool:
                        await self._store_model_embedding(model_id, provider, model, embedding)
                    else:
                        self.model_embeddings[model_id] = embedding

                except Exception as e:
                    logger.warning(f"Failed to create embedding for {model_id}: {e}")

        except Exception as e:
            logger.error(f"Failed to process model {model_id or 'unknown'}: {e}")

    async def _store_model_embedding(
        self,
        model_id: str,
        provider: str,
        model: Dict[str, Any],
        embedding: List[float]
    ) -> None:
        """
        Upsert one row into ``model_embeddings`` keyed by ``model_id``.

        Errors are logged and swallowed.
        """
        try:
            metadata = model.get("metadata") or {}
            async with self.db_pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO model_embeddings
                    (model_id, provider, model_type, description, metadata, embedding)
                    VALUES ($1, $2, $3, $4, $5, $6)
                    ON CONFLICT (model_id)
                    DO UPDATE SET
                        provider = $2,
                        model_type = $3,
                        description = $4,
                        metadata = $5,
                        embedding = $6,
                        updated_at = NOW()
                """,
                    model_id,
                    provider,
                    model.get("model_type"),
                    metadata.get("description", ""),
                    json.dumps(metadata),
                    embedding
                )

        except Exception as e:
            logger.error(f"Failed to store embedding for {model_id}: {e}")

    async def select_model(
        self,
        request: str,
        service_type: str,
        context: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Select best model using similarity matching

        Args:
            request: User's request/query
            service_type: Type of service needed
            context: Additional context (accepted but not used by this method)

        Returns:
            Selection result with model info and reasoning. The default model
            for ``service_type`` is returned when there is no embedding
            service, no candidate is found, or an error occurs.
        """
        try:
            # Get embedding for user request
            if not self.embedding_service:
                return self._get_default_selection(service_type, "No embedding service available")

            request_embedding = await self.embedding_service.create_text_embedding(request)

            # Find similar models
            if self.supabase_client:
                candidates = await self._find_similar_models_supabase(request_embedding, service_type)
            elif self.db_pool:
                candidates = await self._find_similar_models_db(request_embedding, service_type)
            else:
                candidates = await self._find_similar_models_memory(request_embedding, service_type)

            if not candidates:
                return self._get_default_selection(service_type, "No suitable models found")

            # Return best match
            best_match = candidates[0]

            return {
                "success": True,
                "selected_model": {
                    "model_id": best_match["model_id"],
                    "provider": best_match["provider"]
                },
                "selection_reason": f"Best similarity match (score: {best_match['similarity']:.3f})",
                "alternatives": candidates[1:3],  # Top 2 alternatives
                "similarity_score": best_match["similarity"]
            }

        except Exception as e:
            logger.error(f"Model selection failed: {e}")
            return self._get_default_selection(service_type, f"Selection error: {e}")

    async def _find_similar_models_supabase(
        self,
        request_embedding: List[float],
        service_type: str
    ) -> List[Dict[str, Any]]:
        """
        Find similar models through the ``search_similar_models`` Supabase RPC.

        Returns an empty list on any error.
        """
        try:
            # NOTE(review): .execute() is not awaited, so it presumably blocks
            # the event loop for the duration of the HTTP call - confirm.
            result = self.supabase_client.rpc(
                'search_similar_models',
                {
                    'query_embedding': request_embedding,
                    'similarity_threshold': 0.3,  # Minimum similarity threshold
                    'match_count': 10,
                    'filter_model_type': service_type
                }
            ).execute()

            # Guard against a response without a data payload.
            return [
                {
                    "model_id": row["model_id"],
                    "provider": row["provider"],
                    "model_type": row["model_type"],
                    "similarity": float(row["similarity"]),
                    "description": row.get("description", "")
                }
                for row in (result.data or [])
            ]

        except Exception as e:
            logger.error(f"Supabase similarity search failed: {e}")
            return []

    async def _find_similar_models_db(
        self,
        request_embedding: List[float],
        service_type: str
    ) -> List[Dict[str, Any]]:
        """
        Find the 10 nearest models of ``service_type`` (or ``omni``) in PostgreSQL.

        Returns an empty list on any error.
        """
        try:
            async with self.db_pool.acquire() as conn:
                # Query for similar models; similarity is 1 minus the <=> distance.
                rows = await conn.fetch("""
                    SELECT
                        model_id,
                        provider,
                        model_type,
                        description,
                        metadata,
                        1 - (embedding <=> $1) as similarity
                    FROM model_embeddings
                    WHERE model_type = $2 OR model_type = 'omni'
                    ORDER BY embedding <=> $1
                    LIMIT 10
                """, request_embedding, service_type)

                return [
                    {
                        "model_id": row["model_id"],
                        "provider": row["provider"],
                        "model_type": row["model_type"],
                        "similarity": float(row["similarity"]),
                        "description": row["description"]
                    }
                    for row in rows
                ]

        except Exception as e:
            logger.error(f"Database similarity search failed: {e}")
            return []

    async def _find_similar_models_memory(
        self,
        request_embedding: List[float],
        service_type: str
    ) -> List[Dict[str, Any]]:
        """
        Find similar models by scanning the in-memory embeddings.

        Returns at most 10 candidates of ``service_type`` (or ``omni``), best
        match first; an empty list on any error.
        """
        try:
            candidates = []

            for model_id, embedding in self.model_embeddings.items():
                metadata = self.models_metadata.get(model_id, {})
                model_type = metadata.get("model_type")

                # Filter by service type (including omni models)
                if model_type not in [service_type, "omni"]:
                    continue

                candidates.append({
                    "model_id": model_id,
                    "provider": metadata.get("provider"),
                    "model_type": model_type,
                    "similarity": self._cosine_similarity(request_embedding, embedding),
                    "description": metadata.get("metadata", {}).get("description", "")
                })

            # Sort by similarity score
            candidates.sort(key=lambda x: x["similarity"], reverse=True)
            return candidates[:10]

        except Exception as e:
            logger.error(f"Memory similarity search failed: {e}")
            return []

    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """
        Calculate cosine similarity between two vectors.

        Returns 0.0 when either vector has zero norm, when the vectors differ
        in length (their dimensions are not comparable), or when the inputs
        are not numeric sequences.
        """
        import math

        try:
            # zip() would silently truncate to the shorter vector while the
            # norms cover the full vectors, producing a meaningless score.
            if len(vec1) != len(vec2):
                return 0.0

            dot_product = sum(a * b for a, b in zip(vec1, vec2))
            norm_product = math.sqrt(sum(a * a for a in vec1)) * math.sqrt(sum(b * b for b in vec2))

            if norm_product == 0:
                return 0.0

            return dot_product / norm_product

        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _get_default_selection(self, service_type: str, reason: str) -> Dict[str, Any]:
        """
        Build a selection result from the default model table.

        Unknown service types fall back to the ``"vision"`` default. The
        returned ``selected_model`` is a copy, so callers can modify it
        without altering ``self.default_models``.
        """
        default = self.default_models.get(service_type, self.default_models["vision"])

        return {
            "success": True,
            "selected_model": dict(default),
            "selection_reason": f"Default selection ({reason})",
            "alternatives": [],
            "similarity_score": 0.0
        }

    async def get_available_models(self, service_type: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get list of available models, optionally restricted to one service type.

        When ``service_type`` is given, ``omni`` models are included as well.
        Returns an empty list on any error.
        """
        try:
            if self.supabase_client:
                # Query Supabase
                # NOTE(review): this reads table "model_embedding" while the
                # PostgreSQL path uses "model_embeddings" - confirm the name
                # against the Supabase schema.
                query = self.supabase_client.table("model_embedding").select("model_id, provider, model_type, description, metadata")

                if service_type:
                    query = query.or_(f"model_type.eq.{service_type},model_type.eq.omni")

                result = query.order("model_id").execute()
                return result.data or []

            elif self.db_pool:
                async with self.db_pool.acquire() as conn:
                    if service_type:
                        rows = await conn.fetch("""
                            SELECT model_id, provider, model_type, description, metadata
                            FROM model_embeddings
                            WHERE model_type = $1 OR model_type = 'omni'
                            ORDER BY model_id
                        """, service_type)
                    else:
                        rows = await conn.fetch("""
                            SELECT model_id, provider, model_type, description, metadata
                            FROM model_embeddings
                            ORDER BY model_type, model_id
                        """)

                    return [dict(row) for row in rows]
            else:
                # In-memory fallback
                models = []
                for model_id, metadata in self.models_metadata.items():
                    model_type = metadata.get("model_type")
                    if service_type and model_type not in [service_type, "omni"]:
                        continue

                    models.append({
                        "model_id": model_id,
                        "provider": metadata.get("provider"),
                        "model_type": model_type,
                        "description": metadata.get("metadata", {}).get("description", ""),
                        "metadata": metadata.get("metadata", {})
                    })

                return models

        except Exception as e:
            logger.error(f"Failed to get available models: {e}")
            return []

    async def close(self) -> None:
        """Clean up resources; safe to call more than once."""
        if self.db_pool:
            await self.db_pool.close()
            # Drop the reference so later calls do not use or re-close a closed pool.
            self.db_pool = None
            logger.info("Database connection closed")
        if self.supabase_client:
            # Supabase client doesn't need explicit closing
            logger.info("Supabase client cleaned up")
|
534
|
+
|
535
|
+
|
536
|
+
# Singleton instance (published only after initialize() has completed)
_selector_instance = None
# Created lazily inside the running event loop; serializes first-time setup.
_selector_lock = None


async def get_model_selector(config: Optional[Dict[str, Any]] = None) -> IntelligentModelSelector:
    """
    Get singleton model selector instance.

    The first call builds and initializes the selector. Concurrent first
    calls wait for that initialization instead of receiving a selector that
    is still being set up.

    Args:
        config: Settings for the selector. Only the call that creates the
            instance uses it; later calls ignore it.

    Returns:
        The shared, initialized ``IntelligentModelSelector``.
    """
    global _selector_instance, _selector_lock

    # Fast path: already built and initialized.
    if _selector_instance is not None:
        return _selector_instance

    # No await between the check and the assignment, so exactly one lock exists.
    if _selector_lock is None:
        _selector_lock = asyncio.Lock()

    async with _selector_lock:
        # Re-check: another coroutine may have finished while this one waited.
        if _selector_instance is None:
            selector = IntelligentModelSelector(config)
            await selector.initialize()
            _selector_instance = selector

    return _selector_instance
|