isa-model 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- isa_model/__init__.py +30 -1
- isa_model/client.py +937 -0
- isa_model/core/config/__init__.py +16 -0
- isa_model/core/config/config_manager.py +514 -0
- isa_model/core/config.py +426 -0
- isa_model/core/models/model_billing_tracker.py +476 -0
- isa_model/core/models/model_manager.py +399 -0
- isa_model/core/{storage/supabase_storage.py → models/model_repo.py} +72 -73
- isa_model/core/pricing_manager.py +426 -0
- isa_model/core/services/__init__.py +19 -0
- isa_model/core/services/intelligent_model_selector.py +547 -0
- isa_model/core/types.py +291 -0
- isa_model/deployment/__init__.py +2 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +157 -3
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +532 -0
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +104 -3
- isa_model/deployment/cloud/modal/register_models.py +321 -0
- isa_model/deployment/runtime/deployed_service.py +338 -0
- isa_model/deployment/services/__init__.py +9 -0
- isa_model/deployment/services/auto_deploy_vision_service.py +538 -0
- isa_model/deployment/services/model_service.py +332 -0
- isa_model/deployment/services/service_monitor.py +356 -0
- isa_model/deployment/services/service_registry.py +527 -0
- isa_model/deployment/services/simple_auto_deploy_vision_service.py +275 -0
- isa_model/eval/__init__.py +80 -44
- isa_model/eval/config/__init__.py +10 -0
- isa_model/eval/config/evaluation_config.py +108 -0
- isa_model/eval/evaluators/__init__.py +18 -0
- isa_model/eval/evaluators/base_evaluator.py +503 -0
- isa_model/eval/evaluators/llm_evaluator.py +472 -0
- isa_model/eval/factory.py +417 -709
- isa_model/eval/infrastructure/__init__.py +24 -0
- isa_model/eval/infrastructure/experiment_tracker.py +466 -0
- isa_model/eval/metrics.py +191 -21
- isa_model/inference/ai_factory.py +257 -601
- isa_model/inference/services/audio/base_stt_service.py +65 -1
- isa_model/inference/services/audio/base_tts_service.py +75 -1
- isa_model/inference/services/audio/openai_stt_service.py +189 -151
- isa_model/inference/services/audio/openai_tts_service.py +12 -10
- isa_model/inference/services/audio/replicate_tts_service.py +61 -56
- isa_model/inference/services/base_service.py +55 -17
- isa_model/inference/services/embedding/base_embed_service.py +65 -1
- isa_model/inference/services/embedding/ollama_embed_service.py +103 -43
- isa_model/inference/services/embedding/openai_embed_service.py +8 -10
- isa_model/inference/services/helpers/stacked_config.py +148 -0
- isa_model/inference/services/img/__init__.py +18 -0
- isa_model/inference/services/{vision → img}/base_image_gen_service.py +80 -1
- isa_model/inference/services/{stacked → img}/flux_professional_service.py +25 -1
- isa_model/inference/services/{stacked → img/helpers}/base_stacked_service.py +40 -35
- isa_model/inference/services/{vision → img}/replicate_image_gen_service.py +44 -31
- isa_model/inference/services/llm/__init__.py +3 -3
- isa_model/inference/services/llm/base_llm_service.py +492 -40
- isa_model/inference/services/llm/helpers/llm_prompts.py +258 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +280 -0
- isa_model/inference/services/llm/ollama_llm_service.py +51 -17
- isa_model/inference/services/llm/openai_llm_service.py +70 -19
- isa_model/inference/services/llm/yyds_llm_service.py +24 -23
- isa_model/inference/services/vision/__init__.py +38 -4
- isa_model/inference/services/vision/base_vision_service.py +218 -117
- isa_model/inference/services/vision/{isA_vision_service.py → disabled/isA_vision_service.py} +98 -0
- isa_model/inference/services/{stacked → vision}/doc_analysis_service.py +1 -1
- isa_model/inference/services/vision/helpers/base_stacked_service.py +274 -0
- isa_model/inference/services/vision/helpers/image_utils.py +272 -3
- isa_model/inference/services/vision/helpers/vision_prompts.py +297 -0
- isa_model/inference/services/vision/openai_vision_service.py +104 -307
- isa_model/inference/services/vision/replicate_vision_service.py +140 -325
- isa_model/inference/services/{stacked → vision}/ui_analysis_service.py +2 -498
- isa_model/scripts/register_models.py +370 -0
- isa_model/scripts/register_models_with_embeddings.py +510 -0
- isa_model/serving/api/fastapi_server.py +6 -1
- isa_model/serving/api/routes/unified.py +274 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/METADATA +4 -1
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/RECORD +78 -53
- isa_model/config/__init__.py +0 -9
- isa_model/config/config_manager.py +0 -213
- isa_model/core/model_manager.py +0 -213
- isa_model/core/model_registry.py +0 -375
- isa_model/core/vision_models_init.py +0 -116
- isa_model/inference/billing_tracker.py +0 -406
- isa_model/inference/services/llm/triton_llm_service.py +0 -481
- isa_model/inference/services/stacked/__init__.py +0 -26
- isa_model/inference/services/stacked/config.py +0 -426
- isa_model/inference/services/vision/ollama_vision_service.py +0 -194
- /isa_model/core/{model_storage.py → models/model_storage.py} +0 -0
- /isa_model/inference/services/{vision → embedding}/helpers/text_splitter.py +0 -0
- /isa_model/inference/services/llm/{llm_adapter.py → helpers/llm_adapter.py} +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/WHEEL +0 -0
- {isa_model-0.3.5.dist-info → isa_model-0.3.7.dist-info}/top_level.txt +0 -0
isa_model/eval/metrics.py
CHANGED
@@ -15,6 +15,12 @@ from typing import Dict, List, Any, Optional, Union
 from enum import Enum
 from abc import ABC, abstractmethod

+try:
+    from ..inference.ai_factory import AIFactory
+    AI_FACTORY_AVAILABLE = True
+except ImportError:
+    AI_FACTORY_AVAILABLE = False
+
 logger = logging.getLogger(__name__)


@@ -66,6 +72,16 @@ class LLMMetrics:
             MetricType.COHERENCE,
             MetricType.FLUENCY
         ]
+
+        # Initialize AI factory if available
+        if AI_FACTORY_AVAILABLE:
+            try:
+                self.ai_factory = AIFactory()
+            except Exception as e:
+                logger.warning(f"Failed to initialize AIFactory: {e}")
+                self.ai_factory = None
+        else:
+            self.ai_factory = None

     def evaluate(
         self,
@@ -200,21 +216,92 @@ class LLMMetrics:
         provider: str,
         **kwargs
     ) -> tuple:
-        """Generate predictions from model."""
+        """Generate predictions from model using actual inference."""
         predictions = []
         references = []

-        ... 11 lines of the previous placeholder implementation removed (content not shown in this view)
+        if not self.ai_factory:
+            logger.warning("AIFactory not available, using placeholder predictions")
+            # Fallback to placeholder predictions
+            for item in dataset:
+                if isinstance(item, dict):
+                    if "input" in item and "output" in item:
+                        predictions.append(f"Generated response for: {item['input']}")
+                        references.append(item["output"])
+                    elif "prompt" in item and "response" in item:
+                        predictions.append(f"Generated response for: {item['prompt']}")
+                        references.append(item["response"])
+            return predictions, references
+
+        try:
+            # Get LLM service
+            llm_service = self.ai_factory.get_llm(model_name=model_path, provider=provider)
+
+            # Process dataset in batches
+            for i in range(0, len(dataset), batch_size):
+                batch = dataset[i:i + batch_size]
+                batch_predictions = []
+                batch_references = []
+
+                for item in batch:
+                    if isinstance(item, dict):
+                        prompt = None
+                        reference = None
+
+                        # Extract prompt and reference based on data format
+                        if "input" in item and "output" in item:
+                            prompt = item["input"]
+                            reference = item["output"]
+                        elif "prompt" in item and "response" in item:
+                            prompt = item["prompt"]
+                            reference = item["response"]
+                        elif "question" in item and "answer" in item:
+                            prompt = item["question"]
+                            reference = item["answer"]
+                        elif "text" in item and "label" in item:
+                            prompt = item["text"]
+                            reference = str(item["label"])
+
+                        if prompt and reference:
+                            try:
+                                # Generate prediction using actual model
+                                response = await llm_service.ainvoke(prompt)
+
+                                # Extract text from response
+                                if hasattr(response, 'text'):
+                                    prediction = response.text
+                                elif isinstance(response, dict) and 'text' in response:
+                                    prediction = response['text']
+                                elif isinstance(response, str):
+                                    prediction = response
+                                else:
+                                    prediction = str(response)
+
+                                batch_predictions.append(prediction.strip())
+                                batch_references.append(reference)
+
+                            except Exception as e:
+                                logger.error(f"Failed to generate prediction for item: {e}")
+                                # Use fallback prediction
+                                batch_predictions.append(f"Error generating prediction: {str(e)}")
+                                batch_references.append(reference)
+
+                predictions.extend(batch_predictions)
+                references.extend(batch_references)
+
+                logger.info(f"Processed batch {i//batch_size + 1}/{(len(dataset) + batch_size - 1)//batch_size}")
+
+        except Exception as e:
+            logger.error(f"Failed to use AIFactory for predictions: {e}")
+            # Fallback to placeholder predictions
+            for item in dataset:
+                if isinstance(item, dict):
+                    if "input" in item and "output" in item:
+                        predictions.append(f"Generated response for: {item['input']}")
+                        references.append(item["output"])
+                    elif "prompt" in item and "response" in item:
+                        predictions.append(f"Generated response for: {item['prompt']}")
+                        references.append(item["response"])

         logger.info(f"Generated {len(predictions)} predictions")
         return predictions, references
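
The rewritten `_generate_predictions` resolves a model through `AIFactory.get_llm()` and awaits `ainvoke()` per dataset item, falling back to placeholder strings when inference is unavailable. Below is a minimal sketch of that call pattern using only the calls visible in this hunk; the model name, provider, and dataset values are illustrative examples, not values required by the package.

```python
# Sketch only: exercises the AIFactory -> get_llm -> ainvoke pattern used above.
# "gpt-4o-mini" / "openai" and the dataset items are placeholder example values.
import asyncio
from isa_model.inference.ai_factory import AIFactory

dataset = [
    {"input": "What is the capital of France?", "output": "Paris"},
    {"question": "2 + 2 = ?", "answer": "4"},  # alternate field names are also recognized
]

async def main() -> None:
    llm = AIFactory().get_llm(model_name="gpt-4o-mini", provider="openai")
    for item in dataset:
        prompt = item.get("input") or item.get("question")
        response = await llm.ainvoke(prompt)
        # Responses may be objects with .text, dicts, or plain strings (see the ladder above)
        if hasattr(response, "text"):
            text = response.text
        elif isinstance(response, dict) and "text" in response:
            text = response["text"]
        else:
            text = str(response)
        print(text.strip())

asyncio.run(main())
```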
@@ -226,11 +313,47 @@ class LLMMetrics:
         provider: str,
         **kwargs
     ) -> List[str]:
-        """Generate texts from prompts."""
-        # Simplified implementation - replace with actual model inference
+        """Generate texts from prompts using actual model inference."""
         generated_texts = []
-
-
+
+        if not self.ai_factory:
+            logger.warning("AIFactory not available, using placeholder text generation")
+            # Fallback to placeholder generation
+            for prompt in prompts:
+                generated_texts.append(f"Generated response for: {prompt}")
+            return generated_texts
+
+        try:
+            # Get LLM service
+            llm_service = self.ai_factory.get_llm(model_name=model_path, provider=provider)
+
+            for prompt in prompts:
+                try:
+                    # Generate text using actual model
+                    response = await llm_service.ainvoke(prompt)
+
+                    # Extract text from response
+                    if hasattr(response, 'text'):
+                        generated_text = response.text
+                    elif isinstance(response, dict) and 'text' in response:
+                        generated_text = response['text']
+                    elif isinstance(response, str):
+                        generated_text = response
+                    else:
+                        generated_text = str(response)
+
+                    generated_texts.append(generated_text.strip())
+
+                except Exception as e:
+                    logger.error(f"Failed to generate text for prompt: {e}")
+                    # Use fallback generation
+                    generated_texts.append(f"Error generating text: {str(e)}")
+
+        except Exception as e:
+            logger.error(f"Failed to use AIFactory for text generation: {e}")
+            # Fallback to placeholder generation
+            for prompt in prompts:
+                generated_texts.append(f"Generated response for: {prompt}")

         return generated_texts

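
The four-branch response-extraction ladder above now appears verbatim in `_generate_predictions`, `_generate_texts`, and (later in this file) `BenchmarkRunner._generate_prediction`. A hypothetical helper, not part of the package, could factor it out:

```python
from typing import Any

def response_text(response: Any) -> str:
    """Normalize an LLM response (object with .text, dict, or str) to plain text.

    Hypothetical helper mirroring the extraction ladder repeated in this diff.
    """
    if hasattr(response, "text"):
        return response.text
    if isinstance(response, dict) and "text" in response:
        return response["text"]
    if isinstance(response, str):
        return response
    return str(response)
```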
@@ -544,6 +667,16 @@ class BenchmarkRunner:

     def __init__(self):
         self.supported_benchmarks = ["mmlu", "hellaswag", "arc", "gsm8k"]
+
+        # Initialize AI factory if available
+        if AI_FACTORY_AVAILABLE:
+            try:
+                self.ai_factory = AIFactory()
+            except Exception as e:
+                logger.warning(f"Failed to initialize AIFactory: {e}")
+                self.ai_factory = None
+        else:
+            self.ai_factory = None

     def run(
         self,
@@ -588,9 +721,12 @@ class BenchmarkRunner:

         for sample in test_data:
             try:
-                #
+                # Format prompt using benchmark's method
+                prompt = benchmark.format_prompt(sample)
+
+                # Generate prediction using actual model
                 prediction = self._generate_prediction(
-                    model_path,
+                    model_path, {"prompt": prompt}, num_shots, provider, **kwargs
                 )

                 # Check if correct
@@ -623,6 +759,40 @@ class BenchmarkRunner:
         provider: str,
         **kwargs
     ) -> str:
-        """Generate prediction for a sample
-
-
+        """Generate prediction for a sample using actual model inference."""
+        if not self.ai_factory:
+            logger.warning("AIFactory not available, using placeholder prediction")
+            return "A"  # Placeholder answer
+
+        try:
+            # Get LLM service
+            llm_service = self.ai_factory.get_llm(model_name=model_path, provider=provider)
+
+            # Format the prompt (this should be done by the benchmark)
+            if hasattr(sample, 'get'):
+                prompt = sample.get('prompt', str(sample))
+            else:
+                prompt = str(sample)
+
+            # Generate prediction using actual model
+            response = llm_service.generate(
+                prompt=prompt,
+                max_tokens=kwargs.get("max_tokens", 50),
+                temperature=kwargs.get("temperature", 0.0)  # Low temperature for consistency
+            )
+
+            # Extract text from response
+            if hasattr(response, 'text'):
+                prediction = response.text
+            elif isinstance(response, dict) and 'text' in response:
+                prediction = response['text']
+            elif isinstance(response, str):
+                prediction = response
+            else:
+                prediction = str(response)
+
+            return prediction.strip()
+
+        except Exception as e:
+            logger.error(f"Failed to generate prediction: {e}")
+            return "A"  # Fallback answer
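
`BenchmarkRunner._generate_prediction` now calls the service's synchronous `generate()` with a small token budget and temperature 0, falling back to the literal answer "A" on any failure. The sketch below mirrors that call shape, assuming a service obtained from `AIFactory.get_llm()` as shown above; the model name, provider, and prompt are placeholders.

```python
# Sketch only: mirrors the call shape used by BenchmarkRunner._generate_prediction.
# "gpt-4o-mini", "openai", and the prompt are placeholder example values.
from isa_model.inference.ai_factory import AIFactory

llm = AIFactory().get_llm(model_name="gpt-4o-mini", provider="openai")
response = llm.generate(
    prompt="Question: Which gas do plants absorb from the air?\n"
           "A. Oxygen\nB. Carbon dioxide\nC. Nitrogen\nD. Helium\nAnswer:",
    max_tokens=50,      # same default budget as the benchmark code above
    temperature=0.0,    # deterministic answers for benchmark scoring
)
prediction = response.text if hasattr(response, "text") else str(response)
print(prediction.strip())
```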