isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -11,13 +11,106 @@ This module provides implementations of standard AI benchmarks:
11
11
  import os
12
12
  import json
13
13
  import logging
14
+ import requests
15
+ import zipfile
16
+ import tarfile
17
+ from pathlib import Path
14
18
  from typing import Dict, List, Any, Optional
15
19
  from abc import ABC, abstractmethod
16
20
  from dataclasses import dataclass
21
+ import pandas as pd
17
22
 
18
23
  logger = logging.getLogger(__name__)
19
24
 
20
25
 
26
class DatasetDownloader:
    """Download benchmark datasets and cache them on local disk.

    Every known dataset lives under ``<cache_dir>/<dataset_name>/``. Datasets
    whose entry in ``dataset_info`` has an ``extracted_dir`` key are archives
    and are unpacked next to the downloaded file; all others are used as-is.
    """

    def __init__(self, cache_dir: str = "~/.isa_model/datasets", timeout: float = 30.0):
        """Create the cache directory and register the known datasets.

        Args:
            cache_dir: Root directory of the cache (``~`` is expanded).
            timeout: Seconds to wait for the server to connect / send data
                before a download is abandoned.
        """
        self.cache_dir = Path(cache_dir).expanduser()
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.timeout = timeout

        # Dataset URLs and info
        self.dataset_info = {
            "mmlu": {
                "url": "https://people.eecs.berkeley.edu/~hendrycks/data.tar",
                "filename": "mmlu_data.tar",
                "extracted_dir": "data",
            },
            "hellaswag": {
                "url": "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_val.jsonl",
                "filename": "hellaswag_val.jsonl",
            },
            "arc": {
                "url": "https://s3-us-west-2.amazonaws.com/ai2-website/data/ARC-V1-Feb2018.zip",
                "filename": "arc_data.zip",
                "extracted_dir": "ARC-V1-Feb2018-2",
            },
            "gsm8k": {
                "url": "https://github.com/openai/grade-school-math/raw/master/grade_school_math/data/test.jsonl",
                "filename": "gsm8k_test.jsonl",
            },
        }

    def download_dataset(self, dataset_name: str, force_download: bool = False) -> Optional[Path]:
        """Return the local path of a dataset, downloading it when necessary.

        Args:
            dataset_name: Key of ``self.dataset_info``.
            force_download: Fetch again (and re-extract) even if a cached copy
                exists.

        Returns:
            Path to the data file, or to the extracted directory for archive
            datasets. ``None`` when the download or the extraction that
            follows it fails, so callers can fall back to placeholder data.

        Raises:
            ValueError: If ``dataset_name`` is not a known dataset.
        """
        if dataset_name not in self.dataset_info:
            raise ValueError(f"Unknown dataset: {dataset_name}")

        info = self.dataset_info[dataset_name]
        dataset_dir = self.cache_dir / dataset_name
        # parents=True: the cache root may have been removed since __init__.
        dataset_dir.mkdir(parents=True, exist_ok=True)

        file_path = dataset_dir / info["filename"]

        # Check if already downloaded
        if file_path.exists() and not force_download:
            logger.info(f"Using cached {dataset_name} dataset at {file_path}")
            return self._get_data_path(dataset_name, file_path)

        # Download into a temporary sibling so an interrupted transfer never
        # leaves a truncated file at the cache location (it would otherwise be
        # mistaken for a valid cached dataset on the next call).
        logger.info(f"Downloading {dataset_name} dataset from {info['url']}")
        partial_path = file_path.with_name(file_path.name + ".part")
        try:
            with requests.get(info["url"], stream=True, timeout=self.timeout) as response:
                response.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
            partial_path.replace(file_path)
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary and callers
            # rely on None (not an exception) to trigger their fallback.
            logger.error(f"Failed to download {dataset_name}: {e}")
            self._discard(partial_path)
            return None

        logger.info(f"Downloaded {dataset_name} dataset to {file_path}")

        # Extract if needed; a fresh download always refreshes the extraction.
        try:
            return self._get_data_path(dataset_name, file_path, force_extract=True)
        except Exception as e:
            logger.error(f"Failed to extract {dataset_name}: {e}")
            # Drop the unusable archive so the next call downloads it again.
            self._discard(file_path)
            return None

    def _get_data_path(self, dataset_name: str, file_path: Path, force_extract: bool = False) -> Path:
        """Get the actual data path, extracting archives if needed.

        Args:
            dataset_name: Key of ``self.dataset_info``.
            file_path: Location of the downloaded file.
            force_extract: Unpack again even if the extracted directory exists.

        Returns:
            ``file_path`` for plain files, otherwise the directory named by the
            dataset's ``extracted_dir`` entry next to the archive.
        """
        info = self.dataset_info[dataset_name]

        if "extracted_dir" not in info:
            return file_path

        extract_dir = file_path.parent / info["extracted_dir"]
        if force_extract or not extract_dir.exists():
            logger.info(f"Extracting {file_path}")
            self._extract_archive(file_path, file_path.parent)

        return extract_dir

    @staticmethod
    def _extract_archive(archive_path: Path, dest_dir: Path) -> None:
        """Unpack a .zip or .tar archive into ``dest_dir``, refusing unsafe members."""
        if archive_path.suffix == ".zip":
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(dest_dir)
        elif archive_path.suffix == ".tar" or ".tar." in archive_path.name:
            with tarfile.open(archive_path, 'r') as tar_ref:
                if hasattr(tarfile, "data_filter"):
                    # Rejects absolute paths, "../" escapes, unsafe links and
                    # special files coming from the remote archive.
                    tar_ref.extractall(dest_dir, filter="data")
                else:
                    # Older interpreters without extraction filters: at least
                    # refuse member names that resolve outside dest_dir.
                    root = dest_dir.resolve()
                    for member in tar_ref.getmembers():
                        target = (root / member.name).resolve()
                        if target != root and root not in target.parents:
                            raise tarfile.TarError(f"Unsafe path in archive: {member.name}")
                    tar_ref.extractall(dest_dir)

    @staticmethod
    def _discard(path: Path) -> None:
        """Best-effort removal of a leftover file; a missing file is not an error."""
        try:
            path.unlink()
        except OSError:
            # Cleanup only: nothing useful can be done if removal fails.
            pass
112
+
113
+
21
114
  @dataclass
22
115
  class BenchmarkConfig:
23
116
  """Configuration for benchmark evaluation."""
@@ -36,6 +129,8 @@ class BaseBenchmark(ABC):
36
129
  self.config = config
37
130
  self.name = config.name
38
131
  self.data = None
132
+ self.downloader = DatasetDownloader()
133
+ self.use_real_data = True # Flag to control real vs placeholder data
39
134
 
40
135
  @abstractmethod
41
136
  def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
@@ -111,10 +206,62 @@ class MMLU(BaseBenchmark):
111
206
  self.subjects = subjects or self.all_subjects[:10] # Use first 10 subjects by default
112
207
 
113
208
  def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
114
- """Load MMLU data (simplified implementation)."""
115
- # This is a simplified implementation
116
- # In practice, you'd load from the actual MMLU dataset
209
+ """Load MMLU data with real dataset support."""
210
+ if self.use_real_data:
211
+ try:
212
+ return self._load_real_mmlu_data(max_samples)
213
+ except Exception as e:
214
+ logger.warning(f"Failed to load real MMLU data: {e}. Falling back to placeholder data.")
215
+ return self._load_placeholder_mmlu_data(max_samples)
216
+ else:
217
+ return self._load_placeholder_mmlu_data(max_samples)
218
+
219
def _load_real_mmlu_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
    """Load real MMLU test questions for the configured subjects.

    Args:
        max_samples: Upper bound on the number of returned samples. A falsy
            value (``None`` or 0) means no limit. The budget is split evenly
            across subjects, with at least one sample per subject.

    Returns:
        A list of dicts with the keys ``subject``, ``question``, ``choices``
        (the four options in A-D order), ``answer`` (upper-cased letter) and
        ``id``.

    Raises:
        FileNotFoundError: If the dataset could not be downloaded or located.
    """
    data_path = self.downloader.download_dataset("mmlu")
    if not data_path or not data_path.exists():
        raise FileNotFoundError("MMLU dataset not found")

    # Floor division alone yields 0 when max_samples < number of subjects,
    # and 0 would be read as "no cap" -- clamp to at least one per subject.
    # The subject check also avoids dividing by zero on an empty subject list.
    if max_samples and self.subjects:
        samples_per_subject = max(1, max_samples // len(self.subjects))
    else:
        samples_per_subject = None

    data = []

    for subject in self.subjects:
        subject_file = data_path / "test" / f"{subject}_test.csv"
        if not subject_file.exists():
            logger.warning(f"Subject file not found: {subject_file}")
            continue

        try:
            # Load CSV data (the files have no header row)
            df = pd.read_csv(subject_file, header=None,
                             names=["question", "A", "B", "C", "D", "answer"])

            # Apply the cap once per subject instead of recounting the
            # accumulated samples for every row.
            if samples_per_subject is not None:
                df = df.head(samples_per_subject)

            # Convert to our format
            subject_samples = [
                {
                    "subject": subject,
                    "question": row["question"],
                    "choices": [row["A"], row["B"], row["C"], row["D"]],
                    "answer": str(row["answer"]).strip().upper(),
                    "id": f"{subject}_{idx}",
                }
                for idx, row in df.iterrows()
            ]
        except Exception as e:
            # One unreadable subject must not abort the whole benchmark.
            logger.error(f"Error loading subject {subject}: {e}")
            continue

        data.extend(subject_samples)

    if max_samples:
        data = data[:max_samples]

    logger.info(f"Loaded {len(data)} real MMLU samples across {len(self.subjects)} subjects")
    return data
262
+
263
+ def _load_placeholder_mmlu_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
264
+ """Load placeholder MMLU data."""
118
265
  data = []
119
266
 
120
267
  for subject in self.subjects:
@@ -137,7 +284,7 @@ class MMLU(BaseBenchmark):
137
284
  if max_samples:
138
285
  data = data[:max_samples]
139
286
 
140
- logger.info(f"Loaded {len(data)} MMLU samples across {len(self.subjects)} subjects")
287
+ logger.info(f"Loaded {len(data)} placeholder MMLU samples across {len(self.subjects)} subjects")
141
288
  return data
142
289
 
143
290
  def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
@@ -194,12 +341,52 @@ class HellaSwag(BaseBenchmark):
194
341
  super().__init__(config)
195
342
 
196
343
  def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
197
- """Load HellaSwag data (simplified implementation)."""
198
- # This is a simplified implementation
199
- # In practice, you'd load from the actual HellaSwag dataset
344
+ """Load HellaSwag data with real dataset support."""
345
+ if self.use_real_data:
346
+ try:
347
+ return self._load_real_hellaswag_data(max_samples)
348
+ except Exception as e:
349
+ logger.warning(f"Failed to load real HellaSwag data: {e}. Falling back to placeholder data.")
350
+ return self._load_placeholder_hellaswag_data(max_samples)
351
+ else:
352
+ return self._load_placeholder_hellaswag_data(max_samples)
353
+
354
def _load_real_hellaswag_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
    """Load the real HellaSwag validation set from its JSONL file.

    Args:
        max_samples: Upper bound on the number of returned samples. A falsy
            value (``None`` or 0) means no limit.

    Returns:
        A list of dicts with the keys ``context``, ``question``, ``choices``,
        ``answer`` (letter derived from the numeric label) and ``id``.

    Raises:
        FileNotFoundError: If the dataset could not be downloaded or located.
        Exception: Any read or parse error is logged and re-raised.
    """
    data_path = self.downloader.download_dataset("hellaswag")
    if not data_path or not data_path.exists():
        raise FileNotFoundError("HellaSwag dataset not found")

    data = []

    try:
        with open(data_path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                # Count emitted samples, not physical lines, so skipped
                # blank lines do not eat into the requested budget.
                if max_samples and len(data) >= max_samples:
                    break

                line = line.strip()
                if not line:
                    # Blank lines are legal JSONL padding, not records.
                    continue

                item = json.loads(line)

                sample = {
                    "context": item["ctx"],
                    "question": "What happens next?",
                    "choices": item["endings"],
                    "answer": chr(65 + int(item["label"])),  # Convert 0,1,2,3 to A,B,C,D
                    "id": f"hellaswag_{item.get('ind', i)}"
                }
                data.append(sample)

    except Exception as e:
        logger.error(f"Error loading HellaSwag data: {e}")
        raise

    logger.info(f"Loaded {len(data)} real HellaSwag samples")
    return data
385
+
386
+ def _load_placeholder_hellaswag_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
387
+ """Load placeholder HellaSwag data."""
388
+ data = []
389
+
203
390
  sample_contexts = [
204
391
  "A person is washing dishes in the kitchen",
205
392
  "Someone is riding a bicycle down a hill",
@@ -226,7 +413,7 @@ class HellaSwag(BaseBenchmark):
226
413
  }
227
414
  data.append(sample)
228
415
 
229
- logger.info(f"Loaded {len(data)} HellaSwag samples")
416
+ logger.info(f"Loaded {len(data)} placeholder HellaSwag samples")
230
417
  return data
231
418
 
232
419
  def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
@@ -377,12 +564,57 @@ class GSM8K(BaseBenchmark):
377
564
  super().__init__(config)
378
565
 
379
566
  def load_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
380
- """Load GSM8K data (simplified implementation)."""
381
- # This is a simplified implementation
382
- # In practice, you'd load from the actual GSM8K dataset
567
+ """Load GSM8K data with real dataset support."""
568
+ if self.use_real_data:
569
+ try:
570
+ return self._load_real_gsm8k_data(max_samples)
571
+ except Exception as e:
572
+ logger.warning(f"Failed to load real GSM8K data: {e}. Falling back to placeholder data.")
573
+ return self._load_placeholder_gsm8k_data(max_samples)
574
+ else:
575
+ return self._load_placeholder_gsm8k_data(max_samples)
576
+
577
def _load_real_gsm8k_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
    """Load the real GSM8K test set from its JSONL file.

    Args:
        max_samples: Upper bound on the number of returned samples. A falsy
            value (``None`` or 0) means no limit.

    Returns:
        A list of dicts with the keys ``question``, ``answer`` (the final
        numeric answer as a string without thousands separators),
        ``solution`` (the full worked solution) and ``id``.

    Raises:
        FileNotFoundError: If the dataset could not be downloaded or located.
        Exception: Any read or parse error is logged and re-raised.
    """
    # Local import: the enclosing module is not known to import ``re``.
    import re

    data_path = self.downloader.download_dataset("gsm8k")
    if not data_path or not data_path.exists():
        raise FileNotFoundError("GSM8K dataset not found")

    # A number with optional sign, thousands separators and decimal part.
    number = r"-?\d[\d,]*(?:\.\d+)?"
    # The line read here is expected to end with "#### <number>" -- prefer that marker.
    final_answer_re = re.compile(r"####\s*(" + number + r")")
    any_number_re = re.compile(number)

    data = []

    try:
        with open(data_path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                # Count emitted samples, not physical lines.
                if max_samples and len(data) >= max_samples:
                    break

                line = line.strip()
                if not line:
                    # Blank lines are legal JSONL padding, not records.
                    continue

                item = json.loads(line)

                # Extract numerical answer from solution
                answer_text = item["answer"]
                marked = final_answer_re.search(answer_text)
                if marked:
                    raw_answer = marked.group(1)
                else:
                    # No marker: fall back to the last number in the text.
                    candidates = any_number_re.findall(answer_text)
                    raw_answer = candidates[-1] if candidates else "0"
                # "1,000" must become "1000", not "000".
                answer = raw_answer.replace(",", "")

                sample = {
                    "question": item["question"],
                    "answer": answer,
                    "solution": answer_text,  # Keep full solution for reference
                    "id": f"gsm8k_{i}"
                }
                data.append(sample)

    except Exception as e:
        logger.error(f"Error loading GSM8K data: {e}")
        raise

    logger.info(f"Loaded {len(data)} real GSM8K samples")
    return data
613
+
614
+ def _load_placeholder_gsm8k_data(self, max_samples: Optional[int] = None) -> List[Dict[str, Any]]:
615
+ """Load placeholder GSM8K data."""
616
+ data = []
617
+
386
618
  sample_problems = [
387
619
  {
388
620
  "question": "Janet has 12 apples. She gives 3 apples to her friend and eats 2 apples. How many apples does Janet have left?",
@@ -417,7 +649,7 @@ class GSM8K(BaseBenchmark):
417
649
  }
418
650
  data.append(sample)
419
651
 
420
- logger.info(f"Loaded {len(data)} GSM8K samples")
652
+ logger.info(f"Loaded {len(data)} placeholder GSM8K samples")
421
653
  return data
422
654
 
423
655
  def evaluate_sample(self, sample: Dict[str, Any], prediction: str) -> bool:
@@ -7,12 +7,18 @@ Provides specialized evaluators for different model types and evaluation tasks.
7
7
  from .base_evaluator import BaseEvaluator, EvaluationResult
8
8
  from .llm_evaluator import LLMEvaluator
9
9
  from .vision_evaluator import VisionEvaluator
10
- from .multimodal_evaluator import MultimodalEvaluator
10
+ from .audio_evaluator import AudioEvaluator
11
+ from .embedding_evaluator import EmbeddingEvaluator
12
+
13
+ # MultimodalEvaluator will be implemented later
14
+ # from .multimodal_evaluator import MultimodalEvaluator
11
15
 
12
16
  __all__ = [
13
17
  "BaseEvaluator",
14
18
  "EvaluationResult",
15
19
  "LLMEvaluator",
16
20
  "VisionEvaluator",
17
- "MultimodalEvaluator"
21
+ "AudioEvaluator",
22
+ "EmbeddingEvaluator"
23
+ # "MultimodalEvaluator" # TODO: Implement later
18
24
  ]