isa-model 0.3.9-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
isa_model/eval/example_evaluation.py
@@ -0,0 +1,395 @@
+ """
+ Example evaluation script demonstrating the ISA Model evaluation framework.
+
+ Shows how to:
+ 1. Evaluate standard benchmarks (MMLU, HellaSwag, etc.)
+ 2. Test ISA custom services
+ 3. Run multimodal evaluations
+ 4. Perform comprehensive service benchmarking
+ """
+
+ import asyncio
+ import logging
+ import json
+ from pathlib import Path
+ from typing import Dict, Any, Optional
+
+ # Import evaluation components
+ from .benchmarks import create_mmlu_benchmark, create_gsm8k_benchmark
+ from .benchmarks.multimodal_datasets import create_vqa_dataset, create_coco_captions_dataset
+ from .evaluators import LLMEvaluator, VisionEvaluator, AudioEvaluator, EmbeddingEvaluator
+ from .isa_integration import ISAModelInterface
+ from .isa_benchmarks import run_isa_service_benchmark
+ from .factory import EvaluationFactory
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ async def run_standard_llm_evaluation():
+     """Example: Run standard LLM evaluation on MMLU and GSM8K."""
+     logger.info("šŸš€ Running Standard LLM Evaluation")
+
+     # Create evaluator
+     evaluator = LLMEvaluator(config={
+         "max_concurrent_requests": 5,
+         "batch_size": 10
+     })
+
+     # Create ISA model interface
+     model_interface = ISAModelInterface()
+
+     # Test MMLU
+     logger.info("šŸ“š Testing MMLU benchmark")
+     mmlu_benchmark = create_mmlu_benchmark(subjects=["anatomy", "astronomy", "business_ethics"])
+     mmlu_data = mmlu_benchmark.load_data(max_samples=20)
+
+     mmlu_result = await evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=mmlu_data,
+         dataset_name="MMLU",
+         model_name="gpt-4.1-nano"
+     )
+
+     logger.info(f"MMLU Results: {mmlu_result.get_summary()}")
+
+     # Test GSM8K
+     logger.info("🧮 Testing GSM8K benchmark")
+     gsm8k_benchmark = create_gsm8k_benchmark()
+     gsm8k_data = gsm8k_benchmark.load_data(max_samples=10)
+
+     gsm8k_result = await evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=gsm8k_data,
+         dataset_name="GSM8K",
+         model_name="gpt-4.1-nano"
+     )
+
+     logger.info(f"GSM8K Results: {gsm8k_result.get_summary()}")
+
+     return {
+         "mmlu": mmlu_result.to_dict(),
+         "gsm8k": gsm8k_result.to_dict()
+     }
+
+
+ async def run_vision_evaluation():
+     """Example: Run vision evaluation with VQA and image captioning."""
+     logger.info("šŸ‘ļø Running Vision Evaluation")
+
+     # Create vision evaluator
+     evaluator = VisionEvaluator(config={
+         "task_type": "vqa",
+         "max_image_size": (1024, 1024)
+     })
+
+     # Create ISA model interface
+     model_interface = ISAModelInterface()
+
+     # Test VQA
+     logger.info("ā“ Testing VQA dataset")
+     vqa_dataset = create_vqa_dataset()
+     vqa_data = vqa_dataset.load_data(max_samples=10, use_real_data=False)  # Use placeholder for demo
+
+     vqa_result = await evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=vqa_data,
+         dataset_name="VQA_v2",
+         model_name="gpt-4.1-mini"
+     )
+
+     logger.info(f"VQA Results: {vqa_result.get_summary()}")
+
+     # Test Image Captioning
+     logger.info("šŸ–¼ļø Testing Image Captioning")
+     caption_evaluator = VisionEvaluator(config={"task_type": "caption"})
+
+     coco_dataset = create_coco_captions_dataset()
+     caption_data = coco_dataset.load_data(max_samples=5, use_real_data=False)
+
+     caption_result = await caption_evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=caption_data,
+         dataset_name="COCO_Captions",
+         model_name="gpt-4.1-mini"
+     )
+
+     logger.info(f"Caption Results: {caption_result.get_summary()}")
+
+     return {
+         "vqa": vqa_result.to_dict(),
+         "captioning": caption_result.to_dict()
+     }
+
+
+ async def run_audio_evaluation():
+     """Example: Run audio evaluation for STT and emotion recognition."""
+     logger.info("šŸŽµ Running Audio Evaluation")
+
+     # STT Evaluation
+     stt_evaluator = AudioEvaluator(config={
+         "task_type": "stt",
+         "normalize_text": True,
+         "case_sensitive": False
+     })
+
+     model_interface = ISAModelInterface()
+
+     # Create mock STT dataset
+     stt_data = [
+         {
+             "audio": "mock_audio_1.wav",
+             "expected_output": "The quick brown fox jumps over the lazy dog",
+             "task_type": "stt",
+             "id": "stt_test_1"
+         },
+         {
+             "audio": "mock_audio_2.wav",
+             "expected_output": "Machine learning is transforming artificial intelligence",
+             "task_type": "stt",
+             "id": "stt_test_2"
+         }
+     ]
+
+     stt_result = await stt_evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=stt_data,
+         dataset_name="LibriSpeech_Test",
+         model_name="isa_audio_sota_service"
+     )
+
+     logger.info(f"STT Results: {stt_result.get_summary()}")
+
+     # Emotion Recognition Evaluation
+     emotion_evaluator = AudioEvaluator(config={"task_type": "emotion"})
+
+     emotion_data = [
+         {
+             "audio": "mock_emotion_1.wav",
+             "expected_output": "happy",
+             "task_type": "emotion",
+             "id": "emotion_test_1"
+         },
+         {
+             "audio": "mock_emotion_2.wav",
+             "expected_output": "sad",
+             "task_type": "emotion",
+             "id": "emotion_test_2"
+         }
+     ]
+
+     emotion_result = await emotion_evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=emotion_data,
+         dataset_name="Emotion_Test",
+         model_name="isa_audio_sota_service"
+     )
+
+     logger.info(f"Emotion Results: {emotion_result.get_summary()}")
+
+     return {
+         "stt": stt_result.to_dict(),
+         "emotion": emotion_result.to_dict()
+     }
+
+
+ async def run_embedding_evaluation():
+     """Example: Run embedding evaluation for similarity and retrieval."""
+     logger.info("šŸ” Running Embedding Evaluation")
+
+     # Similarity Evaluation
+     similarity_evaluator = EmbeddingEvaluator(config={
+         "task_type": "similarity",
+         "similarity_metric": "cosine"
+     })
+
+     model_interface = ISAModelInterface()
+
+     # Create similarity dataset
+     similarity_data = [
+         {
+             "text1": "The cat is sleeping on the couch",
+             "text2": "A feline is resting on the sofa",
+             "expected_output": 0.8,  # High similarity
+             "task_type": "similarity",
+             "id": "sim_test_1"
+         },
+         {
+             "text1": "I love pizza",
+             "text2": "The weather is sunny today",
+             "expected_output": 0.1,  # Low similarity
+             "task_type": "similarity",
+             "id": "sim_test_2"
+         }
+     ]
+
+     similarity_result = await similarity_evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=similarity_data,
+         dataset_name="Similarity_Test",
+         model_name="text-embedding-3-small"
+     )
+
+     logger.info(f"Similarity Results: {similarity_result.get_summary()}")
+
+     # Retrieval Evaluation
+     retrieval_evaluator = EmbeddingEvaluator(config={
+         "task_type": "retrieval",
+         "k_values": [1, 3, 5]
+     })
+
+     retrieval_data = [
+         {
+             "query": "machine learning algorithms",
+             "documents": [
+                 "Neural networks are a type of machine learning algorithm",
+                 "The weather is nice today",
+                 "Deep learning uses artificial neural networks",
+                 "I like to cook pasta"
+             ],
+             "expected_output": [1, 0, 1, 0],  # Relevance labels
+             "task_type": "retrieval",
+             "id": "retrieval_test_1"
+         }
+     ]
+
+     retrieval_result = await retrieval_evaluator.evaluate(
+         model_interface=model_interface,
+         dataset=retrieval_data,
+         dataset_name="Retrieval_Test",
+         model_name="text-embedding-3-small"
+     )
+
+     logger.info(f"Retrieval Results: {retrieval_result.get_summary()}")
+
+     return {
+         "similarity": similarity_result.to_dict(),
+         "retrieval": retrieval_result.to_dict()
+     }
+
+
+ async def run_isa_service_benchmark_example():
+     """Example: Run comprehensive ISA service benchmarking."""
+     logger.info("⚔ Running ISA Service Benchmark")
+
+     benchmark_config = {
+         "test_duration_seconds": 30,  # Short test for demo
+         "max_concurrent_requests": 5,
+         "warmup_requests": 3,
+         "services_to_test": [
+             "isa_ocr_service",
+             "isa_audio_sota_service",
+             "isa_embedding_reranking_service"
+         ]
+     }
+
+     benchmark_results = await run_isa_service_benchmark(benchmark_config)
+
+     logger.info("šŸ“Š ISA Service Benchmark Summary:")
+     summary = benchmark_results.get("summary", {})
+     logger.info(f"Services tested: {summary.get('total_services_tested', 0)}")
+     logger.info(f"Successful services: {summary.get('successful_services', 0)}")
+
+     # Log performance highlights
+     comparative = benchmark_results.get("comparative_analysis", {})
+     recommendations = comparative.get("recommendations", [])
+     for rec in recommendations:
+         logger.info(f"šŸ’” {rec}")
+
+     return benchmark_results
+
+
+ async def run_factory_evaluation():
+     """Example: Use EvaluationFactory for simplified multi-model comparison."""
+     logger.info("šŸ­ Running Factory-based Multi-Model Evaluation")
+
+     factory = EvaluationFactory()
+
+     # Define models to compare
+     models = [
+         {"name": "gpt-4.1-nano", "provider": "openai"},
+         {"name": "llama3.2:3b-instruct-fp16", "provider": "ollama"},
+         {"name": "claude-sonnet-4-20250514", "provider": "yyds"}
+     ]
+
+     # Create simple test dataset
+     test_data = [
+         {
+             "input": "What is 2+2?",
+             "output": "4",
+             "id": "math_test_1"
+         },
+         {
+             "input": "Name the capital of France.",
+             "output": "Paris",
+             "id": "geography_test_1"
+         }
+     ]
+
+     # Run comparison
+     comparison_results = await factory.compare_models(
+         models=models,
+         dataset=test_data,
+         evaluator_type="llm",
+         metrics=["accuracy", "f1_score", "latency"]
+     )
+
+     logger.info("šŸ“ˆ Model Comparison Results:")
+     for model_name, results in comparison_results.items():
+         metrics = results.get("metrics", {})
+         logger.info(f"{model_name}: Accuracy={metrics.get('accuracy', 0):.3f}, "
+                     f"F1={metrics.get('f1_score', 0):.3f}")
+
+     return comparison_results
+
+
+ async def save_results(results: Dict[str, Any], output_file: str = "evaluation_results.json"):
+     """Save evaluation results to file."""
+     output_path = Path(output_file)
+
+     with open(output_path, 'w', encoding='utf-8') as f:
+         json.dump(results, f, indent=2, ensure_ascii=False, default=str)
+
+     logger.info(f"šŸ’¾ Results saved to {output_path}")
+
+
+ async def main():
+     """Run comprehensive evaluation examples."""
+     logger.info("šŸ”¬ Starting ISA Model Evaluation Framework Demo")
+
+     results = {}
+
+     try:
+         # Run all evaluation examples
+         results["llm_evaluation"] = await run_standard_llm_evaluation()
+         results["vision_evaluation"] = await run_vision_evaluation()
+         results["audio_evaluation"] = await run_audio_evaluation()
+         results["embedding_evaluation"] = await run_embedding_evaluation()
+         results["isa_benchmarks"] = await run_isa_service_benchmark_example()
+         results["factory_comparison"] = await run_factory_evaluation()
+
+         # Save results
+         await save_results(results)
+
+         logger.info("āœ… All evaluations completed successfully!")
+
+         # Print summary
+         logger.info("\nšŸ“‹ Evaluation Summary:")
+         logger.info(f"- LLM evaluations: {len(results['llm_evaluation'])} benchmarks")
+         logger.info(f"- Vision evaluations: {len(results['vision_evaluation'])} tasks")
+         logger.info(f"- Audio evaluations: {len(results['audio_evaluation'])} tasks")
+         logger.info(f"- Embedding evaluations: {len(results['embedding_evaluation'])} tasks")
+         logger.info(f"- ISA service benchmarks: {results['isa_benchmarks']['summary']['total_services_tested']} services")
+         logger.info(f"- Model comparisons: {len(results['factory_comparison'])} models")
+
+     except Exception as e:
+         logger.error(f"āŒ Evaluation failed: {e}")
+         raise
+
+     return results
+
+
+ if __name__ == "__main__":
+     # Run the evaluation demo
+     asyncio.run(main())
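Usage sketch for the new example, assuming isa-model 0.4.0 is installed and the providers the script names (openai, ollama, yyds) are configured; the module and function names come from the diff above. Because the file uses package-relative imports, it runs as a module, not as a standalone script:

    import asyncio

    # Run the full demo from a shell instead with:
    #     python -m isa_model.eval.example_evaluation
    from isa_model.eval.example_evaluation import run_standard_llm_evaluation

    # Await a single example programmatically; per the diff, the return
    # value is a dict with "mmlu" and "gsm8k" result entries.
    llm_results = asyncio.run(run_standard_llm_evaluation())
    print(sorted(llm_results))  # ['gsm8k', 'mmlu']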