isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
@@ -1,700 +0,0 @@
1
- """
2
- ISA Model Service Benchmarks.
3
-
4
- Specialized benchmarks for evaluating ISA custom services:
5
- - Modal deployment performance
6
- - Cost-effectiveness analysis
7
- - GPU utilization testing
8
- - Service reliability and scalability
9
- - Cross-service comparison
10
- """
11
-
12
- import asyncio
13
- import logging
14
- import time
15
- import statistics
16
- from typing import Dict, List, Any, Optional, Union
17
- from dataclasses import dataclass
18
- from datetime import datetime, timedelta
19
- import json
20
-
21
- from .isa_integration import ISAModelInterface
22
- from .evaluators.base_evaluator import BaseEvaluator, EvaluationResult
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- @dataclass
28
- class ServicePerformanceMetrics:
29
- """Performance metrics for ISA services."""
30
- service_name: str
31
- total_requests: int
32
- successful_requests: int
33
- failed_requests: int
34
- avg_latency_ms: float
35
- p95_latency_ms: float
36
- p99_latency_ms: float
37
- throughput_rps: float # Requests per second
38
- total_cost_usd: float
39
- cost_per_request_usd: float
40
- gpu_utilization_percent: Optional[float] = None
41
- memory_usage_mb: Optional[float] = None
42
- error_rate: float = 0.0
43
-
44
-
45
- class ISAServiceBenchmark:
46
- """
47
- Comprehensive benchmark suite for ISA services.
48
-
49
- Tests performance, cost, reliability, and scalability of:
50
- - ISA OCR Service (Surya OCR)
51
- - ISA Vision Services (Qwen2.5-VL, Table extraction)
52
- - ISA Audio SOTA Service
53
- - ISA Embedding & Reranking Service
54
- - ISA Video Generation Service
55
- """
56
-
57
- def __init__(self, config: Optional[Dict[str, Any]] = None):
58
- """Initialize ISA service benchmark."""
59
- self.config = config or {}
60
- self.interface = ISAModelInterface(config)
61
-
62
- # Benchmark configuration
63
- self.test_duration_seconds = self.config.get("test_duration_seconds", 60)
64
- self.max_concurrent_requests = self.config.get("max_concurrent_requests", 10)
65
- self.warmup_requests = self.config.get("warmup_requests", 5)
66
-
67
- # Service configurations
68
- self.services_to_test = self.config.get("services_to_test", [
69
- "isa_ocr_service",
70
- "isa_vision_qwen25_service",
71
- "isa_audio_sota_service",
72
- "isa_embedding_reranking_service"
73
- ])
74
-
75
- # Test data
76
- self.test_samples = self._prepare_test_samples()
77
-
78
- def _prepare_test_samples(self) -> Dict[str, List[Dict[str, Any]]]:
79
- """Prepare test samples for different service types."""
80
- samples = {
81
- "ocr": [
82
- {"text": "Sample OCR text for performance testing", "complexity": "simple"},
83
- {"text": "More complex OCR text with special characters: éñ中文", "complexity": "medium"},
84
- {"text": "Very complex OCR text with multiple languages and formatting", "complexity": "complex"}
85
- ],
86
- "vision_vqa": [
87
- {"question": "What color is the object?", "complexity": "simple"},
88
- {"question": "Describe the scene in detail", "complexity": "medium"},
89
- {"question": "Analyze the complex relationships in this image", "complexity": "complex"}
90
- ],
91
- "audio_stt": [
92
- {"duration": 5, "content": "Short audio clip", "complexity": "simple"},
93
- {"duration": 30, "content": "Medium length audio", "complexity": "medium"},
94
- {"duration": 120, "content": "Long audio clip", "complexity": "complex"}
95
- ],
96
- "embedding": [
97
- {"text": "Short text for embedding", "length": "short"},
98
- {"text": "Medium length text for embedding testing with more content", "length": "medium"},
99
- {"text": "Very long text for embedding testing " * 20, "length": "long"}
100
- ]
101
- }
102
- return samples
103
-
104
- async def run_comprehensive_benchmark(self) -> Dict[str, Any]:
105
- """Run comprehensive benchmark across all ISA services."""
106
- logger.info("Starting comprehensive ISA service benchmark")
107
-
108
- results = {
109
- "benchmark_start_time": datetime.now().isoformat(),
110
- "config": self.config,
111
- "service_results": {},
112
- "comparative_analysis": {},
113
- "summary": {}
114
- }
115
-
116
- # Test each service
117
- for service_name in self.services_to_test:
118
- logger.info(f"Benchmarking {service_name}")
119
- try:
120
- service_results = await self._benchmark_service(service_name)
121
- results["service_results"][service_name] = service_results
122
- except Exception as e:
123
- logger.error(f"Error benchmarking {service_name}: {e}")
124
- results["service_results"][service_name] = {"error": str(e)}
125
-
126
- # Comparative analysis
127
- results["comparative_analysis"] = self._perform_comparative_analysis(
128
- results["service_results"]
129
- )
130
-
131
- # Summary
132
- results["summary"] = self._generate_summary(results["service_results"])
133
-
134
- results["benchmark_end_time"] = datetime.now().isoformat()
135
-
136
- logger.info("Comprehensive benchmark completed")
137
- return results
138
-
139
- async def _benchmark_service(self, service_name: str) -> Dict[str, Any]:
140
- """Benchmark a specific ISA service."""
141
- service_type = self._get_service_type(service_name)
142
- test_samples = self.test_samples.get(service_type, [])
143
-
144
- if not test_samples:
145
- logger.warning(f"No test samples for service type: {service_type}")
146
- return {"error": "No test samples available"}
147
-
148
- # Warmup
149
- await self._warmup_service(service_name, test_samples[:self.warmup_requests])
150
-
151
- # Performance testing
152
- performance_results = await self._run_performance_test(service_name, test_samples)
153
-
154
- # Load testing
155
- load_results = await self._run_load_test(service_name, test_samples)
156
-
157
- # Reliability testing
158
- reliability_results = await self._run_reliability_test(service_name, test_samples)
159
-
160
- # Cost analysis
161
- cost_analysis = self._analyze_costs(performance_results, load_results)
162
-
163
- return {
164
- "service_name": service_name,
165
- "service_type": service_type,
166
- "performance_test": performance_results,
167
- "load_test": load_results,
168
- "reliability_test": reliability_results,
169
- "cost_analysis": cost_analysis,
170
- "overall_metrics": self._calculate_overall_metrics(
171
- performance_results, load_results, reliability_results
172
- )
173
- }
174
-
175
- def _get_service_type(self, service_name: str) -> str:
176
- """Map service name to service type."""
177
- mapping = {
178
- "isa_ocr_service": "ocr",
179
- "isa_vision_qwen25_service": "vision_vqa",
180
- "isa_audio_sota_service": "audio_stt",
181
- "isa_embedding_reranking_service": "embedding"
182
- }
183
- return mapping.get(service_name, "unknown")
184
-
185
- async def _warmup_service(self, service_name: str, samples: List[Dict[str, Any]]):
186
- """Warm up the service with initial requests."""
187
- logger.info(f"Warming up {service_name}")
188
-
189
- for sample in samples:
190
- try:
191
- await self._make_service_request(service_name, sample)
192
- await asyncio.sleep(0.5) # Brief pause between warmup requests
193
- except Exception as e:
194
- logger.warning(f"Warmup request failed: {e}")
195
-
196
- async def _run_performance_test(self, service_name: str, samples: List[Dict[str, Any]]) -> Dict[str, Any]:
197
- """Run performance test measuring latency and accuracy."""
198
- logger.info(f"Running performance test for {service_name}")
199
-
200
- results = {
201
- "test_type": "performance",
202
- "requests": [],
203
- "metrics": {}
204
- }
205
-
206
- # Test each sample type
207
- for sample in samples:
208
- for _ in range(5): # 5 requests per sample type
209
- start_time = time.time()
210
- try:
211
- response = await self._make_service_request(service_name, sample)
212
- latency = (time.time() - start_time) * 1000 # Convert to milliseconds
213
-
214
- request_result = {
215
- "success": True,
216
- "latency_ms": latency,
217
- "sample_complexity": sample.get("complexity", "unknown"),
218
- "response_size": len(str(response)),
219
- "cost_estimate": response.get("cost_usd", 0.0)
220
- }
221
-
222
- except Exception as e:
223
- request_result = {
224
- "success": False,
225
- "error": str(e),
226
- "latency_ms": (time.time() - start_time) * 1000,
227
- "sample_complexity": sample.get("complexity", "unknown")
228
- }
229
-
230
- results["requests"].append(request_result)
231
-
232
- # Calculate metrics
233
- successful_requests = [r for r in results["requests"] if r["success"]]
234
- failed_requests = [r for r in results["requests"] if not r["success"]]
235
-
236
- if successful_requests:
237
- latencies = [r["latency_ms"] for r in successful_requests]
238
- costs = [r.get("cost_estimate", 0.0) for r in successful_requests]
239
-
240
- results["metrics"] = {
241
- "total_requests": len(results["requests"]),
242
- "successful_requests": len(successful_requests),
243
- "failed_requests": len(failed_requests),
244
- "success_rate": len(successful_requests) / len(results["requests"]),
245
- "avg_latency_ms": statistics.mean(latencies),
246
- "median_latency_ms": statistics.median(latencies),
247
- "p95_latency_ms": self._percentile(latencies, 95),
248
- "p99_latency_ms": self._percentile(latencies, 99),
249
- "min_latency_ms": min(latencies),
250
- "max_latency_ms": max(latencies),
251
- "total_cost_usd": sum(costs),
252
- "avg_cost_per_request": statistics.mean(costs) if costs else 0.0
253
- }
254
- else:
255
- results["metrics"] = {
256
- "total_requests": len(results["requests"]),
257
- "successful_requests": 0,
258
- "failed_requests": len(failed_requests),
259
- "success_rate": 0.0,
260
- "error": "All requests failed"
261
- }
262
-
263
- return results
264
-
265
- async def _run_load_test(self, service_name: str, samples: List[Dict[str, Any]]) -> Dict[str, Any]:
266
- """Run load test to measure throughput and scalability."""
267
- logger.info(f"Running load test for {service_name}")
268
-
269
- results = {
270
- "test_type": "load",
271
- "test_duration_seconds": self.test_duration_seconds,
272
- "max_concurrent_requests": self.max_concurrent_requests,
273
- "requests": [],
274
- "metrics": {}
275
- }
276
-
277
- # Create semaphore for concurrency control
278
- semaphore = asyncio.Semaphore(self.max_concurrent_requests)
279
-
280
- start_time = time.time()
281
- end_time = start_time + self.test_duration_seconds
282
-
283
- async def make_request():
284
- async with semaphore:
285
- sample = samples[len(results["requests"]) % len(samples)]
286
- request_start = time.time()
287
-
288
- try:
289
- response = await self._make_service_request(service_name, sample)
290
- latency = (time.time() - request_start) * 1000
291
-
292
- return {
293
- "success": True,
294
- "latency_ms": latency,
295
- "timestamp": request_start,
296
- "cost_estimate": response.get("cost_usd", 0.0)
297
- }
298
- except Exception as e:
299
- return {
300
- "success": False,
301
- "error": str(e),
302
- "latency_ms": (time.time() - request_start) * 1000,
303
- "timestamp": request_start
304
- }
305
-
306
- # Generate load
307
- tasks = []
308
- while time.time() < end_time:
309
- if len(tasks) < self.max_concurrent_requests:
310
- task = asyncio.create_task(make_request())
311
- tasks.append(task)
312
-
313
- # Collect completed tasks
314
- done_tasks = [task for task in tasks if task.done()]
315
- for task in done_tasks:
316
- try:
317
- result = await task
318
- results["requests"].append(result)
319
- except Exception as e:
320
- logger.error(f"Task error: {e}")
321
- tasks.remove(task)
322
-
323
- await asyncio.sleep(0.1) # Brief pause
324
-
325
- # Wait for remaining tasks
326
- if tasks:
327
- remaining_results = await asyncio.gather(*tasks, return_exceptions=True)
328
- for result in remaining_results:
329
- if isinstance(result, dict):
330
- results["requests"].append(result)
331
-
332
- # Calculate load test metrics
333
- if results["requests"]:
334
- successful_requests = [r for r in results["requests"] if r["success"]]
335
- total_time = time.time() - start_time
336
-
337
- results["metrics"] = {
338
- "total_requests": len(results["requests"]),
339
- "successful_requests": len(successful_requests),
340
- "failed_requests": len(results["requests"]) - len(successful_requests),
341
- "success_rate": len(successful_requests) / len(results["requests"]),
342
- "throughput_rps": len(results["requests"]) / total_time,
343
- "successful_throughput_rps": len(successful_requests) / total_time,
344
- "actual_test_duration": total_time,
345
- "concurrent_requests_achieved": min(self.max_concurrent_requests, len(results["requests"]))
346
- }
347
-
348
- if successful_requests:
349
- latencies = [r["latency_ms"] for r in successful_requests]
350
- results["metrics"].update({
351
- "avg_latency_ms": statistics.mean(latencies),
352
- "p95_latency_ms": self._percentile(latencies, 95),
353
- "p99_latency_ms": self._percentile(latencies, 99)
354
- })
355
-
356
- return results
357
-
358
- async def _run_reliability_test(self, service_name: str, samples: List[Dict[str, Any]]) -> Dict[str, Any]:
359
- """Run reliability test to measure service stability."""
360
- logger.info(f"Running reliability test for {service_name}")
361
-
362
- results = {
363
- "test_type": "reliability",
364
- "test_scenarios": [],
365
- "metrics": {}
366
- }
367
-
368
- # Test different reliability scenarios
369
- scenarios = [
370
- {"name": "consecutive_requests", "description": "100 consecutive requests"},
371
- {"name": "burst_requests", "description": "Burst of 20 concurrent requests"},
372
- {"name": "mixed_complexity", "description": "Mixed complexity requests"}
373
- ]
374
-
375
- for scenario in scenarios:
376
- scenario_results = await self._run_reliability_scenario(service_name, samples, scenario)
377
- results["test_scenarios"].append(scenario_results)
378
-
379
- # Calculate overall reliability metrics
380
- all_requests = []
381
- for scenario in results["test_scenarios"]:
382
- all_requests.extend(scenario.get("requests", []))
383
-
384
- if all_requests:
385
- successful = [r for r in all_requests if r["success"]]
386
- results["metrics"] = {
387
- "total_reliability_requests": len(all_requests),
388
- "successful_reliability_requests": len(successful),
389
- "overall_reliability_rate": len(successful) / len(all_requests),
390
- "failure_types": self._analyze_failure_types(all_requests)
391
- }
392
-
393
- return results
394
-
395
- async def _run_reliability_scenario(self, service_name: str, samples: List[Dict[str, Any]], scenario: Dict[str, Any]) -> Dict[str, Any]:
396
- """Run a specific reliability scenario."""
397
- scenario_results = {
398
- "scenario": scenario,
399
- "requests": [],
400
- "metrics": {}
401
- }
402
-
403
- if scenario["name"] == "consecutive_requests":
404
- # 100 consecutive requests
405
- for i in range(100):
406
- sample = samples[i % len(samples)]
407
- try:
408
- start_time = time.time()
409
- response = await self._make_service_request(service_name, sample)
410
- latency = (time.time() - start_time) * 1000
411
-
412
- scenario_results["requests"].append({
413
- "success": True,
414
- "request_number": i,
415
- "latency_ms": latency
416
- })
417
- except Exception as e:
418
- scenario_results["requests"].append({
419
- "success": False,
420
- "request_number": i,
421
- "error": str(e)
422
- })
423
-
424
- elif scenario["name"] == "burst_requests":
425
- # 20 concurrent requests
426
- tasks = []
427
- for i in range(20):
428
- sample = samples[i % len(samples)]
429
- task = asyncio.create_task(self._make_service_request(service_name, sample))
430
- tasks.append(task)
431
-
432
- results = await asyncio.gather(*tasks, return_exceptions=True)
433
- for i, result in enumerate(results):
434
- if isinstance(result, Exception):
435
- scenario_results["requests"].append({
436
- "success": False,
437
- "request_number": i,
438
- "error": str(result)
439
- })
440
- else:
441
- scenario_results["requests"].append({
442
- "success": True,
443
- "request_number": i,
444
- "response": result
445
- })
446
-
447
- elif scenario["name"] == "mixed_complexity":
448
- # Mix of different complexity samples
449
- for _ in range(30):
450
- for sample in samples: # Test each complexity
451
- try:
452
- start_time = time.time()
453
- response = await self._make_service_request(service_name, sample)
454
- latency = (time.time() - start_time) * 1000
455
-
456
- scenario_results["requests"].append({
457
- "success": True,
458
- "complexity": sample.get("complexity", "unknown"),
459
- "latency_ms": latency
460
- })
461
- except Exception as e:
462
- scenario_results["requests"].append({
463
- "success": False,
464
- "complexity": sample.get("complexity", "unknown"),
465
- "error": str(e)
466
- })
467
-
468
- # Calculate scenario metrics
469
- successful = [r for r in scenario_results["requests"] if r["success"]]
470
- scenario_results["metrics"] = {
471
- "total_requests": len(scenario_results["requests"]),
472
- "successful_requests": len(successful),
473
- "success_rate": len(successful) / len(scenario_results["requests"]) if scenario_results["requests"] else 0
474
- }
475
-
476
- return scenario_results
477
-
478
- async def _make_service_request(self, service_name: str, sample: Dict[str, Any]) -> Dict[str, Any]:
479
- """Make a request to a specific ISA service."""
480
- service_type = self._get_service_type(service_name)
481
-
482
- if service_type == "ocr":
483
- # Mock image for OCR testing
484
- return await self.interface.vision_analysis(
485
- image="mock_image_data",
486
- task_type="ocr",
487
- model_name="isa-surya-ocr-service"
488
- )
489
-
490
- elif service_type == "vision_vqa":
491
- return await self.interface.vision_analysis(
492
- image="mock_image_data",
493
- prompt=sample["question"],
494
- task_type="vqa",
495
- model_name="isa-qwen25-vision-service"
496
- )
497
-
498
- elif service_type == "audio_stt":
499
- return await self.interface.audio_processing(
500
- audio="mock_audio_data",
501
- task_type="stt",
502
- model_name="isa_audio_sota_service"
503
- )
504
-
505
- elif service_type == "embedding":
506
- return await self.interface.embedding_generation(
507
- text=sample["text"],
508
- model_name="isa-jina-reranker-v2-service"
509
- )
510
-
511
- else:
512
- raise ValueError(f"Unknown service type: {service_type}")
513
-
514
- def _analyze_costs(self, performance_results: Dict[str, Any], load_results: Dict[str, Any]) -> Dict[str, Any]:
515
- """Analyze cost-effectiveness of the service."""
516
- analysis = {
517
- "cost_breakdown": {},
518
- "cost_efficiency": {},
519
- "recommendations": []
520
- }
521
-
522
- # Extract cost data
523
- perf_costs = []
524
- load_costs = []
525
-
526
- for request in performance_results.get("requests", []):
527
- if request.get("success") and "cost_estimate" in request:
528
- perf_costs.append(request["cost_estimate"])
529
-
530
- for request in load_results.get("requests", []):
531
- if request.get("success") and "cost_estimate" in request:
532
- load_costs.append(request["cost_estimate"])
533
-
534
- all_costs = perf_costs + load_costs
535
-
536
- if all_costs:
537
- analysis["cost_breakdown"] = {
538
- "total_estimated_cost": sum(all_costs),
539
- "avg_cost_per_request": statistics.mean(all_costs),
540
- "min_cost_per_request": min(all_costs),
541
- "max_cost_per_request": max(all_costs),
542
- "cost_variance": statistics.variance(all_costs) if len(all_costs) > 1 else 0
543
- }
544
-
545
- # Cost efficiency analysis
546
- perf_metrics = performance_results.get("metrics", {})
547
- load_metrics = load_results.get("metrics", {})
548
-
549
- avg_latency = perf_metrics.get("avg_latency_ms", 0)
550
- throughput = load_metrics.get("throughput_rps", 0)
551
-
552
- if avg_latency > 0 and throughput > 0:
553
- analysis["cost_efficiency"] = {
554
- "cost_per_second_latency": statistics.mean(all_costs) / (avg_latency / 1000),
555
- "cost_per_rps": statistics.mean(all_costs) * throughput,
556
- "efficiency_score": throughput / (statistics.mean(all_costs) * avg_latency) if avg_latency > 0 else 0
557
- }
558
-
559
- return analysis
560
-
561
def _calculate_overall_metrics(self, performance: Dict, load: Dict, reliability: Dict) -> ServicePerformanceMetrics:
    """Combine the three test result dicts into one ServicePerformanceMetrics.

    Request counters are the sum of the performance and load runs. Latency
    and cost figures are taken from the performance run only, throughput
    from the load run only, and the error rate is one minus the
    ``overall_reliability_rate`` of the reliability run. Missing values
    default to 0 (the reliability rate defaults to 1).
    """
    perf = performance.get("metrics", {})
    stress = load.get("metrics", {})
    stability = reliability.get("metrics", {})

    def summed(counter):
        # Add the same counter from the performance and load runs.
        return perf.get(counter, 0) + stress.get(counter, 0)

    fields = {
        "service_name": performance.get("service_name", "unknown"),
        "total_requests": summed("total_requests"),
        "successful_requests": summed("successful_requests"),
        "failed_requests": summed("failed_requests"),
        "avg_latency_ms": perf.get("avg_latency_ms", 0),
        "p95_latency_ms": perf.get("p95_latency_ms", 0),
        "p99_latency_ms": perf.get("p99_latency_ms", 0),
        "throughput_rps": stress.get("throughput_rps", 0),
        "total_cost_usd": perf.get("total_cost_usd", 0),
        "cost_per_request_usd": perf.get("avg_cost_per_request", 0),
        "error_rate": 1 - stability.get("overall_reliability_rate", 1),
    }
    return ServicePerformanceMetrics(**fields)
580
-
581
- def _perform_comparative_analysis(self, service_results: Dict[str, Any]) -> Dict[str, Any]:
582
- """Perform comparative analysis across services."""
583
- analysis = {
584
- "performance_comparison": {},
585
- "cost_comparison": {},
586
- "reliability_comparison": {},
587
- "recommendations": []
588
- }
589
-
590
- services = list(service_results.keys())
591
-
592
- # Performance comparison
593
- performance_data = {}
594
- for service in services:
595
- if "error" not in service_results[service]:
596
- metrics = service_results[service].get("overall_metrics")
597
- if metrics:
598
- performance_data[service] = {
599
- "avg_latency_ms": metrics.avg_latency_ms,
600
- "throughput_rps": metrics.throughput_rps,
601
- "success_rate": 1 - metrics.error_rate
602
- }
603
-
604
- analysis["performance_comparison"] = performance_data
605
-
606
- # Cost comparison
607
- cost_data = {}
608
- for service in services:
609
- if "error" not in service_results[service]:
610
- metrics = service_results[service].get("overall_metrics")
611
- if metrics:
612
- cost_data[service] = {
613
- "cost_per_request": metrics.cost_per_request_usd,
614
- "total_cost": metrics.total_cost_usd
615
- }
616
-
617
- analysis["cost_comparison"] = cost_data
618
-
619
- # Generate recommendations
620
- if performance_data:
621
- fastest_service = min(performance_data.keys(), key=lambda x: performance_data[x]["avg_latency_ms"])
622
- highest_throughput = max(performance_data.keys(), key=lambda x: performance_data[x]["throughput_rps"])
623
-
624
- analysis["recommendations"].extend([
625
- f"Fastest response time: {fastest_service}",
626
- f"Highest throughput: {highest_throughput}"
627
- ])
628
-
629
- if cost_data:
630
- most_cost_effective = min(cost_data.keys(), key=lambda x: cost_data[x]["cost_per_request"])
631
- analysis["recommendations"].append(f"Most cost-effective: {most_cost_effective}")
632
-
633
- return analysis
634
-
635
- def _generate_summary(self, service_results: Dict[str, Any]) -> Dict[str, Any]:
636
- """Generate benchmark summary."""
637
- summary = {
638
- "total_services_tested": len(service_results),
639
- "successful_services": len([s for s in service_results.values() if "error" not in s]),
640
- "failed_services": len([s for s in service_results.values() if "error" in s]),
641
- "overall_performance": {},
642
- "key_findings": []
643
- }
644
-
645
- # Calculate overall performance across all services
646
- all_latencies = []
647
- all_throughputs = []
648
- all_costs = []
649
-
650
- for service_name, results in service_results.items():
651
- if "error" not in results:
652
- metrics = results.get("overall_metrics")
653
- if metrics:
654
- all_latencies.append(metrics.avg_latency_ms)
655
- all_throughputs.append(metrics.throughput_rps)
656
- all_costs.append(metrics.cost_per_request_usd)
657
-
658
- if all_latencies:
659
- summary["overall_performance"] = {
660
- "avg_latency_across_services": statistics.mean(all_latencies),
661
- "avg_throughput_across_services": statistics.mean(all_throughputs),
662
- "avg_cost_across_services": statistics.mean(all_costs) if all_costs else 0
663
- }
664
-
665
- return summary
666
-
667
- def _percentile(self, data: List[float], percentile: int) -> float:
668
- """Calculate percentile of data."""
669
- if not data:
670
- return 0.0
671
- sorted_data = sorted(data)
672
- index = int((percentile / 100) * len(sorted_data))
673
- return sorted_data[min(index, len(sorted_data) - 1)]
674
-
675
- def _analyze_failure_types(self, requests: List[Dict[str, Any]]) -> Dict[str, int]:
676
- """Analyze types of failures."""
677
- failure_types = {}
678
- for request in requests:
679
- if not request.get("success"):
680
- error = request.get("error", "unknown_error")
681
- # Categorize error types
682
- if "timeout" in error.lower():
683
- error_type = "timeout"
684
- elif "connection" in error.lower():
685
- error_type = "connection_error"
686
- elif "rate limit" in error.lower():
687
- error_type = "rate_limit"
688
- else:
689
- error_type = "other_error"
690
-
691
- failure_types[error_type] = failure_types.get(error_type, 0) + 1
692
-
693
- return failure_types
694
-
695
-
696
- # Convenience function for running ISA benchmarks
697
async def run_isa_service_benchmark(config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Build an ISAServiceBenchmark from ``config`` and await its comprehensive run.

    Args:
        config: Optional configuration dict handed unchanged to the
            ``ISAServiceBenchmark`` constructor.

    Returns:
        Whatever ``run_comprehensive_benchmark()`` returns.
    """
    return await ISAServiceBenchmark(config).run_comprehensive_benchmark()