isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,296 @@
1
+ """
2
+ ISA Embedding & Reranking Service
3
+
4
+ Jina-based reranking service using a SOTA Transformer cross-encoder (no embedding endpoint is implemented in this module)
5
+ - Reranking: Jina Reranker v2 (Transformer architecture)
6
+ - Languages: 100+ supported
7
+ """
8
+
9
+ import modal
10
+ import time
11
+ import json
12
+ import os
13
+ import logging
14
+ from typing import Dict, List, Optional, Any
15
+
16
# Modal application object; every function/class below is attached to it
app = modal.App("isa-embed-rerank")

# Container image, assembled step by step:
#   1. slim Debian base with Python 3.11
#   2. Python dependencies for the reranker
#   3. environment variables pointing the model caches at /models
image = modal.Image.debian_slim(python_version="3.11")
image = image.pip_install([
    "torch>=2.0.0",
    "transformers>=4.35.0",
    "sentence-transformers>=2.2.2",
    "huggingface_hub",
    "accelerate",
    "numpy>=1.24.3",
    "httpx>=0.26.0",
    "requests",
    "pydantic>=2.0.0",
    "python-dotenv",
    "einops",  # Required for Jina Reranker v2
])
image = image.env({
    "TRANSFORMERS_CACHE": "/models",
    "TORCH_HOME": "/models/torch",
    "HF_HOME": "/models",
})
41
+
42
+ # Jina Reranking Service - Optimized for T4 GPU
43
@app.cls(
    gpu="T4",  # NVIDIA T4 (16 GB VRAM) - ample for the base-size reranker
    image=image,
    memory=8192,  # 8GB RAM
    timeout=1800,  # 30 minutes
    scaledown_window=60,  # 1 minute idle timeout
    min_containers=0,  # Scale to zero
    max_containers=10,  # Support up to 10 concurrent containers
)
class ISAEmbedRerankService:
    """
    ISA Jina Reranker v2 Service

    Wraps the ``jinaai/jina-reranker-v2-base-multilingual`` cross-encoder:
    - ``load_models`` runs once per container and loads the model
    - ``rerank_documents`` scores (query, document) pairs and returns them
      sorted by relevance together with billing metadata
    - ``health_check`` reports whether the model is usable

    Methods never raise to the caller; failures are returned as a dict with
    ``'success': False`` (reranking) or ``'status': 'unhealthy'`` (health).
    """

    @modal.enter()
    def load_models(self):
        """Load Jina Reranker v2 and place it on the GPU when one is present.

        Loading errors are printed and recorded in ``self.models_loaded``
        instead of being raised, so the container still starts and can
        answer ``health_check``.
        """
        print("Loading Jina Reranker v2...")
        start_time = time.time()

        # Initialize every attribute up front so later methods can rely on
        # them even when loading fails part-way through.
        self.reranker_model = None
        self.models_loaded = False
        self.device = "cpu"
        self.logger = logging.getLogger(__name__)
        self.request_count = 0
        self.total_processing_time = 0.0

        try:
            import torch
            from transformers import AutoModelForSequenceClassification

            # Load Jina Reranker v2 (SOTA 2024 Transformer)
            print("Loading Jina Reranker v2 (Transformer-based)...")
            model = AutoModelForSequenceClassification.from_pretrained(
                'jinaai/jina-reranker-v2-base-multilingual',
                torch_dtype="auto",
                trust_remote_code=True
            )

            # from_pretrained leaves the weights on the CPU; without this move
            # the requested T4 would sit idle while inference runs on the CPU.
            if torch.cuda.is_available():
                self.device = "cuda"
            model.to(self.device)
            model.eval()  # inference only: disable dropout etc.
            self.reranker_model = model

            load_time = time.time() - start_time
            print(f"Jina Reranker v2 loaded successfully in {load_time:.2f}s")
            print(f"Jina Reranker v2 running on device: {self.device}")

            # Model loading status
            self.models_loaded = True

        except Exception as e:
            print(f"Model loading failed: {e}")
            import traceback
            traceback.print_exc()
            self.reranker_model = None
            self.models_loaded = False

    def _scores_to_list(self, scores: Any) -> List[float]:
        """Convert whatever ``compute_score`` returned into a list of floats.

        Handles tensors (any dtype, any device), numpy arrays/scalars, plain
        sequences and a bare scalar (returned for a single pair).
        """
        if hasattr(scores, 'cpu'):
            # Tensor: bring it to host memory. ``tolist`` below works for
            # bfloat16 too, whereas ``.numpy()`` does not.
            scores = scores.cpu()
        if hasattr(scores, 'tolist'):
            # For 0-d tensors / numpy scalars this yields a bare scalar,
            # which the next check wraps into a list.
            scores = scores.tolist()
        if not isinstance(scores, (list, tuple)):
            scores = [scores]
        return [float(score) for score in scores]

    def _billing(self, processing_time: float) -> Dict[str, Any]:
        """Build the billing section shared by success and error responses."""
        # T4 GPU: ~$0.40/hour
        gpu_cost = (processing_time / 3600) * 0.40
        return {
            'request_id': f"rerank_{self.request_count}_{int(time.time())}",
            'gpu_seconds': processing_time,
            'estimated_cost_usd': round(gpu_cost, 6),
            'gpu_type': 'T4'
        }

    def _emit_json(self, payload: Dict[str, Any]) -> None:
        """Print ``payload`` as JSON between the result markers on stdout."""
        print("=== JSON_RESULT_START ===")
        print(json.dumps(payload, default=str))
        print("=== JSON_RESULT_END ===")

    @modal.method()
    def rerank_documents(
        self,
        query: str,
        documents: List[str],
        top_k: Optional[int] = None,
        return_documents: bool = True
    ) -> Dict[str, Any]:
        """
        Rerank documents using Jina Reranker v2

        Args:
            query: Query text
            documents: List of documents to rerank (may be empty)
            top_k: Return at most this many results; ``None`` returns all.
                Must not be negative.
            return_documents: Whether to return document content

        Returns:
            On success a dict with ``'success': True`` and ``'results'``, a
            list of ``{'index', 'relevance_score'[, 'document']}`` items sorted
            by descending score, where ``index`` is the position in the input
            list. On any failure a dict with ``'success': False`` and
            ``'error'``. Both shapes carry ``'billing'`` information.
        """
        start_time = time.time()
        self.request_count += 1

        try:
            # Validate model loading status
            if not self.models_loaded or self.reranker_model is None:
                raise RuntimeError("Jina Reranker v2 model not loaded")

            # A negative slice bound would silently drop the *last* results.
            if top_k is not None and top_k < 0:
                raise ValueError("top_k must be a non-negative integer")

            documents = list(documents)

            if documents:
                # Prepare reranking input (query-document pairs)
                query_doc_pairs = [[query, doc] for doc in documents]

                # Execute reranking (Jina Reranker v2 API)
                raw_scores = self.reranker_model.compute_score(
                    query_doc_pairs, max_length=1024
                )
                scores = self._scores_to_list(raw_scores)
            else:
                # Nothing to score; skip the model call entirely.
                scores = []

            # zip() would silently truncate on a mismatch and misreport results.
            if len(scores) != len(documents):
                raise RuntimeError(
                    f"Reranker returned {len(scores)} scores "
                    f"for {len(documents)} documents"
                )

            # Create results list
            results = []
            for i, (doc, score) in enumerate(zip(documents, scores)):
                result_item = {
                    'index': i,
                    'relevance_score': score,
                }
                if return_documents:
                    result_item['document'] = doc
                results.append(result_item)

            # Sort by score (descending)
            results.sort(key=lambda x: x['relevance_score'], reverse=True)

            # Apply top_k limit
            if top_k is not None:
                results = results[:top_k]

            processing_time = time.time() - start_time
            self.total_processing_time += processing_time

            result = {
                'success': True,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'results': results,
                'query': query,
                'model': 'jina-reranker-v2-base-multilingual',
                'architecture': 'Transformer',
                'num_documents': len(documents),
                'returned_count': len(results),
                'processing_time': processing_time,
                'billing': self._billing(processing_time),
                'model_info': {
                    'model_name': 'jina-reranker-v2-base-multilingual',
                    'provider': 'ISA',
                    'architecture': 'Transformer',
                    'gpu': 'T4',
                    'languages_supported': '100+',
                    'top_k': top_k,
                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                }
            }

            # Output JSON results
            self._emit_json(result)

            return result

        except Exception as e:
            # Service boundary: report the failure in the response payload
            # instead of propagating the exception to the remote caller.
            processing_time = time.time() - start_time
            error_result = {
                'success': False,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'error': str(e),
                'processing_time': processing_time,
                'billing': self._billing(processing_time)
            }

            self._emit_json(error_result)

            return error_result

    @modal.method()
    def health_check(self) -> Dict[str, Any]:
        """Health check endpoint.

        ``status`` is ``'healthy'`` only when the model loaded successfully;
        otherwise it is ``'unhealthy'`` so callers do not route traffic to a
        container that can only return errors.
        """
        return {
            'status': 'healthy' if self.models_loaded else 'unhealthy',
            'service': 'isa-embed-rerank',
            'provider': 'ISA',
            'models_loaded': self.models_loaded,
            'model': 'jina-reranker-v2-base-multilingual',
            'architecture': 'Transformer',
            'timestamp': time.time(),
            'gpu': 'T4',
            'memory_usage': '8GB',
            'request_count': self.request_count,
            'languages_supported': '100+',
            'device': self.device
        }
239
+
240
+ # Deployment functions
241
@app.function()
def deploy_info():
    """Return static metadata describing this deployment plus a call timestamp."""
    info = dict(
        service='isa-embed-rerank',
        version='1.0.0',
        description='ISA Jina Reranker v2 service - SOTA 2024 Transformer-based reranking',
        model='jina-reranker-v2-base-multilingual',
        architecture='Transformer',
        gpu='T4',
        languages='100+',
    )
    # Stamped at call time (seconds since the epoch), not at deploy time.
    info['deployment_time'] = time.time()
    return info
254
+
255
@app.function()
def register_service():
    """Register the reranking service in the ISA model repository.

    Returns:
        ``{'status': 'registered'}`` on success, otherwise
        ``{'status': 'failed', 'error': <message>}``. Never raises.
    """
    # NOTE(review): this function uses the default Modal image; confirm that
    # the `isa_model` package is importable there, otherwise this always
    # takes the failure branch.
    service_record = {
        'model_id': 'isa-jina-reranker-v2-service',
        'model_type': 'reranking',
        'provider': 'isa',
        'endpoint': 'https://isa-embed-rerank.modal.run',
        'capabilities': ['reranking', 'document_ranking'],
        'pricing': {'gpu_type': 'T4', 'cost_per_hour': 0.40},
        'metadata': {
            'model': 'jina-reranker-v2-base-multilingual',
            'architecture': 'Transformer',
            'languages': '100+',
            'sota_2024': True
        }
    }

    try:
        # Imported lazily so the module can be loaded without isa_model.
        from isa_model.core.models.model_repo import ModelRepository

        ModelRepository().register_model(service_record)
        print("Jina Reranker v2 service registered successfully")
        return {'status': 'registered'}
    except Exception as exc:
        failure = {'status': 'failed', 'error': str(exc)}
        print(f"Service registration failed: {exc}")
        return failure
285
+
286
if __name__ == "__main__":
    # Usage banner shown when the file is run directly instead of via `modal`.
    banner_lines = (
        "ISA Jina Reranker v2 Service - Modal Deployment",
        "Deploy with: modal deploy isa_embed_rerank_service.py",
        "",
        "Model: jina-reranker-v2-base-multilingual",
        "Architecture: Transformer (Cross-encoder)",
        "Languages: 100+ supported",
        "GPU: T4 (cost-effective)",
        "",
        "Usage:",
        "service.rerank_documents('query', ['doc1', 'doc2', 'doc3'], top_k=5)",
    )
    for banner_line in banner_lines:
        print(banner_line)