isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1 @@
1
+ """Embedding services for Modal deployment"""
@@ -0,0 +1,296 @@
1
+ """
2
+ ISA Embedding & Reranking Service
3
+
4
+ Jina-based embedding and reranking service using SOTA Transformer models
5
+ - Reranking: Jina Reranker v2 (Transformer architecture)
6
+ - Languages: 100+ supported
7
+ """
8
+
9
+ import modal
10
+ import time
11
+ import json
12
+ import os
13
+ import logging
14
+ from typing import Dict, List, Optional, Any
15
+
16
# Modal application under which the class and functions in this module are registered
app = modal.App("isa-embed-rerank")

# Python packages installed into the container image
_PIP_PACKAGES = [
    "torch>=2.0.0",
    "transformers>=4.35.0",
    "sentence-transformers>=2.2.2",
    "huggingface_hub",
    "accelerate",
    "numpy>=1.24.3",
    "httpx>=0.26.0",
    "requests",
    "pydantic>=2.0.0",
    "python-dotenv",
    "einops",  # Required for Jina Reranker v2
]

# Environment variables pointing the model/weight caches at /models
_CACHE_ENV = {
    "TRANSFORMERS_CACHE": "/models",
    "TORCH_HOME": "/models/torch",
    "HF_HOME": "/models",
}

# Container image: Debian slim + Python 3.11 + the packages and env above
image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install(_PIP_PACKAGES)
    .env(_CACHE_ENV)
)
41
+
42
+ # Jina Reranking Service - Optimized for T4 GPU
43
@app.cls(
    gpu="T4",             # NVIDIA T4 (16 GB VRAM)
    image=image,
    memory=8192,          # 8GB RAM
    timeout=1800,         # 30 minutes
    scaledown_window=60,  # 1 minute idle timeout
    min_containers=0,     # Scale to zero
    max_containers=10,    # Support up to 10 concurrent containers
)
class ISAEmbedRerankService:
    """
    ISA Jina Reranker v2 Service

    Wraps the ``jinaai/jina-reranker-v2-base-multilingual`` model
    and exposes two Modal methods:

    - ``rerank_documents``: score documents against a query and return them
      ordered by relevance, together with timing/billing metadata.
    - ``health_check``: report whether the model loaded and basic counters.
    """

    @modal.enter()
    def load_models(self):
        """Load Jina Reranker v2 once per container start.

        Failures are caught and recorded in ``self.models_loaded`` instead of
        being raised, so the container still starts and ``health_check`` /
        ``rerank_documents`` can report the problem.
        """
        print("Loading Jina Reranker v2...")
        start_time = time.time()

        # Initialize instance variables (models_loaded is set up front so it
        # always exists, whatever happens below)
        self.reranker_model = None
        self.models_loaded = False
        self.logger = logging.getLogger(__name__)
        self.request_count = 0
        self.total_processing_time = 0.0

        try:
            import torch
            from transformers import AutoModelForSequenceClassification

            print("Loading Jina Reranker v2 (Transformer-based)...")
            model = AutoModelForSequenceClassification.from_pretrained(
                'jinaai/jina-reranker-v2-base-multilingual',
                torch_dtype="auto",
                trust_remote_code=True
            )

            # from_pretrained leaves the weights on the CPU; move them to the
            # GPU when one is visible, and switch to inference mode.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            model.to(device)
            model.eval()
            self.reranker_model = model

            load_time = time.time() - start_time
            print(f"Jina Reranker v2 loaded successfully in {load_time:.2f}s")

            # Model loading status
            self.models_loaded = True

        except Exception as e:
            print(f"Model loading failed: {e}")
            import traceback
            traceback.print_exc()
            self.reranker_model = None
            self.models_loaded = False

    def _billing_info(self, processing_time: float) -> Dict[str, Any]:
        """Build the billing section shared by success and error responses."""
        cost_per_hour_usd = 0.40  # approximate T4 price used for the estimate
        return {
            'request_id': f"rerank_{self.request_count}_{int(time.time())}",
            'gpu_seconds': processing_time,
            'estimated_cost_usd': round((processing_time / 3600) * cost_per_hour_usd, 6),
            'gpu_type': 'T4'
        }

    def _emit_json(self, payload: Dict[str, Any]) -> None:
        """Print *payload* as JSON between the START/END marker lines."""
        print("=== JSON_RESULT_START ===")
        print(json.dumps(payload, default=str))
        print("=== JSON_RESULT_END ===")

    @modal.method()
    def rerank_documents(
        self,
        query: str,
        documents: List[str],
        top_k: Optional[int] = None,
        return_documents: bool = True
    ) -> Dict[str, Any]:
        """
        Rerank documents using Jina Reranker v2

        Args:
            query: Query text
            documents: List of documents to rerank
            top_k: Return at most this many results; ``None`` returns all.
                Negative values are treated as 0.
            return_documents: Whether to include each document's text in
                its result entry

        Returns:
            A dict with ``success`` set to True and the sorted ``results``
            plus timing/billing/model metadata, or ``success`` set to False
            and an ``error`` message. Exceptions are not propagated.
        """
        start_time = time.time()
        self.request_count += 1

        try:
            # Validate model loading status
            if not self.models_loaded or self.reranker_model is None:
                raise RuntimeError("Jina Reranker v2 model not loaded")

            if documents:
                # Prepare reranking input (query-document pairs)
                query_doc_pairs = [[query, doc] for doc in documents]

                # Execute reranking (Jina Reranker v2 API)
                scores = self.reranker_model.compute_score(query_doc_pairs, max_length=1024)

                # Normalize scores to a plain Python list, whatever came back
                # (tensor, array, list, or a bare scalar for a single pair)
                if hasattr(scores, 'cpu'):
                    scores = scores.detach().cpu()
                if hasattr(scores, 'tolist'):
                    scores = scores.tolist()
                if not isinstance(scores, (list, tuple)):
                    scores = [scores]
            else:
                # Nothing to score; skip the model call entirely
                scores = []

            # Create results list
            results = []
            for i, (doc, score) in enumerate(zip(documents, scores)):
                result_item = {
                    'index': i,
                    'relevance_score': float(score),
                }
                if return_documents:
                    result_item['document'] = doc
                results.append(result_item)

            # Sort by score (descending)
            results.sort(key=lambda x: x['relevance_score'], reverse=True)

            # Apply top_k limit; clamp so a negative value cannot slice from
            # the tail and silently drop the lowest-ranked entries
            if top_k is not None:
                results = results[:max(top_k, 0)]

            processing_time = time.time() - start_time
            self.total_processing_time += processing_time

            result = {
                'success': True,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'results': results,
                'query': query,
                'model': 'jina-reranker-v2-base-multilingual',
                'architecture': 'Transformer',
                'num_documents': len(documents),
                'returned_count': len(results),
                'processing_time': processing_time,
                'billing': self._billing_info(processing_time),
                'model_info': {
                    'model_name': 'jina-reranker-v2-base-multilingual',
                    'provider': 'ISA',
                    'architecture': 'Transformer',
                    'gpu': 'T4',
                    'languages_supported': '100+',
                    'top_k': top_k,
                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                }
            }

            # Output JSON results
            self._emit_json(result)

            return result

        except Exception as e:
            processing_time = time.time() - start_time
            error_result = {
                'success': False,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'error': str(e),
                'processing_time': processing_time,
                'billing': self._billing_info(processing_time)
            }

            self._emit_json(error_result)

            return error_result

    @modal.method()
    def health_check(self) -> Dict[str, Any]:
        """Health check endpoint.

        ``status`` is ``'healthy'`` only when the model loaded successfully;
        otherwise it is ``'unhealthy'``.
        """
        return {
            'status': 'healthy' if self.models_loaded else 'unhealthy',
            'service': 'isa-embed-rerank',
            'provider': 'ISA',
            'models_loaded': self.models_loaded,
            'model': 'jina-reranker-v2-base-multilingual',
            'architecture': 'Transformer',
            'timestamp': time.time(),
            'gpu': 'T4',
            'memory_usage': '8GB',
            'request_count': self.request_count,
            'languages_supported': '100+'
        }
239
+
240
+ # Deployment functions
241
@app.function()
def deploy_info():
    """Return descriptive metadata about this service.

    All fields are fixed strings except ``deployment_time``, which is the
    wall-clock time (``time.time()``) at the moment of the call.
    """
    info = dict(
        service='isa-embed-rerank',
        version='1.0.0',
        description='ISA Jina Reranker v2 service - SOTA 2024 Transformer-based reranking',
        model='jina-reranker-v2-base-multilingual',
        architecture='Transformer',
        gpu='T4',
        languages='100+',
    )
    info['deployment_time'] = time.time()
    return info
254
+
255
@app.function()
def register_service():
    """Register the reranking service with the ISA model repository.

    Returns:
        ``{'status': 'registered'}`` on success, otherwise
        ``{'status': 'failed', 'error': <message>}``. Any exception,
        including a failed import of the repository module, is caught and
        reported in the return value rather than raised.
    """
    try:
        # Imported lazily so an import failure is reported like any other error
        from isa_model.core.models.model_repo import ModelRepository

        repository = ModelRepository()

        # Descriptor handed to the repository for the reranking service
        registration = {
            'model_id': 'isa-jina-reranker-v2-service',
            'model_type': 'reranking',
            'provider': 'isa',
            'endpoint': 'https://isa-embed-rerank.modal.run',
            'capabilities': ['reranking', 'document_ranking'],
            'pricing': {'gpu_type': 'T4', 'cost_per_hour': 0.40},
            'metadata': {
                'model': 'jina-reranker-v2-base-multilingual',
                'architecture': 'Transformer',
                'languages': '100+',
                'sota_2024': True,
            },
        }
        repository.register_model(registration)

        print("Jina Reranker v2 service registered successfully")
        return {'status': 'registered'}

    except Exception as exc:
        print(f"Service registration failed: {exc}")
        return {'status': 'failed', 'error': str(exc)}
285
+
286
if __name__ == "__main__":
    # Running the file directly only prints a short usage banner;
    # an empty entry produces a blank line.
    banner_lines = (
        "ISA Jina Reranker v2 Service - Modal Deployment",
        "Deploy with: modal deploy isa_embed_rerank_service.py",
        "",
        "Model: jina-reranker-v2-base-multilingual",
        "Architecture: Transformer (Cross-encoder)",
        "Languages: 100+ supported",
        "GPU: T4 (cost-effective)",
        "",
        "Usage:",
        "service.rerank_documents('query', ['doc1', 'doc2', 'doc3'], top_k=5)",
    )
    for banner_line in banner_lines:
        print(banner_line)
@@ -0,0 +1 @@
1
+ """LLM services for Modal deployment"""