isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
"""Embedding services for Modal deployment"""
|
@@ -0,0 +1,296 @@
|
|
1
|
+
"""
|
2
|
+
ISA Embedding & Reranking Service
|
3
|
+
|
4
|
+
Jina-based reranking service using a SOTA Transformer cross-encoder (only reranking is exposed; no embedding endpoint)
|
5
|
+
- Reranking: Jina Reranker v2 (Transformer architecture)
|
6
|
+
- Languages: 100+ supported
|
7
|
+
"""
|
8
|
+
|
9
|
+
import modal
|
10
|
+
import time
|
11
|
+
import json
|
12
|
+
import os
|
13
|
+
import logging
|
14
|
+
from typing import Dict, List, Optional, Any
|
15
|
+
|
16
|
+
# Define Modal application
|
17
|
+
app = modal.App("isa-embed-rerank")

# Python packages installed into the container image
_pip_requirements = [
    "torch>=2.0.0",
    "transformers>=4.35.0",
    "sentence-transformers>=2.2.2",
    "huggingface_hub",
    "accelerate",
    "numpy>=1.24.3",
    "httpx>=0.26.0",
    "requests",
    "pydantic>=2.0.0",
    "python-dotenv",
    "einops",  # Required for Jina Reranker v2
]

# Cache locations exported inside the container; all of them live under /models
_cache_env = {
    "TRANSFORMERS_CACHE": "/models",
    "TORCH_HOME": "/models/torch",
    "HF_HOME": "/models",
}

# Container image: Debian slim + Python 3.11, then the packages and env above
_base_image = modal.Image.debian_slim(python_version="3.11")
image = _base_image.pip_install(_pip_requirements).env(_cache_env)
|
41
|
+
|
42
|
+
# Jina Reranking Service - Optimized for T4 GPU
|
43
|
+
@app.cls(
    gpu="T4",             # NVIDIA T4 GPU for Jina Reranker
    image=image,
    memory=8192,          # 8GB RAM
    timeout=1800,         # 30 minutes
    scaledown_window=60,  # 1 minute idle timeout
    min_containers=0,     # Scale to zero
    max_containers=10,    # Support up to 10 concurrent containers
)
class ISAEmbedRerankService:
    """
    ISA Jina Reranker v2 Service

    Transformer-based SOTA reranking model:
    - Model: jinaai/jina-reranker-v2-base-multilingual
    - Architecture: Transformer (Cross-encoder)
    - Languages: 100+ supported
    - Performance: 2024 best-in-class reranker

    Exposes two Modal methods: ``rerank_documents`` and ``health_check``.
    """

    @modal.enter()
    def load_models(self):
        """Load Jina Reranker v2 once per container and place it on the GPU.

        Loading errors are not raised. They are printed and recorded as
        ``self.models_loaded = False`` so that ``rerank_documents`` can
        answer with an error payload and ``health_check`` can report the
        failure.
        """
        print("Loading Jina Reranker v2...")
        start_time = time.time()

        # Initialize instance variables up front so every attribute exists
        # even if loading fails part-way through.
        self.reranker_model = None
        self.models_loaded = False
        self.logger = logging.getLogger(__name__)
        self.request_count = 0
        self.total_processing_time = 0.0

        try:
            import torch
            from transformers import AutoModelForSequenceClassification

            # Load Jina Reranker v2 (SOTA 2024 Transformer)
            print("Loading Jina Reranker v2 (Transformer-based)...")
            model = AutoModelForSequenceClassification.from_pretrained(
                'jinaai/jina-reranker-v2-base-multilingual',
                torch_dtype="auto",
                trust_remote_code=True
            )

            # from_pretrained leaves the weights on CPU; without this move the
            # reserved GPU would sit idle while inference runs on the CPU.
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model.to(device)
            model.eval()  # inference only
            print(f"Jina Reranker v2 running on device: {device}")

            self.reranker_model = model

            load_time = time.time() - start_time
            print(f"Jina Reranker v2 loaded successfully in {load_time:.2f}s")

            # Model loading status
            self.models_loaded = True

        except Exception as e:
            print(f"Model loading failed: {e}")
            import traceback
            traceback.print_exc()
            self.reranker_model = None
            self.models_loaded = False

    def _billing(self, processing_time: float) -> Dict[str, Any]:
        """Build the billing section shared by success and error payloads."""
        # Cost estimate assumes a T4 GPU at ~$0.40/hour
        gpu_cost = (processing_time / 3600) * 0.40
        return {
            'request_id': f"rerank_{self.request_count}_{int(time.time())}",
            'gpu_seconds': processing_time,
            'estimated_cost_usd': round(gpu_cost, 6),
            'gpu_type': 'T4'
        }

    def _emit_json(self, payload: Dict[str, Any]) -> None:
        """Print *payload* as JSON between the result marker lines."""
        print("=== JSON_RESULT_START ===")
        print(json.dumps(payload, default=str))
        print("=== JSON_RESULT_END ===")

    def _scores_to_list(self, scores: Any) -> List[float]:
        """Normalise a tensor, array, list or bare scalar into a list of floats."""
        if hasattr(scores, 'tolist'):
            # Covers tensors and arrays; a 0-d value comes back as a scalar
            scores = scores.tolist()
        if not isinstance(scores, (list, tuple)):
            scores = [scores]
        return [float(score) for score in scores]

    @modal.method()
    def rerank_documents(
        self,
        query: str,
        documents: List[str],
        top_k: Optional[int] = None,
        return_documents: bool = True
    ) -> Dict[str, Any]:
        """
        Rerank documents using Jina Reranker v2

        Args:
            query: Query text
            documents: List of documents to rerank; an empty list yields a
                successful response with no results
            top_k: Return top k results (``None`` returns all; negative
                values are treated as 0)
            return_documents: Whether to return document content

        Returns:
            Reranking results. On any failure the dict has
            ``'success': False`` and an ``'error'`` message instead of
            ``'results'``; exceptions are never propagated to the caller.
        """
        start_time = time.time()
        self.request_count += 1

        try:
            # Validate model loading status
            if not self.models_loaded or self.reranker_model is None:
                raise RuntimeError("Jina Reranker v2 model not loaded")

            documents = list(documents)

            if documents:
                # Prepare reranking input (query-document pairs)
                query_doc_pairs = [[query, doc] for doc in documents]

                # Execute reranking (Jina Reranker v2 API)
                raw_scores = self.reranker_model.compute_score(
                    query_doc_pairs, max_length=1024
                )
                scores = self._scores_to_list(raw_scores)
            else:
                # Nothing to score; skip the model call entirely
                scores = []

            # Create results list, remembering each document's original index
            results = []
            for i, (doc, score) in enumerate(zip(documents, scores)):
                result_item = {
                    'index': i,
                    'relevance_score': score,
                }
                if return_documents:
                    result_item['document'] = doc
                results.append(result_item)

            # Sort by score (descending)
            results.sort(key=lambda x: x['relevance_score'], reverse=True)

            # Apply top_k limit; clamp so a negative value cannot slice
            # from the tail of the list
            if top_k is not None:
                results = results[:max(top_k, 0)]

            processing_time = time.time() - start_time
            self.total_processing_time += processing_time

            result = {
                'success': True,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'results': results,
                'query': query,
                'model': 'jina-reranker-v2-base-multilingual',
                'architecture': 'Transformer',
                'num_documents': len(documents),
                'returned_count': len(results),
                'processing_time': processing_time,
                'billing': self._billing(processing_time),
                'model_info': {
                    'model_name': 'jina-reranker-v2-base-multilingual',
                    'provider': 'ISA',
                    'architecture': 'Transformer',
                    'gpu': 'T4',
                    'languages_supported': '100+',
                    'top_k': top_k,
                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                }
            }

            # Output JSON results
            self._emit_json(result)

            return result

        except Exception as e:
            # Service boundary: report the failure in-band instead of raising
            processing_time = time.time() - start_time
            error_result = {
                'success': False,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'error': str(e),
                'processing_time': processing_time,
                'billing': self._billing(processing_time)
            }

            self._emit_json(error_result)

            return error_result

    @modal.method()
    def health_check(self) -> Dict[str, Any]:
        """Health check endpoint.

        ``status`` is ``'healthy'`` only when the model was loaded
        successfully; otherwise it is ``'unhealthy'``.
        """
        return {
            'status': 'healthy' if self.models_loaded else 'unhealthy',
            'service': 'isa-embed-rerank',
            'provider': 'ISA',
            'models_loaded': self.models_loaded,
            'model': 'jina-reranker-v2-base-multilingual',
            'architecture': 'Transformer',
            'timestamp': time.time(),
            'gpu': 'T4',
            'memory_usage': '8GB',
            'request_count': self.request_count,
            'languages_supported': '100+'
        }
|
239
|
+
|
240
|
+
# Deployment functions
|
241
|
+
@app.function()
def deploy_info():
    """Return static descriptive metadata for this service.

    ``deployment_time`` is the wall-clock time at which this function runs.
    """
    info = dict(
        service='isa-embed-rerank',
        version='1.0.0',
        description='ISA Jina Reranker v2 service - SOTA 2024 Transformer-based reranking',
        model='jina-reranker-v2-base-multilingual',
        architecture='Transformer',
        gpu='T4',
        languages='100+',
    )
    info['deployment_time'] = time.time()
    return info
|
254
|
+
|
255
|
+
@app.function()
def register_service():
    """Register the reranking service with the ISA model repository.

    Returns:
        ``{'status': 'registered'}`` on success, or
        ``{'status': 'failed', 'error': <message>}`` if the import or the
        registration call raises.
    """
    # Record describing the reranking service
    service_record = {
        'model_id': 'isa-jina-reranker-v2-service',
        'model_type': 'reranking',
        'provider': 'isa',
        'endpoint': 'https://isa-embed-rerank.modal.run',
        'capabilities': ['reranking', 'document_ranking'],
        'pricing': {'gpu_type': 'T4', 'cost_per_hour': 0.40},
        'metadata': {
            'model': 'jina-reranker-v2-base-multilingual',
            'architecture': 'Transformer',
            'languages': '100+',
            'sota_2024': True
        }
    }

    try:
        # Imported lazily so a missing package is reported as a failed
        # registration rather than an import-time crash
        from isa_model.core.models.model_repo import ModelRepository

        repository = ModelRepository()
        repository.register_model(service_record)

        print("Jina Reranker v2 service registered successfully")
        return {'status': 'registered'}

    except Exception as exc:
        print(f"Service registration failed: {exc}")
        return {'status': 'failed', 'error': str(exc)}
|
285
|
+
|
286
|
+
if __name__ == "__main__":
    # Usage banner shown when the file is executed directly
    banner_lines = (
        "ISA Jina Reranker v2 Service - Modal Deployment",
        "Deploy with: modal deploy isa_embed_rerank_service.py",
        "",
        "Model: jina-reranker-v2-base-multilingual",
        "Architecture: Transformer (Cross-encoder)",
        "Languages: 100+ supported",
        "GPU: T4 (cost-effective)",
        "",
        "Usage:",
        "service.rerank_documents('query', ['doc1', 'doc2', 'doc3'], top_k=5)",
    )
    for banner_line in banner_lines:
        print(banner_line)
|
@@ -0,0 +1 @@
|
|
1
|
+
"""LLM services for Modal deployment"""
|