isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/__init__.py +1 -1
- isa_model/client.py +732 -565
- isa_model/core/cache/redis_cache.py +401 -0
- isa_model/core/config/config_manager.py +53 -10
- isa_model/core/config.py +1 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/migrations.py +277 -0
- isa_model/core/database/supabase_client.py +123 -0
- isa_model/core/models/__init__.py +37 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +36 -18
- isa_model/core/models/model_repo.py +44 -38
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +101 -370
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +7 -0
- isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
- isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
- isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/core/deployment_manager.py +6 -4
- isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
- isa_model/eval/benchmarks/__init__.py +27 -0
- isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
- isa_model/eval/benchmarks.py +244 -12
- isa_model/eval/evaluators/__init__.py +8 -2
- isa_model/eval/evaluators/audio_evaluator.py +727 -0
- isa_model/eval/evaluators/embedding_evaluator.py +742 -0
- isa_model/eval/evaluators/vision_evaluator.py +564 -0
- isa_model/eval/example_evaluation.py +395 -0
- isa_model/eval/factory.py +272 -5
- isa_model/eval/isa_benchmarks.py +700 -0
- isa_model/eval/isa_integration.py +582 -0
- isa_model/eval/metrics.py +159 -6
- isa_model/eval/tests/unit/test_basic.py +396 -0
- isa_model/inference/ai_factory.py +44 -8
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +32 -6
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/base_llm_service.py +30 -6
- isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
- isa_model/inference/services/llm/ollama_llm_service.py +2 -1
- isa_model/inference/services/llm/openai_llm_service.py +652 -55
- isa_model/inference/services/llm/yyds_llm_service.py +2 -1
- isa_model/inference/services/vision/__init__.py +5 -5
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/helpers/image_utils.py +11 -5
- isa_model/inference/services/vision/isa_vision_service.py +573 -0
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/serving/api/fastapi_server.py +88 -16
- isa_model/serving/api/middleware/auth.py +311 -0
- isa_model/serving/api/middleware/security.py +278 -0
- isa_model/serving/api/routes/analytics.py +486 -0
- isa_model/serving/api/routes/deployments.py +339 -0
- isa_model/serving/api/routes/evaluations.py +579 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/unified.py +324 -165
- isa_model/serving/api/startup.py +304 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/training/__init__.py +100 -6
- isa_model/training/core/__init__.py +4 -1
- isa_model/training/examples/intelligent_training_example.py +281 -0
- isa_model/training/intelligent/__init__.py +25 -0
- isa_model/training/intelligent/decision_engine.py +643 -0
- isa_model/training/intelligent/intelligent_factory.py +888 -0
- isa_model/training/intelligent/knowledge_base.py +751 -0
- isa_model/training/intelligent/resource_optimizer.py +839 -0
- isa_model/training/intelligent/task_classifier.py +576 -0
- isa_model/training/storage/__init__.py +24 -0
- isa_model/training/storage/core_integration.py +439 -0
- isa_model/training/storage/training_repository.py +552 -0
- isa_model/training/storage/training_storage.py +628 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
- isa_model-0.4.0.dist-info/RECORD +182 -0
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model-0.3.9.dist-info/RECORD +0 -138
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
- {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
"""
|
2
|
+
ISA Embedding & Reranking Service
|
3
|
+
|
4
|
+
Jina-based reranking service using SOTA Transformer models (this module exposes reranking only; no embedding endpoint is defined here)
|
5
|
+
- Reranking: Jina Reranker v2 (Transformer architecture)
|
6
|
+
- Languages: 100+ supported
|
7
|
+
"""
|
8
|
+
|
9
|
+
import modal
|
10
|
+
import time
|
11
|
+
import json
|
12
|
+
import os
|
13
|
+
import logging
|
14
|
+
from typing import Dict, List, Optional, Any
|
15
|
+
|
16
|
+
# Modal application that owns every function/class declared in this module.
app = modal.App("isa-embed-rerank")

# Python packages installed into the container image.
_PIP_PACKAGES = [
    "torch>=2.0.0",
    "transformers>=4.35.0",
    "sentence-transformers>=2.2.2",
    "huggingface_hub",
    "accelerate",
    "numpy>=1.24.3",
    "httpx>=0.26.0",
    "requests",
    "pydantic>=2.0.0",
    "python-dotenv",
    "einops",  # Required for Jina Reranker v2
]

# Environment variables pointing the model caches at /models inside the container.
_CACHE_ENV = {
    "TRANSFORMERS_CACHE": "/models",
    "TORCH_HOME": "/models/torch",
    "HF_HOME": "/models",
}

# Container image: Debian slim + Python 3.11, with the packages and cache env above.
_base_image = modal.Image.debian_slim(python_version="3.11")
image = _base_image.pip_install(_PIP_PACKAGES).env(_CACHE_ENV)
|
41
|
+
|
42
|
+
# Hourly GPU price used for the per-request cost estimate (T4 GPU: ~$0.40/hour).
_T4_COST_PER_HOUR_USD = 0.40


def _normalize_scores(raw_scores: Any) -> List[float]:
    """Convert whatever the reranker returned into a flat list of Python floats.

    Accepts a tensor-like object (anything exposing ``detach``/``cpu``/``tolist``),
    an array-like object exposing ``tolist``, a list/tuple, or a bare scalar
    (which becomes a one-element list).
    """
    if hasattr(raw_scores, 'detach'):
        raw_scores = raw_scores.detach()
    if hasattr(raw_scores, 'cpu'):
        raw_scores = raw_scores.cpu()
    if hasattr(raw_scores, 'tolist'):
        # tolist() on a 0-d array/tensor or numpy scalar yields a bare number,
        # which the scalar check below wraps into a list.
        raw_scores = raw_scores.tolist()
    if not isinstance(raw_scores, (list, tuple)):
        raw_scores = [raw_scores]
    return [float(score) for score in raw_scores]


def _billing_info(request_count: int, processing_time: float) -> Dict[str, Any]:
    """Build the ``billing`` section shared by success and error responses."""
    gpu_cost = (processing_time / 3600) * _T4_COST_PER_HOUR_USD
    return {
        'request_id': f"rerank_{request_count}_{int(time.time())}",
        'gpu_seconds': processing_time,
        'estimated_cost_usd': round(gpu_cost, 6),
        'gpu_type': 'T4'
    }


def _emit_json_result(payload: Dict[str, Any]) -> None:
    """Print the payload as JSON between the START/END marker lines."""
    print("=== JSON_RESULT_START ===")
    print(json.dumps(payload, default=str))
    print("=== JSON_RESULT_END ===")


# Jina Reranking Service - Optimized for T4 GPU
@app.cls(
    gpu="T4",             # NVIDIA T4 GPU for Jina Reranker
    image=image,
    memory=8192,          # 8GB RAM
    timeout=1800,         # 30 minutes
    scaledown_window=60,  # 1 minute idle timeout
    min_containers=0,     # Scale to zero
    max_containers=10,    # Support up to 10 concurrent containers
)
class ISAEmbedRerankService:
    """
    ISA Jina Reranker v2 Service

    Transformer-based reranking model:
    - Model: jinaai/jina-reranker-v2-base-multilingual
    - Architecture: Transformer (Cross-encoder)
    - Languages: 100+ supported

    The model is loaded once per container in ``load_models``; each call to
    ``rerank_documents`` scores (query, document) pairs and returns them
    sorted by relevance together with timing and cost-estimate metadata.
    """

    @modal.enter()
    def load_models(self):
        """Load Jina Reranker v2 and place it on the GPU when one is available.

        Never raises: on failure the traceback is printed and
        ``self.models_loaded`` is left ``False`` so that later requests
        return an error result instead of crashing the container.
        """
        print("Loading Jina Reranker v2...")
        start_time = time.time()

        # Initialize instance variables up front so every attribute exists
        # even if loading fails part-way through.
        self.reranker_model = None
        self.models_loaded = False
        self.device = "cpu"
        self.logger = logging.getLogger(__name__)
        self.request_count = 0
        self.total_processing_time = 0.0

        try:
            import torch
            from transformers import AutoModelForSequenceClassification

            # Load Jina Reranker v2 (SOTA 2024 Transformer)
            print("Loading Jina Reranker v2 (Transformer-based)...")
            model = AutoModelForSequenceClassification.from_pretrained(
                'jinaai/jina-reranker-v2-base-multilingual',
                torch_dtype="auto",
                trust_remote_code=True
            )

            # from_pretrained leaves the weights on CPU; move them to the GPU
            # this container is provisioned with and switch to inference mode.
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            model.to(self.device)
            model.eval()
            self.reranker_model = model

            load_time = time.time() - start_time
            print(f"Jina Reranker v2 loaded successfully in {load_time:.2f}s")
            print(f"Jina Reranker v2 running on device: {self.device}")

            # Model loading status
            self.models_loaded = True

        except Exception as e:
            print(f"Model loading failed: {e}")
            import traceback
            traceback.print_exc()
            self.reranker_model = None
            self.models_loaded = False

    @modal.method()
    def rerank_documents(
        self,
        query: str,
        documents: List[str],
        top_k: Optional[int] = None,
        return_documents: bool = True
    ) -> Dict[str, Any]:
        """
        Rerank documents using Jina Reranker v2

        Args:
            query: Query text
            documents: List of documents to rerank (an empty list yields an
                empty, successful result without invoking the model)
            top_k: Return at most the top k results; ``None`` returns all.
                Negative values are rejected.
            return_documents: Whether to include each document's text in
                its result entry

        Returns:
            A dict with ``success`` set to True and the sorted ``results``
            (each with ``index``, ``relevance_score`` and optionally
            ``document``) plus timing/billing/model metadata; or, on any
            failure, a dict with ``success`` set to False and an ``error``
            message. Exceptions are not propagated to the caller.
        """
        start_time = time.time()
        self.request_count += 1

        try:
            # Validate model loading status
            if not self.models_loaded or self.reranker_model is None:
                raise RuntimeError("Jina Reranker v2 model not loaded")

            # A negative slice bound would silently drop trailing results.
            if top_k is not None and top_k < 0:
                raise ValueError(f"top_k must be non-negative, got {top_k}")

            documents = list(documents)

            if documents:
                # Prepare reranking input (query-document pairs)
                query_doc_pairs = [[query, doc] for doc in documents]

                # Execute reranking (Jina Reranker v2 API)
                raw_scores = self.reranker_model.compute_score(
                    query_doc_pairs, max_length=1024
                )
                scores = _normalize_scores(raw_scores)
            else:
                # Nothing to score; skip the model call entirely.
                scores = []

            # zip() would silently truncate on a mismatch, hiding the problem.
            if len(scores) != len(documents):
                raise RuntimeError(
                    f"Reranker returned {len(scores)} scores for "
                    f"{len(documents)} documents"
                )

            # Create results list
            results = []
            for i, (doc, score) in enumerate(zip(documents, scores)):
                result_item = {
                    'index': i,
                    'relevance_score': score,
                }
                if return_documents:
                    result_item['document'] = doc
                results.append(result_item)

            # Sort by score (descending)
            results.sort(key=lambda x: x['relevance_score'], reverse=True)

            # Apply top_k limit
            if top_k is not None:
                results = results[:top_k]

            processing_time = time.time() - start_time
            self.total_processing_time += processing_time

            result = {
                'success': True,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'results': results,
                'query': query,
                'model': 'jina-reranker-v2-base-multilingual',
                'architecture': 'Transformer',
                'num_documents': len(documents),
                'returned_count': len(results),
                'processing_time': processing_time,
                'billing': _billing_info(self.request_count, processing_time),
                'model_info': {
                    'model_name': 'jina-reranker-v2-base-multilingual',
                    'provider': 'ISA',
                    'architecture': 'Transformer',
                    'gpu': 'T4',
                    'languages_supported': '100+',
                    'top_k': top_k,
                    'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                }
            }

            # Output JSON results
            _emit_json_result(result)

            return result

        except Exception as e:
            processing_time = time.time() - start_time
            error_result = {
                'success': False,
                'service': 'isa-embed-rerank',
                'operation': 'reranking',
                'provider': 'ISA',
                'error': str(e),
                'processing_time': processing_time,
                'billing': _billing_info(self.request_count, processing_time)
            }

            _emit_json_result(error_result)

            return error_result

    @modal.method()
    def health_check(self) -> Dict[str, Any]:
        """Health check endpoint.

        Reports ``'healthy'`` only when the model finished loading in this
        container; otherwise ``'unhealthy'``. ``models_loaded`` carries the
        same information as a boolean.
        """
        return {
            'status': 'healthy' if self.models_loaded else 'unhealthy',
            'service': 'isa-embed-rerank',
            'provider': 'ISA',
            'models_loaded': self.models_loaded,
            'model': 'jina-reranker-v2-base-multilingual',
            'architecture': 'Transformer',
            'timestamp': time.time(),
            'gpu': 'T4',
            'memory_usage': '8GB',
            'request_count': self.request_count,
            'languages_supported': '100+'
        }
|
239
|
+
|
240
|
+
# Deployment functions
|
241
|
+
@app.function()
def deploy_info():
    """Return static descriptive metadata for this service plus the current time."""
    info = dict(
        service='isa-embed-rerank',
        version='1.0.0',
        description='ISA Jina Reranker v2 service - SOTA 2024 Transformer-based reranking',
        model='jina-reranker-v2-base-multilingual',
        architecture='Transformer',
        gpu='T4',
        languages='100+',
    )
    # Stamped at call time, as seconds since the epoch.
    info['deployment_time'] = time.time()
    return info
|
254
|
+
|
255
|
+
@app.function()
def register_service():
    """Register this reranking service with the ISA model repository.

    Returns ``{'status': 'registered'}`` on success. Any exception raised
    while importing the repository or registering the record is printed
    and reported as ``{'status': 'failed', 'error': <message>}``.
    """
    # Descriptive metadata stored alongside the registration record.
    service_metadata = {
        'model': 'jina-reranker-v2-base-multilingual',
        'architecture': 'Transformer',
        'languages': '100+',
        'sota_2024': True
    }

    # Registration record for the reranking service.
    service_record = {
        'model_id': 'isa-jina-reranker-v2-service',
        'model_type': 'reranking',
        'provider': 'isa',
        'endpoint': 'https://isa-embed-rerank.modal.run',
        'capabilities': ['reranking', 'document_ranking'],
        'pricing': {'gpu_type': 'T4', 'cost_per_hour': 0.40},
        'metadata': service_metadata
    }

    try:
        # Imported lazily so a missing package is reported as a failed
        # registration rather than an import-time error.
        from isa_model.core.models.model_repo import ModelRepository

        ModelRepository().register_model(service_record)

        print("Jina Reranker v2 service registered successfully")
        return {'status': 'registered'}

    except Exception as exc:
        print(f"Service registration failed: {exc}")
        return {'status': 'failed', 'error': str(exc)}
|
285
|
+
|
286
|
+
if __name__ == "__main__":
    # Usage banner shown when the file is run directly instead of deployed.
    # Empty strings produce the blank separator lines.
    banner_lines = (
        "ISA Jina Reranker v2 Service - Modal Deployment",
        "Deploy with: modal deploy isa_embed_rerank_service.py",
        "",
        "Model: jina-reranker-v2-base-multilingual",
        "Architecture: Transformer (Cross-encoder)",
        "Languages: 100+ supported",
        "GPU: T4 (cost-effective)",
        "",
        "Usage:",
        "service.rerank_documents('query', ['doc1', 'doc2', 'doc3'], top_k=5)",
    )
    for banner_line in banner_lines:
        print(banner_line)
|