isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,305 @@
|
|
1
|
+
"""
|
2
|
+
ISA Embedding Service
|
3
|
+
|
4
|
+
ISA reranking service using deployed Jina Reranker v2 via Modal
|
5
|
+
"""
|
6
|
+
|
7
|
+
import logging
|
8
|
+
from typing import Dict, Any, List, Optional
|
9
|
+
|
10
|
+
try:
|
11
|
+
import modal
|
12
|
+
MODAL_AVAILABLE = True
|
13
|
+
except ImportError:
|
14
|
+
MODAL_AVAILABLE = False
|
15
|
+
modal = None
|
16
|
+
|
17
|
+
from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
class ISAEmbedService(BaseEmbedService):
|
22
|
+
"""
|
23
|
+
ISA Embedding Service - calls ISA deployed reranking models
|
24
|
+
|
25
|
+
Supported features:
|
26
|
+
- Document reranking (Jina Reranker v2 via Modal)
|
27
|
+
- Future: embedding generation
|
28
|
+
- Future: semantic similarity computation
|
29
|
+
"""
|
30
|
+
|
31
|
+
def __init__(self,
             rerank_modal_app_name: str = "isa-embed-rerank",
             timeout: int = 30):
    """
    Set up the ISA rerank client.

    Base-class initialisation is intentionally skipped (see the TODO below),
    so every attribute read by the other methods is assigned here directly.

    Args:
        rerank_modal_app_name: Name of the deployed Modal app to look up.
        timeout: Request timeout in seconds (only stored on the instance here).
    """
    # For now, skip BaseService initialization to avoid config validation
    # TODO: Properly configure ISA provider in config system
    self.provider_name = "isa"
    self.model_name = "isa-jina-reranker-v2-service"
    self.rerank_modal_app_name = rerank_modal_app_name
    self.timeout = timeout

    # Start in the "unavailable" state; only a successful lookup upgrades it.
    self.modal_app = None
    self.modal_service = None

    if not MODAL_AVAILABLE:
        logger.warning("Modal SDK not available")
    else:
        try:
            # Resolve the already-deployed Modal application by name.
            self.modal_app = modal.App.lookup(rerank_modal_app_name)
            logger.info(f"Connected to Modal rerank app: {rerank_modal_app_name}")

            self.modal_service = True  # Mark service as available
            logger.info("Modal rerank app connection established")
        except Exception as e:
            logger.warning(f"Failed to connect to Modal rerank app: {e}")
            self.modal_app = None
            self.modal_service = None

    # Client-side usage counters, updated by rerank_documents().
    self.request_count = 0
    self.total_cost = 0.0
|
70
|
+
|
71
|
+
async def rerank_documents(
    self,
    query: str,
    documents: List[str],
    top_k: Optional[int] = None,
    return_documents: bool = True
) -> Dict[str, Any]:
    """
    Rerank documents using Jina Reranker v2.

    Delegates to ``self._call_rerank_service`` and wraps the raw response in
    the envelope used by this service. On success the client-side counters
    ``request_count`` and ``total_cost`` are updated.

    Args:
        query: Query string.
        documents: List of documents to rerank.
        top_k: Return top k results (None = all).
        return_documents: Whether to include document content in results.

    Returns:
        A dict with ``success``, ``provider`` and ``service`` keys. On success
        it also carries ``result`` and ``metadata``; on failure it carries
        ``error`` (and ``details`` when the remote call returned an error).
        This method does not raise; unexpected exceptions are logged and
        reported through the ``error`` key.
    """
    failure = {
        'success': False,
        'provider': 'ISA',
        'service': 'isa-embed-rerank',
    }

    try:
        if not self.modal_app or not self.modal_service:
            return {**failure, 'error': 'Modal rerank app or service not available'}

        # Call reranking service directly via Modal SDK
        result = await self._call_rerank_service(query, documents, top_k, return_documents)

        if not (result and result.get('success', False)):
            reason = result.get("error", "Unknown error") if result else "No response"
            return {
                **failure,
                'error': f'Rerank service returned error: {reason}',
                'details': result,
            }

        self.request_count += 1

        # Record cost. The billing payload comes from the remote service, so
        # tolerate a missing/None/non-numeric cost instead of letting a
        # TypeError turn a successful rerank into a reported failure.
        billing = result.get('billing')
        if isinstance(billing, dict):
            cost = billing.get('estimated_cost_usd')
            if isinstance(cost, (int, float)):
                self.total_cost += cost

        # Format response to match expected structure
        return {
            'success': True,
            'provider': 'ISA',
            'service': 'isa-embed-rerank',
            'result': {
                'results': result.get('results', []),
                'processing_time': result.get('processing_time'),
                'billing': result.get('billing', {}),
                'query': result.get('query'),
                'num_documents': result.get('num_documents'),
                'returned_count': result.get('returned_count'),
            },
            'metadata': {
                'model_used': result.get('model'),
                'provider': result.get('provider', 'ISA'),
                'billing': result.get('billing', {}),
            },
        }

    except Exception as e:
        # logger.exception records the traceback through the logging system
        # rather than printing it straight to stderr.
        logger.exception(f"ISA document reranking failed: {e}")
        return {**failure, 'error': str(e)}
|
149
|
+
|
150
|
+
async def _call_rerank_service(
    self,
    query: str,
    documents: List[str],
    top_k: Optional[int],
    return_documents: bool
) -> Dict[str, Any]:
    """
    Call the deployed reranking class via the Modal SDK.

    Looks up the ``ISAEmbedRerankService`` class in the app named by
    ``self.rerank_modal_app_name`` and invokes its ``rerank_documents``
    method remotely.

    Args:
        query: Query string forwarded to the remote method.
        documents: Documents forwarded to the remote method.
        top_k: Forwarded unchanged.
        return_documents: Forwarded unchanged.

    Returns:
        Whatever the remote method returns, or a
        ``{'success': False, 'error': ...}`` dict when the SDK call fails
        (including when the Modal SDK cannot be imported).
    """
    try:
        import modal

        logger.info("Calling Jina Reranker v2 service via Modal SDK...")

        # Look up the deployed class by app name and class name.
        ISAEmbedRerankService = modal.Cls.from_name(
            app_name=self.rerank_modal_app_name,
            name="ISAEmbedRerankService"
        )

        # Create instance and call method. This coroutine runs on the event
        # loop, so use Modal's async interface (`.aio`) instead of the
        # blocking `.remote(...)` call, which would stall every other task
        # for the duration of the remote inference.
        instance = ISAEmbedRerankService()
        result = await instance.rerank_documents.remote.aio(
            query=query,
            documents=documents,
            top_k=top_k,
            return_documents=return_documents
        )

        logger.info("Modal rerank SDK call successful")
        return result

    except Exception as e:
        logger.error(f"Modal rerank SDK call failed: {e}")
        return {
            'success': False,
            'error': f'Modal rerank SDK error: {str(e)}'
        }
|
189
|
+
|
190
|
+
# ==================== Embedding methods (future implementation) ====================
|
191
|
+
|
192
|
+
async def create_text_embedding(self, text: str) -> List[float]:
    """Single-text embedding is not provided by this service yet.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Text embedding not yet implemented in ISA service"
    raise NotImplementedError(reason)
|
195
|
+
|
196
|
+
async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
    """Batch embedding is not provided by this service yet.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Text embeddings not yet implemented in ISA service"
    raise NotImplementedError(reason)
|
199
|
+
|
200
|
+
async def create_chunks(self, text: str, metadata: Optional[Dict] = None,
                        chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
    """Create text chunks with embeddings - not yet implemented.

    The signature accepts ``chunk_size``, ``overlap`` and extra keyword
    arguments so that it matches the other embedding services' ``create_chunks``.
    A caller passing those options then gets the intended NotImplementedError
    rather than a TypeError about unexpected arguments.

    Args:
        text: Text that would be chunked.
        metadata: Optional metadata for the chunks.
        chunk_size: Accepted for signature compatibility; unused.
        overlap: Accepted for signature compatibility; unused.
        **kwargs: Accepted for signature compatibility; unused.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Text chunking not yet implemented in ISA service")
|
203
|
+
|
204
|
+
async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
    """Embedding similarity is not provided by this service yet.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Similarity computation not yet implemented in ISA service"
    raise NotImplementedError(reason)
|
207
|
+
|
208
|
+
async def find_similar_texts(
    self,
    query_embedding: List[float],
    candidate_embeddings: List[List[float]],
    top_k: int = 5
) -> List[Dict[str, Any]]:
    """Similar-text search is not provided by this service yet.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Similar text search not yet implemented in ISA service"
    raise NotImplementedError(reason)
|
216
|
+
|
217
|
+
def get_embedding_dimension(self) -> int:
    """Embedding dimension is not defined for this rerank-only service.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Embedding dimension not available for rerank-only service"
    raise NotImplementedError(reason)
|
220
|
+
|
221
|
+
def get_max_input_length(self) -> int:
    """Return the maximum input length advertised by this service."""
    # Jina Reranker v2 max length
    max_length = 1024
    return max_length
|
224
|
+
|
225
|
+
# ==================== Service management methods ====================
|
226
|
+
|
227
|
+
async def health_check(self) -> Dict[str, Any]:
    """
    Check ISA reranking service health.

    Sends a one-document probe through ``self.rerank_documents`` and reports
    whether it succeeded. Because the probe goes through the normal rerank
    path, a successful probe also updates the usage counters.

    Returns:
        A dict with ``success``, ``provider``, ``service`` and ``status``
        (``'healthy'`` or ``'error'``). When the probe ran it also contains
        ``rerank_service`` and ``usage_stats``; when the probe failed or an
        exception occurred it contains ``error``.
    """
    try:
        # Simple health check: call reranking service
        probe = await self.rerank_documents(
            query="test",
            documents=["test document"],
            top_k=1,
            return_documents=False
        )

        # 'success' mirrors the probe outcome so it can never contradict
        # 'status' (previously a failed probe reported success=True).
        healthy = bool(probe.get('success'))

        report = {
            'success': healthy,
            'provider': 'ISA',
            'service': 'isa-embed-rerank',
            'status': 'healthy' if healthy else 'error',
            'rerank_service': healthy,
            'usage_stats': {
                'total_requests': self.request_count,
                'total_cost_usd': round(self.total_cost, 6)
            }
        }

        # Surface the probe's own failure reason instead of dropping it.
        if not healthy and probe.get('error'):
            report['error'] = probe['error']

        return report

    except Exception as e:
        return {
            'success': False,
            'provider': 'ISA',
            'service': 'isa-embed-rerank',
            'status': 'error',
            'error': str(e)
        }
|
258
|
+
|
259
|
+
async def get_usage_stats(self) -> Dict[str, Any]:
    """
    Get usage statistics.

    Returns:
        A dict with ``provider``, ``service``, ``client_stats`` (request count
        and accumulated cost rounded to 6 decimals), ``modal_stats`` and
        ``combined_cost``. If reading the counters fails, a dict with
        ``provider``, ``service`` and ``error`` is returned instead.
    """
    try:
        # No Modal-side statistics are collected yet, so this stays empty.
        # (The previous `try: pass / except` around it was dead code.)
        modal_stats: Dict[str, Any] = {}

        spent = round(self.total_cost, 6)

        return {
            'provider': 'ISA',
            'service': 'isa-embed-rerank',
            'client_stats': {
                'total_requests': self.request_count,
                'total_cost_usd': spent
            },
            'modal_stats': modal_stats,
            'combined_cost': spent
        }

    except Exception as e:
        return {
            'provider': 'ISA',
            'service': 'isa-embed-rerank',
            'error': str(e)
        }
|
289
|
+
|
290
|
+
def get_supported_tasks(self) -> List[str]:
    """Return the task names this service accepts (all refer to reranking)."""
    canonical = 'rerank'
    aliases = ('rerank_documents', 'document_ranking')
    return [canonical, *aliases]
|
297
|
+
|
298
|
+
def get_supported_formats(self) -> List[str]:
    """Return the input formats this service accepts (text only)."""
    formats = ['text']
    return formats
|
301
|
+
|
302
|
+
async def close(self):
    """Release resources held by the service (currently a no-op)."""
    # Modal client doesn't need explicit closure
    return None
|
@@ -4,6 +4,7 @@ import asyncio
|
|
4
4
|
from typing import List, Dict, Any, Optional
|
5
5
|
|
6
6
|
from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
|
7
|
+
from isa_model.core.config.config_manager import ConfigManager
|
7
8
|
|
8
9
|
logger = logging.getLogger(__name__)
|
9
10
|
|
@@ -21,9 +22,20 @@ class OllamaEmbedService(BaseEmbedService):
|
|
21
22
|
|
22
23
|
# Initialize HTTP client with provider configuration
|
23
24
|
try:
|
24
|
-
|
25
|
-
|
26
|
-
|
25
|
+
config_manager = ConfigManager()
|
26
|
+
# Use Consul discovery with fallback
|
27
|
+
default_base_url = config_manager.get_ollama_url()
|
28
|
+
|
29
|
+
if "base_url" in provider_config:
|
30
|
+
base_url = provider_config["base_url"]
|
31
|
+
else:
|
32
|
+
host = provider_config.get("host", "localhost")
|
33
|
+
port = provider_config.get("port", 11434)
|
34
|
+
base_url = provider_config.get("base_url", f"http://{host}:{port}")
|
35
|
+
|
36
|
+
# Use config manager default (Consul discovery) if still not set
|
37
|
+
if base_url == f"http://localhost:11434":
|
38
|
+
base_url = default_base_url
|
27
39
|
|
28
40
|
self.client = httpx.AsyncClient(base_url=base_url, timeout=30.0)
|
29
41
|
|
@@ -129,11 +129,9 @@ class OpenAIEmbedService(BaseEmbedService):
|
|
129
129
|
logger.error(f"Error creating text embeddings: {e}")
|
130
130
|
raise
|
131
131
|
|
132
|
-
async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
|
132
|
+
async def create_chunks(self, text: str, metadata: Optional[Dict] = None, chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
|
133
133
|
"""Create text chunks with embeddings"""
|
134
|
-
#
|
135
|
-
chunk_size = 400 # words
|
136
|
-
overlap = 50 # word overlap between chunks
|
134
|
+
# Use provided chunk_size and overlap, or defaults optimized for OpenAI models
|
137
135
|
|
138
136
|
words = text.split()
|
139
137
|
if not words:
|
@@ -0,0 +1,285 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
Resilient Embedding Service - Provides fallback mechanisms for embedding operations
|
6
|
+
Automatically handles OpenAI API failures with local embedding alternatives
|
7
|
+
"""
|
8
|
+
|
9
|
+
import logging
|
10
|
+
import random
|
11
|
+
import numpy as np
|
12
|
+
from typing import List, Dict, Any, Optional, Union
|
13
|
+
from openai import APIConnectionError, APITimeoutError, RateLimitError, AuthenticationError
|
14
|
+
|
15
|
+
from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
|
16
|
+
from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
|
17
|
+
|
18
|
+
logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
class ResilientEmbedService(BaseEmbedService):
|
21
|
+
"""
|
22
|
+
Resilient embedding service with automatic fallback mechanisms
|
23
|
+
|
24
|
+
When OpenAI service fails, automatically falls back to:
|
25
|
+
1. Simple TF-IDF based embeddings
|
26
|
+
2. Random embeddings (for testing/demo purposes)
|
27
|
+
"""
|
28
|
+
|
29
|
+
def __init__(self, provider_name: str = "openai", model_name: str = "text-embedding-3-small", **kwargs):
    """
    Build the resilient service and try to attach the OpenAI primary.

    If constructing the primary service fails for any reason, the instance
    starts in fallback mode. The fallback vectorizer is prepared either way.

    Args:
        provider_name: Provider name passed to the base class and the primary.
        model_name: Model name passed to the base class and the primary.
        **kwargs: Forwarded unchanged to both constructors.
    """
    super().__init__(provider_name, model_name, **kwargs)

    # Pessimistic defaults; the try block below upgrades them on success.
    self.primary_service = None
    self.fallback_mode = False

    try:
        primary = OpenAIEmbedService(provider_name, model_name, **kwargs)
        self.primary_service = primary
        logger.info("✅ Primary OpenAI embedding service initialized")
    except Exception as e:
        logger.warning(f"Failed to initialize OpenAI service, starting in fallback mode: {e}")
        self.fallback_mode = True

    # Prepare the TF-IDF vectorizer used when the primary is unavailable.
    self._init_fallback_vectorizer()
|
45
|
+
|
46
|
+
def _init_fallback_vectorizer(self):
    """
    Prepare the TF-IDF vectorizer used for fallback embeddings.

    Sets ``self.tfidf_available`` to True and stores a fitted vectorizer in
    ``self.tfidf_vectorizer`` when scikit-learn can be imported; otherwise
    sets ``self.tfidf_available`` to False.
    """
    try:
        from sklearn.feature_extraction.text import TfidfVectorizer

        # Use a simple TF-IDF vectorizer with limited features
        vectorizer_options = {
            'max_features': 1536,  # Match OpenAI dimensions
            'stop_words': 'english',
            'ngram_range': (1, 2),
            'lowercase': True,
            'strip_accents': 'unicode',
        }
        vectorizer = TfidfVectorizer(**vectorizer_options)
        self.tfidf_vectorizer = vectorizer

        # Pre-fit with some common words to ensure consistency
        seed_corpus = [
            "hello world", "machine learning", "artificial intelligence",
            "data science", "natural language processing", "computer vision",
            "deep learning", "neural networks", "text analysis",
            "information retrieval", "semantic search", "embeddings"
        ]
        vectorizer.fit(seed_corpus)

        self.tfidf_available = True
        logger.info("✅ TF-IDF fallback vectorizer initialized")

    except ImportError:
        logger.warning("scikit-learn not available, using random embeddings as fallback")
        self.tfidf_available = False
|
74
|
+
|
75
|
+
def _generate_fallback_embedding(self, text: str, dimension: int = 1536) -> List[float]:
    """
    Generate a fallback embedding for a single text.

    Uses the TF-IDF vectorizer when it is available and yields a non-zero
    vector; otherwise produces a pseudo-random unit vector derived from a
    hash of the text.

    Args:
        text: Text to embed.
        dimension: Length of the returned vector.

    Returns:
        A list of ``dimension`` floats, normalised to unit length.
    """
    import hashlib  # local import: keeps this block self-contained

    if self.tfidf_available and hasattr(self, 'tfidf_vectorizer'):
        try:
            # Use TF-IDF for more meaningful embeddings
            vector = np.asarray(
                self.tfidf_vectorizer.transform([text]).toarray()[0], dtype=float
            )

            # Pad or truncate to desired dimension
            if vector.size < dimension:
                vector = np.concatenate([vector, np.zeros(dimension - vector.size)])
            elif vector.size > dimension:
                vector = vector[:dimension]

            norm = np.linalg.norm(vector)
            if norm > 0:
                return (vector / norm).tolist()

            # An all-zero vector (no vocabulary overlap) has no direction and
            # breaks cosine similarity, so fall through to the hashed
            # embedding instead of returning zeros.

        except Exception as e:
            logger.warning(f"TF-IDF fallback failed: {e}, using random embedding")

    # Last resort: pseudo-random embedding that is deterministic per text.
    # The built-in hash() of a str is salted per process, so it is not stable
    # between runs; a SHA-256 digest is. A private Random instance is used so
    # the global `random` state is not reseeded as a side effect.
    digest = hashlib.sha256(text.encode("utf-8", errors="surrogatepass")).digest()
    rng = random.Random(int.from_bytes(digest[:8], "big"))
    embedding = [rng.gauss(0, 1) for _ in range(dimension)]

    # Normalize to unit vector
    norm = float(np.sqrt(sum(x * x for x in embedding)))
    if norm > 0:
        embedding = [x / norm for x in embedding]

    return embedding
|
110
|
+
|
111
|
+
async def create_text_embedding(self, text: str) -> List[float]:
    """
    Embed one text, preferring the primary service and falling back locally.

    Connection/timeout and authentication errors switch the instance into
    fallback mode; rate-limit and other errors fall back for this call only.

    Args:
        text: Text to embed.

    Returns:
        The primary service's embedding, or a locally generated fallback one.
    """
    primary_ready = (not self.fallback_mode) and self.primary_service

    if primary_ready:
        try:
            embedding = await self.primary_service.create_text_embedding(text)
            logger.debug("✅ Used primary OpenAI service")
            return embedding
        except Exception as e:
            # Checked in the same order as a chain of except clauses would be.
            if isinstance(e, (APIConnectionError, APITimeoutError)):
                logger.warning(f"OpenAI connection issue, switching to fallback: {e}")
                self.fallback_mode = True
            elif isinstance(e, RateLimitError):
                logger.warning(f"OpenAI rate limit hit, using fallback: {e}")
            elif isinstance(e, AuthenticationError):
                logger.error(f"OpenAI authentication failed, switching to fallback: {e}")
                self.fallback_mode = True
            else:
                logger.warning(f"OpenAI service error, using fallback: {e}")

    # Primary unavailable or failed: generate the embedding locally.
    logger.info(f"Using fallback embedding for text: {text[:50]}...")
    return self._generate_fallback_embedding(text)
|
135
|
+
|
136
|
+
async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
    """Embed a batch of texts, degrading to the local fallback when needed.

    An empty (or otherwise falsy) ``texts`` returns ``[]`` without touching
    any service. Otherwise the primary service is tried when configured and
    the instance is not in fallback mode. Connection, timeout and
    authentication failures set ``self.fallback_mode``; rate-limit and other
    errors fall back for this call only.

    Args:
        texts: The texts to embed.

    Returns:
        One embedding per input text, from the primary service or from
        ``self._generate_fallback_embedding``.
    """
    if not texts:
        return []

    # Resolve once whether the primary service may be used for this call.
    primary = None if self.fallback_mode else self.primary_service

    if primary:
        try:
            embeddings = await primary.create_text_embeddings(texts)
            logger.debug(f"✅ Used primary OpenAI service for {len(texts)} texts")
            return embeddings
        except (APIConnectionError, APITimeoutError) as e:
            # Sticky: stop trying the primary service on later calls.
            logger.warning(f"OpenAI connection issue, switching to fallback: {e}")
            self.fallback_mode = True
        except RateLimitError as e:
            # Not sticky: only this batch is served by the fallback.
            logger.warning(f"OpenAI rate limit hit, using fallback: {e}")
        except AuthenticationError as e:
            logger.error(f"OpenAI authentication failed, switching to fallback: {e}")
            self.fallback_mode = True
        except Exception as e:
            logger.warning(f"OpenAI service error, using fallback: {e}")

    # Primary service unavailable or failed: embed every text locally.
    logger.info(f"Using fallback embeddings for {len(texts)} texts")
    return list(map(self._generate_fallback_embedding, texts))
|
162
|
+
|
163
|
+
async def create_chunks(self, text: str, metadata: Optional[Dict] = None,
                        chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
    """Split text into overlapping word windows and attach an embedding to each.

    The text is split on whitespace; ``chunk_size`` and ``overlap`` are
    counted in words, and consecutive windows start ``chunk_size - overlap``
    words apart.

    Args:
        text: The text to chunk. Whitespace-only or empty text yields ``[]``.
        metadata: Optional mapping copied (shallowly) onto every chunk.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by consecutive chunks; must be
            smaller than ``chunk_size``.
        **kwargs: Accepted and ignored.

    Returns:
        A list of dicts with keys ``text``, ``start_index`` and ``end_index``
        (word offsets, end exclusive), ``metadata``, ``embedding`` and
        ``fallback_used`` (the value of ``self.fallback_mode`` after the
        embeddings were created).

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``.
    """
    words = text.split()
    if not words:
        return []

    # Without these checks a zero step makes range() raise an opaque error and
    # a negative step silently produces no chunks for non-empty text.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    step = chunk_size - overlap
    total_words = len(words)

    chunks = []
    chunk_texts = []
    for start in range(0, total_words, step):
        chunk_text = " ".join(words[start:start + chunk_size])
        chunk_texts.append(chunk_text)
        chunks.append({
            "text": chunk_text,
            "start_index": start,
            "end_index": min(start + chunk_size, total_words),
            # Each chunk gets its own copy so mutating one chunk's metadata
            # does not leak into the others or into the caller's dict.
            "metadata": dict(metadata) if metadata else {},
        })

    # Embed all chunk texts in a single batch call.
    embeddings = await self.create_text_embeddings(chunk_texts)

    for chunk, embedding in zip(chunks, embeddings):
        chunk["embedding"] = embedding
        chunk["fallback_used"] = self.fallback_mode

    return chunks
|
194
|
+
|
195
|
+
async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
    """Compute the cosine similarity between two embeddings.

    Args:
        embedding1: First vector.
        embedding2: Second vector; must have the same length as the first.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm,
        when the vectors differ in length, or when any other error occurs
        (errors are logged, never raised).
    """
    import math

    try:
        # zip() would silently truncate to the shorter vector while the norms
        # below use the full vectors, producing a meaningless score.
        if len(embedding1) != len(embedding2):
            raise ValueError(
                f"embedding dimensions differ: {len(embedding1)} != {len(embedding2)}"
            )

        dot_product = sum(a * b for a, b in zip(embedding1, embedding2))
        norm1 = math.sqrt(sum(a * a for a in embedding1))
        norm2 = math.sqrt(sum(b * b for b in embedding2))

        denominator = norm1 * norm2
        if denominator == 0:
            return 0.0

        return dot_product / denominator
    except Exception as e:
        logger.error(f"Error computing similarity: {e}")
        return 0.0
|
211
|
+
|
212
|
+
async def find_similar_texts(
    self,
    query_embedding: List[float],
    candidate_embeddings: List[List[float]],
    top_k: int = 5
) -> List[Dict[str, Any]]:
    """Rank candidate embeddings by similarity to a query embedding.

    Each candidate is scored with ``self.compute_similarity``; results are
    ordered from most to least similar, with ties keeping candidate order.

    Args:
        query_embedding: The vector to compare against.
        candidate_embeddings: The vectors to rank.
        top_k: Upper bound of the slice taken from the ranked list.

    Returns:
        Dicts of the form ``{"index": <candidate position>, "similarity":
        <score>}``, or ``[]`` if an error occurs (it is logged, not raised).
    """
    try:
        scored = [
            {
                "index": position,
                "similarity": await self.compute_similarity(query_embedding, vector),
            }
            for position, vector in enumerate(candidate_embeddings)
        ]

        # sorted() is stable, so equal scores stay in candidate order.
        ranked = sorted(scored, key=lambda entry: entry["similarity"], reverse=True)
        return ranked[:top_k]

    except Exception as e:
        logger.error(f"Error finding similar texts: {e}")
        return []
|
236
|
+
|
237
|
+
def get_embedding_dimension(self) -> int:
    """Return the length of the embedding vectors this service reports."""
    # One fixed, standard dimension is reported for consistency.
    dimension = 1536
    return dimension
|
240
|
+
|
241
|
+
def get_max_input_length(self) -> int:
    """Return the maximum input text length this service reports supporting."""
    # NOTE(review): the unit (characters vs. tokens) is not stated here; confirm.
    max_length = 8192
    return max_length
|
244
|
+
|
245
|
+
def is_fallback_mode(self) -> bool:
    """Report whether the service is currently flagged as being in fallback mode."""
    in_fallback = self.fallback_mode
    return in_fallback
|
248
|
+
|
249
|
+
def get_service_status(self) -> Dict[str, Any]:
    """Build a snapshot of the service's current state and capabilities.

    Returns:
        A dict with the keys ``primary_service_available``, ``fallback_mode``,
        ``tfidf_available``, ``provider``, ``model``, ``embedding_dimension``
        and ``max_input_length``.
    """
    status: Dict[str, Any] = {}

    # The primary service counts as available only when it exists and the
    # instance has not switched to fallback mode.
    status["primary_service_available"] = not (
        self.fallback_mode or self.primary_service is None
    )
    status["fallback_mode"] = self.fallback_mode
    status["tfidf_available"] = self.tfidf_available
    status["provider"] = self.provider_name
    status["model"] = self.model_name
    status["embedding_dimension"] = self.get_embedding_dimension()
    status["max_input_length"] = self.get_max_input_length()

    return status
|
260
|
+
|
261
|
+
async def health_check(self) -> Dict[str, Any]:
    """Probe embedding generation and report the resulting service status.

    A test embedding is created first, and the status snapshot is taken
    afterwards. The probe itself can switch the instance into fallback mode,
    and the reported ``fallback_mode`` / ``primary_service_available`` values
    should agree with ``embedding_test["fallback_used"]``.

    Returns:
        The dict from ``self.get_service_status()`` plus an
        ``embedding_test`` entry: ``{"success": True, "dimension": ...,
        "fallback_used": ...}`` on success, or ``{"success": False,
        "error": ...}`` if the probe raised.
    """
    # Test embedding generation.
    try:
        test_embedding = await self.create_text_embedding("test")
        embedding_test = {
            "success": True,
            "dimension": len(test_embedding),
            "fallback_used": self.fallback_mode,
        }
    except Exception as e:
        embedding_test = {
            "success": False,
            "error": str(e),
        }

    # Snapshot after the probe so state changes it caused are reflected.
    status = self.get_service_status()
    status["embedding_test"] = embedding_test

    return status
|
280
|
+
|
281
|
+
async def close(self):
    """Release resources by closing the primary service, if one is set."""
    primary = self.primary_service
    if primary:
        await primary.close()

    # Logged whether or not a primary service existed.
    logger.info("ResilientEmbedService has been closed.")
|