isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,305 @@
1
+ """
2
+ ISA Embedding Service
3
+
4
+ ISA reranking service using deployed Jina Reranker v2 via Modal
5
+ """
6
+
7
+ import logging
8
+ from typing import Dict, Any, List, Optional
9
+
10
+ try:
11
+ import modal
12
+ MODAL_AVAILABLE = True
13
+ except ImportError:
14
+ MODAL_AVAILABLE = False
15
+ modal = None
16
+
17
+ from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
class ISAEmbedService(BaseEmbedService):
    """
    ISA Embedding Service - calls ISA deployed reranking models

    Supported features:
    - Document reranking (Jina Reranker v2 via Modal)
    - Future: embedding generation
    - Future: semantic similarity computation

    The embedding-related methods required by the base class are present
    but raise ``NotImplementedError``; only reranking is functional.
    """

    def __init__(self,
                 rerank_modal_app_name: str = "isa-embed-rerank",
                 timeout: int = 30):
        """
        Initialize ISA Embedding service

        Args:
            rerank_modal_app_name: Modal reranking app name
            timeout: Request timeout in seconds (stored on the instance;
                not applied to the Modal call by this class)
        """
        # For now, skip BaseService initialization to avoid config validation
        # TODO: Properly configure ISA provider in config system
        self.provider_name = "isa"
        self.model_name = "isa-jina-reranker-v2-service"
        self.rerank_modal_app_name = rerank_modal_app_name
        self.timeout = timeout

        # Start from the "unavailable" state; only a successful lookup flips it.
        self.modal_app = None
        self.modal_service = None

        if MODAL_AVAILABLE:
            try:
                # Get deployed Modal application
                self.modal_app = modal.App.lookup(rerank_modal_app_name)
                logger.info(f"Connected to Modal rerank app: {rerank_modal_app_name}")

                self.modal_service = True  # Mark service as available
                logger.info("Modal rerank app connection established")
            except Exception as e:
                logger.warning(f"Failed to connect to Modal rerank app: {e}")
                self.modal_app = None
                self.modal_service = None
        else:
            logger.warning("Modal SDK not available")

        # Service statistics
        self.request_count = 0
        self.total_cost = 0.0

    async def rerank_documents(
        self,
        query: str,
        documents: List[str],
        top_k: Optional[int] = None,
        return_documents: bool = True
    ) -> Dict[str, Any]:
        """
        Rerank documents using Jina Reranker v2

        Args:
            query: Query string
            documents: List of documents to rerank
            top_k: Return top k results (None = all)
            return_documents: Whether to include document content in results

        Returns:
            A dict with ``success``, ``provider`` and ``service`` keys. On
            success it also carries ``result`` and ``metadata``; on failure
            it carries ``error`` (and ``details`` when the remote service
            answered with a failure payload). This method never raises.
        """
        try:
            if not self.modal_app or not self.modal_service:
                return {
                    'success': False,
                    'provider': 'ISA',
                    'service': 'isa-embed-rerank',
                    'error': 'Modal rerank app or service not available'
                }

            # Call reranking service directly via Modal SDK
            result = await self._call_rerank_service(query, documents, top_k, return_documents)

            if result and result.get('success', False):
                self.request_count += 1

                # Record cost. A missing or None cost counts as zero so that
                # an otherwise successful rerank is not reported as a failure.
                billing = result.get('billing') or {}
                self.total_cost += billing.get('estimated_cost_usd') or 0

                # Format response to match expected structure
                return {
                    'success': True,
                    'provider': 'ISA',
                    'service': 'isa-embed-rerank',
                    'result': {
                        'results': result.get('results', []),
                        'processing_time': result.get('processing_time'),
                        'billing': result.get('billing', {}),
                        'query': result.get('query'),
                        'num_documents': result.get('num_documents'),
                        'returned_count': result.get('returned_count')
                    },
                    'metadata': {
                        'model_used': result.get('model'),
                        'provider': result.get('provider', 'ISA'),
                        'billing': result.get('billing', {})
                    }
                }

            reason = result.get("error", "Unknown error") if result else "No response"
            return {
                'success': False,
                'provider': 'ISA',
                'service': 'isa-embed-rerank',
                'error': f'Rerank service returned error: {reason}',
                'details': result
            }

        except Exception as e:
            # exc_info routes the traceback through logging instead of stderr.
            logger.error(f"ISA document reranking failed: {e}", exc_info=True)
            return {
                'success': False,
                'provider': 'ISA',
                'service': 'isa-embed-rerank',
                'error': str(e)
            }

    async def _call_rerank_service(
        self,
        query: str,
        documents: List[str],
        top_k: Optional[int],
        return_documents: bool
    ) -> Dict[str, Any]:
        """
        Call reranking service via Modal SDK

        Looks up the deployed ``ISAEmbedRerankService`` class in the
        configured Modal app and awaits its ``rerank_documents`` method.

        Returns:
            Whatever the remote method returns, or
            ``{'success': False, 'error': ...}`` if the call raised.
        """
        try:
            import modal

            logger.info("Calling Jina Reranker v2 service via Modal SDK...")

            # Correct Modal SDK usage: call deployed class method
            ISAEmbedRerankService = modal.Cls.from_name(
                app_name=self.rerank_modal_app_name,
                name="ISAEmbedRerankService"
            )

            # Create instance and call method. The awaitable `.aio` variant is
            # used because the blocking `.remote(...)` form would stall the
            # event loop for the whole duration of the remote call.
            instance = ISAEmbedRerankService()
            result = await instance.rerank_documents.remote.aio(
                query=query,
                documents=documents,
                top_k=top_k,
                return_documents=return_documents
            )

            logger.info("Modal rerank SDK call successful")
            return result

        except Exception as e:
            logger.error(f"Modal rerank SDK call failed: {e}")
            return {
                'success': False,
                'error': f'Modal rerank SDK error: {str(e)}'
            }

    # ==================== Embedding methods (future implementation) ====================

    async def create_text_embedding(self, text: str) -> List[float]:
        """Create single text embedding - not yet implemented"""
        raise NotImplementedError("Text embedding not yet implemented in ISA service")

    async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Create multiple text embeddings - not yet implemented"""
        raise NotImplementedError("Text embeddings not yet implemented in ISA service")

    async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
        """Create text chunks with embeddings - not yet implemented"""
        raise NotImplementedError("Text chunking not yet implemented in ISA service")

    async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
        """Compute embedding similarity - not yet implemented"""
        raise NotImplementedError("Similarity computation not yet implemented in ISA service")

    async def find_similar_texts(
        self,
        query_embedding: List[float],
        candidate_embeddings: List[List[float]],
        top_k: int = 5
    ) -> List[Dict[str, Any]]:
        """Find similar texts - not yet implemented"""
        raise NotImplementedError("Similar text search not yet implemented in ISA service")

    def get_embedding_dimension(self) -> int:
        """Get embedding dimension - not applicable for rerank-only service"""
        raise NotImplementedError("Embedding dimension not available for rerank-only service")

    def get_max_input_length(self) -> int:
        """Get maximum input length"""
        return 1024  # Jina Reranker v2 max length

    # ==================== Service management methods ====================

    async def health_check(self) -> Dict[str, Any]:
        """
        Check ISA reranking service health

        Issues a one-document rerank request. ``success`` reports that the
        check itself ran; the outcome of the probe is in ``status`` and
        ``rerank_service``.
        """
        try:
            # Simple health check: call reranking service
            test_result = await self.rerank_documents(
                query="test",
                documents=["test document"],
                top_k=1,
                return_documents=False
            )

            return {
                'success': True,
                'provider': 'ISA',
                'service': 'isa-embed-rerank',
                'status': 'healthy' if test_result.get('success') else 'error',
                'rerank_service': test_result.get('success', False),
                'usage_stats': {
                    'total_requests': self.request_count,
                    'total_cost_usd': round(self.total_cost, 6)
                }
            }

        except Exception as e:
            return {
                'success': False,
                'provider': 'ISA',
                'service': 'isa-embed-rerank',
                'status': 'error',
                'error': str(e)
            }

    async def get_usage_stats(self) -> Dict[str, Any]:
        """
        Get usage statistics

        Returns the client-side request and cost counters. ``modal_stats``
        is always empty for now: no Modal-side statistics are collected.
        """
        try:
            # Placeholder: can extend to get Modal service stats
            modal_stats: Dict[str, Any] = {}

            return {
                'provider': 'ISA',
                'service': 'isa-embed-rerank',
                'client_stats': {
                    'total_requests': self.request_count,
                    'total_cost_usd': round(self.total_cost, 6)
                },
                'modal_stats': modal_stats,
                'combined_cost': round(self.total_cost, 6)
            }

        except Exception as e:
            return {
                'provider': 'ISA',
                'service': 'isa-embed-rerank',
                'error': str(e)
            }

    def get_supported_tasks(self) -> List[str]:
        """Get supported task list"""
        return [
            'rerank',            # Document reranking
            'rerank_documents',  # Document reranking (alias)
            'document_ranking'   # Document ranking (alias)
        ]

    def get_supported_formats(self) -> List[str]:
        """Get supported formats"""
        return ['text']  # Text only

    async def close(self):
        """Cleanup resources"""
        # Modal client doesn't need explicit closure
        pass
@@ -4,6 +4,7 @@ import asyncio
4
4
  from typing import List, Dict, Any, Optional
5
5
 
6
6
  from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
7
+ from isa_model.core.config.config_manager import ConfigManager
7
8
 
8
9
  logger = logging.getLogger(__name__)
9
10
 
@@ -21,9 +22,20 @@ class OllamaEmbedService(BaseEmbedService):
21
22
 
22
23
  # Initialize HTTP client with provider configuration
23
24
  try:
24
- host = provider_config.get("host", "localhost")
25
- port = provider_config.get("port", 11434)
26
- base_url = f"http://{host}:{port}"
25
+ config_manager = ConfigManager()
26
+ # Use Consul discovery with fallback
27
+ default_base_url = config_manager.get_ollama_url()
28
+
29
+ if "base_url" in provider_config:
30
+ base_url = provider_config["base_url"]
31
+ else:
32
+ host = provider_config.get("host", "localhost")
33
+ port = provider_config.get("port", 11434)
34
+ base_url = provider_config.get("base_url", f"http://{host}:{port}")
35
+
36
+ # Use config manager default (Consul discovery) if still not set
37
+ if base_url == f"http://localhost:11434":
38
+ base_url = default_base_url
27
39
 
28
40
  self.client = httpx.AsyncClient(base_url=base_url, timeout=30.0)
29
41
 
@@ -129,11 +129,9 @@ class OpenAIEmbedService(BaseEmbedService):
129
129
  logger.error(f"Error creating text embeddings: {e}")
130
130
  raise
131
131
 
132
- async def create_chunks(self, text: str, metadata: Optional[Dict] = None) -> List[Dict]:
132
+ async def create_chunks(self, text: str, metadata: Optional[Dict] = None, chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
133
133
  """Create text chunks with embeddings"""
134
- # Chunk size optimized for OpenAI models (roughly 512 tokens)
135
- chunk_size = 400 # words
136
- overlap = 50 # word overlap between chunks
134
+ # Use provided chunk_size and overlap, or defaults optimized for OpenAI models
137
135
 
138
136
  words = text.split()
139
137
  if not words:
@@ -0,0 +1,285 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Resilient Embedding Service - Provides fallback mechanisms for embedding operations
6
+ Automatically handles OpenAI API failures with local embedding alternatives
7
+ """
8
+
9
+ import logging
10
+ import random
11
+ import numpy as np
12
+ from typing import List, Dict, Any, Optional, Union
13
+ from openai import APIConnectionError, APITimeoutError, RateLimitError, AuthenticationError
14
+
15
+ from isa_model.inference.services.embedding.openai_embed_service import OpenAIEmbedService
16
+ from isa_model.inference.services.embedding.base_embed_service import BaseEmbedService
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ class ResilientEmbedService(BaseEmbedService):
21
+ """
22
+ Resilient embedding service with automatic fallback mechanisms
23
+
24
+ When OpenAI service fails, automatically falls back to:
25
+ 1. Simple TF-IDF based embeddings
26
+ 2. Random embeddings (for testing/demo purposes)
27
+ """
28
+
29
def __init__(self, provider_name: str = "openai", model_name: str = "text-embedding-3-small", **kwargs):
    """
    Set up the primary OpenAI service and the local fallback.

    If constructing the OpenAI service raises, the instance starts in
    fallback mode with no primary service. The fallback vectorizer is
    initialized in either case.
    """
    super().__init__(provider_name, model_name, **kwargs)

    primary = None
    start_in_fallback = False

    # Try to initialize OpenAI service
    try:
        primary = OpenAIEmbedService(provider_name, model_name, **kwargs)
        logger.info("✅ Primary OpenAI embedding service initialized")
    except Exception as init_error:
        logger.warning(f"Failed to initialize OpenAI service, starting in fallback mode: {init_error}")
        start_in_fallback = True

    self.primary_service = primary
    self.fallback_mode = start_in_fallback

    # Initialize TF-IDF vectorizer for fallback
    self._init_fallback_vectorizer()
45
+
46
def _init_fallback_vectorizer(self):
    """
    Prepare the TF-IDF vectorizer used when the primary service is unavailable.

    Sets ``self.tfidf_available`` to True once the vectorizer is fitted, or
    to False when scikit-learn cannot be imported.
    """
    try:
        from sklearn.feature_extraction.text import TfidfVectorizer
    except ImportError:
        logger.warning("scikit-learn not available, using random embeddings as fallback")
        self.tfidf_available = False
        return

    # Small fixed corpus: fitting on it gives the vectorizer a stable vocabulary
    seed_corpus = [
        "hello world", "machine learning", "artificial intelligence",
        "data science", "natural language processing", "computer vision",
        "deep learning", "neural networks", "text analysis",
        "information retrieval", "semantic search", "embeddings"
    ]

    # Simple TF-IDF vectorizer with a capped feature count
    vectorizer = TfidfVectorizer(
        max_features=1536,  # Match OpenAI dimensions
        stop_words='english',
        ngram_range=(1, 2),
        lowercase=True,
        strip_accents='unicode'
    )
    self.tfidf_vectorizer = vectorizer
    vectorizer.fit(seed_corpus)

    self.tfidf_available = True
    logger.info("✅ TF-IDF fallback vectorizer initialized")
74
+
75
def _generate_fallback_embedding(self, text: str, dimension: int = 1536) -> List[float]:
    """
    Generate a fallback embedding for a single text.

    Uses the fitted TF-IDF vectorizer when available; otherwise (or if the
    TF-IDF path raises) produces a pseudo-random unit vector derived from
    the text.

    Args:
        text: Text to embed.
        dimension: Length of the returned vector.

    Returns:
        A list of ``dimension`` floats. It is normalized to unit length
        unless the TF-IDF vector is all zeros, in which case it is returned
        as-is.
    """
    if self.tfidf_available and hasattr(self, 'tfidf_vectorizer'):
        try:
            # Use TF-IDF for more meaningful embeddings
            tfidf_vector = self.tfidf_vectorizer.transform([text]).toarray()[0]

            # Pad or truncate to desired dimension
            if len(tfidf_vector) < dimension:
                padding = [0.0] * (dimension - len(tfidf_vector))
                tfidf_vector = np.concatenate([tfidf_vector, padding])
            elif len(tfidf_vector) > dimension:
                tfidf_vector = tfidf_vector[:dimension]

            # Normalize to unit vector
            norm = np.linalg.norm(tfidf_vector)
            if norm > 0:
                tfidf_vector = tfidf_vector / norm

            return tfidf_vector.tolist()

        except Exception as e:
            logger.warning(f"TF-IDF fallback failed: {e}, using random embedding")

    # Random embedding as last resort (normalized).
    # The seed comes from a SHA-256 digest rather than hash(text): str hashes
    # are salted per process, so hash() would give a different vector for the
    # same text on every run. A private Random instance is used so the global
    # `random` module state is left untouched.
    import hashlib

    digest = hashlib.sha256(text.encode("utf-8", errors="replace")).digest()
    rng = random.Random(int.from_bytes(digest[:8], "big"))
    embedding = [rng.gauss(0, 1) for _ in range(dimension)]

    # Normalize to unit vector
    norm = float(np.sqrt(sum(x * x for x in embedding)))
    if norm > 0:
        embedding = [x / norm for x in embedding]

    return embedding
110
+
111
async def create_text_embedding(self, text: str) -> List[float]:
    """
    Embed one text, preferring the primary service.

    Connection, timeout and authentication errors from the primary service
    switch the instance into fallback mode. Rate-limit and other errors only
    fall back for this call. In every failure case a locally generated
    fallback embedding is returned instead of raising.
    """
    primary_usable = not self.fallback_mode and self.primary_service

    if primary_usable:
        try:
            vector = await self.primary_service.create_text_embedding(text)
            logger.debug("✅ Used primary OpenAI service")
            return vector
        except (APIConnectionError, APITimeoutError) as conn_err:
            logger.warning(f"OpenAI connection issue, switching to fallback: {conn_err}")
            self.fallback_mode = True
        except RateLimitError as rate_err:
            # Transient: keep the primary service enabled for later calls
            logger.warning(f"OpenAI rate limit hit, using fallback: {rate_err}")
        except AuthenticationError as auth_err:
            logger.error(f"OpenAI authentication failed, switching to fallback: {auth_err}")
            self.fallback_mode = True
        except Exception as other_err:
            logger.warning(f"OpenAI service error, using fallback: {other_err}")

    # Primary unavailable or failed: use the local fallback
    logger.info(f"Using fallback embedding for text: {text[:50]}...")
    return self._generate_fallback_embedding(text)
135
+
136
async def create_text_embeddings(self, texts: List[str]) -> List[List[float]]:
    """
    Embed several texts, preferring the primary service.

    Returns an empty list for empty input. Error handling matches the
    single-text variant: connection, timeout and authentication errors
    switch the instance into fallback mode, while rate-limit and other
    errors only fall back for this call.
    """
    if not texts:
        return []

    primary_usable = not self.fallback_mode and self.primary_service

    if primary_usable:
        try:
            vectors = await self.primary_service.create_text_embeddings(texts)
            logger.debug(f"✅ Used primary OpenAI service for {len(texts)} texts")
            return vectors
        except (APIConnectionError, APITimeoutError) as conn_err:
            logger.warning(f"OpenAI connection issue, switching to fallback: {conn_err}")
            self.fallback_mode = True
        except RateLimitError as rate_err:
            # Transient: keep the primary service enabled for later calls
            logger.warning(f"OpenAI rate limit hit, using fallback: {rate_err}")
        except AuthenticationError as auth_err:
            logger.error(f"OpenAI authentication failed, switching to fallback: {auth_err}")
            self.fallback_mode = True
        except Exception as other_err:
            logger.warning(f"OpenAI service error, using fallback: {other_err}")

    # Primary unavailable or failed: embed every text locally
    logger.info(f"Using fallback embeddings for {len(texts)} texts")
    fallback_vectors = []
    for item in texts:
        fallback_vectors.append(self._generate_fallback_embedding(item))
    return fallback_vectors
162
+
163
async def create_chunks(self, text: str, metadata: Optional[Dict] = None,
                        chunk_size: int = 400, overlap: int = 50, **kwargs) -> List[Dict]:
    """Split text into overlapping word windows and embed each window.

    The text is split on whitespace; windows of ``chunk_size`` words start
    every ``chunk_size - overlap`` words, so the final window may be
    shorter than ``chunk_size``.

    Args:
        text: The text to chunk.
        metadata: Optional metadata attached to every chunk. Each chunk
            receives its own shallow copy, so mutating one chunk's
            metadata does not affect the others or the caller's dict.
        chunk_size: Number of words per window; must be positive.
        overlap: Number of words shared by consecutive windows; must be
            smaller than ``chunk_size`` so the window start advances.
        **kwargs: Accepted for interface compatibility and ignored.

    Returns:
        A list of dicts with keys ``text``, ``start_index``, ``end_index``
        (word offsets), ``metadata``, ``embedding`` and ``fallback_used``.
        Empty when the text contains no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        # A zero step makes range() raise an opaque error, and a negative
        # step yields no windows at all, silently dropping the text.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    words = text.split()
    if not words:
        return []

    step = chunk_size - overlap
    total_words = len(words)

    chunks = []
    for start in range(0, total_words, step):
        chunks.append({
            "text": " ".join(words[start:start + chunk_size]),
            "start_index": start,
            "end_index": min(start + chunk_size, total_words),
            # Copy so chunks never share one mutable metadata dict.
            "metadata": dict(metadata) if metadata else {},
        })

    # Embed all windows in one batch call.
    embeddings = await self.create_text_embeddings([chunk["text"] for chunk in chunks])

    for chunk, embedding in zip(chunks, embeddings):
        chunk["embedding"] = embedding
        # Mirrors the instance-level flag as it stands after embedding.
        chunk["fallback_used"] = self.fallback_mode

    return chunks
194
+
195
async def compute_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
    """Return the cosine similarity of two embedding vectors.

    The dot product pairs elements with ``zip``, so trailing elements of a
    longer vector do not contribute to it. The result is ``0.0`` when the
    product of the two magnitudes is zero, and also when any exception is
    raised during the computation (the error is logged).

    Args:
        embedding1: First vector.
        embedding2: Second vector.

    Returns:
        The cosine similarity, or ``0.0`` as described above.
    """
    import math

    try:
        numerator = sum(x * y for x, y in zip(embedding1, embedding2))
        magnitude1 = math.sqrt(sum(x * x for x in embedding1))
        magnitude2 = math.sqrt(sum(y * y for y in embedding2))
        denominator = magnitude1 * magnitude2

        # A zero-length vector has no direction; avoid dividing by zero.
        if denominator == 0:
            return 0.0

        return numerator / denominator
    except Exception as exc:
        logger.error(f"Error computing similarity: {exc}")
        return 0.0
211
+
212
async def find_similar_texts(
    self,
    query_embedding: List[float],
    candidate_embeddings: List[List[float]],
    top_k: int = 5
) -> List[Dict[str, Any]]:
    """Rank candidate embeddings by similarity to a query embedding.

    Each candidate is scored with ``self.compute_similarity`` and the
    scores are sorted from highest to lowest; candidates with equal
    scores keep their input order.

    Args:
        query_embedding: The embedding to compare against.
        candidate_embeddings: The embeddings to rank.
        top_k: How many leading entries of the ranking to return.

    Returns:
        Dicts with keys ``index`` (position in ``candidate_embeddings``)
        and ``similarity``, best first. An empty list is returned if any
        exception is raised (the error is logged).
    """
    try:
        scored = [
            {
                "index": position,
                "similarity": await self.compute_similarity(query_embedding, vector),
            }
            for position, vector in enumerate(candidate_embeddings)
        ]

        # Highest similarity first, then keep only the leading slice.
        ranked = sorted(scored, key=lambda entry: entry["similarity"], reverse=True)
        return ranked[:top_k]

    except Exception as exc:
        logger.error(f"Error finding similar texts: {exc}")
        return []
236
+
237
def get_embedding_dimension(self) -> int:
    """Return the fixed embedding dimension this service reports (1536)."""
    dimension = 1536  # Standard dimension for consistency
    return dimension
240
+
241
def get_max_input_length(self) -> int:
    """Return the maximum input text length this service reports (8192)."""
    max_length = 8192
    return max_length
244
+
245
def is_fallback_mode(self) -> bool:
    """Return the current value of the instance's ``fallback_mode`` flag."""
    in_fallback = self.fallback_mode
    return in_fallback
248
+
249
def get_service_status(self) -> Dict[str, Any]:
    """Summarize the service's current state as a dictionary.

    Returns:
        A dict with keys ``primary_service_available`` (true only when not
        in fallback mode and a primary service object is set),
        ``fallback_mode``, ``tfidf_available``, ``provider``, ``model``,
        ``embedding_dimension`` and ``max_input_length``.
    """
    # The primary service counts as available only outside fallback mode.
    primary_ready = not self.fallback_mode and self.primary_service is not None

    return dict(
        primary_service_available=primary_ready,
        fallback_mode=self.fallback_mode,
        tfidf_available=self.tfidf_available,
        provider=self.provider_name,
        model=self.model_name,
        embedding_dimension=self.get_embedding_dimension(),
        max_input_length=self.get_max_input_length(),
    )
260
+
261
async def health_check(self) -> Dict[str, Any]:
    """Report service status together with a live embedding probe.

    Starts from ``self.get_service_status()`` and adds an
    ``embedding_test`` entry describing the outcome of embedding the
    string ``"test"``.

    Returns:
        The status dict. ``embedding_test`` holds ``success``,
        ``dimension`` and ``fallback_used`` when the probe succeeds, or
        ``success`` and ``error`` when it raises.
    """
    report = self.get_service_status()

    # Probe the embedding path end to end.
    try:
        probe_vector = await self.create_text_embedding("test")
        outcome = {
            "success": True,
            "dimension": len(probe_vector),
            "fallback_used": self.fallback_mode
        }
    except Exception as exc:
        outcome = {
            "success": False,
            "error": str(exc)
        }

    report["embedding_test"] = outcome
    return report
280
+
281
async def close(self):
    """Close the primary service, if one is set, and log the shutdown."""
    primary = self.primary_service
    if primary:
        await primary.close()
    logger.info("ResilientEmbedService has been closed.")