isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/inference/services/llm/huggingface_llm_service.py
@@ -0,0 +1,581 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ ISA LLM Service - Inference client for Modal-deployed HuggingFace models
+ Supports custom trained models deployed on Modal infrastructure
+ """
+
+ import logging
+ import os
+ from typing import Dict, Any, Optional, List
+
+ try:
+     import modal
+     MODAL_AVAILABLE = True
+ except ImportError:
+     MODAL_AVAILABLE = False
+     modal = None
+
+ from isa_model.inference.services.base_service import BaseService
+ from isa_model.core.models.model_manager import ModelManager
+ from isa_model.core.config import ConfigManager
+
+ logger = logging.getLogger(__name__)
+
+ class ISALLMService(BaseService):
+     """
+     ISA LLM Service - Client for Modal-deployed HuggingFace models
+     Calls ISA's own deployed LLM inference services on Modal
+     """
+
+     def __init__(
+         self,
+         provider_name: str = "isa",
+         model_name: str = None,
+         model_manager: ModelManager = None,
+         config_manager: ConfigManager = None,
+         modal_app_name: str = "isa-llm-inference",
+         timeout: int = 60,
+         **kwargs
+     ):
+         # Skip BaseService init to avoid config validation for now
+         self.provider_name = provider_name
+         self.model_name = model_name or "isa-llm-service"
+         self.modal_app_name = modal_app_name
+         self.timeout = timeout
+
+         # Initialize Modal client
+         if MODAL_AVAILABLE:
+             try:
+                 # Get deployed Modal app
+                 self.modal_app = modal.App.lookup(modal_app_name)
+                 logger.info(f"Connected to Modal LLM app: {modal_app_name}")
+
+                 self.modal_service = True
+                 logger.info("Modal LLM service connection established")
+
+             except Exception as e:
+                 logger.warning(f"Failed to connect to Modal LLM app: {e}")
+                 self.modal_app = None
+                 self.modal_service = None
+         else:
+             logger.warning("Modal SDK not available")
+             self.modal_app = None
+             self.modal_service = None
+
+         # Service statistics
+         self.request_count = 0
+         self.total_cost = 0.0
+
+         # Fallback mode for when Modal is not available
+         self.fallback_mode = not MODAL_AVAILABLE or not self.modal_service
+
+     async def _fallback_response(self, method_name: str, **kwargs) -> Dict[str, Any]:
+         """
+         Provide fallback responses when Modal service is not available
+         """
+         import time
+         import random
+
+         if method_name == "generate_text":
+             prompt = kwargs.get("prompt", "")
+             # Simple rule-based responses for demo purposes
+             responses = [
+                 "This is a simulated ISA LLM response.",
+                 "Sorry, the Modal service is currently unavailable; this is a fallback response.",
+                 "The ISA model is under maintenance, please try again later.",
+                 f"You said: {prompt}. Understood, but the model is currently unavailable."
+             ]
+
+             generated_text = random.choice(responses)
+
+             return {
+                 "success": True,
+                 "text": generated_text,
+                 "full_text": prompt + " " + generated_text,
+                 "prompt": prompt,
+                 "model_id": kwargs.get("model_id", "isa-llm-fallback"),
+                 "provider": "ISA",
+                 "service": "isa-llm",
+                 "fallback": True,
+                 "generation_config": kwargs.get("generation_config", {}),
+                 "metadata": {
+                     "processing_time": random.uniform(0.5, 2.0),
+                     "device": "cpu",
+                     "input_tokens": len(prompt.split()),
+                     "output_tokens": len(generated_text.split()),
+                     "note": "This is a fallback response - Modal service not available"
+                 }
+             }
+
+         elif method_name == "chat_completion":
+             messages = kwargs.get("messages", [])
+             user_message = ""
+             if messages:
+                 user_message = messages[-1].get("content", "")
+
+             chat_responses = [
+                 "Sorry, the ISA model is currently unavailable; this is a simulated response.",
+                 "I am the fallback version of the ISA model, with limited functionality.",
+                 f"I heard you say: {user_message}, but I cannot give a complete reply right now.",
+                 "The Modal service is restarting; please try the full ISA model again later."
+             ]
+
+             response_text = random.choice(chat_responses)
+
+             return {
+                 "success": True,
+                 "text": response_text,
+                 "role": "assistant",
+                 "messages": messages,
+                 "model_id": kwargs.get("model_id", "isa-llm-fallback"),
+                 "provider": "ISA",
+                 "service": "isa-llm",
+                 "fallback": True,
+                 "metadata": {
+                     "processing_time": random.uniform(0.3, 1.5),
+                     "device": "cpu",
+                     "note": "This is a fallback response - Modal service not available"
+                 }
+             }
+
+         elif method_name == "get_model_info":
+             return {
+                 "success": True,
+                 "model_id": kwargs.get("model_id", "isa-llm-fallback"),
+                 "provider": "ISA",
+                 "service": "isa-llm",
+                 "architecture": "unknown (fallback mode)",
+                 "fallback": True,
+                 "note": "Modal service not available - showing fallback info"
+             }
+
+         elif method_name == "health_check":
+             return {
+                 "success": True,
+                 "status": "fallback",
+                 "service": "isa-llm",
+                 "provider": "ISA",
+                 "device": "cpu",
+                 "fallback": True,
+                 "message": "Modal service not available - running in fallback mode"
+             }
+
+         else:
+             return {
+                 "success": False,
+                 "error": f"Method {method_name} not supported in fallback mode",
+                 "fallback": True
+             }
+
+     async def _call_modal_llm_service(
+         self,
+         method_name: str,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Call Modal LLM service via SDK with improved error handling and fallback
+         """
+         # If in fallback mode, use fallback response immediately
+         if self.fallback_mode:
+             logger.info(f"Using fallback mode for {method_name}")
+             return await self._fallback_response(method_name, **kwargs)
+
+         try:
+             if not MODAL_AVAILABLE:
+                 logger.warning("Modal SDK not available, switching to fallback mode")
+                 self.fallback_mode = True
+                 return await self._fallback_response(method_name, **kwargs)
+
+             if not self.modal_app or not self.modal_service:
+                 logger.warning("Modal app/service not available, switching to fallback mode")
+                 self.fallback_mode = True
+                 return await self._fallback_response(method_name, **kwargs)
+
+             logger.info(f"Calling Modal LLM service method: {method_name}")
+
+             try:
+                 # Use Modal SDK to call the service
+                 ISALLMServiceCls = modal.Cls.from_name(
+                     app_name=self.modal_app_name,
+                     name="ISALLMService"
+                 )
+
+                 # Create instance and call method
+                 instance = ISALLMServiceCls()
+                 method = getattr(instance, method_name)
+                 result = method.remote(**kwargs)
+
+                 logger.info("✅ Modal LLM service call successful")
+                 return result
+
+             except modal.exception.NotFoundError:
+                 logger.warning(f"Modal app not found, switching to fallback mode")
+                 self.fallback_mode = True
+                 return await self._fallback_response(method_name, **kwargs)
+
+             except modal.exception.ConnectionError:
+                 logger.warning(f"Modal connection error, switching to fallback mode")
+                 self.fallback_mode = True
+                 return await self._fallback_response(method_name, **kwargs)
+
+         except Exception as e:
+             logger.error(f"Modal LLM service call failed: {e}, switching to fallback mode")
+             self.fallback_mode = True
+             return await self._fallback_response(method_name, **kwargs)
+
+     async def complete(
+         self,
+         prompt: str,
+         model_id: str = None,
+         max_length: Optional[int] = 50,
+         temperature: float = 0.7,
+         do_sample: bool = True,
+         top_p: float = 0.9,
+         repetition_penalty: float = 1.1,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Generate completion using Modal-deployed LLM service
+
+         Args:
+             prompt: Input text prompt
+             model_id: HuggingFace model ID to use
+             max_length: Maximum length of generated text
+             temperature: Sampling temperature
+             do_sample: Whether to use sampling
+             top_p: Top-p sampling parameter
+             repetition_penalty: Repetition penalty
+             **kwargs: Additional generation parameters
+
+         Returns:
+             Dictionary containing generated text and metadata
+         """
+         try:
+             # Get HF token from environment
+             hf_token = os.getenv("HF_TOKEN")
+
+             # Use provided model_id or default trained model
+             target_model = model_id or "xenobordom/dialogpt-isa-trained-1755493402"
+
+             # Call Modal service
+             result = await self._call_modal_llm_service(
+                 method_name="generate_text",
+                 prompt=prompt,
+                 model_id=target_model,
+                 hf_token=hf_token,
+                 max_length=max_length,
+                 temperature=temperature,
+                 do_sample=do_sample,
+                 top_p=top_p,
+                 repetition_penalty=repetition_penalty,
+                 **kwargs
+             )
+
+             if result and result.get('success', False):
+                 self.request_count += 1
+
+                 # Add cost tracking if available
+                 if 'billing' in result:
+                     cost = result['billing'].get('estimated_cost_usd', 0)
+                     self.total_cost += cost
+
+                 return result
+             else:
+                 return {
+                     'success': False,
+                     'provider': 'ISA',
+                     'service': 'isa-llm',
+                     'error': f'Modal LLM service returned error: {result.get("error", "Unknown error") if result else "No response"}',
+                     'details': result
+                 }
+
+         except Exception as e:
+             logger.error(f"ISA LLM completion failed: {e}")
+             return {
+                 'success': False,
+                 'provider': 'ISA',
+                 'service': 'isa-llm',
+                 'error': str(e)
+             }
+
+     async def chat(
+         self,
+         messages: List[Dict[str, str]],
+         model_id: str = None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Chat completion using Modal-deployed LLM service
+
+         Args:
+             messages: List of message dictionaries with 'role' and 'content'
+             model_id: HuggingFace model ID to use
+             **kwargs: Additional generation parameters
+
+         Returns:
+             Dictionary containing generated response and metadata
+         """
+         try:
+             # Get HF token from environment
+             hf_token = os.getenv("HF_TOKEN")
+
+             # Use provided model_id or default trained model
+             target_model = model_id or "xenobordom/dialogpt-isa-trained-1755493402"
+
+             # Call Modal service
+             result = await self._call_modal_llm_service(
+                 method_name="chat_completion",
+                 messages=messages,
+                 model_id=target_model,
+                 hf_token=hf_token,
+                 **kwargs
+             )
+
+             if result and result.get('success', False):
+                 self.request_count += 1
+
+                 # Add cost tracking if available
+                 if 'billing' in result:
+                     cost = result['billing'].get('estimated_cost_usd', 0)
+                     self.total_cost += cost
+
+                 return result
+             else:
+                 return {
+                     'success': False,
+                     'provider': 'ISA',
+                     'service': 'isa-llm',
+                     'error': f'Modal LLM service returned error: {result.get("error", "Unknown error") if result else "No response"}',
+                     'details': result
+                 }
+
+         except Exception as e:
+             logger.error(f"ISA LLM chat completion failed: {e}")
+             return {
+                 'success': False,
+                 'provider': 'ISA',
+                 'service': 'isa-llm',
+                 'error': str(e)
+             }
+
+     async def get_model_info(self, model_id: str = None) -> Dict[str, Any]:
+         """Get information about the model via Modal service"""
+         try:
+             # Get HF token from environment
+             hf_token = os.getenv("HF_TOKEN")
+
+             # Use provided model_id or default trained model
+             target_model = model_id or "xenobordom/dialogpt-isa-trained-1755493402"
+
+             # Call Modal service
+             result = await self._call_modal_llm_service(
+                 method_name="get_model_info",
+                 model_id=target_model,
+                 hf_token=hf_token
+             )
+
+             if result and result.get('success', False):
+                 return result
+             else:
+                 return {
+                     'success': False,
+                     'provider': 'ISA',
+                     'service': 'isa-llm',
+                     'error': f'Modal LLM service returned error: {result.get("error", "Unknown error") if result else "No response"}'
+                 }
+
+         except Exception as e:
+             logger.error(f"Error getting model info: {e}")
+             return {
+                 'success': False,
+                 'error': str(e)
+             }
+
+     async def health_check(self) -> Dict[str, Any]:
+         """Check ISA LLM service health"""
+         try:
+             # Call Modal service health check
+             result = await self._call_modal_llm_service(
+                 method_name="health_check"
+             )
+
+             if result and result.get('success', False):
+                 return {
+                     'success': True,
+                     'provider': 'ISA',
+                     'service': 'isa-llm',
+                     'status': 'healthy',
+                     'modal_service': result,
+                     'usage_stats': {
+                         'total_requests': self.request_count,
+                         'total_cost_usd': round(self.total_cost, 6)
+                     }
+                 }
+             else:
+                 return {
+                     'success': False,
+                     'provider': 'ISA',
+                     'service': 'isa-llm',
+                     'status': 'error',
+                     'error': f'Modal service error: {result.get("error", "Unknown error") if result else "No response"}'
+                 }
+
+         except Exception as e:
+             return {
+                 'success': False,
+                 'provider': 'ISA',
+                 'service': 'isa-llm',
+                 'status': 'error',
+                 'error': str(e)
+             }
+
+     def get_supported_tasks(self) -> List[str]:
+         """Get supported task list"""
+         return [
+             'generate',  # Text generation
+             'chat',      # Chat completion
+             'complete'   # Text completion
+         ]
+
+     def get_supported_models(self) -> List[str]:
+         """Get supported model types"""
+         return [
+             'dialogpt',  # DialoGPT models
+             'gpt2',      # GPT-2 models
+             'custom'     # Custom trained models
+         ]
+
451
+ """
452
+ Unified invoke method for ISA LLM service compatibility
453
+ Required by the ISA Model client interface
454
+ """
455
+ try:
456
+ if task in ["chat", "generate", "complete"]:
457
+ # Handle chat tasks by converting to message format
458
+ if task == "chat":
459
+ if isinstance(input_data, str):
460
+ messages = [{"role": "user", "content": input_data}]
461
+ elif isinstance(input_data, list):
462
+ messages = input_data
463
+ else:
464
+ messages = [{"role": "user", "content": str(input_data)}]
465
+
466
+ result = await self.chat(messages, **kwargs)
467
+
468
+ # Convert result to unified format
469
+ if result.get('success'):
470
+ response_text = ""
471
+ if 'response' in result and isinstance(result['response'], dict):
472
+ response_text = result['response'].get('generated_text', '')
473
+ elif 'generated_text' in result:
474
+ response_text = result['generated_text']
475
+ elif 'content' in result:
476
+ response_text = result['content']
477
+
478
+ return {
479
+ 'success': True,
480
+ 'result': {
481
+ 'content': response_text,
482
+ 'tool_calls': [],
483
+ 'response_metadata': result.get('metadata', {})
484
+ },
485
+ 'error': None,
486
+ 'metadata': {
487
+ 'model_used': self.model_name,
488
+ 'provider': self.provider_name,
489
+ 'task': task,
490
+ 'service_type': 'text',
491
+ 'processing_time': result.get('processing_time', 0)
492
+ }
493
+ }
494
+ else:
495
+ return {
496
+ 'success': False,
497
+ 'result': None,
498
+ 'error': result.get('error', 'Unknown error'),
499
+ 'metadata': {
500
+ 'model_used': self.model_name,
501
+ 'provider': self.provider_name,
502
+ 'task': task,
503
+ 'service_type': 'text'
504
+ }
505
+ }
506
+
507
+ elif task in ["generate", "complete"]:
508
+ result = await self.complete(input_data, **kwargs)
509
+
510
+ # Convert result to unified format
511
+ if result.get('success'):
512
+ response_text = ""
513
+ if 'response' in result and isinstance(result['response'], dict):
514
+ response_text = result['response'].get('generated_text', '')
515
+ elif 'generated_text' in result:
516
+ response_text = result['generated_text']
517
+ elif 'content' in result:
518
+ response_text = result['content']
519
+
520
+ return {
521
+ 'success': True,
522
+ 'result': {
523
+ 'content': response_text,
524
+ 'response_metadata': result.get('metadata', {})
525
+ },
526
+ 'error': None,
527
+ 'metadata': {
528
+ 'model_used': self.model_name,
529
+ 'provider': self.provider_name,
530
+ 'task': task,
531
+ 'service_type': 'text',
532
+ 'processing_time': result.get('processing_time', 0)
533
+ }
534
+ }
535
+ else:
536
+ return {
537
+ 'success': False,
538
+ 'result': None,
539
+ 'error': result.get('error', 'Unknown error'),
540
+ 'metadata': {
541
+ 'model_used': self.model_name,
542
+ 'provider': self.provider_name,
543
+ 'task': task,
544
+ 'service_type': 'text'
545
+ }
546
+ }
547
+ else:
548
+ return {
549
+ 'success': False,
550
+ 'result': None,
551
+ 'error': f'Unsupported task: {task}. Supported tasks: {self.get_supported_tasks()}',
552
+ 'metadata': {
553
+ 'model_used': self.model_name,
554
+ 'provider': self.provider_name,
555
+ 'task': task,
556
+ 'service_type': 'text'
557
+ }
558
+ }
559
+
560
+ except Exception as e:
561
+ logger.error(f"ISA LLM invoke failed: {e}")
562
+ return {
563
+ 'success': False,
564
+ 'result': None,
565
+ 'error': str(e),
566
+ 'metadata': {
567
+ 'model_used': self.model_name,
568
+ 'provider': self.provider_name,
569
+ 'task': task,
570
+ 'service_type': 'text'
571
+ }
572
+ }
573
+
574
+ # Backward compatibility aliases
575
+ class HuggingFaceLLMService(ISALLMService):
576
+ """Alias for backward compatibility with AIFactory naming convention"""
577
+ pass
578
+
579
+ class HuggingFaceInferenceService(ISALLMService):
580
+ """Alias for backward compatibility"""
581
+ pass
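
The ISALLMService added above is a thin client around a deployed Modal app: invoke() normalizes the chat, generate, and complete tasks into a single response envelope, and every call path degrades to canned fallback text whenever the Modal SDK or the deployed app is unreachable. A minimal usage sketch, assuming the class is importable from the new huggingface_llm_service module as this +581 hunk suggests:

import asyncio

from isa_model.inference.services.llm.huggingface_llm_service import ISALLMService

async def main():
    service = ISALLMService(modal_app_name="isa-llm-inference")

    # health_check() reports status "fallback" when the Modal app is unreachable
    print(await service.health_check())

    # invoke() wraps chat() and returns the unified envelope shown above:
    # {'success': ..., 'result': {'content': ...}, 'error': ..., 'metadata': {...}}
    result = await service.invoke("Hello, ISA!", task="chat")
    print(result['result']['content'] if result['success'] else result['error'])

asyncio.run(main())
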
isa_model/inference/services/llm/ollama_llm_service.py
@@ -3,6 +3,7 @@ import httpx
  import json
  from typing import Dict, Any, List, Union, AsyncGenerator, Optional, Callable
  from isa_model.inference.services.llm.base_llm_service import BaseLLMService
+ from isa_model.core.config.config_manager import ConfigManager

  logger = logging.getLogger(__name__)

@@ -16,7 +17,10 @@ class OllamaLLMService(BaseLLMService):
          provider_config = self.get_provider_config()

          # Create HTTP client for Ollama API
-         base_url = provider_config.get("base_url", "http://localhost:11434")
+         config_manager = ConfigManager()
+         # Use Consul discovery with fallback
+         default_base_url = config_manager.get_ollama_url()
+         base_url = provider_config.get("base_url", default_base_url)
          timeout = provider_config.get("timeout", 60)

          self.client = httpx.AsyncClient(
@@ -34,7 +38,10 @@ class OllamaLLMService(BaseLLMService):
          """Ensure the HTTP client is available and not closed"""
          if not hasattr(self, 'client') or not self.client or self.client.is_closed:
              provider_config = self.get_provider_config()
-             base_url = provider_config.get("base_url", "http://localhost:11434")
+             config_manager = ConfigManager()
+             # Use Consul discovery with fallback
+             default_base_url = config_manager.get_ollama_url()
+             base_url = provider_config.get("base_url", default_base_url)
              timeout = provider_config.get("timeout", 60)
              self.client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
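
The Ollama changes above replace the hard-coded http://localhost:11434 default with ConfigManager.get_ollama_url(), so the endpoint can be resolved through Consul service discovery while still falling back to a static URL when discovery is down. The resolver's body is not part of this diff; the following is only a hedged sketch of the usual shape of such a method, where the environment variable name and the consul_lookup helper are assumptions rather than the library's actual API:

import os
from typing import Callable, Optional, Tuple

def get_ollama_url(consul_lookup: Optional[Callable[[str], Tuple[str, int]]] = None) -> str:
    # Hypothetical sketch; the real ConfigManager.get_ollama_url() in 0.4.4 may differ.
    explicit = os.getenv("OLLAMA_BASE_URL")  # assumed override variable
    if explicit:
        return explicit
    if consul_lookup is not None:
        try:
            host, port = consul_lookup("ollama")  # e.g. ("10.0.0.5", 11434)
            return f"http://{host}:{port}"
        except Exception:
            pass  # discovery unavailable: fall through to the static default
    # Last-resort default, matching the value this diff removed
    return "http://localhost:11434"
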