isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
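For consumers of the relocated Modal services, the rename entries above (deployment/cloud/modal → deployment/modal/services/*) translate into an import-path change; a hedged sketch using module names taken directly from the file list (which symbols each module exports is not shown in this diff):

# isa-model 0.4.0 (old layout):
#   from isa_model.deployment.cloud.modal import isa_vision_ocr_service
# isa-model 0.4.3 (new layout):
from isa_model.deployment.modal.services.vision import isa_vision_ocr_service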
@@ -0,0 +1,581 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+ISA LLM Service - Inference client for Modal-deployed HuggingFace models
+Supports custom trained models deployed on Modal infrastructure
+"""
+
+import logging
+import os
+from typing import Dict, Any, Optional, List
+
+try:
+    import modal
+    MODAL_AVAILABLE = True
+except ImportError:
+    MODAL_AVAILABLE = False
+    modal = None
+
+from isa_model.inference.services.base_service import BaseService
+from isa_model.core.models.model_manager import ModelManager
+from isa_model.core.config import ConfigManager
+
+logger = logging.getLogger(__name__)
+
+class ISALLMService(BaseService):
+    """
+    ISA LLM Service - Client for Modal-deployed HuggingFace models
+    Calls ISA's own deployed LLM inference services on Modal
+    """
+
+    def __init__(
+        self,
+        provider_name: str = "isa",
+        model_name: str = None,
+        model_manager: ModelManager = None,
+        config_manager: ConfigManager = None,
+        modal_app_name: str = "isa-llm-inference",
+        timeout: int = 60,
+        **kwargs
+    ):
+        # Skip BaseService init to avoid config validation for now
+        self.provider_name = provider_name
+        self.model_name = model_name or "isa-llm-service"
+        self.modal_app_name = modal_app_name
+        self.timeout = timeout
+
+        # Initialize Modal client
+        if MODAL_AVAILABLE:
+            try:
+                # Get deployed Modal app
+                self.modal_app = modal.App.lookup(modal_app_name)
+                logger.info(f"Connected to Modal LLM app: {modal_app_name}")
+
+                self.modal_service = True
+                logger.info("Modal LLM service connection established")
+
+            except Exception as e:
+                logger.warning(f"Failed to connect to Modal LLM app: {e}")
+                self.modal_app = None
+                self.modal_service = None
+        else:
+            logger.warning("Modal SDK not available")
+            self.modal_app = None
+            self.modal_service = None
+
+        # Service statistics
+        self.request_count = 0
+        self.total_cost = 0.0
+
+        # Fallback mode for when Modal is not available
+        self.fallback_mode = not MODAL_AVAILABLE or not self.modal_service
+
+    async def _fallback_response(self, method_name: str, **kwargs) -> Dict[str, Any]:
+        """
+        Provide fallback responses when Modal service is not available
+        """
+        import time
+        import random
+
+        if method_name == "generate_text":
+            prompt = kwargs.get("prompt", "")
+            # Simple rule-based responses for demo purposes
+            responses = [
+                "This is a simulated ISA LLM response.",
+                "Sorry, the Modal service is currently unavailable; this is a fallback response.",
+                "The ISA model is under maintenance, please try again later.",
+                f"You said: {prompt}. Understood, but the model is currently unavailable."
+            ]
+
+            generated_text = random.choice(responses)
+
+            return {
+                "success": True,
+                "text": generated_text,
+                "full_text": prompt + " " + generated_text,
+                "prompt": prompt,
+                "model_id": kwargs.get("model_id", "isa-llm-fallback"),
+                "provider": "ISA",
+                "service": "isa-llm",
+                "fallback": True,
+                "generation_config": kwargs.get("generation_config", {}),
+                "metadata": {
+                    "processing_time": random.uniform(0.5, 2.0),
+                    "device": "cpu",
+                    "input_tokens": len(prompt.split()),
+                    "output_tokens": len(generated_text.split()),
+                    "note": "This is a fallback response - Modal service not available"
+                }
+            }
+
+        elif method_name == "chat_completion":
+            messages = kwargs.get("messages", [])
+            user_message = ""
+            if messages:
+                user_message = messages[-1].get("content", "")
+
+            chat_responses = [
+                "Sorry, the ISA model is currently unavailable; this is a simulated response.",
+                "I am the fallback version of the ISA model, with limited functionality.",
+                f"I heard you say: {user_message}, but I cannot give a full reply right now.",
+                "The Modal service is restarting; please try the full ISA model again later."
+            ]
+
+            response_text = random.choice(chat_responses)
+
+            return {
+                "success": True,
+                "text": response_text,
+                "role": "assistant",
+                "messages": messages,
+                "model_id": kwargs.get("model_id", "isa-llm-fallback"),
+                "provider": "ISA",
+                "service": "isa-llm",
+                "fallback": True,
+                "metadata": {
+                    "processing_time": random.uniform(0.3, 1.5),
+                    "device": "cpu",
+                    "note": "This is a fallback response - Modal service not available"
+                }
+            }
+
+        elif method_name == "get_model_info":
+            return {
+                "success": True,
+                "model_id": kwargs.get("model_id", "isa-llm-fallback"),
+                "provider": "ISA",
+                "service": "isa-llm",
+                "architecture": "unknown (fallback mode)",
+                "fallback": True,
+                "note": "Modal service not available - showing fallback info"
+            }
+
+        elif method_name == "health_check":
+            return {
+                "success": True,
+                "status": "fallback",
+                "service": "isa-llm",
+                "provider": "ISA",
+                "device": "cpu",
+                "fallback": True,
+                "message": "Modal service not available - running in fallback mode"
+            }
+
+        else:
+            return {
+                "success": False,
+                "error": f"Method {method_name} not supported in fallback mode",
+                "fallback": True
+            }
+
+    async def _call_modal_llm_service(
+        self,
+        method_name: str,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Call Modal LLM service via SDK with improved error handling and fallback
+        """
+        # If in fallback mode, use fallback response immediately
+        if self.fallback_mode:
+            logger.info(f"Using fallback mode for {method_name}")
+            return await self._fallback_response(method_name, **kwargs)
+
+        try:
+            if not MODAL_AVAILABLE:
+                logger.warning("Modal SDK not available, switching to fallback mode")
+                self.fallback_mode = True
+                return await self._fallback_response(method_name, **kwargs)
+
+            if not self.modal_app or not self.modal_service:
+                logger.warning("Modal app/service not available, switching to fallback mode")
+                self.fallback_mode = True
+                return await self._fallback_response(method_name, **kwargs)
+
+            logger.info(f"Calling Modal LLM service method: {method_name}")
+
+            try:
+                # Use Modal SDK to call the service
+                ISALLMServiceCls = modal.Cls.from_name(
+                    app_name=self.modal_app_name,
+                    name="ISALLMService"
+                )
+
+                # Create instance and call method
+                instance = ISALLMServiceCls()
+                method = getattr(instance, method_name)
+                result = method.remote(**kwargs)
+
+                logger.info("✅ Modal LLM service call successful")
+                return result
+
+            except modal.exception.NotFoundError:
+                logger.warning(f"Modal app not found, switching to fallback mode")
+                self.fallback_mode = True
+                return await self._fallback_response(method_name, **kwargs)
+
+            except modal.exception.ConnectionError:
+                logger.warning(f"Modal connection error, switching to fallback mode")
+                self.fallback_mode = True
+                return await self._fallback_response(method_name, **kwargs)
+
+        except Exception as e:
+            logger.error(f"Modal LLM service call failed: {e}, switching to fallback mode")
+            self.fallback_mode = True
+            return await self._fallback_response(method_name, **kwargs)
+
+    async def complete(
+        self,
+        prompt: str,
+        model_id: str = None,
+        max_length: Optional[int] = 50,
+        temperature: float = 0.7,
+        do_sample: bool = True,
+        top_p: float = 0.9,
+        repetition_penalty: float = 1.1,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Generate completion using Modal-deployed LLM service
+
+        Args:
+            prompt: Input text prompt
+            model_id: HuggingFace model ID to use
+            max_length: Maximum length of generated text
+            temperature: Sampling temperature
+            do_sample: Whether to use sampling
+            top_p: Top-p sampling parameter
+            repetition_penalty: Repetition penalty
+            **kwargs: Additional generation parameters
+
+        Returns:
+            Dictionary containing generated text and metadata
+        """
+        try:
+            # Get HF token from environment
+            hf_token = os.getenv("HF_TOKEN")
+
+            # Use provided model_id or default trained model
+            target_model = model_id or "xenobordom/dialogpt-isa-trained-1755493402"
+
+            # Call Modal service
+            result = await self._call_modal_llm_service(
+                method_name="generate_text",
+                prompt=prompt,
+                model_id=target_model,
+                hf_token=hf_token,
+                max_length=max_length,
+                temperature=temperature,
+                do_sample=do_sample,
+                top_p=top_p,
+                repetition_penalty=repetition_penalty,
+                **kwargs
+            )
+
+            if result and result.get('success', False):
+                self.request_count += 1
+
+                # Add cost tracking if available
+                if 'billing' in result:
+                    cost = result['billing'].get('estimated_cost_usd', 0)
+                    self.total_cost += cost
+
+                return result
+            else:
+                return {
+                    'success': False,
+                    'provider': 'ISA',
+                    'service': 'isa-llm',
+                    'error': f'Modal LLM service returned error: {result.get("error", "Unknown error") if result else "No response"}',
+                    'details': result
+                }
+
+        except Exception as e:
+            logger.error(f"ISA LLM completion failed: {e}")
+            return {
+                'success': False,
+                'provider': 'ISA',
+                'service': 'isa-llm',
+                'error': str(e)
+            }
+
+    async def chat(
+        self,
+        messages: List[Dict[str, str]],
+        model_id: str = None,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Chat completion using Modal-deployed LLM service
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content'
+            model_id: HuggingFace model ID to use
+            **kwargs: Additional generation parameters
+
+        Returns:
+            Dictionary containing generated response and metadata
+        """
+        try:
+            # Get HF token from environment
+            hf_token = os.getenv("HF_TOKEN")
+
+            # Use provided model_id or default trained model
+            target_model = model_id or "xenobordom/dialogpt-isa-trained-1755493402"
+
+            # Call Modal service
+            result = await self._call_modal_llm_service(
+                method_name="chat_completion",
+                messages=messages,
+                model_id=target_model,
+                hf_token=hf_token,
+                **kwargs
+            )
+
+            if result and result.get('success', False):
+                self.request_count += 1
+
+                # Add cost tracking if available
+                if 'billing' in result:
+                    cost = result['billing'].get('estimated_cost_usd', 0)
+                    self.total_cost += cost
+
+                return result
+            else:
+                return {
+                    'success': False,
+                    'provider': 'ISA',
+                    'service': 'isa-llm',
+                    'error': f'Modal LLM service returned error: {result.get("error", "Unknown error") if result else "No response"}',
+                    'details': result
+                }
+
+        except Exception as e:
+            logger.error(f"ISA LLM chat completion failed: {e}")
+            return {
+                'success': False,
+                'provider': 'ISA',
+                'service': 'isa-llm',
+                'error': str(e)
+            }
+
+    async def get_model_info(self, model_id: str = None) -> Dict[str, Any]:
+        """Get information about the model via Modal service"""
+        try:
+            # Get HF token from environment
+            hf_token = os.getenv("HF_TOKEN")
+
+            # Use provided model_id or default trained model
+            target_model = model_id or "xenobordom/dialogpt-isa-trained-1755493402"
+
+            # Call Modal service
+            result = await self._call_modal_llm_service(
+                method_name="get_model_info",
+                model_id=target_model,
+                hf_token=hf_token
+            )
+
+            if result and result.get('success', False):
+                return result
+            else:
+                return {
+                    'success': False,
+                    'provider': 'ISA',
+                    'service': 'isa-llm',
+                    'error': f'Modal LLM service returned error: {result.get("error", "Unknown error") if result else "No response"}'
+                }
+
+        except Exception as e:
+            logger.error(f"Error getting model info: {e}")
+            return {
+                'success': False,
+                'error': str(e)
+            }
+
+    async def health_check(self) -> Dict[str, Any]:
+        """Check ISA LLM service health"""
+        try:
+            # Call Modal service health check
+            result = await self._call_modal_llm_service(
+                method_name="health_check"
+            )
+
+            if result and result.get('success', False):
+                return {
+                    'success': True,
+                    'provider': 'ISA',
+                    'service': 'isa-llm',
+                    'status': 'healthy',
+                    'modal_service': result,
+                    'usage_stats': {
+                        'total_requests': self.request_count,
+                        'total_cost_usd': round(self.total_cost, 6)
+                    }
+                }
+            else:
+                return {
+                    'success': False,
+                    'provider': 'ISA',
+                    'service': 'isa-llm',
+                    'status': 'error',
+                    'error': f'Modal service error: {result.get("error", "Unknown error") if result else "No response"}'
+                }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'provider': 'ISA',
+                'service': 'isa-llm',
+                'status': 'error',
+                'error': str(e)
+            }
+
+    def get_supported_tasks(self) -> List[str]:
+        """Get supported task list"""
+        return [
+            'generate',  # Text generation
+            'chat',      # Chat completion
+            'complete'   # Text completion
+        ]
+
+    def get_supported_models(self) -> List[str]:
+        """Get supported model types"""
+        return [
+            'dialogpt',  # DialoGPT models
+            'gpt2',      # GPT-2 models
+            'custom'     # Custom trained models
+        ]
+
+    async def invoke(self, input_data: str, task: str = "chat", **kwargs) -> Dict[str, Any]:
+        """
+        Unified invoke method for ISA LLM service compatibility
+        Required by the ISA Model client interface
+        """
+        try:
+            if task in ["chat", "generate", "complete"]:
+                # Handle chat tasks by converting to message format
+                if task == "chat":
+                    if isinstance(input_data, str):
+                        messages = [{"role": "user", "content": input_data}]
+                    elif isinstance(input_data, list):
+                        messages = input_data
+                    else:
+                        messages = [{"role": "user", "content": str(input_data)}]
+
+                    result = await self.chat(messages, **kwargs)
+
+                    # Convert result to unified format
+                    if result.get('success'):
+                        response_text = ""
+                        if 'response' in result and isinstance(result['response'], dict):
+                            response_text = result['response'].get('generated_text', '')
+                        elif 'generated_text' in result:
+                            response_text = result['generated_text']
+                        elif 'content' in result:
+                            response_text = result['content']
+
+                        return {
+                            'success': True,
+                            'result': {
+                                'content': response_text,
+                                'tool_calls': [],
+                                'response_metadata': result.get('metadata', {})
+                            },
+                            'error': None,
+                            'metadata': {
+                                'model_used': self.model_name,
+                                'provider': self.provider_name,
+                                'task': task,
+                                'service_type': 'text',
+                                'processing_time': result.get('processing_time', 0)
+                            }
+                        }
+                    else:
+                        return {
+                            'success': False,
+                            'result': None,
+                            'error': result.get('error', 'Unknown error'),
+                            'metadata': {
+                                'model_used': self.model_name,
+                                'provider': self.provider_name,
+                                'task': task,
+                                'service_type': 'text'
+                            }
+                        }
+
+                elif task in ["generate", "complete"]:
+                    result = await self.complete(input_data, **kwargs)
+
+                    # Convert result to unified format
+                    if result.get('success'):
+                        response_text = ""
+                        if 'response' in result and isinstance(result['response'], dict):
+                            response_text = result['response'].get('generated_text', '')
+                        elif 'generated_text' in result:
+                            response_text = result['generated_text']
+                        elif 'content' in result:
+                            response_text = result['content']
+
+                        return {
+                            'success': True,
+                            'result': {
+                                'content': response_text,
+                                'response_metadata': result.get('metadata', {})
+                            },
+                            'error': None,
+                            'metadata': {
+                                'model_used': self.model_name,
+                                'provider': self.provider_name,
+                                'task': task,
+                                'service_type': 'text',
+                                'processing_time': result.get('processing_time', 0)
+                            }
+                        }
+                    else:
+                        return {
+                            'success': False,
+                            'result': None,
+                            'error': result.get('error', 'Unknown error'),
+                            'metadata': {
+                                'model_used': self.model_name,
+                                'provider': self.provider_name,
+                                'task': task,
+                                'service_type': 'text'
+                            }
+                        }
+            else:
+                return {
+                    'success': False,
+                    'result': None,
+                    'error': f'Unsupported task: {task}. Supported tasks: {self.get_supported_tasks()}',
+                    'metadata': {
+                        'model_used': self.model_name,
+                        'provider': self.provider_name,
+                        'task': task,
+                        'service_type': 'text'
+                    }
+                }
+
+        except Exception as e:
+            logger.error(f"ISA LLM invoke failed: {e}")
+            return {
+                'success': False,
+                'result': None,
+                'error': str(e),
+                'metadata': {
+                    'model_used': self.model_name,
+                    'provider': self.provider_name,
+                    'task': task,
+                    'service_type': 'text'
+                }
+            }
+
+# Backward compatibility aliases
+class HuggingFaceLLMService(ISALLMService):
+    """Alias for backward compatibility with AIFactory naming convention"""
+    pass
+
+class HuggingFaceInferenceService(ISALLMService):
+    """Alias for backward compatibility"""
+    pass