isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,424 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ ISA LLM Service - Modal deployment for HuggingFace trained models
6
+ Provides inference API for custom trained models
7
+ """
8
+
9
+ import os
10
+ import logging
11
+ from typing import Dict, Any, List, Optional
12
+ import modal
13
+
14
# Modal app configuration
app = modal.App("isa-llm-inference")

# GPU configuration for inference.
# Expressed as a GPU type string: the `gpu=` parameter accepts this form,
# whereas the `modal.gpu.*` helper objects are deprecated.
GPU_CONFIG = "A10G"

# Base image with HuggingFace transformers
image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install([
        "torch>=2.0.0",
        "transformers>=4.35.0",
        "accelerate>=0.20.0",
        "huggingface_hub>=0.17.0",
        "peft>=0.5.0",  # For LoRA models
        "bitsandbytes>=0.41.0",  # For quantization
        "sentencepiece>=0.1.99",  # For tokenizers
    ])
)

logger = logging.getLogger(__name__)
35
+
36
@app.cls(
    image=image,
    gpu=GPU_CONFIG,
    cpu=2.0,
    memory=16384,  # 16GB memory
    timeout=300,  # 5 minute timeout
    container_idle_timeout=60,  # Keep warm for 1 minute
    allow_concurrent_inputs=5,  # Allow concurrent requests
)
class ISALLMService:
    """
    ISA LLM Service for inference on HuggingFace trained models.

    Designed to work with models trained through ISA training pipeline.
    One model/tokenizer pair is cached on the instance and replaced whenever
    a request names a different ``model_id``.

    NOTE(review): the class is configured to accept concurrent inputs while
    the cached model is shared mutable state, so requests naming different
    models can evict each other's model. Confirm whether callers pin a single
    model per deployment before relying on concurrency here.
    """

    def __init__(self):
        """Initialize the service (runs on container startup)"""
        import torch
        # Not referenced below; kept so a missing transformers install fails
        # here rather than on the first request.
        from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: F401

        # Model will be loaded when first requested
        self.model = None
        self.tokenizer = None
        self.current_model_id = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        logger.info(f"ISA LLM Service initialized on {self.device}")

    def _load_model(self, model_id: str, hf_token: Optional[str] = None):
        """
        Ensure ``model_id`` is the model cached on this instance.

        Args:
            model_id: HuggingFace model ID to load
            hf_token: HuggingFace token for private models

        Raises:
            Exception: whatever the tokenizer/model loaders raise; the error
                is logged and re-raised, and the instance is left with no
                model loaded.
        """
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        if self.current_model_id == model_id and self.model is not None:
            logger.info(f"Model {model_id} already loaded")
            return

        logger.info(f"Loading model: {model_id}")

        # Drop the previous model before loading the next one so both are not
        # held at once, and so a failed load cannot leave a stale model id
        # paired with a tokenizer from a different model.
        self.model = None
        self.tokenizer = None
        self.current_model_id = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        try:
            # SECURITY NOTE(review): trust_remote_code=True executes Python
            # shipped in the model repository, and model_id is supplied by
            # the caller. Restrict model_id to trusted repos if callers are
            # not trusted.
            tokenizer = AutoTokenizer.from_pretrained(
                model_id,
                token=hf_token,
                trust_remote_code=True
            )

            # Set pad token if not exists
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Load model with GPU optimization
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                token=hf_token,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )

        except Exception as e:
            logger.error(f"Failed to load model {model_id}: {e}")
            raise

        # Publish only after both pieces loaded successfully.
        self.tokenizer = tokenizer
        self.model = model
        self.current_model_id = model_id
        logger.info(f"Successfully loaded model {model_id}")

    def _generate(
        self,
        prompt: str,
        model_id: str,
        hf_token: Optional[str] = None,
        max_length: int = 100,
        temperature: float = 0.7,
        do_sample: bool = True,
        top_p: float = 0.9,
        repetition_penalty: float = 1.1,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Run one generation request; shared by the remotely callable methods.

        This is a plain (undecorated) method so that other methods of the
        class can call it directly without going through the Modal wrapper.
        Parameters and return value are those of ``generate_text``.
        """
        import torch
        import time

        start_time = time.time()

        try:
            # Load model if needed
            self._load_model(model_id, hf_token)

            # Work on local references so a concurrent model swap cannot
            # change the tokenizer between generation and decoding.
            model, tokenizer = self.model, self.tokenizer
            if model is None or tokenizer is None:
                raise RuntimeError("Model not properly loaded")

            # Tokenize input (prompts longer than 512 tokens are truncated)
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512
            ).to(self.device)
            input_len = inputs["input_ids"].shape[1]

            # Generate; max_length is interpreted as the number of tokens
            # allowed on top of the prompt.
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_length=input_len + max_length,
                    temperature=temperature,
                    do_sample=do_sample,
                    top_p=top_p,
                    repetition_penalty=repetition_penalty,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    **kwargs
                )

            sequence = outputs[0]

            # Decode generated text
            full_text = tokenizer.decode(
                sequence,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True
            )

            # Extract only the new generated part by slicing off the prompt
            # tokens. Comparing decoded text against the prompt is unreliable
            # when the prompt was truncated or does not round-trip through
            # the tokenizer unchanged.
            new_tokens = sequence[input_len:]
            generated_text = tokenizer.decode(
                new_tokens,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True
            ).strip()

            processing_time = time.time() - start_time

            return {
                "success": True,
                "text": generated_text,
                "full_text": full_text,
                "prompt": prompt,
                "model_id": model_id,
                "provider": "ISA",
                "service": "isa-llm",
                "generation_config": {
                    "max_length": max_length,
                    "temperature": temperature,
                    "do_sample": do_sample,
                    "top_p": top_p,
                    "repetition_penalty": repetition_penalty
                },
                "metadata": {
                    "processing_time": processing_time,
                    "device": str(self.device),
                    "input_tokens": input_len,
                    # Newly generated tokens only, prompt excluded.
                    "output_tokens": int(new_tokens.shape[0])
                }
            }

        except Exception as e:
            logger.error(f"Error during text generation: {e}")
            return {
                "success": False,
                "error": str(e),
                "prompt": prompt,
                "model_id": model_id,
                "provider": "ISA",
                "service": "isa-llm"
            }

    @modal.method()
    def generate_text(
        self,
        prompt: str,
        model_id: str,
        hf_token: Optional[str] = None,
        max_length: int = 100,
        temperature: float = 0.7,
        do_sample: bool = True,
        top_p: float = 0.9,
        repetition_penalty: float = 1.1,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate text using the specified model

        Args:
            prompt: Input text prompt
            model_id: HuggingFace model ID (e.g., "xenobordom/dialogpt-isa-trained-xxx")
            hf_token: HuggingFace token for private models
            max_length: Maximum number of tokens to generate beyond the prompt
            temperature: Sampling temperature
            do_sample: Whether to use sampling
            top_p: Top-p sampling parameter
            repetition_penalty: Repetition penalty
            **kwargs: Additional generation parameters

        Returns:
            Dictionary containing generated text and metadata. Failures are
            reported as ``{"success": False, "error": ...}`` rather than
            raised.
        """
        return self._generate(
            prompt=prompt,
            model_id=model_id,
            hf_token=hf_token,
            max_length=max_length,
            temperature=temperature,
            do_sample=do_sample,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            **kwargs
        )

    @modal.method()
    def chat_completion(
        self,
        messages: List[Dict[str, str]],
        model_id: str,
        hf_token: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Chat completion with conversation history

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            model_id: HuggingFace model ID
            hf_token: HuggingFace token
            **kwargs: Additional generation parameters

        Returns:
            Dictionary containing generated response and metadata
        """
        try:
            # Convert messages to a single prompt. Messages whose role is not
            # user/assistant/system are skipped.
            turns = []
            for msg in messages:
                role = msg.get("role", "user")
                content = msg.get("content", "")
                if role in ("user", "assistant", "system"):
                    turns.append(f"{role.capitalize()}: {content}\n")

            conversation = "".join(turns) + "Assistant: "

            # Generate response through the plain helper rather than the
            # Modal-decorated generate_text attribute.
            result = self._generate(
                prompt=conversation,
                model_id=model_id,
                hf_token=hf_token,
                **kwargs
            )

            # Format as chat response
            if result.get("success"):
                result["role"] = "assistant"
                result["conversation"] = conversation
                result["messages"] = messages

            return result

        except Exception as e:
            logger.error(f"Error during chat completion: {e}")
            return {
                "success": False,
                "error": str(e),
                "messages": messages,
                "model_id": model_id,
                "provider": "ISA",
                "service": "isa-llm"
            }

    @modal.method()
    def get_model_info(self, model_id: str, hf_token: Optional[str] = None) -> Dict[str, Any]:
        """
        Get information about a model, loading it first if needed

        Args:
            model_id: HuggingFace model ID
            hf_token: HuggingFace token

        Returns:
            Dictionary with architecture, size and placement details, or
            ``{"success": False, "error": ...}`` on failure.
        """
        try:
            # Load model if needed
            self._load_model(model_id, hf_token)

            model = self.model
            if model is None:
                return {
                    "success": False,
                    "error": "Model not loaded"
                }

            # Get model config
            config = model.config

            # Count parameters in a single pass
            total_params = 0
            trainable_params = 0
            for param in model.parameters():
                count = param.numel()
                total_params += count
                if param.requires_grad:
                    trainable_params += count

            # Layer count lives under different attribute names depending on
            # the architecture's config class; try each in turn.
            num_layers = getattr(
                config,
                'num_layers',
                getattr(config, 'n_layer', getattr(config, 'num_hidden_layers', None))
            )

            return {
                "success": True,
                "model_id": model_id,
                "provider": "ISA",
                "service": "isa-llm",
                "architecture": getattr(config, 'model_type', "unknown"),
                "vocab_size": getattr(config, 'vocab_size', None),
                "hidden_size": getattr(config, 'hidden_size', None),
                "num_layers": num_layers,
                "num_attention_heads": getattr(config, 'num_attention_heads', getattr(config, 'n_head', None)),
                "total_parameters": total_params,
                "trainable_parameters": trainable_params,
                "device": str(self.device),
                "dtype": str(next(model.parameters()).dtype)
            }

        except Exception as e:
            logger.error(f"Error getting model info: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    @modal.method()
    def health_check(self) -> Dict[str, Any]:
        """
        Health check endpoint

        Returns:
            Dictionary with service status, GPU availability/count, the
            currently loaded model id and, when a GPU is available, CUDA
            memory counters (``memory_info`` is ``None`` otherwise).
        """
        import torch

        try:
            gpu_available = torch.cuda.is_available()
            gpu_count = torch.cuda.device_count() if gpu_available else 0

            memory_info = None
            if gpu_available:
                memory_info = {
                    "allocated": torch.cuda.memory_allocated(),
                    "cached": torch.cuda.memory_reserved()
                }

            return {
                "success": True,
                "status": "healthy",
                "service": "isa-llm",
                "provider": "ISA",
                "device": str(self.device),
                "gpu_available": gpu_available,
                "gpu_count": gpu_count,
                "current_model": self.current_model_id,
                "memory_info": memory_info
            }

        except Exception as e:
            return {
                "success": False,
                "status": "error",
                "error": str(e)
            }
352
+
353
+ # Deployment functions
354
@app.function(
    image=image,
    schedule=modal.Cron("0 2 * * *"),  # cron: minute 0, hour 2, every day
    timeout=300
)
def deploy_service():
    """Scheduled status hook for the ISA LLM service.

    Prints a confirmation message and returns a fixed status payload.
    The function body performs no deployment work itself.

    Returns:
        Dictionary with the keys ``status`` and ``service``.
    """
    status_payload = {"status": "deployed", "service": "isa-llm"}
    print("ISA LLM Service deployed successfully!")
    return status_payload
363
+
364
+ # Local testing function
365
@app.local_entrypoint()
def test_service():
    """Smoke-test the ISA LLM service from the local entrypoint.

    Reads ``HF_TOKEN`` from the environment; when it is missing, prints an
    error and returns without calling the service. Otherwise calls
    ``health_check``, ``get_model_info``, ``generate_text`` and
    ``chat_completion`` through ``.remote()`` and prints each result.
    """
    # Fixtures: the trained model under test and a short prompt
    test_model_id = "xenobordom/dialogpt-isa-trained-1755493402"
    greeting = "你好"

    # The token comes from the environment; bail out early without it
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("❌ HF_TOKEN not found in environment")
        return

    print(f"🧪 Testing ISA LLM Service with model: {test_model_id}")

    svc = ISALLMService()

    # 1) Health check
    print("📋 Testing health check...")
    print(f"Health: {svc.health_check.remote()}")

    # 2) Model info
    print("📊 Testing model info...")
    print(f"Model info: {svc.get_model_info.remote(test_model_id, token)}")

    # 3) Plain text generation
    print("🤖 Testing text generation...")
    generation_request = dict(
        prompt=greeting,
        model_id=test_model_id,
        hf_token=token,
        max_length=30,
        temperature=0.7,
    )
    generation = svc.generate_text.remote(**generation_request)
    print(f"Generation result: {generation}")

    # 4) Chat completion over a short multi-turn history
    print("💬 Testing chat completion...")
    history = [
        {"role": "user", "content": "你好"},
        {"role": "assistant", "content": "你好!很高兴见到你。"},
        {"role": "user", "content": "你能帮我做什么?"},
    ]
    chat_reply = svc.chat_completion.remote(
        messages=history,
        model_id=test_model_id,
        hf_token=token,
        max_length=30,
    )
    print(f"Chat result: {chat_reply}")

    print("✅ ISA LLM Service test completed!")
421
+
422
if __name__ == "__main__":
    # For local development: the entrypoint issues `.remote()` calls, which
    # need a running app, so start an ephemeral run around it when the file
    # is executed directly with `python` instead of `modal run`.
    with app.run():
        test_service()
@@ -0,0 +1 @@
1
+ """Video services for Modal deployment"""