isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
"""
tenant-a-service LLM Service for Modal

Auto-generated service for model: gpt2
Architecture: gpt
"""

import modal
from typing import Dict, Any, List

app = modal.App("tenant-a-service")

# Container image with model-serving dependencies pinned to minimum versions.
image = modal.Image.debian_slim().pip_install(
    "accelerate>=0.24.0", "transformers>=4.35.0", "httpx>=0.26.0", "torch>=2.0.0", "requests>=2.31.0", "numpy>=1.24.0", "pydantic>=2.0.0"
)

@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,  # scale container to zero after 5 min idle
    memory=32768
)
class Tenant_A_ServiceService:
    """Modal service class wrapping the gpt2 causal LM."""

    @modal.enter()
    def load_model(self):
        """Load tokenizer and model once per container start (Modal enter hook)."""
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder completion for the last message.

        NOTE(review): this simplified stub never touches self.model /
        self.tokenizer — it echoes the prompt back. Replace with real
        generation before production use.
        """
        # Generate response (simplified)
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}

@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """HTTP POST entry point.

    ``messages`` is optional and defaults to an empty list, so a request
    body without it no longer raises TypeError (previously ``**item`` was
    splatted blindly and a missing "messages" key produced an HTTP 500).
    """
    service = Tenant_A_ServiceService()
    messages = item.get("messages", [])
    extra = {k: v for k, v in item.items() if k != "messages"}
    # NOTE(review): direct call executes locally; consider
    # service.generate.remote(...) to run inside the GPU container —
    # confirm against the Modal version in use.
    return service.generate(messages, **extra)
"""
prefix-test-service LLM Service for Modal

Auto-generated service for model: gpt2
Architecture: gpt
"""

import modal
from typing import Dict, Any, List

app = modal.App("prefix-test-service")

# Container image with model-serving dependencies pinned to minimum versions.
image = modal.Image.debian_slim().pip_install(
    "accelerate>=0.24.0", "transformers>=4.35.0", "httpx>=0.26.0", "torch>=2.0.0", "requests>=2.31.0", "numpy>=1.24.0", "pydantic>=2.0.0"
)

@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,  # scale container to zero after 5 min idle
    memory=32768
)
class Prefix_Test_ServiceService:
    """Modal service class wrapping the gpt2 causal LM."""

    @modal.enter()
    def load_model(self):
        """Load tokenizer and model once per container start (Modal enter hook)."""
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder completion for the last message.

        NOTE(review): this simplified stub never touches self.model /
        self.tokenizer — it echoes the prompt back. Replace with real
        generation before production use.
        """
        # Generate response (simplified)
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}

@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """HTTP POST entry point.

    ``messages`` is optional and defaults to an empty list, so a request
    body without it no longer raises TypeError (previously ``**item`` was
    splatted blindly and a missing "messages" key produced an HTTP 500).
    """
    service = Prefix_Test_ServiceService()
    messages = item.get("messages", [])
    extra = {k: v for k, v in item.items() if k != "messages"}
    # NOTE(review): direct call executes locally; consider
    # service.generate.remote(...) to run inside the GPU container —
    # confirm against the Modal version in use.
    return service.generate(messages, **extra)
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py
ADDED
"""
test-llm-service LLM Service for Modal

Auto-generated service for model: gpt2
Architecture: gpt
"""

import modal
from typing import Dict, Any, List

app = modal.App("test-llm-service")

# Container image with model-serving dependencies pinned to minimum versions.
image = modal.Image.debian_slim().pip_install(
    "torch>=2.0.0", "httpx>=0.26.0", "transformers>=4.35.0", "requests>=2.31.0", "pydantic>=2.0.0", "numpy>=1.24.0", "accelerate>=0.24.0"
)

@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,  # scale container to zero after 5 min idle
    memory=32768
)
class Test_Llm_ServiceService:
    """Modal service class wrapping the gpt2 causal LM."""

    @modal.enter()
    def load_model(self):
        """Load tokenizer and model once per container start (Modal enter hook)."""
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder completion for the last message.

        NOTE(review): this simplified stub never touches self.model /
        self.tokenizer — it echoes the prompt back. Replace with real
        generation before production use.
        """
        # Generate response (simplified)
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}

@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """HTTP POST entry point.

    ``messages`` is optional and defaults to an empty list, so a request
    body without it no longer raises TypeError (previously ``**item`` was
    splatted blindly and a missing "messages" key produced an HTTP 500).
    """
    service = Test_Llm_ServiceService()
    messages = item.get("messages", [])
    extra = {k: v for k, v in item.items() if k != "messages"}
    # NOTE(review): direct call executes locally; consider
    # service.generate.remote(...) to run inside the GPU container —
    # confirm against the Modal version in use.
    return service.generate(messages, **extra)
"""
test-monitoring-gpt2 LLM Service for Modal

Auto-generated service for model: gpt2
Architecture: gpt
"""

import modal
from typing import Dict, Any, List

app = modal.App("test-monitoring-gpt2")

# Container image with model-serving dependencies pinned to minimum versions.
image = modal.Image.debian_slim().pip_install(
    "numpy>=1.24.0", "requests>=2.31.0", "accelerate>=0.24.0", "httpx>=0.26.0", "pydantic>=2.0.0", "transformers>=4.35.0", "torch>=2.0.0"
)

@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,  # scale container to zero after 5 min idle
    memory=32768
)
class Test_Monitoring_Gpt2Service:
    """Modal service class wrapping the gpt2 causal LM."""

    @modal.enter()
    def load_model(self):
        """Load tokenizer and model once per container start (Modal enter hook)."""
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder completion for the last message.

        NOTE(review): this simplified stub never touches self.model /
        self.tokenizer — it echoes the prompt back. Replace with real
        generation before production use.
        """
        # Generate response (simplified)
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}

@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """HTTP POST entry point.

    ``messages`` is optional and defaults to an empty list, so a request
    body without it no longer raises TypeError (previously ``**item`` was
    splatted blindly and a missing "messages" key produced an HTTP 500).
    """
    service = Test_Monitoring_Gpt2Service()
    messages = item.get("messages", [])
    extra = {k: v for k, v in item.items() if k != "messages"}
    # NOTE(review): direct call executes locally; consider
    # service.generate.remote(...) to run inside the GPU container —
    # confirm against the Modal version in use.
    return service.generate(messages, **extra)
"""
test-monitoring-gpt2 LLM Service for Modal

Auto-generated service for model: gpt2
Architecture: gpt
"""

import modal
from typing import Dict, Any, List

app = modal.App("test-monitoring-gpt2")

# Container image with model-serving dependencies pinned to minimum versions.
image = modal.Image.debian_slim().pip_install(
    "transformers>=4.35.0", "torch>=2.0.0", "accelerate>=0.24.0", "httpx>=0.26.0", "numpy>=1.24.0", "requests>=2.31.0", "pydantic>=2.0.0"
)

@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,  # scale container to zero after 5 min idle
    memory=32768
)
class Test_Monitoring_Gpt2Service:
    """Modal service class wrapping the gpt2 causal LM."""

    @modal.enter()
    def load_model(self):
        """Load tokenizer and model once per container start (Modal enter hook)."""
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder completion for the last message.

        NOTE(review): this simplified stub never touches self.model /
        self.tokenizer — it echoes the prompt back. Replace with real
        generation before production use.
        """
        # Generate response (simplified)
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}

@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """HTTP POST entry point.

    ``messages`` is optional and defaults to an empty list, so a request
    body without it no longer raises TypeError (previously ``**item`` was
    splatted blindly and a missing "messages" key produced an HTTP 500).
    """
    service = Test_Monitoring_Gpt2Service()
    messages = item.get("messages", [])
    extra = {k: v for k, v in item.items() if k != "messages"}
    # NOTE(review): direct call executes locally; consider
    # service.generate.remote(...) to run inside the GPU container —
    # confirm against the Modal version in use.
    return service.generate(messages, **extra)