isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
```diff
--- /dev/null
+++ b/isa_model/deployment/triton/__init__.py
@@ -0,0 +1,10 @@
+"""
+Triton Inference Server deployment provider
+
+Supports bare metal GPU deployment with TensorRT-LLM optimization.
+"""
+
+from .config import TritonConfig, TritonServiceType, create_llm_triton_config
+from .provider import TritonProvider
+
+__all__ = ["TritonConfig", "TritonServiceType", "TritonProvider", "create_llm_triton_config"]
```
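The new `__init__.py` defines the public import surface of the Triton provider. As a quick orientation, here is a minimal usage sketch of that surface, assuming isa-model 0.4.4 is installed; the service name and model id are hypothetical placeholders:

```python
# Minimal sketch of the public surface exported by the new
# isa_model.deployment.triton package (names taken from the __init__.py above).
from isa_model.deployment.triton import (
    TritonConfig,
    TritonServiceType,
    create_llm_triton_config,
)

# "my-llm" and "org/some-model" are hypothetical placeholder values.
config = create_llm_triton_config(
    service_name="my-llm",
    model_id="org/some-model",
)
assert isinstance(config, TritonConfig)
assert config.service_type is TritonServiceType.LLM
assert config.model_name == "my_llm"  # hyphens are normalized to underscores
```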
```diff
--- /dev/null
+++ b/isa_model/deployment/triton/config.py
@@ -0,0 +1,196 @@
+"""
+Triton deployment configuration
+
+Configuration classes for Triton Inference Server deployment with TensorRT-LLM backend.
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, Any, Optional, List
+from enum import Enum
+from pathlib import Path
+
+
+class TritonServiceType(Enum):
+    """Triton service types"""
+    LLM = "llm"
+    VISION = "vision"
+    EMBEDDING = "embedding"
+
+
+class TritonBackend(Enum):
+    """Triton backends"""
+    TENSORRT_LLM = "tensorrtllm"
+    PYTHON = "python"
+    ONNX = "onnxruntime"
+    PYTORCH = "pytorch"
+
+
+@dataclass
+class TritonConfig:
+    """Configuration for Triton Inference Server deployment"""
+
+    # Service identification
+    service_name: str
+    service_type: TritonServiceType
+    model_id: str
+
+    # Model configuration
+    model_name: str
+    model_version: str = "1"
+    backend: TritonBackend = TritonBackend.TENSORRT_LLM
+
+    # Model paths
+    model_repository: str = "/models"
+    hf_model_path: str = "/workspace/hf_model"
+    engine_output_path: str = "/workspace/engines"
+
+    # Performance settings
+    max_batch_size: int = 8
+    max_sequence_length: int = 2048
+    instance_group_count: int = 1
+    instance_group_kind: str = "KIND_GPU"
+
+    # TensorRT-LLM specific
+    use_tensorrt: bool = True
+    tensorrt_precision: str = "float16"  # float16, int8, int4
+    use_inflight_batching: bool = True
+    enable_streaming: bool = True
+
+    # Container configuration
+    gpu_type: str = "nvidia"
+    gpu_count: int = 1
+    memory_gb: int = 32
+    container_image: str = "nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3"
+
+    # Network configuration
+    http_port: int = 8000
+    grpc_port: int = 8001
+    metrics_port: int = 8002
+
+    # Build configuration
+    build_container_image: str = "nvcr.io/nvidia/tensorrtllm/tensorrt-llm:latest"
+    build_options: Dict[str, Any] = field(default_factory=lambda: {
+        "gemm_plugin": "float16",
+        "gpt_attention_plugin": "float16",
+        "paged_kv_cache": True,
+        "remove_input_padding": True
+    })
+
+    # Environment variables
+    environment: Dict[str, str] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for serialization"""
+        return {
+            "service_name": self.service_name,
+            "service_type": self.service_type.value,
+            "model_id": self.model_id,
+            "model_name": self.model_name,
+            "model_version": self.model_version,
+            "backend": self.backend.value,
+            "model_repository": self.model_repository,
+            "hf_model_path": self.hf_model_path,
+            "engine_output_path": self.engine_output_path,
+            "max_batch_size": self.max_batch_size,
+            "max_sequence_length": self.max_sequence_length,
+            "instance_group_count": self.instance_group_count,
+            "instance_group_kind": self.instance_group_kind,
+            "use_tensorrt": self.use_tensorrt,
+            "tensorrt_precision": self.tensorrt_precision,
+            "use_inflight_batching": self.use_inflight_batching,
+            "enable_streaming": self.enable_streaming,
+            "gpu_type": self.gpu_type,
+            "gpu_count": self.gpu_count,
+            "memory_gb": self.memory_gb,
+            "container_image": self.container_image,
+            "http_port": self.http_port,
+            "grpc_port": self.grpc_port,
+            "metrics_port": self.metrics_port,
+            "build_container_image": self.build_container_image,
+            "build_options": self.build_options,
+            "environment": self.environment
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "TritonConfig":
+        """Create from dictionary"""
+        return cls(
+            service_name=data["service_name"],
+            service_type=TritonServiceType(data["service_type"]),
+            model_id=data["model_id"],
+            model_name=data["model_name"],
+            model_version=data.get("model_version", "1"),
+            backend=TritonBackend(data.get("backend", "tensorrtllm")),
+            model_repository=data.get("model_repository", "/models"),
+            hf_model_path=data.get("hf_model_path", "/workspace/hf_model"),
+            engine_output_path=data.get("engine_output_path", "/workspace/engines"),
+            max_batch_size=data.get("max_batch_size", 8),
+            max_sequence_length=data.get("max_sequence_length", 2048),
+            instance_group_count=data.get("instance_group_count", 1),
+            instance_group_kind=data.get("instance_group_kind", "KIND_GPU"),
+            use_tensorrt=data.get("use_tensorrt", True),
+            tensorrt_precision=data.get("tensorrt_precision", "float16"),
+            use_inflight_batching=data.get("use_inflight_batching", True),
+            enable_streaming=data.get("enable_streaming", True),
+            gpu_type=data.get("gpu_type", "nvidia"),
+            gpu_count=data.get("gpu_count", 1),
+            memory_gb=data.get("memory_gb", 32),
+            container_image=data.get("container_image", "nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3"),
+            http_port=data.get("http_port", 8000),
+            grpc_port=data.get("grpc_port", 8001),
+            metrics_port=data.get("metrics_port", 8002),
+            build_container_image=data.get("build_container_image", "nvcr.io/nvidia/tensorrtllm/tensorrt-llm:latest"),
+            build_options=data.get("build_options", {
+                "gemm_plugin": "float16",
+                "gpt_attention_plugin": "float16",
+                "paged_kv_cache": True,
+                "remove_input_padding": True
+            }),
+            environment=data.get("environment", {})
+        )
+
+
+# Predefined configurations for common use cases
+def create_llm_triton_config(service_name: str, model_id: str,
+                             precision: str = "float16",
+                             max_batch_size: int = 8) -> TritonConfig:
+    """Create configuration for LLM service with TensorRT-LLM"""
+    return TritonConfig(
+        service_name=service_name,
+        service_type=TritonServiceType.LLM,
+        model_id=model_id,
+        model_name=service_name.replace("-", "_"),
+        tensorrt_precision=precision,
+        max_batch_size=max_batch_size,
+        memory_gb=32 if precision == "float16" else 24,
+        use_inflight_batching=True,
+        enable_streaming=True
+    )
+
+
+def create_vision_triton_config(service_name: str, model_id: str) -> TritonConfig:
+    """Create configuration for vision service"""
+    return TritonConfig(
+        service_name=service_name,
+        service_type=TritonServiceType.VISION,
+        model_id=model_id,
+        model_name=service_name.replace("-", "_"),
+        backend=TritonBackend.PYTHON,
+        use_tensorrt=False,
+        memory_gb=16,
+        max_batch_size=16
+    )
+
+
+def create_embedding_triton_config(service_name: str, model_id: str) -> TritonConfig:
+    """Create configuration for embedding service"""
+    return TritonConfig(
+        service_name=service_name,
+        service_type=TritonServiceType.EMBEDDING,
+        model_id=model_id,
+        model_name=service_name.replace("-", "_"),
+        backend=TritonBackend.PYTHON,
+        use_tensorrt=False,
+        memory_gb=8,
+        max_batch_size=32
+    )
```
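Because `TritonConfig.to_dict()` flattens the enum fields to their string values, the resulting dictionary is JSON-serializable and `from_dict()` can rebuild an equal config from it, which is how a config could persist through a store such as the new deployment repository. A minimal round-trip sketch, using hypothetical placeholder values:

```python
import json

from isa_model.deployment.triton.config import (
    TritonConfig,
    create_llm_triton_config,
)

# "demo-llm" and "org/model-7b" are hypothetical placeholders.
original = create_llm_triton_config("demo-llm", "org/model-7b", precision="int8")

# to_dict() emits only plain strings, ints, bools, and dicts, so the
# payload survives JSON; from_dict() re-parses the enum fields.
payload = json.dumps(original.to_dict())
restored = TritonConfig.from_dict(json.loads(payload))

assert restored == original      # dataclass equality, field by field
assert restored.memory_gb == 24  # the non-float16 branch of the factory
```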
```diff
--- /dev/null
+++ b/isa_model/deployment/triton/configs/__init__.py
@@ -0,0 +1 @@
+"""Triton configuration templates and utilities"""
```