isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,10 @@
1
+ """
2
+ Triton Inference Server deployment provider
3
+
4
+ Supports bare metal GPU deployment with TensorRT-LLM optimization.
5
+ """
6
+
7
+ from .config import TritonConfig, TritonServiceType, create_llm_triton_config
8
+ from .provider import TritonProvider
9
+
10
+ __all__ = ["TritonConfig", "TritonServiceType", "TritonProvider", "create_llm_triton_config"]
@@ -0,0 +1,196 @@
1
+ """
2
+ Triton deployment configuration
3
+
4
+ Configuration classes for Triton Inference Server deployment with TensorRT-LLM backend.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Dict, Any, Optional, List
9
+ from enum import Enum
10
+ from pathlib import Path
11
+
12
+
13
class TritonServiceType(Enum):
    """Kinds of service a Triton deployment can be configured for.

    Each member's value is the lowercase string written out when a
    configuration is serialized and read back when it is restored.
    """

    # Large language model serving
    LLM = "llm"
    # Vision model serving
    VISION = "vision"
    # Embedding model serving
    EMBEDDING = "embedding"
18
+
19
+
20
class TritonBackend(Enum):
    """Execution backends a Triton model can be configured with.

    Each member's value is the backend identifier string stored in a
    serialized configuration.
    NOTE(review): the values presumably match the backend names Triton
    expects in its model configuration -- confirm against the provider.
    """

    # TensorRT-LLM backend (the default used by TritonConfig)
    TENSORRT_LLM = "tensorrtllm"
    # Python backend
    PYTHON = "python"
    # ONNX Runtime backend
    ONNX = "onnxruntime"
    # PyTorch backend
    PYTORCH = "pytorch"
26
+
27
+
28
@dataclass
class TritonConfig:
    """Configuration for Triton Inference Server deployment.

    The four identification/model fields without defaults are required;
    every other field has a default declared here, and these declarations
    are the single source of truth for defaults (``from_dict`` relies on
    them rather than repeating the literals).
    """

    # Service identification
    service_name: str
    service_type: TritonServiceType
    model_id: str

    # Model configuration
    model_name: str
    model_version: str = "1"
    backend: TritonBackend = TritonBackend.TENSORRT_LLM

    # Model paths
    model_repository: str = "/models"
    hf_model_path: str = "/workspace/hf_model"
    engine_output_path: str = "/workspace/engines"

    # Performance settings
    max_batch_size: int = 8
    max_sequence_length: int = 2048
    instance_group_count: int = 1
    instance_group_kind: str = "KIND_GPU"

    # TensorRT-LLM specific
    use_tensorrt: bool = True
    tensorrt_precision: str = "float16"  # float16, int8, int4
    use_inflight_batching: bool = True
    enable_streaming: bool = True

    # Container configuration
    gpu_type: str = "nvidia"
    gpu_count: int = 1
    memory_gb: int = 32
    container_image: str = "nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3"

    # Network configuration
    http_port: int = 8000
    grpc_port: int = 8001
    metrics_port: int = 8002

    # Build configuration
    build_container_image: str = "nvcr.io/nvidia/tensorrtllm/tensorrt-llm:latest"
    build_options: Dict[str, Any] = field(default_factory=lambda: {
        "gemm_plugin": "float16",
        "gpt_attention_plugin": "float16",
        "paged_kv_cache": True,
        "remove_input_padding": True
    })

    # Environment variables
    environment: Dict[str, str] = field(default_factory=dict)

    @staticmethod
    def _copy_if_dict(value: Any) -> Any:
        """Return a shallow copy of *value* when it is a dict, else *value* unchanged."""
        return dict(value) if isinstance(value, dict) else value

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            A plain dict with one entry per field. Enum fields are stored
            by their ``.value``; ``build_options`` and ``environment`` are
            shallow-copied so editing the result does not mutate this
            config.
        """
        return {
            "service_name": self.service_name,
            "service_type": self.service_type.value,
            "model_id": self.model_id,
            "model_name": self.model_name,
            "model_version": self.model_version,
            "backend": self.backend.value,
            "model_repository": self.model_repository,
            "hf_model_path": self.hf_model_path,
            "engine_output_path": self.engine_output_path,
            "max_batch_size": self.max_batch_size,
            "max_sequence_length": self.max_sequence_length,
            "instance_group_count": self.instance_group_count,
            "instance_group_kind": self.instance_group_kind,
            "use_tensorrt": self.use_tensorrt,
            "tensorrt_precision": self.tensorrt_precision,
            "use_inflight_batching": self.use_inflight_batching,
            "enable_streaming": self.enable_streaming,
            "gpu_type": self.gpu_type,
            "gpu_count": self.gpu_count,
            "memory_gb": self.memory_gb,
            "container_image": self.container_image,
            "http_port": self.http_port,
            "grpc_port": self.grpc_port,
            "metrics_port": self.metrics_port,
            "build_container_image": self.build_container_image,
            "build_options": self._copy_if_dict(self.build_options),
            "environment": self._copy_if_dict(self.environment),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TritonConfig":
        """Create from dictionary.

        Args:
            data: Mapping shaped like the output of ``to_dict``. Only
                ``service_name``, ``service_type``, ``model_id`` and
                ``model_name`` are required; any other field missing from
                ``data`` takes its dataclass default. Keys that are not
                fields are ignored.

        Returns:
            A new ``TritonConfig``.

        Raises:
            KeyError: if a required key is missing.
            ValueError: if ``service_type`` or ``backend`` is not a valid
                enum value.
        """
        from dataclasses import fields

        # Required keys are indexed directly so a missing one raises
        # KeyError, which is what callers of this method already expect.
        kwargs: Dict[str, Any] = {
            "service_name": data["service_name"],
            "service_type": TritonServiceType(data["service_type"]),
            "model_id": data["model_id"],
            "model_name": data["model_name"],
        }

        # Forward only the optional fields that were actually supplied;
        # absent ones fall back to the defaults declared on the class, so
        # the default values live in exactly one place.
        for spec in fields(cls):
            if spec.name not in kwargs and spec.name in data:
                kwargs[spec.name] = data[spec.name]

        if "backend" in kwargs:
            kwargs["backend"] = TritonBackend(kwargs["backend"])

        # Copy mutable values so the new config does not share state with
        # the caller's dictionary.
        for name in ("build_options", "environment"):
            if name in kwargs:
                kwargs[name] = cls._copy_if_dict(kwargs[name])

        return cls(**kwargs)
151
+
152
+
153
+ # Predefined configurations for common use cases
154
def create_llm_triton_config(service_name: str, model_id: str,
                             precision: str = "float16",
                             max_batch_size: int = 8) -> TritonConfig:
    """Build a ``TritonConfig`` for an LLM service using TensorRT-LLM.

    Args:
        service_name: Name of the service; also used, with hyphens turned
            into underscores, as the model name.
        model_id: Identifier of the model to deploy.
        precision: Value stored in ``tensorrt_precision``.
        max_batch_size: Value stored in ``max_batch_size``.

    Returns:
        A config of type ``TritonServiceType.LLM`` with in-flight batching
        and streaming enabled. ``memory_gb`` is 32 when ``precision`` is
        ``"float16"`` and 24 for any other precision.
    """
    if precision == "float16":
        memory_budget_gb = 32
    else:
        memory_budget_gb = 24

    triton_model_name = service_name.replace("-", "_")

    return TritonConfig(
        service_name=service_name,
        service_type=TritonServiceType.LLM,
        model_id=model_id,
        model_name=triton_model_name,
        tensorrt_precision=precision,
        max_batch_size=max_batch_size,
        memory_gb=memory_budget_gb,
        use_inflight_batching=True,
        enable_streaming=True,
    )
169
+
170
+
171
def create_vision_triton_config(service_name: str, model_id: str) -> TritonConfig:
    """Build a ``TritonConfig`` for a vision service.

    Args:
        service_name: Name of the service; also used, with hyphens turned
            into underscores, as the model name.
        model_id: Identifier of the model to deploy.

    Returns:
        A config of type ``TritonServiceType.VISION`` on the Python
        backend with TensorRT disabled, ``memory_gb`` of 16 and a
        ``max_batch_size`` of 16.
    """
    settings = {
        "service_name": service_name,
        "service_type": TritonServiceType.VISION,
        "model_id": model_id,
        "model_name": service_name.replace("-", "_"),
        "backend": TritonBackend.PYTHON,
        "use_tensorrt": False,
        "memory_gb": 16,
        "max_batch_size": 16,
    }
    return TritonConfig(**settings)
183
+
184
+
185
def create_embedding_triton_config(service_name: str, model_id: str) -> TritonConfig:
    """Build a ``TritonConfig`` for an embedding service.

    Args:
        service_name: Name of the service; also used, with hyphens turned
            into underscores, as the model name.
        model_id: Identifier of the model to deploy.

    Returns:
        A config of type ``TritonServiceType.EMBEDDING`` on the Python
        backend with TensorRT disabled, ``memory_gb`` of 8 and a
        ``max_batch_size`` of 32.
    """
    settings = {
        "service_name": service_name,
        "service_type": TritonServiceType.EMBEDDING,
        "model_id": model_id,
        "model_name": service_name.replace("-", "_"),
        "backend": TritonBackend.PYTHON,
        "use_tensorrt": False,
        "memory_gb": 8,
        "max_batch_size": 32,
    }
    return TritonConfig(**settings)
@@ -0,0 +1 @@
1
+ """Triton configuration templates and utilities"""