isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,10 @@
1
+ """
2
+ Triton Inference Server deployment provider
3
+
4
+ Supports bare metal GPU deployment with TensorRT-LLM optimization.
5
+ """
6
+
7
+ from .config import TritonConfig, TritonServiceType, create_llm_triton_config
8
+ from .provider import TritonProvider
9
+
10
+ __all__ = ["TritonConfig", "TritonServiceType", "TritonProvider", "create_llm_triton_config"]
@@ -0,0 +1,196 @@
1
+ """
2
+ Triton deployment configuration
3
+
4
+ Configuration classes for Triton Inference Server deployment with TensorRT-LLM backend.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Dict, Any, Optional, List
9
+ from enum import Enum
10
+ from pathlib import Path
11
+
12
+
13
class TritonServiceType(Enum):
    """Category of workload served by a Triton deployment.

    Each member's value is the lowercase string stored when a
    configuration is serialized.
    """

    LLM = "llm"
    VISION = "vision"
    EMBEDDING = "embedding"
18
+
19
+
20
class TritonBackend(Enum):
    """Backend identifier used for a Triton model.

    Each member's value is the string stored when a configuration is
    serialized.
    """

    TENSORRT_LLM = "tensorrtllm"
    PYTHON = "python"
    ONNX = "onnxruntime"
    PYTORCH = "pytorch"
26
+
27
+
28
@dataclass
class TritonConfig:
    """Configuration for Triton Inference Server deployment.

    The four fields without defaults (``service_name``, ``service_type``,
    ``model_id``, ``model_name``) are required; everything else falls back
    to the defaults declared below.

    ``to_dict`` and ``from_dict`` convert to and from a plain dictionary in
    which the two enum fields are stored by value.  Both directions copy
    the ``build_options`` and ``environment`` dictionaries, so editing the
    serialized form never changes a live configuration and vice versa.
    """

    # Service identification
    service_name: str
    service_type: TritonServiceType
    model_id: str

    # Model configuration
    model_name: str
    model_version: str = "1"
    backend: TritonBackend = TritonBackend.TENSORRT_LLM

    # Model paths
    model_repository: str = "/models"
    hf_model_path: str = "/workspace/hf_model"
    engine_output_path: str = "/workspace/engines"

    # Performance settings
    max_batch_size: int = 8
    max_sequence_length: int = 2048
    instance_group_count: int = 1
    instance_group_kind: str = "KIND_GPU"

    # TensorRT-LLM specific
    use_tensorrt: bool = True
    tensorrt_precision: str = "float16"  # float16, int8, int4
    use_inflight_batching: bool = True
    enable_streaming: bool = True

    # Container configuration
    gpu_type: str = "nvidia"
    gpu_count: int = 1
    memory_gb: int = 32
    container_image: str = "nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3"

    # Network configuration
    http_port: int = 8000
    grpc_port: int = 8001
    metrics_port: int = 8002

    # Build configuration
    build_container_image: str = "nvcr.io/nvidia/tensorrtllm/tensorrt-llm:latest"
    build_options: Dict[str, Any] = field(default_factory=lambda: {
        "gemm_plugin": "float16",
        "gpt_attention_plugin": "float16",
        "paged_kv_cache": True,
        "remove_input_padding": True,
    })

    # Environment variables
    environment: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            A dictionary with one entry per dataclass field, in declaration
            order.  Enum fields are replaced by their ``.value``; dict
            fields are shallow-copied so the caller cannot mutate this
            configuration through the returned payload.
        """
        # Local import: the module header only imports ``dataclass`` and ``field``.
        from dataclasses import fields

        payload: Dict[str, Any] = {}
        for spec in fields(self):
            value = getattr(self, spec.name)
            if isinstance(value, Enum):
                value = value.value
            elif isinstance(value, dict):
                # Copy so the payload does not alias the live config.
                value = dict(value)
            payload[spec.name] = value
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TritonConfig":
        """Create from dictionary.

        Args:
            data: Mapping as produced by ``to_dict``.  ``service_name``,
                ``service_type``, ``model_id`` and ``model_name`` must be
                present; any other field may be omitted and then takes the
                dataclass default.  Keys that are not fields are ignored.

        Returns:
            A new ``TritonConfig``.

        Raises:
            KeyError: If one of the four required keys is missing.
            ValueError: If ``service_type`` or ``backend`` is not a valid
                enum value.
        """
        # Local import: the module header only imports ``dataclass`` and ``field``.
        from dataclasses import fields

        # Required keys are indexed directly so a missing one raises KeyError.
        kwargs: Dict[str, Any] = {
            "service_name": data["service_name"],
            "service_type": TritonServiceType(data["service_type"]),
            "model_id": data["model_id"],
            "model_name": data["model_name"],
        }

        # Optional keys: forward only what the caller supplied and let the
        # dataclass defaults fill the rest, so defaults live in one place.
        for spec in fields(cls):
            name = spec.name
            if name in kwargs or name not in data:
                continue
            value = data[name]
            if name == "backend":
                value = TritonBackend(value)
            elif isinstance(value, dict):
                # Copy so the new config does not alias the caller's dict.
                value = dict(value)
            kwargs[name] = value

        return cls(**kwargs)
151
+
152
+
153
+ # Predefined configurations for common use cases
154
def create_llm_triton_config(service_name: str, model_id: str,
                             precision: str = "float16",
                             max_batch_size: int = 8) -> TritonConfig:
    """Build a ``TritonConfig`` preset for an LLM service.

    Args:
        service_name: Service name; also used, with ``-`` replaced by
            ``_``, as the model name.
        model_id: Identifier of the model to serve.
        precision: Value stored in ``tensorrt_precision``.
        max_batch_size: Value stored in ``max_batch_size``.

    Returns:
        A config of type ``TritonServiceType.LLM`` with in-flight batching
        and streaming enabled.  ``memory_gb`` is 32 for ``"float16"`` and
        24 for any other precision.
    """
    if precision == "float16":
        required_memory_gb = 32
    else:
        required_memory_gb = 24

    triton_model_name = service_name.replace("-", "_")

    return TritonConfig(
        service_name=service_name,
        service_type=TritonServiceType.LLM,
        model_id=model_id,
        model_name=triton_model_name,
        tensorrt_precision=precision,
        max_batch_size=max_batch_size,
        memory_gb=required_memory_gb,
        use_inflight_batching=True,
        enable_streaming=True,
    )
169
+
170
+
171
def create_vision_triton_config(service_name: str, model_id: str) -> TritonConfig:
    """Build a ``TritonConfig`` preset for a vision service.

    Args:
        service_name: Service name; also used, with ``-`` replaced by
            ``_``, as the model name.
        model_id: Identifier of the model to serve.

    Returns:
        A config of type ``TritonServiceType.VISION`` on the Python
        backend with TensorRT disabled, 16 GB of memory and a maximum
        batch size of 16.
    """
    preset = {
        "service_type": TritonServiceType.VISION,
        "backend": TritonBackend.PYTHON,
        "use_tensorrt": False,
        "memory_gb": 16,
        "max_batch_size": 16,
    }
    return TritonConfig(
        service_name=service_name,
        model_id=model_id,
        model_name=service_name.replace("-", "_"),
        **preset,
    )
183
+
184
+
185
def create_embedding_triton_config(service_name: str, model_id: str) -> TritonConfig:
    """Build a ``TritonConfig`` preset for an embedding service.

    Args:
        service_name: Service name; also used, with ``-`` replaced by
            ``_``, as the model name.
        model_id: Identifier of the model to serve.

    Returns:
        A config of type ``TritonServiceType.EMBEDDING`` on the Python
        backend with TensorRT disabled, 8 GB of memory and a maximum
        batch size of 32.
    """
    preset = {
        "service_type": TritonServiceType.EMBEDDING,
        "backend": TritonBackend.PYTHON,
        "use_tensorrt": False,
        "memory_gb": 8,
        "max_batch_size": 32,
    }
    return TritonConfig(
        service_name=service_name,
        model_id=model_id,
        model_name=service_name.replace("-", "_"),
        **preset,
    )
@@ -0,0 +1 @@
1
+ """Triton configuration templates and utilities"""