kubetorch 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. kubetorch/__init__.py +59 -0
  2. kubetorch/cli.py +1939 -0
  3. kubetorch/cli_utils.py +967 -0
  4. kubetorch/config.py +453 -0
  5. kubetorch/constants.py +18 -0
  6. kubetorch/docs/Makefile +18 -0
  7. kubetorch/docs/__init__.py +0 -0
  8. kubetorch/docs/_ext/json_globaltoc.py +42 -0
  9. kubetorch/docs/api/cli.rst +10 -0
  10. kubetorch/docs/api/python/app.rst +21 -0
  11. kubetorch/docs/api/python/cls.rst +19 -0
  12. kubetorch/docs/api/python/compute.rst +25 -0
  13. kubetorch/docs/api/python/config.rst +11 -0
  14. kubetorch/docs/api/python/fn.rst +19 -0
  15. kubetorch/docs/api/python/image.rst +14 -0
  16. kubetorch/docs/api/python/secret.rst +18 -0
  17. kubetorch/docs/api/python/volumes.rst +13 -0
  18. kubetorch/docs/api/python.rst +101 -0
  19. kubetorch/docs/conf.py +69 -0
  20. kubetorch/docs/index.rst +20 -0
  21. kubetorch/docs/requirements.txt +5 -0
  22. kubetorch/globals.py +269 -0
  23. kubetorch/logger.py +59 -0
  24. kubetorch/resources/__init__.py +0 -0
  25. kubetorch/resources/callables/__init__.py +0 -0
  26. kubetorch/resources/callables/cls/__init__.py +0 -0
  27. kubetorch/resources/callables/cls/cls.py +159 -0
  28. kubetorch/resources/callables/fn/__init__.py +0 -0
  29. kubetorch/resources/callables/fn/fn.py +140 -0
  30. kubetorch/resources/callables/module.py +1315 -0
  31. kubetorch/resources/callables/utils.py +203 -0
  32. kubetorch/resources/compute/__init__.py +0 -0
  33. kubetorch/resources/compute/app.py +253 -0
  34. kubetorch/resources/compute/compute.py +2414 -0
  35. kubetorch/resources/compute/decorators.py +137 -0
  36. kubetorch/resources/compute/utils.py +1026 -0
  37. kubetorch/resources/compute/websocket.py +135 -0
  38. kubetorch/resources/images/__init__.py +1 -0
  39. kubetorch/resources/images/image.py +412 -0
  40. kubetorch/resources/images/images.py +64 -0
  41. kubetorch/resources/secrets/__init__.py +2 -0
  42. kubetorch/resources/secrets/kubernetes_secrets_client.py +377 -0
  43. kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
  44. kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
  45. kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
  46. kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
  47. kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
  48. kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
  49. kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
  50. kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
  51. kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
  52. kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
  53. kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
  54. kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
  55. kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
  56. kubetorch/resources/secrets/provider_secrets/providers.py +92 -0
  57. kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
  58. kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
  59. kubetorch/resources/secrets/secret.py +224 -0
  60. kubetorch/resources/secrets/secret_factory.py +64 -0
  61. kubetorch/resources/secrets/utils.py +222 -0
  62. kubetorch/resources/volumes/__init__.py +0 -0
  63. kubetorch/resources/volumes/volume.py +340 -0
  64. kubetorch/servers/__init__.py +0 -0
  65. kubetorch/servers/http/__init__.py +0 -0
  66. kubetorch/servers/http/distributed_utils.py +2968 -0
  67. kubetorch/servers/http/http_client.py +802 -0
  68. kubetorch/servers/http/http_server.py +1622 -0
  69. kubetorch/servers/http/server_metrics.py +255 -0
  70. kubetorch/servers/http/utils.py +722 -0
  71. kubetorch/serving/__init__.py +0 -0
  72. kubetorch/serving/autoscaling.py +153 -0
  73. kubetorch/serving/base_service_manager.py +344 -0
  74. kubetorch/serving/constants.py +77 -0
  75. kubetorch/serving/deployment_service_manager.py +431 -0
  76. kubetorch/serving/knative_service_manager.py +487 -0
  77. kubetorch/serving/raycluster_service_manager.py +526 -0
  78. kubetorch/serving/service_manager.py +18 -0
  79. kubetorch/serving/templates/deployment_template.yaml +17 -0
  80. kubetorch/serving/templates/knative_service_template.yaml +19 -0
  81. kubetorch/serving/templates/kt_setup_template.sh.j2 +91 -0
  82. kubetorch/serving/templates/pod_template.yaml +198 -0
  83. kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
  84. kubetorch/serving/templates/raycluster_template.yaml +35 -0
  85. kubetorch/serving/templates/service_template.yaml +21 -0
  86. kubetorch/serving/templates/workerset_template.yaml +36 -0
  87. kubetorch/serving/utils.py +344 -0
  88. kubetorch/utils.py +263 -0
  89. kubetorch-0.2.5.dist-info/METADATA +75 -0
  90. kubetorch-0.2.5.dist-info/RECORD +92 -0
  91. kubetorch-0.2.5.dist-info/WHEEL +4 -0
  92. kubetorch-0.2.5.dist-info/entry_points.txt +5 -0
kubetorch/serving/autoscaling.py
@@ -0,0 +1,153 @@
+ from dataclasses import asdict, dataclass
+
+ from kubetorch.logger import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class AutoScalingError(Exception):
+     pass
+
+
+ @dataclass
+ class AutoscalingConfig:
+     # The concurrent requests or requests per second threshold that triggers scaling
+     target: int = None
+
+     # The time window used to calculate the average of metrics for scaling decisions
+     window: str = None
+
+     # The metric type to base scaling decisions on:
+     # - concurrency: number of simultaneous requests
+     # - rps: requests per second
+     # - cpu: CPU utilization (requires HPA class)
+     # - memory: Memory utilization (requires HPA class)
+     metric: str = None
+
+     # The percentage of the target value at which to start scaling.
+     # E.g., if target=100 and target_utilization=70, scaling occurs at 70 requests
+     target_utilization: int = None
+
+     # Minimum number of replicas. 0 allows scaling to zero when idle
+     min_scale: int = None
+
+     # Maximum number of replicas the service can scale up to
+     max_scale: int = None
+
+     # Initial number of pods launched by the service
+     initial_scale: int = None
+
+     # Maximum concurrent requests per pod (containerConcurrency).
+     # If not set, pods accept unlimited concurrent requests.
+     concurrency: int = None
+
+     # Time to keep the last pod before scaling to zero (e.g., "30s", "1m5s")
+     scale_to_zero_pod_retention_period: str = None
+
+     # Delay before scaling down (e.g., "15m"). Only for KPA autoscaler.
+     scale_down_delay: str = None
+
+     # Autoscaler class: "kpa.autoscaling.knative.dev" or "hpa.autoscaling.knative.dev"
+     autoscaler_class: str = None
+
+     # Progress deadline for deployment (e.g., "10m"). Time to wait for deployment to be ready.
+     progress_deadline: str = None
+
+     def __init__(self, **kwargs):
+         """Support additional kwargs for autoscaling annotations"""
+         for field in self.__annotations__:
+             setattr(self, field, kwargs.pop(field, getattr(self, field, None)))
+
+         # set additional kwargs as annotations
+         self.extra_annotations = {f"autoscaling.knative.dev/{k}": str(v) for k, v in kwargs.items()}
+
+         self._validate()
+
+     def _validate(self):
+         """Validation logic moved to separate method"""
+         if self.min_scale is not None and self.max_scale is not None and self.min_scale > self.max_scale:
+             raise AutoScalingError("min_scale cannot be greater than max_scale")
+         if self.window is not None and not self.window.endswith(("s", "m", "h")):
+             raise AutoScalingError("window must end with s, m, or h")
+         if self.target_utilization is not None and (self.target_utilization <= 0 or self.target_utilization > 100):
+             raise AutoScalingError("target_utilization must be between 1 and 100")
+         if self.scale_to_zero_pod_retention_period is not None:
+             # Validate time format (e.g., "30s", "1m5s", "2h")
+             import re
+
+             if not re.match(r"^\d+[smh](\d+[smh])*$", self.scale_to_zero_pod_retention_period):
+                 raise AutoScalingError(
+                     "scale_to_zero_pod_retention_period must be a valid duration (e.g., '30s', '1m5s')"
+                 )
+         if self.scale_down_delay is not None:
+             # Validate time format
+             import re
+
+             if not re.match(r"^\d+[smh](\d+[smh])*$", self.scale_down_delay):
+                 raise AutoScalingError("scale_down_delay must be a valid duration (e.g., '15m', '1h')")
+         if self.autoscaler_class is not None and self.autoscaler_class not in [
+             "kpa.autoscaling.knative.dev",
+             "hpa.autoscaling.knative.dev",
+         ]:
+             raise AutoScalingError(
+                 "autoscaler_class must be 'kpa.autoscaling.knative.dev' or 'hpa.autoscaling.knative.dev'"
+             )
+         if self.progress_deadline is not None:
+             # Validate time format
+             import re
+
+             if not re.match(r"^\d+[smh](\d+[smh])*$", self.progress_deadline):
+                 raise AutoScalingError("progress_deadline must be a valid duration (e.g., '10m', '600s')")
+
+     def __post_init__(self):
+         """Call the same validation for dataclass initialization"""
+         self._validate()
+
+     def dict(self):
+         return asdict(self)
+
+     def convert_to_annotations(self) -> dict:
+         """Convert config to a dictionary of annotations for Knative"""
+         annotations = {}
+
+         # Set autoscaler class if specified, otherwise use default KPA
+         if self.autoscaler_class is not None:
+             annotations["autoscaling.knative.dev/class"] = self.autoscaler_class
+         else:
+             annotations["autoscaling.knative.dev/class"] = "kpa.autoscaling.knative.dev"
+
+         # Only set annotations for values that were explicitly provided
+         if self.target is not None:
+             annotations["autoscaling.knative.dev/target"] = str(self.target)
+
+         if self.min_scale is not None:
+             annotations["autoscaling.knative.dev/min-scale"] = str(self.min_scale)
+
+         if self.max_scale is not None:
+             annotations["autoscaling.knative.dev/max-scale"] = str(self.max_scale)
+
+         if self.window is not None:
+             annotations["autoscaling.knative.dev/window"] = self.window
+
+         if self.metric is not None:
+             annotations["autoscaling.knative.dev/metric"] = self.metric
+
+         if self.target_utilization is not None:
+             annotations["autoscaling.knative.dev/target-utilization-percentage"] = str(self.target_utilization)
+
+         if self.initial_scale is not None:
+             annotations["autoscaling.knative.dev/initial-scale"] = str(self.initial_scale)
+
+         if self.scale_to_zero_pod_retention_period is not None:
+             annotations[
+                 "autoscaling.knative.dev/scale-to-zero-pod-retention-period"
+             ] = self.scale_to_zero_pod_retention_period
+
+         if self.scale_down_delay is not None:
+             annotations["autoscaling.knative.dev/scale-down-delay"] = self.scale_down_delay
+
+         # Add any extra annotations from the config
+         if hasattr(self, "extra_annotations"):
+             annotations.update(self.extra_annotations)
+
+         return annotations
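
For orientation, here is a minimal usage sketch of the AutoscalingConfig added above (assuming this hunk is kubetorch/serving/autoscaling.py from the file list; the values shown are illustrative, not package defaults):

from kubetorch.serving.autoscaling import AutoscalingConfig

# Build a config from keyword arguments; any unknown kwargs are turned into
# "autoscaling.knative.dev/<key>" entries via extra_annotations.
config = AutoscalingConfig(
    target=100,              # scale on ~100 concurrent requests per pod
    metric="concurrency",
    min_scale=0,             # allow scale-to-zero when idle
    max_scale=10,
    window="60s",
    scale_down_delay="15m",  # honored only by the KPA autoscaler
)

# Only explicitly provided fields are rendered, e.g.
# {"autoscaling.knative.dev/class": "kpa.autoscaling.knative.dev",
#  "autoscaling.knative.dev/target": "100", ...}
annotations = config.convert_to_annotations()
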
kubetorch/serving/base_service_manager.py
@@ -0,0 +1,344 @@
+ import importlib
+
+ from abc import abstractmethod
+ from typing import Dict, List, Optional
+
+ import yaml
+ from jinja2 import Template
+
+ from kubernetes import client, utils
+ from kubernetes.client import AppsV1Api, CoreV1Api, CustomObjectsApi
+
+ import kubetorch.serving.constants as serving_constants
+ from kubetorch import globals
+
+ from kubetorch.logger import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class BaseServiceManager:
+     """Base service manager with common functionality for all service types."""
+
+     def __init__(
+         self,
+         objects_api: CustomObjectsApi,
+         core_api: CoreV1Api,
+         apps_v1_api: AppsV1Api,
+         namespace: str,
+     ):
+         self.objects_api = objects_api
+         self.core_api = core_api
+         self.apps_v1_api = apps_v1_api
+
+         # Load config
+         self.global_config = globals.config
+         self.namespace = namespace or self.global_config.namespace
+
+     @property
+     def username(self):
+         return self.global_config.username
+
+     @property
+     def base_labels(self):
+         """Base labels for all resources created by the service manager."""
+         from kubetorch import __version__
+
+         labels = {
+             serving_constants.KT_VERSION_LABEL: __version__,
+         }
+         if self.username:
+             labels[serving_constants.KT_USERNAME_LABEL] = self.username
+
+         return labels
+
+     def _apply_yaml_template(self, yaml_file, replace_existing=False, **kwargs):
+         with importlib.resources.files("kubetorch.serving.templates").joinpath(yaml_file).open("r") as f:
+             template = Template(f.read())
+
+         yaml_content = template.render(**kwargs)
+         yaml_objects = list(yaml.safe_load_all(yaml_content))
+         k8s_client = client.ApiClient()
+
+         for obj in yaml_objects:
+             logger.info(f"Applying {obj.get('kind')}/{obj.get('metadata', {}).get('name')}")
+             try:
+                 if replace_existing:
+                     # Try to delete existing resource first
+                     try:
+                         utils.delete_from_dict(k8s_client, obj)
+                     except client.exceptions.ApiException as e:
+                         if e.status != 404:  # Ignore if resource doesn't exist
+                             raise
+
+                 utils.create_from_dict(k8s_client, obj)
+                 logger.info(f"Successfully applied {obj.get('kind')}/{obj.get('metadata', {}).get('name')}")
+             except utils.FailToCreateError as e:
+                 if "already exists" in str(e):
+                     logger.info(f"Resource already exists: {obj.get('kind')}/{obj.get('metadata', {}).get('name')}")
+                 else:
+                     raise
+
+     @abstractmethod
+     def get_deployment_timestamp_annotation(self, service_name: str) -> Optional[str]:
+         """Get deployment timestamp annotation for this service type."""
+         pass
+
+     @abstractmethod
+     def update_deployment_timestamp_annotation(self, service_name: str, new_timestamp: str) -> str:
+         """Update deployment timestamp annotation for this service type."""
+         pass
+
+     def fetch_kubetorch_config(self) -> dict:
+         """Fetch the kubetorch configmap from the namespace."""
+         try:
+             kubetorch_config = self.core_api.read_namespaced_config_map(
+                 name="kubetorch-config", namespace=globals.config.install_namespace
+             )
+             return kubetorch_config.data
+         except client.exceptions.ApiException as e:
+             if e.status != 404:
+                 logger.error(f"Error fetching kubetorch config: {e}")
+             return {}
+
+     @staticmethod
+     def discover_services_static(
+         namespace: str, objects_api=None, apps_v1_api=None, name_filter: str = None
+     ) -> List[Dict]:
+         """Static method to discover Kubetorch services without ServiceManager instance.
+
+         Uses parallel API calls for faster discovery across service types.
+
+         Args:
+             namespace: Kubernetes namespace
+             objects_api: Optional CustomObjectsApi instance (created if None)
+             apps_v1_api: Optional AppsV1Api instance (created if None)
+             name_filter: Optional name filter for services
+
+         Returns:
+             List of service dictionaries with structure:
+             {
+                 'name': str,
+                 'template_type': str,  # 'ksvc', 'deployment', 'raycluster'
+                 'resource': dict,
+                 'namespace': str,
+                 'creation_timestamp': str  # ISO format
+             }
+         """
+         import concurrent.futures
+         import threading
+
+         if objects_api is None:
+             objects_api = client.CustomObjectsApi()
+         if apps_v1_api is None:
+             apps_v1_api = client.AppsV1Api()
+
+         services = []
+         services_lock = threading.Lock()
+
+         def fetch_knative_services():
+             """Fetch Knative services in parallel."""
+             try:
+                 label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=ksvc"
+                 knative_services = objects_api.list_namespaced_custom_object(
+                     group="serving.knative.dev",
+                     version="v1",
+                     namespace=namespace,
+                     plural="services",
+                     label_selector=label_selector,
+                 )["items"]
+
+                 local_services = []
+                 for svc in knative_services:
+                     svc_name = svc["metadata"]["name"]
+                     if name_filter and name_filter not in svc_name:
+                         continue
+
+                     local_services.append(
+                         {
+                             "name": svc_name,
+                             "template_type": "ksvc",
+                             "resource": svc,  # Already a dict
+                             "namespace": namespace,
+                             "creation_timestamp": svc["metadata"]["creationTimestamp"],
+                         }
+                     )
+
+                 with services_lock:
+                     services.extend(local_services)
+
+             except client.exceptions.ApiException as e:
+                 if e.status != 404:  # Ignore if Knative is not installed
+                     logger.warning(f"Failed to list Knative services: {e}")
+
+         def fetch_deployments():
+             """Fetch Deployments in parallel."""
+             try:
+                 label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=deployment"
+                 deployments = apps_v1_api.list_namespaced_deployment(
+                     namespace=namespace,
+                     label_selector=label_selector,
+                 )
+
+                 local_services = []
+                 for deployment in deployments.items:
+                     deploy_name = deployment.metadata.name
+                     if name_filter and name_filter not in deploy_name:
+                         continue
+
+                     # Convert V1Deployment object to dictionary for consistency
+                     deployment_dict = client.ApiClient().sanitize_for_serialization(deployment)
+
+                     # Add kind and apiVersion (not included in V1Deployment object)
+                     deployment_dict["kind"] = "Deployment"
+                     deployment_dict["apiVersion"] = "apps/v1"
+
+                     local_services.append(
+                         {
+                             "name": deploy_name,
+                             "template_type": "deployment",
+                             "resource": deployment_dict,  # Now consistently a dict
+                             "namespace": namespace,
+                             "creation_timestamp": deployment.metadata.creation_timestamp.isoformat() + "Z",
+                         }
+                     )
+
+                 with services_lock:
+                     services.extend(local_services)
+
+             except client.exceptions.ApiException as e:
+                 logger.warning(f"Failed to list Deployments: {e}")
+
+         def fetch_rayclusters():
+             """Fetch RayClusters in parallel."""
+             try:
+                 label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=raycluster"
+                 rayclusters = objects_api.list_namespaced_custom_object(
+                     group="ray.io",
+                     version="v1",
+                     namespace=namespace,
+                     plural="rayclusters",
+                     label_selector=label_selector,
+                 )["items"]
+
+                 local_services = []
+                 for cluster in rayclusters:
+                     cluster_name = cluster["metadata"]["name"]
+                     if name_filter and name_filter not in cluster_name:
+                         continue
+
+                     local_services.append(
+                         {
+                             "name": cluster_name,
+                             "template_type": "raycluster",
+                             "resource": cluster,  # Already a dict
+                             "namespace": namespace,
+                             "creation_timestamp": cluster["metadata"]["creationTimestamp"],
+                         }
+                     )
+
+                 with services_lock:
+                     services.extend(local_services)
+
+             except client.exceptions.ApiException as e:
+                 if e.status != 404:
+                     logger.warning(f"Failed to list RayClusters: {e}")
+
+         # Execute all API calls in parallel
+         with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
+             futures = [
+                 executor.submit(fetch_knative_services),
+                 executor.submit(fetch_deployments),
+                 executor.submit(fetch_rayclusters),
+             ]
+
+             # Wait for all to complete
+             concurrent.futures.wait(futures)
+
+         return services
+
+     @staticmethod
+     def get_pods_for_service_static(
+         service_name: str,
+         namespace: str,
+         core_api=None,
+     ) -> List:
+         """Static method to get pods for a service across different service types.
+
+         Args:
+             service_name: Name of the service
+             namespace: Kubernetes namespace
+             core_api: Optional CoreV1Api instance (created if None)
+
+         Returns:
+             List of pod objects
+         """
+         if core_api is None:
+             core_api = client.CoreV1Api()
+
+         # Build label selector
+         label_selector = f"{serving_constants.KT_SERVICE_LABEL}={service_name}"
+         try:
+             pods = core_api.list_namespaced_pod(namespace=namespace, label_selector=label_selector)
+             return pods.items
+         except client.exceptions.ApiException as e:
+             logger.warning(f"Failed to list pods for service {service_name}: {e}")
+             return []
+
+     def discover_all_services(self, namespace: str = None) -> List[Dict]:
+         """Discover all Kubetorch services across different resource types.
+
+         Returns a list of service dictionaries with normalized structure:
+         {
+             'name': str,
+             'template_type': str,  # 'ksvc', 'deployment', 'raycluster'
+             'resource': object,  # The actual Kubernetes resource object
+             'namespace': str
+         }
+         """
+         return self.discover_services_static(
+             namespace=namespace or self.namespace,
+             objects_api=self.objects_api,
+             apps_v1_api=self.apps_v1_api,
+         )
+
+     # Abstract methods to be implemented by subclasses
+     def create_or_update_service(self, *args, **kwargs):
+         raise NotImplementedError("Subclasses must implement create_or_update_service")
+
+     def get_endpoint(self, service_name: str) -> str:
+         raise NotImplementedError("Subclasses must implement get_endpoint")
+
+     def get_pods_for_service(self, service_name: str, **kwargs) -> List[client.V1Pod]:
+         raise NotImplementedError("Subclasses must implement get_pods_for_service")
+
+     def check_service_ready(self, service_name: str, launch_timeout: int, **kwargs) -> bool:
+         """Check if service is ready to serve requests.
+
+         This method should be implemented by subclasses to provide service-type-specific
+         readiness checking logic.
+
+         Args:
+             service_name: Name of the service to check
+             launch_timeout: Timeout in seconds to wait for service to be ready
+             **kwargs: Additional arguments for readiness checking
+
+         Returns:
+             True if service is ready, raises exception if timeout or error
+         """
+         raise NotImplementedError("Subclasses must implement check_service_ready")
+
+     def teardown_service(self, service_name: str, console=None) -> bool:
+         """Teardown/delete service and associated resources.
+
+         This method should be implemented by subclasses to provide service-type-specific
+         teardown logic.
+
+         Args:
+             service_name: Name of the service to teardown
+             console: Optional Rich console for output
+
+         Returns:
+             True if teardown was successful, False otherwise
+         """
+         raise NotImplementedError("Subclasses must implement teardown_service")
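
A short sketch of the class-level discovery path added above, used without instantiating a concrete service manager (assumes a kubeconfig is already available; the namespace and filter values are illustrative):

from kubernetes import config as k8s_config
from kubetorch.serving.base_service_manager import BaseServiceManager

k8s_config.load_kube_config()  # or load_incluster_config() when running inside a pod

# Queries Knative Services, Deployments, and RayClusters in parallel and returns
# normalized dicts: name, template_type, resource, namespace, creation_timestamp.
services = BaseServiceManager.discover_services_static(
    namespace="default",
    name_filter="my-app",  # hypothetical substring filter
)
for svc in services:
    print(svc["name"], svc["template_type"], svc["creation_timestamp"])
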
kubetorch/serving/constants.py
@@ -0,0 +1,77 @@
+ # K8s Configuration
+ KUBECTL_PORT = 6443
+ KT_LAUNCH_TIMEOUT = 900  # 15 minutes
+
+ # Ports
+ DEFAULT_NGINX_PORT = 8080
+ LOCAL_NGINX_PORT = 38080
+ DEFAULT_KT_SERVER_PORT = 32300  # Standard port of Knative services
+ DEFAULT_DEBUG_PORT = 5678
+
+ # Namespaces
+ KUBETORCH_NAMESPACE = "kubetorch"
+ RUNHOUSE_NAMESPACE = "runhouse"
+ DEFAULT_NAMESPACE = "default"
+
+ # Images
+ SERVER_IMAGE_MINIMAL = "ghcr.io/run-house/server:v3"
+ SERVER_IMAGE_WITH_OTEL = "ghcr.io/run-house/server-otel:v3"
+
+ UBUNTU_IMAGE_MINIMAL = "ghcr.io/run-house/ubuntu:v1"
+ UBUNTU_IMAGE_WITH_OTEL = "ghcr.io/run-house/ubuntu:v1"
+
+ DEFAULT_PROXY_IMAGE = "ghcr.io/run-house/proxy:v2"
+ KUBETORCH_IMAGE_TRAPDOOR = "kubetorch"
+
+ # Service Accounts
+ DEFAULT_SERVICE_ACCOUNT_NAME = "kubetorch-service-account"
+
+ # Annotations
+ INACTIVITY_TTL_ANNOTATION = "kubetorch.com/inactivity-ttl"
+ KUBECONFIG_PATH_ANNOTATION = "kubetorch.com/kubeconfig-path"
+
+ # Labels
+ KT_SERVICE_LABEL = "kubetorch.com/service"
+ KT_VERSION_LABEL = "kubetorch.com/version"
+ KT_MODULE_LABEL = "kubetorch.com/module"
+ KT_USER_IDENTIFIER_LABEL = "kubetorch.com/user-identifier"
+ KT_USERNAME_LABEL = "kubetorch.com/username"
+ KT_POD_TYPE_LABEL = "kubetorch.com/pod-type"
+ KT_TEMPLATE_LABEL = "kubetorch.com/template"
+ KT_SECRET_NAME_LABEL = "kubetorch.com/secret-name"
+
+ # Templates
+ TTL_CONTROLLER_CONFIGMAP_NAME = "kubetorch-ttl-controller-config"
+ KNATIVE_SERVICE_TEMPLATE_FILE = "knative_service_template.yaml"
+ POD_TEMPLATE_FILE = "pod_template.yaml"
+ KT_SETUP_TEMPLATE_FILE = "kt_setup_template.sh.j2"
+ DEPLOYMENT_TEMPLATE_FILE = "deployment_template.yaml"
+ DEPLOYMENT_SERVICE_TEMPLATE_FILE = "service_template.yaml"
+ RAYCLUSTER_TEMPLATE_FILE = "raycluster_template.yaml"
+ RAYCLUSTER_SERVICE_TEMPLATE_FILE = "raycluster_service_template.yaml"
+
+ # Loki
+ LOKI_GATEWAY_SERVICE_NAME = "loki-gateway"
+
+ # Prometheus
+ PROMETHEUS_SERVICE_NAME = "kubetorch-metrics"
+
+ # Grafana
+ GRAFANA_HEALTH_ENDPOINT = "/api/health"
+ PROMETHEUS_HEALTH_ENDPOINT = "/metrics"
+
+ # KAI
+ KAI_SCHEDULER_NAME = "kai-scheduler"
+ KAI_SCHEDULER_LABEL = "kai.scheduler/queue"
+
+ # HTTP Client
+ KT_TERMINATION_REASONS = ["OOMKilled", "Evicted", "Not Found"]
+
+ # NGINX
+ NGINX_GATEWAY_PROXY = "kubetorch-proxy"
+ DEFAULT_NGINX_HEALTH_ENDPOINT = "/health"
+
+ # Rsync
+ RSYNC_LOCAL_PORT = 3873
+ REMOTE_RSYNC_PORT = 873
+ RSYNC_SERVICE_NAME = "kubetorch-rsync"
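
As a small illustration of how these constants are consumed elsewhere in the diff (mirroring the label selectors built in base_service_manager.py; the service name here is hypothetical):

import kubetorch.serving.constants as serving_constants

service_name = "my-service"  # hypothetical
template_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=deployment"
pod_selector = f"{serving_constants.KT_SERVICE_LABEL}={service_name}"
# -> "kubetorch.com/template=deployment" and "kubetorch.com/service=my-service"
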