kubetorch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubetorch might be problematic. Click here for more details.

Files changed (93) hide show
  1. kubetorch/__init__.py +60 -0
  2. kubetorch/cli.py +1985 -0
  3. kubetorch/cli_utils.py +1025 -0
  4. kubetorch/config.py +453 -0
  5. kubetorch/constants.py +18 -0
  6. kubetorch/docs/Makefile +18 -0
  7. kubetorch/docs/__init__.py +0 -0
  8. kubetorch/docs/_ext/json_globaltoc.py +42 -0
  9. kubetorch/docs/api/cli.rst +10 -0
  10. kubetorch/docs/api/python/app.rst +21 -0
  11. kubetorch/docs/api/python/cls.rst +19 -0
  12. kubetorch/docs/api/python/compute.rst +25 -0
  13. kubetorch/docs/api/python/config.rst +11 -0
  14. kubetorch/docs/api/python/fn.rst +19 -0
  15. kubetorch/docs/api/python/image.rst +14 -0
  16. kubetorch/docs/api/python/secret.rst +18 -0
  17. kubetorch/docs/api/python/volumes.rst +13 -0
  18. kubetorch/docs/api/python.rst +101 -0
  19. kubetorch/docs/conf.py +69 -0
  20. kubetorch/docs/index.rst +20 -0
  21. kubetorch/docs/requirements.txt +5 -0
  22. kubetorch/globals.py +285 -0
  23. kubetorch/logger.py +59 -0
  24. kubetorch/resources/__init__.py +0 -0
  25. kubetorch/resources/callables/__init__.py +0 -0
  26. kubetorch/resources/callables/cls/__init__.py +0 -0
  27. kubetorch/resources/callables/cls/cls.py +157 -0
  28. kubetorch/resources/callables/fn/__init__.py +0 -0
  29. kubetorch/resources/callables/fn/fn.py +133 -0
  30. kubetorch/resources/callables/module.py +1416 -0
  31. kubetorch/resources/callables/utils.py +174 -0
  32. kubetorch/resources/compute/__init__.py +0 -0
  33. kubetorch/resources/compute/app.py +261 -0
  34. kubetorch/resources/compute/compute.py +2596 -0
  35. kubetorch/resources/compute/decorators.py +139 -0
  36. kubetorch/resources/compute/rbac.py +74 -0
  37. kubetorch/resources/compute/utils.py +1114 -0
  38. kubetorch/resources/compute/websocket.py +137 -0
  39. kubetorch/resources/images/__init__.py +1 -0
  40. kubetorch/resources/images/image.py +414 -0
  41. kubetorch/resources/images/images.py +74 -0
  42. kubetorch/resources/secrets/__init__.py +2 -0
  43. kubetorch/resources/secrets/kubernetes_secrets_client.py +412 -0
  44. kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
  45. kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
  46. kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
  47. kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
  48. kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
  49. kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
  50. kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
  51. kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
  52. kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
  53. kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
  54. kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
  55. kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
  56. kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
  57. kubetorch/resources/secrets/provider_secrets/providers.py +93 -0
  58. kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
  59. kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
  60. kubetorch/resources/secrets/secret.py +238 -0
  61. kubetorch/resources/secrets/secret_factory.py +70 -0
  62. kubetorch/resources/secrets/utils.py +209 -0
  63. kubetorch/resources/volumes/__init__.py +0 -0
  64. kubetorch/resources/volumes/volume.py +365 -0
  65. kubetorch/servers/__init__.py +0 -0
  66. kubetorch/servers/http/__init__.py +0 -0
  67. kubetorch/servers/http/distributed_utils.py +3223 -0
  68. kubetorch/servers/http/http_client.py +730 -0
  69. kubetorch/servers/http/http_server.py +1788 -0
  70. kubetorch/servers/http/server_metrics.py +278 -0
  71. kubetorch/servers/http/utils.py +728 -0
  72. kubetorch/serving/__init__.py +0 -0
  73. kubetorch/serving/autoscaling.py +173 -0
  74. kubetorch/serving/base_service_manager.py +363 -0
  75. kubetorch/serving/constants.py +83 -0
  76. kubetorch/serving/deployment_service_manager.py +478 -0
  77. kubetorch/serving/knative_service_manager.py +519 -0
  78. kubetorch/serving/raycluster_service_manager.py +582 -0
  79. kubetorch/serving/service_manager.py +18 -0
  80. kubetorch/serving/templates/deployment_template.yaml +17 -0
  81. kubetorch/serving/templates/knative_service_template.yaml +19 -0
  82. kubetorch/serving/templates/kt_setup_template.sh.j2 +81 -0
  83. kubetorch/serving/templates/pod_template.yaml +194 -0
  84. kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
  85. kubetorch/serving/templates/raycluster_template.yaml +35 -0
  86. kubetorch/serving/templates/service_template.yaml +21 -0
  87. kubetorch/serving/templates/workerset_template.yaml +36 -0
  88. kubetorch/serving/utils.py +377 -0
  89. kubetorch/utils.py +284 -0
  90. kubetorch-0.2.0.dist-info/METADATA +121 -0
  91. kubetorch-0.2.0.dist-info/RECORD +93 -0
  92. kubetorch-0.2.0.dist-info/WHEEL +4 -0
  93. kubetorch-0.2.0.dist-info/entry_points.txt +5 -0
File without changes
@@ -0,0 +1,173 @@
1
+ from dataclasses import asdict, dataclass
2
+
3
+ from kubetorch.logger import get_logger
4
+
5
+ logger = get_logger(__name__)
6
+
7
+
8
class AutoScalingError(Exception):
    """Raised when autoscaling settings are invalid or mutually inconsistent."""

    pass


@dataclass
class AutoscalingConfig:
    """Knative autoscaling settings, convertible to resource annotations.

    Known fields are declared below. Any additional keyword arguments passed
    to the constructor are collected into ``extra_annotations`` as
    ``autoscaling.knative.dev/<key> -> str(value)`` pairs and merged into the
    output of :meth:`convert_to_annotations`.
    """

    # The concurrent requests or requests per second threshold that triggers scaling
    target: int = None

    # The time window used to calculate the average of metrics for scaling decisions
    window: str = None

    # The metric type to base scaling decisions on:
    # - concurrency: number of simultaneous requests
    # - rps: requests per second
    # - cpu: CPU utilization (requires HPA class)
    # - memory: Memory utilization (requires HPA class)
    metric: str = None

    # The percentage of the target value at which to start scaling.
    # E.g., if target=100 and target_utilization=70, scaling occurs at 70 requests
    target_utilization: int = None

    # Minimum number of replicas. 0 allows scaling to zero when idle
    min_scale: int = None

    # Maximum number of replicas the service can scale up to
    max_scale: int = None

    # Initial number of pods launched by the service
    initial_scale: int = None

    # Maximum concurrent requests per pod (containerConcurrency).
    # If not set, pods accept unlimited concurrent requests.
    concurrency: int = None

    # Time to keep the last pod before scaling to zero (e.g., "30s", "1m5s")
    scale_to_zero_pod_retention_period: str = None

    # Delay before scaling down (e.g., "15m"). Only for KPA autoscaler.
    scale_down_delay: str = None

    # Autoscaler class: "kpa.autoscaling.knative.dev" or "hpa.autoscaling.knative.dev"
    autoscaler_class: str = None

    # Progress deadline for deployment (e.g., "10m"). Time to wait for deployment to be ready.
    progress_deadline: str = None

    def __init__(self, **kwargs):
        """Accept the declared fields plus arbitrary extra autoscaling annotations.

        NOTE: defining ``__init__`` by hand suppresses the ``@dataclass``-generated
        constructor, so ``__post_init__`` would never run; validation is therefore
        invoked directly here (the previous dead ``__post_init__`` has been removed).
        """
        for field in self.__annotations__:
            # Fall back to the class-level default (None) when a field is omitted.
            setattr(self, field, kwargs.pop(field, getattr(self, field, None)))

        # Any remaining kwargs become raw Knative autoscaling annotations.
        self.extra_annotations = {
            f"autoscaling.knative.dev/{k}": str(v) for k, v in kwargs.items()
        }

        self._validate()

    @staticmethod
    def _check_duration(value: str, field_name: str, examples: str) -> None:
        """Raise AutoScalingError unless *value* is a duration like '30s' or '1m5s'.

        Args:
            value: The duration string to validate.
            field_name: Field name used in the error message.
            examples: Example values quoted in the error message.
        """
        import re

        if not re.match(r"^\d+[smh](\d+[smh])*$", value):
            raise AutoScalingError(
                f"{field_name} must be a valid duration (e.g., {examples})"
            )

    def _validate(self):
        """Check cross-field consistency and formats; raise AutoScalingError on failure."""
        if (
            self.min_scale is not None
            and self.max_scale is not None
            and self.min_scale > self.max_scale
        ):
            raise AutoScalingError("min_scale cannot be greater than max_scale")
        if self.window is not None and not self.window.endswith(("s", "m", "h")):
            raise AutoScalingError("window must end with s, m, or h")
        if self.target_utilization is not None and (
            self.target_utilization <= 0 or self.target_utilization > 100
        ):
            raise AutoScalingError("target_utilization must be between 1 and 100")
        # Duration-formatted fields share a single helper; previously the same
        # regex check (and a local `import re`) was duplicated inline three times.
        if self.scale_to_zero_pod_retention_period is not None:
            self._check_duration(
                self.scale_to_zero_pod_retention_period,
                "scale_to_zero_pod_retention_period",
                "'30s', '1m5s'",
            )
        if self.scale_down_delay is not None:
            self._check_duration(
                self.scale_down_delay, "scale_down_delay", "'15m', '1h'"
            )
        if self.autoscaler_class is not None and self.autoscaler_class not in [
            "kpa.autoscaling.knative.dev",
            "hpa.autoscaling.knative.dev",
        ]:
            raise AutoScalingError(
                "autoscaler_class must be 'kpa.autoscaling.knative.dev' or 'hpa.autoscaling.knative.dev'"
            )
        if self.progress_deadline is not None:
            self._check_duration(
                self.progress_deadline, "progress_deadline", "'10m', '600s'"
            )

    def dict(self):
        """Return the declared fields as a plain dict.

        NOTE: ``asdict`` only serializes dataclass fields, so
        ``extra_annotations`` is not included in the result.
        """
        return asdict(self)

    def convert_to_annotations(self) -> dict:
        """Convert config to a dictionary of annotations for Knative."""
        annotations = {}

        # Set autoscaler class if specified, otherwise use default KPA
        if self.autoscaler_class is not None:
            annotations["autoscaling.knative.dev/class"] = self.autoscaler_class
        else:
            annotations["autoscaling.knative.dev/class"] = "kpa.autoscaling.knative.dev"

        # Only set annotations for values that were explicitly provided.
        # NOTE(review): `concurrency` and `progress_deadline` are validated but not
        # emitted here — presumably applied elsewhere in the service spec; confirm.
        if self.target is not None:
            annotations["autoscaling.knative.dev/target"] = str(self.target)

        if self.min_scale is not None:
            annotations["autoscaling.knative.dev/min-scale"] = str(self.min_scale)

        if self.max_scale is not None:
            annotations["autoscaling.knative.dev/max-scale"] = str(self.max_scale)

        if self.window is not None:
            annotations["autoscaling.knative.dev/window"] = self.window

        if self.metric is not None:
            annotations["autoscaling.knative.dev/metric"] = self.metric

        if self.target_utilization is not None:
            annotations["autoscaling.knative.dev/target-utilization-percentage"] = str(
                self.target_utilization
            )

        if self.initial_scale is not None:
            annotations["autoscaling.knative.dev/initial-scale"] = str(
                self.initial_scale
            )

        if self.scale_to_zero_pod_retention_period is not None:
            annotations[
                "autoscaling.knative.dev/scale-to-zero-pod-retention-period"
            ] = self.scale_to_zero_pod_retention_period

        if self.scale_down_delay is not None:
            annotations[
                "autoscaling.knative.dev/scale-down-delay"
            ] = self.scale_down_delay

        # Merge extras collected in __init__ (hasattr kept as a defensive check
        # in case the instance was created without going through __init__).
        if hasattr(self, "extra_annotations"):
            annotations.update(self.extra_annotations)

        return annotations
@@ -0,0 +1,363 @@
1
+ import importlib
2
+
3
+ from abc import abstractmethod
4
+ from typing import Dict, List, Optional
5
+
6
+ import yaml
7
+ from jinja2 import Template
8
+
9
+ from kubernetes import client, utils
10
+ from kubernetes.client import AppsV1Api, CoreV1Api, CustomObjectsApi
11
+
12
+ import kubetorch.serving.constants as serving_constants
13
+ from kubetorch import globals
14
+
15
+ from kubetorch.logger import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class BaseServiceManager:
    """Base service manager with common functionality for all service types.

    Subclasses (Knative, Deployment, RayCluster managers) implement the
    service-type-specific operations declared as stubs at the bottom.

    NOTE(review): this class uses ``@abstractmethod`` but does not inherit from
    ``abc.ABC``, so abstract methods are not enforced at instantiation — confirm
    whether that is intentional.
    """

    def __init__(
        self,
        objects_api: CustomObjectsApi,
        core_api: CoreV1Api,
        apps_v1_api: AppsV1Api,
        namespace: str,
    ):
        """Store the Kubernetes API clients and resolve the working namespace.

        Args:
            objects_api: Client for namespaced custom objects (Knative, Ray).
            core_api: Client for core v1 resources (pods, configmaps).
            apps_v1_api: Client for apps/v1 resources (deployments).
            namespace: Target namespace; falls back to the global config's namespace.
        """
        self.objects_api = objects_api
        self.core_api = core_api
        self.apps_v1_api = apps_v1_api

        # Load config
        self.global_config = globals.config
        self.namespace = namespace or self.global_config.namespace

    @property
    def username(self):
        """Username from the global kubetorch config (may be falsy/unset)."""
        return self.global_config.username

    @property
    def base_labels(self):
        """Base labels for all resources created by the service manager."""
        from kubetorch import __version__

        labels = {
            serving_constants.KT_VERSION_LABEL: __version__,
        }
        if self.username:
            labels[serving_constants.KT_USERNAME_LABEL] = self.username

        return labels

    def _apply_yaml_template(self, yaml_file, replace_existing=False, **kwargs):
        """Render a packaged Jinja2 YAML template and apply each object in it.

        Args:
            yaml_file: Template filename under ``kubetorch/serving/templates``.
            replace_existing: If True, delete an existing resource before creating.
            **kwargs: Variables passed to the Jinja2 template renderer.
        """
        # Fix: a bare `import importlib` does not load the `resources` submodule,
        # so `importlib.resources.files` could raise AttributeError depending on
        # what else has been imported. Import the submodule explicitly.
        import importlib.resources

        with importlib.resources.files("kubetorch.serving.templates").joinpath(
            yaml_file
        ).open("r") as f:
            template = Template(f.read())

        yaml_content = template.render(**kwargs)
        # A template may contain multiple YAML documents; apply each in order.
        yaml_objects = list(yaml.safe_load_all(yaml_content))
        k8s_client = client.ApiClient()

        for obj in yaml_objects:
            logger.info(
                f"Applying {obj.get('kind')}/{obj.get('metadata', {}).get('name')}"
            )
            try:
                if replace_existing:
                    # Try to delete existing resource first
                    try:
                        utils.delete_from_dict(k8s_client, obj)
                    except client.exceptions.ApiException as e:
                        if e.status != 404:  # Ignore if resource doesn't exist
                            raise

                utils.create_from_dict(k8s_client, obj)
                logger.info(
                    f"Successfully applied {obj.get('kind')}/{obj.get('metadata', {}).get('name')}"
                )
            except utils.FailToCreateError as e:
                # Already-existing resources are fine (idempotent apply); anything
                # else is a genuine failure.
                if "already exists" in str(e):
                    logger.info(
                        f"Resource already exists: {obj.get('kind')}/{obj.get('metadata', {}).get('name')}"
                    )
                else:
                    raise

    @abstractmethod
    def get_deployment_timestamp_annotation(self, service_name: str) -> Optional[str]:
        """Get deployment timestamp annotation for this service type."""
        pass

    @abstractmethod
    def update_deployment_timestamp_annotation(
        self, service_name: str, new_timestamp: str
    ) -> str:
        """Update deployment timestamp annotation for this service type."""
        pass

    def fetch_kubetorch_config(self) -> dict:
        """Fetch the kubetorch configmap from the install namespace.

        Returns:
            The configmap's data dict, or {} on any API error (a 404 — the
            configmap simply not existing — is not even logged).
        """
        try:
            kubetorch_config = self.core_api.read_namespaced_config_map(
                name="kubetorch-config", namespace=globals.config.install_namespace
            )
            return kubetorch_config.data
        except client.exceptions.ApiException as e:
            if e.status != 404:
                logger.error(f"Error fetching kubetorch config: {e}")
            return {}

    @staticmethod
    def discover_services_static(
        namespace: str, objects_api=None, apps_v1_api=None, name_filter: str = None
    ) -> List[Dict]:
        """Static method to discover Kubetorch services without ServiceManager instance.

        Uses parallel API calls for faster discovery across service types.

        Args:
            namespace: Kubernetes namespace
            objects_api: Optional CustomObjectsApi instance (created if None)
            apps_v1_api: Optional AppsV1Api instance (created if None)
            name_filter: Optional substring filter for service names

        Returns:
            List of service dictionaries with structure:
            {
                'name': str,
                'template_type': str,  # 'ksvc', 'deployment', 'raycluster'
                'resource': dict,
                'namespace': str,
                'creation_timestamp': str  # ISO format
            }
        """
        import concurrent.futures
        import threading

        if objects_api is None:
            objects_api = client.CustomObjectsApi()
        if apps_v1_api is None:
            apps_v1_api = client.AppsV1Api()

        services = []
        # Guards `services` since the three fetchers append from worker threads.
        services_lock = threading.Lock()

        def fetch_knative_services():
            """Fetch Knative services (template label 'ksvc')."""
            try:
                label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=ksvc"
                knative_services = objects_api.list_namespaced_custom_object(
                    group="serving.knative.dev",
                    version="v1",
                    namespace=namespace,
                    plural="services",
                    label_selector=label_selector,
                )["items"]

                local_services = []
                for svc in knative_services:
                    svc_name = svc["metadata"]["name"]
                    if name_filter and name_filter not in svc_name:
                        continue

                    local_services.append(
                        {
                            "name": svc_name,
                            "template_type": "ksvc",
                            "resource": svc,  # Already a dict
                            "namespace": namespace,
                            "creation_timestamp": svc["metadata"]["creationTimestamp"],
                        }
                    )

                with services_lock:
                    services.extend(local_services)

            except client.exceptions.ApiException as e:
                if e.status != 404:  # Ignore if Knative is not installed
                    logger.warning(f"Failed to list Knative services: {e}")

        def fetch_deployments():
            """Fetch Deployments (template label 'deployment')."""
            try:
                label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=deployment"
                deployments = apps_v1_api.list_namespaced_deployment(
                    namespace=namespace,
                    label_selector=label_selector,
                )

                local_services = []
                for deployment in deployments.items:
                    deploy_name = deployment.metadata.name
                    if name_filter and name_filter not in deploy_name:
                        continue

                    # Convert V1Deployment object to dictionary for consistency
                    deployment_dict = client.ApiClient().sanitize_for_serialization(
                        deployment
                    )

                    # Add kind and apiVersion (not included in V1Deployment object)
                    deployment_dict["kind"] = "Deployment"
                    deployment_dict["apiVersion"] = "apps/v1"

                    local_services.append(
                        {
                            "name": deploy_name,
                            "template_type": "deployment",
                            "resource": deployment_dict,  # Now consistently a dict
                            "namespace": namespace,
                            # NOTE(review): creation_timestamp is typically tz-aware,
                            # so isoformat() already ends in an offset (e.g. +00:00);
                            # appending "Z" can yield "...+00:00Z" — confirm consumers.
                            "creation_timestamp": deployment.metadata.creation_timestamp.isoformat()
                            + "Z",
                        }
                    )

                with services_lock:
                    services.extend(local_services)

            except client.exceptions.ApiException as e:
                logger.warning(f"Failed to list Deployments: {e}")

        def fetch_rayclusters():
            """Fetch RayClusters (template label 'raycluster')."""
            try:
                label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=raycluster"
                rayclusters = objects_api.list_namespaced_custom_object(
                    group="ray.io",
                    version="v1",
                    namespace=namespace,
                    plural="rayclusters",
                    label_selector=label_selector,
                )["items"]

                local_services = []
                for cluster in rayclusters:
                    cluster_name = cluster["metadata"]["name"]
                    if name_filter and name_filter not in cluster_name:
                        continue

                    local_services.append(
                        {
                            "name": cluster_name,
                            "template_type": "raycluster",
                            "resource": cluster,  # Already a dict
                            "namespace": namespace,
                            "creation_timestamp": cluster["metadata"][
                                "creationTimestamp"
                            ],
                        }
                    )

                with services_lock:
                    services.extend(local_services)

            except client.exceptions.ApiException as e:
                if e.status != 404:  # Ignore if the Ray CRD is not installed
                    logger.warning(f"Failed to list RayClusters: {e}")

        # Execute all API calls in parallel
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            futures = [
                executor.submit(fetch_knative_services),
                executor.submit(fetch_deployments),
                executor.submit(fetch_rayclusters),
            ]

            # Wait for all to complete
            concurrent.futures.wait(futures)

        return services

    @staticmethod
    def get_pods_for_service_static(
        service_name: str,
        namespace: str,
        core_api=None,
    ) -> List:
        """Static method to get pods for a service across different service types.

        Args:
            service_name: Name of the service
            namespace: Kubernetes namespace
            core_api: Optional CoreV1Api instance (created if None)

        Returns:
            List of pod objects (empty list on API error).
        """
        if core_api is None:
            core_api = client.CoreV1Api()

        # Build label selector
        label_selector = f"{serving_constants.KT_SERVICE_LABEL}={service_name}"
        try:
            pods = core_api.list_namespaced_pod(
                namespace=namespace, label_selector=label_selector
            )
            return pods.items
        except client.exceptions.ApiException as e:
            logger.warning(f"Failed to list pods for service {service_name}: {e}")
            return []

    def discover_all_services(self, namespace: str = None) -> List[Dict]:
        """Discover all Kubetorch services across different resource types.

        Returns a list of service dictionaries with normalized structure:
        {
            'name': str,
            'template_type': str,  # 'ksvc', 'deployment', 'raycluster'
            'resource': object,  # The actual Kubernetes resource object
            'namespace': str
        }
        """
        return self.discover_services_static(
            namespace=namespace or self.namespace,
            objects_api=self.objects_api,
            apps_v1_api=self.apps_v1_api,
        )

    # Abstract methods to be implemented by subclasses
    def create_or_update_service(self, *args, **kwargs):
        raise NotImplementedError("Subclasses must implement create_or_update_service")

    def get_endpoint(self, service_name: str) -> str:
        raise NotImplementedError("Subclasses must implement get_endpoint")

    def get_pods_for_service(self, service_name: str, **kwargs) -> List[client.V1Pod]:
        raise NotImplementedError("Subclasses must implement get_pods_for_service")

    def check_service_ready(
        self, service_name: str, launch_timeout: int, **kwargs
    ) -> bool:
        """Check if service is ready to serve requests.

        This method should be implemented by subclasses to provide service-type-specific
        readiness checking logic.

        Args:
            service_name: Name of the service to check
            launch_timeout: Timeout in seconds to wait for service to be ready
            **kwargs: Additional arguments for readiness checking

        Returns:
            True if service is ready, raises exception if timeout or error
        """
        raise NotImplementedError("Subclasses must implement check_service_ready")

    def teardown_service(self, service_name: str, console=None) -> bool:
        """Teardown/delete service and associated resources.

        This method should be implemented by subclasses to provide service-type-specific
        teardown logic.

        Args:
            service_name: Name of the service to teardown
            console: Optional Rich console for output

        Returns:
            True if teardown was successful, False otherwise
        """
        raise NotImplementedError("Subclasses must implement teardown_service")
@@ -0,0 +1,83 @@
1
# K8s Configuration
KUBECTL_PORT = 6443
KT_LAUNCH_TIMEOUT = 900  # 15 minutes

# Ports
DEFAULT_NGINX_PORT = 8080
LOCAL_NGINX_PORT = 38080
DEFAULT_KT_SERVER_PORT = 32300  # Standard port of Knative services
DEFAULT_DEBUG_PORT = 5678

# Namespaces
KUBETORCH_NAMESPACE = "kubetorch"
KUBETORCH_MONITORING_NAMESPACE = "kubetorch-monitoring"
KUBETORCH_GRAFANA_SERVICE_NAME = "kubetorch-otel-grafana"
KUBETORCH_UI_SERVICE_NAME = "kubetorch-ui-service"
RUNHOUSE_NAMESPACE = "runhouse"
DEFAULT_NAMESPACE = "default"

# Images
SERVER_IMAGE_MINIMAL = "ghcr.io/run-house/server:v3"
SERVER_IMAGE_WITH_OTEL = "ghcr.io/run-house/server-otel:v3"

# NOTE(review): both ubuntu images point at the same tag; given the server
# images above differ, the OTEL variant looks like it should reference a
# distinct image — confirm.
UBUNTU_IMAGE_MINIMAL = "ghcr.io/run-house/ubuntu:v1"
UBUNTU_IMAGE_WITH_OTEL = "ghcr.io/run-house/ubuntu:v1"

DEFAULT_PROXY_IMAGE = "ghcr.io/run-house/proxy:v2"
# Sentinel image name; presumably substituted for a real image elsewhere — TODO confirm
KUBETORCH_IMAGE_TRAPDOOR = "kubetorch"

# Service Accounts
DEFAULT_SERVICE_ACCOUNT_NAME = "kubetorch-service-account"

# Annotations (kubetorch.com/* keys attached to managed resources)
INACTIVITY_TTL_ANNOTATION = "kubetorch.com/inactivity-ttl"
KUBECONFIG_PATH_ANNOTATION = "kubetorch.com/kubeconfig-path"

# Labels (kubetorch.com/* keys used for selection of managed resources)
KT_SERVICE_LABEL = "kubetorch.com/service"
KT_VERSION_LABEL = "kubetorch.com/version"
KT_MODULE_LABEL = "kubetorch.com/module"
KT_USER_IDENTIFIER_LABEL = "kubetorch.com/user-identifier"
KT_USERNAME_LABEL = "kubetorch.com/username"
KT_POD_TYPE_LABEL = "kubetorch.com/pod-type"
KT_TEMPLATE_LABEL = "kubetorch.com/template"
KT_SECRET_NAME_LABEL = "kubetorch.com/secret-name"

# Templates (filenames resolved under kubetorch/serving/templates)
TTL_CONTROLLER_CONFIGMAP_NAME = "kubetorch-ttl-controller-config"
KNATIVE_SERVICE_TEMPLATE_FILE = "knative_service_template.yaml"
POD_TEMPLATE_FILE = "pod_template.yaml"
KT_SETUP_TEMPLATE_FILE = "kt_setup_template.sh.j2"
DEPLOYMENT_TEMPLATE_FILE = "deployment_template.yaml"
DEPLOYMENT_SERVICE_TEMPLATE_FILE = "service_template.yaml"
RAYCLUSTER_TEMPLATE_FILE = "raycluster_template.yaml"
RAYCLUSTER_SERVICE_TEMPLATE_FILE = "raycluster_service_template.yaml"

# Loki
LOKI_GATEWAY_SERVICE_NAME = "loki-gateway"

# Grafana / Prometheus health-check endpoints
GRAFANA_HEALTH_ENDPOINT = "/api/health"
PROMETHEUS_HEALTH_ENDPOINT = "/metrics"

# KAI scheduler integration
KAI_SCHEDULER_NAME = "kai-scheduler"
KAI_SCHEDULER_LABEL = "kai.scheduler/queue"

# URLS
PROMETHEUS_SERVICE_NAME = "kubetorch-otel-kube-promet-prometheus"
# In-cluster Prometheus endpoint (service DNS within the monitoring namespace)
PROMETHEUS_URL = (
    f"http://{PROMETHEUS_SERVICE_NAME}.kubetorch-monitoring.svc.cluster.local:9090"
)

# HTTP Client
# Pod termination reasons — presumably treated specially by the HTTP client; confirm
KT_TERMINATION_REASONS = ["OOMKilled", "Evicted", "Not Found"]

# NGINX
NGINX_GATEWAY_PROXY = "kubetorch-proxy"
DEFAULT_NGINX_HEALTH_ENDPOINT = "/health"

# Rsync
RSYNC_LOCAL_PORT = 3873
REMOTE_RSYNC_PORT = 873  # standard rsync daemon port
RSYNC_SERVICE_NAME = "kubetorch-rsync"