kubetorch-0.2.5-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- kubetorch/__init__.py +59 -0
- kubetorch/cli.py +1939 -0
- kubetorch/cli_utils.py +967 -0
- kubetorch/config.py +453 -0
- kubetorch/constants.py +18 -0
- kubetorch/docs/Makefile +18 -0
- kubetorch/docs/__init__.py +0 -0
- kubetorch/docs/_ext/json_globaltoc.py +42 -0
- kubetorch/docs/api/cli.rst +10 -0
- kubetorch/docs/api/python/app.rst +21 -0
- kubetorch/docs/api/python/cls.rst +19 -0
- kubetorch/docs/api/python/compute.rst +25 -0
- kubetorch/docs/api/python/config.rst +11 -0
- kubetorch/docs/api/python/fn.rst +19 -0
- kubetorch/docs/api/python/image.rst +14 -0
- kubetorch/docs/api/python/secret.rst +18 -0
- kubetorch/docs/api/python/volumes.rst +13 -0
- kubetorch/docs/api/python.rst +101 -0
- kubetorch/docs/conf.py +69 -0
- kubetorch/docs/index.rst +20 -0
- kubetorch/docs/requirements.txt +5 -0
- kubetorch/globals.py +269 -0
- kubetorch/logger.py +59 -0
- kubetorch/resources/__init__.py +0 -0
- kubetorch/resources/callables/__init__.py +0 -0
- kubetorch/resources/callables/cls/__init__.py +0 -0
- kubetorch/resources/callables/cls/cls.py +159 -0
- kubetorch/resources/callables/fn/__init__.py +0 -0
- kubetorch/resources/callables/fn/fn.py +140 -0
- kubetorch/resources/callables/module.py +1315 -0
- kubetorch/resources/callables/utils.py +203 -0
- kubetorch/resources/compute/__init__.py +0 -0
- kubetorch/resources/compute/app.py +253 -0
- kubetorch/resources/compute/compute.py +2414 -0
- kubetorch/resources/compute/decorators.py +137 -0
- kubetorch/resources/compute/utils.py +1026 -0
- kubetorch/resources/compute/websocket.py +135 -0
- kubetorch/resources/images/__init__.py +1 -0
- kubetorch/resources/images/image.py +412 -0
- kubetorch/resources/images/images.py +64 -0
- kubetorch/resources/secrets/__init__.py +2 -0
- kubetorch/resources/secrets/kubernetes_secrets_client.py +377 -0
- kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
- kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
- kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
- kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
- kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/providers.py +92 -0
- kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
- kubetorch/resources/secrets/secret.py +224 -0
- kubetorch/resources/secrets/secret_factory.py +64 -0
- kubetorch/resources/secrets/utils.py +222 -0
- kubetorch/resources/volumes/__init__.py +0 -0
- kubetorch/resources/volumes/volume.py +340 -0
- kubetorch/servers/__init__.py +0 -0
- kubetorch/servers/http/__init__.py +0 -0
- kubetorch/servers/http/distributed_utils.py +2968 -0
- kubetorch/servers/http/http_client.py +802 -0
- kubetorch/servers/http/http_server.py +1622 -0
- kubetorch/servers/http/server_metrics.py +255 -0
- kubetorch/servers/http/utils.py +722 -0
- kubetorch/serving/__init__.py +0 -0
- kubetorch/serving/autoscaling.py +153 -0
- kubetorch/serving/base_service_manager.py +344 -0
- kubetorch/serving/constants.py +77 -0
- kubetorch/serving/deployment_service_manager.py +431 -0
- kubetorch/serving/knative_service_manager.py +487 -0
- kubetorch/serving/raycluster_service_manager.py +526 -0
- kubetorch/serving/service_manager.py +18 -0
- kubetorch/serving/templates/deployment_template.yaml +17 -0
- kubetorch/serving/templates/knative_service_template.yaml +19 -0
- kubetorch/serving/templates/kt_setup_template.sh.j2 +91 -0
- kubetorch/serving/templates/pod_template.yaml +198 -0
- kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
- kubetorch/serving/templates/raycluster_template.yaml +35 -0
- kubetorch/serving/templates/service_template.yaml +21 -0
- kubetorch/serving/templates/workerset_template.yaml +36 -0
- kubetorch/serving/utils.py +344 -0
- kubetorch/utils.py +263 -0
- kubetorch-0.2.5.dist-info/METADATA +75 -0
- kubetorch-0.2.5.dist-info/RECORD +92 -0
- kubetorch-0.2.5.dist-info/WHEEL +4 -0
- kubetorch-0.2.5.dist-info/entry_points.txt +5 -0
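The listing above can be reproduced from the published wheel, since a wheel is an ordinary zip archive. A minimal sketch (not part of the package), assuming the wheel has been downloaded into the current directory, e.g. via `pip download kubetorch==0.2.5 --no-deps`:

```python
# List the files inside the downloaded wheel (a wheel is a zip archive).
import zipfile

with zipfile.ZipFile("kubetorch-0.2.5-py3-none-any.whl") as whl:
    for info in whl.infolist():
        print(f"{info.filename} ({info.file_size} bytes)")
```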
kubetorch/serving/autoscaling.py
@@ -0,0 +1,153 @@
+from dataclasses import asdict, dataclass
+
+from kubetorch.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class AutoScalingError(Exception):
+    pass
+
+
+@dataclass
+class AutoscalingConfig:
+    # The concurrent requests or requests per second threshold that triggers scaling
+    target: int = None
+
+    # The time window used to calculate the average of metrics for scaling decisions
+    window: str = None
+
+    # The metric type to base scaling decisions on:
+    # - concurrency: number of simultaneous requests
+    # - rps: requests per second
+    # - cpu: CPU utilization (requires HPA class)
+    # - memory: Memory utilization (requires HPA class)
+    metric: str = None
+
+    # The percentage of the target value at which to start scaling.
+    # E.g., if target=100 and target_utilization=70, scaling occurs at 70 requests
+    target_utilization: int = None
+
+    # Minimum number of replicas. 0 allows scaling to zero when idle
+    min_scale: int = None
+
+    # Maximum number of replicas the service can scale up to
+    max_scale: int = None
+
+    # Initial number of pods launched by the service
+    initial_scale: int = None
+
+    # Maximum concurrent requests per pod (containerConcurrency).
+    # If not set, pods accept unlimited concurrent requests.
+    concurrency: int = None
+
+    # Time to keep the last pod before scaling to zero (e.g., "30s", "1m5s")
+    scale_to_zero_pod_retention_period: str = None
+
+    # Delay before scaling down (e.g., "15m"). Only for KPA autoscaler.
+    scale_down_delay: str = None
+
+    # Autoscaler class: "kpa.autoscaling.knative.dev" or "hpa.autoscaling.knative.dev"
+    autoscaler_class: str = None
+
+    # Progress deadline for deployment (e.g., "10m"). Time to wait for deployment to be ready.
+    progress_deadline: str = None
+
+    def __init__(self, **kwargs):
+        """Support additional kwargs for autoscaling annotations"""
+        for field in self.__annotations__:
+            setattr(self, field, kwargs.pop(field, getattr(self, field, None)))
+
+        # set additional kwargs as annotations
+        self.extra_annotations = {f"autoscaling.knative.dev/{k}": str(v) for k, v in kwargs.items()}
+
+        self._validate()
+
+    def _validate(self):
+        """Validation logic moved to separate method"""
+        if self.min_scale is not None and self.max_scale is not None and self.min_scale > self.max_scale:
+            raise AutoScalingError("min_scale cannot be greater than max_scale")
+        if self.window is not None and not self.window.endswith(("s", "m", "h")):
+            raise AutoScalingError("window must end with s, m, or h")
+        if self.target_utilization is not None and (self.target_utilization <= 0 or self.target_utilization > 100):
+            raise AutoScalingError("target_utilization must be between 1 and 100")
+        if self.scale_to_zero_pod_retention_period is not None:
+            # Validate time format (e.g., "30s", "1m5s", "2h")
+            import re
+
+            if not re.match(r"^\d+[smh](\d+[smh])*$", self.scale_to_zero_pod_retention_period):
+                raise AutoScalingError(
+                    "scale_to_zero_pod_retention_period must be a valid duration (e.g., '30s', '1m5s')"
+                )
+        if self.scale_down_delay is not None:
+            # Validate time format
+            import re
+
+            if not re.match(r"^\d+[smh](\d+[smh])*$", self.scale_down_delay):
+                raise AutoScalingError("scale_down_delay must be a valid duration (e.g., '15m', '1h')")
+        if self.autoscaler_class is not None and self.autoscaler_class not in [
+            "kpa.autoscaling.knative.dev",
+            "hpa.autoscaling.knative.dev",
+        ]:
+            raise AutoScalingError(
+                "autoscaler_class must be 'kpa.autoscaling.knative.dev' or 'hpa.autoscaling.knative.dev'"
+            )
+        if self.progress_deadline is not None:
+            # Validate time format
+            import re
+
+            if not re.match(r"^\d+[smh](\d+[smh])*$", self.progress_deadline):
+                raise AutoScalingError("progress_deadline must be a valid duration (e.g., '10m', '600s')")
+
+    def __post_init__(self):
+        """Call the same validation for dataclass initialization"""
+        self._validate()
+
+    def dict(self):
+        return asdict(self)
+
+    def convert_to_annotations(self) -> dict:
+        """Convert config to a dictionary of annotations for Knative"""
+        annotations = {}
+
+        # Set autoscaler class if specified, otherwise use default KPA
+        if self.autoscaler_class is not None:
+            annotations["autoscaling.knative.dev/class"] = self.autoscaler_class
+        else:
+            annotations["autoscaling.knative.dev/class"] = "kpa.autoscaling.knative.dev"
+
+        # Only set annotations for values that were explicitly provided
+        if self.target is not None:
+            annotations["autoscaling.knative.dev/target"] = str(self.target)
+
+        if self.min_scale is not None:
+            annotations["autoscaling.knative.dev/min-scale"] = str(self.min_scale)
+
+        if self.max_scale is not None:
+            annotations["autoscaling.knative.dev/max-scale"] = str(self.max_scale)
+
+        if self.window is not None:
+            annotations["autoscaling.knative.dev/window"] = self.window
+
+        if self.metric is not None:
+            annotations["autoscaling.knative.dev/metric"] = self.metric
+
+        if self.target_utilization is not None:
+            annotations["autoscaling.knative.dev/target-utilization-percentage"] = str(self.target_utilization)
+
+        if self.initial_scale is not None:
+            annotations["autoscaling.knative.dev/initial-scale"] = str(self.initial_scale)
+
+        if self.scale_to_zero_pod_retention_period is not None:
+            annotations[
+                "autoscaling.knative.dev/scale-to-zero-pod-retention-period"
+            ] = self.scale_to_zero_pod_retention_period
+
+        if self.scale_down_delay is not None:
+            annotations["autoscaling.knative.dev/scale-down-delay"] = self.scale_down_delay
+
+        # Add any extra annotations from the config
+        if hasattr(self, "extra_annotations"):
+            annotations.update(self.extra_annotations)
+
+        return annotations
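To make the annotation mapping concrete, here is a small usage sketch (illustrative, not part of the package contents), assuming `AutoscalingConfig` is imported from the module above. Note that unrecognized keyword arguments are forwarded verbatim into `autoscaling.knative.dev/<key>` annotations:

```python
from kubetorch.serving.autoscaling import AutoscalingConfig

# Known fields become the matching Knative annotations; the KPA class is
# filled in by default when autoscaler_class is not set.
cfg = AutoscalingConfig(target=100, metric="concurrency", min_scale=0, max_scale=10, window="60s")
print(cfg.convert_to_annotations())
# {'autoscaling.knative.dev/class': 'kpa.autoscaling.knative.dev',
#  'autoscaling.knative.dev/target': '100',
#  'autoscaling.knative.dev/min-scale': '0',
#  'autoscaling.knative.dev/max-scale': '10',
#  'autoscaling.knative.dev/window': '60s',
#  'autoscaling.knative.dev/metric': 'concurrency'}

# Unknown kwargs are popped into extra_annotations verbatim, so dict
# unpacking can pass dashed annotation keys that are not valid identifiers.
cfg2 = AutoscalingConfig(min_scale=1, **{"panic-threshold-percentage": 200})
assert cfg2.convert_to_annotations()["autoscaling.knative.dev/panic-threshold-percentage"] == "200"
```

Because the hand-written `__init__` pops the known fields first, only the leftovers land in `extra_annotations`; keys are not normalized, so dashed Knative annotation names must be spelled exactly as above.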
kubetorch/serving/base_service_manager.py
@@ -0,0 +1,344 @@
+import importlib.resources  # explicit submodule import; importlib.resources.files is used below
+
+from abc import abstractmethod
+from typing import Dict, List, Optional
+
+import yaml
+from jinja2 import Template
+
+from kubernetes import client, utils
+from kubernetes.client import AppsV1Api, CoreV1Api, CustomObjectsApi
+
+import kubetorch.serving.constants as serving_constants
+from kubetorch import globals
+
+from kubetorch.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class BaseServiceManager:
+    """Base service manager with common functionality for all service types."""
+
+    def __init__(
+        self,
+        objects_api: CustomObjectsApi,
+        core_api: CoreV1Api,
+        apps_v1_api: AppsV1Api,
+        namespace: str,
+    ):
+        self.objects_api = objects_api
+        self.core_api = core_api
+        self.apps_v1_api = apps_v1_api
+
+        # Load config
+        self.global_config = globals.config
+        self.namespace = namespace or self.global_config.namespace
+
+    @property
+    def username(self):
+        return self.global_config.username
+
+    @property
+    def base_labels(self):
+        """Base labels for all resources created by the service manager."""
+        from kubetorch import __version__
+
+        labels = {
+            serving_constants.KT_VERSION_LABEL: __version__,
+        }
+        if self.username:
+            labels[serving_constants.KT_USERNAME_LABEL] = self.username
+
+        return labels
+
+    def _apply_yaml_template(self, yaml_file, replace_existing=False, **kwargs):
+        with importlib.resources.files("kubetorch.serving.templates").joinpath(yaml_file).open("r") as f:
+            template = Template(f.read())
+
+        yaml_content = template.render(**kwargs)
+        yaml_objects = list(yaml.safe_load_all(yaml_content))
+        k8s_client = client.ApiClient()
+
+        for obj in yaml_objects:
+            logger.info(f"Applying {obj.get('kind')}/{obj.get('metadata', {}).get('name')}")
+            try:
+                if replace_existing:
+                    # Try to delete existing resource first
+                    try:
+                        utils.delete_from_dict(k8s_client, obj)
+                    except client.exceptions.ApiException as e:
+                        if e.status != 404:  # Ignore if resource doesn't exist
+                            raise
+
+                utils.create_from_dict(k8s_client, obj)
+                logger.info(f"Successfully applied {obj.get('kind')}/{obj.get('metadata', {}).get('name')}")
+            except utils.FailToCreateError as e:
+                if "already exists" in str(e):
+                    logger.info(f"Resource already exists: {obj.get('kind')}/{obj.get('metadata', {}).get('name')}")
+                else:
+                    raise
+
+    @abstractmethod
+    def get_deployment_timestamp_annotation(self, service_name: str) -> Optional[str]:
+        """Get deployment timestamp annotation for this service type."""
+        pass
+
+    @abstractmethod
+    def update_deployment_timestamp_annotation(self, service_name: str, new_timestamp: str) -> str:
+        """Update deployment timestamp annotation for this service type."""
+        pass
+
+    def fetch_kubetorch_config(self) -> dict:
+        """Fetch the kubetorch configmap from the namespace."""
+        try:
+            kubetorch_config = self.core_api.read_namespaced_config_map(
+                name="kubetorch-config", namespace=globals.config.install_namespace
+            )
+            return kubetorch_config.data
+        except client.exceptions.ApiException as e:
+            if e.status != 404:
+                logger.error(f"Error fetching kubetorch config: {e}")
+            return {}
+
+    @staticmethod
+    def discover_services_static(
+        namespace: str, objects_api=None, apps_v1_api=None, name_filter: str = None
+    ) -> List[Dict]:
+        """Static method to discover Kubetorch services without ServiceManager instance.
+
+        Uses parallel API calls for faster discovery across service types.
+
+        Args:
+            namespace: Kubernetes namespace
+            objects_api: Optional CustomObjectsApi instance (created if None)
+            apps_v1_api: Optional AppsV1Api instance (created if None)
+            name_filter: Optional name filter for services
+
+        Returns:
+            List of service dictionaries with structure:
+            {
+                'name': str,
+                'template_type': str,  # 'ksvc', 'deployment', 'raycluster'
+                'resource': dict,
+                'namespace': str,
+                'creation_timestamp': str  # ISO format
+            }
+        """
+        import concurrent.futures
+        import threading
+
+        if objects_api is None:
+            objects_api = client.CustomObjectsApi()
+        if apps_v1_api is None:
+            apps_v1_api = client.AppsV1Api()
+
+        services = []
+        services_lock = threading.Lock()
+
+        def fetch_knative_services():
+            """Fetch Knative services in parallel."""
+            try:
+                label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=ksvc"
+                knative_services = objects_api.list_namespaced_custom_object(
+                    group="serving.knative.dev",
+                    version="v1",
+                    namespace=namespace,
+                    plural="services",
+                    label_selector=label_selector,
+                )["items"]
+
+                local_services = []
+                for svc in knative_services:
+                    svc_name = svc["metadata"]["name"]
+                    if name_filter and name_filter not in svc_name:
+                        continue
+
+                    local_services.append(
+                        {
+                            "name": svc_name,
+                            "template_type": "ksvc",
+                            "resource": svc,  # Already a dict
+                            "namespace": namespace,
+                            "creation_timestamp": svc["metadata"]["creationTimestamp"],
+                        }
+                    )
+
+                with services_lock:
+                    services.extend(local_services)
+
+            except client.exceptions.ApiException as e:
+                if e.status != 404:  # Ignore if Knative is not installed
+                    logger.warning(f"Failed to list Knative services: {e}")
+
+        def fetch_deployments():
+            """Fetch Deployments in parallel."""
+            try:
+                label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=deployment"
+                deployments = apps_v1_api.list_namespaced_deployment(
+                    namespace=namespace,
+                    label_selector=label_selector,
+                )
+
+                local_services = []
+                for deployment in deployments.items:
+                    deploy_name = deployment.metadata.name
+                    if name_filter and name_filter not in deploy_name:
+                        continue
+
+                    # Convert V1Deployment object to dictionary for consistency
+                    deployment_dict = client.ApiClient().sanitize_for_serialization(deployment)
+
+                    # Add kind and apiVersion (not included in V1Deployment object)
+                    deployment_dict["kind"] = "Deployment"
+                    deployment_dict["apiVersion"] = "apps/v1"
+
+                    local_services.append(
+                        {
+                            "name": deploy_name,
+                            "template_type": "deployment",
+                            "resource": deployment_dict,  # Now consistently a dict
+                            "namespace": namespace,
+                            "creation_timestamp": deployment.metadata.creation_timestamp.isoformat() + "Z",
+                        }
+                    )
+
+                with services_lock:
+                    services.extend(local_services)
+
+            except client.exceptions.ApiException as e:
+                logger.warning(f"Failed to list Deployments: {e}")
+
+        def fetch_rayclusters():
+            """Fetch RayClusters in parallel."""
+            try:
+                label_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=raycluster"
+                rayclusters = objects_api.list_namespaced_custom_object(
+                    group="ray.io",
+                    version="v1",
+                    namespace=namespace,
+                    plural="rayclusters",
+                    label_selector=label_selector,
+                )["items"]
+
+                local_services = []
+                for cluster in rayclusters:
+                    cluster_name = cluster["metadata"]["name"]
+                    if name_filter and name_filter not in cluster_name:
+                        continue
+
+                    local_services.append(
+                        {
+                            "name": cluster_name,
+                            "template_type": "raycluster",
+                            "resource": cluster,  # Already a dict
+                            "namespace": namespace,
+                            "creation_timestamp": cluster["metadata"]["creationTimestamp"],
+                        }
+                    )
+
+                with services_lock:
+                    services.extend(local_services)
+
+            except client.exceptions.ApiException as e:
+                if e.status != 404:
+                    logger.warning(f"Failed to list RayClusters: {e}")
+
+        # Execute all API calls in parallel
+        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
+            futures = [
+                executor.submit(fetch_knative_services),
+                executor.submit(fetch_deployments),
+                executor.submit(fetch_rayclusters),
+            ]
+
+            # Wait for all to complete
+            concurrent.futures.wait(futures)
+
+        return services
+
+    @staticmethod
+    def get_pods_for_service_static(
+        service_name: str,
+        namespace: str,
+        core_api=None,
+    ) -> List:
+        """Static method to get pods for a service across different service types.
+
+        Args:
+            service_name: Name of the service
+            namespace: Kubernetes namespace
+            core_api: Optional CoreV1Api instance (created if None)
+
+        Returns:
+            List of pod objects
+        """
+        if core_api is None:
+            core_api = client.CoreV1Api()
+
+        # Build label selector
+        label_selector = f"{serving_constants.KT_SERVICE_LABEL}={service_name}"
+        try:
+            pods = core_api.list_namespaced_pod(namespace=namespace, label_selector=label_selector)
+            return pods.items
+        except client.exceptions.ApiException as e:
+            logger.warning(f"Failed to list pods for service {service_name}: {e}")
+            return []
+
+    def discover_all_services(self, namespace: str = None) -> List[Dict]:
+        """Discover all Kubetorch services across different resource types.
+
+        Returns a list of service dictionaries with normalized structure:
+        {
+            'name': str,
+            'template_type': str,  # 'ksvc', 'deployment', 'raycluster'
+            'resource': object,  # The actual Kubernetes resource object
+            'namespace': str
+        }
+        """
+        return self.discover_services_static(
+            namespace=namespace or self.namespace,
+            objects_api=self.objects_api,
+            apps_v1_api=self.apps_v1_api,
+        )
+
+    # Abstract methods to be implemented by subclasses
+    def create_or_update_service(self, *args, **kwargs):
+        raise NotImplementedError("Subclasses must implement create_or_update_service")
+
+    def get_endpoint(self, service_name: str) -> str:
+        raise NotImplementedError("Subclasses must implement get_endpoint")
+
+    def get_pods_for_service(self, service_name: str, **kwargs) -> List[client.V1Pod]:
+        raise NotImplementedError("Subclasses must implement get_pods_for_service")
+
+    def check_service_ready(self, service_name: str, launch_timeout: int, **kwargs) -> bool:
+        """Check if service is ready to serve requests.
+
+        This method should be implemented by subclasses to provide service-type-specific
+        readiness checking logic.
+
+        Args:
+            service_name: Name of the service to check
+            launch_timeout: Timeout in seconds to wait for service to be ready
+            **kwargs: Additional arguments for readiness checking
+
+        Returns:
+            True if service is ready, raises exception if timeout or error
+        """
+        raise NotImplementedError("Subclasses must implement check_service_ready")
+
+    def teardown_service(self, service_name: str, console=None) -> bool:
+        """Teardown/delete service and associated resources.
+
+        This method should be implemented by subclasses to provide service-type-specific
+        teardown logic.
+
+        Args:
+            service_name: Name of the service to teardown
+            console: Optional Rich console for output
+
+        Returns:
+            True if teardown was successful, False otherwise
+        """
+        raise NotImplementedError("Subclasses must implement teardown_service")
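A rough sketch (illustrative, not part of the package contents) of driving the static discovery helper directly, assuming a reachable cluster with a loaded kubeconfig; the `"kubetorch"` namespace below is a placeholder:

```python
from kubernetes import config as k8s_config

from kubetorch.serving.base_service_manager import BaseServiceManager

# Out-of-cluster auth; inside a pod, k8s_config.load_incluster_config() applies instead.
k8s_config.load_kube_config()

# Lists ksvc / deployment / raycluster services in parallel via their
# kubetorch.com/template labels; API clients are created on demand when None.
for svc in BaseServiceManager.discover_services_static(namespace="kubetorch"):
    print(f"{svc['template_type']:>10}  {svc['name']}  ({svc['creation_timestamp']})")
```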
kubetorch/serving/constants.py
@@ -0,0 +1,77 @@
+# K8s Configuration
+KUBECTL_PORT = 6443
+KT_LAUNCH_TIMEOUT = 900  # 15 minutes
+
+# Ports
+DEFAULT_NGINX_PORT = 8080
+LOCAL_NGINX_PORT = 38080
+DEFAULT_KT_SERVER_PORT = 32300  # Standard port of Knative services
+DEFAULT_DEBUG_PORT = 5678
+
+# Namespaces
+KUBETORCH_NAMESPACE = "kubetorch"
+RUNHOUSE_NAMESPACE = "runhouse"
+DEFAULT_NAMESPACE = "default"
+
+# Images
+SERVER_IMAGE_MINIMAL = "ghcr.io/run-house/server:v3"
+SERVER_IMAGE_WITH_OTEL = "ghcr.io/run-house/server-otel:v3"
+
+UBUNTU_IMAGE_MINIMAL = "ghcr.io/run-house/ubuntu:v1"
+UBUNTU_IMAGE_WITH_OTEL = "ghcr.io/run-house/ubuntu:v1"
+
+DEFAULT_PROXY_IMAGE = "ghcr.io/run-house/proxy:v2"
+KUBETORCH_IMAGE_TRAPDOOR = "kubetorch"
+
+# Service Accounts
+DEFAULT_SERVICE_ACCOUNT_NAME = "kubetorch-service-account"
+
+# Annotations
+INACTIVITY_TTL_ANNOTATION = "kubetorch.com/inactivity-ttl"
+KUBECONFIG_PATH_ANNOTATION = "kubetorch.com/kubeconfig-path"
+
+# Labels
+KT_SERVICE_LABEL = "kubetorch.com/service"
+KT_VERSION_LABEL = "kubetorch.com/version"
+KT_MODULE_LABEL = "kubetorch.com/module"
+KT_USER_IDENTIFIER_LABEL = "kubetorch.com/user-identifier"
+KT_USERNAME_LABEL = "kubetorch.com/username"
+KT_POD_TYPE_LABEL = "kubetorch.com/pod-type"
+KT_TEMPLATE_LABEL = "kubetorch.com/template"
+KT_SECRET_NAME_LABEL = "kubetorch.com/secret-name"
+
+# Templates
+TTL_CONTROLLER_CONFIGMAP_NAME = "kubetorch-ttl-controller-config"
+KNATIVE_SERVICE_TEMPLATE_FILE = "knative_service_template.yaml"
+POD_TEMPLATE_FILE = "pod_template.yaml"
+KT_SETUP_TEMPLATE_FILE = "kt_setup_template.sh.j2"
+DEPLOYMENT_TEMPLATE_FILE = "deployment_template.yaml"
+DEPLOYMENT_SERVICE_TEMPLATE_FILE = "service_template.yaml"
+RAYCLUSTER_TEMPLATE_FILE = "raycluster_template.yaml"
+RAYCLUSTER_SERVICE_TEMPLATE_FILE = "raycluster_service_template.yaml"
+
+# Loki
+LOKI_GATEWAY_SERVICE_NAME = "loki-gateway"
+
+# Prometheus
+PROMETHEUS_SERVICE_NAME = "kubetorch-metrics"
+
+# Grafana
+GRAFANA_HEALTH_ENDPOINT = "/api/health"
+PROMETHEUS_HEALTH_ENDPOINT = "/metrics"
+
+# KAI
+KAI_SCHEDULER_NAME = "kai-scheduler"
+KAI_SCHEDULER_LABEL = "kai.scheduler/queue"
+
+# HTTP Client
+KT_TERMINATION_REASONS = ["OOMKilled", "Evicted", "Not Found"]
+
+# NGINX
+NGINX_GATEWAY_PROXY = "kubetorch-proxy"
+DEFAULT_NGINX_HEALTH_ENDPOINT = "/health"
+
+# Rsync
+RSYNC_LOCAL_PORT = 3873
+REMOTE_RSYNC_PORT = 873
+RSYNC_SERVICE_NAME = "kubetorch-rsync"
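For illustration, a short sketch (with a placeholder service name, not part of the package contents) of how the label constants above compose into the selectors used by the service managers:

```python
import kubetorch.serving.constants as serving_constants

# Same selector shapes as get_pods_for_service_static and the discovery helpers.
pod_selector = f"{serving_constants.KT_SERVICE_LABEL}=my-service"
template_selector = f"{serving_constants.KT_TEMPLATE_LABEL}=deployment"

print(pod_selector)       # kubetorch.com/service=my-service
print(template_selector)  # kubetorch.com/template=deployment
```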