konduktor-nightly 0.1.0.dev20250825104841__tar.gz → 0.1.0.dev20250827104553__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of konduktor-nightly might be problematic. Click here for more details.
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/PKG-INFO +1 -1
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/__init__.py +2 -2
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/deployment.py +8 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/deployment_utils.py +318 -41
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/jobset.py +3 -2
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/jobset_utils.py +8 -1
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/cli.py +12 -2
- konduktor_nightly-0.1.0.dev20250827104553/konduktor/manifests/apoxy-setup.yaml +151 -0
- konduktor_nightly-0.1.0.dev20250827104553/konduktor/manifests/apoxy-setup2.yaml +34 -0
- konduktor_nightly-0.1.0.dev20250827104553/konduktor/templates/apoxy-deployment.yaml.j2 +33 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/schemas.py +14 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/pyproject.toml +1 -1
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/README.md +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/adaptors/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/adaptors/aws.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/adaptors/common.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/adaptors/gcp.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/authentication.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/backend.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/backends/pod_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/check.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/config.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/controller/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/controller/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/controller/launch.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/controller/node.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/controller/parse.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/README.md +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/backend/main.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/backend/sockets.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/.eslintrc.json +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/.gitignore +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/api/jobs/route.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/api/namespaces/route.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/Grafana.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/JobsData.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/LogsData.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/NavMenu.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/NavTabs.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/NavTabs2.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/SelectBtn.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/lib/utils.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/ui/input.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/components/ui/select.jsx +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/favicon.ico +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/globals.css +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/jobs/page.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/layout.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/logs/page.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/app/page.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/jsconfig.json +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/next.config.mjs +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/package-lock.json +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/package.json +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/postcss.config.mjs +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/server.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/dashboard/frontend/tailwind.config.js +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/aws/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/aws/s3.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/data_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/gcp/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/gcp/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/gcp/gcs.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/gcp/utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/registry.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/storage.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/data/storage_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/execution.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/kube_client.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/logging.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/manifests/controller_deployment.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/manifests/dashboard_deployment.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/manifests/dmesg_daemonset.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/manifests/pod_cleanup_controller.yaml +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/resource.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/serving.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/task.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/templates/deployment.yaml.j2 +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/templates/jobset.yaml.j2 +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/templates/pod.yaml.j2 +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/usage/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/usage/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/__init__.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/accelerator_registry.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/annotations.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/base64_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/common_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/constants.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/env_options.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/exceptions.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/kubernetes_enums.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/kubernetes_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/log_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/loki_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/rich_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/subprocess_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/ux_utils.py +0 -0
- {konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/konduktor/utils/validator.py +0 -0
|
@@ -11,7 +11,7 @@ from konduktor.task import Task
|
|
|
11
11
|
__all__ = ['launch', 'Resources', 'Task', 'Serving']
|
|
12
12
|
|
|
13
13
|
# Replaced with the current commit when building the wheels.
|
|
14
|
-
_KONDUKTOR_COMMIT_SHA = '
|
|
14
|
+
_KONDUKTOR_COMMIT_SHA = 'cb72c75ad328b535768794b5979a5ec56edb3d8e'
|
|
15
15
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
|
16
16
|
|
|
17
17
|
|
|
@@ -45,5 +45,5 @@ def _get_git_commit():
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
__commit__ = _get_git_commit()
|
|
48
|
-
__version__ = '1.0.0.dev0.1.0.
|
|
48
|
+
__version__ = '1.0.0.dev0.1.0.dev20250827104553'
|
|
49
49
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -163,6 +163,14 @@ class DeploymentBackend(backend.Backend):
|
|
|
163
163
|
dryrun=dryrun,
|
|
164
164
|
)
|
|
165
165
|
|
|
166
|
+
# Apoxy resources for general deployments only when endpoint: trainy
|
|
167
|
+
if deployment_utils.get_endpoint_type_from_config() == 'trainy':
|
|
168
|
+
deployment_utils.create_apoxy_resources(
|
|
169
|
+
namespace=namespace,
|
|
170
|
+
task=task,
|
|
171
|
+
dryrun=dryrun,
|
|
172
|
+
)
|
|
173
|
+
|
|
166
174
|
if not dryrun and not detach_run:
|
|
167
175
|
with ux_utils.print_exception_no_traceback():
|
|
168
176
|
with rich_utils.safe_status(
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
|
+
import random
|
|
5
6
|
import tempfile
|
|
6
7
|
import typing
|
|
7
8
|
from typing import Any, Dict, List, Optional, Tuple
|
|
@@ -14,6 +15,7 @@ from rich.table import Table
|
|
|
14
15
|
from rich.text import Text
|
|
15
16
|
|
|
16
17
|
import konduktor
|
|
18
|
+
from konduktor import config as konduktor_config
|
|
17
19
|
from konduktor import kube_client, logging
|
|
18
20
|
from konduktor.backends import constants as backend_constants
|
|
19
21
|
from konduktor.backends import pod_utils
|
|
@@ -48,6 +50,32 @@ _DEPLOYMENT_METADATA_LABELS = {
|
|
|
48
50
|
}
|
|
49
51
|
|
|
50
52
|
|
|
53
|
+
# actually just gets highest existing deployment number and adds 1
|
|
54
|
+
def get_next_deployment_number(cluster_name: str) -> int:
|
|
55
|
+
"""Get next number by counting existing Apoxy resources."""
|
|
56
|
+
try:
|
|
57
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
58
|
+
custom_api = kube_client.crd_api(context=context)
|
|
59
|
+
|
|
60
|
+
# Count existing backends
|
|
61
|
+
backends = custom_api.list_cluster_custom_object(
|
|
62
|
+
group='core.apoxy.dev', version='v1alpha', plural='backends'
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# Find the highest number
|
|
66
|
+
max_number = 0
|
|
67
|
+
for backend in backends.get('items', []):
|
|
68
|
+
name = backend['metadata']['name']
|
|
69
|
+
if name.startswith(f'{cluster_name}-backend-'):
|
|
70
|
+
number = int(name.split('-')[-1])
|
|
71
|
+
max_number = max(max_number, number)
|
|
72
|
+
|
|
73
|
+
return max_number + 1
|
|
74
|
+
except Exception as e:
|
|
75
|
+
logger.warning(f'Error counting existing resources: {e}')
|
|
76
|
+
return random.randint(100, 999)
|
|
77
|
+
|
|
78
|
+
|
|
51
79
|
def render_specs(
|
|
52
80
|
task: 'konduktor.Task',
|
|
53
81
|
) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
|
|
@@ -124,6 +152,130 @@ def render_specs(
|
|
|
124
152
|
return deployment_spec, service_spec, autoscaler_spec or {}
|
|
125
153
|
|
|
126
154
|
|
|
155
|
+
# For general deployments, create resources as needed
|
|
156
|
+
def render_apoxy_spec(task: 'konduktor.Task') -> List[Dict[str, Any]]:
|
|
157
|
+
"""Renders the Apoxy specs for a general deployment."""
|
|
158
|
+
general = True
|
|
159
|
+
if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
|
|
160
|
+
general = False
|
|
161
|
+
|
|
162
|
+
if not general:
|
|
163
|
+
return [] # Only render for general deployments
|
|
164
|
+
|
|
165
|
+
if task.run:
|
|
166
|
+
task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
|
|
167
|
+
|
|
168
|
+
unique_cluster_name = get_unique_cluster_name_from_tunnel()
|
|
169
|
+
cluster_name = unique_cluster_name[:-3]
|
|
170
|
+
deployment_number = get_next_deployment_number(unique_cluster_name)
|
|
171
|
+
|
|
172
|
+
with tempfile.NamedTemporaryFile() as temp:
|
|
173
|
+
common_utils.fill_template(
|
|
174
|
+
'apoxy-deployment.yaml.j2',
|
|
175
|
+
{
|
|
176
|
+
'name': task.name,
|
|
177
|
+
'user': common_utils.get_cleaned_username(),
|
|
178
|
+
'ports': task.serving.ports if task.serving else 8000,
|
|
179
|
+
'general': general,
|
|
180
|
+
'cluster_name': cluster_name,
|
|
181
|
+
'unique_cluster_name': unique_cluster_name,
|
|
182
|
+
'deployment_number': deployment_number,
|
|
183
|
+
**_DEPLOYMENT_METADATA_LABELS,
|
|
184
|
+
},
|
|
185
|
+
temp.name,
|
|
186
|
+
)
|
|
187
|
+
docs = common_utils.read_yaml_all(temp.name)
|
|
188
|
+
return docs
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def create_apoxy_resources(
|
|
192
|
+
namespace: str,
|
|
193
|
+
task: 'konduktor.Task',
|
|
194
|
+
dryrun: bool = False,
|
|
195
|
+
) -> None:
|
|
196
|
+
"""Creates Apoxy resources for a general deployment."""
|
|
197
|
+
|
|
198
|
+
apoxy_specs = render_apoxy_spec(task)
|
|
199
|
+
|
|
200
|
+
if not apoxy_specs:
|
|
201
|
+
return
|
|
202
|
+
|
|
203
|
+
if dryrun:
|
|
204
|
+
logger.debug(f'[DRYRUN] Would create Apoxy resources:\n{apoxy_specs}')
|
|
205
|
+
return
|
|
206
|
+
|
|
207
|
+
try:
|
|
208
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
209
|
+
custom_api = kube_client.crd_api(context=context)
|
|
210
|
+
|
|
211
|
+
for spec in apoxy_specs:
|
|
212
|
+
kind = spec.get('kind')
|
|
213
|
+
name = spec['metadata']['name']
|
|
214
|
+
|
|
215
|
+
try:
|
|
216
|
+
if kind == 'Backend':
|
|
217
|
+
custom_api.create_cluster_custom_object(
|
|
218
|
+
group='core.apoxy.dev',
|
|
219
|
+
version='v1alpha',
|
|
220
|
+
plural='backends',
|
|
221
|
+
body=spec,
|
|
222
|
+
)
|
|
223
|
+
logger.info(f'Apoxy Backend {name} created')
|
|
224
|
+
elif kind == 'HTTPRoute':
|
|
225
|
+
custom_api.create_cluster_custom_object(
|
|
226
|
+
group='gateway.apoxy.dev',
|
|
227
|
+
version='v1',
|
|
228
|
+
plural='httproutes',
|
|
229
|
+
body=spec,
|
|
230
|
+
)
|
|
231
|
+
logger.info(f'Apoxy HTTPRoute {name} created')
|
|
232
|
+
except Exception as e:
|
|
233
|
+
if '409' in str(e) or 'AlreadyExists' in str(e):
|
|
234
|
+
try:
|
|
235
|
+
# Delete first, then create
|
|
236
|
+
if kind == 'Backend':
|
|
237
|
+
custom_api.delete_cluster_custom_object(
|
|
238
|
+
group='core.apoxy.dev',
|
|
239
|
+
version='v1alpha',
|
|
240
|
+
plural='backends',
|
|
241
|
+
name=name,
|
|
242
|
+
)
|
|
243
|
+
custom_api.create_cluster_custom_object(
|
|
244
|
+
group='core.apoxy.dev',
|
|
245
|
+
version='v1alpha',
|
|
246
|
+
plural='backends',
|
|
247
|
+
body=spec,
|
|
248
|
+
)
|
|
249
|
+
elif kind == 'HTTPRoute':
|
|
250
|
+
custom_api.delete_cluster_custom_object(
|
|
251
|
+
group='gateway.apoxy.dev',
|
|
252
|
+
version='v1',
|
|
253
|
+
plural='httproutes',
|
|
254
|
+
name=name,
|
|
255
|
+
)
|
|
256
|
+
custom_api.create_cluster_custom_object(
|
|
257
|
+
group='gateway.apoxy.dev',
|
|
258
|
+
version='v1',
|
|
259
|
+
plural='httproutes',
|
|
260
|
+
body=spec,
|
|
261
|
+
)
|
|
262
|
+
logger.info(f'Apoxy {kind} {name} deleted and recreated')
|
|
263
|
+
except Exception as delete_create_error:
|
|
264
|
+
logger.error(
|
|
265
|
+
f'Failed to delete and recreate {kind} {name}: '
|
|
266
|
+
f'{delete_create_error}'
|
|
267
|
+
)
|
|
268
|
+
raise
|
|
269
|
+
elif '404' in str(e) or 'NotFound' in str(e):
|
|
270
|
+
logger.warning(f'Apoxy CRD for {kind} not found. Skipping {name}.')
|
|
271
|
+
logger.info('Make sure Apoxy is deployed and CRDs are ready.')
|
|
272
|
+
continue
|
|
273
|
+
else:
|
|
274
|
+
raise
|
|
275
|
+
except Exception as e:
|
|
276
|
+
logger.error(f'Error creating Apoxy resources: {e}')
|
|
277
|
+
|
|
278
|
+
|
|
127
279
|
def create_deployment(
|
|
128
280
|
namespace: str,
|
|
129
281
|
task: 'konduktor.Task',
|
|
@@ -576,7 +728,7 @@ def delete_serving_specs(name: str, namespace: str) -> None:
|
|
|
576
728
|
delete_fn(namespace, name)
|
|
577
729
|
logger.info(f'Deleted {kind}: {name}')
|
|
578
730
|
except Exception as e:
|
|
579
|
-
logger.
|
|
731
|
+
logger.debug(f'Failed to delete {kind} {name}: {e}')
|
|
580
732
|
|
|
581
733
|
|
|
582
734
|
def _get_resource_summary(deployment) -> str:
|
|
@@ -627,7 +779,156 @@ def get_envoy_external_ip() -> Optional[str]:
|
|
|
627
779
|
return None
|
|
628
780
|
|
|
629
781
|
|
|
630
|
-
def
|
|
782
|
+
def get_unique_cluster_name_from_tunnel() -> str:
|
|
783
|
+
"""Get cluster name from the apoxy deployment command."""
|
|
784
|
+
try:
|
|
785
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
786
|
+
apps_api = kube_client.apps_api(context=context)
|
|
787
|
+
|
|
788
|
+
# Get the apoxy deployment
|
|
789
|
+
deployment = apps_api.read_namespaced_deployment(
|
|
790
|
+
name='apoxy', namespace='default'
|
|
791
|
+
)
|
|
792
|
+
|
|
793
|
+
# Extract cluster name from the command
|
|
794
|
+
containers = deployment.spec.template.spec.containers
|
|
795
|
+
if containers and len(containers) > 0:
|
|
796
|
+
command = containers[0].command
|
|
797
|
+
if (
|
|
798
|
+
command
|
|
799
|
+
and len(command) >= 4
|
|
800
|
+
and command[1] == 'tunnel'
|
|
801
|
+
and command[2] == 'run'
|
|
802
|
+
):
|
|
803
|
+
return command[3] # The cluster name is the 4th argument
|
|
804
|
+
|
|
805
|
+
logger.warning('Could not extract cluster name from apoxy deployment command')
|
|
806
|
+
|
|
807
|
+
except Exception as e:
|
|
808
|
+
logger.warning(f'Error getting cluster name from apoxy deployment: {e}')
|
|
809
|
+
|
|
810
|
+
return 'default'
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def get_endpoint_type_from_config() -> str:
|
|
814
|
+
"""Get the endpoint type from konduktor config.
|
|
815
|
+
|
|
816
|
+
Returns:
|
|
817
|
+
'trainy' for Apoxy endpoints (default)
|
|
818
|
+
'direct' for LoadBalancer IP endpoints
|
|
819
|
+
"""
|
|
820
|
+
try:
|
|
821
|
+
# Use the proper config system that handles KONDUKTOR_CONFIG env var
|
|
822
|
+
endpoint_type = konduktor_config.get_nested(('serving', 'endpoint'), 'trainy')
|
|
823
|
+
return endpoint_type.lower()
|
|
824
|
+
except Exception as e:
|
|
825
|
+
logger.warning(f'Error reading endpoint config: {e}')
|
|
826
|
+
|
|
827
|
+
# Default to trainy if config not found or error
|
|
828
|
+
return 'trainy'
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def _get_loadbalancer_endpoint_with_port(service_name: str) -> str:
|
|
832
|
+
"""Helper function to get LoadBalancer endpoint with port."""
|
|
833
|
+
try:
|
|
834
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
835
|
+
core_api = kube_client.core_api(context=context)
|
|
836
|
+
|
|
837
|
+
# Get the service
|
|
838
|
+
service = core_api.read_namespaced_service(
|
|
839
|
+
name=service_name, namespace='default'
|
|
840
|
+
)
|
|
841
|
+
|
|
842
|
+
# Check if it's LoadBalancer type
|
|
843
|
+
if service.spec.type == 'LoadBalancer':
|
|
844
|
+
ingress = service.status.load_balancer.ingress
|
|
845
|
+
if ingress and len(ingress) > 0:
|
|
846
|
+
ip = ingress[0].ip
|
|
847
|
+
if ip:
|
|
848
|
+
return f'{ip}:{service.spec.ports[0].port}'
|
|
849
|
+
|
|
850
|
+
# If not LoadBalancer or no IP, return pending
|
|
851
|
+
return '<pending>'
|
|
852
|
+
|
|
853
|
+
except Exception:
|
|
854
|
+
return '<pending>'
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def get_vllm_deployment_endpoint(force_direct: bool = False) -> str:
|
|
858
|
+
"""Get the endpoint for vLLM/Aibrix deployments based on config."""
|
|
859
|
+
if force_direct:
|
|
860
|
+
# Force direct endpoint display regardless of config
|
|
861
|
+
endpoint_type = 'direct'
|
|
862
|
+
else:
|
|
863
|
+
endpoint_type = get_endpoint_type_from_config()
|
|
864
|
+
|
|
865
|
+
if endpoint_type == 'direct':
|
|
866
|
+
try:
|
|
867
|
+
aibrix_endpoint = get_envoy_external_ip()
|
|
868
|
+
return aibrix_endpoint or '<pending>'
|
|
869
|
+
except Exception:
|
|
870
|
+
return '<pending>'
|
|
871
|
+
else:
|
|
872
|
+
try:
|
|
873
|
+
cluster_name = get_unique_cluster_name_from_tunnel()
|
|
874
|
+
return f'{cluster_name[:-3]}.trainy.us'
|
|
875
|
+
except Exception:
|
|
876
|
+
# Fallback to direct endpoint if trainy.us not available
|
|
877
|
+
try:
|
|
878
|
+
aibrix_endpoint = get_envoy_external_ip()
|
|
879
|
+
if aibrix_endpoint:
|
|
880
|
+
# Aibrix deployments route through Envoy Gateway on port 80
|
|
881
|
+
return f'{aibrix_endpoint}'
|
|
882
|
+
except Exception:
|
|
883
|
+
pass
|
|
884
|
+
return '<pending>'
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
def get_general_deployment_endpoint(
|
|
888
|
+
service_name: str, force_direct: bool = False
|
|
889
|
+
) -> str:
|
|
890
|
+
"""Get the endpoint for a general deployment based on config."""
|
|
891
|
+
if force_direct:
|
|
892
|
+
# Force direct endpoint display regardless of config
|
|
893
|
+
endpoint_type = 'direct'
|
|
894
|
+
else:
|
|
895
|
+
endpoint_type = get_endpoint_type_from_config()
|
|
896
|
+
|
|
897
|
+
if endpoint_type == 'direct':
|
|
898
|
+
# Use LoadBalancer IP with port
|
|
899
|
+
return _get_loadbalancer_endpoint_with_port(service_name)
|
|
900
|
+
else:
|
|
901
|
+
# Use Apoxy (trainy.us) - existing logic
|
|
902
|
+
try:
|
|
903
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
904
|
+
custom_api = kube_client.crd_api(context=context)
|
|
905
|
+
|
|
906
|
+
# Query route with label selector using the original task name
|
|
907
|
+
routes = custom_api.list_cluster_custom_object(
|
|
908
|
+
group='gateway.apoxy.dev',
|
|
909
|
+
version='v1',
|
|
910
|
+
plural='httproutes',
|
|
911
|
+
label_selector=f'task_name={service_name}',
|
|
912
|
+
)
|
|
913
|
+
|
|
914
|
+
# Extract endpoint_name from the route labels
|
|
915
|
+
if routes.get('items') and len(routes['items']) > 0:
|
|
916
|
+
route = routes['items'][0] # Should only be one route with this label
|
|
917
|
+
labels = route.get('metadata', {}).get('labels', {})
|
|
918
|
+
endpoint_name = labels.get('endpoint_name')
|
|
919
|
+
if endpoint_name:
|
|
920
|
+
return endpoint_name
|
|
921
|
+
|
|
922
|
+
# Fallback if no route found - try direct LoadBalancer endpoint
|
|
923
|
+
return _get_loadbalancer_endpoint_with_port(service_name)
|
|
924
|
+
|
|
925
|
+
except Exception as e:
|
|
926
|
+
logger.warning(f'Endpoint error for general deployment {service_name}: {e}')
|
|
927
|
+
# Fallback to direct LoadBalancer endpoint on error
|
|
928
|
+
return _get_loadbalancer_endpoint_with_port(service_name)
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
|
|
631
932
|
"""Display status of Konduktor Serve models."""
|
|
632
933
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
633
934
|
|
|
@@ -657,10 +958,12 @@ def show_status_table(namespace: str, all_users: bool):
|
|
|
657
958
|
return
|
|
658
959
|
|
|
659
960
|
Console().print()
|
|
660
|
-
external_ip = get_envoy_external_ip()
|
|
661
961
|
title = '[bold]KONDUKTOR SERVE[/bold]'
|
|
662
962
|
is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')
|
|
663
963
|
|
|
964
|
+
# Get Aibrix endpoint once for all Aibrix deployments
|
|
965
|
+
aibrix_endpoint = get_vllm_deployment_endpoint(force_direct)
|
|
966
|
+
|
|
664
967
|
table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
|
|
665
968
|
if all_users:
|
|
666
969
|
table.add_column('User', style='magenta', no_wrap=True)
|
|
@@ -743,48 +1046,22 @@ def show_status_table(namespace: str, all_users: bool):
|
|
|
743
1046
|
else Text('PENDING', style='yellow')
|
|
744
1047
|
)
|
|
745
1048
|
|
|
746
|
-
#
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
else {}
|
|
753
|
-
)
|
|
754
|
-
or (
|
|
755
|
-
service.metadata.labels
|
|
756
|
-
if service and hasattr(service.metadata, 'labels')
|
|
757
|
-
else {}
|
|
758
|
-
)
|
|
759
|
-
or {}
|
|
760
|
-
)
|
|
761
|
-
if AIBRIX_NAME_LABEL in labels:
|
|
762
|
-
ip_str = external_ip or '<pending>'
|
|
1049
|
+
# Extract labels from deployment, service, or fallback to empty dict
|
|
1050
|
+
labels = {}
|
|
1051
|
+
if deployment and hasattr(deployment.metadata, 'labels'):
|
|
1052
|
+
labels = deployment.metadata.labels or {}
|
|
1053
|
+
elif service and hasattr(service.metadata, 'labels'):
|
|
1054
|
+
labels = service.metadata.labels or {}
|
|
763
1055
|
else:
|
|
764
|
-
|
|
765
|
-
service
|
|
766
|
-
and service.status
|
|
767
|
-
and service.status.load_balancer
|
|
768
|
-
and service.status.load_balancer.ingress
|
|
769
|
-
):
|
|
770
|
-
ing = service.status.load_balancer.ingress[0]
|
|
771
|
-
ip_str = ing.ip or ing.hostname or '<pending>'
|
|
772
|
-
|
|
773
|
-
# Port
|
|
774
|
-
port_str = ''
|
|
775
|
-
if service and service.spec and service.spec.ports:
|
|
776
|
-
port_obj = (
|
|
777
|
-
next((p for p in service.spec.ports if p.name == 'serve'), None)
|
|
778
|
-
or service.spec.ports[0]
|
|
779
|
-
)
|
|
780
|
-
if port_obj and port_obj.port:
|
|
781
|
-
port_str = str(port_obj.port)
|
|
1056
|
+
labels = {}
|
|
782
1057
|
|
|
783
|
-
|
|
1058
|
+
endpoint_str = '<pending>'
|
|
784
1059
|
if AIBRIX_NAME_LABEL in labels:
|
|
785
|
-
|
|
1060
|
+
# Aibrix deployment - use the pre-computed endpoint
|
|
1061
|
+
endpoint_str = aibrix_endpoint
|
|
786
1062
|
else:
|
|
787
|
-
|
|
1063
|
+
# General deployment
|
|
1064
|
+
endpoint_str = get_general_deployment_endpoint(name, force_direct)
|
|
788
1065
|
|
|
789
1066
|
# Replicas
|
|
790
1067
|
ready_replicas = (
|
|
@@ -176,7 +176,7 @@ class JobsetBackend(backend.Backend):
|
|
|
176
176
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
177
177
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
178
178
|
# TODO(asaiacai): need to set env variables in pod
|
|
179
|
-
jobset_utils.create_jobset(
|
|
179
|
+
jobset_response = jobset_utils.create_jobset(
|
|
180
180
|
namespace,
|
|
181
181
|
task,
|
|
182
182
|
pod_spec['kubernetes']['pod_config'],
|
|
@@ -192,9 +192,10 @@ class JobsetBackend(backend.Backend):
|
|
|
192
192
|
):
|
|
193
193
|
_wait_for_jobset_start(namespace, task.name)
|
|
194
194
|
try:
|
|
195
|
+
assert jobset_response is not None
|
|
195
196
|
log_thread = threading.Thread(
|
|
196
197
|
target=log_utils.tail_logs,
|
|
197
|
-
args=(
|
|
198
|
+
args=(jobset_response,),
|
|
198
199
|
daemon=True,
|
|
199
200
|
)
|
|
200
201
|
logger.info('streaming logs...')
|
|
@@ -575,8 +575,15 @@ def show_status_table(
|
|
|
575
575
|
]['containers'][0]['resources']['limits'] # noqa: E501
|
|
576
576
|
cpu, memory = resources['cpu'], resources['memory']
|
|
577
577
|
accelerator = job['metadata']['labels'].get(JOBSET_ACCELERATOR_LABEL, None)
|
|
578
|
+
num_accelerators = job['metadata']['labels'].get(
|
|
579
|
+
JOBSET_NUM_ACCELERATORS_LABEL, None
|
|
580
|
+
)
|
|
578
581
|
if accelerator:
|
|
579
|
-
|
|
582
|
+
if num_accelerators:
|
|
583
|
+
accelerator_with_count = f'{accelerator}:{num_accelerators}'
|
|
584
|
+
else:
|
|
585
|
+
accelerator_with_count = accelerator
|
|
586
|
+
return f'{num_pods}x({cpu}CPU, {memory}MEM, {accelerator_with_count})'
|
|
580
587
|
else:
|
|
581
588
|
return f'{num_pods}x({cpu}CPU, {memory}MEM)'
|
|
582
589
|
|
|
@@ -1852,11 +1852,21 @@ def serve_down(
|
|
|
1852
1852
|
required=False,
|
|
1853
1853
|
help='Show all deployments, including those not owned by the ' 'current user.',
|
|
1854
1854
|
)
|
|
1855
|
-
|
|
1855
|
+
@click.option(
|
|
1856
|
+
'--direct',
|
|
1857
|
+
'-d',
|
|
1858
|
+
default=False,
|
|
1859
|
+
is_flag=True,
|
|
1860
|
+
required=False,
|
|
1861
|
+
help='Force display of direct IP endpoints instead of trainy.us endpoints.',
|
|
1862
|
+
)
|
|
1863
|
+
def serve_status(all_users: bool, direct: bool):
|
|
1856
1864
|
"""Show status of deployments launched via `konduktor serve launch`."""
|
|
1857
1865
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1858
1866
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1859
|
-
deployment_utils.show_status_table(
|
|
1867
|
+
deployment_utils.show_status_table(
|
|
1868
|
+
namespace, all_users=all_users, force_direct=direct
|
|
1869
|
+
)
|
|
1860
1870
|
|
|
1861
1871
|
|
|
1862
1872
|
def main():
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
apiVersion: v1
|
|
2
|
+
kind: Secret
|
|
3
|
+
metadata:
|
|
4
|
+
name: trainy-kubeconfig
|
|
5
|
+
namespace: default
|
|
6
|
+
type: Opaque
|
|
7
|
+
data:
|
|
8
|
+
# this gets replaced by buildkite CI secret APOXY_AUTH
|
|
9
|
+
kubeconfig.yaml: |
|
|
10
|
+
APOXY_AUTH
|
|
11
|
+
---
|
|
12
|
+
apiVersion: v1
|
|
13
|
+
kind: ServiceAccount
|
|
14
|
+
metadata:
|
|
15
|
+
name: kube-controller
|
|
16
|
+
namespace: default
|
|
17
|
+
---
|
|
18
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
19
|
+
kind: ClusterRole
|
|
20
|
+
metadata:
|
|
21
|
+
name: kube-controller-role
|
|
22
|
+
rules:
|
|
23
|
+
- apiGroups: ["apiregistration.k8s.io"]
|
|
24
|
+
resources: ["apiservices"]
|
|
25
|
+
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
|
|
26
|
+
---
|
|
27
|
+
apiVersion: rbac.authorization.k8s.io/v1
|
|
28
|
+
kind: ClusterRoleBinding
|
|
29
|
+
metadata:
|
|
30
|
+
name: kube-controller-role-binding
|
|
31
|
+
roleRef:
|
|
32
|
+
apiGroup: rbac.authorization.k8s.io
|
|
33
|
+
kind: ClusterRole
|
|
34
|
+
name: kube-controller-role
|
|
35
|
+
subjects:
|
|
36
|
+
- kind: ServiceAccount
|
|
37
|
+
name: kube-controller
|
|
38
|
+
namespace: default
|
|
39
|
+
---
|
|
40
|
+
apiVersion: apps/v1
|
|
41
|
+
kind: Deployment
|
|
42
|
+
metadata:
|
|
43
|
+
name: kube-controller
|
|
44
|
+
namespace: default
|
|
45
|
+
labels:
|
|
46
|
+
app: kube-controller
|
|
47
|
+
spec:
|
|
48
|
+
replicas: 1
|
|
49
|
+
selector:
|
|
50
|
+
matchLabels:
|
|
51
|
+
app: kube-controller
|
|
52
|
+
template:
|
|
53
|
+
metadata:
|
|
54
|
+
labels:
|
|
55
|
+
app: kube-controller
|
|
56
|
+
spec:
|
|
57
|
+
containers:
|
|
58
|
+
- name: kube-controller
|
|
59
|
+
image: apoxy/kube-controller:v0.11.6
|
|
60
|
+
args:
|
|
61
|
+
- --dev
|
|
62
|
+
- --project_id=7ce458d7-e20c-443c-aeeb-dbc5663c1240
|
|
63
|
+
- --kubeconfig_path=/data/kubeconfig.yaml
|
|
64
|
+
env:
|
|
65
|
+
- name: POD_NAMESPACE
|
|
66
|
+
valueFrom:
|
|
67
|
+
fieldRef:
|
|
68
|
+
fieldPath: metadata.namespace
|
|
69
|
+
volumeMounts:
|
|
70
|
+
- name: kubeconfig-volume
|
|
71
|
+
mountPath: /data
|
|
72
|
+
readOnly: true
|
|
73
|
+
volumes:
|
|
74
|
+
- name: kubeconfig-volume
|
|
75
|
+
secret:
|
|
76
|
+
secretName: trainy-kubeconfig
|
|
77
|
+
items:
|
|
78
|
+
- key: kubeconfig.yaml
|
|
79
|
+
path: kubeconfig.yaml
|
|
80
|
+
mode: 0600
|
|
81
|
+
serviceAccountName: kube-controller
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
apiVersion: v1
|
|
85
|
+
kind: Service
|
|
86
|
+
metadata:
|
|
87
|
+
name: kube-controller
|
|
88
|
+
namespace: default
|
|
89
|
+
labels:
|
|
90
|
+
app: kube-controller
|
|
91
|
+
spec:
|
|
92
|
+
selector:
|
|
93
|
+
app: kube-controller
|
|
94
|
+
ports:
|
|
95
|
+
- name: http
|
|
96
|
+
protocol: TCP
|
|
97
|
+
port: 8443
|
|
98
|
+
targetPort: 8443
|
|
99
|
+
---
|
|
100
|
+
apiVersion: v1
|
|
101
|
+
kind: ConfigMap
|
|
102
|
+
metadata:
|
|
103
|
+
name: apoxy-config
|
|
104
|
+
namespace: default
|
|
105
|
+
data:
|
|
106
|
+
config.yaml: |
|
|
107
|
+
apiVersion: config.apoxy.dev/v1alpha1
|
|
108
|
+
kind: Config
|
|
109
|
+
currentProject: 7ce458d7-e20c-443c-aeeb-dbc5663c1240
|
|
110
|
+
projects:
|
|
111
|
+
- id: 7ce458d7-e20c-443c-aeeb-dbc5663c1240
|
|
112
|
+
kubernetesConfig:
|
|
113
|
+
kubeconfigPath: /root/kubeconfig.yaml
|
|
114
|
+
tunnel:
|
|
115
|
+
mode: userspace
|
|
116
|
+
---
|
|
117
|
+
apiVersion: apps/v1
|
|
118
|
+
kind: Deployment
|
|
119
|
+
metadata:
|
|
120
|
+
name: apoxy
|
|
121
|
+
namespace: default
|
|
122
|
+
labels:
|
|
123
|
+
app: apoxy
|
|
124
|
+
spec:
|
|
125
|
+
replicas: 1
|
|
126
|
+
selector:
|
|
127
|
+
matchLabels:
|
|
128
|
+
app: apoxy
|
|
129
|
+
template:
|
|
130
|
+
metadata:
|
|
131
|
+
labels:
|
|
132
|
+
app: apoxy
|
|
133
|
+
spec:
|
|
134
|
+
containers:
|
|
135
|
+
- name: apoxy
|
|
136
|
+
image: apoxy/apoxy:v0.11.10
|
|
137
|
+
command: ["apoxy", "tunnel", "run", "UNIQUE-TEMPNAME", "--insecure-skip-verify"]
|
|
138
|
+
volumeMounts:
|
|
139
|
+
- name: kubeconfig-volume
|
|
140
|
+
mountPath: /root/kubeconfig.yaml
|
|
141
|
+
subPath: kubeconfig.yaml
|
|
142
|
+
- name: apoxy-config-volume
|
|
143
|
+
mountPath: /root/.apoxy/config.yaml
|
|
144
|
+
subPath: config.yaml
|
|
145
|
+
volumes:
|
|
146
|
+
- name: kubeconfig-volume
|
|
147
|
+
secret:
|
|
148
|
+
secretName: trainy-kubeconfig
|
|
149
|
+
- name: apoxy-config-volume
|
|
150
|
+
configMap:
|
|
151
|
+
name: apoxy-config
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
apiVersion: core.apoxy.dev/v1alpha
|
|
2
|
+
kind: TunnelNode
|
|
3
|
+
metadata:
|
|
4
|
+
name: UNIQUE-TEMPNAME
|
|
5
|
+
spec:
|
|
6
|
+
egressGateway:
|
|
7
|
+
enabled: true
|
|
8
|
+
---
|
|
9
|
+
# Add just your backend for aibrix
|
|
10
|
+
apiVersion: core.apoxy.dev/v1alpha
|
|
11
|
+
kind: Backend
|
|
12
|
+
metadata:
|
|
13
|
+
name: UNIQUE-TEMPNAME-backend
|
|
14
|
+
spec:
|
|
15
|
+
endpoints:
|
|
16
|
+
- fqdn: envoy-aibrix-system-aibrix-eg-903790dc.envoy-gateway-system.UNIQUE-TEMPNAME.tun.apoxy.net
|
|
17
|
+
---
|
|
18
|
+
# Add just your route for aibrix
|
|
19
|
+
apiVersion: gateway.apoxy.dev/v1
|
|
20
|
+
kind: HTTPRoute
|
|
21
|
+
metadata:
|
|
22
|
+
name: UNIQUE-TEMPNAME-route
|
|
23
|
+
spec:
|
|
24
|
+
parentRefs:
|
|
25
|
+
- name: default
|
|
26
|
+
kind: Gateway
|
|
27
|
+
port: 443
|
|
28
|
+
hostnames:
|
|
29
|
+
- 'TEMPNAME.trainy.us'
|
|
30
|
+
rules:
|
|
31
|
+
- backendRefs:
|
|
32
|
+
- kind: Backend
|
|
33
|
+
name: UNIQUE-TEMPNAME-backend
|
|
34
|
+
port: 80
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
# Apoxy Backend for general deployment
|
|
3
|
+
apiVersion: core.apoxy.dev/v1alpha
|
|
4
|
+
kind: Backend
|
|
5
|
+
metadata:
|
|
6
|
+
name: {{ unique_cluster_name }}-backend-{{ deployment_number }}
|
|
7
|
+
labels:
|
|
8
|
+
task_name: {{ name }}
|
|
9
|
+
endpoint_name: {{ cluster_name }}-{{ deployment_number }}.trainy.us
|
|
10
|
+
spec:
|
|
11
|
+
endpoints:
|
|
12
|
+
- fqdn: {{ name }}.default.{{ unique_cluster_name }}.tun.apoxy.net
|
|
13
|
+
---
|
|
14
|
+
# Apoxy Route for general deployment
|
|
15
|
+
apiVersion: gateway.apoxy.dev/v1
|
|
16
|
+
kind: HTTPRoute
|
|
17
|
+
metadata:
|
|
18
|
+
name: {{ unique_cluster_name }}-route-{{ deployment_number }}
|
|
19
|
+
labels:
|
|
20
|
+
task_name: {{ name }}
|
|
21
|
+
endpoint_name: {{ cluster_name }}-{{ deployment_number }}.trainy.us
|
|
22
|
+
spec:
|
|
23
|
+
parentRefs:
|
|
24
|
+
- name: default
|
|
25
|
+
kind: Gateway
|
|
26
|
+
port: 443
|
|
27
|
+
hostnames:
|
|
28
|
+
- '{{ cluster_name }}-{{ deployment_number }}.trainy.us'
|
|
29
|
+
rules:
|
|
30
|
+
- backendRefs:
|
|
31
|
+
- kind: Backend
|
|
32
|
+
name: {{ unique_cluster_name }}-backend-{{ deployment_number }}
|
|
33
|
+
port: {{ ports }}
|
|
@@ -578,6 +578,19 @@ def get_config_schema():
|
|
|
578
578
|
},
|
|
579
579
|
}
|
|
580
580
|
|
|
581
|
+
serving_configs = {
|
|
582
|
+
'type': 'object',
|
|
583
|
+
'required': [],
|
|
584
|
+
'additionalProperties': False,
|
|
585
|
+
'properties': {
|
|
586
|
+
'endpoint': {
|
|
587
|
+
'type': 'string',
|
|
588
|
+
'case_insensitive_enum': ['trainy', 'direct'],
|
|
589
|
+
'default': 'trainy',
|
|
590
|
+
},
|
|
591
|
+
},
|
|
592
|
+
}
|
|
593
|
+
|
|
581
594
|
for cloud, config in cloud_configs.items():
|
|
582
595
|
if cloud == 'kubernetes':
|
|
583
596
|
config['properties'].update(_REMOTE_IDENTITY_SCHEMA_KUBERNETES)
|
|
@@ -595,6 +608,7 @@ def get_config_schema():
|
|
|
595
608
|
'logs': logs_configs,
|
|
596
609
|
'tailscale': tailscale_configs,
|
|
597
610
|
'ssh': ssh_configs,
|
|
611
|
+
'serving': serving_configs,
|
|
598
612
|
**cloud_configs,
|
|
599
613
|
},
|
|
600
614
|
}
|
{konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/LICENSE
RENAMED
|
File without changes
|
{konduktor_nightly-0.1.0.dev20250825104841 → konduktor_nightly-0.1.0.dev20250827104553}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|