kubetorch 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kubetorch might be problematic.
- kubetorch/__init__.py +60 -0
- kubetorch/cli.py +1985 -0
- kubetorch/cli_utils.py +1025 -0
- kubetorch/config.py +453 -0
- kubetorch/constants.py +18 -0
- kubetorch/docs/Makefile +18 -0
- kubetorch/docs/__init__.py +0 -0
- kubetorch/docs/_ext/json_globaltoc.py +42 -0
- kubetorch/docs/api/cli.rst +10 -0
- kubetorch/docs/api/python/app.rst +21 -0
- kubetorch/docs/api/python/cls.rst +19 -0
- kubetorch/docs/api/python/compute.rst +25 -0
- kubetorch/docs/api/python/config.rst +11 -0
- kubetorch/docs/api/python/fn.rst +19 -0
- kubetorch/docs/api/python/image.rst +14 -0
- kubetorch/docs/api/python/secret.rst +18 -0
- kubetorch/docs/api/python/volumes.rst +13 -0
- kubetorch/docs/api/python.rst +101 -0
- kubetorch/docs/conf.py +69 -0
- kubetorch/docs/index.rst +20 -0
- kubetorch/docs/requirements.txt +5 -0
- kubetorch/globals.py +285 -0
- kubetorch/logger.py +59 -0
- kubetorch/resources/__init__.py +0 -0
- kubetorch/resources/callables/__init__.py +0 -0
- kubetorch/resources/callables/cls/__init__.py +0 -0
- kubetorch/resources/callables/cls/cls.py +157 -0
- kubetorch/resources/callables/fn/__init__.py +0 -0
- kubetorch/resources/callables/fn/fn.py +133 -0
- kubetorch/resources/callables/module.py +1416 -0
- kubetorch/resources/callables/utils.py +174 -0
- kubetorch/resources/compute/__init__.py +0 -0
- kubetorch/resources/compute/app.py +261 -0
- kubetorch/resources/compute/compute.py +2596 -0
- kubetorch/resources/compute/decorators.py +139 -0
- kubetorch/resources/compute/rbac.py +74 -0
- kubetorch/resources/compute/utils.py +1114 -0
- kubetorch/resources/compute/websocket.py +137 -0
- kubetorch/resources/images/__init__.py +1 -0
- kubetorch/resources/images/image.py +414 -0
- kubetorch/resources/images/images.py +74 -0
- kubetorch/resources/secrets/__init__.py +2 -0
- kubetorch/resources/secrets/kubernetes_secrets_client.py +412 -0
- kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
- kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
- kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
- kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
- kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/providers.py +93 -0
- kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
- kubetorch/resources/secrets/secret.py +238 -0
- kubetorch/resources/secrets/secret_factory.py +70 -0
- kubetorch/resources/secrets/utils.py +209 -0
- kubetorch/resources/volumes/__init__.py +0 -0
- kubetorch/resources/volumes/volume.py +365 -0
- kubetorch/servers/__init__.py +0 -0
- kubetorch/servers/http/__init__.py +0 -0
- kubetorch/servers/http/distributed_utils.py +3223 -0
- kubetorch/servers/http/http_client.py +730 -0
- kubetorch/servers/http/http_server.py +1788 -0
- kubetorch/servers/http/server_metrics.py +278 -0
- kubetorch/servers/http/utils.py +728 -0
- kubetorch/serving/__init__.py +0 -0
- kubetorch/serving/autoscaling.py +173 -0
- kubetorch/serving/base_service_manager.py +363 -0
- kubetorch/serving/constants.py +83 -0
- kubetorch/serving/deployment_service_manager.py +478 -0
- kubetorch/serving/knative_service_manager.py +519 -0
- kubetorch/serving/raycluster_service_manager.py +582 -0
- kubetorch/serving/service_manager.py +18 -0
- kubetorch/serving/templates/deployment_template.yaml +17 -0
- kubetorch/serving/templates/knative_service_template.yaml +19 -0
- kubetorch/serving/templates/kt_setup_template.sh.j2 +81 -0
- kubetorch/serving/templates/pod_template.yaml +194 -0
- kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
- kubetorch/serving/templates/raycluster_template.yaml +35 -0
- kubetorch/serving/templates/service_template.yaml +21 -0
- kubetorch/serving/templates/workerset_template.yaml +36 -0
- kubetorch/serving/utils.py +377 -0
- kubetorch/utils.py +284 -0
- kubetorch-0.2.0.dist-info/METADATA +121 -0
- kubetorch-0.2.0.dist-info/RECORD +93 -0
- kubetorch-0.2.0.dist-info/WHEEL +4 -0
- kubetorch-0.2.0.dist-info/entry_points.txt +5 -0
kubetorch/docs/api/python/volumes.rst
ADDED
@@ -0,0 +1,13 @@
+Volumes
+=======
+
+Kubetorch provides persistent storage through the ``Volume`` class, which abstracts Kubernetes PersistentVolumeClaims
+while maintaining the flexibility to work with any storage backend your cluster supports.
+
+Volume Class
+~~~~~~~~~~~~~~
+
+.. autoclass:: kubetorch.Volume
+   :members:
+
+   .. automethod:: __init__
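For context on what the ``Volume`` class documented above abstracts away, here is a minimal sketch of provisioning the equivalent PersistentVolumeClaim directly with the official ``kubernetes`` Python client. This is illustrative background only, not kubetorch code; the claim name, size, and namespace are arbitrary examples.

    # Illustrative only: the raw PVC that a Volume-style abstraction manages for you.
    import kubernetes.client as k8s
    from kubernetes import config

    config.load_kube_config()  # or load_incluster_config() when running in a pod
    pvc = k8s.V1PersistentVolumeClaim(
        metadata=k8s.V1ObjectMeta(name="kt-example-data"),  # arbitrary example name
        spec=k8s.V1PersistentVolumeClaimSpec(
            access_modes=["ReadWriteOnce"],
            resources=k8s.V1ResourceRequirements(requests={"storage": "10Gi"}),
            # storage_class_name left unset so the cluster's default backend is used
        ),
    )
    k8s.CoreV1Api().create_namespaced_persistent_volume_claim(namespace="default", body=pvc)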
kubetorch/docs/api/python.rst
ADDED
@@ -0,0 +1,101 @@
+Python API
+==========
+
+The API Reference provides detailed information about the Kubetorch Python API and CLI commands.
+
+If you are just getting started with Kubetorch or looking for use cases and examples, we recommend first checking out:
+
+* `Guides <https://www.run.house/kubetorch/introduction>`_: quick start, high level concepts, developer guides, and more
+
+* `Examples <https://www.run.house/examples>`_: end-to-end examples using Kubetorch
+
+Compute
+-------
+
+The ``Compute`` class allows you to define the resources and environment needed for your workloads,
+while controlling how the compute is managed and scaled based on demand. This includes specifying
+hardware requirements that can be either generic or tailored to your specific Kubernetes infrastructure and setup.
+
+.. toctree::
+   :maxdepth: 1
+
+   python/compute
+
+Image
+------
+
+The ``Image`` class enables you to define and customize the containerized environment for your workloads.
+You can specify a pre-built Docker image as your foundation and layer on additional setup steps that run
+at launch time, eliminating the need to rebuild images for every code change.
+
+.. toctree::
+   :maxdepth: 1
+
+   python/image
+
+
+Module
+------
+
+The ``Fn`` and ``Cls`` classes are wrappers around your locally defined Python functions and classes, respectively.
+Once wrapped, these objects can be sent ``.to(compute)``, which launches a service on your cluster (taking into
+account the compute requirements) and syncs over the necessary files to run the function remotely.
+
+
+.. toctree::
+   :maxdepth: 1
+
+   python/fn
+
+.. toctree::
+   :maxdepth: 1
+
+   python/cls
+
+
+App
+---
+
+The ``App`` class wraps a Python CLI command or script, enabling you to run entire applications remotely on the cluster.
+Unlike ``Fn`` and ``Cls`` which wrap individual functions or classes, ``App`` deploys and executes complete Python files
+with all their dependencies, making it ideal for training scripts, data processing pipelines, or even web applications.
+
+.. toctree::
+   :maxdepth: 1
+
+   python/app
+
+Secrets
+-------
+
+Secrets such as provider keys and environment variables can be set when defining compute. These are set at launch time
+and accessible during the scope of your program.
+
+.. toctree::
+   :maxdepth: 1
+
+   python/secret
+
+Config
+------
+
+Kubetorch uses a local configuration file (stored at ``~/.kt/config.yaml``) to allow you to set global defaults for
+your services. You can update the config file manually, use the ``kt config`` command, or set them as environment variables.
+You can also override defaults directly in the resource constructor for a specific service.
+
+.. toctree::
+   :maxdepth: 1
+
+   python/config
+
+Volumes
+-------
+
+The ``Volume`` class enables persistent storage for your workloads, allowing data to persist beyond individual pod lifecycles.
+Kubetorch automatically manages Kubernetes PersistentVolumeClaims (PVCs) while providing a simple Python interface for
+storage configuration.
+
+.. toctree::
+   :maxdepth: 1
+
+   python/volumes
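Pulling the sections above together, the intended flow is roughly the following. Treat this as a hedged sketch rather than confirmed API: the docs above name ``Compute``, ``Image``, ``Fn``/``Cls``, and ``.to(compute)``, but the ``kt.fn`` helper and the ``Compute``/``Image`` keyword arguments shown here are assumptions for illustration and may not match this release's actual signatures.

    import kubetorch as kt

    def add(a: int, b: int) -> int:
        return a + b

    if __name__ == "__main__":
        # Assumed constructor arguments, shown only to illustrate the flow described above
        compute = kt.Compute(cpus="1", image=kt.Image(image_id="python:3.11"))
        remote_add = kt.fn(add).to(compute)  # launches a service and syncs the local code
        print(remote_add(1, 2))  # runs on the cluster, returns 3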
kubetorch/docs/conf.py
ADDED
@@ -0,0 +1,69 @@
+# Configuration file for the Sphinx documentation builder.
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("."))
+sys.path.insert(0, os.path.abspath("../"))
+
+# -- Project information -----------------------------------------------------
+
+project = "Kubetorch"
+copyright = "Runhouse Inc"
+author = "the Runhouse team 🏃♀️🏠"
+
+# The full version, including alpha/beta/rc tags
+import kubetorch
+
+release = kubetorch.__version__
+
+
+# -- General configuration ---------------------------------------------------
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "myst_parser",
+    "_ext.json_globaltoc",
+]
+
+autodoc_typehints_format = "short"
+autodoc_default_flags = ["members", "show-inheritance"]
+autodoc_member_order = "bysource"
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+markdown_http_base = "/docs/guide"
+markdown_anchor_sections = True
+
+if tags.has("json"):
+    # Force simpler output format (helps CLI output)
+    autodoc_typehints = "signature"  # "description"
+    napoleon_use_param = True
+    napoleon_use_rtype = True
+
+    html_link_suffix = ""
+    json_baseurl = "docs/"
+
+# -- Options for HTML output -------------------------------------------------
+
+if not tags.has("json"):
+    html_theme = "sphinx_book_theme"
+
+html_title = "Kubetorch"
+html_theme_options = {
+    "path_to_docs": "docs/",
+    "home_page_in_toc": True,
+}
+
+# -- Disable "View Source" links and code display ----------------------------
+
+html_show_sourcelink = False  # hides "View Source" link
+html_copy_source = False  # prevents .html files from containing source code
kubetorch/docs/index.rst
ADDED
@@ -0,0 +1,20 @@
+Kubetorch API Reference
+=======================
+
+This document contains only API references.
+
+For further information and examples using Kubetorch, please refer to:
+
+* `Installation Guide <https://www.run.house/kubetorch/installation>`_: basic and advanced installation guides
+
+* `Developer Guide <https://www.run.house/kubetorch/introduction>`_: quick start, high level concepts,
+  developer guides, and more
+
+* `Examples <https://www.run.house/examples>`_: end-to-end examples using Kubetorch
+
+.. toctree::
+   :maxdepth: 1
+   :caption: API Reference
+
+   api/python
+   api/cli
kubetorch/globals.py
ADDED
@@ -0,0 +1,285 @@
+import asyncio
+import atexit
+import os
+import signal
+import socket
+import subprocess
+import threading
+import time
+
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+from kubetorch.config import KubetorchConfig
+from kubetorch.serving.constants import (
+    DEFAULT_NGINX_HEALTH_ENDPOINT,
+    DEFAULT_NGINX_PORT,
+    LOCAL_NGINX_PORT,
+    NGINX_GATEWAY_PROXY,
+)
+
+# For use in `kt deploy` decorators
+disable_decorators = False
+
+config = KubetorchConfig()
+
+
+@dataclass(frozen=True)
+class PFHandle:
+    process: subprocess.Popen
+    port: int
+    base_url: str  # "http://localhost:<port>"
+
+
+# cache a single pf per service (currently just a single NGINX proxy)
+_port_forwards: Dict[str, PFHandle] = {}
+# Use both a threading lock and an asyncio lock for different contexts
+_pf_lock = threading.Lock()
+# Async lock must be created lazily when event loop is available
+_pf_async_lock: Optional[asyncio.Lock] = None
+
+
+def _kill(proc: Optional[subprocess.Popen]) -> None:
+    if not proc:
+        return
+    try:
+        if proc.poll() is None:
+            os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
+            proc.wait(timeout=3)
+    except Exception:
+        pass
+
+
+def _cleanup_port_forwards():
+    with _pf_lock:
+        for h in list(_port_forwards.values()):
+            _kill(h.process)
+        _port_forwards.clear()
+
+
+def _ensure_pf(
+    service_name: str, namespace: str, remote_port: int, health_endpoint: str
+) -> PFHandle:
+    from kubetorch.resources.compute.utils import find_available_port
+    from kubetorch.serving.utils import wait_for_port_forward
+
+    # Fast path: check without lock first
+    h = _port_forwards.get(service_name)
+    if h and h.process.poll() is None:
+        return h
+
+    # Slow path: need to create port forward
+    with _pf_lock:
+        # Double-check pattern: check again inside the lock
+        h = _port_forwards.get(service_name)
+        if h and h.process.poll() is None:
+            return h
+
+        # Now create the port forward while holding the lock
+        # This ensures only one thread creates the port forward
+        local_port = find_available_port(LOCAL_NGINX_PORT)
+
+        cmd = [
+            "kubectl",
+            "port-forward",
+            f"svc/{service_name}",
+            f"{local_port}:{remote_port}",
+            "--namespace",
+            namespace,
+        ]
+        proc = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=True
+        )
+
+        # If it dies immediately, surface stderr (much clearer than a generic timeout)
+        time.sleep(0.3)
+
+        if proc.poll() is not None:
+            err = (proc.stderr.read() or b"").decode(errors="ignore")
+            raise RuntimeError(
+                f"kubectl port-forward exited (rc={proc.returncode}): {err.strip()}"
+            )
+
+        if health_endpoint:
+            cluster_config = wait_for_port_forward(
+                proc, local_port, health_endpoint=health_endpoint
+            )
+            if isinstance(cluster_config, dict):
+                config.cluster_config = cluster_config
+                config.write()
+        else:
+            # Minimal TCP wait (no HTTP probe)
+            deadline = time.time() + 10
+            ok = False
+            while time.time() < deadline:
+                try:
+                    with socket.create_connection(
+                        ("127.0.0.1", local_port), timeout=0.5
+                    ):
+                        ok = True
+                        break
+                except OSError:
+                    time.sleep(0.1)
+            if not ok:
+                raise TimeoutError("Timeout waiting for port forward to be ready")
+
+        time.sleep(0.2)  # tiny grace
+
+        h = PFHandle(
+            process=proc, port=local_port, base_url=f"http://localhost:{local_port}"
+        )
+        # Store in cache while still holding the lock
+        _port_forwards[service_name] = h
+        return h
+
+
+async def _ensure_pf_async(
+    service_name: str, namespace: str, remote_port: int, health_endpoint: str
+) -> PFHandle:
+    """Async version of _ensure_pf for use in async contexts."""
+    from kubetorch.resources.compute.utils import find_available_port
+    from kubetorch.serving.utils import wait_for_port_forward
+
+    # Fast path: check without lock first
+    h = _port_forwards.get(service_name)
+    if h and h.process.poll() is None:
+        return h
+
+    # Ensure async lock is created (lazy initialization)
+    global _pf_async_lock
+    if _pf_async_lock is None:
+        _pf_async_lock = asyncio.Lock()
+
+    # Slow path: need to create port forward
+    async with _pf_async_lock:
+        # Double-check pattern: check again inside the lock
+        h = _port_forwards.get(service_name)
+        if h and h.process.poll() is None:
+            return h
+
+        # Create port forward in a thread to avoid blocking the event loop
+        def create_port_forward():
+            local_port = find_available_port(LOCAL_NGINX_PORT)
+
+            cmd = [
+                "kubectl",
+                "port-forward",
+                f"svc/{service_name}",
+                f"{local_port}:{remote_port}",
+                "--namespace",
+                namespace,
+            ]
+            proc = subprocess.Popen(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                start_new_session=True,
+            )
+
+            # If it dies immediately, surface stderr
+            time.sleep(0.3)
+
+            if proc.poll() is not None:
+                err = (proc.stderr.read() or b"").decode(errors="ignore")
+                raise RuntimeError(
+                    f"kubectl port-forward exited (rc={proc.returncode}): {err.strip()}"
+                )
+
+            if health_endpoint:
+                wait_for_port_forward(proc, local_port, health_endpoint=health_endpoint)
+            else:
+                # Minimal TCP wait (no HTTP probe)
+                deadline = time.time() + 10
+                ok = False
+                while time.time() < deadline:
+                    try:
+                        with socket.create_connection(
+                            ("127.0.0.1", local_port), timeout=0.5
+                        ):
+                            ok = True
+                            break
+                    except OSError:
+                        time.sleep(0.1)
+                if not ok:
+                    raise TimeoutError("Timeout waiting for port forward to be ready")
+
+            time.sleep(0.2)  # tiny grace
+
+            return PFHandle(
+                process=proc, port=local_port, base_url=f"http://localhost:{local_port}"
+            )
+
+        # Run the blocking operation in a thread
+        loop = asyncio.get_event_loop()
+        h = await loop.run_in_executor(None, create_port_forward)
+
+        # Store in cache while still holding the lock
+        _port_forwards[service_name] = h
+        return h
+
+
+def service_url(
+    service_name: str = NGINX_GATEWAY_PROXY,
+    namespace: str = config.install_namespace,
+    remote_port: int = DEFAULT_NGINX_PORT,
+    health_endpoint: str = DEFAULT_NGINX_HEALTH_ENDPOINT,
+) -> str:
+    """
+    Return a URL to reach a Kubernetes Service.
+    - If running in-cluster: {scheme}://{svc}.{ns}.svc.cluster.local:{remote_port}{path}
+    - Else: ensure a single kubectl port-forward (cached) and return http://localhost:<port>{path}
+    """
+    from kubetorch.servers.http.utils import is_running_in_kubernetes
+
+    if is_running_in_kubernetes():
+        return f"http://{service_name}.{namespace}.svc.cluster.local:{remote_port}"
+
+    # Ingress URL into the cluster from outside
+    if config.api_url:
+        return config.api_url
+
+    h = _ensure_pf(service_name, namespace, remote_port, health_endpoint)
+
+    # if the process died between creation and use, recreate once
+    if h.process.poll() is not None:
+        with _pf_lock:
+            _port_forwards.pop(service_name, None)
+        h = _ensure_pf(service_name, namespace, remote_port, health_endpoint)
+    return h.base_url
+
+
+async def service_url_async(
+    service_name: str = NGINX_GATEWAY_PROXY,
+    namespace: str = config.install_namespace,
+    remote_port: int = DEFAULT_NGINX_PORT,
+    health_endpoint: str = DEFAULT_NGINX_HEALTH_ENDPOINT,
+) -> str:
+    """
+    Async version of service_url for use in async contexts.
+    Return a URL to reach a Kubernetes Service.
+    - If running in-cluster: {scheme}://{svc}.{ns}.svc.cluster.local:{remote_port}{path}
+    - Else: ensure a single kubectl port-forward (cached) and return http://localhost:<port>{path}
+    """
+    from kubetorch.servers.http.utils import is_running_in_kubernetes
+
+    if is_running_in_kubernetes():
+        return f"http://{service_name}.{namespace}.svc.cluster.local:{remote_port}"
+
+    h = await _ensure_pf_async(service_name, namespace, remote_port, health_endpoint)
+
+    # if the process died between creation and use, recreate once
+    if h.process.poll() is not None:
+        # Ensure async lock is created
+        global _pf_async_lock
+        if _pf_async_lock is None:
+            _pf_async_lock = asyncio.Lock()
+
+        async with _pf_async_lock:
+            _port_forwards.pop(service_name, None)
+        h = await _ensure_pf_async(
+            service_name, namespace, remote_port, health_endpoint
+        )
+    return h.base_url
+
+
+atexit.register(_cleanup_port_forwards)
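A minimal usage sketch of the helpers above: outside the cluster, ``service_url()`` starts (or reuses) a cached ``kubectl port-forward`` to the NGINX gateway proxy and returns a localhost URL, while in-cluster it returns the internal service DNS name. This assumes the package is installed and a working local ``kubectl`` context is pointed at the cluster.

    from kubetorch.globals import service_url

    # With no arguments this targets NGINX_GATEWAY_PROXY in the install namespace,
    # using the defaults from kubetorch.serving.constants shown above.
    base_url = service_url()
    print(base_url)  # e.g. http://localhost:<forwarded-port> when running off-cluster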
kubetorch/logger.py
ADDED
@@ -0,0 +1,59 @@
+import logging
+import os
+import sys
+
+
+def get_logger(name) -> logging.Logger:
+    """
+    Creates and returns a logger with the specified name.
+
+    Ensures a universal logger configuration across the codebase with the format:
+    "levelname - asctime - filename:lineno - message"
+
+    Args:
+        name (str): Name of the logger. Defaults to None, which gets the root logger.
+
+    Returns:
+        logging.Logger: Configured logger instance.
+    """
+    # Create or retrieve the logger
+    return logging.getLogger(name)
+
+
+class NewLineFormatter(logging.Formatter):
+    """Adds logging prefix to newlines to align multi-line messages."""
+
+    def __init__(self, fmt, datefmt=None):
+        logging.Formatter.__init__(self, fmt, datefmt)
+
+    def format(self, record):
+        msg = logging.Formatter.format(self, record)
+        if record.message != "":
+            parts = msg.partition(record.message)
+            msg = msg.replace("\n", "\r\n" + parts[0])
+        return msg
+
+
+root_logger = logging.getLogger("kubetorch")
+
+
+def init_logger(logger):
+    level = os.getenv("KT_LOG_LEVEL") or "INFO"
+    level = getattr(logging, level.upper())
+    logger.setLevel(level)
+    for handler in logger.handlers:
+        logger.removeHandler(handler)
+
+    if not logger.handlers:
+        formatter = NewLineFormatter(
+            "%(levelname)s | %(asctime)s | %(name)s:%(lineno)d | %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
+        )
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
+    logger.propagate = False
+
+
+init_logger(root_logger)
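And a short usage sketch of the logging setup above: ``KT_LOG_LEVEL`` is read once when ``kubetorch.logger`` is first imported, and ``get_logger`` returns children of the shared ``kubetorch`` logger, so they inherit the handler and ``NewLineFormatter`` configured by ``init_logger``.

    import os

    os.environ.setdefault("KT_LOG_LEVEL", "DEBUG")  # must be set before the first import

    from kubetorch.logger import get_logger

    logger = get_logger("kubetorch.example")  # child of the configured "kubetorch" logger
    logger.info("multi-line output\nis re-prefixed by NewLineFormatter")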