kubetorch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubetorch might be problematic. Click here for more details.

Files changed (93) hide show
  1. kubetorch/__init__.py +60 -0
  2. kubetorch/cli.py +1985 -0
  3. kubetorch/cli_utils.py +1025 -0
  4. kubetorch/config.py +453 -0
  5. kubetorch/constants.py +18 -0
  6. kubetorch/docs/Makefile +18 -0
  7. kubetorch/docs/__init__.py +0 -0
  8. kubetorch/docs/_ext/json_globaltoc.py +42 -0
  9. kubetorch/docs/api/cli.rst +10 -0
  10. kubetorch/docs/api/python/app.rst +21 -0
  11. kubetorch/docs/api/python/cls.rst +19 -0
  12. kubetorch/docs/api/python/compute.rst +25 -0
  13. kubetorch/docs/api/python/config.rst +11 -0
  14. kubetorch/docs/api/python/fn.rst +19 -0
  15. kubetorch/docs/api/python/image.rst +14 -0
  16. kubetorch/docs/api/python/secret.rst +18 -0
  17. kubetorch/docs/api/python/volumes.rst +13 -0
  18. kubetorch/docs/api/python.rst +101 -0
  19. kubetorch/docs/conf.py +69 -0
  20. kubetorch/docs/index.rst +20 -0
  21. kubetorch/docs/requirements.txt +5 -0
  22. kubetorch/globals.py +285 -0
  23. kubetorch/logger.py +59 -0
  24. kubetorch/resources/__init__.py +0 -0
  25. kubetorch/resources/callables/__init__.py +0 -0
  26. kubetorch/resources/callables/cls/__init__.py +0 -0
  27. kubetorch/resources/callables/cls/cls.py +157 -0
  28. kubetorch/resources/callables/fn/__init__.py +0 -0
  29. kubetorch/resources/callables/fn/fn.py +133 -0
  30. kubetorch/resources/callables/module.py +1416 -0
  31. kubetorch/resources/callables/utils.py +174 -0
  32. kubetorch/resources/compute/__init__.py +0 -0
  33. kubetorch/resources/compute/app.py +261 -0
  34. kubetorch/resources/compute/compute.py +2596 -0
  35. kubetorch/resources/compute/decorators.py +139 -0
  36. kubetorch/resources/compute/rbac.py +74 -0
  37. kubetorch/resources/compute/utils.py +1114 -0
  38. kubetorch/resources/compute/websocket.py +137 -0
  39. kubetorch/resources/images/__init__.py +1 -0
  40. kubetorch/resources/images/image.py +414 -0
  41. kubetorch/resources/images/images.py +74 -0
  42. kubetorch/resources/secrets/__init__.py +2 -0
  43. kubetorch/resources/secrets/kubernetes_secrets_client.py +412 -0
  44. kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
  45. kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
  46. kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
  47. kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
  48. kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
  49. kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
  50. kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
  51. kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
  52. kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
  53. kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
  54. kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
  55. kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
  56. kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
  57. kubetorch/resources/secrets/provider_secrets/providers.py +93 -0
  58. kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
  59. kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
  60. kubetorch/resources/secrets/secret.py +238 -0
  61. kubetorch/resources/secrets/secret_factory.py +70 -0
  62. kubetorch/resources/secrets/utils.py +209 -0
  63. kubetorch/resources/volumes/__init__.py +0 -0
  64. kubetorch/resources/volumes/volume.py +365 -0
  65. kubetorch/servers/__init__.py +0 -0
  66. kubetorch/servers/http/__init__.py +0 -0
  67. kubetorch/servers/http/distributed_utils.py +3223 -0
  68. kubetorch/servers/http/http_client.py +730 -0
  69. kubetorch/servers/http/http_server.py +1788 -0
  70. kubetorch/servers/http/server_metrics.py +278 -0
  71. kubetorch/servers/http/utils.py +728 -0
  72. kubetorch/serving/__init__.py +0 -0
  73. kubetorch/serving/autoscaling.py +173 -0
  74. kubetorch/serving/base_service_manager.py +363 -0
  75. kubetorch/serving/constants.py +83 -0
  76. kubetorch/serving/deployment_service_manager.py +478 -0
  77. kubetorch/serving/knative_service_manager.py +519 -0
  78. kubetorch/serving/raycluster_service_manager.py +582 -0
  79. kubetorch/serving/service_manager.py +18 -0
  80. kubetorch/serving/templates/deployment_template.yaml +17 -0
  81. kubetorch/serving/templates/knative_service_template.yaml +19 -0
  82. kubetorch/serving/templates/kt_setup_template.sh.j2 +81 -0
  83. kubetorch/serving/templates/pod_template.yaml +194 -0
  84. kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
  85. kubetorch/serving/templates/raycluster_template.yaml +35 -0
  86. kubetorch/serving/templates/service_template.yaml +21 -0
  87. kubetorch/serving/templates/workerset_template.yaml +36 -0
  88. kubetorch/serving/utils.py +377 -0
  89. kubetorch/utils.py +284 -0
  90. kubetorch-0.2.0.dist-info/METADATA +121 -0
  91. kubetorch-0.2.0.dist-info/RECORD +93 -0
  92. kubetorch-0.2.0.dist-info/WHEEL +4 -0
  93. kubetorch-0.2.0.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,519 @@
1
+ import os
2
+ import re
3
+ import time
4
+ from datetime import datetime, timezone
5
+ from typing import List, Optional
6
+
7
+ from kubernetes import client
8
+
9
+ import kubetorch as kt
10
+ import kubetorch.serving.constants as serving_constants
11
+ from kubetorch.logger import get_logger
12
+ from kubetorch.resources.compute.utils import (
13
+ check_pod_events_for_errors,
14
+ check_pod_status_for_errors,
15
+ check_revision_for_errors,
16
+ ServiceTimeoutError,
17
+ )
18
+ from kubetorch.servers.http.utils import load_template
19
+ from kubetorch.serving.autoscaling import AutoscalingConfig
20
+ from kubetorch.serving.base_service_manager import BaseServiceManager
21
+ from kubetorch.serving.utils import nested_override, pod_is_running
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
+ class KnativeServiceManager(BaseServiceManager):
27
+ """Service manager for Knative services with autoscaling capabilities."""
28
+
29
+ def _create_or_update_knative_service(
30
+ self,
31
+ name: str,
32
+ module_name: str,
33
+ pod_template: dict,
34
+ autoscaling_config: AutoscalingConfig = None,
35
+ gpu_annotations: dict = None,
36
+ inactivity_ttl: str = None,
37
+ custom_labels: dict = None,
38
+ custom_annotations: dict = None,
39
+ custom_template: dict = None,
40
+ scheduler_name: str = None,
41
+ queue_name: str = None,
42
+ dryrun: bool = False,
43
+ ) -> dict:
44
+ """Creates or updates a Knative service based on the provided configuration.
45
+
46
+ Returns:
47
+ Dict
48
+ """
49
+ # Clean the module name to remove any invalid characters for labels
50
+ clean_module_name = re.sub(r"[^A-Za-z0-9.-]|^[-.]|[-.]$", "", module_name)
51
+
52
+ labels = {
53
+ **self.base_labels,
54
+ serving_constants.KT_MODULE_LABEL: clean_module_name,
55
+ serving_constants.KT_SERVICE_LABEL: name,
56
+ serving_constants.KT_TEMPLATE_LABEL: "ksvc",
57
+ }
58
+
59
+ if custom_labels:
60
+ labels.update(custom_labels)
61
+
62
+ # Template labels (exclude template label - that's only for the top-level resource)
63
+ template_labels = {
64
+ **self.base_labels,
65
+ serving_constants.KT_MODULE_LABEL: clean_module_name,
66
+ serving_constants.KT_SERVICE_LABEL: name,
67
+ }
68
+
69
+ if custom_labels:
70
+ template_labels.update(custom_labels)
71
+
72
+ template_annotations = {
73
+ "networking.knative.dev/ingress.class": "kourier.ingress.networking.knative.dev",
74
+ }
75
+
76
+ annotations = {
77
+ "prometheus.io/scrape": "true",
78
+ "prometheus.io/port": "8080",
79
+ "prometheus.io/path": serving_constants.PROMETHEUS_HEALTH_ENDPOINT,
80
+ "serving.knative.dev/container-name": "kubetorch",
81
+ "serving.knative.dev/probe-path": "/health",
82
+ }
83
+ if custom_annotations:
84
+ annotations.update(custom_annotations)
85
+
86
+ if scheduler_name and queue_name:
87
+ labels["kai.scheduler/queue"] = queue_name # Useful for queries, etc
88
+ template_labels[
89
+ "kai.scheduler/queue"
90
+ ] = queue_name # Required for KAI to schedule pods
91
+ # Note: KAI wraps the Knative revision in a podgroup, expecting at least 1 pod to schedule initially
92
+ # Only set min-scale=1 if user hasn't explicitly provided a min_scale value
93
+ if autoscaling_config.min_scale is None:
94
+ template_annotations["autoscaling.knative.dev/min-scale"] = "1"
95
+
96
+ # Add autoscaling annotations (config always provided)
97
+ autoscaling_annotations = autoscaling_config.convert_to_annotations()
98
+ template_annotations.update(autoscaling_annotations)
99
+
100
+ # Add progress deadline if specified (not an autoscaling annotation)
101
+ if autoscaling_config.progress_deadline is not None:
102
+ template_annotations[
103
+ "serving.knative.dev/progress-deadline"
104
+ ] = autoscaling_config.progress_deadline
105
+
106
+ if inactivity_ttl:
107
+ annotations[serving_constants.INACTIVITY_TTL_ANNOTATION] = inactivity_ttl
108
+ logger.info(f"Configuring auto-down after idle timeout ({inactivity_ttl})")
109
+
110
+ template_annotations.update(annotations)
111
+
112
+ if gpu_annotations:
113
+ template_annotations.update(gpu_annotations)
114
+
115
+ deployment_timestamp = datetime.now(timezone.utc).isoformat()
116
+ template_annotations.update(
117
+ {"kubetorch.com/deployment_timestamp": deployment_timestamp}
118
+ )
119
+
120
+ # Set containerConcurrency based on autoscaling config
121
+ # When using concurrency-based autoscaling, set containerConcurrency to match
122
+ # the target to ensure the container's limit aligns with autoscaler expectations
123
+ template_vars = {
124
+ "name": name,
125
+ "namespace": self.namespace,
126
+ "annotations": annotations,
127
+ "template_annotations": template_annotations,
128
+ "labels": labels,
129
+ "template_labels": template_labels,
130
+ "pod_template": pod_template,
131
+ }
132
+
133
+ if autoscaling_config.concurrency is not None:
134
+ template_vars["container_concurrency"] = autoscaling_config.concurrency
135
+
136
+ service = load_template(
137
+ template_file=serving_constants.KNATIVE_SERVICE_TEMPLATE_FILE,
138
+ template_dir=os.path.join(
139
+ os.path.dirname(os.path.abspath(__file__)), "templates"
140
+ ),
141
+ **template_vars,
142
+ )
143
+
144
+ if custom_template:
145
+ nested_override(service, custom_template)
146
+
147
+ try:
148
+ kwargs = {"dry_run": "All"} if dryrun else {}
149
+ created_service: dict = self.objects_api.create_namespaced_custom_object(
150
+ group="serving.knative.dev",
151
+ version="v1",
152
+ namespace=self.namespace,
153
+ plural="services",
154
+ body=service,
155
+ **kwargs,
156
+ )
157
+
158
+ logger.info(
159
+ f"Created Knative service {name} in namespace {self.namespace}",
160
+ )
161
+ return created_service
162
+
163
+ except client.exceptions.ApiException as e:
164
+ if e.status == 409:
165
+ logger.info(f"Service {name} already exists, updating")
166
+ existing_service = self.get_knative_service(name)
167
+ return existing_service
168
+ else:
169
+ logger.error(
170
+ f"Failed to create Knative service: {str(e)}",
171
+ )
172
+ raise e
173
+
174
+ def get_knative_service(self, service_name: str) -> dict:
175
+ """Retrieve a Knative service by name."""
176
+ try:
177
+ service = self.objects_api.get_namespaced_custom_object(
178
+ group="serving.knative.dev",
179
+ version="v1",
180
+ namespace=self.namespace,
181
+ plural="services",
182
+ name=service_name,
183
+ )
184
+ return service
185
+
186
+ except client.exceptions.ApiException as e:
187
+ logger.error(f"Failed to load Knative service '{service_name}': {str(e)}")
188
+ raise
189
+
190
+ def get_deployment_timestamp_annotation(self, service_name: str) -> Optional[str]:
191
+ """Get deployment timestamp annotation for Knative services."""
192
+ try:
193
+ service = self.get_knative_service(service_name)
194
+ if service:
195
+ return (
196
+ service.get("metadata", {})
197
+ .get("annotations", {})
198
+ .get("kubetorch.com/deployment_timestamp", None)
199
+ )
200
+ except client.exceptions.ApiException:
201
+ pass
202
+ return None
203
+
204
+ def update_deployment_timestamp_annotation(
205
+ self, service_name: str, new_timestamp: str
206
+ ) -> str:
207
+ """Update deployment timestamp annotation for Knative services."""
208
+ try:
209
+ patch_body = {
210
+ "metadata": {
211
+ "annotations": {"kubetorch.com/deployment_timestamp": new_timestamp}
212
+ }
213
+ }
214
+ self.objects_api.patch_namespaced_custom_object(
215
+ group="serving.knative.dev",
216
+ version="v1",
217
+ namespace=self.namespace,
218
+ plural="services",
219
+ name=service_name,
220
+ body=patch_body,
221
+ )
222
+ return new_timestamp
223
+ except client.exceptions.ApiException as e:
224
+ logger.error(
225
+ f"Failed to update deployment timestamp for Knative service '{service_name}': {str(e)}"
226
+ )
227
+ raise
228
+
229
+ def get_knative_service_endpoint(self, service_name: str) -> str:
230
+ """Get the endpoint URL for a Knative service."""
231
+ try:
232
+ service = self.get_knative_service(service_name)
233
+
234
+ # Get the URL from the service status
235
+ status = service.get("status", {})
236
+ url = status.get("url")
237
+ if url:
238
+ return url
239
+
240
+ # Fallback to constructing URL
241
+ return f"http://{service_name}.{self.namespace}.svc.cluster.local"
242
+
243
+ except Exception as e:
244
+ logger.warning(f"Could not get Knative service URL for {service_name}: {e}")
245
+ return f"http://{service_name}.{self.namespace}.svc.cluster.local"
246
+
247
+ def create_or_update_service(
248
+ self,
249
+ service_name: str,
250
+ module_name: str,
251
+ pod_template: dict,
252
+ autoscaling_config: AutoscalingConfig = None,
253
+ gpu_annotations: dict = None,
254
+ inactivity_ttl: str = None,
255
+ custom_labels: dict = None,
256
+ custom_annotations: dict = None,
257
+ custom_template: dict = None,
258
+ scheduler_name: str = None,
259
+ queue_name: str = None,
260
+ dryrun: bool = False,
261
+ **kwargs, # Ignore deployment-specific args like replicas
262
+ ):
263
+ """
264
+ Creates a Knative service with autoscaling capabilities.
265
+ """
266
+ logger.info(
267
+ f"Deploying Kubetorch autoscaling (Knative) service with name: {service_name}"
268
+ )
269
+ try:
270
+ created_service = self._create_or_update_knative_service(
271
+ name=service_name,
272
+ pod_template=pod_template,
273
+ module_name=module_name,
274
+ autoscaling_config=autoscaling_config,
275
+ gpu_annotations=gpu_annotations,
276
+ inactivity_ttl=inactivity_ttl,
277
+ custom_labels=custom_labels,
278
+ custom_annotations=custom_annotations,
279
+ custom_template=custom_template,
280
+ scheduler_name=scheduler_name,
281
+ queue_name=queue_name,
282
+ dryrun=dryrun,
283
+ )
284
+ return created_service
285
+ except Exception as e:
286
+ logger.error(f"Failed to launch new Knative service: {str(e)}")
287
+ raise e
288
+
289
+ def get_endpoint(self, service_name: str) -> str:
290
+ """Get the endpoint URL for a Knative service."""
291
+ return self.get_knative_service_endpoint(service_name)
292
+
293
+ def get_pods_for_service(self, service_name: str, **kwargs) -> List[client.V1Pod]:
294
+ """Get all pods associated with this Knative service."""
295
+ return self.get_pods_for_service_static(
296
+ service_name=service_name,
297
+ namespace=self.namespace,
298
+ core_api=self.core_api,
299
+ )
300
+
301
+ def _status_condition_ready(self, status: dict) -> bool:
302
+ """Check if service status conditions indicate ready state."""
303
+ conditions = status.get("conditions", [])
304
+ for condition in conditions:
305
+ if condition.get("type") == "Ready":
306
+ return condition.get("status") == "True"
307
+ return False
308
+
309
    def check_service_ready(
        self,
        service_name: str,
        launch_timeout: int,
        objects_api: client.CustomObjectsApi = None,
        core_api: client.CoreV1Api = None,
        queue_name: str = None,
        scheduler_name: str = None,
        **kwargs,
    ) -> bool:
        """Checks if the Knative service is ready to start serving requests.

        Core checks:
        - Service status and conditions
        - Revision status and conditions
        - Pod status and conditions
        - Autoscaling conditions (min-scale, etc.)

        Common failure scenarios handled:
        - Image pull failures or delays
        - Container initialization and setup (pip installs, etc.)
        - User-defined image setup steps
        - Node provisioning delays or failures
        - Service health check failures
        - Container terminations
        - Autoscaling not meeting minimum requirements

        Note:
            This method checks all pods associated with the service, not just the first one.
            Service check will fail fast only for truly unrecoverable conditions (like missing images or autoscaling
            not being triggered or enabled).

            Unless there is a clear reason to terminate, will wait for the full specified timeout
            to allow autoscaling and node provisioning to work (where relevant).

        Args:
            service_name: Name of the Knative service
            launch_timeout: Timeout in seconds to wait for readiness
            objects_api: Objects API instance (uses self.objects_api if None)
            core_api: Core API instance (uses self.core_api if None)
            queue_name: Queue name for scheduling checks
            scheduler_name: Scheduler name for scheduling checks
            **kwargs: Additional arguments

        Returns:
            True if service is ready

        Raises:
            ServiceTimeoutError: If service doesn't become ready within timeout
            QueueUnschedulableError: If pods can't be scheduled due to queue issues
            ResourceNotAvailableError: If required resources aren't available
        """
        # Fall back to this manager's API clients when none are injected
        if objects_api is None:
            objects_api = self.objects_api
        if core_api is None:
            core_api = self.core_api

        # Poll every 2 seconds until launch_timeout elapses
        sleep_interval = 2
        start_time = time.time()

        # Instead of spamming logs with each iteration, only log once
        # (waiting_for_pods tracks the last reported running-pod count, so a
        # change in count triggers one fresh log line)
        displayed_msgs = {
            "service_status": False,
            "waiting_for_pods": None,
            "revision_status": False,
            "service_readiness": False,
            "autoscaling": False,
        }

        logger.info(
            f"Checking service {service_name} pod readiness (timeout: {launch_timeout} seconds)"
        )
        iteration = 0
        while (time.time() - start_time) < launch_timeout:
            iteration += 1
            try:
                # Re-fetch the service each iteration to observe status changes
                service = objects_api.get_namespaced_custom_object(
                    group="serving.knative.dev",
                    version="v1",
                    namespace=self.namespace,
                    plural="services",
                    name=service_name,
                )
                status = service.get("status")
                if not status:
                    # Status not populated yet -- keep polling
                    if not displayed_msgs["service_status"]:
                        logger.info(f"Waiting for service {service_name} status")
                        displayed_msgs["service_status"] = True
                    time.sleep(sleep_interval)
                    continue

                # "NotOwned" means another controller owns a conflicting
                # resource; this is unrecoverable, so fail fast
                for cond in status.get("conditions", []):
                    if cond.get("type") == "Ready" and cond.get("reason") == "NotOwned":
                        raise kt.KnativeServiceConflictError(
                            f"Knative service '{service_name}' cannot become ready: {cond.get('message')}"
                        )

                # Check autoscaling conditions
                if not displayed_msgs["autoscaling"]:
                    logger.info("Checking autoscaling conditions")
                    displayed_msgs["autoscaling"] = True

                # Get the min-scale from annotations
                # NOTE(review): int(min_scale_str) will raise ValueError on a
                # malformed annotation value -- presumably always numeric here
                min_scale = 0
                if (
                    service.get("spec", {})
                    .get("template", {})
                    .get("metadata", {})
                    .get("annotations", {})
                ):
                    min_scale_str = service["spec"]["template"]["metadata"][
                        "annotations"
                    ].get("autoscaling.knative.dev/min-scale", "0")
                    min_scale = int(min_scale_str)

                if min_scale == 0 and self._status_condition_ready(status):
                    # Service is ready and allowed to scale to zero
                    logger.info(f"Service {service_name} is already marked as ready")
                    return True

                if min_scale == 0:
                    # Always need at least one pod
                    min_scale = 1

                # Get current number of Running pods
                pods = self.get_pods_for_service(service_name)
                running_pods = [p for p in pods if pod_is_running(p)]
                running_pods_count = len(running_pods)

                if running_pods_count < min_scale:
                    # Not enough pods yet: inspect every pod for fatal errors
                    # (these helpers raise on unrecoverable conditions)
                    for pod in pods:
                        # Check for image pull errors in container status
                        check_pod_status_for_errors(pod, queue_name, scheduler_name)

                        # Check pod events separately from the core API
                        check_pod_events_for_errors(pod, self.namespace, core_api)

                    # Log only when the running-pod count changes
                    if (
                        displayed_msgs["waiting_for_pods"] is None
                        or displayed_msgs["waiting_for_pods"] != running_pods_count
                    ):
                        logger.info(
                            f"Waiting for minimum scale ({min_scale} pods), currently have {running_pods_count}"
                        )
                        displayed_msgs["waiting_for_pods"] = running_pods_count
                else:
                    # Enough pods are running -- now wait for the Ready condition
                    if not displayed_msgs["service_readiness"]:
                        logger.info(
                            f"Min {min_scale} pod{'s are' if min_scale > 1 else ' is'} ready, waiting for service to be marked as ready"
                        )
                        displayed_msgs["service_readiness"] = True

                    if self._status_condition_ready(status):
                        logger.info(f"Service {service_name} is now ready")
                        return True

                    if not displayed_msgs["revision_status"]:
                        logger.info("Checking service revision status")
                        displayed_msgs["revision_status"] = True

                    # Surface revision-level failures (raises on fatal errors)
                    latest_revision = status.get("latestCreatedRevisionName")
                    if latest_revision:
                        check_revision_for_errors(
                            latest_revision, self.namespace, objects_api
                        )

            # NOTE(review): catch-and-reraise is a no-op -- API errors
            # propagate immediately rather than being retried until timeout.
            # Confirm whether transient API errors should be tolerated here.
            except client.exceptions.ApiException:
                raise

            # Periodic progress log roughly every 10 iterations (~20s)
            if iteration % 10 == 0:
                elapsed = int(time.time() - start_time)
                remaining = max(0, int(launch_timeout - elapsed))
                logger.info(
                    f"Service is not yet marked as ready "
                    f"(elapsed: {elapsed}s, remaining: {remaining}s)"
                )

            time.sleep(sleep_interval)

        raise ServiceTimeoutError(
            f"Service {service_name} did not become ready within {launch_timeout} seconds. "
            "To update the timeout, set the `launch_timeout` parameter in the Compute class, or set the "
            "environment variable `KT_LAUNCH_TIMEOUT`."
        )
493
+
494
+ def teardown_service(self, service_name: str, console=None) -> bool:
495
+ """Teardown Knative service and associated resources.
496
+
497
+ Args:
498
+ service_name: Name of the Knative service to teardown
499
+ console: Optional Rich console for output
500
+
501
+ Returns:
502
+ True if teardown was successful, False otherwise
503
+ """
504
+ from kubetorch.resources.compute.utils import delete_service
505
+
506
+ try:
507
+ # Delete the Knative service
508
+ delete_service(
509
+ custom_api=self.objects_api,
510
+ name=service_name,
511
+ namespace=self.namespace,
512
+ console=console,
513
+ )
514
+
515
+ return True
516
+
517
+ except Exception as e:
518
+ logger.error(f"Failed to teardown Knative service {service_name}: {e}")
519
+ return False