skypilot-nightly 1.0.0.dev20250729__py3-none-any.whl → 1.0.0.dev20250731__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of skypilot-nightly might be problematic.

Files changed (186)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +4 -1
  3. sky/backends/cloud_vm_ray_backend.py +4 -3
  4. sky/catalog/__init__.py +3 -3
  5. sky/catalog/aws_catalog.py +12 -0
  6. sky/catalog/common.py +2 -2
  7. sky/catalog/data_fetchers/fetch_aws.py +13 -1
  8. sky/client/cli/command.py +448 -60
  9. sky/client/common.py +12 -9
  10. sky/clouds/nebius.py +1 -1
  11. sky/clouds/utils/gcp_utils.py +1 -1
  12. sky/clouds/vast.py +1 -2
  13. sky/dashboard/out/404.html +1 -1
  14. sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +11 -0
  16. sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +30 -0
  17. sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/1871-1df8b686a51f3e3a.js +6 -0
  19. sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  22. sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/3698-7874720877646365.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +16 -0
  28. sky/dashboard/out/_next/static/chunks/4937.d6bf67771e353356.js +15 -0
  29. sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  31. sky/dashboard/out/_next/static/chunks/6135-d0e285ac5f3f2485.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  33. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  34. sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/691.6d99cbfba347cebf.js +55 -0
  36. sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  39. sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/9025.7937c16bc8623516.js +6 -0
  41. sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/9847.4c46c5e229c78704.js +30 -0
  43. sky/dashboard/out/_next/static/chunks/9984.78ee6d2c6fa4b0e8.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  46. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/_app-a67ae198457b9886.js +34 -0
  49. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +11 -0
  51. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-665fa5d96dd41d67.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +1 -0
  53. sky/dashboard/out/_next/static/chunks/pages/config-8620d099cbef8608.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b25c109d6e41bcf4.js +11 -0
  58. sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +1 -0
  61. sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +1 -0
  62. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +1 -0
  63. sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +1 -0
  64. sky/dashboard/out/_next/static/chunks/webpack-5adfc4d4b3db6f71.js +1 -0
  65. sky/dashboard/out/_next/static/oKqDxFQ88cquF4nQGE_0w/_buildManifest.js +1 -0
  66. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  67. sky/dashboard/out/clusters/[cluster].html +1 -1
  68. sky/dashboard/out/clusters.html +1 -1
  69. sky/dashboard/out/config.html +1 -1
  70. sky/dashboard/out/index.html +1 -1
  71. sky/dashboard/out/infra/[context].html +1 -1
  72. sky/dashboard/out/infra.html +1 -1
  73. sky/dashboard/out/jobs/[job].html +1 -1
  74. sky/dashboard/out/jobs.html +1 -1
  75. sky/dashboard/out/users.html +1 -1
  76. sky/dashboard/out/volumes.html +1 -1
  77. sky/dashboard/out/workspace/new.html +1 -1
  78. sky/dashboard/out/workspaces/[name].html +1 -1
  79. sky/dashboard/out/workspaces.html +1 -1
  80. sky/data/data_utils.py +25 -0
  81. sky/data/storage.py +1219 -1775
  82. sky/global_user_state.py +18 -8
  83. sky/jobs/__init__.py +3 -0
  84. sky/jobs/client/sdk.py +80 -3
  85. sky/jobs/controller.py +76 -25
  86. sky/jobs/recovery_strategy.py +80 -34
  87. sky/jobs/scheduler.py +68 -20
  88. sky/jobs/server/core.py +228 -136
  89. sky/jobs/server/server.py +40 -0
  90. sky/jobs/state.py +164 -31
  91. sky/jobs/utils.py +144 -68
  92. sky/logs/aws.py +4 -2
  93. sky/provision/kubernetes/utils.py +6 -4
  94. sky/provision/nebius/constants.py +3 -0
  95. sky/provision/vast/instance.py +2 -1
  96. sky/provision/vast/utils.py +9 -6
  97. sky/py.typed +0 -0
  98. sky/resources.py +24 -14
  99. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  100. sky/serve/autoscalers.py +8 -0
  101. sky/serve/client/impl.py +188 -0
  102. sky/serve/client/sdk.py +12 -82
  103. sky/serve/constants.py +5 -1
  104. sky/serve/controller.py +5 -0
  105. sky/serve/replica_managers.py +112 -37
  106. sky/serve/serve_state.py +16 -6
  107. sky/serve/serve_utils.py +274 -77
  108. sky/serve/server/core.py +8 -525
  109. sky/serve/server/impl.py +709 -0
  110. sky/serve/service.py +13 -9
  111. sky/serve/service_spec.py +74 -4
  112. sky/server/constants.py +1 -1
  113. sky/server/requests/payloads.py +33 -0
  114. sky/server/requests/requests.py +18 -1
  115. sky/server/requests/serializers/decoders.py +12 -3
  116. sky/server/requests/serializers/encoders.py +13 -2
  117. sky/server/server.py +6 -1
  118. sky/skylet/events.py +9 -0
  119. sky/skypilot_config.py +24 -21
  120. sky/task.py +41 -11
  121. sky/templates/jobs-controller.yaml.j2 +3 -0
  122. sky/templates/sky-serve-controller.yaml.j2 +18 -2
  123. sky/users/server.py +1 -1
  124. sky/utils/command_runner.py +4 -2
  125. sky/utils/controller_utils.py +14 -10
  126. sky/utils/dag_utils.py +4 -2
  127. sky/utils/db/migration_utils.py +2 -4
  128. sky/utils/schemas.py +24 -19
  129. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/METADATA +1 -1
  130. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/RECORD +135 -130
  131. sky/dashboard/out/_next/static/Q2sVXboB_t7cgvntL-6nD/_buildManifest.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/1043-869d9c78bf5dd3df.js +0 -1
  133. sky/dashboard/out/_next/static/chunks/1141-e49a159c30a6c4a7.js +0 -11
  134. sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +0 -30
  135. sky/dashboard/out/_next/static/chunks/1664-d65361e92b85e786.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +0 -6
  137. sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +0 -1
  139. sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +0 -15
  140. sky/dashboard/out/_next/static/chunks/2641.74c19c4d45a2c034.js +0 -1
  141. sky/dashboard/out/_next/static/chunks/3698-9fa11dafb5cad4a6.js +0 -1
  142. sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +0 -1
  143. sky/dashboard/out/_next/static/chunks/3937.d7f1c55d1916c7f2.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/4725.66125dcd9832aa5d.js +0 -1
  145. sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +0 -16
  146. sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +0 -15
  147. sky/dashboard/out/_next/static/chunks/5230-df791914b54d91d9.js +0 -1
  148. sky/dashboard/out/_next/static/chunks/5739-5ea3ffa10fc884f2.js +0 -8
  149. sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/616-162f3033ffcd3d31.js +0 -39
  151. sky/dashboard/out/_next/static/chunks/6601-d4a381403a8bae91.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +0 -55
  153. sky/dashboard/out/_next/static/chunks/6989-eab0e9c16b64fd9f.js +0 -1
  154. sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +0 -1
  155. sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +0 -41
  156. sky/dashboard/out/_next/static/chunks/8969-8e0b2055bf5dd499.js +0 -1
  157. sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +0 -6
  158. sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +0 -1
  159. sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +0 -30
  160. sky/dashboard/out/_next/static/chunks/9984.0460de9d3adf5582.js +0 -1
  161. sky/dashboard/out/_next/static/chunks/fd9d1056-61f2257a9cd8b32b.js +0 -1
  162. sky/dashboard/out/_next/static/chunks/framework-efc06c2733009cd3.js +0 -33
  163. sky/dashboard/out/_next/static/chunks/main-app-68c028b1bc5e1b72.js +0 -1
  164. sky/dashboard/out/_next/static/chunks/main-c0a4f1ea606d48d2.js +0 -1
  165. sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +0 -34
  166. sky/dashboard/out/_next/static/chunks/pages/_error-c72a1f77a3c0be1b.js +0 -1
  167. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-2186770cc2de1623.js +0 -11
  168. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-95afb019ab85801c.js +0 -6
  169. sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +0 -1
  170. sky/dashboard/out/_next/static/chunks/pages/config-a2673b256b6d416f.js +0 -1
  171. sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +0 -1
  172. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +0 -1
  173. sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +0 -1
  174. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dc0299ffefebcdbe.js +0 -16
  175. sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +0 -1
  176. sky/dashboard/out/_next/static/chunks/pages/users-6790fcefd5487b13.js +0 -1
  177. sky/dashboard/out/_next/static/chunks/pages/volumes-61ea7ba7e56f8d06.js +0 -1
  178. sky/dashboard/out/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js +0 -1
  179. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +0 -1
  180. sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +0 -1
  181. sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +0 -1
  182. /sky/dashboard/out/_next/static/{Q2sVXboB_t7cgvntL-6nD → oKqDxFQ88cquF4nQGE_0w}/_ssgManifest.js +0 -0
  183. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/WHEEL +0 -0
  184. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/entry_points.txt +0 -0
  185. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/licenses/LICENSE +0 -0
  186. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/top_level.txt +0 -0
sky/serve/server/impl.py (new file)
@@ -0,0 +1,709 @@
+ """Implementation of the SkyServe core APIs."""
+ import re
+ import tempfile
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ import colorama
+ import filelock
+
+ import sky
+ from sky import backends
+ from sky import exceptions
+ from sky import execution
+ from sky import sky_logging
+ from sky import skypilot_config
+ from sky import task as task_lib
+ from sky.backends import backend_utils
+ from sky.catalog import common as service_catalog_common
+ from sky.data import storage as storage_lib
+ from sky.serve import constants as serve_constants
+ from sky.serve import serve_state
+ from sky.serve import serve_utils
+ from sky.skylet import constants
+ from sky.utils import admin_policy_utils
+ from sky.utils import common
+ from sky.utils import common_utils
+ from sky.utils import controller_utils
+ from sky.utils import dag_utils
+ from sky.utils import rich_utils
+ from sky.utils import subprocess_utils
+ from sky.utils import ux_utils
+
+ logger = sky_logging.init_logger(__name__)
+
+
+ def _rewrite_tls_credential_paths_and_get_tls_env_vars(
+         service_name: str, task: 'sky.Task') -> Dict[str, Any]:
+     """Rewrite the paths of TLS credentials in the task.
+
+     Args:
+         service_name: Name of the service.
+         task: sky.Task to rewrite.
+
+     Returns:
+         The generated template variables for TLS.
+     """
+     service_spec = task.service
+     # Already checked by validate_service_task
+     assert service_spec is not None
+     if service_spec.tls_credential is None:
+         return {'use_tls': False}
+     remote_tls_keyfile = (
+         serve_utils.generate_remote_tls_keyfile_name(service_name))
+     remote_tls_certfile = (
+         serve_utils.generate_remote_tls_certfile_name(service_name))
+     tls_template_vars = {
+         'use_tls': True,
+         'remote_tls_keyfile': remote_tls_keyfile,
+         'remote_tls_certfile': remote_tls_certfile,
+         'local_tls_keyfile': service_spec.tls_credential.keyfile,
+         'local_tls_certfile': service_spec.tls_credential.certfile,
+     }
+     service_spec.tls_credential = serve_utils.TLSCredential(
+         remote_tls_keyfile, remote_tls_certfile)
+     return tls_template_vars
+
+
+ def _get_service_record(
+         service_name: str, pool: bool,
+         handle: backends.CloudVmRayResourceHandle,
+         backend: backends.CloudVmRayBackend) -> Optional[Dict[str, Any]]:
+     """Get the service record."""
+     noun = 'pool' if pool else 'service'
+
+     code = serve_utils.ServeCodeGen.get_service_status([service_name],
+                                                        pool=pool)
+     returncode, serve_status_payload, stderr = backend.run_on_head(
+         handle,
+         code,
+         require_outputs=True,
+         stream_logs=False,
+         separate_stderr=True)
+     try:
+         subprocess_utils.handle_returncode(returncode,
+                                            code,
+                                            f'Failed to get {noun} status',
+                                            stderr,
+                                            stream_logs=True)
+     except exceptions.CommandError as e:
+         raise RuntimeError(e.error_msg) from e
+
+     service_statuses = serve_utils.load_service_status(serve_status_payload)
+
+     assert len(service_statuses) <= 1, service_statuses
+     if not service_statuses:
+         return None
+     return service_statuses[0]
+
+
+ def up(
+     task: 'sky.Task',
+     service_name: Optional[str] = None,
+     pool: bool = False,
+ ) -> Tuple[str, str]:
+     """Spins up a service or a pool."""
+     if pool and not serve_utils.is_consolidation_mode():
+         raise ValueError(
+             'Pool is only supported in consolidation mode. To fix, set '
+             '`serve.controller.consolidation_mode: true` in SkyPilot config.')
+     task.validate()
+     serve_utils.validate_service_task(task, pool=pool)
+     assert task.service is not None
+     assert task.service.pool == pool, 'Inconsistent pool flag.'
+     noun = 'pool' if pool else 'service'
+     capnoun = noun.capitalize()
+     if service_name is None:
+         service_name = serve_utils.generate_service_name(pool)
+
+     # The service name will be used as:
+     # 1. controller cluster name: 'sky-serve-controller-<service_name>'
+     # 2. replica cluster name: '<service_name>-<replica_id>'
+     # In both cases, service name shares the same regex with cluster name.
+     if re.fullmatch(constants.CLUSTER_NAME_VALID_REGEX, service_name) is None:
+         with ux_utils.print_exception_no_traceback():
+             raise ValueError(f'{capnoun} name {service_name!r} is invalid: '
+                              f'ensure it is fully matched by regex (e.g., '
+                              'only contains lower letters, numbers and dash): '
+                              f'{constants.CLUSTER_NAME_VALID_REGEX}')
+
+     dag = dag_utils.convert_entrypoint_to_dag(task)
+     dag.resolve_and_validate_volumes()
+     # Always apply the policy again here, even though it might have been applied
+     # in the CLI. This is to ensure that we apply the policy to the final DAG
+     # and get the mutated config.
+     dag, mutated_user_config = admin_policy_utils.apply(dag)
+     dag.pre_mount_volumes()
+     task = dag.tasks[0]
+     assert task.service is not None
+     if pool:
+         if task.run is not None:
+             logger.warning(f'{colorama.Fore.YELLOW}The `run` section will be '
+                            f'ignored for pool.{colorama.Style.RESET_ALL}')
+         # Use dummy run script for cluster pool.
+         task.run = serve_constants.POOL_DUMMY_RUN_COMMAND
+
+     with rich_utils.safe_status(
+             ux_utils.spinner_message(f'Initializing {noun}')):
+         # Handle file mounts using two-hop approach when cloud storage
+         # unavailable
+         storage_clouds = (
+             storage_lib.get_cached_enabled_storage_cloud_names_or_refresh())
+         force_disable_cloud_bucket = skypilot_config.get_nested(
+             ('serve', 'force_disable_cloud_bucket'), False)
+         if storage_clouds and not force_disable_cloud_bucket:
+             controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+                 task, task_type='serve')
+             local_to_controller_file_mounts = {}
+         else:
+             # Fall back to two-hop file_mount uploading when no cloud storage
+             if task.storage_mounts:
+                 raise exceptions.NotSupportedError(
+                     'Cloud-based file_mounts are specified, but no cloud '
+                     'storage is available. Please specify local '
+                     'file_mounts only.')
+             local_to_controller_file_mounts = (
+                 controller_utils.translate_local_file_mounts_to_two_hop(task))
+
+     tls_template_vars = _rewrite_tls_credential_paths_and_get_tls_env_vars(
+         service_name, task)
+
+     with tempfile.NamedTemporaryFile(
+             prefix=f'service-task-{service_name}-',
+             mode='w',
+     ) as service_file, tempfile.NamedTemporaryFile(
+             prefix=f'controller-task-{service_name}-',
+             mode='w',
+     ) as controller_file:
+         controller_name = common.SKY_SERVE_CONTROLLER_NAME
+         task_config = task.to_yaml_config()
+         common_utils.dump_yaml(service_file.name, task_config)
+         remote_tmp_task_yaml_path = (
+             serve_utils.generate_remote_tmp_task_yaml_file_name(service_name))
+         remote_config_yaml_path = (
+             serve_utils.generate_remote_config_yaml_file_name(service_name))
+         controller_log_file = (
+             serve_utils.generate_remote_controller_log_file_name(service_name))
+         controller_resources = controller_utils.get_controller_resources(
+             controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
+             task_resources=task.resources)
+         controller_job_id = None
+         if serve_utils.is_consolidation_mode():
+             controller_job_id = 0
+
+         vars_to_fill = {
+             'remote_task_yaml_path': remote_tmp_task_yaml_path,
+             'local_task_yaml_path': service_file.name,
+             'service_name': service_name,
+             'controller_log_file': controller_log_file,
+             'remote_user_config_path': remote_config_yaml_path,
+             'local_to_controller_file_mounts': local_to_controller_file_mounts,
+             'modified_catalogs':
+                 service_catalog_common.get_modified_catalog_file_mounts(),
+             'consolidation_mode_job_id': controller_job_id,
+             **tls_template_vars,
+             **controller_utils.shared_controller_vars_to_fill(
+                 controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
+                 remote_user_config_path=remote_config_yaml_path,
+                 local_user_config=mutated_user_config,
+             ),
+         }
+         common_utils.fill_template(serve_constants.CONTROLLER_TEMPLATE,
+                                    vars_to_fill,
+                                    output_path=controller_file.name)
+         controller_task = task_lib.Task.from_yaml(controller_file.name)
+         # TODO(tian): Probably run another sky.launch after we get the load
+         # balancer port from the controller? So we don't need to open so many
+         # ports here. Or, we should have a nginx traffic control to refuse
+         # any connection to the unregistered ports.
+         controller_resources = {
+             r.copy(ports=[serve_constants.LOAD_BALANCER_PORT_RANGE])
+             for r in controller_resources
+         }
+         controller_task.set_resources(controller_resources)
+
+         # Set service_name so the backend will know to modify default ray
+         # task CPU usage to custom value instead of default 0.5 vCPU. We need
+         # to set it to a smaller value to support a larger number of services.
+         controller_task.service_name = service_name
+
+         # We directly submit the request to the controller and let the
+         # controller check the name conflict. Suppose we have multiple
+         # sky.serve.up() with same service name, the first one will
+         # successfully write its job id to controller service database;
+         # and for all following sky.serve.up(), the controller will throw
+         # an exception (name conflict detected) and exit. Therefore the
+         # controller job id in database could be used as an indicator of
+         # whether the service is already running. If the id is the same
+         # with the current job id, we know the service is up and running
+         # for the first time; otherwise it is a name conflict.
+         # Since the controller may be shared among multiple users, launch the
+         # controller with the API server's user hash.
+         if not serve_utils.is_consolidation_mode():
+             print(f'{colorama.Fore.YELLOW}Launching controller for '
+                   f'{service_name!r}...{colorama.Style.RESET_ALL}')
+             with common.with_server_user():
+                 with skypilot_config.local_active_workspace_ctx(
+                         constants.SKYPILOT_DEFAULT_WORKSPACE):
+                     controller_job_id, controller_handle = execution.launch(
+                         task=controller_task,
+                         cluster_name=controller_name,
+                         retry_until_up=True,
+                         _disable_controller_check=True,
+                     )
+         else:
+             controller_handle = backend_utils.is_controller_accessible(
+                 controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
+                 stopped_message='')
+             backend = backend_utils.get_backend_from_handle(controller_handle)
+             assert isinstance(backend, backends.CloudVmRayBackend)
+             backend.sync_file_mounts(
+                 handle=controller_handle,
+                 all_file_mounts=controller_task.file_mounts,
+                 storage_mounts=controller_task.storage_mounts)
+             run_script = controller_task.run
+             assert isinstance(run_script, str)
+             # Manually add the env variables to the run script. Originally
+             # this is done in ray jobs submission but now we have to do it
+             # manually because there is no ray runtime on the API server.
+             env_cmds = [
+                 f'export {k}={v!r}' for k, v in controller_task.envs.items()
+             ]
+             run_script = '\n'.join(env_cmds + [run_script])
+             # Dump script for high availability recovery.
+             # if controller_utils.high_availability_specified(
+             #         controller_name):
+             #     managed_job_state.set_ha_recovery_script(
+             #         consolidation_mode_job_id, run_script)
+             backend.run_on_head(controller_handle, run_script)
+
+         style = colorama.Style
+         fore = colorama.Fore
+
+         assert controller_job_id is not None and controller_handle is not None
+         # TODO(tian): Cache endpoint locally to speedup. Endpoint won't
+         # change after the first time, so there is no consistency issue.
+         with rich_utils.safe_status(
+                 ux_utils.spinner_message(
+                     f'Waiting for the {noun} to register')):
+             # This function will check the controller job id in the database
+             # and return the endpoint if the job id matches. Otherwise it will
+             # return None.
+             code = serve_utils.ServeCodeGen.wait_service_registration(
+                 service_name, controller_job_id)
+             backend = backend_utils.get_backend_from_handle(controller_handle)
+             assert isinstance(backend, backends.CloudVmRayBackend)
+             assert isinstance(controller_handle,
+                               backends.CloudVmRayResourceHandle)
+             returncode, lb_port_payload, _ = backend.run_on_head(
+                 controller_handle,
+                 code,
+                 require_outputs=True,
+                 stream_logs=False)
+         try:
+             subprocess_utils.handle_returncode(
+                 returncode, code, f'Failed to wait for {noun} initialization',
+                 lb_port_payload)
+         except exceptions.CommandError:
+             if serve_utils.is_consolidation_mode():
+                 with ux_utils.print_exception_no_traceback():
+                     raise RuntimeError(
+                         f'Failed to wait for {noun} initialization. '
+                         'Please check the logs above for more details.'
+                     ) from None
+             statuses = backend.get_job_status(controller_handle,
+                                               [controller_job_id],
+                                               stream_logs=False)
+             controller_job_status = list(statuses.values())[0]
+             if controller_job_status == sky.JobStatus.PENDING:
+                 # Max number of services reached due to vCPU constraint.
+                 # The controller job is pending due to ray job scheduling.
+                 # We manually cancel the job here.
+                 backend.cancel_jobs(controller_handle, [controller_job_id])
+                 with ux_utils.print_exception_no_traceback():
+                     raise RuntimeError(
+                         'Max number of services reached. '
+                         'To spin up more services, please '
+                         'tear down some existing services.') from None
+             else:
+                 # Possible cases:
+                 # (1) name conflict;
+                 # (2) max number of services reached due to memory
+                 # constraint. The job will successfully run on the
+                 # controller, but there will be an error thrown due
+                 # to memory constraint check in the controller.
+                 # See sky/serve/service.py for more details.
+                 with ux_utils.print_exception_no_traceback():
+                     raise RuntimeError(
+                         'Failed to spin up the service. Please '
+                         'check the logs above for more details.') from None
+         else:
+             lb_port = serve_utils.load_service_initialization_result(
+                 lb_port_payload)
+             if not serve_utils.is_consolidation_mode():
+                 socket_endpoint = backend_utils.get_endpoints(
+                     controller_handle.cluster_name,
+                     lb_port,
+                     skip_status_check=True).get(lb_port)
+             else:
+                 socket_endpoint = f'localhost:{lb_port}'
+             assert socket_endpoint is not None, (
+                 'Did not get endpoint for controller.')
+             # Already checked by validate_service_task
+             assert task.service is not None
+             protocol = ('http'
+                         if task.service.tls_credential is None else 'https')
+             socket_endpoint = socket_endpoint.replace('https://', '').replace(
+                 'http://', '')
+             endpoint = f'{protocol}://{socket_endpoint}'
+
+         if pool:
+             logger.info(
+                 f'{fore.CYAN}Pool name: '
+                 f'{style.BRIGHT}{service_name}{style.RESET_ALL}'
+                 f'\n📋 Useful Commands'
+                 f'\n{ux_utils.INDENT_SYMBOL}To submit jobs to the pool:\t'
+                 f'{ux_utils.BOLD}sky jobs launch --pool {service_name} '
+                 f'<run-command>{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To submit multiple jobs:\t'
+                 f'{ux_utils.BOLD}sky jobs launch --pool {service_name} '
+                 f'--num-jobs 10 <run-command>{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To check the pool status:\t'
+                 f'{ux_utils.BOLD}sky jobs pool status {service_name}'
+                 f'{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_LAST_SYMBOL}To terminate the pool:\t'
+                 f'{ux_utils.BOLD}sky jobs pool down {service_name}'
+                 f'{ux_utils.RESET_BOLD}'
+                 '\n\n' + ux_utils.finishing_message('Successfully created pool '
+                                                     f'{service_name!r}.'))
+         else:
+             logger.info(
+                 f'{fore.CYAN}Service name: '
+                 f'{style.BRIGHT}{service_name}{style.RESET_ALL}'
+                 f'\n{fore.CYAN}Endpoint URL: '
+                 f'{style.BRIGHT}{endpoint}{style.RESET_ALL}'
+                 f'\n📋 Useful Commands'
+                 f'\n{ux_utils.INDENT_SYMBOL}To check service status:\t'
+                 f'{ux_utils.BOLD}sky serve status {service_name} '
+                 f'[--endpoint]{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To teardown the service:\t'
+                 f'{ux_utils.BOLD}sky serve down {service_name}'
+                 f'{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To see replica logs:\t'
+                 f'{ux_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
+                 f'{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To see load balancer logs:\t'
+                 f'{ux_utils.BOLD}sky serve logs --load-balancer {service_name}'
+                 f'{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To see controller logs:\t'
+                 f'{ux_utils.BOLD}sky serve logs --controller {service_name}'
+                 f'{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_SYMBOL}To monitor the status:\t'
+                 f'{ux_utils.BOLD}watch -n10 sky serve status {service_name}'
+                 f'{ux_utils.RESET_BOLD}'
+                 f'\n{ux_utils.INDENT_LAST_SYMBOL}To send a test request:\t'
+                 f'{ux_utils.BOLD}curl {endpoint}'
+                 f'{ux_utils.RESET_BOLD}'
+                 '\n\n' + ux_utils.finishing_message(
+                     'Service is spinning up and replicas '
+                     'will be ready shortly.'))
+         return service_name, endpoint
+
+
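For orientation, a minimal sketch of how the `up()` entrypoint above could be driven from Python. It assumes a task YAML with a `service` section; the YAML path and service name are illustrative only, and this server-side module is normally reached through the SkyPilot CLI or client SDK rather than called directly.

    # Hedged usage sketch for impl.up(); 'service.yaml' and 'my-service' are
    # illustrative names, not part of this diff.
    import sky
    from sky.serve.server import impl

    task = sky.Task.from_yaml('service.yaml')  # must contain a `service:` section
    # up() validates the task, launches (or reuses) the serve controller and
    # returns the service name plus the load-balancer endpoint.
    name, endpoint = impl.up(task, service_name='my-service', pool=False)
    # Passing pool=True requires consolidation mode, per the check at the top
    # of up() above.
    print(f'{name} is reachable at {endpoint}')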
+ def update(
+     task: 'sky.Task',
+     service_name: str,
+     mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE,
+     pool: bool = False,
+ ) -> None:
+     """Updates an existing service or pool."""
+     noun = 'pool' if pool else 'service'
+     capnoun = noun.capitalize()
+     task.validate()
+     serve_utils.validate_service_task(task, pool=pool)
+
+     # Always apply the policy again here, even though it might have been applied
+     # in the CLI. This is to ensure that we apply the policy to the final DAG
+     # and get the mutated config.
+     # TODO(cblmemo,zhwu): If a user sets a new skypilot_config, the update
+     # will not apply the config.
+     dag, _ = admin_policy_utils.apply(task)
+     task = dag.tasks[0]
+     if pool:
+         if task.run is not None:
+             logger.warning(f'{colorama.Fore.YELLOW}The `run` section will be '
+                            f'ignored for pool.{colorama.Style.RESET_ALL}')
+         # Use dummy run script for cluster pool.
+         task.run = serve_constants.POOL_DUMMY_RUN_COMMAND
+
+     assert task.service is not None
+     if not pool and task.service.tls_credential is not None:
+         logger.warning('Updating TLS keyfile and certfile is not supported. '
+                        'Any updates to the keyfile and certfile will not take '
+                        'effect. To update TLS keyfile and certfile, please '
+                        'tear down the service and spin up a new one.')
+
+     handle = backend_utils.is_controller_accessible(
+         controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
+         stopped_message=
+         'Service controller is stopped. There is no service to update. '
+         f'To spin up a new service, use {ux_utils.BOLD}'
+         f'sky serve up{ux_utils.RESET_BOLD}',
+         non_existent_message='Service does not exist. '
+         'To spin up a new service, '
+         f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
+     )
+
+     backend = backend_utils.get_backend_from_handle(handle)
+     assert isinstance(backend, backends.CloudVmRayBackend)
+
+     service_record = _get_service_record(service_name, pool, handle, backend)
+
+     if service_record is None:
+         cmd = 'sky jobs pool up' if pool else 'sky serve up'
+         with ux_utils.print_exception_no_traceback():
+             raise RuntimeError(f'Cannot find {noun} {service_name!r}. '
+                                f'To spin up a {noun}, use {ux_utils.BOLD}'
+                                f'{cmd}{ux_utils.RESET_BOLD}')
+
+     prompt = None
+     if (service_record['status'] == serve_state.ServiceStatus.CONTROLLER_FAILED
+        ):
+         prompt = (f'{capnoun} {service_name!r} has a failed controller. '
+                   f'Please clean up the {noun} and try again.')
+     elif (service_record['status'] == serve_state.ServiceStatus.CONTROLLER_INIT
+          ):
+         prompt = (f'{capnoun} {service_name!r} is still initializing '
+                   'its controller. Please try again later.')
+     if prompt is not None:
+         with ux_utils.print_exception_no_traceback():
+             raise RuntimeError(prompt)
+
+     if not pool:
+         original_lb_policy = service_record['load_balancing_policy']
+         assert task.service is not None, 'Service section not found.'
+         if original_lb_policy != task.service.load_balancing_policy:
+             logger.warning(
+                 f'{colorama.Fore.YELLOW}Current load balancing policy '
+                 f'{original_lb_policy!r} is different from the new policy '
+                 f'{task.service.load_balancing_policy!r}. Updating the load '
+                 'balancing policy is not supported yet and it will be ignored. '
+                 'The service will continue to use the current load balancing '
+                 f'policy.{colorama.Style.RESET_ALL}')
+
+     with rich_utils.safe_status(
+             ux_utils.spinner_message(f'Initializing {noun}')):
+         controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+             task, task_type='serve')
+
+     code = serve_utils.ServeCodeGen.add_version(service_name)
+     returncode, version_string_payload, stderr = backend.run_on_head(
+         handle,
+         code,
+         require_outputs=True,
+         stream_logs=False,
+         separate_stderr=True)
+     try:
+         subprocess_utils.handle_returncode(returncode,
+                                            code,
+                                            'Failed to add version',
+                                            stderr,
+                                            stream_logs=True)
+     except exceptions.CommandError as e:
+         raise RuntimeError(e.error_msg) from e
+
+     version_string = serve_utils.load_version_string(version_string_payload)
+     try:
+         current_version = int(version_string)
+     except ValueError as e:
+         with ux_utils.print_exception_no_traceback():
+             raise ValueError(f'Failed to parse version: {version_string}; '
+                              f'Returncode: {returncode}') from e
+
+     with tempfile.NamedTemporaryFile(
+             prefix=f'{service_name}-v{current_version}',
+             mode='w') as service_file:
+         task_config = task.to_yaml_config()
+         common_utils.dump_yaml(service_file.name, task_config)
+         remote_task_yaml_path = serve_utils.generate_task_yaml_file_name(
+             service_name, current_version, expand_user=False)
+
+         with sky_logging.silent():
+             backend.sync_file_mounts(handle,
+                                      {remote_task_yaml_path: service_file.name},
+                                      storage_mounts=None)
+
+         code = serve_utils.ServeCodeGen.update_service(service_name,
+                                                        current_version,
+                                                        mode=mode.value,
+                                                        pool=pool)
+         returncode, _, stderr = backend.run_on_head(handle,
+                                                     code,
+                                                     require_outputs=True,
+                                                     stream_logs=False,
+                                                     separate_stderr=True)
+         try:
+             subprocess_utils.handle_returncode(returncode,
+                                                code,
+                                                f'Failed to update {noun}s',
+                                                stderr,
+                                                stream_logs=True)
+         except exceptions.CommandError as e:
+             raise RuntimeError(e.error_msg) from e
+
+     cmd = 'sky jobs pool status' if pool else 'sky serve status'
+     logger.info(
+         f'{colorama.Fore.GREEN}{capnoun} {service_name!r} update scheduled.'
+         f'{colorama.Style.RESET_ALL}\n'
+         f'Please use {ux_utils.BOLD}{cmd} {service_name} '
+         f'{ux_utils.RESET_BOLD}to check the latest status.')
+
+     logger.info(
+         ux_utils.finishing_message(
+             f'Successfully updated {noun} {service_name!r} '
+             f'to version {current_version}.'))
+
+
+ def apply(
+     task: 'sky.Task',
+     service_name: str,
+     mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE,
+     pool: bool = False,
+ ) -> None:
+     """Applies the config to the service or pool."""
+     with filelock.FileLock(serve_utils.get_service_filelock_path(service_name)):
+         try:
+             handle = backend_utils.is_controller_accessible(
+                 controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
+                 stopped_message='')
+             backend = backend_utils.get_backend_from_handle(handle)
+             assert isinstance(backend, backends.CloudVmRayBackend)
+             service_record = _get_service_record(service_name, pool, handle,
+                                                  backend)
+             if service_record is not None:
+                 return update(task, service_name, mode, pool)
+         except exceptions.ClusterNotUpError:
+             pass
+         up(task, service_name, pool)
+
+
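The `apply()` helper above is effectively "update if the service already exists, otherwise create it", serialized per service by a file lock. A rough, hypothetical calling pattern (names illustrative):

    # Hedged sketch of the idempotent apply pattern; 'service.yaml' and
    # 'my-service' are illustrative.
    import sky
    from sky.serve import serve_utils
    from sky.serve.server import impl

    task = sky.Task.from_yaml('service.yaml')
    # Safe to call repeatedly with the same name: the first call spins the
    # service up; later calls register a new version and roll it out under
    # the per-service file lock.
    impl.apply(task, service_name='my-service',
               mode=serve_utils.DEFAULT_UPDATE_MODE, pool=False)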
+ def down(
+     service_names: Optional[Union[str, List[str]]] = None,
+     all: bool = False,  # pylint: disable=redefined-builtin
+     purge: bool = False,
+     pool: bool = False,
+ ) -> None:
+     """Tears down a service or pool."""
+     noun = 'pool' if pool else 'service'
+     if service_names is None:
+         service_names = []
+     if isinstance(service_names, str):
+         service_names = [service_names]
+     handle = backend_utils.is_controller_accessible(
+         controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
+         stopped_message=f'All {noun}s should have terminated.')
+
+     service_names_str = ','.join(service_names)
+     if sum([bool(service_names), all]) != 1:
+         argument_str = (f'{noun}_names={service_names_str}'
+                         if service_names else '')
+         argument_str += ' all' if all else ''
+         raise ValueError(f'Can only specify one of {noun}_names or all. '
+                          f'Provided {argument_str!r}.')
+
+     backend = backend_utils.get_backend_from_handle(handle)
+     assert isinstance(backend, backends.CloudVmRayBackend)
+     service_names = None if all else service_names
+     code = serve_utils.ServeCodeGen.terminate_services(service_names, purge,
+                                                        pool)
+
+     try:
+         returncode, stdout, _ = backend.run_on_head(handle,
+                                                     code,
+                                                     require_outputs=True,
+                                                     stream_logs=False)
+     except exceptions.FetchClusterInfoError as e:
+         raise RuntimeError(
+             'Failed to fetch controller IP. Please refresh controller status '
+             f'by `sky status -r {common.SKY_SERVE_CONTROLLER_NAME}` '
+             'and try again.') from e
+
+     try:
+         subprocess_utils.handle_returncode(returncode, code,
+                                            f'Failed to terminate {noun}',
+                                            stdout)
+     except exceptions.CommandError as e:
+         raise RuntimeError(e.error_msg) from e
+
+     logger.info(stdout)
+
+
+ def status(
+     service_names: Optional[Union[str, List[str]]] = None,
+     pool: bool = False,
+ ) -> List[Dict[str, Any]]:
+     """Gets statuses of services or pools."""
+     noun = 'pool' if pool else 'service'
+     if service_names is not None:
+         if isinstance(service_names, str):
+             service_names = [service_names]
+
+     try:
+         backend_utils.check_network_connection()
+     except exceptions.NetworkError as e:
+         with ux_utils.print_exception_no_traceback():
+             raise RuntimeError(f'Failed to refresh {noun}s status '
+                                'due to network error.') from e
+
+     controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
+     handle = backend_utils.is_controller_accessible(
+         controller=controller_type,
+         stopped_message=controller_type.value.default_hint_if_non_existent.
+         replace('service', noun))
+
+     backend = backend_utils.get_backend_from_handle(handle)
+     assert isinstance(backend, backends.CloudVmRayBackend)
+
+     code = serve_utils.ServeCodeGen.get_service_status(service_names, pool=pool)
+     returncode, serve_status_payload, stderr = backend.run_on_head(
+         handle,
+         code,
+         require_outputs=True,
+         stream_logs=False,
+         separate_stderr=True)
+
+     try:
+         subprocess_utils.handle_returncode(returncode,
+                                            code,
+                                            f'Failed to fetch {noun}s',
+                                            stderr,
+                                            stream_logs=True)
+     except exceptions.CommandError as e:
+         raise RuntimeError(e.error_msg) from e
+
+     service_records = serve_utils.load_service_status(serve_status_payload)
+     # Get the endpoint for each service
+     for service_record in service_records:
+         service_record['endpoint'] = None
+         # Pool doesn't have an endpoint.
+         if pool:
+             continue
+         if service_record['load_balancer_port'] is not None:
+             try:
+                 lb_port = service_record['load_balancer_port']
+                 if not serve_utils.is_consolidation_mode():
+                     endpoint = backend_utils.get_endpoints(
+                         cluster=common.SKY_SERVE_CONTROLLER_NAME,
+                         port=lb_port).get(lb_port, None)
+                 else:
+                     endpoint = f'localhost:{lb_port}'
+             except exceptions.ClusterNotUpError:
+                 pass
+             else:
+                 protocol = ('https'
+                             if service_record['tls_encrypted'] else 'http')
+                 if endpoint is not None:
+                     endpoint = endpoint.replace('https://',
+                                                 '').replace('http://', '')
+                     service_record['endpoint'] = f'{protocol}://{endpoint}'
+
+     return service_records
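Finally, a short sketch of consuming the records returned by `status()`. Only the keys referenced in the code above ('status', 'endpoint', 'load_balancer_port', 'tls_encrypted') are known from this diff; the 'name' key used below is an assumption.

    from sky.serve.server import impl

    for record in impl.status(service_names=None, pool=False):
        # 'name' is assumed; 'status' and 'endpoint' appear in status() above.
        print(record.get('name'), record['status'], record['endpoint'])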