skypilot-nightly 1.0.0.dev20250718__py3-none-any.whl → 1.0.0.dev20250723__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (160)
  1. sky/__init__.py +4 -2
  2. sky/admin_policy.py +11 -4
  3. sky/backends/backend_utils.py +50 -24
  4. sky/backends/cloud_vm_ray_backend.py +41 -38
  5. sky/catalog/__init__.py +3 -1
  6. sky/catalog/aws_catalog.py +8 -5
  7. sky/catalog/azure_catalog.py +8 -5
  8. sky/catalog/common.py +8 -2
  9. sky/catalog/cudo_catalog.py +5 -2
  10. sky/catalog/do_catalog.py +4 -1
  11. sky/catalog/fluidstack_catalog.py +5 -2
  12. sky/catalog/gcp_catalog.py +8 -5
  13. sky/catalog/hyperbolic_catalog.py +5 -2
  14. sky/catalog/ibm_catalog.py +8 -5
  15. sky/catalog/lambda_catalog.py +8 -5
  16. sky/catalog/nebius_catalog.py +8 -5
  17. sky/catalog/oci_catalog.py +8 -5
  18. sky/catalog/paperspace_catalog.py +4 -1
  19. sky/catalog/runpod_catalog.py +5 -2
  20. sky/catalog/scp_catalog.py +8 -5
  21. sky/catalog/vast_catalog.py +5 -2
  22. sky/catalog/vsphere_catalog.py +4 -1
  23. sky/client/cli/command.py +63 -25
  24. sky/client/sdk.py +61 -11
  25. sky/clouds/aws.py +12 -7
  26. sky/clouds/azure.py +12 -7
  27. sky/clouds/cloud.py +9 -8
  28. sky/clouds/cudo.py +13 -7
  29. sky/clouds/do.py +12 -7
  30. sky/clouds/fluidstack.py +11 -6
  31. sky/clouds/gcp.py +12 -7
  32. sky/clouds/hyperbolic.py +11 -6
  33. sky/clouds/ibm.py +11 -6
  34. sky/clouds/kubernetes.py +7 -3
  35. sky/clouds/lambda_cloud.py +11 -6
  36. sky/clouds/nebius.py +14 -12
  37. sky/clouds/oci.py +12 -7
  38. sky/clouds/paperspace.py +12 -7
  39. sky/clouds/runpod.py +12 -7
  40. sky/clouds/scp.py +11 -6
  41. sky/clouds/vast.py +14 -8
  42. sky/clouds/vsphere.py +11 -6
  43. sky/core.py +6 -1
  44. sky/dashboard/out/404.html +1 -1
  45. sky/dashboard/out/_next/static/chunks/{1043-734e57d2b27dfe5d.js → 1043-869d9c78bf5dd3df.js} +1 -1
  46. sky/dashboard/out/_next/static/chunks/{1141-d8c6404a7c6fffe6.js → 1141-e49a159c30a6c4a7.js} +1 -1
  47. sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +30 -0
  48. sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +6 -0
  49. sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +15 -0
  52. sky/dashboard/out/_next/static/chunks/{2641.35edc9ccaeaad9e3.js → 2641.74c19c4d45a2c034.js} +1 -1
  53. sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/{4725.4c849b1e05c8e9ad.js → 4725.66125dcd9832aa5d.js} +1 -1
  55. sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +16 -0
  56. sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +15 -0
  57. sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +1 -0
  58. sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +55 -0
  59. sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +41 -0
  61. sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +6 -0
  62. sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +1 -0
  63. sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +30 -0
  64. sky/dashboard/out/_next/static/chunks/{9984.2b5e3fa69171bff9.js → 9984.0460de9d3adf5582.js} +1 -1
  65. sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +34 -0
  66. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-fa406155b4223d0d.js → [job]-2186770cc2de1623.js} +2 -2
  67. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-0c37ee1ac5f3474d.js → [cluster]-95afb019ab85801c.js} +1 -1
  68. sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +1 -0
  69. sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +1 -0
  70. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +1 -0
  71. sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-c5b357bfd9502fbe.js → [job]-dc0299ffefebcdbe.js} +2 -2
  73. sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +1 -0
  74. sky/dashboard/out/_next/static/chunks/pages/{users-19e98664bdd61643.js → users-6790fcefd5487b13.js} +1 -1
  75. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +1 -0
  76. sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +1 -0
  77. sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +1 -0
  78. sky/dashboard/out/_next/static/css/b3227360726f12eb.css +3 -0
  79. sky/dashboard/out/_next/static/mym3Ciwp-zqU7ZpOLGnrW/_buildManifest.js +1 -0
  80. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  81. sky/dashboard/out/clusters/[cluster].html +1 -1
  82. sky/dashboard/out/clusters.html +1 -1
  83. sky/dashboard/out/config.html +1 -1
  84. sky/dashboard/out/index.html +1 -1
  85. sky/dashboard/out/infra/[context].html +1 -1
  86. sky/dashboard/out/infra.html +1 -1
  87. sky/dashboard/out/jobs/[job].html +1 -1
  88. sky/dashboard/out/jobs.html +1 -1
  89. sky/dashboard/out/users.html +1 -1
  90. sky/dashboard/out/volumes.html +1 -1
  91. sky/dashboard/out/workspace/new.html +1 -1
  92. sky/dashboard/out/workspaces/[name].html +1 -1
  93. sky/dashboard/out/workspaces.html +1 -1
  94. sky/data/mounting_utils.py +93 -32
  95. sky/global_user_state.py +12 -143
  96. sky/jobs/state.py +9 -88
  97. sky/jobs/utils.py +28 -13
  98. sky/provision/nebius/utils.py +3 -6
  99. sky/schemas/db/README +4 -0
  100. sky/schemas/db/env.py +90 -0
  101. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  102. sky/schemas/db/script.py.mako +28 -0
  103. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  104. sky/serve/client/sdk.py +6 -2
  105. sky/serve/controller.py +7 -3
  106. sky/serve/serve_state.py +1 -1
  107. sky/serve/serve_utils.py +171 -75
  108. sky/serve/server/core.py +17 -6
  109. sky/server/common.py +4 -3
  110. sky/server/requests/payloads.py +2 -0
  111. sky/server/requests/requests.py +1 -1
  112. sky/setup_files/MANIFEST.in +2 -0
  113. sky/setup_files/alembic.ini +148 -0
  114. sky/setup_files/dependencies.py +1 -0
  115. sky/skylet/configs.py +1 -1
  116. sky/skylet/constants.py +4 -0
  117. sky/skylet/job_lib.py +1 -1
  118. sky/skypilot_config.py +1 -1
  119. sky/users/permission.py +1 -1
  120. sky/utils/common_utils.py +85 -3
  121. sky/utils/config_utils.py +15 -0
  122. sky/utils/db/__init__.py +0 -0
  123. sky/utils/{db_utils.py → db/db_utils.py} +59 -0
  124. sky/utils/db/migration_utils.py +93 -0
  125. sky/utils/locks.py +319 -0
  126. sky/utils/schemas.py +38 -34
  127. sky/utils/timeline.py +41 -0
  128. {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/METADATA +2 -1
  129. {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/RECORD +134 -125
  130. sky/dashboard/out/_next/static/FUjweqdImyeYhMYFON-Se/_buildManifest.js +0 -1
  131. sky/dashboard/out/_next/static/chunks/1746.27d40aedc22bd2d6.js +0 -60
  132. sky/dashboard/out/_next/static/chunks/1871-76491ac174a95278.js +0 -6
  133. sky/dashboard/out/_next/static/chunks/2544.27f70672535675ed.js +0 -1
  134. sky/dashboard/out/_next/static/chunks/2875.c24c6d57dc82e436.js +0 -25
  135. sky/dashboard/out/_next/static/chunks/3785.95b94f18aaec7233.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/3947-b059261d6fa88a1f.js +0 -35
  137. sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/4869.bdd42f14b51d1d6f.js +0 -16
  139. sky/dashboard/out/_next/static/chunks/5491.918ffed0ba7a5294.js +0 -20
  140. sky/dashboard/out/_next/static/chunks/6990-dcb411b566e64cde.js +0 -1
  141. sky/dashboard/out/_next/static/chunks/804-9f5e98ce84d46bdd.js +0 -21
  142. sky/dashboard/out/_next/static/chunks/9025.133e9ba5c780afeb.js +0 -6
  143. sky/dashboard/out/_next/static/chunks/938-6a9ffdaa21eee969.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/9470-b6f6a35283863a6f.js +0 -1
  145. sky/dashboard/out/_next/static/chunks/9847.46e613d000c55859.js +0 -30
  146. sky/dashboard/out/_next/static/chunks/pages/_app-771a40cde532309b.js +0 -20
  147. sky/dashboard/out/_next/static/chunks/pages/clusters-102d169e87913ba1.js +0 -1
  148. sky/dashboard/out/_next/static/chunks/pages/index-927ddeebe57a8ac3.js +0 -1
  149. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-8b0809f59034d509.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js +0 -1
  151. sky/dashboard/out/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7c0187f43757a548.js +0 -1
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces-a1e43d9ef51a9cea.js +0 -1
  154. sky/dashboard/out/_next/static/chunks/webpack-6b0575ea521af4f3.js +0 -1
  155. sky/dashboard/out/_next/static/css/219887b94512388c.css +0 -3
  156. sky/dashboard/out/_next/static/{FUjweqdImyeYhMYFON-Se → mym3Ciwp-zqU7ZpOLGnrW}/_ssgManifest.js +0 -0
  157. {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/WHEEL +0 -0
  158. {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/entry_points.txt +0 -0
  159. {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/licenses/LICENSE +0 -0
  160. {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/top_level.txt +0 -0
@@ -56,10 +56,12 @@ def get_vcpus_mem_from_instance_type(
56
56
  return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
57
57
 
58
58
 
59
- def get_default_instance_type(
60
- cpus: Optional[str] = None,
61
- memory: Optional[str] = None,
62
- disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
59
+ def get_default_instance_type(cpus: Optional[str] = None,
60
+ memory: Optional[str] = None,
61
+ disk_tier: Optional[
62
+ resources_utils.DiskTier] = None,
63
+ region: Optional[str] = None,
64
+ zone: Optional[str] = None) -> Optional[str]:
63
65
  del disk_tier # unused
64
66
  if cpus is None and memory is None:
65
67
  cpus = f'{_DEFAULT_NUM_VCPUS}+'
@@ -68,7 +70,8 @@ def get_default_instance_type(
68
70
  else:
69
71
  memory_gb_or_ratio = memory
70
72
  return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
71
- memory_gb_or_ratio)
73
+ memory_gb_or_ratio,
74
+ region, zone)
72
75
 
73
76
 
74
77
  def get_accelerators_from_instance_type(
@@ -51,12 +51,15 @@ def get_vcpus_mem_from_instance_type(
51
51
  return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
52
52
 
53
53
 
54
- def get_default_instance_type(
55
- cpus: Optional[str] = None,
56
- memory: Optional[str] = None,
57
- disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
54
+ def get_default_instance_type(cpus: Optional[str] = None,
55
+ memory: Optional[str] = None,
56
+ disk_tier: Optional[
57
+ resources_utils.DiskTier] = None,
58
+ region: Optional[str] = None,
59
+ zone: Optional[str] = None) -> Optional[str]:
58
60
  del disk_tier # unused
59
- return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
61
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
62
+ zone)
60
63
 
61
64
 
62
65
  def get_accelerators_from_instance_type(
@@ -101,10 +101,12 @@ def get_hourly_cost(instance_type: str,
101
101
  region, zone)
102
102
 
103
103
 
104
- def get_default_instance_type(
105
- cpus: Optional[str] = None,
106
- memory: Optional[str] = None,
107
- disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
104
+ def get_default_instance_type(cpus: Optional[str] = None,
105
+ memory: Optional[str] = None,
106
+ disk_tier: Optional[
107
+ resources_utils.DiskTier] = None,
108
+ region: Optional[str] = None,
109
+ zone: Optional[str] = None) -> Optional[str]:
108
110
  if cpus is None:
109
111
  cpus = f'{oci_utils.oci_config.DEFAULT_NUM_VCPUS}+'
110
112
 
@@ -127,7 +129,8 @@ def get_default_instance_type(
127
129
 
128
130
  logger.debug(f'# get_default_instance_type: {df}')
129
131
  return common.get_instance_type_for_cpus_mem_impl(df, cpus,
130
- memory_gb_or_ratio)
132
+ memory_gb_or_ratio,
133
+ region, zone)
131
134
 
132
135
 
133
136
  def get_accelerators_from_instance_type(
@@ -52,11 +52,14 @@ def get_default_instance_type(
52
52
  cpus: Optional[str] = None,
53
53
  memory: Optional[str] = None,
54
54
  disk_tier: Optional[str] = None,
55
+ region: Optional[str] = None,
56
+ zone: Optional[str] = None,
55
57
  ) -> Optional[str]:
56
58
  # NOTE: After expanding catalog to multiple entries, you may
57
59
  # want to specify a default instance type or family.
58
60
  del disk_tier # unused
59
- return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
61
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
62
+ zone)
60
63
 
61
64
 
62
65
  def get_accelerators_from_instance_type(
@@ -41,11 +41,14 @@ def get_vcpus_mem_from_instance_type(
41
41
 
42
42
  def get_default_instance_type(cpus: Optional[str] = None,
43
43
  memory: Optional[str] = None,
44
- disk_tier: Optional[str] = None) -> Optional[str]:
44
+ disk_tier: Optional[str] = None,
45
+ region: Optional[str] = None,
46
+ zone: Optional[str] = None) -> Optional[str]:
45
47
  del disk_tier # RunPod does not support disk tiers.
46
48
  # NOTE: After expanding catalog to multiple entries, you may
47
49
  # want to specify a default instance type or family.
48
- return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
50
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
51
+ zone)
49
52
 
50
53
 
51
54
  def get_accelerators_from_instance_type(
@@ -51,10 +51,12 @@ def get_vcpus_mem_from_instance_type(
51
51
  return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
52
52
 
53
53
 
54
- def get_default_instance_type(
55
- cpus: Optional[str] = None,
56
- memory: Optional[str] = None,
57
- disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
54
+ def get_default_instance_type(cpus: Optional[str] = None,
55
+ memory: Optional[str] = None,
56
+ disk_tier: Optional[
57
+ resources_utils.DiskTier] = None,
58
+ region: Optional[str] = None,
59
+ zone: Optional[str] = None) -> Optional[str]:
58
60
  del disk_tier # unused
59
61
  if cpus is None and memory is None:
60
62
  cpus = str(_DEFAULT_NUM_VCPUS)
@@ -63,7 +65,8 @@ def get_default_instance_type(
63
65
  else:
64
66
  memory_gb_or_ratio = memory
65
67
  return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
66
- memory_gb_or_ratio)
68
+ memory_gb_or_ratio,
69
+ region, zone)
67
70
 
68
71
 
69
72
  def get_accelerators_from_instance_type(
@@ -48,11 +48,14 @@ def get_vcpus_mem_from_instance_type(
48
48
 
49
49
  def get_default_instance_type(cpus: Optional[str] = None,
50
50
  memory: Optional[str] = None,
51
- disk_tier: Optional[str] = None) -> Optional[str]:
51
+ disk_tier: Optional[str] = None,
52
+ region: Optional[str] = None,
53
+ zone: Optional[str] = None) -> Optional[str]:
52
54
  del disk_tier
53
55
  # NOTE: After expanding catalog to multiple entries, you may
54
56
  # want to specify a default instance type or family.
55
- return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
57
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
58
+ zone)
56
59
 
57
60
 
58
61
  def get_accelerators_from_instance_type(
@@ -72,6 +72,8 @@ def get_default_instance_type(
72
72
  cpus: Optional[str] = None,
73
73
  memory: Optional[str] = None,
74
74
  disk_tier: Optional[str] = None,
75
+ region: Optional[str] = None,
76
+ zone: Optional[str] = None,
75
77
  ) -> Optional[str]:
76
78
  del disk_tier # unused
77
79
  if cpus is None and memory is None:
@@ -81,7 +83,8 @@ def get_default_instance_type(
81
83
  else:
82
84
  memory_gb_or_ratio = memory
83
85
  return common.get_instance_type_for_cpus_mem_impl(_get_df(), cpus,
84
- memory_gb_or_ratio)
86
+ memory_gb_or_ratio,
87
+ region, zone)
85
88
 
86
89
 
87
90
  def get_accelerators_from_instance_type(
sky/client/cli/command.py CHANGED
@@ -3023,17 +3023,18 @@ def _down_or_stop_clusters(
3023
3023
  click.echo(common_utils.format_exception(e))
3024
3024
  else:
3025
3025
  raise
3026
- confirm_str = 'delete'
3027
- input_prefix = ('Since --purge is set, errors will be ignored '
3028
- 'and controller will be removed from '
3029
- 'local state.\n') if purge else ''
3030
- user_input = click.prompt(
3031
- f'{input_prefix}'
3032
- f'To proceed, please type {colorama.Style.BRIGHT}'
3033
- f'{confirm_str!r}{colorama.Style.RESET_ALL}',
3034
- type=str)
3035
- if user_input != confirm_str:
3036
- raise click.Abort()
3026
+ if not purge:
3027
+ confirm_str = 'delete'
3028
+ user_input = click.prompt(
3029
+ f'To proceed, please type {colorama.Style.BRIGHT}'
3030
+ f'{confirm_str!r}{colorama.Style.RESET_ALL}',
3031
+ type=str)
3032
+ if user_input != confirm_str:
3033
+ raise click.Abort()
3034
+ else:
3035
+ click.echo('Since --purge is set, errors will be ignored '
3036
+ 'and controller will be removed from '
3037
+ 'local state.\nSkipping confirmation.')
3037
3038
  no_confirm = True
3038
3039
  names += controllers
3039
3040
 
@@ -5114,6 +5115,12 @@ def serve_down(
5114
5115
  default=False,
5115
5116
  help='Sync down logs to the local machine. Can be combined with '
5116
5117
  '--controller, --load-balancer, or a replica ID to narrow scope.')
5118
+ @click.option(
5119
+ '--tail',
5120
+ default=None,
5121
+ type=int,
5122
+ help='The number of lines to display from the end of the log file. '
5123
+ 'Default is None, which means print all lines.')
5117
5124
  @click.argument('service_name', required=True, type=str)
5118
5125
  @click.argument('replica_ids', required=False, type=int, nargs=-1)
5119
5126
  @usage_lib.entrypoint
@@ -5126,6 +5133,7 @@ def serve_logs(
5126
5133
  load_balancer: bool,
5127
5134
  replica_ids: Tuple[int, ...],
5128
5135
  sync_down: bool,
5136
+ tail: Optional[int],
5129
5137
  ):
5130
5138
  """Tail or sync down logs of a service.
5131
5139
 
@@ -5145,12 +5153,26 @@ def serve_logs(
5145
5153
  # Tail the logs of replica 1
5146
5154
  sky serve logs [SERVICE_NAME] 1
5147
5155
  \b
5156
+ # Show the last 100 lines of the controller logs
5157
+ sky serve logs --controller --tail 100 [SERVICE_NAME]
5158
+ \b
5148
5159
  # Sync down all logs of the service (controller, LB, all replicas)
5149
5160
  sky serve logs [SERVICE_NAME] --sync-down
5150
5161
  \b
5151
5162
  # Sync down controller logs and logs for replicas 1 and 3
5152
5163
  sky serve logs [SERVICE_NAME] 1 3 --controller --sync-down
5153
5164
  """
5165
+ if tail is not None:
5166
+ if tail < 0:
5167
+ raise click.UsageError('--tail must be a non-negative integer.')
5168
+ # TODO(arda): We could add ability to tail and follow logs together.
5169
+ if follow:
5170
+ follow = False
5171
+ logger.warning(
5172
+ f'{colorama.Fore.YELLOW}'
5173
+ '--tail and --follow cannot be used together. '
5174
+ f'Changed the mode to --no-follow.{colorama.Style.RESET_ALL}')
5175
+
5154
5176
  chosen_components: Set[serve_lib.ServiceComponent] = set()
5155
5177
  if controller:
5156
5178
  chosen_components.add(serve_lib.ServiceComponent.CONTROLLER)
@@ -5185,7 +5207,8 @@ def serve_logs(
5185
5207
  serve_lib.sync_down_logs(service_name,
5186
5208
  local_dir=str(log_dir),
5187
5209
  targets=targets_to_sync,
5188
- replica_ids=list(replica_ids))
5210
+ replica_ids=list(replica_ids),
5211
+ tail=tail)
5189
5212
  style = colorama.Style
5190
5213
  fore = colorama.Fore
5191
5214
  logger.info(f'{fore.CYAN}Service {service_name} logs: '
@@ -5227,7 +5250,8 @@ def serve_logs(
5227
5250
  serve_lib.tail_logs(service_name,
5228
5251
  target=target_component,
5229
5252
  replica_id=target_replica_id,
5230
- follow=follow)
5253
+ follow=follow,
5254
+ tail=tail)
5231
5255
  except exceptions.ClusterNotUpError:
5232
5256
  with ux_utils.print_exception_no_traceback():
5233
5257
  raise
@@ -5485,19 +5509,27 @@ def api_status(request_ids: Optional[List[str]], all_status: bool,
5485
5509
  columns.append('Cluster')
5486
5510
  columns.extend(['Created', 'Status'])
5487
5511
  table = log_utils.create_table(columns)
5488
- for request in request_list:
5489
- r_id = request.request_id
5490
- if not verbose:
5491
- r_id = common_utils.truncate_long_string(r_id, 36)
5492
- req_status = requests.RequestStatus(request.status)
5493
- row = [r_id, request.user_name, request.name]
5512
+ if len(request_list) > 0:
5513
+ for request in request_list:
5514
+ r_id = request.request_id
5515
+ if not verbose:
5516
+ r_id = common_utils.truncate_long_string(r_id, 36)
5517
+ req_status = requests.RequestStatus(request.status)
5518
+ row = [r_id, request.user_name, request.name]
5519
+ if verbose:
5520
+ row.append(request.cluster_name)
5521
+ row.extend([
5522
+ log_utils.readable_time_duration(request.created_at),
5523
+ req_status.colored_str()
5524
+ ])
5525
+ table.add_row(row)
5526
+ else:
5527
+ # add dummy data for when api server is down.
5528
+ dummy_row = ['-'] * 5
5494
5529
  if verbose:
5495
- row.append(request.cluster_name)
5496
- row.extend([
5497
- log_utils.readable_time_duration(request.created_at),
5498
- req_status.colored_str()
5499
- ])
5500
- table.add_row(row)
5530
+ dummy_row.append('-')
5531
+ table.add_row(dummy_row)
5532
+ click.echo()
5501
5533
  click.echo(table)
5502
5534
 
5503
5535
 
@@ -5545,6 +5577,12 @@ def api_login(endpoint: Optional[str], relogin: bool,
5545
5577
  sdk.api_login(endpoint, relogin, service_account_token)
5546
5578
 
5547
5579
 
5580
+ @api.command('logout', cls=_DocumentedCodeCommand)
5581
+ def api_logout():
5582
+ """Logs out of the api server"""
5583
+ sdk.api_logout()
5584
+
5585
+
5548
5586
  @api.command('info', cls=_DocumentedCodeCommand)
5549
5587
  @flags.config_option(expose_value=False)
5550
5588
  @usage_lib.entrypoint
sky/client/sdk.py CHANGED
@@ -29,7 +29,6 @@ import colorama
29
29
  import filelock
30
30
 
31
31
  from sky import admin_policy
32
- from sky import backends
33
32
  from sky import exceptions
34
33
  from sky import sky_logging
35
34
  from sky import skypilot_config
@@ -64,6 +63,7 @@ if typing.TYPE_CHECKING:
64
63
  import requests
65
64
 
66
65
  import sky
66
+ from sky import backends
67
67
  else:
68
68
  psutil = adaptors_common.LazyImport('psutil')
69
69
 
@@ -73,6 +73,11 @@ logging.getLogger('httpx').setLevel(logging.CRITICAL)
73
73
  _LINE_PROCESSED_KEY = 'line_processed'
74
74
 
75
75
 
76
+ def reload_config() -> None:
77
+ """Reloads the client-side config."""
78
+ skypilot_config.safe_reload_config()
79
+
80
+
76
81
  def stream_response(request_id: Optional[str],
77
82
  response: 'requests.Response',
78
83
  output_stream: Optional['io.TextIOBase'] = None,
@@ -372,7 +377,7 @@ def launch(
372
377
  idle_minutes_to_autostop: Optional[int] = None,
373
378
  dryrun: bool = False,
374
379
  down: bool = False, # pylint: disable=redefined-outer-name
375
- backend: Optional[backends.Backend] = None,
380
+ backend: Optional['backends.Backend'] = None,
376
381
  optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
377
382
  no_setup: bool = False,
378
383
  clone_disk_from: Optional[str] = None,
@@ -530,7 +535,7 @@ def _launch(
530
535
  idle_minutes_to_autostop: Optional[int] = None,
531
536
  dryrun: bool = False,
532
537
  down: bool = False, # pylint: disable=redefined-outer-name
533
- backend: Optional[backends.Backend] = None,
538
+ backend: Optional['backends.Backend'] = None,
534
539
  optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
535
540
  no_setup: bool = False,
536
541
  clone_disk_from: Optional[str] = None,
@@ -639,7 +644,7 @@ def exec( # pylint: disable=redefined-builtin
639
644
  cluster_name: Optional[str] = None,
640
645
  dryrun: bool = False,
641
646
  down: bool = False, # pylint: disable=redefined-outer-name
642
- backend: Optional[backends.Backend] = None,
647
+ backend: Optional['backends.Backend'] = None,
643
648
  ) -> server_common.RequestId:
644
649
  """Executes a task on an existing cluster.
645
650
 
@@ -1849,6 +1854,18 @@ def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
1849
1854
  return server_common.get_request_id(response)
1850
1855
 
1851
1856
 
1857
+ def _local_api_server_running(kill: bool = False) -> bool:
1858
+ """Checks if the local api server is running."""
1859
+ for process in psutil.process_iter(attrs=['pid', 'cmdline']):
1860
+ cmdline = process.info['cmdline']
1861
+ if cmdline and server_common.API_SERVER_CMD in ' '.join(cmdline):
1862
+ if kill:
1863
+ subprocess_utils.kill_children_processes(
1864
+ parent_pids=[process.pid], force=True)
1865
+ return True
1866
+ return False
1867
+
1868
+
1852
1869
  @usage_lib.entrypoint
1853
1870
  @annotations.client_api
1854
1871
  def api_status(
@@ -1867,6 +1884,10 @@ def api_status(
1867
1884
  Returns:
1868
1885
  A list of request payloads.
1869
1886
  """
1887
+ if server_common.is_api_server_local() and not _local_api_server_running():
1888
+ logger.info('SkyPilot API server is not running.')
1889
+ return []
1890
+
1870
1891
  body = payloads.RequestStatusBody(request_ids=request_ids,
1871
1892
  all_status=all_status)
1872
1893
  response = server_common.make_authenticated_request(
@@ -1987,13 +2008,7 @@ def api_stop() -> None:
1987
2008
  f'Cannot kill the API server at {server_url} because it is not '
1988
2009
  f'the default SkyPilot API server started locally.')
1989
2010
 
1990
- found = False
1991
- for process in psutil.process_iter(attrs=['pid', 'cmdline']):
1992
- cmdline = process.info['cmdline']
1993
- if cmdline and server_common.API_SERVER_CMD in ' '.join(cmdline):
1994
- subprocess_utils.kill_children_processes(parent_pids=[process.pid],
1995
- force=True)
1996
- found = True
2011
+ found = _local_api_server_running(kill=True)
1997
2012
 
1998
2013
  # Remove the database for requests.
1999
2014
  server_common.clear_local_api_server_database()
@@ -2062,6 +2077,22 @@ def _save_config_updates(endpoint: Optional[str] = None,
2062
2077
  skypilot_config.reload_config()
2063
2078
 
2064
2079
 
2080
+ def _clear_api_server_config() -> None:
2081
+ """Clear endpoint and service account token from config file."""
2082
+ config_path = pathlib.Path(
2083
+ skypilot_config.get_user_config_path()).expanduser()
2084
+ with filelock.FileLock(config_path.with_suffix('.lock')):
2085
+ if not config_path.exists():
2086
+ return
2087
+
2088
+ config = skypilot_config.get_user_config()
2089
+ config = dict(config)
2090
+ del config['api_server']
2091
+
2092
+ common_utils.dump_yaml(str(config_path), config, blank=True)
2093
+ skypilot_config.reload_config()
2094
+
2095
+
2065
2096
  def _validate_endpoint(endpoint: Optional[str]) -> str:
2066
2097
  """Validate and normalize the endpoint URL."""
2067
2098
  if endpoint is None:
@@ -2318,3 +2349,22 @@ def api_login(endpoint: Optional[str] = None,
2318
2349
  endpoint)
2319
2350
  _show_logged_in_message(endpoint, dashboard_url, final_api_server_info.user,
2320
2351
  server_status)
2352
+
2353
+
2354
+ @usage_lib.entrypoint
2355
+ @annotations.client_api
2356
+ def api_logout() -> None:
2357
+ """Logout of the API server.
2358
+
2359
+ Clears all cookies and settings stored in ~/.sky/config.yaml"""
2360
+ if server_common.is_api_server_local():
2361
+ with ux_utils.print_exception_no_traceback():
2362
+ raise RuntimeError('Local api server cannot be logged out. '
2363
+ 'Use `sky api stop` instead.')
2364
+
2365
+ # no need to clear cookies if it doesn't exist.
2366
+ server_common.set_api_cookie_jar(cookiejar.MozillaCookieJar(),
2367
+ create_if_not_exists=False)
2368
+ _clear_api_server_config()
2369
+ logger.info(f'{colorama.Fore.GREEN}Logged out of SkyPilot API server.'
2370
+ f'{colorama.Style.RESET_ALL}')
sky/clouds/aws.py CHANGED
@@ -404,15 +404,18 @@ class AWS(clouds.Cloud):
404
404
  return cost
405
405
 
406
406
  @classmethod
407
- def get_default_instance_type(
408
- cls,
409
- cpus: Optional[str] = None,
410
- memory: Optional[str] = None,
411
- disk_tier: Optional[resources_utils.DiskTier] = None
412
- ) -> Optional[str]:
407
+ def get_default_instance_type(cls,
408
+ cpus: Optional[str] = None,
409
+ memory: Optional[str] = None,
410
+ disk_tier: Optional[
411
+ resources_utils.DiskTier] = None,
412
+ region: Optional[str] = None,
413
+ zone: Optional[str] = None) -> Optional[str]:
413
414
  return catalog.get_default_instance_type(cpus=cpus,
414
415
  memory=memory,
415
416
  disk_tier=disk_tier,
417
+ region=region,
418
+ zone=zone,
416
419
  clouds='aws')
417
420
 
418
421
  # TODO: factor the following three methods, as they are the same logic
@@ -554,7 +557,9 @@ class AWS(clouds.Cloud):
554
557
  default_instance_type = AWS.get_default_instance_type(
555
558
  cpus=resources.cpus,
556
559
  memory=resources.memory,
557
- disk_tier=resources.disk_tier)
560
+ disk_tier=resources.disk_tier,
561
+ region=resources.region,
562
+ zone=resources.zone)
558
563
  if default_instance_type is None:
559
564
  return resources_utils.FeasibleResources([], [], None)
560
565
  else:
sky/clouds/azure.py CHANGED
@@ -154,15 +154,18 @@ class Azure(clouds.Cloud):
154
154
  return cost
155
155
 
156
156
  @classmethod
157
- def get_default_instance_type(
158
- cls,
159
- cpus: Optional[str] = None,
160
- memory: Optional[str] = None,
161
- disk_tier: Optional[resources_utils.DiskTier] = None
162
- ) -> Optional[str]:
157
+ def get_default_instance_type(cls,
158
+ cpus: Optional[str] = None,
159
+ memory: Optional[str] = None,
160
+ disk_tier: Optional[
161
+ resources_utils.DiskTier] = None,
162
+ region: Optional[str] = None,
163
+ zone: Optional[str] = None) -> Optional[str]:
163
164
  return catalog.get_default_instance_type(cpus=cpus,
164
165
  memory=memory,
165
166
  disk_tier=disk_tier,
167
+ region=region,
168
+ zone=zone,
166
169
  clouds='azure')
167
170
 
168
171
  @classmethod
@@ -499,7 +502,9 @@ class Azure(clouds.Cloud):
499
502
  default_instance_type = Azure.get_default_instance_type(
500
503
  cpus=resources.cpus,
501
504
  memory=resources.memory,
502
- disk_tier=resources.disk_tier)
505
+ disk_tier=resources.disk_tier,
506
+ region=resources.region,
507
+ zone=resources.zone)
503
508
  if default_instance_type is None:
504
509
  return resources_utils.FeasibleResources([], [], None)
505
510
  else:
sky/clouds/cloud.py CHANGED
@@ -341,14 +341,15 @@ class Cloud:
341
341
  raise NotImplementedError
342
342
 
343
343
  @classmethod
344
- def get_default_instance_type(
345
- cls,
346
- cpus: Optional[str] = None,
347
- memory: Optional[str] = None,
348
- disk_tier: Optional[resources_utils.DiskTier] = None
349
- ) -> Optional[str]:
350
- """Returns the default instance type with the given #vCPUs, memory and
351
- disk tier.
344
+ def get_default_instance_type(cls,
345
+ cpus: Optional[str] = None,
346
+ memory: Optional[str] = None,
347
+ disk_tier: Optional[
348
+ resources_utils.DiskTier] = None,
349
+ region: Optional[str] = None,
350
+ zone: Optional[str] = None) -> Optional[str]:
351
+ """Returns the default instance type with the given #vCPUs, memory,
352
+ disk tier, region, and zone.
352
353
 
353
354
  For example, if cpus='4', this method returns the default instance type
354
355
  with 4 vCPUs. If cpus='4+', this method returns the default instance
sky/clouds/cudo.py CHANGED
@@ -175,14 +175,18 @@ class Cudo(clouds.Cloud):
175
175
  return 0.0
176
176
 
177
177
  @classmethod
178
- def get_default_instance_type(
179
- cls,
180
- cpus: Optional[str] = None,
181
- memory: Optional[str] = None,
182
- disk_tier: Optional[resources_utils.DiskTier] = None
183
- ) -> Optional[str]:
178
+ def get_default_instance_type(cls,
179
+ cpus: Optional[str] = None,
180
+ memory: Optional[str] = None,
181
+ disk_tier: Optional[
182
+ resources_utils.DiskTier] = None,
183
+ region: Optional[str] = None,
184
+ zone: Optional[str] = None) -> Optional[str]:
184
185
  return catalog.get_default_instance_type(cpus=cpus,
185
186
  memory=memory,
187
+ disk_tier=disk_tier,
188
+ region=region,
189
+ zone=zone,
186
190
  clouds='cudo')
187
191
 
188
192
  @classmethod
@@ -251,7 +255,9 @@ class Cudo(clouds.Cloud):
251
255
  default_instance_type = Cudo.get_default_instance_type(
252
256
  cpus=resources.cpus,
253
257
  memory=resources.memory,
254
- disk_tier=resources.disk_tier)
258
+ disk_tier=resources.disk_tier,
259
+ region=resources.region,
260
+ zone=resources.zone)
255
261
  if default_instance_type is None:
256
262
  return resources_utils.FeasibleResources([], [], None)
257
263
  else:
sky/clouds/do.py CHANGED
@@ -156,16 +156,19 @@ class DO(clouds.Cloud):
156
156
  return self._REPR
157
157
 
158
158
  @classmethod
159
- def get_default_instance_type(
160
- cls,
161
- cpus: Optional[str] = None,
162
- memory: Optional[str] = None,
163
- disk_tier: Optional[resources_utils.DiskTier] = None,
164
- ) -> Optional[str]:
159
+ def get_default_instance_type(cls,
160
+ cpus: Optional[str] = None,
161
+ memory: Optional[str] = None,
162
+ disk_tier: Optional[
163
+ resources_utils.DiskTier] = None,
164
+ region: Optional[str] = None,
165
+ zone: Optional[str] = None) -> Optional[str]:
165
166
  """Returns the default instance type for DO."""
166
167
  return catalog.get_default_instance_type(cpus=cpus,
167
168
  memory=memory,
168
169
  disk_tier=disk_tier,
170
+ region=region,
171
+ zone=zone,
169
172
  clouds='DO')
170
173
 
171
174
  @classmethod
@@ -246,7 +249,9 @@ class DO(clouds.Cloud):
246
249
  default_instance_type = DO.get_default_instance_type(
247
250
  cpus=resources.cpus,
248
251
  memory=resources.memory,
249
- disk_tier=resources.disk_tier)
252
+ disk_tier=resources.disk_tier,
253
+ region=resources.region,
254
+ zone=resources.zone)
250
255
  if default_instance_type is None:
251
256
  return resources_utils.FeasibleResources([], [], None)
252
257
  else: