skypilot-nightly 1.0.0.dev20250617__py3-none-any.whl → 1.0.0.dev20250618__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +7 -0
  3. sky/backends/cloud_vm_ray_backend.py +48 -36
  4. sky/cli.py +5 -5729
  5. sky/client/cli.py +11 -2
  6. sky/client/sdk.py +22 -2
  7. sky/clouds/kubernetes.py +5 -0
  8. sky/dashboard/out/404.html +1 -1
  9. sky/dashboard/out/_next/static/{vA3PPpkBwpRTRNBHFYAw_ → LRpGymRCqq-feuFyoWz4m}/_buildManifest.js +1 -1
  10. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +1 -0
  11. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +50 -0
  12. sky/dashboard/out/_next/static/chunks/pages/users-928edf039219e47b.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/webpack-ebc2404fd6ce581c.js +1 -0
  14. sky/dashboard/out/_next/static/css/6c12ecc3bd2239b6.css +3 -0
  15. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  16. sky/dashboard/out/clusters/[cluster].html +1 -1
  17. sky/dashboard/out/clusters.html +1 -1
  18. sky/dashboard/out/config.html +1 -1
  19. sky/dashboard/out/index.html +1 -1
  20. sky/dashboard/out/infra/[context].html +1 -1
  21. sky/dashboard/out/infra.html +1 -1
  22. sky/dashboard/out/jobs/[job].html +1 -1
  23. sky/dashboard/out/jobs.html +1 -1
  24. sky/dashboard/out/users.html +1 -1
  25. sky/dashboard/out/workspace/new.html +1 -1
  26. sky/dashboard/out/workspaces/[name].html +1 -1
  27. sky/dashboard/out/workspaces.html +1 -1
  28. sky/global_user_state.py +50 -11
  29. sky/logs/__init__.py +17 -0
  30. sky/logs/agent.py +73 -0
  31. sky/logs/gcp.py +91 -0
  32. sky/models.py +1 -0
  33. sky/provision/instance_setup.py +35 -0
  34. sky/provision/provisioner.py +11 -0
  35. sky/server/common.py +21 -9
  36. sky/server/requests/payloads.py +19 -1
  37. sky/server/server.py +121 -29
  38. sky/setup_files/dependencies.py +11 -1
  39. sky/skylet/constants.py +9 -1
  40. sky/skylet/job_lib.py +75 -19
  41. sky/templates/kubernetes-ray.yml.j2 +9 -0
  42. sky/users/permission.py +49 -19
  43. sky/users/rbac.py +10 -1
  44. sky/users/server.py +274 -9
  45. sky/utils/schemas.py +40 -0
  46. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/METADATA +9 -1
  47. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/RECORD +58 -54
  48. sky/dashboard/out/_next/static/chunks/600.bd2ed8c076b720ec.js +0 -16
  49. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +0 -1
  50. sky/dashboard/out/_next/static/chunks/webpack-1b69b196a4dbffef.js +0 -1
  51. sky/dashboard/out/_next/static/css/8e97adcaacc15293.css +0 -3
  52. /sky/dashboard/out/_next/static/{vA3PPpkBwpRTRNBHFYAw_ → LRpGymRCqq-feuFyoWz4m}/_ssgManifest.js +0 -0
  53. /sky/dashboard/out/_next/static/chunks/{37-824c707421f6f003.js → 37-3a4d77ad62932eaf.js} +0 -0
  54. /sky/dashboard/out/_next/static/chunks/{843-ab9c4f609239155f.js → 843-b3040e493f6e7947.js} +0 -0
  55. /sky/dashboard/out/_next/static/chunks/{938-385d190b95815e11.js → 938-1493ac755eadeb35.js} +0 -0
  56. /sky/dashboard/out/_next/static/chunks/{973-c807fc34f09c7df3.js → 973-db3c97c2bfbceb65.js} +0 -0
  57. /sky/dashboard/out/_next/static/chunks/pages/{_app-32b2caae3445bf3b.js → _app-c416e87d5c2715cf.js} +0 -0
  58. /sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-c8c2191328532b7d.js → [name]-c4ff1ec05e2f3daf.js} +0 -0
  59. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/WHEEL +0 -0
  60. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/entry_points.txt +0 -0
  61. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/licenses/LICENSE +0 -0
  62. {skypilot_nightly-1.0.0.dev20250617.dist-info → skypilot_nightly-1.0.0.dev20250618.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/8e97adcaacc15293.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/8e97adcaacc15293.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-1b69b196a4dbffef.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-32b2caae3445bf3b.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js" defer=""></script><script src="/dashboard/_next/static/vA3PPpkBwpRTRNBHFYAw_/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/vA3PPpkBwpRTRNBHFYAw_/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"vA3PPpkBwpRTRNBHFYAw_","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/6c12ecc3bd2239b6.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/6c12ecc3bd2239b6.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ebc2404fd6ce581c.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c416e87d5c2715cf.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js" defer=""></script><script src="/dashboard/_next/static/LRpGymRCqq-feuFyoWz4m/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/LRpGymRCqq-feuFyoWz4m/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"LRpGymRCqq-feuFyoWz4m","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/8e97adcaacc15293.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/8e97adcaacc15293.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-1b69b196a4dbffef.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-32b2caae3445bf3b.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-d6128fa9e7cae6e6.js" defer=""></script><script src="/dashboard/_next/static/chunks/760-a89d354797ce7af5.js" defer=""></script><script src="/dashboard/_next/static/chunks/799-3625946b2ec2eb30.js" defer=""></script><script src="/dashboard/_next/static/chunks/804-4c9fc53aa74bc191.js" defer=""></script><script src="/dashboard/_next/static/chunks/664-047bc03493fda379.js" defer=""></script><script src="/dashboard/_next/static/chunks/798-c0525dc3f21e488d.js" defer=""></script><script src="/dashboard/_next/static/chunks/947-6620842ef80ae879.js" defer=""></script><script src="/dashboard/_next/static/chunks/470-4d1a5dbe58a8a2b9.js" defer=""></script><script src="/dashboard/_next/static/chunks/901-b424d293275e1fd7.js" defer=""></script><script src="/dashboard/_next/static/chunks/969-20d54a9d998dc102.js" defer=""></script><script src="/dashboard/_next/static/chunks/856-c2c39c0912285e54.js" defer=""></script><script src="/dashboard/_next/static/chunks/973-c807fc34f09c7df3.js" defer=""></script><script src="/dashboard/_next/static/chunks/938-385d190b95815e11.js" defer=""></script><script src="/dashboard/_next/static/chunks/843-ab9c4f609239155f.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-c8c2191328532b7d.js" defer=""></script><script src="/dashboard/_next/static/vA3PPpkBwpRTRNBHFYAw_/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/vA3PPpkBwpRTRNBHFYAw_/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"vA3PPpkBwpRTRNBHFYAw_","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/6c12ecc3bd2239b6.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/6c12ecc3bd2239b6.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ebc2404fd6ce581c.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c416e87d5c2715cf.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-d6128fa9e7cae6e6.js" defer=""></script><script src="/dashboard/_next/static/chunks/760-a89d354797ce7af5.js" defer=""></script><script src="/dashboard/_next/static/chunks/799-3625946b2ec2eb30.js" defer=""></script><script src="/dashboard/_next/static/chunks/804-4c9fc53aa74bc191.js" defer=""></script><script src="/dashboard/_next/static/chunks/664-047bc03493fda379.js" defer=""></script><script src="/dashboard/_next/static/chunks/798-c0525dc3f21e488d.js" defer=""></script><script src="/dashboard/_next/static/chunks/947-6620842ef80ae879.js" defer=""></script><script src="/dashboard/_next/static/chunks/470-4d1a5dbe58a8a2b9.js" defer=""></script><script src="/dashboard/_next/static/chunks/901-b424d293275e1fd7.js" defer=""></script><script src="/dashboard/_next/static/chunks/969-20d54a9d998dc102.js" defer=""></script><script src="/dashboard/_next/static/chunks/856-c2c39c0912285e54.js" defer=""></script><script src="/dashboard/_next/static/chunks/973-db3c97c2bfbceb65.js" defer=""></script><script src="/dashboard/_next/static/chunks/938-1493ac755eadeb35.js" defer=""></script><script src="/dashboard/_next/static/chunks/843-b3040e493f6e7947.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-c4ff1ec05e2f3daf.js" defer=""></script><script src="/dashboard/_next/static/LRpGymRCqq-feuFyoWz4m/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/LRpGymRCqq-feuFyoWz4m/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"LRpGymRCqq-feuFyoWz4m","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/8e97adcaacc15293.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/8e97adcaacc15293.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-1b69b196a4dbffef.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-32b2caae3445bf3b.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js" defer=""></script><script src="/dashboard/_next/static/vA3PPpkBwpRTRNBHFYAw_/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/vA3PPpkBwpRTRNBHFYAw_/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"vA3PPpkBwpRTRNBHFYAw_","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/6c12ecc3bd2239b6.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/6c12ecc3bd2239b6.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ebc2404fd6ce581c.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c416e87d5c2715cf.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js" defer=""></script><script src="/dashboard/_next/static/LRpGymRCqq-feuFyoWz4m/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/LRpGymRCqq-feuFyoWz4m/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"LRpGymRCqq-feuFyoWz4m","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
sky/global_user_state.py CHANGED
@@ -64,6 +64,7 @@ user_table = sqlalchemy.Table(
64
64
  Base.metadata,
65
65
  sqlalchemy.Column('id', sqlalchemy.Text, primary_key=True),
66
66
  sqlalchemy.Column('name', sqlalchemy.Text),
67
+ sqlalchemy.Column('password', sqlalchemy.Text),
67
68
  )
68
69
 
69
70
  cluster_table = sqlalchemy.Table(
@@ -301,6 +302,12 @@ def create_table():
301
302
  'last_creation_command',
302
303
  sqlalchemy.Text(),
303
304
  default_statement='DEFAULT NULL')
305
+ db_utils.add_column_to_table_sqlalchemy(
306
+ session,
307
+ 'users',
308
+ 'password',
309
+ sqlalchemy.Text(),
310
+ default_statement='DEFAULT NULL')
304
311
  session.commit()
305
312
 
306
313
 
@@ -358,7 +365,9 @@ def add_or_update_user(user: models.User) -> bool:
358
365
 
359
366
  # First try INSERT OR IGNORE - this won't fail if user exists
360
367
  insert_stmnt = insert_func(user_table).prefix_with(
361
- 'OR IGNORE').values(id=user.id, name=user.name)
368
+ 'OR IGNORE').values(id=user.id,
369
+ name=user.name,
370
+ password=user.password)
362
371
  result = session.execute(insert_stmnt)
363
372
 
364
373
  # Check if the INSERT actually inserted a row
@@ -366,8 +375,14 @@ def add_or_update_user(user: models.User) -> bool:
366
375
 
367
376
  if not was_inserted:
368
377
  # User existed, so update it
369
- session.query(user_table).filter_by(id=user.id).update(
370
- {user_table.c.name: user.name})
378
+ if user.password:
379
+ session.query(user_table).filter_by(id=user.id).update({
380
+ user_table.c.name: user.name,
381
+ user_table.c.password: user.password
382
+ })
383
+ else:
384
+ session.query(user_table).filter_by(id=user.id).update(
385
+ {user_table.c.name: user.name})
371
386
 
372
387
  session.commit()
373
388
  return was_inserted
@@ -377,15 +392,19 @@ def add_or_update_user(user: models.User) -> bool:
377
392
  # For PostgreSQL, use INSERT ... ON CONFLICT with RETURNING to
378
393
  # detect insert vs update
379
394
  insert_func = postgresql.insert
380
- insert_stmnt = insert_func(user_table).values(id=user.id,
381
- name=user.name)
395
+ insert_stmnt = insert_func(user_table).values(
396
+ id=user.id, name=user.name, password=user.password)
382
397
 
383
398
  # Use a sentinel in the RETURNING clause to detect insert vs update
399
+ if user.password:
400
+ set_ = {
401
+ user_table.c.name: user.name,
402
+ user_table.c.password: user.password
403
+ }
404
+ else:
405
+ set_ = {user_table.c.name: user.name}
384
406
  upsert_stmnt = insert_stmnt.on_conflict_do_update(
385
- index_elements=[user_table.c.id],
386
- set_={
387
- user_table.c.name: user.name
388
- }).returning(
407
+ index_elements=[user_table.c.id], set_=set_).returning(
389
408
  user_table.c.id,
390
409
  # This will be True for INSERT, False for UPDATE
391
410
  sqlalchemy.literal_column('(xmax = 0)').label('was_inserted'
@@ -407,7 +426,24 @@ def get_user(user_id: str) -> Optional[models.User]:
407
426
  row = session.query(user_table).filter_by(id=user_id).first()
408
427
  if row is None:
409
428
  return None
410
- return models.User(id=row.id, name=row.name)
429
+ return models.User(id=row.id, name=row.name, password=row.password)
430
+
431
+
432
+ def get_user_by_name(username: str) -> List[models.User]:
433
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
434
+ rows = session.query(user_table).filter_by(name=username).all()
435
+ if len(rows) == 0:
436
+ return []
437
+ return [
438
+ models.User(id=row.id, name=row.name, password=row.password)
439
+ for row in rows
440
+ ]
441
+
442
+
443
+ def delete_user(user_id: str) -> None:
444
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
445
+ session.query(user_table).filter_by(id=user_id).delete()
446
+ session.commit()
411
447
 
412
448
 
413
449
  @_init_db
@@ -415,7 +451,10 @@ def get_all_users() -> List[models.User]:
415
451
  assert _SQLALCHEMY_ENGINE is not None
416
452
  with orm.Session(_SQLALCHEMY_ENGINE) as session:
417
453
  rows = session.query(user_table).all()
418
- return [models.User(id=row.id, name=row.name) for row in rows]
454
+ return [
455
+ models.User(id=row.id, name=row.name, password=row.password)
456
+ for row in rows
457
+ ]
419
458
 
420
459
 
421
460
  @_init_db
sky/logs/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """Sky logging agents."""
2
+ from typing import Optional
3
+
4
+ from sky import exceptions
5
+ from sky import skypilot_config
6
+ from sky.logs.agent import LoggingAgent
7
+ from sky.logs.gcp import GCPLoggingAgent
8
+
9
+
10
+ def get_logging_agent() -> Optional[LoggingAgent]:
11
+ store = skypilot_config.get_nested(('logs', 'store'), None)
12
+ if store is None:
13
+ return None
14
+ if store == 'gcp':
15
+ return GCPLoggingAgent(skypilot_config.get_nested(('logs', 'gcp'), {}))
16
+ raise exceptions.InvalidSkyPilotConfigError(
17
+ f'Invalid logging store: {store}')
sky/logs/agent.py ADDED
@@ -0,0 +1,73 @@
1
+ """Base class for all logging agents."""
2
+ import abc
3
+ import os
4
+ import shlex
5
+ from typing import Any, Dict
6
+
7
+ from sky.skylet import constants
8
+ from sky.utils import common_utils
9
+ from sky.utils import resources_utils
10
+
11
+
12
+ class LoggingAgent(abc.ABC):
13
+ """Base class for all logging agents.
14
+
15
+ Each agent should implement the `get_setup_command` and
16
+ `get_credential_file_mounts` methods to return the setup command and
17
+ credential file mounts for the agent for provisioner to setup the agent
18
+ on each node.
19
+ """
20
+
21
+ @abc.abstractmethod
22
+ def get_setup_command(self,
23
+ cluster_name: resources_utils.ClusterName) -> str:
24
+ pass
25
+
26
+ @abc.abstractmethod
27
+ def get_credential_file_mounts(self) -> Dict[str, str]:
28
+ pass
29
+
30
+
31
+ class FluentbitAgent(LoggingAgent):
32
+ """Base class for logging store that use fluentbit as the agent."""
33
+
34
+ def get_setup_command(self,
35
+ cluster_name: resources_utils.ClusterName) -> str:
36
+ install_cmd = (
37
+ 'if ! command -v fluent-bit >/dev/null 2>&1; then '
38
+ 'sudo apt-get install -y gnupg; '
39
+ # pylint: disable=line-too-long
40
+ 'curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh; '
41
+ 'fi')
42
+ cfg = self.fluentbit_config(cluster_name)
43
+ cfg_path = os.path.join(constants.LOGGING_CONFIG_DIR, 'fluentbit.yaml')
44
+ config_cmd = (f'mkdir -p {constants.LOGGING_CONFIG_DIR} && '
45
+ f'echo {shlex.quote(cfg)} > {cfg_path}')
46
+ start_cmd = ('nohup $(command -v fluent-bit || '
47
+ 'echo "/opt/fluent-bit/bin/fluent-bit") '
48
+ f'-c {cfg_path} > /tmp/fluentbit.log 2>&1 &')
49
+ return f'set -e; {install_cmd}; {config_cmd}; {start_cmd}'
50
+
51
+ def fluentbit_config(self,
52
+ cluster_name: resources_utils.ClusterName) -> str:
53
+ cfg_dict = {
54
+ 'pipeline': {
55
+ 'inputs': [{
56
+ 'name': 'tail',
57
+ 'path': f'{constants.SKY_LOGS_DIRECTORY}/*/*.log',
58
+ 'path_key': 'log_path',
59
+ # Shorten the refresh interval from 60s to 1s since every
60
+ # job creates a new log file and we must be responsive
61
+ # for this: the VM might be autodown within a minute
62
+ # right after the job completion.
63
+ 'refresh_interval': 1,
64
+ }],
65
+ 'outputs': [self.fluentbit_output_config(cluster_name)],
66
+ }
67
+ }
68
+ return common_utils.dump_yaml_str(cfg_dict)
69
+
70
+ @abc.abstractmethod
71
+ def fluentbit_output_config(
72
+ self, cluster_name: resources_utils.ClusterName) -> Dict[str, Any]:
73
+ pass
sky/logs/gcp.py ADDED
@@ -0,0 +1,91 @@
1
+ """GCP logging agent."""
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+ import pydantic
6
+
7
+ from sky.clouds import gcp
8
+ from sky.logs.agent import FluentbitAgent
9
+ from sky.utils import resources_utils
10
+
11
+
12
+ class _GCPLoggingConfig(pydantic.BaseModel):
13
+ """Configuration for GCP logging agent."""
14
+ project_id: Optional[str] = None
15
+ credentials_file: Optional[str] = None
16
+ additional_labels: Optional[Dict[str, str]] = None
17
+
18
+
19
+ class _StackdriverOutputConfig(pydantic.BaseModel):
20
+ """Auxiliary model for building stackdriver output config in YAML.
21
+
22
+ Ref: https://docs.fluentbit.io/manual/1.7/pipeline/outputs/stackdriver
23
+ """
24
+ name: str = 'stackdriver'
25
+ match: str = '*'
26
+ export_to_project_id: Optional[str] = None
27
+ labels: Optional[Dict[str, str]] = None
28
+
29
+ def to_dict(self) -> Dict[str, Any]:
30
+ config = self.model_dump(exclude_none=True)
31
+ if self.labels:
32
+ # Replace the label format from `{k: v}` to `k=v`
33
+ label_str = ','.join([f'{k}={v}' for k, v in self.labels.items()])
34
+ config['labels'] = label_str
35
+ return config
36
+
37
+
38
+ class GCPLoggingAgent(FluentbitAgent):
39
+ """GCP logging agent."""
40
+
41
+ def __init__(self, config: Dict[str, Any]):
42
+ self.config = _GCPLoggingConfig(**config)
43
+
44
+ def get_setup_command(self,
45
+ cluster_name: resources_utils.ClusterName) -> str:
46
+ credential_path = gcp.DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH
47
+ if self.config.credentials_file:
48
+ credential_path = self.config.credentials_file
49
+ # Set GOOGLE_APPLICATION_CREDENTIALS and check whether credentials
50
+ # is valid.
51
+ # Stackdriver only support service account credentials or credentials
52
+ # from metadata server (only available on GCE or GKE). If the default
53
+ # credentials uploaded by API server is NOT a service account key and
54
+ # there is NO metadata server available, the logging agent will fail to
55
+ # authenticate and we require the user to upload a service account key
56
+ # via logs.gcp.credentials_file in this case.
57
+ # Also note that we use env var instead of YAML config to specify the
58
+ # service account key file path in order to resolve the home directory
59
+ # more reliably.
60
+ # Ref: https://github.com/fluent/fluent-bit/issues/8804
61
+ # TODO(aylei): check whether the credentials config is valid before
62
+ # provision.
63
+ pre_cmd = (f'export GOOGLE_APPLICATION_CREDENTIALS={credential_path}; '
64
+ f'cat {credential_path} | grep "service_account" || '
65
+ f'(echo "Credentials file {credential_path} is not a '
66
+ 'service account key, check metadata server" && '
67
+ 'curl -s http://metadata.google.internal >/dev/null || '
68
+ f'(echo "Neither service account key nor metadata server is '
69
+ 'available. Set logs.gcp.credentials_file to a service '
70
+ 'account key in server config and retry." && '
71
+ 'exit 1;))')
72
+ return pre_cmd + ' && ' + super().get_setup_command(cluster_name)
73
+
74
+ def fluentbit_output_config(
75
+ self, cluster_name: resources_utils.ClusterName) -> Dict[str, Any]:
76
+ display_name = cluster_name.display_name
77
+ unique_name = cluster_name.name_on_cloud
78
+
79
+ return _StackdriverOutputConfig(
80
+ export_to_project_id=self.config.project_id,
81
+ labels={
82
+ 'skypilot_cluster_name': display_name,
83
+ 'skypilot_cluster_id': unique_name,
84
+ **(self.config.additional_labels or {})
85
+ },
86
+ ).to_dict()
87
+
88
+ def get_credential_file_mounts(self) -> Dict[str, str]:
89
+ if self.config.credentials_file:
90
+ return {self.config.credentials_file: self.config.credentials_file}
91
+ return {}
sky/models.py CHANGED
@@ -17,6 +17,7 @@ class User:
17
17
  id: str
18
18
  # Display name of the user
19
19
  name: Optional[str] = None
20
+ password: Optional[str] = None
20
21
 
21
22
  def to_dict(self) -> Dict[str, Any]:
22
23
  return {'id': self.id, 'name': self.name}
@@ -8,6 +8,7 @@ import time
8
8
  from typing import Any, Callable, Dict, List, Optional, Tuple
9
9
 
10
10
  from sky import exceptions
11
+ from sky import logs
11
12
  from sky import provision
12
13
  from sky import sky_logging
13
14
  from sky.provision import common
@@ -21,6 +22,7 @@ from sky.utils import accelerator_registry
21
22
  from sky.utils import command_runner
22
23
  from sky.utils import common_utils
23
24
  from sky.utils import env_options
25
+ from sky.utils import resources_utils
24
26
  from sky.utils import subprocess_utils
25
27
  from sky.utils import timeline
26
28
  from sky.utils import ux_utils
@@ -557,3 +559,36 @@ def internal_file_mounts(cluster_name: str, common_file_mounts: Dict[str, str],
557
559
  ssh_credentials=ssh_credentials,
558
560
  max_workers=subprocess_utils.get_max_workers_for_file_mounts(
559
561
  common_file_mounts, cluster_info.provider_name))
562
+
563
+
564
+ @common.log_function_start_end
565
+ @timeline.event
566
+ def setup_logging_on_cluster(logging_agent: logs.LoggingAgent,
567
+ cluster_name: resources_utils.ClusterName,
568
+ cluster_info: common.ClusterInfo,
569
+ ssh_credentials: Dict[str, Any]) -> None:
570
+ """Setup logging agent (fluentbit) on all nodes after provisioning."""
571
+ _hint_worker_log_path(cluster_name.name_on_cloud, cluster_info,
572
+ 'logging_setup')
573
+
574
+ @_auto_retry()
575
+ def _setup_node(runner: command_runner.CommandRunner, log_path: str):
576
+ cmd = logging_agent.get_setup_command(cluster_name)
577
+ logger.info(f'Running command on node: {cmd}')
578
+ returncode, stdout, stderr = runner.run(cmd,
579
+ stream_logs=False,
580
+ require_outputs=True,
581
+ log_path=log_path,
582
+ source_bashrc=True)
583
+ if returncode:
584
+ raise RuntimeError(f'Failed to setup logging agent\n{cmd}\n'
585
+ f'(exit code {returncode}). Error: '
586
+ f'===== stdout ===== \n{stdout}\n'
587
+ f'===== stderr ====={stderr}')
588
+
589
+ _parallel_ssh_with_cache(_setup_node,
590
+ cluster_name.name_on_cloud,
591
+ stage_name='logging_setup',
592
+ digest=None,
593
+ cluster_info=cluster_info,
594
+ ssh_credentials=ssh_credentials)
@@ -16,6 +16,7 @@ import sky
16
16
  from sky import clouds
17
17
  from sky import exceptions
18
18
  from sky import global_user_state
19
+ from sky import logs
19
20
  from sky import provision
20
21
  from sky import sky_logging
21
22
  from sky import skypilot_config
@@ -648,6 +649,15 @@ def _post_provision_setup(
648
649
  logger.debug('Ray cluster is ready. Skip starting ray cluster on '
649
650
  'worker nodes.')
650
651
 
652
+ logging_agent = logs.get_logging_agent()
653
+ if logging_agent:
654
+ status.update(
655
+ ux_utils.spinner_message('Setting up logging agent',
656
+ provision_logging.config.log_path))
657
+ instance_setup.setup_logging_on_cluster(logging_agent, cluster_name,
658
+ cluster_info,
659
+ ssh_credentials)
660
+
651
661
  instance_setup.start_skylet_on_head_node(cluster_name.name_on_cloud,
652
662
  cluster_info, ssh_credentials)
653
663
 
@@ -672,6 +682,7 @@ def post_provision_runtime_setup(
672
682
  and other necessary files to the VM.
673
683
  3. Run setup commands to install dependencies.
674
684
  4. Start ray cluster and skylet.
685
+ 5. (Optional) Setup logging agent.
675
686
 
676
687
  Raises:
677
688
  RuntimeError: If the setup process encounters any error.
sky/server/common.py CHANGED
@@ -13,7 +13,7 @@ import subprocess
13
13
  import sys
14
14
  import time
15
15
  import typing
16
- from typing import Any, Dict, Literal, Optional
16
+ from typing import Any, Dict, Literal, Optional, Tuple
17
17
  from urllib import parse
18
18
  import uuid
19
19
 
@@ -128,6 +128,8 @@ class ApiServerInfo:
128
128
  version: Optional[str] = None
129
129
  version_on_disk: Optional[str] = None
130
130
  commit: Optional[str] = None
131
+ user: Optional[Dict[str, Any]] = None
132
+ basic_auth_enabled: bool = False
131
133
 
132
134
 
133
135
  def get_api_cookie_jar_path() -> pathlib.Path:
@@ -261,11 +263,15 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
261
263
  version = result.get('version')
262
264
  version_on_disk = result.get('version_on_disk')
263
265
  commit = result.get('commit')
266
+ user = result.get('user')
267
+ basic_auth_enabled = result.get('basic_auth_enabled')
264
268
  server_info = ApiServerInfo(status=ApiServerStatus.HEALTHY,
265
269
  api_version=api_version,
266
270
  version=version,
267
271
  version_on_disk=version_on_disk,
268
- commit=commit)
272
+ commit=commit,
273
+ user=user,
274
+ basic_auth_enabled=basic_auth_enabled)
269
275
  if api_version is None or version is None or commit is None:
270
276
  logger.warning(f'API server response missing '
271
277
  f'version info. {server_url} may '
@@ -320,7 +326,8 @@ def get_request_id(response: 'requests.Response') -> RequestId:
320
326
 
321
327
  def _start_api_server(deploy: bool = False,
322
328
  host: str = '127.0.0.1',
323
- foreground: bool = False):
329
+ foreground: bool = False,
330
+ enable_basic_auth: bool = False):
324
331
  """Starts a SkyPilot API server locally."""
325
332
  server_url = get_server_url(host)
326
333
  assert server_url in AVAILABLE_LOCAL_API_SERVER_URLS, (
@@ -354,6 +361,8 @@ def _start_api_server(deploy: bool = False,
354
361
  if foreground:
355
362
  # Replaces the current process with the API server
356
363
  os.environ[constants.ENV_VAR_IS_SKYPILOT_SERVER] = 'true'
364
+ if enable_basic_auth:
365
+ os.environ[constants.ENV_VAR_ENABLE_BASIC_AUTH] = 'true'
357
366
  os.execvp(args[0], args)
358
367
 
359
368
  log_path = os.path.expanduser(constants.API_SERVER_LOGS)
@@ -365,6 +374,8 @@ def _start_api_server(deploy: bool = False,
365
374
  # the API server.
366
375
  server_env = os.environ.copy()
367
376
  server_env[constants.ENV_VAR_IS_SKYPILOT_SERVER] = 'true'
377
+ if enable_basic_auth:
378
+ server_env[constants.ENV_VAR_ENABLE_BASIC_AUTH] = 'true'
368
379
  with open(log_path, 'w', encoding='utf-8') as log_file:
369
380
  # Because the log file is opened using a with statement, it may seem
370
381
  # that the file will be closed when the with statement is exited
@@ -428,10 +439,10 @@ def _start_api_server(deploy: bool = False,
428
439
 
429
440
  def check_server_healthy(
430
441
  endpoint: Optional[str] = None
431
- ) -> Literal[
442
+ ) -> Tuple[Literal[
432
443
  # Use an incomplete list of Literals here to enforce raising for other
433
444
  # enum values.
434
- ApiServerStatus.HEALTHY, ApiServerStatus.NEEDS_AUTH]:
445
+ ApiServerStatus.HEALTHY, ApiServerStatus.NEEDS_AUTH], ApiServerInfo]:
435
446
  """Check if the API server is healthy.
436
447
 
437
448
  Args:
@@ -508,7 +519,7 @@ def check_server_healthy(
508
519
 
509
520
  hinted_for_server_install_version_mismatch = True
510
521
 
511
- return api_server_status
522
+ return api_server_status, api_server_info
512
523
 
513
524
 
514
525
  def _get_version_info_hint(server_info: ApiServerInfo) -> str:
@@ -559,10 +570,11 @@ def get_skypilot_version_on_disk() -> str:
559
570
 
560
571
  def check_server_healthy_or_start_fn(deploy: bool = False,
561
572
  host: str = '127.0.0.1',
562
- foreground: bool = False):
573
+ foreground: bool = False,
574
+ enable_basic_auth: bool = False):
563
575
  api_server_status = None
564
576
  try:
565
- api_server_status = check_server_healthy()
577
+ api_server_status, _ = check_server_healthy()
566
578
  if api_server_status == ApiServerStatus.NEEDS_AUTH:
567
579
  endpoint = get_server_url()
568
580
  with ux_utils.print_exception_no_traceback():
@@ -580,7 +592,7 @@ def check_server_healthy_or_start_fn(deploy: bool = False,
580
592
  # have started the server while we were waiting for the lock.
581
593
  api_server_info = get_api_server_status(endpoint)
582
594
  if api_server_info.status == ApiServerStatus.UNHEALTHY:
583
- _start_api_server(deploy, host, foreground)
595
+ _start_api_server(deploy, host, foreground, enable_basic_auth)
584
596
 
585
597
 
586
598
  def check_server_healthy_or_start(func):
@@ -336,10 +336,28 @@ class ClusterJobsDownloadLogsBody(RequestBody):
336
336
  local_dir: str = constants.SKY_LOGS_DIRECTORY
337
337
 
338
338
 
339
+ class UserCreateBody(RequestBody):
340
+ """The request body for the user create endpoint."""
341
+ username: str
342
+ password: str
343
+ role: Optional[str] = None
344
+
345
+
346
+ class UserDeleteBody(RequestBody):
347
+ """The request body for the user delete endpoint."""
348
+ user_id: str
349
+
350
+
339
351
  class UserUpdateBody(RequestBody):
340
352
  """The request body for the user update endpoint."""
341
353
  user_id: str
342
- role: str
354
+ role: Optional[str] = None
355
+ password: Optional[str] = None
356
+
357
+
358
+ class UserImportBody(RequestBody):
359
+ """The request body for the user import endpoint."""
360
+ csv_content: str
343
361
 
344
362
 
345
363
  class DownloadBody(RequestBody):