coiled 1.127.1.dev16__py3-none-any.whl → 1.127.1.dev20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of coiled might be problematic. Click here for more details.

coiled/batch.py CHANGED
@@ -18,6 +18,7 @@ def run(
18
18
  software: str | None = None,
19
19
  container: str | None = None,
20
20
  run_on_host: bool | None = None,
21
+ cluster_kwargs: dict | None = None,
21
22
  env: list | dict | None = None,
22
23
  secret_env: list | dict | None = None,
23
24
  tag: list | dict | None = None,
@@ -86,6 +87,7 @@ def run(
86
87
  software=software,
87
88
  container=container,
88
89
  run_on_host=run_on_host,
90
+ cluster_kwargs=cluster_kwargs,
89
91
  env=env,
90
92
  secret_env=secret_env,
91
93
  tag=tag,
coiled/cli/batch/run.py CHANGED
@@ -804,6 +804,7 @@ def _batch_run(default_kwargs, logger=None, from_cli=False, **kwargs) -> dict:
804
804
  "package_sync_ignore": kwargs.get("package_sync_ignore"),
805
805
  "allow_cross_zone": True if kwargs["allow_cross_zone"] is None else kwargs["allow_cross_zone"],
806
806
  "scheduler_sidecars": scheduler_sidecars,
807
+ **(kwargs.get("cluster_kwargs") or {}),
807
808
  }
808
809
 
809
810
  # when task will run on scheduler, give it the same VM specs as worker node
coiled/cli/core.py CHANGED
@@ -10,6 +10,7 @@ from .env import env
10
10
  from .file import file_group
11
11
  from .hello import hello
12
12
  from .login import login
13
+ from .mpi import mpi_group
13
14
  from .notebook import notebook_group
14
15
  from .package_sync import package_sync
15
16
  from .prefect import prefect
@@ -42,3 +43,4 @@ cli.add_command(better_logs, "logs")
42
43
  cli.add_command(hello)
43
44
  cli.add_command(hello, "quickstart")
44
45
  cli.add_command(file_group)
46
+ cli.add_command(mpi_group, "mpi")
coiled/cli/mpi.py ADDED
@@ -0,0 +1,166 @@
1
+ import os.path
2
+ import shlex
3
+
4
+ import click
5
+ import fabric.connection
6
+
7
+ import coiled
8
+
9
+ from .cluster.utils import find_cluster
10
+ from .run import get_ssh_connection, write_via_ssh
11
+ from .utils import CONTEXT_SETTINGS
12
+
13
+
14
@click.command(
    context_settings=CONTEXT_SETTINGS,
)
@click.option(
    "--worker-nodes",
    default=1,
    type=int,
    help="Number of worker nodes to create in addition to the head (scheduler) node.",
)
@click.option(
    "--vm-type",
    default="g6.8xlarge",
    type=str,
    help="Instance type used for both the head node and the worker nodes.",
)
@click.option(
    "--pip",
    multiple=True,
    type=str,
    help="Package to pip-install into the host virtualenv; may be given multiple times.",
)
@click.option(
    "--idle-timeout",
    default=None,
    type=str,
    help="Idle timeout passed through to the cluster (for example '2 hours').",
)
def setup(worker_nodes, vm_type, pip, idle_timeout):
    """Create a cluster prepared for MPI jobs.

    Starts a cluster whose nodes run a host setup script (OpenMPI, a Python
    virtualenv and any --pip packages), then writes the MPI hostfile and SSH
    host keys on the head node. The cluster is left running afterwards
    (``shutdown_on_close=False``) so that ``coiled mpi run`` can use it.
    """
    setup_script = get_host_setup_script(pip_install=pip)

    cluster = coiled.Cluster(
        n_workers=worker_nodes,
        container="daskdev/dask:latest",
        allow_ssh_from="me",
        host_setup_script=setup_script,
        # Placement group + EFA + the "DL" AMI are requested for every node.
        backend_options={"use_placement_group": True, "use_efa": True, "ami_version": "DL"},
        scheduler_vm_types=[vm_type],
        worker_vm_types=[vm_type],
        worker_disk_size="100GB",
        scheduler_disk_size="100GB",
        shutdown_on_close=False,
        idle_timeout=idle_timeout,
    )

    print("Cluster created, installing software for MPI...")

    with coiled.Cloud() as cloud:
        connection = get_ssh_connection(cloud, cluster.cluster_id)

        # Blocks until the host setup script has written its completion marker.
        setup_mpi_ssh(connection)

    print("MPI is ready")
46
+
47
+
48
def _stage_local_files(command):
    """Replace local file arguments with their remote staging paths.

    Returns ``(remote_command, files)`` where ``files`` maps each remote path
    under ``/scratch/batch/`` to the text content of the local file.
    """
    remote_command = []
    files = {}
    for arg in command:
        # isfile rather than exists: an argument naming a local directory
        # (e.g. ".") cannot be read as a file and is passed through unchanged.
        if not os.path.isfile(arg):
            remote_command.append(arg)
            continue
        remote_path = f"/scratch/batch/{os.path.basename(arg)}"
        with open(arg) as f:
            files[remote_path] = f.read()
        remote_command.append(remote_path)
    return remote_command, files


def _ready_worker_addresses(cluster_info):
    """Return the private IP address of every worker whose instance state is "ready"."""
    addresses = []
    for worker in cluster_info["workers"]:
        instance = worker.get("instance") or {}
        current_state = instance.get("current_state") or {}
        if current_state.get("state") != "ready":
            continue
        addresses.append(instance["private_ip_address"])
    return addresses


@click.command(
    context_settings=CONTEXT_SETTINGS,
)
@click.option("--cluster", default=None, help="Cluster to run on, as accepted by the cluster lookup.")
@click.option("--workspace", default=None, type=str, help="Workspace in which to look up the cluster.")
@click.option(
    "--legate",
    is_flag=True,
    default=False,
    type=bool,
    help="Launch the command through the legate driver (using mpirun as its launcher).",
)
@click.option(
    "--include-head/--exclude-head",
    default=True,
    type=bool,
    help="Whether the head node (127.0.0.1) is added to the MPI hostfile.",
)
@click.argument("command", nargs=-1, required=True)
def run(cluster, workspace, legate, include_head, command):
    """Run COMMAND with mpirun across the nodes of an existing cluster.

    Any argument in COMMAND that names a local file is uploaded to
    /scratch/batch/ on the head node and on every ready worker, and the
    argument is rewritten to that remote path. The process exits with a
    non-zero status when the remote command fails.
    """
    # Evaluated by the remote shell: number of entries in the hostfile.
    nodes = "$(cat workers | wc -w)"

    command, files = _stage_local_files(command)
    user_command = shlex.join(command)

    if legate:
        # TODO make "--gpus 1 --sysmem 2000 --fbmem 20000" configurable
        wrapped_command = (
            "legate "
            "--gpus 1 --sysmem 2000 --fbmem 20000 "
            f"--nodes {nodes} "
            "--launcher mpirun "
            "--launcher-extra ' --hostfile workers -x PATH ' "
            f"{user_command}"
        )
    else:
        wrapped_command = f"mpirun --hostfile workers -x PATH {user_command}"

    with coiled.Cloud(workspace=workspace) as cloud:
        cluster_info = find_cluster(cloud, cluster)
        cluster_id = cluster_info["id"]
        connection = get_ssh_connection(cloud, cluster_id)

        setup_mpi_ssh(connection, include_scheduler=include_head)

        if files:
            # Workers are reached by their private address, tunnelled through
            # the existing connection to the head node.
            worker_connections = [
                fabric.connection.Connection(
                    address,
                    gateway=connection,
                    user=connection.user,
                    connect_kwargs=connection.connect_kwargs,
                )
                for address in _ready_worker_addresses(cluster_info)
            ]

            for path, content in files.items():
                write_via_ssh(connection, content=content, path=path)
                for conn in worker_connections:
                    write_via_ssh(conn, content=content, path=path)  # , mode=0o555

        print(f"Running command:\n{wrapped_command}")

        # TODO keepalive session so this will interact correctly with idle timeout / keepalive
        # warn=True keeps a failing command from raising; the exit status is
        # propagated explicitly below instead.
        result = connection.run(
            wrapped_command,
            hide=False,
            pty=True,
            warn=True,
            env={"PATH": "/tmp/host-user-venv/bin:$PATH"},
        )

    if result.failed:
        raise SystemExit(result.exited or 1)
121
+
122
+
123
def setup_mpi_ssh(connection, include_scheduler=True):
    """Write the MPI hostfile and SSH host keys on the node behind ``connection``.

    A single shell script is run over ``connection``. It stores the worker
    IPs reported by ``coiled_agent`` in a file named ``workers``, appends
    their ed25519 host keys to ``~/.ssh/known_hosts``, appends ``127.0.0.1``
    to ``workers`` when ``include_scheduler`` is true, and then polls every
    five seconds until ``/tmp/host-setup-done`` exists.
    """
    if include_scheduler:
        add_scheduler_line = 'printf "\n127.0.0.1" >> workers'
    else:
        add_scheduler_line = ""

    script_lines = [
        "",
        "/bin/coiled_agent list-worker-ips | sudo tee workers && sudo chown ubuntu workers",
        "ssh-keyscan -f workers -t ed25519 >> ~/.ssh/known_hosts",
        add_scheduler_line,
        "",
        "# block until host setup script has finished, at least on schedule node",
        "until [ -f /tmp/host-setup-done ]",
        "do",
        "sleep 5",
        "done",
        "",
    ]

    connection.run("\n".join(script_lines), hide=True, pty=False)
139
+
140
+
141
def get_host_setup_script(venv_path="/tmp/host-user-venv", apt_install=None, pip_install=None):
    """Build the shell script that prepares a node for MPI jobs.

    The script installs OpenMPI plus the Python venv/pip tooling with apt,
    creates a virtualenv at ``venv_path``, optionally pip-installs packages
    into it, and finally writes ``/tmp/host-setup-done`` as a completion
    marker.

    Args:
        venv_path: Directory in which the virtualenv is created.
        apt_install: Extra apt packages to install; the sequence is not modified.
        pip_install: Requirement specifiers to install into the virtualenv.

    Returns:
        The script as a string.
    """
    # Build a new list instead of extending the caller's in place.
    apt_packages = [*(apt_install or ()), "openmpi-bin", "python3-pip", "python3-venv"]
    pip_packages = list(pip_install or ())

    venv = shlex.quote(venv_path)
    lines = [
        f"sudo apt install {shlex.join(apt_packages)} -y",
        "",
        # -p: do not error when the directory already exists.
        f"mkdir -p {venv}",
        f"python3 -m venv {venv}",
        "",
    ]

    if pip_packages:
        venv_python = shlex.quote(f"{venv_path}/bin/python")
        # Quote each requirement: an unquoted specifier such as "numpy>=2"
        # would be parsed by the shell as an output redirection.
        lines.append(f"{venv_python} -m pip install {shlex.join(pip_packages)}")
        lines.append("")

    lines.append("echo 'done' > /tmp/host-setup-done")

    return "\n" + "\n".join(lines) + "\n"
159
+
160
+
161
@click.group(name="mpi", context_settings=CONTEXT_SETTINGS)
def mpi_group():
    """Set up clusters for MPI and run MPI commands on them."""


mpi_group.add_command(setup)
mpi_group.add_command(run)
coiled/cli/setup/aws.py CHANGED
@@ -321,6 +321,7 @@ def get_ongoing_doc(ecr=True, package_sync_bucket_prefix=None) -> str:
321
321
  "Effect": "Allow",
322
322
  "Resource": "*",
323
323
  "Action": [
324
+ "ec2:AuthorizeSecurityGroupEgress",
324
325
  "ec2:AuthorizeSecurityGroupIngress",
325
326
  "ec2:CreateFleet",
326
327
  "ec2:CreateLaunchTemplate",
@@ -385,6 +386,16 @@ def get_ongoing_doc(ecr=True, package_sync_bucket_prefix=None) -> str:
385
386
  ],
386
387
  "Condition": {"StringEquals": {"ec2:ResourceTag/owner": "coiled"}},
387
388
  },
389
+ {
390
+ "Sid": "OngoingPlacementGroupPolicy",
391
+ "Effect": "Allow",
392
+ "Resource": "arn:*:ec2:*:*:placement-group/coiled-*",
393
+ "Action": [
394
+ "ec2:CreatePlacementGroup",
395
+ "ec2:DescribePlacementGroups",
396
+ "ec2:DeletePlacementGroup",
397
+ ],
398
+ },
388
399
  {
389
400
  "Sid": "OptionalLogPull",
390
401
  "Effect": "Allow",
coiled/types.py CHANGED
@@ -442,6 +442,9 @@ class AWSOptions(BackendOptions, total=False):
442
442
  Only some instance types are supported.
443
443
  use_worker_efa
444
444
  Attach Elastic Fabric Adaptor only on cluster workers, not the scheduler.
445
+ ami_version
446
+ Use non-default type of AMI.
447
+ Supported options include "DL" for the Deep Learning Base OSS Nvidia Driver GPU AMI.
445
448
  """
446
449
 
447
450
  keypair_name: Optional[str]
@@ -449,6 +452,7 @@ class AWSOptions(BackendOptions, total=False):
449
452
  use_worker_placement_group: Optional[bool]
450
453
  use_efa: Optional[bool]
451
454
  use_worker_efa: Optional[bool]
455
+ ami_version: Optional[str]
452
456
 
453
457
 
454
458
  class GCPOptions(BackendOptions, total=False):
coiled/v2/cluster.py CHANGED
@@ -498,7 +498,7 @@ class Cluster(DistributedCluster, Generic[IsAsynchronous]):
498
498
  worker_vm_types: list | None = None,
499
499
  worker_cpu: Union[int, List[int]] | None = None,
500
500
  worker_memory: Union[str, List[str]] | None = None,
501
- worker_disk_size: Union[int, str] | None = None,
501
+ worker_disk_size: int | str | None = None,
502
502
  worker_disk_throughput: int | None = None,
503
503
  worker_disk_config: dict | None = None,
504
504
  worker_gpu: Union[int, bool] | None = None,
@@ -507,7 +507,7 @@ class Cluster(DistributedCluster, Generic[IsAsynchronous]):
507
507
  scheduler_vm_types: list | None = None,
508
508
  scheduler_cpu: Union[int, List[int]] | None = None,
509
509
  scheduler_memory: Union[str, List[str]] | None = None,
510
- scheduler_disk_size: int | None = None,
510
+ scheduler_disk_size: int | str | None = None,
511
511
  scheduler_disk_config: dict | None = None,
512
512
  scheduler_gpu: bool | None = None,
513
513
  asynchronous: bool = False,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coiled
3
- Version: 1.127.1.dev16
3
+ Version: 1.127.1.dev20
4
4
  Summary: Python client for coiled.io dask clusters
5
5
  Project-URL: Homepage, https://coiled.io
6
6
  Maintainer-email: Coiled <info@coiled.io>
@@ -2,7 +2,7 @@ coiled/__init__.py,sha256=SslrfRlKfunoTJMCfopqezFePDDKS8LG_FhLkMMo_TE,2274
2
2
  coiled/__main__.py,sha256=4XILBmm4ChZYo7h3JzgslFU0tjQVzdX0XtYcQLhCv0w,171
3
3
  coiled/analytics.py,sha256=96CeL8KVnm3-76lvT4fNkgML0lHebaLea-YP3wW-KqM,7486
4
4
  coiled/auth.py,sha256=go7vWtCwBbwtWyNrNBxg28xBrdjrETbE-mn3KaN5Yl8,1867
5
- coiled/batch.py,sha256=LLrkq910sXNtrssPEqK-KU6-aWRS9XuQEaRQw8wPUP4,7023
5
+ coiled/batch.py,sha256=QH-BMlMKkjdToPbw6q0I1W1TTJIDHu24B363mUGDL2c,7102
6
6
  coiled/capture_environment.py,sha256=K5mNwUe8KM_l203h3oZvcZUJTrGozT-CH1GrtuPYv8U,18458
7
7
  coiled/cluster.py,sha256=wwK9-SefbFBUEHJjYHXlWN3YvPcvR6XD2J-RdPCGhgc,5049
8
8
  coiled/coiled.yaml,sha256=z70xzNUy0E8b8Yt12tYYmjJDDmp-U63oUD61ccuu5N0,1037
@@ -22,17 +22,18 @@ coiled/software.py,sha256=eh3kZ8QBuIt_SPvTy_x6TXEv87SGqOJkO4HW-LCSsas,8701
22
22
  coiled/software_utils.py,sha256=JqGO8nstm0Hi-UCIBhHa25reNeVO-XOnv5eLoIyRcBo,40367
23
23
  coiled/spans.py,sha256=Aq2MOX6JXaJ72XiEmymPcsefs-kID85MEw6t-kOdPWI,2078
24
24
  coiled/spark.py,sha256=kooZCZT4dLMG_AQEOlaf6gj86G3UdowDfbw-Eiq94MU,9059
25
- coiled/types.py,sha256=mpYmhX9FGoe_pE_GU8Nx2nZXdwbn-DBSHYXl87ferjM,14442
25
+ coiled/types.py,sha256=xJh5t_Kk7S-LeZnZ5C4oTtl1_el3mZuQeITz1QfPHjA,14619
26
26
  coiled/utils.py,sha256=WalMzNUbjVUJvAMgXaTTyDC0HPSM_zsHiYRHK7lmkkk,78514
27
27
  coiled/websockets.py,sha256=BaCNiOgPVtm55R_rf1TK78tzoFSKLp4z2UCW7S57iNA,5956
28
28
  coiled/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  coiled/cli/config.py,sha256=WKZkDpPYywYS665krQLqid2RgSchDOddZqBkkwBtyVk,2267
30
- coiled/cli/core.py,sha256=Yw5g-Y7p75a3k3tVa_jbVhzR6xDyOqwO2yOEB6npUzk,1125
30
+ coiled/cli/core.py,sha256=iZC9v_LyhDOlQwvb5p4w6ul2sfzI7aNiZFHgIWIoNns,1186
31
31
  coiled/cli/curl.py,sha256=cYDQXvc1kZNDhFMkREhZoGYh0a9Ea06tEbobK8ZBCJ8,1589
32
32
  coiled/cli/diagnostics.py,sha256=1jIeue7xLOaf7LQFsNc6NmO5yU1jqmPFpKZSKjGN4rs,394
33
33
  coiled/cli/env.py,sha256=NHh7ZSq9yfongkpFqzon1eLhnH1FwToVvkKFIhqXRBE,6932
34
34
  coiled/cli/file.py,sha256=fJmOG3YhxpxXokGYu90wpjdwkJpp1XVqPJ_iveb5ShA,3623
35
35
  coiled/cli/login.py,sha256=cByVXmMsfGEuY2TkYU_Y8zq1zVTUHAxOe_wpw2uHsBs,2242
36
+ coiled/cli/mpi.py,sha256=37yPngCYcAbYn35PNVO6-SA3YY6Ik8KVe5blmF3yK4g,5048
36
37
  coiled/cli/package_sync.py,sha256=lABDY20yjfLYGfPlQu8ugI-Q8doY4JtN8_0nb9PkcT4,4101
37
38
  coiled/cli/prefect.py,sha256=T-SSFey4jlA_jpEI0DqAhVIPwlt2GvBFogEqYCwwevI,302
38
39
  coiled/cli/prefect_serve.py,sha256=gemq6YOVbnBoq4k3tSaU2gFJR3aMSxXLNxH6jB8V3n8,4378
@@ -42,7 +43,7 @@ coiled/cli/utils.py,sha256=cp7ToFGRpUKi6iNL6BbLjzgrgeTYSX_C55lYhaKWHHA,3479
42
43
  coiled/cli/batch/__init__.py,sha256=539CnfnqqcW7ndSufTS-Ie5FGZiElMYxE0Ptu70wo8M,660
43
44
  coiled/cli/batch/list.py,sha256=lU3mXeKUHltzpdbo7Txcd64T1-XpHE-wxTFIBoTR28w,3300
44
45
  coiled/cli/batch/logs.py,sha256=CbGK5GRjtu0UKGetKY_yTcjB-3PdHy5Xf4MLksYTE8g,1947
45
- coiled/cli/batch/run.py,sha256=_hFcSETJMhXCqcDXe06wb2Kly4FJanTNjkIPdxO65bg,37077
46
+ coiled/cli/batch/run.py,sha256=jywwmcJbgwfNOZt1eqZRq4a-2n3BZa4zt4fylgz2Grw,37125
46
47
  coiled/cli/batch/status.py,sha256=cSpMRC0YHFQ18y-XEXFjTOza_OTd7mtT-NX7sRSLMhk,4193
47
48
  coiled/cli/batch/util.py,sha256=ztisJzDHpsYswrdk_hI7USUkfponI8oLhcKAa6DXJo4,1026
48
49
  coiled/cli/batch/wait.py,sha256=dEP1OH0IYteqaYU2UdrGm_vU7IDE7h3l3Cb3KBoaCCY,3879
@@ -73,7 +74,7 @@ coiled/cli/notebook/__init__.py,sha256=khKDheFK7ociEbb1ODp5oygNLBo_1K7PsL08OaEME
73
74
  coiled/cli/notebook/notebook.py,sha256=i_XD03RK2cYeYn_TVl20Uv-kJ_2x-0Oe5iRUTm6w1Tc,23293
74
75
  coiled/cli/setup/__init__.py,sha256=BiGnIH9vXGhCFOEPuSUkitcrwAA97wTsfcwMXC0DkYg,837
75
76
  coiled/cli/setup/amp.py,sha256=_zlZtqsd_LkSF5C_G8qDm0To-t30C0Z6XKMdDzrm7qg,5039
76
- coiled/cli/setup/aws.py,sha256=XWzj-YUhbb3KM_PAErnM81KYDRltspN0sxOyRPM7IM0,65447
77
+ coiled/cli/setup/aws.py,sha256=MS4Au1AGoALeVO_VuTdq_RRzL3JzOGgpTgcM69avXU0,65885
77
78
  coiled/cli/setup/azure.py,sha256=JNBdKyY9NTcWfr6NAb4d-QM45y6bPBuFFCsCY21equI,25288
78
79
  coiled/cli/setup/entry.py,sha256=2PKtvH_ARWt5c5qjeb7dfmJOcFTqRGoskPidNoQTiOg,2425
79
80
  coiled/cli/setup/gcp.py,sha256=i67kFRJJpDORrqkVfDu1jFseN80iDbKe1vswk6jxRI8,38817
@@ -87,7 +88,7 @@ coiled/extensions/prefect/__init__.py,sha256=cZp1mqX29FrnINoQsuH6pz4z4uuOACs0mgi
87
88
  coiled/extensions/prefect/runners.py,sha256=AcaGS1637TnqFPKnjmmLHpdzjwAsxBLDKrOF7OpfEwM,987
88
89
  coiled/extensions/prefect/workers.py,sha256=Z2VuAhTm5AjWEKyCniMZrTxqtkn3uJp3sO3bFeR2Rr0,1642
89
90
  coiled/v2/__init__.py,sha256=KaCULaAqatcsYbTbj_SQtTLocbSKZa-uQXiyCICKFRM,805
90
- coiled/v2/cluster.py,sha256=DelsScilcQ97c7MDT1mBa-CFYHX-5JwdFvWhG2F2gLs,148134
91
+ coiled/v2/cluster.py,sha256=hGs5_SVdrh2zKWpd_8RNPC7LbNd1F6ggEgAj9fCEDm8,148134
91
92
  coiled/v2/core.py,sha256=Bf5A_rzK3tuUqqMVAgN5vix-tX_F8AEWR2pICnG3YcA,71615
92
93
  coiled/v2/cwi_log_link.py,sha256=d4k6wRYhcdDVdhWYZIX6WL1g0lscXY0yq__H1sPUNWk,1883
93
94
  coiled/v2/states.py,sha256=VduyWuf6rByG_wg5AXTxZpe88cCTSdIa4HrPjk1jBcA,9031
@@ -95,8 +96,8 @@ coiled/v2/widgets/__init__.py,sha256=Bt3GHTTyri-kFUaqGRVydDM-sCg5NdNujDg2RyvgV8U
95
96
  coiled/v2/widgets/interface.py,sha256=YeMQ5qdRbbpM04x9qIg2LE1xwxyRxFbdDYnkrwHazPk,301
96
97
  coiled/v2/widgets/rich.py,sha256=3rU5-yso92NdeEh3uSvEE-GwPNyp6i0Nb5PE5czXCik,28974
97
98
  coiled/v2/widgets/util.py,sha256=Y8qpGqwNzqfCzgyRFRy7vcscBoXqop-Upi4HLPpXLgg,3120
98
- coiled-1.127.1.dev16.dist-info/METADATA,sha256=qMOXvhRHA3knlti4m8OYkLrJMiew0sPowR66YqZrhhg,2182
99
- coiled-1.127.1.dev16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
100
- coiled-1.127.1.dev16.dist-info/entry_points.txt,sha256=C8dz1ST_bTlTO-kNvuHBJQma9PyJPotg0S4xpPt5aHY,47
101
- coiled-1.127.1.dev16.dist-info/licenses/LICENSE,sha256=ZPwVR73Biwm3sK6vR54djCrhaRiM4cAD2zvOQZV8Xis,3859
102
- coiled-1.127.1.dev16.dist-info/RECORD,,
99
+ coiled-1.127.1.dev20.dist-info/METADATA,sha256=O69puerNR6NRQsz4T2IVMwdxChm9ERigc3HyOH3KrdI,2182
100
+ coiled-1.127.1.dev20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
101
+ coiled-1.127.1.dev20.dist-info/entry_points.txt,sha256=C8dz1ST_bTlTO-kNvuHBJQma9PyJPotg0S4xpPt5aHY,47
102
+ coiled-1.127.1.dev20.dist-info/licenses/LICENSE,sha256=ZPwVR73Biwm3sK6vR54djCrhaRiM4cAD2zvOQZV8Xis,3859
103
+ coiled-1.127.1.dev20.dist-info/RECORD,,