runnable 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/nodes/torch.py +16 -4
- extensions/pipeline_executor/argo.py +2 -1
- {runnable-0.30.0.dist-info → runnable-0.30.1.dist-info}/METADATA +1 -1
- {runnable-0.30.0.dist-info → runnable-0.30.1.dist-info}/RECORD +7 -7
- {runnable-0.30.0.dist-info → runnable-0.30.1.dist-info}/WHEEL +0 -0
- {runnable-0.30.0.dist-info → runnable-0.30.1.dist-info}/entry_points.txt +0 -0
- {runnable-0.30.0.dist-info → runnable-0.30.1.dist-info}/licenses/LICENSE +0 -0
extensions/nodes/torch.py
CHANGED
@@ -115,7 +115,9 @@ class TorchNode(DistributedNode, TorchConfig):
|
|
115
115
|
map_variable: TypeMapVariable = None,
|
116
116
|
attempt_number: int = 1,
|
117
117
|
) -> StepLog:
|
118
|
-
assert
|
118
|
+
assert (
|
119
|
+
map_variable is None or not map_variable
|
120
|
+
), "TorchNode does not support map_variable"
|
119
121
|
|
120
122
|
step_log = self._context.run_log_store.get_step_log(
|
121
123
|
self._get_step_log_name(map_variable), self._context.run_id
|
@@ -130,6 +132,8 @@ class TorchNode(DistributedNode, TorchConfig):
|
|
130
132
|
self._context.parameters_file or ""
|
131
133
|
)
|
132
134
|
os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
|
135
|
+
# retrieve the master address and port from the parameters
|
136
|
+
# default to localhost and 29500
|
133
137
|
launcher = elastic_launch(
|
134
138
|
launch_config,
|
135
139
|
training_subprocess,
|
@@ -161,9 +165,17 @@ class TorchNode(DistributedNode, TorchConfig):
|
|
161
165
|
|
162
166
|
return step_log
|
163
167
|
|
164
|
-
# TODO: Not sure we need these methods
|
165
168
|
def fan_in(self, map_variable: dict[str, str | int | float] | None = None):
|
166
|
-
|
169
|
+
# Destroy the service
|
170
|
+
# Destroy the statefulset
|
171
|
+
assert (
|
172
|
+
map_variable is None or not map_variable
|
173
|
+
), "TorchNode does not support map_variable"
|
167
174
|
|
168
175
|
def fan_out(self, map_variable: dict[str, str | int | float] | None = None):
|
169
|
-
|
176
|
+
# Create a service
|
177
|
+
# Create a statefulset
|
178
|
+
# Gather the IPs and set them as parameters downstream
|
179
|
+
assert (
|
180
|
+
map_variable is None or not map_variable
|
181
|
+
), "TorchNode does not support map_variable"
|
extensions/pipeline_executor/argo.py
CHANGED
@@ -658,7 +658,7 @@ class ArgoExecutor(GenericPipelineExecutor):
|
|
658
658
|
def _set_env_vars_to_task(
|
659
659
|
self, working_on: BaseNode, container_template: CoreContainerTemplate
|
660
660
|
):
|
661
|
-
if not isinstance(working_on, TaskNode):
|
661
|
+
if not isinstance(working_on, TaskNode) or isinstance(working_on, TorchNode):
|
662
662
|
return
|
663
663
|
|
664
664
|
global_envs: dict[str, str] = {}
|
@@ -880,6 +880,7 @@ class ArgoExecutor(GenericPipelineExecutor):
|
|
880
880
|
case "torch":
|
881
881
|
assert isinstance(working_on, TorchNode)
|
882
882
|
# TODO: Need to add multi-node functionality
|
883
|
+
# Check notes on the torch node
|
883
884
|
|
884
885
|
template_of_container = self._create_container_template(
|
885
886
|
working_on,
|
{runnable-0.30.0.dist-info → runnable-0.30.1.dist-info}/RECORD
CHANGED
@@ -16,11 +16,11 @@ extensions/job_executor/pyproject.toml,sha256=UIEgiCYHTXcRWSByNMFuKJFKgxTBpQqTqy
|
|
16
16
|
extensions/nodes/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
extensions/nodes/nodes.py,sha256=s9ub1dqy4qHjRQG6YElCdL7rCOTYNs9RUIrStZ6tEB4,28256
|
18
18
|
extensions/nodes/pyproject.toml,sha256=YTu-ETN3JNFSkMzzWeOwn4m-O2nbRH-PmiPBALDCUw4,278
|
19
|
-
extensions/nodes/torch.py,sha256=
|
19
|
+
extensions/nodes/torch.py,sha256=oYh4ep9J6CS3r04HURJba5m4v8lzNupWUh4PAXvGgi0,5952
|
20
20
|
extensions/nodes/torch_config.py,sha256=yDvDADpnLhQsNtfH8qIztLHQ2LhYiOJEWljxpH9GZzs,1222
|
21
21
|
extensions/pipeline_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
extensions/pipeline_executor/__init__.py,sha256=9ZMHcieSYdTiYyjSkc8eT8yhOlKEUFnrbrdbqdOgvP0,24195
|
23
|
-
extensions/pipeline_executor/argo.py,sha256=
|
23
|
+
extensions/pipeline_executor/argo.py,sha256=eyIVZbpecU1cPAwdvt56UFRZW2AqxALcBM_Yfvbvhqw,37958
|
24
24
|
extensions/pipeline_executor/local.py,sha256=6oWUJ6b6NvIkpeQJBoCT1hbfX4_6WCB4HzMgHZ4ik1A,1887
|
25
25
|
extensions/pipeline_executor/local_container.py,sha256=3kZ2QCsrq_YjH9dcAz8v05knKShQ_JtbIU-IA_-G538,12724
|
26
26
|
extensions/pipeline_executor/mocked.py,sha256=0sMmypuvstBIv9uQg-WAcPrF3oOFpeEXNi6N8Nzdnl0,5680
|
@@ -58,8 +58,8 @@ runnable/sdk.py,sha256=NZVQGaL4Zm2hwloRmqEgp8UPbBg9hY1abQGYnOgniPI,35128
|
|
58
58
|
runnable/secrets.py,sha256=4L_dBFxTgr8r_hHUD6RlZEtqaOHDRsFG5PXO5wlvMI0,2324
|
59
59
|
runnable/tasks.py,sha256=Qb1IhVxHv68E7vf3M3YCf7MGRHyjmsEEYBpEpiZ4mRI,29062
|
60
60
|
runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
|
61
|
-
runnable-0.30.
|
62
|
-
runnable-0.30.
|
63
|
-
runnable-0.30.
|
64
|
-
runnable-0.30.
|
65
|
-
runnable-0.30.
|
61
|
+
runnable-0.30.1.dist-info/METADATA,sha256=4Y4D0jyK46LpYoZE53b761BJe95eBvxo5QU3R-_-t0Y,10115
|
62
|
+
runnable-0.30.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
63
|
+
runnable-0.30.1.dist-info/entry_points.txt,sha256=PrjKrlfXPZaV_7hz8orGu4FDnatLqnhPOXljyllszdw,1880
|
64
|
+
runnable-0.30.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
65
|
+
runnable-0.30.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|