runnable 0.30.0__py3-none-any.whl → 0.30.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- extensions/nodes/torch.py +16 -4
- extensions/pipeline_executor/__init__.py +7 -3
- extensions/pipeline_executor/argo.py +3 -2
- {runnable-0.30.0.dist-info → runnable-0.30.2.dist-info}/METADATA +1 -1
- {runnable-0.30.0.dist-info → runnable-0.30.2.dist-info}/RECORD +8 -8
- {runnable-0.30.0.dist-info → runnable-0.30.2.dist-info}/WHEEL +0 -0
- {runnable-0.30.0.dist-info → runnable-0.30.2.dist-info}/entry_points.txt +0 -0
- {runnable-0.30.0.dist-info → runnable-0.30.2.dist-info}/licenses/LICENSE +0 -0
extensions/nodes/torch.py
CHANGED
@@ -115,7 +115,9 @@ class TorchNode(DistributedNode, TorchConfig):
|
|
115
115
|
map_variable: TypeMapVariable = None,
|
116
116
|
attempt_number: int = 1,
|
117
117
|
) -> StepLog:
|
118
|
-
assert
|
118
|
+
assert (
|
119
|
+
map_variable is None or not map_variable
|
120
|
+
), "TorchNode does not support map_variable"
|
119
121
|
|
120
122
|
step_log = self._context.run_log_store.get_step_log(
|
121
123
|
self._get_step_log_name(map_variable), self._context.run_id
|
@@ -130,6 +132,8 @@ class TorchNode(DistributedNode, TorchConfig):
|
|
130
132
|
self._context.parameters_file or ""
|
131
133
|
)
|
132
134
|
os.environ["RUNNABLE_TORCH_RUN_ID"] = self._context.run_id
|
135
|
+
# retrieve the master address and port from the parameters
|
136
|
+
# default to localhost and 29500
|
133
137
|
launcher = elastic_launch(
|
134
138
|
launch_config,
|
135
139
|
training_subprocess,
|
@@ -161,9 +165,17 @@ class TorchNode(DistributedNode, TorchConfig):
|
|
161
165
|
|
162
166
|
return step_log
|
163
167
|
|
164
|
-
# TODO: Not sure we need these methods
|
165
168
|
def fan_in(self, map_variable: dict[str, str | int | float] | None = None):
|
166
|
-
|
169
|
+
# Destroy the service
|
170
|
+
# Destroy the statefulset
|
171
|
+
assert (
|
172
|
+
map_variable is None or not map_variable
|
173
|
+
), "TorchNode does not support map_variable"
|
167
174
|
|
168
175
|
def fan_out(self, map_variable: dict[str, str | int | float] | None = None):
|
169
|
-
|
176
|
+
# Create a service
|
177
|
+
# Create a statefulset
|
178
|
+
# Gather the IPs and set them as parameters downstream
|
179
|
+
assert (
|
180
|
+
map_variable is None or not map_variable
|
181
|
+
), "TorchNode does not support map_variable"
|
@@ -233,11 +233,15 @@ class GenericPipelineExecutor(BasePipelineExecutor):
|
|
233
233
|
mock=mock,
|
234
234
|
)
|
235
235
|
|
236
|
-
|
237
|
-
|
236
|
+
if step_log.status == defaults.SUCCESS:
|
237
|
+
data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
|
238
|
+
stage="put"
|
239
|
+
)
|
240
|
+
logger.debug(f"data_catalogs_put: {data_catalogs_put}")
|
241
|
+
step_log.add_data_catalogs(data_catalogs_put or [])
|
238
242
|
|
243
|
+
# get catalog should always be added to the step log
|
239
244
|
step_log.add_data_catalogs(data_catalogs_get or [])
|
240
|
-
step_log.add_data_catalogs(data_catalogs_put or [])
|
241
245
|
|
242
246
|
console.print(f"Summary of the step: {step_log.internal_name}")
|
243
247
|
console.print(step_log.get_summary(), style=defaults.info_style)
|
@@ -261,7 +261,7 @@ class CommonDefaults(BaseModelWIthConfig):
|
|
261
261
|
tolerations: Optional[list[Toleration]] = Field(default=None)
|
262
262
|
image_pull_policy: ImagePullPolicy = Field(default=ImagePullPolicy.Always)
|
263
263
|
resources: Resources = Field(default_factory=Resources)
|
264
|
-
env: list[EnvVar | SecretEnvVar] = Field(default_factory=list
|
264
|
+
env: list[EnvVar | SecretEnvVar] = Field(default_factory=list)
|
265
265
|
|
266
266
|
|
267
267
|
# The user provided defaults at the top level
|
@@ -658,7 +658,7 @@ class ArgoExecutor(GenericPipelineExecutor):
|
|
658
658
|
def _set_env_vars_to_task(
|
659
659
|
self, working_on: BaseNode, container_template: CoreContainerTemplate
|
660
660
|
):
|
661
|
-
if not isinstance(working_on, TaskNode):
|
661
|
+
if not isinstance(working_on, TaskNode) or isinstance(working_on, TorchNode):
|
662
662
|
return
|
663
663
|
|
664
664
|
global_envs: dict[str, str] = {}
|
@@ -880,6 +880,7 @@ class ArgoExecutor(GenericPipelineExecutor):
|
|
880
880
|
case "torch":
|
881
881
|
assert isinstance(working_on, TorchNode)
|
882
882
|
# TODO: Need to add multi-node functionality
|
883
|
+
# Check notes on the torch node
|
883
884
|
|
884
885
|
template_of_container = self._create_container_template(
|
885
886
|
working_on,
|
@@ -16,11 +16,11 @@ extensions/job_executor/pyproject.toml,sha256=UIEgiCYHTXcRWSByNMFuKJFKgxTBpQqTqy
|
|
16
16
|
extensions/nodes/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
extensions/nodes/nodes.py,sha256=s9ub1dqy4qHjRQG6YElCdL7rCOTYNs9RUIrStZ6tEB4,28256
|
18
18
|
extensions/nodes/pyproject.toml,sha256=YTu-ETN3JNFSkMzzWeOwn4m-O2nbRH-PmiPBALDCUw4,278
|
19
|
-
extensions/nodes/torch.py,sha256=
|
19
|
+
extensions/nodes/torch.py,sha256=oYh4ep9J6CS3r04HURJba5m4v8lzNupWUh4PAXvGgi0,5952
|
20
20
|
extensions/nodes/torch_config.py,sha256=yDvDADpnLhQsNtfH8qIztLHQ2LhYiOJEWljxpH9GZzs,1222
|
21
21
|
extensions/pipeline_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
extensions/pipeline_executor/__init__.py,sha256=
|
23
|
-
extensions/pipeline_executor/argo.py,sha256
|
22
|
+
extensions/pipeline_executor/__init__.py,sha256=LApJGwJctEBTDQX9gGHkGV97T6rF_q-_59OzvAJ2X1g,24346
|
23
|
+
extensions/pipeline_executor/argo.py,sha256=-mWH5H2lZEldO8er7-tHberm7swqo4CwrtQKUd2VkGA,37944
|
24
24
|
extensions/pipeline_executor/local.py,sha256=6oWUJ6b6NvIkpeQJBoCT1hbfX4_6WCB4HzMgHZ4ik1A,1887
|
25
25
|
extensions/pipeline_executor/local_container.py,sha256=3kZ2QCsrq_YjH9dcAz8v05knKShQ_JtbIU-IA_-G538,12724
|
26
26
|
extensions/pipeline_executor/mocked.py,sha256=0sMmypuvstBIv9uQg-WAcPrF3oOFpeEXNi6N8Nzdnl0,5680
|
@@ -58,8 +58,8 @@ runnable/sdk.py,sha256=NZVQGaL4Zm2hwloRmqEgp8UPbBg9hY1abQGYnOgniPI,35128
|
|
58
58
|
runnable/secrets.py,sha256=4L_dBFxTgr8r_hHUD6RlZEtqaOHDRsFG5PXO5wlvMI0,2324
|
59
59
|
runnable/tasks.py,sha256=Qb1IhVxHv68E7vf3M3YCf7MGRHyjmsEEYBpEpiZ4mRI,29062
|
60
60
|
runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
|
61
|
-
runnable-0.30.
|
62
|
-
runnable-0.30.
|
63
|
-
runnable-0.30.
|
64
|
-
runnable-0.30.
|
65
|
-
runnable-0.30.
|
61
|
+
runnable-0.30.2.dist-info/METADATA,sha256=M-3XWv_ijqZLfEwzQ6W5IsZaHGOpNxToI8pYNI-SlwQ,10115
|
62
|
+
runnable-0.30.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
63
|
+
runnable-0.30.2.dist-info/entry_points.txt,sha256=PrjKrlfXPZaV_7hz8orGu4FDnatLqnhPOXljyllszdw,1880
|
64
|
+
runnable-0.30.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
65
|
+
runnable-0.30.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|