metaflow 2.12.8__py2.py3-none-any.whl → 2.12.9__py2.py3-none-any.whl

This diff shows the changes between publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
Files changed (38)
  1. metaflow/__init__.py +2 -0
  2. metaflow/cli.py +12 -4
  3. metaflow/extension_support/plugins.py +1 -0
  4. metaflow/flowspec.py +8 -1
  5. metaflow/lint.py +13 -0
  6. metaflow/metaflow_current.py +0 -8
  7. metaflow/plugins/__init__.py +12 -0
  8. metaflow/plugins/argo/argo_workflows.py +462 -42
  9. metaflow/plugins/argo/argo_workflows_cli.py +60 -3
  10. metaflow/plugins/argo/argo_workflows_decorator.py +38 -7
  11. metaflow/plugins/argo/argo_workflows_deployer.py +290 -0
  12. metaflow/plugins/argo/jobset_input_paths.py +16 -0
  13. metaflow/plugins/aws/batch/batch_decorator.py +16 -13
  14. metaflow/plugins/aws/step_functions/step_functions_cli.py +45 -3
  15. metaflow/plugins/aws/step_functions/step_functions_deployer.py +251 -0
  16. metaflow/plugins/cards/card_cli.py +1 -1
  17. metaflow/plugins/kubernetes/kubernetes.py +279 -52
  18. metaflow/plugins/kubernetes/kubernetes_cli.py +26 -8
  19. metaflow/plugins/kubernetes/kubernetes_client.py +0 -1
  20. metaflow/plugins/kubernetes/kubernetes_decorator.py +56 -44
  21. metaflow/plugins/kubernetes/kubernetes_job.py +6 -6
  22. metaflow/plugins/kubernetes/kubernetes_jobsets.py +510 -272
  23. metaflow/plugins/parallel_decorator.py +108 -8
  24. metaflow/plugins/secrets/secrets_decorator.py +12 -3
  25. metaflow/plugins/test_unbounded_foreach_decorator.py +39 -4
  26. metaflow/runner/deployer.py +386 -0
  27. metaflow/runner/metaflow_runner.py +1 -20
  28. metaflow/runner/nbdeploy.py +130 -0
  29. metaflow/runner/nbrun.py +4 -28
  30. metaflow/runner/utils.py +49 -0
  31. metaflow/runtime.py +246 -134
  32. metaflow/version.py +1 -1
  33. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/METADATA +2 -2
  34. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/RECORD +38 -32
  35. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/WHEEL +1 -1
  36. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/LICENSE +0 -0
  37. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/entry_points.txt +0 -0
  38. {metaflow-2.12.8.dist-info → metaflow-2.12.9.dist-info}/top_level.txt +0 -0

metaflow/plugins/parallel_decorator.py
@@ -1,11 +1,34 @@
+ from collections import namedtuple
  from metaflow.decorators import StepDecorator
- from metaflow.unbounded_foreach import UBF_CONTROL
+ from metaflow.unbounded_foreach import UBF_CONTROL, CONTROL_TASK_TAG
  from metaflow.exception import MetaflowException
+ from metaflow.metadata import MetaDatum
+ from metaflow.metaflow_current import current, Parallel
  import os
  import sys


  class ParallelDecorator(StepDecorator):
+     """
+     MF Add To Current
+     -----------------
+     parallel -> metaflow.metaflow_current.Parallel
+
+     @@ Returns
+     -------
+     Parallel
+         `namedtuple` with the following fields:
+             - main_ip : str
+                 The IP address of the control task.
+             - num_nodes : int
+                 The total number of tasks created by @parallel.
+             - node_index : int
+                 The index of the current task among all the @parallel tasks.
+
+     is_parallel -> bool
+         True if the current step is a @parallel step.
+     """
+
      name = "parallel"
      defaults = {}
      IS_PARALLEL = True
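
As an aside: the `current.parallel` surface documented in the docstring above looks like this from user code. A minimal sketch (the flow and step names are hypothetical, and the values are resolved lazily from the MF_PARALLEL_* environment variables on each access):

from metaflow import FlowSpec, current, parallel, step

class ParallelDemoFlow(FlowSpec):
    @step
    def start(self):
        # num_parallel controls how many @parallel tasks are created
        self.next(self.train, num_parallel=4)

    @parallel
    @step
    def train(self):
        print(current.parallel.main_ip)     # IP address of the control task
        print(current.parallel.num_nodes)   # 4
        print(current.parallel.node_index)  # 0..3, one per task
        print(current.is_parallel)          # True inside a @parallel step
        self.next(self.join)

    @step
    def join(self, inputs):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    ParallelDemoFlow()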
@@ -16,7 +39,6 @@ class ParallelDecorator(StepDecorator):
      def runtime_step_cli(
          self, cli_args, retry_count, max_user_code_retries, ubf_context
      ):
-
          if ubf_context == UBF_CONTROL:
              num_parallel = cli_args.task.ubf_iter.num_parallel
              cli_args.command_options["num-parallel"] = str(num_parallel)
@@ -25,6 +47,82 @@
          self, flow, graph, step_name, decorators, environment, flow_datastore, logger
      ):
          self.environment = environment
+         # Previously, the `parallel` property was a hardcoded, static property within `current`.
+         # Whenever `current.parallel` was called, it returned a namedtuple whose values came from
+         # environment variables, resolved dynamically at runtime.
+         # Now, many of these environment variables are set by compute-related decorators in
+         # `task_pre_step`. Setting the namedtuple statically via `current._update_env` in
+         # `task_pre_step` would therefore require enforcing a specific ordering of the `parallel`
+         # and compute decorators. Hence we avoid `current._update_env`, since:
+         # - it would set a static namedtuple, resolving the environment variables only once (at
+         #   the time `current._update_env` is called);
+         # - we cannot guarantee the order in which each decorator's `task_pre_step` runs.
+         # We therefore set the property explicitly in `step_init`, so that it resolves the
+         # appropriate values in the namedtuple whenever it is accessed at runtime.
+         setattr(
+             current.__class__,
+             "parallel",
+             property(
+                 fget=lambda _: Parallel(
+                     main_ip=os.environ.get("MF_PARALLEL_MAIN_IP", "127.0.0.1"),
+                     num_nodes=int(os.environ.get("MF_PARALLEL_NUM_NODES", "1")),
+                     node_index=int(os.environ.get("MF_PARALLEL_NODE_INDEX", "0")),
+                 )
+             ),
+         )
+
+     def task_pre_step(
+         self,
+         step_name,
+         task_datastore,
+         metadata,
+         run_id,
+         task_id,
+         flow,
+         graph,
+         retry_count,
+         max_user_code_retries,
+         ubf_context,
+         inputs,
+     ):
+         from metaflow import current
+
+         # Set `is_parallel` to `True` in `current`, just as we do
+         # with `is_production` in the project decorator.
+         current._update_env(
+             {
+                 "is_parallel": True,
+             }
+         )
+
+         self.input_paths = [obj.pathspec for obj in inputs]
+         task_metadata_list = [
+             MetaDatum(
+                 field="parallel-world-size",
+                 value=flow._parallel_ubf_iter.num_parallel,
+                 type="parallel-world-size",
+                 tags=["attempt_id:{0}".format(0)],
+             )
+         ]
+         if ubf_context == UBF_CONTROL:
+             # A task's tags are now those of its ancestral run, so we cannot
+             # rely on a task's tags to indicate the presence of a control
+             # task.
+             #
+             # Instead, we also add a task metadata entry indicating that this
+             # is a "control task". Within the metaflow repo, the only
+             # dependency on such a "control task" indicator is in the
+             # integration test suite (see Step.control_tasks() in the client
+             # API).
+             task_metadata_list += [
+                 MetaDatum(
+                     field="internal_task_type",
+                     value=CONTROL_TASK_TAG,
+                     type="internal_task_type",
+                     tags=["attempt_id:{0}".format(0)],
+                 )
+             ]
+         metadata.register_metadata(run_id, step_name, task_id, task_metadata_list)

      def task_decorate(
          self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
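
The long comment in the hunk above is the crux of the change: a `property` re-reads the environment on every access, whereas `current._update_env` would freeze the values at call time. A standalone sketch of the difference, independent of Metaflow's actual `current` object:

import os
from collections import namedtuple

Parallel = namedtuple("Parallel", ["main_ip", "num_nodes", "node_index"])

class Current:
    # Dynamic property: resolves the env vars at access time, so values set
    # later (e.g. by a compute decorator's task_pre_step) are still picked up.
    parallel = property(
        lambda self: Parallel(
            main_ip=os.environ.get("MF_PARALLEL_MAIN_IP", "127.0.0.1"),
            num_nodes=int(os.environ.get("MF_PARALLEL_NUM_NODES", "1")),
            node_index=int(os.environ.get("MF_PARALLEL_NODE_INDEX", "0")),
        )
    )

current = Current()
print(current.parallel.num_nodes)           # 1 (default; env var not set yet)
os.environ["MF_PARALLEL_NUM_NODES"] = "4"   # set later by another decorator
print(current.parallel.num_nodes)           # 4, because the property re-resolves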
@@ -47,6 +145,7 @@ class ParallelDecorator(StepDecorator):
                  env_to_use,
                  _step_func_with_setup,
                  retry_count,
+                 ",".join(self.input_paths),
              )
          else:
              return _step_func_with_setup
@@ -56,7 +155,9 @@ class ParallelDecorator(StepDecorator):
          pass


- def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_count):
+ def _local_multinode_control_task_step_func(
+     flow, env_to_use, step_func, retry_count, input_paths
+ ):
      """
      Used as multinode UBF control task when run in local mode.
      """
@@ -80,10 +181,7 @@ def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_c
      run_id = current.run_id
      step_name = current.step_name
      control_task_id = current.task_id
-
-     (_, split_step_name, split_task_id) = control_task_id.split("-")[1:]
      # UBF handling for multinode case
-     top_task_id = control_task_id.replace("control-", "")  # chop "-0"
      mapper_task_ids = [control_task_id]
      # If we are running inside Conda, we use the base executable FIRST;
      # the conda environment will then be used when runtime_step_cli is
@@ -93,12 +191,13 @@ def _local_multinode_control_task_step_func(flow, env_to_use, step_func, retry_c
      script = sys.argv[0]

      # start workers
+     # TODO: Logs for worker processes are currently assigned to the control
+     # process; this should be fixed at some point.
      subprocesses = []
      for node_index in range(1, num_parallel):
-         task_id = "%s_node_%d" % (top_task_id, node_index)
+         task_id = "%s_node_%d" % (control_task_id, node_index)
          mapper_task_ids.append(task_id)
          os.environ["MF_PARALLEL_NODE_INDEX"] = str(node_index)
-         input_paths = "%s/%s/%s" % (run_id, split_step_name, split_task_id)
          # Override specific `step` kwargs.
          kwargs = cli_args.step_kwargs
          kwargs["split_index"] = str(node_index)
@@ -109,6 +208,7 @@
          kwargs["retry_count"] = str(retry_count)

          cmd = cli_args.step_command(executable, script, step_name, step_kwargs=kwargs)
+
          p = subprocess.Popen(cmd)
          subprocesses.append(p)

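Net effect of the two hunks above: worker task ids now derive directly from the control task's id, and worker input paths come from the `self.input_paths` recorded in `task_pre_step` instead of being re-parsed out of a `control-`-prefixed task id. A toy illustration (ids hypothetical):

control_task_id = "522"  # hypothetical control task id
num_parallel = 4
worker_ids = ["%s_node_%d" % (control_task_id, i) for i in range(1, num_parallel)]
print(worker_ids)  # ['522_node_1', '522_node_2', '522_node_3']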

metaflow/plugins/secrets/secrets_decorator.py
@@ -4,7 +4,7 @@ import re
  from metaflow.exception import MetaflowException
  from metaflow.decorators import StepDecorator
  from metaflow.metaflow_config import DEFAULT_SECRETS_ROLE
- from metaflow.unbounded_foreach import UBF_CONTROL
+ from metaflow.unbounded_foreach import UBF_TASK

  from typing import Any, Dict, List, Union
 
@@ -210,8 +210,17 @@ class SecretsDecorator(StepDecorator):
          ubf_context,
          inputs,
      ):
-         if ubf_context == UBF_CONTROL:
-             """control tasks (as used in "unbounded for each") don't need secrets"""
+         if (
+             ubf_context
+             and ubf_context == UBF_TASK
+             and os.environ.get("METAFLOW_RUNTIME_ENVIRONMENT", "local") == "local"
+         ):
+             # Skip secret injection for "locally" launched UBF_TASK (worker) tasks. When @parallel
+             # tasks run locally, the control task creates the workers, which inherit the control
+             # task's environment variables. Without this skip, the workers would try to set the same
+             # environment variables again, clashing with the control task's env vars and causing
+             # @secrets' `task_pre_step` to fail. In remote settings (e.g. AWS Batch or Kubernetes),
+             # worker and control tasks are created independently, so there is no chance of a clash.
              return
          # List of pairs (secret_spec, env_vars_from_this_spec)
          all_secrets_env_vars = []
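
The scenario this guards against is plain environment inheritance: locally, workers are `subprocess.Popen` children of the control process, so they start with the control task's already secret-injected environment. A minimal sketch of that inheritance, unrelated to Metaflow's actual secrets machinery:

import os
import subprocess
import sys

os.environ["MY_SECRET"] = "injected-by-control-task"  # hypothetical secret
# A child (worker) process inherits the parent's environment wholesale, so
# injecting the same secret again in the worker would collide with this value.
subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['MY_SECRET'])"],
    check=True,
)  # prints: injected-by-control-task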

metaflow/plugins/test_unbounded_foreach_decorator.py
@@ -8,8 +8,14 @@ import sys
  from metaflow.cli_args import cli_args
  from metaflow.decorators import StepDecorator
  from metaflow.exception import MetaflowException
- from metaflow.unbounded_foreach import UnboundedForeachInput, UBF_CONTROL, UBF_TASK
+ from metaflow.unbounded_foreach import (
+     UnboundedForeachInput,
+     UBF_CONTROL,
+     UBF_TASK,
+     CONTROL_TASK_TAG,
+ )
  from metaflow.util import to_unicode
+ from metaflow.metadata import MetaDatum


  class InternalTestUnboundedForeachInput(UnboundedForeachInput):
@@ -60,13 +66,42 @@ class InternalTestUnboundedForeachDecorator(StepDecorator):
      ):
          self.environment = environment

+     def task_pre_step(
+         self,
+         step_name,
+         task_datastore,
+         metadata,
+         run_id,
+         task_id,
+         flow,
+         graph,
+         retry_count,
+         max_user_code_retries,
+         ubf_context,
+         inputs,
+     ):
+         if ubf_context == UBF_CONTROL:
+             metadata.register_metadata(
+                 run_id,
+                 step_name,
+                 task_id,
+                 [
+                     MetaDatum(
+                         field="internal_task_type",
+                         value=CONTROL_TASK_TAG,
+                         type="internal_task_type",
+                         tags=["attempt_id:{0}".format(0)],
+                     )
+                 ],
+             )
+         self.input_paths = [obj.pathspec for obj in inputs]
+
      def control_task_step_func(self, flow, graph, retry_count):
          from metaflow import current

          run_id = current.run_id
          step_name = current.step_name
          control_task_id = current.task_id
-         (_, split_step_name, split_task_id) = control_task_id.split("-")[1:]
          # If we are running inside Conda, we use the base executable FIRST;
          # the conda environment will then be used when runtime_step_cli is
          # called. This is so that it can properly set up all the metaflow
@@ -94,10 +129,10 @@ class InternalTestUnboundedForeachDecorator(StepDecorator):
          mapper_tasks = []

          for i in range(foreach_num_splits):
-             task_id = "%s-%d" % (control_task_id.replace("control-", "test-ubf-"), i)
+             task_id = "%s-%d" % (control_task_id, i)
              pathspec = "%s/%s/%s" % (run_id, step_name, task_id)
              mapper_tasks.append(to_unicode(pathspec))
-         input_paths = "%s/%s/%s" % (run_id, split_step_name, split_task_id)
+         input_paths = ",".join(self.input_paths)

          # Override specific `step` kwargs.
          kwargs = cli_args.step_kwargs
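
For context, a pathspec identifies a task as `run_id/step_name/task_id`. Both decorators in this release now record one pathspec per input task in `task_pre_step` and hand workers a comma-separated list, instead of reconstructing a single path from the control task id. A toy illustration (values hypothetical):

input_paths = ["3/start/1", "3/start/2"]  # one pathspec per input task
cli_value = ",".join(input_paths)         # value passed via --input-paths
assert cli_value == "3/start/1,3/start/2"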

metaflow/runner/deployer.py (new file)
@@ -0,0 +1,386 @@
+ import os
+ import sys
+ import json
+ import importlib
+ import functools
+ import tempfile
+ from typing import Optional, Dict, ClassVar
+
+ from metaflow.exception import MetaflowNotFound
+ from metaflow.runner.subprocess_manager import CommandManager, SubprocessManager
+ from metaflow.runner.utils import read_from_file_when_ready
+
+
+ def handle_timeout(tfp_runner_attribute, command_obj: CommandManager):
+     """
+     Handle the timeout for a running subprocess command that reads a file
+     and raises an error with appropriate logs if a TimeoutError occurs.
+
+     Parameters
+     ----------
+     tfp_runner_attribute : NamedTemporaryFile
+         Temporary file that stores runner attribute data.
+     command_obj : CommandManager
+         Command manager object that encapsulates the running command details.
+
+     Returns
+     -------
+     str
+         Content read from the temporary file.
+
+     Raises
+     ------
+     RuntimeError
+         If a TimeoutError occurs, it raises a RuntimeError with the command's
+         stdout and stderr logs.
+     """
+     try:
+         content = read_from_file_when_ready(tfp_runner_attribute.name, timeout=10)
+         return content
+     except TimeoutError as e:
+         stdout_log = open(command_obj.log_files["stdout"]).read()
+         stderr_log = open(command_obj.log_files["stderr"]).read()
+         command = " ".join(command_obj.command)
+         error_message = "Error executing: '%s':\n" % command
+         if stdout_log.strip():
+             error_message += "\nStdout:\n%s\n" % stdout_log
+         if stderr_log.strip():
+             error_message += "\nStderr:\n%s\n" % stderr_log
+         raise RuntimeError(error_message) from e
+
+
+ def get_lower_level_group(
+     api, top_level_kwargs: Dict, _type: Optional[str], deployer_kwargs: Dict
+ ):
+     """
+     Retrieve a lower-level group from the API based on the type and provided arguments.
+
+     Parameters
+     ----------
+     api : MetaflowAPI
+         Metaflow API instance.
+     top_level_kwargs : Dict
+         Top-level keyword arguments to pass to the API.
+     _type : str
+         Type of the deployer implementation to target.
+     deployer_kwargs : Dict
+         Keyword arguments specific to the deployer.
+
+     Returns
+     -------
+     Any
+         The lower-level group object retrieved from the API.
+
+     Raises
+     ------
+     ValueError
+         If `_type` is None.
+     """
+     if _type is None:
+         raise ValueError(
+             "DeployerImpl doesn't have a 'TYPE' to target. Please use a sub-class of DeployerImpl."
+         )
+     return getattr(api(**top_level_kwargs), _type)(**deployer_kwargs)
+
+
+ class Deployer(object):
+     """
+     Use the `Deployer` class to configure and access one of the production
+     orchestrators supported by Metaflow.
+
+     Parameters
+     ----------
+     flow_file : str
+         Path to the flow file to deploy.
+     show_output : bool, default True
+         Show the 'stdout' and 'stderr' to the console by default.
+     profile : Optional[str], default None
+         Metaflow profile to use for the deployment. If not specified, the default
+         profile is used.
+     env : Optional[Dict[str, str]], default None
+         Additional environment variables to set for the deployment.
+     cwd : Optional[str], default None
+         The directory to run the subprocess in; if not specified, the current
+         directory is used.
+     **kwargs : Any
+         Additional arguments that you would pass to `python myflow.py` before
+         the deployment command.
+     """
+
+     def __init__(
+         self,
+         flow_file: str,
+         show_output: bool = True,
+         profile: Optional[str] = None,
+         env: Optional[Dict] = None,
+         cwd: Optional[str] = None,
+         **kwargs
+     ):
+         self.flow_file = flow_file
+         self.show_output = show_output
+         self.profile = profile
+         self.env = env
+         self.cwd = cwd
+         self.top_level_kwargs = kwargs
+
+         from metaflow.plugins import DEPLOYER_IMPL_PROVIDERS
+
+         for provider_class in DEPLOYER_IMPL_PROVIDERS:
+             # TYPE is the name of the CLI group, i.e.
+             # `argo-workflows` instead of `argo_workflows`.
+             # The injected method names replace '-' with '_', though.
+             method_name = provider_class.TYPE.replace("-", "_")
+             setattr(Deployer, method_name, self.__make_function(provider_class))
+
+     def __make_function(self, deployer_class):
+         """
+         Create a function for the given deployer class.
+
+         Parameters
+         ----------
+         deployer_class : Type[DeployerImpl]
+             Deployer implementation class.
+
+         Returns
+         -------
+         Callable
+             Function that initializes and returns an instance of the deployer class.
+         """
+
+         def f(self, **deployer_kwargs):
+             return deployer_class(
+                 deployer_kwargs=deployer_kwargs,
+                 flow_file=self.flow_file,
+                 show_output=self.show_output,
+                 profile=self.profile,
+                 env=self.env,
+                 cwd=self.cwd,
+                 **self.top_level_kwargs
+             )
+
+         return f
+
+
+ class TriggeredRun(object):
+     """
+     TriggeredRun represents a run that has been triggered on a production orchestrator.
+
+     The `run` object corresponding to the run becomes available only once the
+     `start` task has started running.
+     """
+
+     def __init__(
+         self,
+         deployer: "DeployerImpl",
+         content: str,
+     ):
+         self.deployer = deployer
+         content_json = json.loads(content)
+         self.metadata_for_flow = content_json.get("metadata")
+         self.pathspec = content_json.get("pathspec")
+         self.name = content_json.get("name")
+
+     def _enrich_object(self, env):
+         """
+         Enrich the TriggeredRun object with additional properties and methods.
+
+         Parameters
+         ----------
+         env : dict
+             Environment dictionary containing properties and methods to add.
+         """
+         for k, v in env.items():
+             if isinstance(v, property):
+                 setattr(self.__class__, k, v)
+             elif callable(v):
+                 setattr(self, k, functools.partial(v, self))
+             else:
+                 setattr(self.__class__, k, property(fget=lambda _, v=v: v))
+
+     @property
+     def run(self):
+         """
+         Retrieve the `Run` object for the triggered run.
+
+         Note that the Metaflow `Run` object becomes available only when the
+         `start` task has started executing.
+
+         Returns
+         -------
+         Run, optional
+             Metaflow Run object if the `start` step has started executing, otherwise None.
+         """
+         from metaflow import Run
+
+         try:
+             return Run(self.pathspec, _namespace_check=False)
+         except MetaflowNotFound:
+             return None
+
+
+ class DeployedFlow(object):
+     """
+     DeployedFlow represents a flow that has been deployed.
+
+     Parameters
+     ----------
+     deployer : DeployerImpl
+         Instance of the deployer implementation.
+     """
+
+     def __init__(self, deployer: "DeployerImpl"):
+         self.deployer = deployer
+
+     def _enrich_object(self, env):
+         """
+         Enrich the DeployedFlow object with additional properties and methods.
+
+         Parameters
+         ----------
+         env : dict
+             Environment dictionary containing properties and methods to add.
+         """
+         for k, v in env.items():
+             if isinstance(v, property):
+                 setattr(self.__class__, k, v)
+             elif callable(v):
+                 setattr(self, k, functools.partial(v, self))
+             else:
+                 setattr(self.__class__, k, property(fget=lambda _, v=v: v))
+
+
+ class DeployerImpl(object):
+     """
+     Base class for deployer implementations. Each implementation should define a TYPE
+     class variable that matches the name of the CLI group.
+
+     Parameters
+     ----------
+     flow_file : str
+         Path to the flow file to deploy.
+     show_output : bool, default True
+         Show the 'stdout' and 'stderr' to the console by default.
+     profile : Optional[str], default None
+         Metaflow profile to use for the deployment. If not specified, the default
+         profile is used.
+     env : Optional[Dict], default None
+         Additional environment variables to set for the deployment.
+     cwd : Optional[str], default None
+         The directory to run the subprocess in; if not specified, the current
+         directory is used.
+     **kwargs : Any
+         Additional arguments that you would pass to `python myflow.py` before
+         the deployment command.
+     """
+
+     TYPE: ClassVar[Optional[str]] = None
+
+     def __init__(
+         self,
+         flow_file: str,
+         show_output: bool = True,
+         profile: Optional[str] = None,
+         env: Optional[Dict] = None,
+         cwd: Optional[str] = None,
+         **kwargs
+     ):
+         if self.TYPE is None:
+             raise ValueError(
+                 "DeployerImpl doesn't have a 'TYPE' to target. Please use a sub-class of DeployerImpl."
+             )
+
+         if "metaflow.cli" in sys.modules:
+             importlib.reload(sys.modules["metaflow.cli"])
+         from metaflow.cli import start
+         from metaflow.runner.click_api import MetaflowAPI
+
+         self.flow_file = flow_file
+         self.show_output = show_output
+         self.profile = profile
+         self.env = env
+         self.cwd = cwd
+
+         self.env_vars = os.environ.copy()
+         self.env_vars.update(self.env or {})
+         if self.profile:
+             self.env_vars["METAFLOW_PROFILE"] = profile
+
+         self.spm = SubprocessManager()
+         self.top_level_kwargs = kwargs
+         self.api = MetaflowAPI.from_cli(self.flow_file, start)
+
+     def __enter__(self) -> "DeployerImpl":
+         return self
+
+     def create(self, **kwargs) -> DeployedFlow:
+         """
+         Create a deployed flow using the deployer implementation.
+
+         Parameters
+         ----------
+         **kwargs : Any
+             Additional arguments to pass to `create`, corresponding to the
+             command-line arguments of `create`.
+
+         Returns
+         -------
+         DeployedFlow
+             DeployedFlow object representing the deployed flow.
+
+         Raises
+         ------
+         Exception
+             If there is an error during deployment.
+         """
+         with tempfile.TemporaryDirectory() as temp_dir:
+             tfp_runner_attribute = tempfile.NamedTemporaryFile(
+                 dir=temp_dir, delete=False
+             )
+             # every subclass needs to have `self.deployer_kwargs`
+             command = get_lower_level_group(
+                 self.api, self.top_level_kwargs, self.TYPE, self.deployer_kwargs
+             ).create(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)
+
+             pid = self.spm.run_command(
+                 [sys.executable, *command],
+                 env=self.env_vars,
+                 cwd=self.cwd,
+                 show_output=self.show_output,
+             )
+
+             command_obj = self.spm.get(pid)
+             content = handle_timeout(tfp_runner_attribute, command_obj)
+             content = json.loads(content)
+             self.name = content.get("name")
+             self.flow_name = content.get("flow_name")
+             self.metadata = content.get("metadata")
+
+             if command_obj.process.returncode == 0:
+                 deployed_flow = DeployedFlow(deployer=self)
+                 self._enrich_deployed_flow(deployed_flow)
+                 return deployed_flow
+
+         raise Exception("Error deploying %s to %s" % (self.flow_file, self.TYPE))
+
+     def _enrich_deployed_flow(self, deployed_flow: DeployedFlow):
+         """
+         Enrich the DeployedFlow object with additional properties and methods.
+
+         Parameters
+         ----------
+         deployed_flow : DeployedFlow
+             The DeployedFlow object to enrich.
+         """
+         raise NotImplementedError
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         """
+         Clean up resources on exit.
+         """
+         self.cleanup()
+
+     def cleanup(self):
+         """
+         Clean up resources.
+         """
+         self.spm.cleanup()
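
Putting the new API together, a hedged usage sketch: the flow file and deployment name are hypothetical, and `trigger` is one of the orchestrator-specific methods injected at runtime via `_enrich_object` (defined in `argo_workflows_deployer.py` / `step_functions_deployer.py`, not in this file):

from metaflow.runner.deployer import Deployer

# The `argo_workflows` method is injected from DEPLOYER_IMPL_PROVIDERS and
# mirrors the `argo-workflows` CLI group (with '-' replaced by '_').
deployed = Deployer("myflow.py", profile="dev").argo_workflows(name="demo").create()

triggered = deployed.trigger()  # injected method; returns a TriggeredRun
print(triggered.pathspec)

run = triggered.run             # None until the `start` task begins executing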

metaflow/runner/metaflow_runner.py
@@ -2,33 +2,14 @@ import importlib
  import os
  import sys
  import tempfile
- import time
  from typing import Dict, Iterator, Optional, Tuple

  from metaflow import Run, metadata

+ from .utils import clear_and_set_os_environ, read_from_file_when_ready
  from .subprocess_manager import CommandManager, SubprocessManager


- def clear_and_set_os_environ(env: Dict):
-     os.environ.clear()
-     os.environ.update(env)
-
-
- def read_from_file_when_ready(file_path: str, timeout: float = 5):
-     start_time = time.time()
-     with open(file_path, "r", encoding="utf-8") as file_pointer:
-         content = file_pointer.read()
-         while not content:
-             if time.time() - start_time > timeout:
-                 raise TimeoutError(
-                     "Timeout while waiting for file content from '%s'" % file_path
-                 )
-             time.sleep(0.1)
-             content = file_pointer.read()
-         return content
-
-
  class ExecutingRun(object):
      """
      This class contains a reference to a `metaflow.Run` object representing