metaflow 2.12.28__py2.py3-none-any.whl → 2.12.29__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. metaflow/__init__.py +2 -3
  2. metaflow/cli.py +23 -13
  3. metaflow/client/core.py +2 -2
  4. metaflow/clone_util.py +1 -1
  5. metaflow/cmd/develop/stub_generator.py +623 -233
  6. metaflow/datastore/task_datastore.py +1 -1
  7. metaflow/extension_support/plugins.py +1 -0
  8. metaflow/flowspec.py +2 -2
  9. metaflow/includefile.py +8 -14
  10. metaflow/metaflow_config.py +4 -0
  11. metaflow/metaflow_current.py +1 -1
  12. metaflow/parameters.py +3 -0
  13. metaflow/plugins/__init__.py +12 -3
  14. metaflow/plugins/airflow/airflow_cli.py +5 -0
  15. metaflow/plugins/airflow/airflow_decorator.py +1 -1
  16. metaflow/plugins/argo/argo_workflows_decorator.py +1 -1
  17. metaflow/plugins/argo/argo_workflows_deployer.py +77 -263
  18. metaflow/plugins/argo/argo_workflows_deployer_objects.py +381 -0
  19. metaflow/plugins/aws/batch/batch_cli.py +1 -1
  20. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  21. metaflow/plugins/aws/step_functions/step_functions_cli.py +7 -0
  22. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -1
  23. metaflow/plugins/aws/step_functions/step_functions_deployer.py +65 -224
  24. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +236 -0
  25. metaflow/plugins/azure/includefile_support.py +2 -0
  26. metaflow/plugins/cards/card_cli.py +3 -2
  27. metaflow/plugins/cards/card_modules/components.py +9 -9
  28. metaflow/plugins/cards/card_server.py +39 -14
  29. metaflow/plugins/datatools/local.py +2 -0
  30. metaflow/plugins/datatools/s3/s3.py +2 -0
  31. metaflow/plugins/env_escape/__init__.py +3 -3
  32. metaflow/plugins/gcp/includefile_support.py +3 -0
  33. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  34. metaflow/plugins/kubernetes/kubernetes_decorator.py +5 -4
  35. metaflow/plugins/{metadata → metadata_providers}/local.py +2 -2
  36. metaflow/plugins/{metadata → metadata_providers}/service.py +2 -2
  37. metaflow/plugins/parallel_decorator.py +1 -1
  38. metaflow/plugins/pypi/conda_decorator.py +1 -1
  39. metaflow/plugins/test_unbounded_foreach_decorator.py +1 -1
  40. metaflow/runner/click_api.py +4 -0
  41. metaflow/runner/deployer.py +139 -269
  42. metaflow/runner/deployer_impl.py +167 -0
  43. metaflow/runner/metaflow_runner.py +10 -9
  44. metaflow/runner/nbdeploy.py +12 -13
  45. metaflow/runner/nbrun.py +3 -3
  46. metaflow/runner/utils.py +55 -8
  47. metaflow/runtime.py +1 -1
  48. metaflow/task.py +1 -1
  49. metaflow/version.py +1 -1
  50. {metaflow-2.12.28.dist-info → metaflow-2.12.29.dist-info}/METADATA +2 -2
  51. {metaflow-2.12.28.dist-info → metaflow-2.12.29.dist-info}/RECORD +60 -57
  52. /metaflow/{metadata → metadata_provider}/__init__.py +0 -0
  53. /metaflow/{metadata → metadata_provider}/heartbeat.py +0 -0
  54. /metaflow/{metadata → metadata_provider}/metadata.py +0 -0
  55. /metaflow/{metadata → metadata_provider}/util.py +0 -0
  56. /metaflow/plugins/{metadata → metadata_providers}/__init__.py +0 -0
  57. {metaflow-2.12.28.dist-info → metaflow-2.12.29.dist-info}/LICENSE +0 -0
  58. {metaflow-2.12.28.dist-info → metaflow-2.12.29.dist-info}/WHEEL +0 -0
  59. {metaflow-2.12.28.dist-info → metaflow-2.12.29.dist-info}/entry_points.txt +0 -0
  60. {metaflow-2.12.28.dist-info → metaflow-2.12.29.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,167 @@
1
+ import importlib
2
+ import json
3
+ import os
4
+ import sys
5
+ import tempfile
6
+
7
+ from typing import Any, ClassVar, Dict, Optional, TYPE_CHECKING, Type
8
+
9
+ from .subprocess_manager import SubprocessManager
10
+ from .utils import get_lower_level_group, handle_timeout
11
+
12
+ if TYPE_CHECKING:
13
+ import metaflow.runner.deployer
14
+
15
+ # NOTE: This file is separate from the deployer.py file to prevent circular imports.
16
+ # This file is needed in any of the DeployerImpl implementations
17
+ # (like argo_workflows_deployer.py), which are in turn needed to create the Deployer
18
+ # class (i.e., it uses ArgoWorkflowsDeployer to create the Deployer class).
19
+
20
+
21
class DeployerImpl(object):
    """
    Base class for deployer implementations. Each implementation should define a TYPE
    class variable that matches the name of the CLI group.

    Parameters
    ----------
    flow_file : str
        Path to the flow file to deploy.
    show_output : bool, default True
        Show the 'stdout' and 'stderr' to the console by default.
    profile : Optional[str], default None
        Metaflow profile to use for the deployment. If not specified, the default
        profile is used.
    env : Optional[Dict], default None
        Additional environment variables to set for the deployment.
    cwd : Optional[str], default None
        The directory to run the subprocess in; if not specified, the current
        directory is used.
    file_read_timeout : int, default 3600
        The timeout until which we try to read the deployer attribute file.
    **kwargs : Any
        Additional arguments that you would pass to `python myflow.py` before
        the deployment command.

    Raises
    ------
    ValueError
        If the class being instantiated does not define `TYPE`.
    """

    # Name of the CLI group targeted by the implementation; `None` on this base
    # class, which makes `__init__` refuse to instantiate it directly.
    TYPE: ClassVar[Optional[str]] = None

    def __init__(
        self,
        flow_file: str,
        show_output: bool = True,
        profile: Optional[str] = None,
        env: Optional[Dict] = None,
        cwd: Optional[str] = None,
        file_read_timeout: int = 3600,
        **kwargs
    ):
        if self.TYPE is None:
            raise ValueError(
                "DeployerImpl doesn't have a 'TYPE' to target. Please use a sub-class "
                "of DeployerImpl."
            )

        # NOTE(review): an already imported `metaflow.cli` is reloaded before
        # `start` is imported -- presumably so the CLI is rebuilt from a fresh
        # module state; confirm the exact reason with the CLI owners.
        if "metaflow.cli" in sys.modules:
            importlib.reload(sys.modules["metaflow.cli"])
        from metaflow.cli import start
        from metaflow.runner.click_api import MetaflowAPI

        self.flow_file = flow_file
        self.show_output = show_output
        self.profile = profile
        self.env = env
        self.cwd = cwd
        self.file_read_timeout = file_read_timeout

        # Environment for the subprocess: a copy of the current process
        # environment, overlaid with `env`, then with the requested profile.
        self.env_vars = os.environ.copy()
        self.env_vars.update(self.env or {})
        if self.profile:
            self.env_vars["METAFLOW_PROFILE"] = profile

        self.spm = SubprocessManager()
        self.top_level_kwargs = kwargs
        self.api = MetaflowAPI.from_cli(self.flow_file, start)

    @property
    def deployer_kwargs(self) -> Dict[str, Any]:
        """
        Keyword arguments passed to the `TYPE` sub-command by `_create`.

        Sub-classes must override this property.
        """
        raise NotImplementedError

    @staticmethod
    def deployed_flow_type() -> Type["metaflow.runner.deployer.DeployedFlow"]:
        """
        Return the `DeployedFlow` class associated with this implementation.

        Sub-classes must override this method.
        """
        raise NotImplementedError

    def __enter__(self) -> "DeployerImpl":
        return self

    def create(self, **kwargs) -> "metaflow.runner.deployer.DeployedFlow":
        """
        Create a sub-class of a `DeployedFlow` depending on the deployer implementation.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments to pass to `create` corresponding to the
            command line arguments of `create`

        Returns
        -------
        DeployedFlow
            DeployedFlow object representing the deployed flow.

        Raises
        ------
        Exception
            If there is an error during deployment.
        """
        # Sub-classes should implement this by simply calling _create and pass the
        # proper class as the DeployedFlow to return.
        raise NotImplementedError

    def _create(
        self, create_class: Type["metaflow.runner.deployer.DeployedFlow"], **kwargs
    ) -> "metaflow.runner.deployer.DeployedFlow":
        """
        Run the `create` command of the `TYPE` sub-command and wrap the result.

        The command is given the path of a temporary "deployer attribute file";
        the JSON content read back from that file populates `self.name`,
        `self.flow_name`, `self.metadata` and `self.additional_info`.

        Parameters
        ----------
        create_class : Type[DeployedFlow]
            Class instantiated as `create_class(deployer=self)` on success.
        **kwargs : Any
            Additional arguments forwarded to the `create` command.

        Returns
        -------
        DeployedFlow
            Instance of `create_class` bound to this deployer.

        Raises
        ------
        RuntimeError
            If the subprocess return code is not 0. Errors raised by
            `handle_timeout` while reading the attribute file propagate as is.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            # `delete=False` keeps the file on disk after the handle is closed;
            # the enclosing TemporaryDirectory removes it. Managing the handle
            # with `with` guarantees it is closed (also on errors) before the
            # directory is cleaned up, instead of leaking it.
            with tempfile.NamedTemporaryFile(
                dir=temp_dir, delete=False
            ) as tfp_runner_attribute:
                # every subclass needs to have `self.deployer_kwargs`
                command = get_lower_level_group(
                    self.api, self.top_level_kwargs, self.TYPE, self.deployer_kwargs
                ).create(deployer_attribute_file=tfp_runner_attribute.name, **kwargs)

                pid = self.spm.run_command(
                    [sys.executable, *command],
                    env=self.env_vars,
                    cwd=self.cwd,
                    show_output=self.show_output,
                )

                command_obj = self.spm.get(pid)
                content = handle_timeout(
                    tfp_runner_attribute, command_obj, self.file_read_timeout
                )

            content = json.loads(content)
            self.name = content.get("name")
            self.flow_name = content.get("flow_name")
            self.metadata = content.get("metadata")
            # Additional info is used to pass additional deployer specific information.
            # It is used in non-OSS deployers (extensions).
            self.additional_info = content.get("additional_info", {})

            if command_obj.process.returncode == 0:
                return create_class(deployer=self)

        raise RuntimeError("Error deploying %s to %s" % (self.flow_file, self.TYPE))

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Cleanup resources on exit.
        """
        self.cleanup()

    def cleanup(self):
        """
        Cleanup resources.
        """
        self.spm.cleanup()
@@ -67,10 +67,11 @@ class ExecutingRun(object):
67
67
 
68
68
  Parameters
69
69
  ----------
70
- timeout : Optional[float], default None
71
- The maximum time to wait for the run to finish.
72
- If the timeout is reached, the run is terminated
73
- stream : Optional[str], default None
70
+ timeout : float, optional, default None
71
+ The maximum time, in seconds, to wait for the run to finish.
72
+ If the timeout is reached, the run is terminated. If not specified, wait
73
+ forever.
74
+ stream : str, optional, default None
74
75
  If specified, the specified stream is printed to stdout. `stream` can
75
76
  be one of `stdout` or `stderr`.
76
77
 
@@ -167,7 +168,7 @@ class ExecutingRun(object):
167
168
  ----------
168
169
  stream : str
169
170
  The stream to stream logs from. Can be one of `stdout` or `stderr`.
170
- position : Optional[int], default None
171
+ position : int, optional, default None
171
172
  The position in the log file to start streaming from. If None, it starts
172
173
  from the beginning of the log file. This allows resuming streaming from
173
174
  a previously known position
@@ -207,13 +208,13 @@ class Runner(object):
207
208
  show_output : bool, default True
208
209
  Show the 'stdout' and 'stderr' to the console by default,
209
210
  Only applicable for synchronous 'run' and 'resume' functions.
210
- profile : Optional[str], default None
211
+ profile : str, optional, default None
211
212
  Metaflow profile to use to run this run. If not specified, the default
212
213
  profile is used (or the one already set using `METAFLOW_PROFILE`)
213
- env : Optional[Dict], default None
214
+ env : Dict[str, str], optional, default None
214
215
  Additional environment variables to set for the Run. This overrides the
215
216
  environment set for this process.
216
- cwd : Optional[str], default None
217
+ cwd : str, optional, default None
217
218
  The directory to run the subprocess in; if not specified, the current
218
219
  directory is used.
219
220
  file_read_timeout : int, default 3600
@@ -228,7 +229,7 @@ class Runner(object):
228
229
  flow_file: str,
229
230
  show_output: bool = True,
230
231
  profile: Optional[str] = None,
231
- env: Optional[Dict] = None,
232
+ env: Optional[Dict[str, str]] = None,
232
233
  cwd: Optional[str] = None,
233
234
  file_read_timeout: int = 3600,
234
235
  **kwargs
@@ -37,13 +37,13 @@ class NBDeployer(object):
37
37
  Flow defined in the same cell
38
38
  show_output : bool, default True
39
39
  Show the 'stdout' and 'stderr' to the console by default,
40
- profile : Optional[str], default None
40
+ profile : str, optional, default None
41
41
  Metaflow profile to use to deploy this run. If not specified, the default
42
42
  profile is used (or the one already set using `METAFLOW_PROFILE`)
43
- env : Optional[Dict[str, str]], default None
43
+ env : Dict[str, str], optional, default None
44
44
  Additional environment variables to set. This overrides the
45
45
  environment set for this process.
46
- base_dir : Optional[str], default None
46
+ base_dir : str, optional, default None
47
47
  The directory to run the subprocess in; if not specified, the current
48
48
  working directory is used.
49
49
  **kwargs : Any
@@ -66,10 +66,11 @@ class NBDeployer(object):
66
66
  from IPython import get_ipython
67
67
 
68
68
  ipython = get_ipython()
69
- except ModuleNotFoundError:
69
+ except ModuleNotFoundError as e:
70
70
  raise NBDeployerInitializationError(
71
- "'NBDeployer' requires an interactive Python environment (such as Jupyter)"
72
- )
71
+ "'NBDeployer' requires an interactive Python environment "
72
+ "(such as Jupyter)"
73
+ ) from e
73
74
 
74
75
  self.cell = get_current_cell(ipython)
75
76
  self.flow = flow
@@ -116,13 +117,11 @@ class NBDeployer(object):
116
117
  **kwargs,
117
118
  )
118
119
 
119
- from metaflow.plugins import DEPLOYER_IMPL_PROVIDERS
120
-
121
- for provider_class in DEPLOYER_IMPL_PROVIDERS:
122
- method_name = provider_class.TYPE.replace("-", "_")
123
- setattr(
124
- NBDeployer, method_name, self.deployer.__make_function(provider_class)
125
- )
120
+ def __getattr__(self, name):
121
+ """
122
+ Forward all attribute access to the underlying `Deployer` instance.
123
+ """
124
+ return getattr(self.deployer, name)
126
125
 
127
126
  def cleanup(self):
128
127
  """
metaflow/runner/nbrun.py CHANGED
@@ -34,13 +34,13 @@ class NBRunner(object):
34
34
  show_output : bool, default True
35
35
  Show the 'stdout' and 'stderr' to the console by default,
36
36
  Only applicable for synchronous 'run' and 'resume' functions.
37
- profile : Optional[str], default None
37
+ profile : str, optional, default None
38
38
  Metaflow profile to use to run this run. If not specified, the default
39
39
  profile is used (or the one already set using `METAFLOW_PROFILE`)
40
- env : Optional[Dict], default None
40
+ env : Dict[str, str], optional, default None
41
41
  Additional environment variables to set for the Run. This overrides the
42
42
  environment set for this process.
43
- base_dir : Optional[str], default None
43
+ base_dir : str, optional, default None
44
44
  The directory to run the subprocess in; if not specified, the current
45
45
  working directory is used.
46
46
  file_read_timeout : int, default 3600
metaflow/runner/utils.py CHANGED
@@ -4,10 +4,12 @@ import time
4
4
  import asyncio
5
5
 
6
6
  from subprocess import CalledProcessError
7
- from typing import Dict, TYPE_CHECKING
7
+ from typing import Any, Dict, TYPE_CHECKING
8
8
 
9
9
  if TYPE_CHECKING:
10
- from .subprocess_manager import CommandManager
10
+ import tempfile
11
+ import metaflow.runner.subprocess_manager
12
+ import metaflow.runner.click_api
11
13
 
12
14
 
13
15
  def get_current_cell(ipython):
@@ -18,7 +20,8 @@ def get_current_cell(ipython):
18
20
 
19
21
  def format_flowfile(cell):
20
22
  """
21
- Formats the given cell content to create a valid Python script that can be executed as a Metaflow flow.
23
+ Formats the given cell content to create a valid Python script that can be
24
+ executed as a Metaflow flow.
22
25
  """
23
26
  flowspec = [
24
27
  x
@@ -36,7 +39,9 @@ def format_flowfile(cell):
36
39
  return "\n".join(lines)
37
40
 
38
41
 
39
- def check_process_status(command_obj: "CommandManager"):
42
+ def check_process_status(
43
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
44
+ ):
40
45
  if isinstance(command_obj.process, asyncio.subprocess.Process):
41
46
  return command_obj.process.returncode is not None
42
47
  else:
@@ -44,7 +49,9 @@ def check_process_status(command_obj: "CommandManager"):
44
49
 
45
50
 
46
51
  def read_from_file_when_ready(
47
- file_path: str, command_obj: "CommandManager", timeout: float = 5
52
+ file_path: str,
53
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
54
+ timeout: float = 5,
48
55
  ):
49
56
  start_time = time.time()
50
57
  with open(file_path, "r", encoding="utf-8") as file_pointer:
@@ -70,7 +77,9 @@ def read_from_file_when_ready(
70
77
 
71
78
 
72
79
  def handle_timeout(
73
- tfp_runner_attribute, command_obj: "CommandManager", file_read_timeout: int
80
+ tfp_runner_attribute: "tempfile._TemporaryFileWrapper[str]",
81
+ command_obj: "metaflow.runner.subprocess_manager.CommandManager",
82
+ file_read_timeout: int,
74
83
  ):
75
84
  """
76
85
  Handle the timeout for a running subprocess command that reads a file
@@ -102,8 +111,8 @@ def handle_timeout(
102
111
  )
103
112
  return content
104
113
  except (CalledProcessError, TimeoutError) as e:
105
- stdout_log = open(command_obj.log_files["stdout"]).read()
106
- stderr_log = open(command_obj.log_files["stderr"]).read()
114
+ stdout_log = open(command_obj.log_files["stdout"], encoding="utf-8").read()
115
+ stderr_log = open(command_obj.log_files["stderr"], encoding="utf-8").read()
107
116
  command = " ".join(command_obj.command)
108
117
  error_message = "Error executing: '%s':\n" % command
109
118
  if stdout_log.strip():
@@ -111,3 +120,41 @@ def handle_timeout(
111
120
  if stderr_log.strip():
112
121
  error_message += "\nStderr:\n%s\n" % stderr_log
113
122
  raise RuntimeError(error_message) from e
123
+
124
+
125
def get_lower_level_group(
    api: "metaflow.runner.click_api.MetaflowAPI",
    top_level_kwargs: Dict[str, Any],
    sub_command: str,
    sub_command_kwargs: Dict[str, Any],
) -> "metaflow.runner.click_api.MetaflowAPI":
    """
    Retrieve a lower-level group from the API based on the type and provided arguments.

    Parameters
    ----------
    api : MetaflowAPI
        Metaflow API instance.
    top_level_kwargs : Dict[str, Any]
        Top-level keyword arguments to pass to the API.
    sub_command : str
        Sub-command of API to get the API for
    sub_command_kwargs : Dict[str, Any]
        Sub-command arguments

    Returns
    -------
    MetaflowAPI
        The lower-level group object retrieved from the API.

    Raises
    ------
    ValueError
        If `sub_command` is not available on the API (the attribute is missing
        or is None).
    """
    top_level_api = api(**top_level_kwargs)
    # Look the sub-command up with a default so that a missing attribute takes the
    # documented ValueError path instead of escaping as a bare AttributeError.
    sub_command_obj = getattr(top_level_api, sub_command, None)

    if sub_command_obj is None:
        raise ValueError(f"Sub-command '{sub_command}' not found in API '{api.name}'")

    return sub_command_obj(**sub_command_kwargs)
metaflow/runtime.py CHANGED
@@ -20,7 +20,7 @@ from metaflow.datastore.exceptions import DataException
20
20
  from contextlib import contextmanager
21
21
 
22
22
  from . import get_namespace
23
- from .metadata import MetaDatum
23
+ from .metadata_provider import MetaDatum
24
24
  from .metaflow_config import MAX_ATTEMPTS, UI_URL
25
25
  from .exception import (
26
26
  MetaflowException,
metaflow/task.py CHANGED
@@ -12,7 +12,7 @@ from metaflow.sidecar import Message, MessageTypes
12
12
  from metaflow.datastore.exceptions import DataException
13
13
 
14
14
  from .metaflow_config import MAX_ATTEMPTS
15
- from .metadata import MetaDatum
15
+ from .metadata_provider import MetaDatum
16
16
  from .mflog import TASK_LOG_SOURCE
17
17
  from .datastore import Inputs, TaskDataStoreSet
18
18
  from .exception import (
metaflow/version.py CHANGED
@@ -1 +1 @@
1
- metaflow_version = "2.12.28"
1
+ metaflow_version = "2.12.29"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: metaflow
3
- Version: 2.12.28
3
+ Version: 2.12.29
4
4
  Summary: Metaflow: More Data Science, Less Engineering
5
5
  Author: Metaflow Developers
6
6
  Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
26
26
  Requires-Dist: requests
27
27
  Requires-Dist: boto3
28
28
  Provides-Extra: stubs
29
- Requires-Dist: metaflow-stubs==2.12.28; extra == "stubs"
29
+ Requires-Dist: metaflow-stubs==2.12.29; extra == "stubs"
30
30
 
31
31
  ![Metaflow_Logo_Horizontal_FullColor_Ribbon_Dark_RGB](https://user-images.githubusercontent.com/763451/89453116-96a57e00-d713-11ea-9fa6-82b29d4d6eff.png)
32
32