ob-metaflow 2.12.36.3__py2.py3-none-any.whl → 2.12.39.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.

Files changed (58)
  1. metaflow/__init__.py +3 -0
  2. metaflow/cli.py +84 -697
  3. metaflow/cli_args.py +17 -0
  4. metaflow/cli_components/__init__.py +0 -0
  5. metaflow/cli_components/dump_cmd.py +96 -0
  6. metaflow/cli_components/init_cmd.py +51 -0
  7. metaflow/cli_components/run_cmds.py +358 -0
  8. metaflow/cli_components/step_cmd.py +189 -0
  9. metaflow/cli_components/utils.py +140 -0
  10. metaflow/cmd/develop/stub_generator.py +9 -2
  11. metaflow/decorators.py +63 -2
  12. metaflow/extension_support/plugins.py +41 -27
  13. metaflow/flowspec.py +156 -16
  14. metaflow/includefile.py +50 -22
  15. metaflow/metaflow_config.py +1 -1
  16. metaflow/package.py +17 -3
  17. metaflow/parameters.py +80 -23
  18. metaflow/plugins/__init__.py +4 -0
  19. metaflow/plugins/airflow/airflow_cli.py +1 -0
  20. metaflow/plugins/argo/argo_workflows.py +41 -1
  21. metaflow/plugins/argo/argo_workflows_cli.py +1 -0
  22. metaflow/plugins/argo/argo_workflows_deployer_objects.py +47 -1
  23. metaflow/plugins/aws/batch/batch_decorator.py +2 -2
  24. metaflow/plugins/aws/step_functions/step_functions.py +32 -0
  25. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -0
  26. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +3 -0
  27. metaflow/plugins/datatools/s3/s3op.py +3 -3
  28. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
  29. metaflow/plugins/kubernetes/kubernetes_decorator.py +2 -2
  30. metaflow/plugins/pypi/bootstrap.py +196 -61
  31. metaflow/plugins/pypi/conda_decorator.py +20 -10
  32. metaflow/plugins/pypi/conda_environment.py +76 -21
  33. metaflow/plugins/pypi/micromamba.py +42 -15
  34. metaflow/plugins/pypi/pip.py +8 -3
  35. metaflow/plugins/pypi/pypi_decorator.py +11 -9
  36. metaflow/plugins/timeout_decorator.py +2 -2
  37. metaflow/runner/click_api.py +73 -19
  38. metaflow/runner/deployer.py +1 -1
  39. metaflow/runner/deployer_impl.py +2 -2
  40. metaflow/runner/metaflow_runner.py +4 -1
  41. metaflow/runner/nbdeploy.py +2 -0
  42. metaflow/runner/nbrun.py +1 -1
  43. metaflow/runner/subprocess_manager.py +3 -1
  44. metaflow/runner/utils.py +41 -19
  45. metaflow/runtime.py +111 -73
  46. metaflow/sidecar/sidecar_worker.py +1 -1
  47. metaflow/user_configs/__init__.py +0 -0
  48. metaflow/user_configs/config_decorators.py +563 -0
  49. metaflow/user_configs/config_options.py +495 -0
  50. metaflow/user_configs/config_parameters.py +386 -0
  51. metaflow/util.py +17 -0
  52. metaflow/version.py +1 -1
  53. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/METADATA +3 -2
  54. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/RECORD +58 -48
  55. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/LICENSE +0 -0
  56. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/WHEEL +0 -0
  57. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/entry_points.txt +0 -0
  58. {ob_metaflow-2.12.36.3.dist-info → ob_metaflow-2.12.39.1.dist-info}/top_level.txt +0 -0

metaflow/plugins/pypi/conda_decorator.py
@@ -50,20 +50,26 @@ class CondaStepDecorator(StepDecorator):
     # conda channels, users can specify channel::package as the package name.
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super(CondaStepDecorator, self).__init__(attributes, statically_defined)
 
+    def init(self):
+        super(CondaStepDecorator, self).init()
+
         # Support legacy 'libraries=' attribute for the decorator.
         self.attributes["packages"] = {
            **self.attributes["libraries"],
            **self.attributes["packages"],
         }
         del self.attributes["libraries"]
+        if self.attributes["packages"]:
+            self._attributes_with_user_values.add("packages")
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
     def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
         # The init_environment hook for Environment creates the relevant virtual
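
Note on the change above: rather than copying the whole attribute dict, the decorator now records only which keys the user supplied, so defaults merged in later (or values rewritten by configs) are not mistaken for user choices. A minimal, self-contained sketch of this bookkeeping, using a hypothetical MyDecorator rather than the Metaflow classes:

    # Sketch only: MyDecorator and its defaults are hypothetical, not the
    # classes from the diff.
    class MyDecorator:
        defaults = {"packages": {}, "python": None}

        def __init__(self, attributes=None):
            # Record which keys the user passed explicitly ...
            self._attributes_with_user_values = (
                set(attributes.keys()) if attributes is not None else set()
            )
            # ... then merge user values over defaults as before.
            self.attributes = {**self.defaults, **(attributes or {})}

        def is_attribute_user_defined(self, name):
            return name in self._attributes_with_user_values

    d = MyDecorator({"python": "3.11"})
    assert d.is_attribute_user_defined("python")
    assert not d.is_attribute_user_defined("packages")  # a default, not user-set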
@@ -83,10 +89,10 @@ class CondaStepDecorator(StepDecorator):
                **super_attributes["packages"],
                **self.attributes["packages"],
             }
-            self._user_defined_attributes = {
-                **self._user_defined_attributes,
-                **conda_base._user_defined_attributes,
-            }
+            self._attributes_with_user_values.update(
+                conda_base._attributes_with_user_values
+            )
+
             self.attributes["python"] = (
                 self.attributes["python"] or super_attributes["python"]
             )
@@ -333,11 +339,15 @@ class CondaFlowDecorator(FlowDecorator):
     }
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super(CondaFlowDecorator, self).__init__(attributes, statically_defined)
 
+    def init(self):
+        super(CondaFlowDecorator, self).init()
+
         # Support legacy 'libraries=' attribute for the decorator.
         self.attributes["packages"] = {
            **self.attributes["libraries"],
@@ -348,7 +358,7 @@ class CondaFlowDecorator(FlowDecorator):
             self.attributes["python"] = str(self.attributes["python"])
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
     def flow_init(
         self, flow, graph, environment, flow_datastore, metadata, logger, echo, options

metaflow/plugins/pypi/conda_environment.py
@@ -5,10 +5,11 @@ import functools
 import io
 import json
 import os
-import sys
 import tarfile
+import threading
 import time
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import wraps
 from hashlib import sha256
 from io import BufferedIOBase, BytesIO
 from itertools import chain
@@ -50,7 +51,6 @@ class CondaEnvironment(MetaflowEnvironment):
 
     def validate_environment(self, logger, datastore_type):
         self.datastore_type = datastore_type
-        self.logger = logger
 
         # Avoiding circular imports.
         from metaflow.plugins import DATASTORES
@@ -62,8 +62,21 @@ class CondaEnvironment(MetaflowEnvironment):
         from .micromamba import Micromamba
         from .pip import Pip
 
-        micromamba = Micromamba()
-        self.solvers = {"conda": micromamba, "pypi": Pip(micromamba)}
+        print_lock = threading.Lock()
+
+        def make_thread_safe(func):
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                with print_lock:
+                    return func(*args, **kwargs)
+
+            return wrapper
+
+        self.logger = make_thread_safe(logger)
+
+        # TODO: Wire up logging
+        micromamba = Micromamba(self.logger)
+        self.solvers = {"conda": micromamba, "pypi": Pip(micromamba, self.logger)}
 
     def init_environment(self, echo, only_steps=None):
         # The implementation optimizes for latency to ensure as many operations can
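
The wrapper introduced above serializes calls to the logger so that log lines emitted from concurrent solves do not interleave. A standalone sketch of the same lock-around-callable pattern (the worker payloads are placeholders):

    import threading
    from concurrent.futures import ThreadPoolExecutor
    from functools import wraps

    _print_lock = threading.Lock()

    def make_thread_safe(func):
        # Serialize calls to func so concurrent workers cannot interleave output.
        @wraps(func)
        def wrapper(*args, **kwargs):
            with _print_lock:
                return func(*args, **kwargs)

        return wrapper

    log = make_thread_safe(print)
    with ThreadPoolExecutor() as ex:
        for i in range(4):
            ex.submit(log, "solving environment", i)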
@@ -150,6 +163,9 @@ class CondaEnvironment(MetaflowEnvironment):
                     (
                         package["path"],
                         # Lazily fetch package from the interweb if needed.
+                        # TODO: Depending on the len_hint, the package might be downloaded from
+                        # the interweb prematurely. save_bytes needs to be adjusted to handle
+                        # this scenario.
                         LazyOpen(
                             package["local_path"],
                             "rb",
@@ -166,22 +182,60 @@ class CondaEnvironment(MetaflowEnvironment):
             if id_ in dirty:
                 self.write_to_environment_manifest([id_, platform, type_], packages)
 
-        # First resolve environments through Conda, before PyPI.
+        storage = None
+        if self.datastore_type not in ["local"]:
+            # Initialize storage for caching if using a remote datastore
+            storage = self.datastore(_datastore_packageroot(self.datastore, echo))
+
         self.logger("Bootstrapping virtual environment(s) ...")
-        for solver in ["conda", "pypi"]:
-            with ThreadPoolExecutor() as executor:
-                results = list(
-                    executor.map(lambda x: solve(*x, solver), environments(solver))
-                )
-            _ = list(map(lambda x: self.solvers[solver].download(*x), results))
-            with ThreadPoolExecutor() as executor:
-                _ = list(
-                    executor.map(lambda x: self.solvers[solver].create(*x), results)
-                )
-            if self.datastore_type not in ["local"]:
-                # Cache packages only when a remote datastore is in play.
-                storage = self.datastore(_datastore_packageroot(self.datastore, echo))
-                cache(storage, results, solver)
+        # Sequence of operations:
+        # 1. Start all conda solves in parallel
+        # 2. Download conda packages sequentially
+        # 3. Create and cache conda environments in parallel
+        # 4. Start PyPI solves in parallel after each conda environment is created
+        # 5. Download PyPI packages sequentially
+        # 6. Create and cache PyPI environments in parallel
+
+        with ThreadPoolExecutor() as executor:
+            # Start all conda solves in parallel
+            conda_futures = [
+                executor.submit(lambda x: solve(*x, "conda"), env)
+                for env in environments("conda")
+            ]
+
+            pypi_envs = {env[0]: env for env in environments("pypi")}
+            pypi_futures = []
+
+            # Process conda results sequentially for downloads
+            for future in as_completed(conda_futures):
+                result = future.result()
+                # Sequential conda download
+                self.solvers["conda"].download(*result)
+                # Parallel conda create and cache
+                create_future = executor.submit(self.solvers["conda"].create, *result)
+                if storage:
+                    executor.submit(cache, storage, [result], "conda")
+
+                # Queue PyPI solve to start after conda create
+                if result[0] in pypi_envs:
+
+                    def pypi_solve(env):
+                        create_future.result()  # Wait for conda create
+                        return solve(*env, "pypi")
+
+                    pypi_futures.append(
+                        executor.submit(pypi_solve, pypi_envs[result[0]])
+                    )
+
+            # Process PyPI results sequentially for downloads
+            for solve_future in pypi_futures:
+                result = solve_future.result()
+                # Sequential PyPI download
+                self.solvers["pypi"].download(*result)
+                # Parallel PyPI create and cache
+                executor.submit(self.solvers["pypi"].create, *result)
+                if storage:
+                    executor.submit(cache, storage, [result], "pypi")
         self.logger("Virtual environment(s) bootstrapped!")
 
     def executable(self, step_name, default=None):
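
The rewrite above replaces two synchronous solve/download/create passes with a single executor in which each PyPI solve is chained behind its conda create via create_future.result(). One general caveat with this style: Python closures late-bind variables, so a callback defined inside a loop should usually pin the loop's current values (for example with default arguments) if it may run after the variable is reassigned. A neutral sketch of the chaining pattern with hypothetical task functions:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Hypothetical stand-ins for the solve/create/pypi-solve steps in the diff.
    def solve(name):
        return name

    def create(result):
        return result

    def pypi_solve(result):
        return "pypi-" + result

    with ThreadPoolExecutor() as ex:
        solves = [ex.submit(solve, n) for n in ("env-a", "env-b")]
        chained = []
        for fut in as_completed(solves):
            result = fut.result()
            create_future = ex.submit(create, result)

            # Pin the *current* future and result as default arguments; a bare
            # closure would late-bind and could see a later iteration's values.
            def after_create(f=create_future, r=result):
                f.result()  # wait for the create step to finish
                return pypi_solve(r)

            chained.append(ex.submit(after_create))
        print([f.result() for f in chained])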
@@ -385,7 +439,8 @@ class CondaEnvironment(MetaflowEnvironment):
                 'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
                 % (self.flow.name, id_, self.datastore_type),
                 "echo 'Environment bootstrapped.'",
-                "export PATH=$PATH:$(pwd)/micromamba",
+                # To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
+                "export PATH=$PATH:$(pwd)/micromamba/bin",
             ]
         else:
             # for @conda/@pypi(disabled=True).

metaflow/plugins/pypi/micromamba.py
@@ -1,7 +1,9 @@
+import functools
 import json
 import os
 import subprocess
 import tempfile
+import time
 
 from metaflow.exception import MetaflowException
 from metaflow.util import which
@@ -19,8 +21,11 @@ class MicromambaException(MetaflowException):
         super(MicromambaException, self).__init__(msg)
 
 
+GLIBC_VERSION = os.environ.get("CONDA_OVERRIDE_GLIBC", "2.38")
+
+
 class Micromamba(object):
-    def __init__(self):
+    def __init__(self, logger=None):
         # micromamba is a tiny version of the mamba package manager and comes with
         # metaflow specific performance enhancements.
 
@@ -33,6 +38,12 @@ class Micromamba(object):
             os.path.expanduser(_home),
             "micromamba",
         )
+
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = lambda *args, **kwargs: None  # No-op logger if not provided
+
         self.bin = (
             which(os.environ.get("METAFLOW_PATH_TO_MICROMAMBA") or "micromamba")
             or which("./micromamba")  # to support remote execution
@@ -70,6 +81,9 @@ class Micromamba(object):
             "MAMBA_ADD_PIP_AS_PYTHON_DEPENDENCY": "true",
             "CONDA_SUBDIR": platform,
             # "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0" # https://github.com/conda/conda/issues/9862
+            # Add a default glibc version for linux-64 environments (ignored for other platforms)
+            # TODO: Make the version configurable
+            "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
         }
         cmd = [
             "create",
@@ -78,6 +92,7 @@ class Micromamba(object):
             "--dry-run",
             "--no-extra-safety-checks",
             "--repodata-ttl=86400",
+            "--safety-checks=disabled",
             "--retry-clean-cache",
             "--prefix=%s/prefix" % tmp_dir,
         ]
@@ -91,10 +106,11 @@ class Micromamba(object):
             cmd.append("python==%s" % python)
         # TODO: Ensure a human readable message is returned when the environment
         # can't be resolved for any and all reasons.
-        return [
+        solved_packages = [
             {k: v for k, v in item.items() if k in ["url"]}
             for item in self._call(cmd, env)["actions"]["LINK"]
         ]
+        return solved_packages
 
     def download(self, id_, packages, python, platform):
         # Unfortunately all the packages need to be catalogued in package cache
@@ -103,8 +119,6 @@ class Micromamba(object):
         # Micromamba is painfully slow in determining if many packages are infact
         # already cached. As a perf heuristic, we check if the environment already
         # exists to short circuit package downloads.
-        if self.path_to_environment(id_, platform):
-            return
 
         prefix = "{env_dirs}/{keyword}/{platform}/{id}".format(
             env_dirs=self.info()["envs_dirs"][0],
@@ -113,13 +127,18 @@ class Micromamba(object):
             id=id_,
         )
 
-        # Another forced perf heuristic to skip cross-platform downloads.
+        # cheap check
         if os.path.exists(f"{prefix}/fake.done"):
             return
 
+        # somewhat expensive check
+        if self.path_to_environment(id_, platform):
+            return
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             env = {
                 "CONDA_SUBDIR": platform,
+                "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
             }
             cmd = [
                 "create",
@@ -159,6 +178,7 @@ class Micromamba(object):
             # use hardlinks when possible, otherwise copy files
             # disabled for now since it adds to environment creation latencies
             "CONDA_ALLOW_SOFTLINKS": "0",
+            "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
         }
         cmd = [
             "create",
@@ -174,6 +194,7 @@ class Micromamba(object):
             cmd.append("{url}".format(**package))
         self._call(cmd, env)
 
+    @functools.lru_cache(maxsize=None)
     def info(self):
         return self._call(["config", "list", "-a"])
 
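Memoizing info() with functools.lru_cache avoids re-running `micromamba config list -a` on every call. Note that lru_cache on a method keys the cache on self, so the cache also keeps the instance alive; that is usually acceptable for a long-lived object like this one. A sketch of the behavior (Client is hypothetical):

    import functools

    class Client:
        def __init__(self):
            self.calls = 0

        @functools.lru_cache(maxsize=None)
        def info(self):
            self.calls += 1  # stands in for an expensive subprocess call
            return {"pkgs_dirs": ["/tmp/pkgs"]}

    c = Client()
    c.info()
    c.info()
    assert c.calls == 1  # second call is served from the cache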
@@ -198,18 +219,24 @@ class Micromamba(object):
         }
         directories = self.info()["pkgs_dirs"]
         # search all package caches for packages
-        metadata = {
-            url: os.path.join(d, file)
+
+        file_to_path = {}
+        for d in directories:
+            if os.path.isdir(d):
+                try:
+                    with os.scandir(d) as entries:
+                        for entry in entries:
+                            if entry.is_file():
+                                # Prefer the first occurrence if the file exists in multiple directories
+                                file_to_path.setdefault(entry.name, entry.path)
+                except OSError:
+                    continue
+        ret = {
+            # set package tarball local paths to None if package tarballs are missing
+            url: file_to_path.get(file)
             for url, file in packages_to_filenames.items()
-            for d in directories
-            if os.path.isdir(d)
-            and file in os.listdir(d)
-            and os.path.isfile(os.path.join(d, file))
         }
-        # set package tarball local paths to None if package tarballs are missing
-        for url in packages_to_filenames:
-            metadata.setdefault(url, None)
-        return metadata
+        return ret
 
     def interpreter(self, id_):
         return os.path.join(self.path_to_environment(id_), "bin/python")
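
The rewrite above replaces a repeated os.listdir scan per package with a single os.scandir pass that builds a filename-to-path index once, turning roughly O(packages x directory size) work into one directory walk plus O(1) lookups. The lookup pattern in isolation (directory and file names here are hypothetical):

    import os

    def index_files(directories):
        # One pass per directory; the first occurrence of a filename wins.
        file_to_path = {}
        for d in directories:
            if not os.path.isdir(d):
                continue
            try:
                with os.scandir(d) as entries:
                    for entry in entries:
                        if entry.is_file():
                            file_to_path.setdefault(entry.name, entry.path)
            except OSError:
                continue
        return file_to_path

    index = index_files(["/tmp/cache-a", "/tmp/cache-b"])
    # Missing tarballs resolve to None, matching the diff's behavior.
    print(index.get("numpy-2.0.0-py312.conda"))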

metaflow/plugins/pypi/pip.py
@@ -4,6 +4,7 @@ import re
 import shutil
 import subprocess
 import tempfile
+import time
 from concurrent.futures import ThreadPoolExecutor
 from itertools import chain, product
 from urllib.parse import unquote
@@ -50,10 +51,14 @@ INSTALLATION_MARKER = "{prefix}/.pip/id"
 
 
 class Pip(object):
-    def __init__(self, micromamba=None):
+    def __init__(self, micromamba=None, logger=None):
         # pip is assumed to be installed inside a conda environment managed by
         # micromamba. pip commands are executed using `micromamba run --prefix`
-        self.micromamba = micromamba or Micromamba()
+        self.micromamba = micromamba or Micromamba(logger)
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = lambda *args, **kwargs: None  # No-op logger if not provided
 
     def solve(self, id_, packages, python, platform):
         prefix = self.micromamba.path_to_environment(id_)
@@ -123,7 +128,7 @@ class Pip(object):
                         **res,
                         subdir_str=(
                             "#subdirectory=%s" % subdirectory if subdirectory else ""
-                        )
+                        ),
                     )
                 # used to deduplicate the storage location in case wheel does not
                 # build with enough unique identifiers.

metaflow/plugins/pypi/pypi_decorator.py
@@ -25,9 +25,10 @@ class PyPIStepDecorator(StepDecorator):
     defaults = {"packages": {}, "python": None, "disabled": None}  # wheels
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super().__init__(attributes, statically_defined)
 
     def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
@@ -42,10 +43,9 @@ class PyPIStepDecorator(StepDecorator):
         if "pypi_base" in self.flow._flow_decorators:
             pypi_base = self.flow._flow_decorators["pypi_base"][0]
             super_attributes = pypi_base.attributes
-            self._user_defined_attributes = {
-                **self._user_defined_attributes,
-                **pypi_base._user_defined_attributes,
-            }
+            self._attributes_with_user_values.update(
+                pypi_base._attributes_with_user_values
+            )
             self.attributes["packages"] = {
                 **super_attributes["packages"],
                 **self.attributes["packages"],
@@ -106,7 +106,7 @@ class PyPIStepDecorator(StepDecorator):
         environment.set_local_root(LocalStorage.get_datastore_root_from_config(logger))
 
     def is_attribute_user_defined(self, name):
-        return name in self._user_defined_attributes
+        return name in self._attributes_with_user_values
 
 
 class PyPIFlowDecorator(FlowDecorator):
@@ -129,9 +129,10 @@ class PyPIFlowDecorator(FlowDecorator):
     defaults = {"packages": {}, "python": None, "disabled": None}
 
     def __init__(self, attributes=None, statically_defined=False):
-        self._user_defined_attributes = (
-            attributes.copy() if attributes is not None else {}
+        self._attributes_with_user_values = (
+            set(attributes.keys()) if attributes is not None else set()
         )
+
         super().__init__(attributes, statically_defined)
 
     def flow_init(
@@ -140,6 +141,7 @@ class PyPIFlowDecorator(FlowDecorator):
         from metaflow import decorators
 
         decorators._attach_decorators(flow, ["pypi"])
+        decorators._init(flow)
 
         # @pypi uses a conda environment to create a virtual environment.
         # The conda environment can be created through micromamba.

metaflow/plugins/timeout_decorator.py
@@ -37,8 +37,8 @@ class TimeoutDecorator(StepDecorator):
     name = "timeout"
     defaults = {"seconds": 0, "minutes": 0, "hours": 0}
 
-    def __init__(self, *args, **kwargs):
-        super(TimeoutDecorator, self).__init__(*args, **kwargs)
+    def init(self):
+        super().init()
         # Initialize secs in __init__ so other decorators could safely use this
         # value without worrying about decorator order.
         # Convert values in attributes to type:int since they can be type:str
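
This hunk, like the CondaStepDecorator/CondaFlowDecorator changes above, moves derived-value computation from __init__ into an init() hook. Given the new metaflow/user_configs/ modules in this release, the apparent motivation is that decorator attributes can be rewritten by configs after construction, so anything derived from them must be computed in a later lifecycle step. A minimal sketch of that idea with hypothetical classes, not the Metaflow base classes:

    class BaseDecorator:
        defaults = {}

        def __init__(self, attributes=None):
            self.attributes = {**self.defaults, **(attributes or {})}

        def init(self):
            # Called after configs have had a chance to rewrite self.attributes.
            pass

    class Timeout(BaseDecorator):
        defaults = {"seconds": 0, "minutes": 0, "hours": 0}

        def init(self):
            super().init()
            # Derive secs only now, so config-driven edits are reflected.
            self.secs = (
                int(self.attributes["hours"]) * 3600
                + int(self.attributes["minutes"]) * 60
                + int(self.attributes["seconds"])
            )

    t = Timeout({"minutes": "2"})
    t.attributes["seconds"] = 30  # e.g. a config mutating attributes post-construction
    t.init()
    assert t.secs == 150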

metaflow/runner/click_api.py
@@ -9,6 +9,7 @@ if sys.version_info < (3, 7):
     )
 
 import datetime
+import functools
 import importlib
 import inspect
 import itertools
@@ -38,6 +39,7 @@ from metaflow.decorators import add_decorator_options
 from metaflow.exception import MetaflowException
 from metaflow.includefile import FilePathClass
 from metaflow.parameters import JSONTypeClass, flow_context
+from metaflow.user_configs.config_options import LocalFileInput
 
 # Define a recursive type alias for JSON
 JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
@@ -55,6 +57,7 @@ click_to_python_types = {
     File: str,
     JSONTypeClass: JSON,
     FilePathClass: str,
+    LocalFileInput: str,
 }
 
 
@@ -124,6 +127,37 @@ def _method_sanity_check(
     return method_params
 
 
+def _lazy_load_command(
+    cli_collection: click.Group,
+    flow_parameters: Union[str, List[Parameter]],
+    _self,
+    name: str,
+):
+
+    # Context is not used in get_command so we can pass None. Since we pin click,
+    # this won't change from under us.
+
+    if isinstance(flow_parameters, str):
+        # Resolve flow_parameters -- for start, this is a function which we
+        # need to call to figure out the actual parameters (may be changed by configs)
+        flow_parameters = getattr(_self, flow_parameters)()
+    cmd_obj = cli_collection.get_command(None, name)
+    if cmd_obj:
+        if isinstance(cmd_obj, click.Group):
+            # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
+            result = functools.partial(extract_group(cmd_obj, flow_parameters), _self)
+        elif isinstance(cmd_obj, click.Command):
+            result = functools.partial(extract_command(cmd_obj, flow_parameters), _self)
+        else:
+            raise RuntimeError(
+                "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
+            )
+        setattr(_self, name, result)
+        return result
+    else:
+        raise AttributeError()
+
+
 def get_annotation(param: Union[click.Argument, click.Option]):
     py_type = click_to_python_types[type(param.type)]
     if not param.required:
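
_lazy_load_command is installed as the generated class's __getattr__ (see from_cli below), so each sub-command is built on first access and then cached on the instance with setattr, after which normal attribute lookup bypasses __getattr__ entirely. The caching pattern in isolation, with a stand-in loader:

    class LazyAPI:
        def _load(self, name):
            # Stand-in for building a command wrapper from the click object.
            return lambda: "ran %s" % name

        def __getattr__(self, name):
            # Only invoked when normal attribute lookup fails.
            if name.startswith("_"):
                raise AttributeError(name)
            value = self._load(name)
            setattr(self, name, value)  # cache: later lookups skip __getattr__
            return value

    api = LazyAPI()
    print(api.run())  # built on first access
    print(api.run())  # now served from the instance __dict__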
@@ -179,9 +213,11 @@ def extract_flow_class_from_file(flow_file: str) -> FlowSpec:
 
 
 class MetaflowAPI(object):
-    def __init__(self, parent=None, **kwargs):
+    def __init__(self, parent=None, flow_cls=None, **kwargs):
         self._parent = parent
         self._chain = [{self._API_NAME: kwargs}]
+        self._flow_cls = flow_cls
+        self._cached_computed_parameters = None
 
     @property
     def parent(self):
@@ -200,23 +236,22 @@ class MetaflowAPI(object):
     @classmethod
     def from_cli(cls, flow_file: str, cli_collection: Callable) -> Callable:
         flow_cls = extract_flow_class_from_file(flow_file)
-        flow_parameters = [p for _, p in flow_cls._get_parameters()]
+
         with flow_context(flow_cls) as _:
             add_decorator_options(cli_collection)
 
-        class_dict = {"__module__": "metaflow", "_API_NAME": flow_file}
-        command_groups = cli_collection.sources
-        for each_group in command_groups:
-            for _, cmd_obj in each_group.commands.items():
-                if isinstance(cmd_obj, click.Group):
-                    # TODO: possibly check for fake groups with cmd_obj.name in ["cli", "main"]
-                    class_dict[cmd_obj.name] = extract_group(cmd_obj, flow_parameters)
-                elif isinstance(cmd_obj, click.Command):
-                    class_dict[cmd_obj.name] = extract_command(cmd_obj, flow_parameters)
-                else:
-                    raise RuntimeError(
-                        "Cannot handle %s of type %s" % (cmd_obj.name, type(cmd_obj))
-                    )
+        def getattr_wrapper(_self, name):
+            # Functools.partial do not automatically bind self (no __get__)
+            return _self._internal_getattr(_self, name)
+
+        class_dict = {
+            "__module__": "metaflow",
+            "_API_NAME": flow_file,
+            "_internal_getattr": functools.partial(
+                _lazy_load_command, cli_collection, "_compute_flow_parameters"
+            ),
+            "__getattr__": getattr_wrapper,
+        }
 
         to_return = type(flow_file, (MetaflowAPI,), class_dict)
         to_return.__name__ = flow_file
@@ -237,11 +272,11 @@ class MetaflowAPI(object):
                 defaults,
                 **kwargs,
             )
-            return to_return(parent=None, **method_params)
+            return to_return(parent=None, flow_cls=flow_cls, **method_params)
 
         m = _method
-        m.__name__ = cmd_obj.name
-        m.__doc__ = getattr(cmd_obj, "help", None)
+        m.__name__ = cli_collection.name
+        m.__doc__ = getattr(cli_collection, "help", None)
         m.__signature__ = inspect.signature(_method).replace(
             parameters=params_sigs.values()
         )
@@ -287,6 +322,25 @@ class MetaflowAPI(object):
 
         return components
 
+    def _compute_flow_parameters(self):
+        if self._flow_cls is None or self._parent is not None:
+            raise RuntimeError(
+                "Computing flow-level parameters for a non start API. "
+                "Please report to the Metaflow team."
+            )
+        # TODO: We need to actually compute the new parameters (based on configs) which
+        # would involve processing the options at least partially. We will do this
+        # before GA but for now making it work for regular parameters
+        if self._cached_computed_parameters is not None:
+            return self._cached_computed_parameters
+        self._cached_computed_parameters = []
+        for _, param in self._flow_cls._get_parameters():
+            if param.IS_CONFIG_PARAMETER:
+                continue
+            param.init()
+            self._cached_computed_parameters.append(param)
+        return self._cached_computed_parameters
+
 
 def extract_all_params(cmd_obj: Union[click.Command, click.Group]):
     arg_params_sigs = OrderedDict()
@@ -351,7 +405,7 @@ def extract_group(cmd_obj: click.Group, flow_parameters: List[Parameter]) -> Callable:
         method_params = _method_sanity_check(
             possible_arg_params, possible_opt_params, annotations, defaults, **kwargs
         )
-        return resulting_class(parent=_self, **method_params)
+        return resulting_class(parent=_self, flow_cls=None, **method_params)
 
     m = _method
     m.__name__ = cmd_obj.name

metaflow/runner/deployer.py
@@ -64,7 +64,7 @@ class Deployer(metaclass=DeployerMeta):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the deployment command.

metaflow/runner/deployer_impl.py
@@ -37,7 +37,7 @@ class DeployerImpl(object):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the deployer attribute file.
+        The timeout until which we try to read the deployer attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the deployment command.
@@ -144,7 +144,7 @@ class DeployerImpl(object):
         # Additional info is used to pass additional deployer specific information.
         # It is used in non-OSS deployers (extensions).
         self.additional_info = content.get("additional_info", {})
-
+        command_obj.sync_wait()
         if command_obj.process.returncode == 0:
             return create_class(deployer=self)
 

metaflow/runner/metaflow_runner.py
@@ -221,7 +221,7 @@ class Runner(object):
         The directory to run the subprocess in; if not specified, the current
         directory is used.
     file_read_timeout : int, default 3600
-        The timeout until which we try to read the runner attribute file.
+        The timeout until which we try to read the runner attribute file (in seconds).
     **kwargs : Any
         Additional arguments that you would pass to `python myflow.py` before
         the `run` command.
@@ -272,6 +272,9 @@ class Runner(object):
 
     def __get_executing_run(self, attribute_file_fd, command_obj):
         content = handle_timeout(attribute_file_fd, command_obj, self.file_read_timeout)
+
+        command_obj.sync_wait()
+
         content = json.loads(content)
         pathspec = "%s/%s" % (content.get("flow_name"), content.get("run_id"))
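
Both Runner and DeployerImpl now call command_obj.sync_wait() after the attribute file is read and before process.returncode is inspected; the helper itself appears to land in subprocess_manager.py in this diff. The ordering matters because Popen.returncode remains None until the process has been waited on. A generic sketch of the wait-then-check ordering (plain subprocess, not the Metaflow API):

    import subprocess

    # Hypothetical child process standing in for a Metaflow run/deploy subprocess.
    proc = subprocess.Popen(["python", "-c", "print('attribute file written')"])
    # ... read the attribute file with a timeout here ...
    proc.wait()  # plays the role of command_obj.sync_wait()
    if proc.returncode == 0:  # now guaranteed to be set
        print("subprocess finished cleanly")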