metaflow 2.12.39__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. metaflow/__init__.py +1 -1
  2. metaflow/cli.py +111 -36
  3. metaflow/cli_args.py +2 -2
  4. metaflow/cli_components/run_cmds.py +3 -1
  5. metaflow/datastore/flow_datastore.py +2 -2
  6. metaflow/exception.py +8 -2
  7. metaflow/flowspec.py +48 -36
  8. metaflow/graph.py +28 -27
  9. metaflow/includefile.py +2 -2
  10. metaflow/lint.py +35 -20
  11. metaflow/metadata_provider/heartbeat.py +23 -8
  12. metaflow/metaflow_config.py +7 -0
  13. metaflow/parameters.py +11 -4
  14. metaflow/plugins/argo/argo_client.py +0 -2
  15. metaflow/plugins/argo/argo_workflows.py +86 -104
  16. metaflow/plugins/argo/argo_workflows_cli.py +0 -1
  17. metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
  18. metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
  19. metaflow/plugins/argo/jobset_input_paths.py +0 -1
  20. metaflow/plugins/aws/aws_utils.py +6 -1
  21. metaflow/plugins/aws/batch/batch_client.py +1 -3
  22. metaflow/plugins/aws/batch/batch_decorator.py +11 -11
  23. metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
  24. metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
  25. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  26. metaflow/plugins/aws/step_functions/step_functions.py +1 -1
  27. metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
  28. metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
  29. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
  30. metaflow/plugins/cards/card_creator.py +1 -0
  31. metaflow/plugins/cards/card_decorator.py +46 -8
  32. metaflow/plugins/kubernetes/kube_utils.py +55 -1
  33. metaflow/plugins/kubernetes/kubernetes.py +33 -80
  34. metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
  35. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
  36. metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
  37. metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
  38. metaflow/plugins/pypi/bootstrap.py +249 -81
  39. metaflow/plugins/pypi/conda_environment.py +83 -27
  40. metaflow/plugins/pypi/micromamba.py +77 -36
  41. metaflow/plugins/pypi/pip.py +9 -6
  42. metaflow/plugins/pypi/utils.py +4 -2
  43. metaflow/runner/click_api.py +175 -39
  44. metaflow/runner/deployer_impl.py +6 -1
  45. metaflow/runner/metaflow_runner.py +6 -1
  46. metaflow/runner/utils.py +5 -0
  47. metaflow/user_configs/config_options.py +87 -34
  48. metaflow/user_configs/config_parameters.py +44 -25
  49. metaflow/util.py +2 -2
  50. metaflow/version.py +1 -1
  51. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
  52. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/RECORD +56 -56
  53. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
  54. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
  55. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
  56. {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
@@ -5,21 +5,17 @@ import functools
5
5
  import io
6
6
  import json
7
7
  import os
8
- import sys
9
8
  import tarfile
10
- import time
11
- from concurrent.futures import ThreadPoolExecutor
9
+ import threading
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ from functools import wraps
12
12
  from hashlib import sha256
13
13
  from io import BufferedIOBase, BytesIO
14
- from itertools import chain
15
14
  from urllib.parse import unquote, urlparse
16
15
 
17
- import requests
18
-
19
16
  from metaflow.exception import MetaflowException
20
17
  from metaflow.metaflow_config import get_pinned_conda_libs
21
18
  from metaflow.metaflow_environment import MetaflowEnvironment
22
- from metaflow.metaflow_profile import profile
23
19
 
24
20
  from . import MAGIC_FILE, _datastore_packageroot
25
21
  from .utils import conda_platform
@@ -50,7 +46,6 @@ class CondaEnvironment(MetaflowEnvironment):
50
46
 
51
47
  def validate_environment(self, logger, datastore_type):
52
48
  self.datastore_type = datastore_type
53
- self.logger = logger
54
49
 
55
50
  # Avoiding circular imports.
56
51
  from metaflow.plugins import DATASTORES
@@ -62,8 +57,21 @@ class CondaEnvironment(MetaflowEnvironment):
62
57
  from .micromamba import Micromamba
63
58
  from .pip import Pip
64
59
 
65
- micromamba = Micromamba()
66
- self.solvers = {"conda": micromamba, "pypi": Pip(micromamba)}
60
+ print_lock = threading.Lock()
61
+
62
+ def make_thread_safe(func):
63
+ @wraps(func)
64
+ def wrapper(*args, **kwargs):
65
+ with print_lock:
66
+ return func(*args, **kwargs)
67
+
68
+ return wrapper
69
+
70
+ self.logger = make_thread_safe(logger)
71
+
72
+ # TODO: Wire up logging
73
+ micromamba = Micromamba(self.logger)
74
+ self.solvers = {"conda": micromamba, "pypi": Pip(micromamba, self.logger)}
67
75
 
68
76
  def init_environment(self, echo, only_steps=None):
69
77
  # The implementation optimizes for latency to ensure as many operations can
@@ -150,6 +158,9 @@ class CondaEnvironment(MetaflowEnvironment):
150
158
  (
151
159
  package["path"],
152
160
  # Lazily fetch package from the interweb if needed.
161
+ # TODO: Depending on the len_hint, the package might be downloaded from
162
+ # the interweb prematurely. save_bytes needs to be adjusted to handle
163
+ # this scenario.
153
164
  LazyOpen(
154
165
  package["local_path"],
155
166
  "rb",
@@ -166,22 +177,60 @@ class CondaEnvironment(MetaflowEnvironment):
166
177
  if id_ in dirty:
167
178
  self.write_to_environment_manifest([id_, platform, type_], packages)
168
179
 
169
- # First resolve environments through Conda, before PyPI.
180
+ storage = None
181
+ if self.datastore_type not in ["local"]:
182
+ # Initialize storage for caching if using a remote datastore
183
+ storage = self.datastore(_datastore_packageroot(self.datastore, echo))
184
+
170
185
  self.logger("Bootstrapping virtual environment(s) ...")
171
- for solver in ["conda", "pypi"]:
172
- with ThreadPoolExecutor() as executor:
173
- results = list(
174
- executor.map(lambda x: solve(*x, solver), environments(solver))
175
- )
176
- _ = list(map(lambda x: self.solvers[solver].download(*x), results))
177
- with ThreadPoolExecutor() as executor:
178
- _ = list(
179
- executor.map(lambda x: self.solvers[solver].create(*x), results)
180
- )
181
- if self.datastore_type not in ["local"]:
182
- # Cache packages only when a remote datastore is in play.
183
- storage = self.datastore(_datastore_packageroot(self.datastore, echo))
184
- cache(storage, results, solver)
186
+ # Sequence of operations:
187
+ # 1. Start all conda solves in parallel
188
+ # 2. Download conda packages sequentially
189
+ # 3. Create and cache conda environments in parallel
190
+ # 4. Start PyPI solves in parallel after each conda environment is created
191
+ # 5. Download PyPI packages sequentially
192
+ # 6. Create and cache PyPI environments in parallel
193
+
194
+ with ThreadPoolExecutor() as executor:
195
+ # Start all conda solves in parallel
196
+ conda_futures = [
197
+ executor.submit(lambda x: solve(*x, "conda"), env)
198
+ for env in environments("conda")
199
+ ]
200
+
201
+ pypi_envs = {env[0]: env for env in environments("pypi")}
202
+ pypi_futures = []
203
+
204
+ # Process conda results sequentially for downloads
205
+ for future in as_completed(conda_futures):
206
+ result = future.result()
207
+ # Sequential conda download
208
+ self.solvers["conda"].download(*result)
209
+ # Parallel conda create and cache
210
+ create_future = executor.submit(self.solvers["conda"].create, *result)
211
+ if storage:
212
+ executor.submit(cache, storage, [result], "conda")
213
+
214
+ # Queue PyPI solve to start after conda create
215
+ if result[0] in pypi_envs:
216
+
217
+ def pypi_solve(env):
218
+ create_future.result() # Wait for conda create
219
+ return solve(*env, "pypi")
220
+
221
+ pypi_futures.append(
222
+ executor.submit(pypi_solve, pypi_envs[result[0]])
223
+ )
224
+
225
+ # Process PyPI results sequentially for downloads
226
+ for solve_future in pypi_futures:
227
+ result = solve_future.result()
228
+ # Sequential PyPI download
229
+ self.solvers["pypi"].download(*result)
230
+ # Parallel PyPI create and cache
231
+ executor.submit(self.solvers["pypi"].create, *result)
232
+ if storage:
233
+ executor.submit(cache, storage, [result], "pypi")
185
234
  self.logger("Virtual environment(s) bootstrapped!")
186
235
 
187
236
  def executable(self, step_name, default=None):
@@ -382,7 +431,8 @@ class CondaEnvironment(MetaflowEnvironment):
382
431
  'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
383
432
  % (self.flow.name, id_, self.datastore_type),
384
433
  "echo 'Environment bootstrapped.'",
385
- "export PATH=$PATH:$(pwd)/micromamba",
434
+ # To avoid having to install micromamba in the PATH in micromamba.py, we add it to the PATH here.
435
+ "export PATH=$PATH:$(pwd)/micromamba/bin",
386
436
  ]
387
437
  else:
388
438
  # for @conda/@pypi(disabled=True).
@@ -443,6 +493,7 @@ class LazyOpen(BufferedIOBase):
443
493
  self._file = None
444
494
  self._buffer = None
445
495
  self._position = 0
496
+ self.requests = None
446
497
 
447
498
  def _ensure_file(self):
448
499
  if not self._file:
@@ -459,8 +510,13 @@ class LazyOpen(BufferedIOBase):
459
510
  raise ValueError("Both filename and url are missing")
460
511
 
461
512
  def _download_to_buffer(self):
513
+ if self.requests is None:
514
+ # TODO: Remove dependency on requests
515
+ import requests
516
+
517
+ self.requests = requests
462
518
  # TODO: Stream it in chunks?
463
- response = requests.get(self.url, stream=True)
519
+ response = self.requests.get(self.url, stream=True)
464
520
  response.raise_for_status()
465
521
  return response.content
466
522
 
@@ -1,12 +1,14 @@
1
+ import functools
1
2
  import json
2
3
  import os
3
4
  import subprocess
4
5
  import tempfile
6
+ import time
5
7
 
6
8
  from metaflow.exception import MetaflowException
7
9
  from metaflow.util import which
8
10
 
9
- from .utils import conda_platform
11
+ from .utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL, conda_platform
10
12
 
11
13
 
12
14
  class MicromambaException(MetaflowException):
@@ -19,8 +21,11 @@ class MicromambaException(MetaflowException):
19
21
  super(MicromambaException, self).__init__(msg)
20
22
 
21
23
 
24
+ GLIBC_VERSION = os.environ.get("CONDA_OVERRIDE_GLIBC", "2.38")
25
+
26
+
22
27
  class Micromamba(object):
23
- def __init__(self):
28
+ def __init__(self, logger=None):
24
29
  # micromamba is a tiny version of the mamba package manager and comes with
25
30
  # metaflow specific performance enhancements.
26
31
 
@@ -33,6 +38,12 @@ class Micromamba(object):
33
38
  os.path.expanduser(_home),
34
39
  "micromamba",
35
40
  )
41
+
42
+ if logger:
43
+ self.logger = logger
44
+ else:
45
+ self.logger = lambda *args, **kwargs: None # No-op logger if not provided
46
+
36
47
  self.bin = (
37
48
  which(os.environ.get("METAFLOW_PATH_TO_MICROMAMBA") or "micromamba")
38
49
  or which("./micromamba") # to support remote execution
@@ -70,6 +81,9 @@ class Micromamba(object):
70
81
  "MAMBA_ADD_PIP_AS_PYTHON_DEPENDENCY": "true",
71
82
  "CONDA_SUBDIR": platform,
72
83
  # "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0" # https://github.com/conda/conda/issues/9862
84
+ # Add a default glibc version for linux-64 environments (ignored for other platforms)
85
+ # TODO: Make the version configurable
86
+ "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
73
87
  }
74
88
  cmd = [
75
89
  "create",
@@ -78,6 +92,7 @@ class Micromamba(object):
78
92
  "--dry-run",
79
93
  "--no-extra-safety-checks",
80
94
  "--repodata-ttl=86400",
95
+ "--safety-checks=disabled",
81
96
  "--retry-clean-cache",
82
97
  "--prefix=%s/prefix" % tmp_dir,
83
98
  ]
@@ -91,10 +106,11 @@ class Micromamba(object):
91
106
  cmd.append("python==%s" % python)
92
107
  # TODO: Ensure a human readable message is returned when the environment
93
108
  # can't be resolved for any and all reasons.
94
- return [
109
+ solved_packages = [
95
110
  {k: v for k, v in item.items() if k in ["url"]}
96
111
  for item in self._call(cmd, env)["actions"]["LINK"]
97
112
  ]
113
+ return solved_packages
98
114
 
99
115
  def download(self, id_, packages, python, platform):
100
116
  # Unfortunately all the packages need to be catalogued in package cache
@@ -103,8 +119,6 @@ class Micromamba(object):
103
119
  # Micromamba is painfully slow in determining if many packages are infact
104
120
  # already cached. As a perf heuristic, we check if the environment already
105
121
  # exists to short circuit package downloads.
106
- if self.path_to_environment(id_, platform):
107
- return
108
122
 
109
123
  prefix = "{env_dirs}/{keyword}/{platform}/{id}".format(
110
124
  env_dirs=self.info()["envs_dirs"][0],
@@ -113,13 +127,18 @@ class Micromamba(object):
113
127
  id=id_,
114
128
  )
115
129
 
116
- # Another forced perf heuristic to skip cross-platform downloads.
130
+ # cheap check
117
131
  if os.path.exists(f"{prefix}/fake.done"):
118
132
  return
119
133
 
134
+ # somewhat expensive check
135
+ if self.path_to_environment(id_, platform):
136
+ return
137
+
120
138
  with tempfile.TemporaryDirectory() as tmp_dir:
121
139
  env = {
122
140
  "CONDA_SUBDIR": platform,
141
+ "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
123
142
  }
124
143
  cmd = [
125
144
  "create",
@@ -159,6 +178,7 @@ class Micromamba(object):
159
178
  # use hardlinks when possible, otherwise copy files
160
179
  # disabled for now since it adds to environment creation latencies
161
180
  "CONDA_ALLOW_SOFTLINKS": "0",
181
+ "CONDA_OVERRIDE_GLIBC": GLIBC_VERSION,
162
182
  }
163
183
  cmd = [
164
184
  "create",
@@ -174,6 +194,7 @@ class Micromamba(object):
174
194
  cmd.append("{url}".format(**package))
175
195
  self._call(cmd, env)
176
196
 
197
+ @functools.lru_cache(maxsize=None)
177
198
  def info(self):
178
199
  return self._call(["config", "list", "-a"])
179
200
 
@@ -198,18 +219,24 @@ class Micromamba(object):
198
219
  }
199
220
  directories = self.info()["pkgs_dirs"]
200
221
  # search all package caches for packages
201
- metadata = {
202
- url: os.path.join(d, file)
222
+
223
+ file_to_path = {}
224
+ for d in directories:
225
+ if os.path.isdir(d):
226
+ try:
227
+ with os.scandir(d) as entries:
228
+ for entry in entries:
229
+ if entry.is_file():
230
+ # Prefer the first occurrence if the file exists in multiple directories
231
+ file_to_path.setdefault(entry.name, entry.path)
232
+ except OSError:
233
+ continue
234
+ ret = {
235
+ # set package tarball local paths to None if package tarballs are missing
236
+ url: file_to_path.get(file)
203
237
  for url, file in packages_to_filenames.items()
204
- for d in directories
205
- if os.path.isdir(d)
206
- and file in os.listdir(d)
207
- and os.path.isfile(os.path.join(d, file))
208
238
  }
209
- # set package tarball local paths to None if package tarballs are missing
210
- for url in packages_to_filenames:
211
- metadata.setdefault(url, None)
212
- return metadata
239
+ return ret
213
240
 
214
241
  def interpreter(self, id_):
215
242
  return os.path.join(self.path_to_environment(id_), "bin/python")
@@ -296,7 +323,7 @@ class Micromamba(object):
296
323
  stderr="\n".join(err),
297
324
  )
298
325
  )
299
- except (TypeError, ValueError) as ve:
326
+ except (TypeError, ValueError):
300
327
  pass
301
328
  raise MicromambaException(
302
329
  msg.format(
@@ -312,23 +339,37 @@ def _install_micromamba(installation_location):
312
339
  # Unfortunately no 32bit binaries are available for micromamba, which ideally
313
340
  # shouldn't be much of a problem in today's world.
314
341
  platform = conda_platform()
315
- try:
316
- subprocess.Popen(f"mkdir -p {installation_location}", shell=True).wait()
317
- # https://mamba.readthedocs.io/en/latest/micromamba-installation.html#manual-installation
318
- # requires bzip2
319
- result = subprocess.Popen(
320
- f"curl -Ls https://micro.mamba.pm/api/micromamba/{platform}/1.5.7 | tar -xvj -C {installation_location} bin/micromamba",
321
- shell=True,
322
- stderr=subprocess.PIPE,
323
- stdout=subprocess.PIPE,
324
- )
325
- _, err = result.communicate()
326
- if result.returncode != 0:
327
- raise MicromambaException(
328
- f"Micromamba installation '{result.args}' failed:\n{err.decode()}"
329
- )
342
+ url = MICROMAMBA_URL.format(platform=platform, version="1.5.7")
343
+ mirror_url = MICROMAMBA_MIRROR_URL.format(platform=platform, version="1.5.7")
344
+ os.makedirs(installation_location, exist_ok=True)
330
345
 
331
- except subprocess.CalledProcessError as e:
332
- raise MicromambaException(
333
- "Micromamba installation failed:\n{}".format(e.stderr.decode())
334
- )
346
+ def _download_and_extract(url):
347
+ max_retries = 3
348
+ for attempt in range(max_retries):
349
+ try:
350
+ # https://mamba.readthedocs.io/en/latest/micromamba-installation.html#manual-installation
351
+ # requires bzip2
352
+ result = subprocess.Popen(
353
+ f"curl -Ls {url} | tar -xvj -C {installation_location} bin/micromamba",
354
+ shell=True,
355
+ stderr=subprocess.PIPE,
356
+ stdout=subprocess.PIPE,
357
+ )
358
+ _, err = result.communicate()
359
+ if result.returncode != 0:
360
+ raise MicromambaException(
361
+ f"Micromamba installation '{result.args}' failed:\n{err.decode()}"
362
+ )
363
+ except subprocess.CalledProcessError as e:
364
+ if attempt == max_retries - 1:
365
+ raise MicromambaException(
366
+ "Micromamba installation failed:\n{}".format(e.stderr.decode())
367
+ )
368
+ time.sleep(2**attempt)
369
+
370
+ try:
371
+ # prioritize downloading from mirror
372
+ _download_and_extract(mirror_url)
373
+ except Exception:
374
+ # download from official source as a fallback
375
+ _download_and_extract(url)
@@ -50,10 +50,14 @@ INSTALLATION_MARKER = "{prefix}/.pip/id"
50
50
 
51
51
 
52
52
  class Pip(object):
53
- def __init__(self, micromamba=None):
53
+ def __init__(self, micromamba=None, logger=None):
54
54
  # pip is assumed to be installed inside a conda environment managed by
55
55
  # micromamba. pip commands are executed using `micromamba run --prefix`
56
- self.micromamba = micromamba or Micromamba()
56
+ self.micromamba = micromamba or Micromamba(logger)
57
+ if logger:
58
+ self.logger = logger
59
+ else:
60
+ self.logger = lambda *args, **kwargs: None # No-op logger if not provided
57
61
 
58
62
  def solve(self, id_, packages, python, platform):
59
63
  prefix = self.micromamba.path_to_environment(id_)
@@ -102,9 +106,8 @@ class Pip(object):
102
106
  except PipPackageNotFound as ex:
103
107
  # pretty print package errors
104
108
  raise PipException(
105
- "Could not find a binary distribution for %s \n"
106
- "for the platform %s\n\n"
107
- "Note that ***@pypi*** does not currently support source distributions"
109
+ "Unable to find a binary distribution compatible with %s for %s.\n\n"
110
+ "Note: ***@pypi*** does not currently support source distributions"
108
111
  % (ex.package_spec, platform)
109
112
  )
110
113
 
@@ -123,7 +126,7 @@ class Pip(object):
123
126
  **res,
124
127
  subdir_str=(
125
128
  "#subdirectory=%s" % subdirectory if subdirectory else ""
126
- )
129
+ ),
127
130
  )
128
131
  # used to deduplicate the storage location in case wheel does not
129
132
  # build with enough unique identifiers.
@@ -1,4 +1,3 @@
1
- import os
2
1
  import platform
3
2
  import sys
4
3
 
@@ -17,10 +16,13 @@ else:
17
16
  from metaflow._vendor.packaging import tags
18
17
  from metaflow._vendor.packaging.utils import parse_wheel_filename
19
18
 
20
- from urllib.parse import unquote, urlparse
19
+ from urllib.parse import unquote
21
20
 
22
21
  from metaflow.exception import MetaflowException
23
22
 
23
+ MICROMAMBA_URL = "https://micro.mamba.pm/api/micromamba/{platform}/{version}"
24
+ MICROMAMBA_MIRROR_URL = "https://micromamba.outerbounds.sh/{platform}/{version}.tar.bz2"
25
+
24
26
 
25
27
  def conda_platform():
26
28
  # Returns the conda platform for the Python interpreter