metaflow 2.12.19__py2.py3-none-any.whl → 2.12.21__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. metaflow/__init__.py +11 -21
  2. metaflow/client/core.py +1 -1
  3. metaflow/cmd/main_cli.py +3 -2
  4. metaflow/extension_support/__init__.py +120 -29
  5. metaflow/flowspec.py +4 -0
  6. metaflow/info_file.py +25 -0
  7. metaflow/metaflow_config.py +0 -1
  8. metaflow/metaflow_current.py +3 -1
  9. metaflow/metaflow_environment.py +1 -7
  10. metaflow/metaflow_version.py +130 -64
  11. metaflow/package.py +2 -1
  12. metaflow/plugins/argo/argo_workflows.py +10 -1
  13. metaflow/plugins/aws/batch/batch_client.py +3 -0
  14. metaflow/plugins/kubernetes/kube_utils.py +25 -0
  15. metaflow/plugins/kubernetes/kubernetes.py +3 -0
  16. metaflow/plugins/kubernetes/kubernetes_cli.py +84 -1
  17. metaflow/plugins/kubernetes/kubernetes_client.py +97 -0
  18. metaflow/plugins/kubernetes/kubernetes_decorator.py +4 -0
  19. metaflow/plugins/parallel_decorator.py +4 -0
  20. metaflow/plugins/pypi/bootstrap.py +2 -0
  21. metaflow/plugins/pypi/conda_decorator.py +7 -1
  22. metaflow/runner/click_api.py +13 -1
  23. metaflow/runner/deployer.py +9 -2
  24. metaflow/runner/metaflow_runner.py +4 -2
  25. metaflow/runner/subprocess_manager.py +8 -3
  26. metaflow/runner/utils.py +19 -2
  27. metaflow/version.py +1 -1
  28. {metaflow-2.12.19.dist-info → metaflow-2.12.21.dist-info}/METADATA +2 -2
  29. {metaflow-2.12.19.dist-info → metaflow-2.12.21.dist-info}/RECORD +33 -31
  30. {metaflow-2.12.19.dist-info → metaflow-2.12.21.dist-info}/WHEEL +1 -1
  31. {metaflow-2.12.19.dist-info → metaflow-2.12.21.dist-info}/LICENSE +0 -0
  32. {metaflow-2.12.19.dist-info → metaflow-2.12.21.dist-info}/entry_points.txt +0 -0
  33. {metaflow-2.12.19.dist-info → metaflow-2.12.21.dist-info}/top_level.txt +0 -0
metaflow/__init__.py CHANGED
@@ -42,14 +42,8 @@ If you have any questions, feel free to post a bug report/question on the
42
42
  Metaflow GitHub page.
43
43
  """
44
44
 
45
- import importlib
45
+ import os
46
46
  import sys
47
- import types
48
-
49
- from os import path
50
-
51
- CURRENT_DIRECTORY = path.dirname(path.abspath(__file__))
52
- INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO")
53
47
 
54
48
  from metaflow.extension_support import (
55
49
  alias_submodules,
@@ -61,7 +55,6 @@ from metaflow.extension_support import (
61
55
  _ext_debug,
62
56
  )
63
57
 
64
-
65
58
  # We load the module overrides *first* explicitly. Non overrides can be loaded
66
59
  # in toplevel as well but these can be loaded first if needed. Note that those
67
60
  # modules should be careful not to include anything in Metaflow at their top-level
@@ -79,9 +72,14 @@ try:
79
72
  )
80
73
  tl_module = m.module.__dict__.get("toplevel", None)
81
74
  if tl_module is not None:
82
- _tl_modules.append(".".join([EXT_PKG, m.tl_package, "toplevel", tl_module]))
75
+ _tl_modules.append(
76
+ (
77
+ m.package_name,
78
+ ".".join([EXT_PKG, m.tl_package, "toplevel", tl_module]),
79
+ )
80
+ )
83
81
  _ext_debug("Got overrides to load: %s" % _override_modules)
84
- _ext_debug("Got top-level imports: %s" % _tl_modules)
82
+ _ext_debug("Got top-level imports: %s" % str(_tl_modules))
85
83
  except Exception as e:
86
84
  _ext_debug("Error in importing toplevel/overrides: %s" % e)
87
85
 
@@ -153,9 +151,9 @@ if sys.version_info >= (3, 7):
153
151
  from .runner.deployer import Deployer
154
152
  from .runner.nbdeploy import NBDeployer
155
153
 
156
- __version_addl__ = []
154
+ __ext_tl_modules__ = []
157
155
  _ext_debug("Loading top-level modules")
158
- for m in _tl_modules:
156
+ for pkg_name, m in _tl_modules:
159
157
  extension_module = load_module(m)
160
158
  if extension_module:
161
159
  tl_package = m.split(".")[1]
@@ -163,15 +161,7 @@ for m in _tl_modules:
163
161
  lazy_load_aliases(
164
162
  alias_submodules(extension_module, tl_package, None, extra_indent=True)
165
163
  )
166
- version_info = getattr(extension_module, "__mf_extensions__", "<unk>")
167
- if extension_module.__version__:
168
- version_info = "%s(%s)" % (version_info, extension_module.__version__)
169
- __version_addl__.append(version_info)
170
-
171
- if __version_addl__:
172
- __version_addl__ = ";".join(__version_addl__)
173
- else:
174
- __version_addl__ = None
164
+ __ext_tl_modules__.append((pkg_name, extension_module))
175
165
 
176
166
  # Erase all temporary names to avoid leaking things
177
167
  for _n in [
metaflow/client/core.py CHANGED
@@ -34,7 +34,7 @@ from metaflow.plugins import ENVIRONMENTS, METADATA_PROVIDERS
34
34
  from metaflow.unbounded_foreach import CONTROL_TASK_TAG
35
35
  from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode
36
36
 
37
- from .. import INFO_FILE
37
+ from ..info_file import INFO_FILE
38
38
  from .filecache import FileCache
39
39
 
40
40
  try:
metaflow/cmd/main_cli.py CHANGED
@@ -84,12 +84,13 @@ def start(ctx):
84
84
 
85
85
  import metaflow
86
86
 
87
+ version = get_version()
87
88
  echo("Metaflow ", fg="magenta", bold=True, nl=False)
88
89
 
89
90
  if ctx.invoked_subcommand is None:
90
- echo("(%s): " % get_version(), fg="magenta", bold=False, nl=False)
91
+ echo("(%s): " % version, fg="magenta", bold=False, nl=False)
91
92
  else:
92
- echo("(%s)\n" % get_version(), fg="magenta", bold=False)
93
+ echo("(%s)\n" % version, fg="magenta", bold=False)
93
94
 
94
95
  if ctx.invoked_subcommand is None:
95
96
  echo("More data science, less engineering\n", fg="magenta")
metaflow/extension_support/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from __future__ import print_function
2
2
 
3
3
  import importlib
4
- import json
5
4
  import os
6
5
  import re
7
6
  import sys
@@ -11,6 +10,10 @@ from collections import defaultdict, namedtuple
11
10
 
12
11
  from importlib.abc import MetaPathFinder, Loader
13
12
  from itertools import chain
13
+ from pathlib import Path
14
+
15
+ from metaflow.info_file import read_info_file
16
+
14
17
 
15
18
  #
16
19
  # This file provides the support for Metaflow's extension mechanism which allows
@@ -59,6 +62,9 @@ __all__ = (
59
62
  "load_module",
60
63
  "get_modules",
61
64
  "dump_module_info",
65
+ "get_extensions_in_dir",
66
+ "extension_info",
67
+ "update_package_info",
62
68
  "get_aliased_modules",
63
69
  "package_mfext_package",
64
70
  "package_mfext_all",
@@ -80,9 +86,14 @@ EXT_EXCLUDE_SUFFIXES = [".pyc"]
80
86
  # To get verbose messages, set METAFLOW_DEBUG_EXT to 1
81
87
  DEBUG_EXT = os.environ.get("METAFLOW_DEBUG_EXT", False)
82
88
 
89
+ # This is extracted only from environment variable and here separately from
90
+ # metaflow_config to prevent nasty circular dependencies
91
+ EXTENSIONS_SEARCH_DIRS = os.environ.get("METAFLOW_EXTENSIONS_SEARCH_DIRS", "").split(
92
+ os.pathsep
93
+ )
83
94
 
84
95
  MFExtPackage = namedtuple("MFExtPackage", "package_name tl_package config_module")
85
- MFExtModule = namedtuple("MFExtModule", "tl_package module")
96
+ MFExtModule = namedtuple("MFExtModule", "package_name tl_package module")
86
97
 
87
98
 
88
99
  def load_module(module_name):
@@ -113,17 +124,64 @@ def get_modules(extension_point):
113
124
  return modules_to_load
114
125
 
115
126
 
116
- def dump_module_info():
117
- _filter_files_all()
127
+ def dump_module_info(all_packages=None, pkgs_per_extension_point=None):
128
+ if all_packages is None:
129
+ all_packages = _all_packages
130
+ if pkgs_per_extension_point is None:
131
+ pkgs_per_extension_point = _pkgs_per_extension_point
132
+
133
+ _filter_files_all(all_packages)
118
134
  sanitized_all_packages = dict()
119
135
  # Strip out root_paths (we don't need it and no need to expose user's dir structure)
120
- for k, v in _all_packages.items():
136
+ for k, v in all_packages.items():
121
137
  sanitized_all_packages[k] = {
122
138
  "root_paths": None,
123
139
  "meta_module": v["meta_module"],
124
140
  "files": v["files"],
141
+ "version": v["version"],
142
+ "package_version": v.get("package_version", "<unk>"),
143
+ "extension_name": v.get("extension_name", "<unk>"),
125
144
  }
126
- return "ext_info", [sanitized_all_packages, _pkgs_per_extension_point]
145
+ return "ext_info", [sanitized_all_packages, pkgs_per_extension_point]
146
+
147
+
148
+ def get_extensions_in_dir(d):
149
+ if not _mfext_supported:
150
+ _ext_debug("Not supported for your Python version -- 3.4+ is needed")
151
+ return None, None
152
+ return _get_extension_packages(ignore_info_file=True, restrict_to_directories=[d])
153
+
154
+
155
+ def extension_info(packages=None):
156
+ if packages is None:
157
+ packages = _all_packages
158
+ # Returns information about installed extensions so it it can be stored in
159
+ # _graph_info.
160
+ return {
161
+ "installed": {
162
+ k: {
163
+ "dist_version": v["version"],
164
+ "package_version": v.get("package_version", "<unk>"),
165
+ "extension_name": v.get("extension_name", "<unk>"),
166
+ }
167
+ for k, v in packages.items()
168
+ },
169
+ }
170
+
171
+
172
+ def update_package_info(pkg_to_update=None, package_name=None, **kwargs):
173
+ pkg = None
174
+ if pkg_to_update:
175
+ pkg = pkg_to_update
176
+ elif package_name:
177
+ pkg = _all_packages.get(package_name)
178
+ for k, v in kwargs.items():
179
+ if k in pkg:
180
+ raise ValueError(
181
+ "Trying to overwrite existing key '%s' for package %s" % (k, str(pkg))
182
+ )
183
+ pkg[k] = v
184
+ return pkg
127
185
 
128
186
 
129
187
  def get_aliased_modules():
@@ -134,8 +192,8 @@ def package_mfext_package(package_name):
134
192
  from metaflow.util import to_unicode
135
193
 
136
194
  _ext_debug("Packaging '%s'" % package_name)
137
- _filter_files_package(package_name)
138
195
  pkg_info = _all_packages.get(package_name, None)
196
+ _filter_files_package(pkg_info)
139
197
  if pkg_info and pkg_info.get("root_paths", None):
140
198
  single_path = len(pkg_info["root_paths"]) == 1
141
199
  for p in pkg_info["root_paths"]:
@@ -296,7 +354,7 @@ def _ext_debug(*args, **kwargs):
296
354
  print(init_str, *args, **kwargs)
297
355
 
298
356
 
299
- def _get_extension_packages():
357
+ def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None):
300
358
  if not _mfext_supported:
301
359
  _ext_debug("Not supported for your Python version -- 3.4+ is needed")
302
360
  return {}, {}
@@ -304,20 +362,20 @@ def _get_extension_packages():
304
362
  # If we have an INFO file with the appropriate information (if running from a saved
305
363
  # code package for example), we use that directly
306
364
  # Pre-compute on _extension_points
307
- from metaflow import INFO_FILE
308
-
309
- try:
310
- with open(INFO_FILE, encoding="utf-8") as contents:
311
- all_pkg, ext_to_pkg = json.load(contents).get("ext_info", (None, None))
312
- if all_pkg is not None and ext_to_pkg is not None:
313
- _ext_debug("Loading pre-computed information from INFO file")
314
- # We need to properly convert stuff in ext_to_pkg
315
- for k, v in ext_to_pkg.items():
316
- v = [MFExtPackage(*d) for d in v]
317
- ext_to_pkg[k] = v
318
- return all_pkg, ext_to_pkg
319
- except IOError:
320
- pass
365
+ info_content = read_info_file()
366
+ if not ignore_info_file and info_content:
367
+ all_pkg, ext_to_pkg = info_content.get("ext_info", (None, None))
368
+ if all_pkg is not None and ext_to_pkg is not None:
369
+ _ext_debug("Loading pre-computed information from INFO file")
370
+ # We need to properly convert stuff in ext_to_pkg
371
+ for k, v in ext_to_pkg.items():
372
+ v = [MFExtPackage(*d) for d in v]
373
+ ext_to_pkg[k] = v
374
+ return all_pkg, ext_to_pkg
375
+
376
+ # Late import to prevent some circular nastiness
377
+ if restrict_to_directories is None and EXTENSIONS_SEARCH_DIRS != [""]:
378
+ restrict_to_directories = EXTENSIONS_SEARCH_DIRS
321
379
 
322
380
  # Check if we even have extensions
323
381
  try:
@@ -331,6 +389,11 @@ def _get_extension_packages():
331
389
  raise
332
390
  return {}, {}
333
391
 
392
+ if restrict_to_directories:
393
+ restrict_to_directories = [
394
+ Path(p).resolve().as_posix() for p in restrict_to_directories
395
+ ]
396
+
334
397
  # There are two "types" of packages:
335
398
  # - those installed on the system (distributions)
336
399
  # - those present in the PYTHONPATH
@@ -343,8 +406,12 @@ def _get_extension_packages():
343
406
  # At this point, we look at all the paths and create a set. As we find distributions
344
407
  # that match it, we will remove from the set and then will be left with any
345
408
  # PYTHONPATH "packages"
346
- all_paths = set(extensions_module.__path__)
409
+ all_paths = set(Path(p).resolve().as_posix() for p in extensions_module.__path__)
347
410
  _ext_debug("Found packages present at %s" % str(all_paths))
411
+ if restrict_to_directories:
412
+ _ext_debug(
413
+ "Processed packages will be restricted to %s" % str(restrict_to_directories)
414
+ )
348
415
 
349
416
  list_ext_points = [x.split(".") for x in _extension_points]
350
417
  init_ext_points = [x[0] for x in list_ext_points]
@@ -391,9 +458,20 @@ def _get_extension_packages():
391
458
  # This is not 100% accurate because it is possible that at the same
392
459
  # location there is a package and a non-package, but this is extremely
393
460
  # unlikely so we are going to ignore this case.
394
- dist_root = dist.locate_file(EXT_PKG).as_posix()
461
+ dist_root = dist.locate_file(EXT_PKG).resolve().as_posix()
395
462
  all_paths.discard(dist_root)
396
463
  dist_name = dist.metadata["Name"]
464
+ dist_version = dist.metadata["Version"]
465
+ if restrict_to_directories:
466
+ parent_dirs = list(
467
+ p.as_posix() for p in Path(dist_root).resolve().parents
468
+ )
469
+ if all(p not in parent_dirs for p in restrict_to_directories):
470
+ _ext_debug(
471
+ "Ignoring package at %s as it is not in the considered directories"
472
+ % dist_root
473
+ )
474
+ continue
397
475
  if dist_name in mf_ext_packages:
398
476
  _ext_debug(
399
477
  "Ignoring duplicate package '%s' (duplicate paths in sys.path? (%s))"
@@ -537,6 +615,7 @@ def _get_extension_packages():
537
615
  "root_paths": [dist_root],
538
616
  "meta_module": meta_module,
539
617
  "files": files_to_include,
618
+ "version": dist_version,
540
619
  }
541
620
  # At this point, we have all the packages that contribute to EXT_PKG,
542
621
  # we now check to see if there is an order to respect based on dependencies. We will
@@ -605,6 +684,16 @@ def _get_extension_packages():
605
684
  if len(all_paths_list) > 0:
606
685
  _ext_debug("Non installed packages present at %s" % str(all_paths))
607
686
  for package_count, package_path in enumerate(all_paths_list):
687
+ if restrict_to_directories:
688
+ parent_dirs = list(
689
+ p.as_posix() for p in Path(package_path).resolve().parents
690
+ )
691
+ if all(p not in parent_dirs for p in restrict_to_directories):
692
+ _ext_debug(
693
+ "Ignoring non-installed package at %s as it is not in "
694
+ "the considered directories" % package_path
695
+ )
696
+ continue
608
697
  # We give an alternate name for the visible package name. It is
609
698
  # not exposed to the end user but used to refer to the package, and it
610
699
  # doesn't provide much additional information to have the full path
@@ -740,6 +829,7 @@ def _get_extension_packages():
740
829
  "root_paths": [package_path],
741
830
  "meta_module": meta_module,
742
831
  "files": files_to_include,
832
+ "version": "_local_",
743
833
  }
744
834
 
745
835
  # Sanity check that we only have one package per configuration file.
@@ -868,12 +958,13 @@ def _get_extension_config(distribution_name, tl_pkg, extension_point, config_mod
868
958
  _ext_debug("Package '%s' is rooted at %s" % (distribution_name, root_paths))
869
959
  _all_packages[distribution_name]["root_paths"] = root_paths
870
960
 
871
- return MFExtModule(tl_package=tl_pkg, module=extension_module)
961
+ return MFExtModule(
962
+ package_name=distribution_name, tl_package=tl_pkg, module=extension_module
963
+ )
872
964
  return None
873
965
 
874
966
 
875
- def _filter_files_package(package_name):
876
- pkg = _all_packages.get(package_name)
967
+ def _filter_files_package(pkg):
877
968
  if pkg and pkg["root_paths"] and pkg["meta_module"]:
878
969
  meta_module = _attempt_load_module(pkg["meta_module"])
879
970
  if meta_module:
@@ -902,8 +993,8 @@ def _filter_files_package(package_name):
902
993
  pkg["files"] = new_files
903
994
 
904
995
 
905
- def _filter_files_all():
906
- for p in _all_packages:
996
+ def _filter_files_all(all_packages):
997
+ for p in all_packages.values():
907
998
  _filter_files_package(p)
908
999
 
909
1000
 
metaflow/flowspec.py CHANGED
@@ -15,6 +15,9 @@ from .exception import (
15
15
  MissingInMergeArtifactsException,
16
16
  UnhandledInMergeArtifactsException,
17
17
  )
18
+
19
+ from .extension_support import extension_info
20
+
18
21
  from .graph import FlowGraph
19
22
  from .unbounded_foreach import UnboundedForeachInput
20
23
  from .util import to_pod
@@ -208,6 +211,7 @@ class FlowSpec(metaclass=_FlowSpecMeta):
208
211
  for deco in flow_decorators(self)
209
212
  if not deco.name.startswith("_")
210
213
  ],
214
+ "extensions": extension_info(),
211
215
  }
212
216
  self._graph_info = graph_info
213
217
 
metaflow/info_file.py ADDED
@@ -0,0 +1,25 @@
1
+ import json
2
+
3
+ from os import path
4
+
5
+ CURRENT_DIRECTORY = path.dirname(path.abspath(__file__))
6
+ INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO")
7
+
8
+ _info_file_content = None
9
+ _info_file_present = None
10
+
11
+
12
+ def read_info_file():
13
+ global _info_file_content
14
+ global _info_file_present
15
+ if _info_file_present is None:
16
+ _info_file_present = path.exists(INFO_FILE)
17
+ if _info_file_present:
18
+ try:
19
+ with open(INFO_FILE, "r", encoding="utf-8") as contents:
20
+ _info_file_content = json.load(contents)
21
+ except IOError:
22
+ pass
23
+ if _info_file_present:
24
+ return _info_file_content
25
+ return None
metaflow/metaflow_config.py CHANGED
@@ -25,7 +25,6 @@ DATASTORE_LOCAL_DIR = ".metaflow"
25
25
  # Local configuration file (in .metaflow) containing overrides per-project
26
26
  LOCAL_CONFIG_FILE = "config.json"
27
27
 
28
-
29
28
  ###
30
29
  # Default configuration
31
30
  ###
metaflow/metaflow_current.py CHANGED
@@ -4,7 +4,9 @@ from typing import Any, Optional, TYPE_CHECKING
4
4
 
5
5
  from metaflow.metaflow_config import TEMPDIR
6
6
 
7
- Parallel = namedtuple("Parallel", ["main_ip", "num_nodes", "node_index"])
7
+ Parallel = namedtuple(
8
+ "Parallel", ["main_ip", "num_nodes", "node_index", "control_task_id"]
9
+ )
8
10
 
9
11
  if TYPE_CHECKING:
10
12
  import metaflow
metaflow/metaflow_environment.py CHANGED
@@ -9,8 +9,6 @@ from metaflow.extension_support import dump_module_info
9
9
  from metaflow.mflog import BASH_MFLOG
10
10
  from . import R
11
11
 
12
- version_cache = None
13
-
14
12
 
15
13
  class InvalidEnvironmentException(MetaflowException):
16
14
  headline = "Incompatible environment"
@@ -180,10 +178,6 @@ class MetaflowEnvironment(object):
180
178
  return cmds
181
179
 
182
180
  def get_environment_info(self, include_ext_info=False):
183
- global version_cache
184
- if version_cache is None:
185
- version_cache = metaflow_version.get_version()
186
-
187
181
  # note that this dict goes into the code package
188
182
  # so variables here should be relatively stable (no
189
183
  # timestamps) so the hash won't change all the time
@@ -197,7 +191,7 @@ class MetaflowEnvironment(object):
197
191
  "use_r": R.use_r(),
198
192
  "python_version": sys.version,
199
193
  "python_version_code": "%d.%d.%d" % sys.version_info[:3],
200
- "metaflow_version": version_cache,
194
+ "metaflow_version": metaflow_version.get_version(),
201
195
  "script": os.path.basename(os.path.abspath(sys.argv[0])),
202
196
  }
203
197
  if R.use_r():