ob-metaflow 2.9.10.1__py2.py3-none-any.whl → 2.10.2.6__py2.py3-none-any.whl

This diff covers the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.

This version of ob-metaflow might be problematic.

Files changed (57)
  1. metaflow/_vendor/packaging/__init__.py +15 -0
  2. metaflow/_vendor/packaging/_elffile.py +108 -0
  3. metaflow/_vendor/packaging/_manylinux.py +238 -0
  4. metaflow/_vendor/packaging/_musllinux.py +80 -0
  5. metaflow/_vendor/packaging/_parser.py +328 -0
  6. metaflow/_vendor/packaging/_structures.py +61 -0
  7. metaflow/_vendor/packaging/_tokenizer.py +188 -0
  8. metaflow/_vendor/packaging/markers.py +245 -0
  9. metaflow/_vendor/packaging/requirements.py +95 -0
  10. metaflow/_vendor/packaging/specifiers.py +1005 -0
  11. metaflow/_vendor/packaging/tags.py +546 -0
  12. metaflow/_vendor/packaging/utils.py +141 -0
  13. metaflow/_vendor/packaging/version.py +563 -0
  14. metaflow/_vendor/v3_7/__init__.py +1 -0
  15. metaflow/_vendor/v3_7/zipp.py +329 -0
  16. metaflow/metaflow_config.py +2 -1
  17. metaflow/metaflow_environment.py +3 -1
  18. metaflow/mflog/mflog.py +7 -1
  19. metaflow/multicore_utils.py +12 -2
  20. metaflow/plugins/__init__.py +8 -3
  21. metaflow/plugins/airflow/airflow.py +13 -0
  22. metaflow/plugins/argo/argo_client.py +16 -0
  23. metaflow/plugins/argo/argo_events.py +7 -1
  24. metaflow/plugins/argo/argo_workflows.py +62 -0
  25. metaflow/plugins/argo/argo_workflows_cli.py +15 -0
  26. metaflow/plugins/aws/batch/batch.py +10 -0
  27. metaflow/plugins/aws/batch/batch_cli.py +1 -2
  28. metaflow/plugins/aws/batch/batch_decorator.py +2 -9
  29. metaflow/plugins/datatools/s3/s3.py +4 -0
  30. metaflow/plugins/env_escape/client.py +24 -3
  31. metaflow/plugins/env_escape/stub.py +2 -8
  32. metaflow/plugins/kubernetes/kubernetes.py +13 -0
  33. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -2
  34. metaflow/plugins/kubernetes/kubernetes_decorator.py +9 -2
  35. metaflow/plugins/pypi/__init__.py +29 -0
  36. metaflow/plugins/pypi/bootstrap.py +131 -0
  37. metaflow/plugins/pypi/conda_decorator.py +335 -0
  38. metaflow/plugins/pypi/conda_environment.py +414 -0
  39. metaflow/plugins/pypi/micromamba.py +294 -0
  40. metaflow/plugins/pypi/pip.py +205 -0
  41. metaflow/plugins/pypi/pypi_decorator.py +130 -0
  42. metaflow/plugins/pypi/pypi_environment.py +7 -0
  43. metaflow/plugins/pypi/utils.py +75 -0
  44. metaflow/task.py +0 -3
  45. metaflow/vendor.py +1 -0
  46. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/METADATA +1 -1
  47. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/RECORD +51 -33
  48. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/WHEEL +1 -1
  49. metaflow/plugins/conda/__init__.py +0 -90
  50. metaflow/plugins/conda/batch_bootstrap.py +0 -104
  51. metaflow/plugins/conda/conda.py +0 -247
  52. metaflow/plugins/conda/conda_environment.py +0 -136
  53. metaflow/plugins/conda/conda_flow_decorator.py +0 -35
  54. metaflow/plugins/conda/conda_step_decorator.py +0 -416
  55. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/LICENSE +0 -0
  56. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/entry_points.txt +0 -0
  57. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/top_level.txt +0 -0
metaflow/plugins/aws/batch/batch_decorator.py
@@ -91,8 +91,8 @@ class BatchDecorator(StepDecorator):
         "gpu": None,
         "memory": None,
         "image": None,
-        "queue": BATCH_JOB_QUEUE,  # Will be replaced with an available queue if not provided.
-        "iam_role": ECS_S3_ACCESS_IAM_ROLE,  # Required
+        "queue": BATCH_JOB_QUEUE,
+        "iam_role": ECS_S3_ACCESS_IAM_ROLE,
         "execution_role": ECS_FARGATE_EXECUTION_ROLE,
         "shared_memory": None,
         "max_swap": None,
@@ -149,13 +149,6 @@ class BatchDecorator(StepDecorator):
         if flow_datastore.TYPE != "s3":
             raise BatchException("The *@batch* decorator requires --datastore=s3.")

-        # Require iam_role
-        if self.attributes["iam_role"] is None:
-            # TODO: Unify messaging on various configuration options.
-            raise BatchException(
-                "The *@batch* decorator requires an IAM Role that allows AWS Batch job to communicate with Amazon S3 datastore.\n"
-                'You can specify it either with @batch(iam_role="role-name") or by setting METAFLOW_ECS_S3_ACCESS_IAM_ROLE in your configuration.'
-            )
         # Set internal state.
         self.logger = logger
         self.environment = environment
metaflow/plugins/datatools/s3/s3.py
@@ -513,6 +513,10 @@ class S3(object):

         if run:
             # 1. use a (current) run ID with optional customizations
+            if DATATOOLS_S3ROOT is None:
+                raise MetaflowS3URLException(
+                    "DATATOOLS_S3ROOT is not configured when trying to use S3 storage"
+                )
             parsed = urlparse(DATATOOLS_S3ROOT)
             if not bucket:
                 bucket = parsed.netloc
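
Note: the added guard fails fast with an actionable message; without it, an unset DATATOOLS_S3ROOT would flow straight into urlparse and only surface later as a confusing downstream error. A minimal, self-contained sketch of the same fail-fast pattern (RuntimeError stands in for MetaflowS3URLException to keep the sketch importable on its own):

from urllib.parse import urlparse

DATATOOLS_S3ROOT = None  # simulates METAFLOW_DATATOOLS_S3ROOT being unset

def s3_run_root(bucket=None):
    # Fail fast with a clear configuration error, mirroring the hunk above.
    if DATATOOLS_S3ROOT is None:
        raise RuntimeError(
            "DATATOOLS_S3ROOT is not configured when trying to use S3 storage"
        )
    parsed = urlparse(DATATOOLS_S3ROOT)
    return bucket or parsed.netloc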
metaflow/plugins/env_escape/client.py
@@ -1,4 +1,5 @@
 import fcntl
+import gc
 import os
 import importlib
 import itertools
@@ -60,6 +61,7 @@ class Client(object):
         # have an exception
         self._poller = None
         self._poller_lock = threading.Lock()
+        self._active_pid = os.getpid()
         self._server_process = None
         self._socket_path = None

@@ -70,11 +72,20 @@ class Client(object):
         # The client launches the server when created; we use
         # Unix sockets for now
         server_module = ".".join([__package__, "server"])
-        self._socket_path = "/tmp/%s_%d" % (server_config, os.getpid())
+        self._socket_path = "/tmp/%s_%d" % (server_config, self._active_pid)
         if os.path.exists(self._socket_path):
             raise RuntimeError("Existing socket: %s" % self._socket_path)
         env = os.environ.copy()
         env["PYTHONPATH"] = pythonpath
+        # When coming from a conda environment, LD_LIBRARY_PATH may be set to
+        # first include the Conda environment's library. When breaking out to
+        # the underlying python, we need to reset it to the original LD_LIBRARY_PATH
+        ld_lib_path = env.get("LD_LIBRARY_PATH")
+        orig_ld_lib_path = env.get("MF_ORIG_LD_LIBRARY_PATH")
+        if ld_lib_path is not None and orig_ld_lib_path is not None:
+            env["LD_LIBRARY_PATH"] = orig_ld_lib_path
+        if orig_ld_lib_path is not None:
+            del env["MF_ORIG_LD_LIBRARY_PATH"]
         self._server_process = Popen(
             [
                 python_executable,
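
The LD_LIBRARY_PATH handling is a save-and-restore pattern: the conda layer is assumed to stash the pre-activation value under MF_ORIG_LD_LIBRARY_PATH (the stashing side is inferred from the variable name and is not part of this diff), and the client restores it before spawning the host interpreter so the server process links against the host's libraries rather than the conda environment's. A sketch of both halves, with hypothetical function names:

import os

def enter_conda_env(env, conda_lib_dir):
    # Hypothetical counterpart: remember the original value before the
    # conda environment prepends its own library directory.
    orig = env.get("LD_LIBRARY_PATH", "")
    env["MF_ORIG_LD_LIBRARY_PATH"] = orig
    env["LD_LIBRARY_PATH"] = conda_lib_dir + os.pathsep + orig

def escape_to_host(env):
    # The restore logic added in the hunk above: put the original value
    # back and drop the marker variable before launching the host python.
    orig = env.get("MF_ORIG_LD_LIBRARY_PATH")
    if env.get("LD_LIBRARY_PATH") is not None and orig is not None:
        env["LD_LIBRARY_PATH"] = orig
    if orig is not None:
        del env["MF_ORIG_LD_LIBRARY_PATH"]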
@@ -381,8 +392,18 @@ class Client(object):
         return name

     def _communicate(self, msg):
-        with self._poller_lock:
-            return self._locked_communicate(msg)
+        if os.getpid() != self._active_pid:
+            raise RuntimeError(
+                "You cannot use the environment escape across process boundaries."
+            )
+        # We also disable the GC because in some rare cases, it may try to delete
+        # a remote object while we are communicating which will cause a deadlock
+        try:
+            gc.disable()
+            with self._poller_lock:
+                return self._locked_communicate(msg)
+        finally:
+            gc.enable()

     def _locked_communicate(self, msg):
         self._channel.send(msg)
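
Two failure modes are defended against here. First, after a fork() the child would inherit the parent's socket, so comparing os.getpid() with the pid recorded in __init__ rejects cross-process use outright. Second, a garbage collection can run at any bytecode boundary and invoke Stub.__del__, which sends an OP_DEL request over this same channel; if that fires while _communicate already holds the non-reentrant _poller_lock, the nested send deadlocks. Disabling the collector for the duration of the call closes that window. A standalone sketch of the pattern, with hypothetical names:

import gc
import os
import threading

class ForkSafeChannel:
    """Hypothetical illustration of the guards used by Client._communicate."""

    def __init__(self):
        self._active_pid = os.getpid()  # the pid that owns the socket
        self._lock = threading.Lock()   # non-reentrant, like _poller_lock

    def communicate(self, msg):
        if os.getpid() != self._active_pid:
            raise RuntimeError("channel cannot be used across process boundaries")
        try:
            # Keep destructors (which may send on this channel) from running
            # while we hold the lock; re-enabled unconditionally below.
            gc.disable()
            with self._lock:
                return self._send_and_receive(msg)
        finally:
            gc.enable()

    def _send_and_receive(self, msg):
        return msg  # stand-in for the real socket round trip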
metaflow/plugins/env_escape/stub.py
@@ -26,7 +26,6 @@ LOCAL_ATTRS = (
     "___remote_class_name___",
     "___identifier___",
     "___connection___",
-    "___refcount___",
     "___local_overrides___" "__class__",
     "__init__",
     "__del__",
@@ -78,6 +77,7 @@ class StubMetaClass(type):

 def with_metaclass(meta, *bases):
     """Create a base class with a metaclass."""
+
     # Compatibility 2/3. Remove when only 3 support
     class metaclass(type):
         def __new__(cls, name, this_bases, d):
@@ -99,7 +99,6 @@ class Stub(with_metaclass(StubMetaClass, object)):
         "___identifier___",
         "___connection___",
         "__weakref__",
-        "___refcount___",
     ]

     # def __iter__(self):  # FIXME: Keep debugger QUIET!!
@@ -109,14 +108,10 @@ class Stub(with_metaclass(StubMetaClass, object)):
         self.___remote_class_name___ = remote_class_name
         self.___identifier___ = identifier
         self.___connection___ = connection
-        self.___refcount___ = 1

     def __del__(self):
         try:
-            pass
-            self.___refcount___ -= 1
-            if self.___refcount___ == 0:
-                fwd_request(self, OP_DEL)
+            fwd_request(self, OP_DEL)
         except Exception:
             # raised in a destructor, most likely on program termination,
             # when the connection might have already been closed.
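
With the client-side refcount gone, a stub's lifetime is tracked by Python's own reference counting: when the interpreter finalizes the stub, __del__ forwards a single OP_DEL so the server can release its side, and any exception is swallowed because destructors frequently run during interpreter shutdown when the connection may already be closed. A toy illustration of this forwarding-destructor pattern (names are stand-ins, not the metaflow API):

class RemoteHandle:
    """Toy stand-in for Stub: releases the server-side object on finalization."""

    def __init__(self, connection, identifier):
        self._connection = connection
        self._identifier = identifier

    def __del__(self):
        try:
            # One deletion request per handle; any bookkeeping of how many
            # handles alias one remote object is left to the server side.
            self._connection.send(("OP_DEL", self._identifier))
        except Exception:
            # Raised in a destructor, most likely at program termination
            # when the connection may already be closed; ignore.
            pass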
@@ -262,7 +257,6 @@ def create_class(
     setattr_overrides,
     class_methods,
 ):
-
     class_dict = {"__slots__": ()}
     for name, doc in class_methods.items():
         method_type = NORMAL_METHOD
metaflow/plugins/kubernetes/kubernetes.py
@@ -15,6 +15,7 @@ from metaflow.metaflow_config import (
     ARGO_EVENTS_SERVICE_ACCOUNT,
     ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
     AWS_SECRETS_MANAGER_DEFAULT_REGION,
+    ARGO_EVENTS_WEBHOOK_AUTH,
     AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -35,6 +36,8 @@ from metaflow.metaflow_config import (
     S3_SERVER_SIDE_ENCRYPTION,
     OTEL_ENDPOINT,
 )
+from metaflow.metaflow_config_funcs import config_values
+
 from metaflow.mflog import (
     BASH_SAVE_LOGS,
     bash_capture_logs,
@@ -261,6 +264,12 @@ class Kubernetes(object):
             # see get_datastore_root_from_config in datastore/local.py).
         )

+        # Temporary passing of *some* environment variables. Do not rely on this
+        # mechanism as it will be removed in the near future
+        for k, v in config_values():
+            if k.startswith("METAFLOW_CONDA_") or k.startswith("METAFLOW_DEBUG_"):
+                job.environment_variable(k, v)
+
         if S3_SERVER_SIDE_ENCRYPTION is not None:
             job.environment_variable(
                 "METAFLOW_S3_SERVER_SIDE_ENCRYPTION", S3_SERVER_SIDE_ENCRYPTION
@@ -280,6 +289,10 @@ class Kubernetes(object):
         job.environment_variable(
             "METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT", ARGO_EVENTS_SERVICE_ACCOUNT
         )
+        job.environment_variable(
+            "METAFLOW_ARGO_EVENTS_WEBHOOK_AUTH",
+            ARGO_EVENTS_WEBHOOK_AUTH,
+        )

         tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
         if tmpfs_enabled and tmpfs_tempdir:
metaflow/plugins/kubernetes/kubernetes_cli.py
@@ -143,8 +143,7 @@ def step(
     node = ctx.obj.graph[step_name]

     # Construct entrypoint CLI
-    if executable is None:
-        executable = ctx.obj.environment.executable(step_name)
+    executable = ctx.obj.environment.executable(step_name, executable)

     # Set environment
     env = {}
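
The None-check moves out of the CLI and into MetaflowEnvironment.executable, which now takes the user-supplied override as a second argument. Presumably the method keeps the old fallback internally, along the lines of this sketch (only the two-argument call is visible in the diff; the body is an assumption):

def executable(self, step_name, default=None):
    if default is not None:
        return default  # honor an explicit override from the CLI
    # fall back to the environment's interpreter for this step
    return self._default_interpreter(step_name)  # hypothetical helper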
metaflow/plugins/kubernetes/kubernetes_decorator.py
@@ -58,14 +58,21 @@ class KubernetesDecorator(StepDecorator):
         Docker image to use when launching on Kubernetes. If not specified, and
         METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
         not, a default Docker image mapping to the current version of Python is used.
+    image_pull_policy: str, default: KUBERNETES_IMAGE_PULL_POLICY
+        If given, the imagePullPolicy to be applied to the Docker image of the step.
     service_account : str, default: METAFLOW_KUBERNETES_SERVICE_ACCOUNT
         Kubernetes service account to use when launching pod in Kubernetes.
-    namespace : str, default: METAFLOW_KUBERNETES_NAMESPACE
-        Kubernetes namespace to use when launching pod in Kubernetes.
     secrets : List[str], optional
         Kubernetes secrets to use when launching pod in Kubernetes. These
         secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
         in Metaflow configuration.
+    namespace : str, default: METAFLOW_KUBERNETES_NAMESPACE
+        Kubernetes namespace to use when launching pod in Kubernetes.
+    gpu: int, optional
+        Number of GPUs required for this step. A value of zero implies that
+        the scheduled node should not have GPUs.
+    gpu_vendor: str, default: KUBERNETES_GPU_VENDOR
+        The vendor of the GPUs to be used for this step.
     tolerations : List[str], default: METAFLOW_KUBERNETES_TOLERATIONS
         Kubernetes tolerations to use when launching pod in Kubernetes.
     use_tmpfs: bool, default: False
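
Beyond the reordering, the docstring now covers the image_pull_policy, gpu, and gpu_vendor attributes. A hedged usage example (the resource values and vendor string are illustrative, not defaults):

from metaflow import FlowSpec, kubernetes, step

class TrainFlow(FlowSpec):
    @kubernetes(gpu=1, gpu_vendor="nvidia", image_pull_policy="Always")
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    TrainFlow()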
metaflow/plugins/pypi/__init__.py
@@ -0,0 +1,29 @@
+from metaflow import metaflow_config
+from metaflow.exception import MetaflowException
+
+MAGIC_FILE = "conda.manifest"
+
+
+# TODO: This can be lifted all the way into metaflow config
+def _datastore_packageroot(datastore, echo):
+    datastore_type = datastore.TYPE
+    datastore_packageroot = getattr(
+        metaflow_config,
+        "CONDA_PACKAGE_{datastore_type}ROOT".format(
+            datastore_type=datastore_type.upper()
+        ),
+        None,
+    )
+    if datastore_packageroot is None:
+        datastore_sysroot = datastore.get_datastore_root_from_config(echo)
+        if datastore_sysroot is None:
+            # TODO: Throw a more evocative error message
+            raise MetaflowException(
+                msg="METAFLOW_DATASTORE_SYSROOT_{datastore_type} must be set!".format(
+                    datastore_type=datastore_type.upper()
+                )
+            )
+        datastore_packageroot = "{datastore_sysroot}/conda".format(
+            datastore_sysroot=datastore_sysroot
+        )
+    return datastore_packageroot
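
Resolution order: an explicit CONDA_PACKAGE_<TYPE>ROOT setting wins; otherwise the package root is derived from the datastore sysroot plus a /conda suffix, and a missing sysroot is a hard error. A standalone mirror of that logic for an S3 datastore (bucket and path values are illustrative):

def packageroot(conda_package_s3root=None, datastore_sysroot_s3=None):
    if conda_package_s3root is not None:
        return conda_package_s3root
    if datastore_sysroot_s3 is None:
        raise RuntimeError("METAFLOW_DATASTORE_SYSROOT_S3 must be set!")
    return "%s/conda" % datastore_sysroot_s3

assert packageroot(None, "s3://my-bucket/metaflow") == "s3://my-bucket/metaflow/conda"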
metaflow/plugins/pypi/bootstrap.py
@@ -0,0 +1,131 @@
+import bz2
+import io
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tarfile
+
+from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
+from metaflow.plugins import DATASTORES
+from metaflow.util import which
+
+from . import MAGIC_FILE, _datastore_packageroot
+
+# Bootstraps a valid conda virtual environment composed of conda and pypi packages
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: bootstrap.py <flow_name> <id> <datastore_type> <architecture>")
+        sys.exit(1)
+    _, flow_name, id_, datastore_type, architecture = sys.argv
+
+    # TODO: Detect architecture on the fly when dealing with arm architectures.
+    # ARCH=$(uname -m)
+    # OS=$(uname)
+
+    # if [[ "$OS" == "Linux" ]]; then
+    #     PLATFORM="linux"
+    #     if [[ "$ARCH" == "aarch64" ]]; then
+    #         ARCH="aarch64";
+    #     elif [[ $ARCH == "ppc64le" ]]; then
+    #         ARCH="ppc64le";
+    #     else
+    #         ARCH="64";
+    #     fi
+    # fi
+
+    # if [[ "$OS" == "Darwin" ]]; then
+    #     PLATFORM="osx";
+    #     if [[ "$ARCH" == "arm64" ]]; then
+    #         ARCH="arm64";
+    #     else
+    #         ARCH="64"
+    #     fi
+    # fi
+
+    prefix = os.path.join(os.getcwd(), id_)
+    pkgs_dir = os.path.join(os.getcwd(), ".pkgs")
+    manifest_dir = os.path.join(os.getcwd(), DATASTORE_LOCAL_DIR, flow_name)
+
+    datastores = [d for d in DATASTORES if d.TYPE == datastore_type]
+    if not datastores:
+        print(f"No datastore found for type: {datastore_type}")
+        sys.exit(1)
+
+    storage = datastores[0](
+        _datastore_packageroot(datastores[0], lambda *args, **kwargs: None)
+    )
+
+    # Move MAGIC_FILE inside local datastore.
+    os.makedirs(manifest_dir, exist_ok=True)
+    shutil.move(
+        os.path.join(os.getcwd(), MAGIC_FILE),
+        os.path.join(manifest_dir, MAGIC_FILE),
+    )
+
+    with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
+        env = json.load(f)[id_][architecture]
+
+    # Download Conda packages.
+    conda_pkgs_dir = os.path.join(pkgs_dir, "conda")
+    with storage.load_bytes([package["path"] for package in env["conda"]]) as results:
+        for key, tmpfile, _ in results:
+            # Ensure that conda packages go into architecture specific folders.
+            # The path looks like REPO/CHANNEL/CONDA_SUBDIR/PACKAGE. We trick
+            # Micromamba into believing that all packages are coming from a local
+            # channel - the only hurdle is ensuring that packages are organised
+            # properly.
+
+            # TODO: consider RAM disk
+            dest = os.path.join(conda_pkgs_dir, "/".join(key.split("/")[-2:]))
+            os.makedirs(os.path.dirname(dest), exist_ok=True)
+            shutil.move(tmpfile, dest)
+
+    # Create Conda environment.
+    cmds = [
+        # TODO: check if mamba or conda are already available on the image
+        # TODO: search for micromamba everywhere
+        f"""if ! command -v ./micromamba >/dev/null 2>&1; then
+            wget -qO- https://micro.mamba.pm/api/micromamba/{architecture}/latest | python -c "import sys, bz2; sys.stdout.buffer.write(bz2.decompress(sys.stdin.buffer.read()))" | tar -xv bin/micromamba --strip-components=1 ;
+            if ! command -v ./micromamba >/dev/null 2>&1; then
+                echo "Failed to install Micromamba!";
+                exit 1;
+            fi;
+        fi""",
+        # Create a conda environment through Micromamba.
+        f'''tmpfile=$(mktemp);
+        echo "@EXPLICIT" > "$tmpfile";
+        ls -d {conda_pkgs_dir}/*/* >> "$tmpfile";
+        ./micromamba create --yes --offline --no-deps --safety-checks=disabled --no-extra-safety-checks --prefix {prefix} --file "$tmpfile";
+        rm "$tmpfile"''',
+    ]
+
+    # Download PyPI packages.
+    if "pypi" in env:
+        pypi_pkgs_dir = os.path.join(pkgs_dir, "pypi")
+        with storage.load_bytes(
+            [package["path"] for package in env["pypi"]]
+        ) as results:
+            for key, tmpfile, _ in results:
+                dest = os.path.join(pypi_pkgs_dir, os.path.basename(key))
+                os.makedirs(os.path.dirname(dest), exist_ok=True)
+                shutil.move(tmpfile, dest)
+
+        # Install PyPI packages.
+        cmds.extend(
+            [
+                f"""./micromamba run --prefix {prefix} pip --disable-pip-version-check install --root-user-action=ignore --no-compile {pypi_pkgs_dir}/*.whl"""
+            ]
+        )
+
+    for cmd in cmds:
+        result = subprocess.run(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout.decode())
+            print("Stderr:", result.stderr.decode())
+            sys.exit(1)
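
The script is driven by four positional arguments plus the conda.manifest file (MAGIC_FILE) staged into the working directory, and it materializes the environment under ./<id>. Because of the relative import it has to run as a module rather than as a bare script. An illustrative invocation (flow name, id, and architecture are made up):

import subprocess

subprocess.run(
    [
        "python", "-m", "metaflow.plugins.pypi.bootstrap",
        "MyFlow",    # <flow_name>: locates the manifest dir in the local datastore
        "0123abcd",  # <id>: environment id; also names the prefix directory
        "s3",        # <datastore_type>: must match a registered DATASTORES entry
        "linux-64",  # <architecture>: micromamba platform string
    ],
    check=True,
)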