ob-metaflow 2.9.10.1__py2.py3-none-any.whl → 2.10.2.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Files changed (57)
  1. metaflow/_vendor/packaging/__init__.py +15 -0
  2. metaflow/_vendor/packaging/_elffile.py +108 -0
  3. metaflow/_vendor/packaging/_manylinux.py +238 -0
  4. metaflow/_vendor/packaging/_musllinux.py +80 -0
  5. metaflow/_vendor/packaging/_parser.py +328 -0
  6. metaflow/_vendor/packaging/_structures.py +61 -0
  7. metaflow/_vendor/packaging/_tokenizer.py +188 -0
  8. metaflow/_vendor/packaging/markers.py +245 -0
  9. metaflow/_vendor/packaging/requirements.py +95 -0
  10. metaflow/_vendor/packaging/specifiers.py +1005 -0
  11. metaflow/_vendor/packaging/tags.py +546 -0
  12. metaflow/_vendor/packaging/utils.py +141 -0
  13. metaflow/_vendor/packaging/version.py +563 -0
  14. metaflow/_vendor/v3_7/__init__.py +1 -0
  15. metaflow/_vendor/v3_7/zipp.py +329 -0
  16. metaflow/metaflow_config.py +2 -1
  17. metaflow/metaflow_environment.py +3 -1
  18. metaflow/mflog/mflog.py +7 -1
  19. metaflow/multicore_utils.py +12 -2
  20. metaflow/plugins/__init__.py +8 -3
  21. metaflow/plugins/airflow/airflow.py +13 -0
  22. metaflow/plugins/argo/argo_client.py +16 -0
  23. metaflow/plugins/argo/argo_events.py +7 -1
  24. metaflow/plugins/argo/argo_workflows.py +62 -0
  25. metaflow/plugins/argo/argo_workflows_cli.py +15 -0
  26. metaflow/plugins/aws/batch/batch.py +10 -0
  27. metaflow/plugins/aws/batch/batch_cli.py +1 -2
  28. metaflow/plugins/aws/batch/batch_decorator.py +2 -9
  29. metaflow/plugins/datatools/s3/s3.py +4 -0
  30. metaflow/plugins/env_escape/client.py +24 -3
  31. metaflow/plugins/env_escape/stub.py +2 -8
  32. metaflow/plugins/kubernetes/kubernetes.py +13 -0
  33. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -2
  34. metaflow/plugins/kubernetes/kubernetes_decorator.py +9 -2
  35. metaflow/plugins/pypi/__init__.py +29 -0
  36. metaflow/plugins/pypi/bootstrap.py +131 -0
  37. metaflow/plugins/pypi/conda_decorator.py +335 -0
  38. metaflow/plugins/pypi/conda_environment.py +414 -0
  39. metaflow/plugins/pypi/micromamba.py +294 -0
  40. metaflow/plugins/pypi/pip.py +205 -0
  41. metaflow/plugins/pypi/pypi_decorator.py +130 -0
  42. metaflow/plugins/pypi/pypi_environment.py +7 -0
  43. metaflow/plugins/pypi/utils.py +75 -0
  44. metaflow/task.py +0 -3
  45. metaflow/vendor.py +1 -0
  46. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/METADATA +1 -1
  47. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/RECORD +51 -33
  48. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/WHEEL +1 -1
  49. metaflow/plugins/conda/__init__.py +0 -90
  50. metaflow/plugins/conda/batch_bootstrap.py +0 -104
  51. metaflow/plugins/conda/conda.py +0 -247
  52. metaflow/plugins/conda/conda_environment.py +0 -136
  53. metaflow/plugins/conda/conda_flow_decorator.py +0 -35
  54. metaflow/plugins/conda/conda_step_decorator.py +0 -416
  55. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/LICENSE +0 -0
  56. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/entry_points.txt +0 -0
  57. {ob_metaflow-2.9.10.1.dist-info → ob_metaflow-2.10.2.6.dist-info}/top_level.txt +0 -0
metaflow/plugins/pypi/conda_environment.py (new file)
@@ -0,0 +1,414 @@
+ import errno
+ import fcntl
+ import functools
+ import io
+ import json
+ import os
+ import sys
+ import time
+ from concurrent.futures import ThreadPoolExecutor
+ from hashlib import sha256
+ from io import BufferedIOBase
+ from itertools import chain
+ from urllib.parse import urlparse
+
+ import requests
+
+ from metaflow.metaflow_config import get_pinned_conda_libs
+ from metaflow.exception import MetaflowException
+ from metaflow.metaflow_environment import MetaflowEnvironment
+ from metaflow.metaflow_profile import profile
+
+ from . import MAGIC_FILE, _datastore_packageroot
+ from .utils import conda_platform
+
+
+ class CondaEnvironmentException(MetaflowException):
+     headline = "Ran into an error while setting up environment"
+
+     def __init__(self, msg):
+         super(CondaEnvironmentException, self).__init__(msg)
+
+
+ class CondaEnvironment(MetaflowEnvironment):
+     TYPE = "conda"
+
+     def __init__(self, flow):
+         self.flow = flow
+
+     def set_local_root(self, local_root):
+         # TODO: Make life simple by passing echo to the constructor and getting rid of
+         #       this method's invocation in the decorator.
+         self.local_root = local_root
+
+     def decospecs(self):
+         # Apply conda decorator to manage the task execution lifecycle.
+         return ("conda",) + super().decospecs()
+
+     def validate_environment(self, echo, datastore_type):
+         self.datastore_type = datastore_type
+         self.echo = echo
+
+         # Avoiding circular imports.
+         from metaflow.plugins import DATASTORES
+
+         self.datastore = [d for d in DATASTORES if d.TYPE == self.datastore_type][0]
+
+         # Initialize necessary virtual environments for all Metaflow tasks.
+         # Use Micromamba for solving conda packages and Pip for solving pypi packages.
+         from .micromamba import Micromamba
+         from .pip import Pip
+
+         micromamba = Micromamba()
+         self.solvers = {"conda": micromamba, "pypi": Pip(micromamba)}
+
+     def init_environment(self, echo):
+         # The implementation optimizes for latency to ensure as many operations can
+         # be turned into cheap no-ops as feasible. Otherwise, we focus on maintaining
+         # a balance between latency and maintainability of code without re-implementing
+         # the internals of Micromamba and Pip.
+
+         # TODO: Introduce verbose logging
+         #       https://github.com/Netflix/metaflow/issues/1494
+
+         def environments(type_):
+             seen = set()
+             for step in self.flow:
+                 environment = self.get_environment(step)
+                 if type_ in environment and environment["id_"] not in seen:
+                     seen.add(environment["id_"])
+                     for platform in environment[type_]["platforms"]:
+                         yield environment["id_"], {
+                             **{
+                                 k: v
+                                 for k, v in environment[type_].items()
+                                 if k != "platforms"
+                             },
+                             **{"platform": platform},
+                         }
+
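+         # solve() is a read-through cache: it reuses the package list recorded
+         # in the environment manifest for (id_, platform, type_) when present,
+         # and otherwise solves from scratch and persists the result.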
+         def solve(id_, environment, type_):
+             # Cached solve - should be quick!
+             platform = environment["platform"]
+             return (
+                 id_,
+                 (
+                     self.read_from_environment_manifest([id_, platform, type_])
+                     or self.write_to_environment_manifest(
+                         [id_, platform, type_],
+                         self.solvers[type_].solve(id_, **environment),
+                     )
+                 ),
+                 environment["python"],
+                 platform,
+             )
+
+         def cache(storage, results, type_):
+             local_packages = {
+                 url: {
+                     # Path to package in datastore.
+                     "path": urlparse(url).netloc + urlparse(url).path,
+                     # Path to package on local disk.
+                     "local_path": local_path,
+                 }
+                 for result in results
+                 for url, local_path in self.solvers[type_].metadata(*result).items()
+             }
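+             # Track ids whose package lists gain datastore paths below; their
+             # manifest entries are rewritten once the packages are uploaded.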
+             dirty = set()
+             # Prune list of packages to cache.
+             for id_, packages, _, _ in results:
+                 for package in packages:
+                     if package.get("path"):
+                         # Cache only those packages that the manifest is unaware of.
+                         local_packages.pop(package["url"], None)
+                     else:
+                         package["path"] = (
+                             urlparse(package["url"]).netloc
+                             + urlparse(package["url"]).path
+                         )
+                         dirty.add(id_)
+
+             list_of_path_and_filehandle = [
+                 (
+                     package["path"],
+                     # Lazily fetch package from the interweb if needed.
+                     LazyOpen(package["local_path"], "rb", url),
+                 )
+                 for url, package in local_packages.items()
+             ]
+             storage.save_bytes(
+                 list_of_path_and_filehandle,
+                 len_hint=len(list_of_path_and_filehandle),
+             )
+             for id_, packages, _, platform in results:
+                 if id_ in dirty:
+                     self.write_to_environment_manifest([id_, platform, type_], packages)
+
+         # First resolve environments through Conda, before PyPI.
+         echo("Bootstrapping virtual environment(s) ...")
+         for solver in ["conda", "pypi"]:
+             with ThreadPoolExecutor() as executor:
+                 results = list(
+                     executor.map(lambda x: solve(*x, solver), environments(solver))
+                 )
+             _ = list(map(lambda x: self.solvers[solver].download(*x), results))
+             with ThreadPoolExecutor() as executor:
+                 _ = list(
+                     executor.map(lambda x: self.solvers[solver].create(*x), results)
+                 )
+             if self.datastore_type not in ["local"]:
+                 # Cache packages only when a remote datastore is in play.
+                 storage = self.datastore(
+                     _datastore_packageroot(self.datastore, self.echo)
+                 )
+                 cache(storage, results, solver)
+         echo("Virtual environment(s) bootstrapped!")
+
+     def executable(self, step_name, default=None):
+         step = next(step for step in self.flow if step.name == step_name)
+         id_ = self.get_environment(step).get("id_")
+         if id_:
+             # bootstrap.py is responsible for ensuring the validity of this executable.
+             # -s is important! Can otherwise leak packages to other environments.
+             return os.path.join(id_, "bin/python -s")
+         else:
+             # for @conda/@pypi(disabled=True).
+             return super().executable(step_name, default)
+
+     def interpreter(self, step_name):
+         step = next(step for step in self.flow if step.name == step_name)
+         id_ = self.get_environment(step)["id_"]
+         # User workloads are executed through the conda environment's interpreter.
+         return self.solvers["conda"].interpreter(id_)
+
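+     # Memoized on the step object, so the environment below is computed at
+     # most once per step no matter how many times it is queried.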
+     @functools.lru_cache(maxsize=None)
+     def get_environment(self, step):
+         environment = {}
+         for decorator in step.decorators:
+             # @conda decorator is guaranteed to exist thanks to self.decospecs
+             if decorator.name in ["conda", "pypi"]:
+                 # handle @conda/@pypi(disabled=True)
+                 disabled = decorator.attributes["disabled"]
+                 if not disabled or str(disabled).lower() == "false":
+                     environment[decorator.name] = {
+                         k: decorator.attributes[k]
+                         for k in decorator.attributes
+                         if k != "disabled"
+                     }
+                 else:
+                     return {}
+         # Resolve conda environment for @pypi's Python, falling back on @conda's
+         # Python.
+         env_python = (
+             environment.get("pypi", environment["conda"]).get("python")
+             or environment["conda"]["python"]
+         )
+         # TODO: Support dependencies for `--metadata`.
+         # TODO: Introduce support for `--telemetry` as a follow-up.
+         # Certain packages are required for the Metaflow runtime to function
+         # correctly. Ensure these packages are available both in Conda channels
+         # and PyPI repositories.
+         pinned_packages = get_pinned_conda_libs(env_python, self.datastore_type)
+
+         # PyPI dependencies are prioritized over Conda dependencies.
+         environment.get("pypi", environment["conda"])["packages"] = {
+             **pinned_packages,
+             **environment.get("pypi", environment["conda"])["packages"],
+         }
+         # Disallow specifying both @conda and @pypi together for now. Mixing Conda
+         # and PyPI packages comes with a lot of operational pain that we can handle
+         # as follow-up work in the future.
+         if all(
+             map(lambda key: environment.get(key, {}).get("packages"), ["pypi", "conda"])
+         ):
+             msg = "Mixing and matching PyPI packages and Conda packages within a\n"
+             msg += "step is not yet supported. Use one of @pypi or @conda only."
+             raise CondaEnvironmentException(msg)
+
+         # To support cross-platform environments, these invariants are maintained:
+         # 1. Conda packages are resolved for target platforms
+         # 2. Conda packages are resolved for local platform only for PyPI packages
+         # 3. Conda environments are created only for local platform
+         # 4. PyPI packages are resolved for target platform within Conda environments
+         #    created for local platform
+         # 5. All resolved packages (Conda or PyPI) are cached
+         # 6. PyPI packages are only installed for local platform
+
+         # Resolve `linux-64` Conda environments if @batch or @kubernetes are in play.
+         target_platform = conda_platform()
+         for decorator in step.decorators:
+             if decorator.name in ["batch", "kubernetes"]:
+                 # TODO: Support arm architectures
+                 target_platform = "linux-64"
+                 break
+
+         environment["conda"]["platforms"] = [target_platform]
+         if "pypi" in environment:
+             # For PyPI packages, resolve the conda environment for the local
+             # platform in addition to the target platform.
+             environment["conda"]["platforms"] = list(
+                 {target_platform, conda_platform()}
+             )
+             environment["pypi"]["platforms"] = [target_platform]
+             # Match PyPI and Conda Python versions with the resolved environment Python.
+             environment["pypi"]["python"] = environment["conda"]["python"] = env_python
+
+         # Z combinator for a recursive lambda
+         deep_sort = (lambda f: f(f))(
+             lambda f: lambda obj: (
+                 {k: f(f)(v) for k, v in sorted(obj.items())}
+                 if isinstance(obj, dict)
+                 else sorted([f(f)(e) for e in obj])
+                 if isinstance(obj, list)
+                 else obj
+             )
+         )
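+         # The self-application f(f) lets the unnamed lambda recurse: deep_sort
+         # orders dict keys and list elements at every level so that json.dumps
+         # below produces one canonical string per logical environment.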
+
+         return {
+             **environment,
+             # Create a stable unique id for the environment.
+             # Add packageroot to the id so that packageroot modifications can
+             # invalidate existing environments.
+             "id_": sha256(
+                 json.dumps(
+                     deep_sort(
+                         {
+                             **environment,
+                             **{
+                                 "package_root": _datastore_packageroot(
+                                     self.datastore, self.echo
+                                 )
+                             },
+                         }
+                     )
+                 ).encode()
+             ).hexdigest()[:15],
+         }
+
+     def pylint_config(self):
+         config = super().pylint_config()
+         # Disable (import-error) in pylint.
+         config.append("--disable=F0401")
+         return config
+
+     @classmethod
+     def get_client_info(cls, flow_name, metadata):
+         # TODO: Decide this method's fate
+         return None
+
+     def add_to_package(self):
+         # Add manifest file to job package at the top level.
+         files = []
+         manifest = self.get_environment_manifest_path()
+         if os.path.exists(manifest):
+             files.append((manifest, os.path.basename(manifest)))
+         return files
+
+     def bootstrap_commands(self, step_name, datastore_type):
+         # Bootstrap conda and execution environment for step.
+         step = next(step for step in self.flow if step.name == step_name)
+         id_ = self.get_environment(step).get("id_")
+         if id_:
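+             # Remote workers are assumed to run on linux-64, mirroring the
+             # target platform selection in get_environment.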
+             return [
+                 "echo 'Bootstrapping virtual environment...'",
+                 'python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
+                 % (self.flow.name, id_, self.datastore_type),
+                 "echo 'Environment bootstrapped.'",
+             ]
+         else:
+             # for @conda/@pypi(disabled=True).
+             return super().bootstrap_commands(step_name, datastore_type)
+
+     # TODO: Make this an instance variable once local_root is part of the object
+     #       constructor.
+     def get_environment_manifest_path(self):
+         return os.path.join(self.local_root, self.flow.name, MAGIC_FILE)
+
+     def read_from_environment_manifest(self, keys):
+         path = self.get_environment_manifest_path()
+         if os.path.exists(path) and os.path.getsize(path) > 0:
+             with open(path) as f:
+                 data = json.load(f)
+             for key in keys:
+                 try:
+                     data = data[key]
+                 except KeyError:
+                     return None
+             return data
+
+     def write_to_environment_manifest(self, keys, value):
+         path = self.get_environment_manifest_path()
+         try:
+             os.makedirs(os.path.dirname(path))
+         except OSError as x:
+             if x.errno != errno.EEXIST:
+                 raise
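+         # Open without truncating (os.O_RDWR | os.O_CREAT) and hold an
+         # exclusive flock so that concurrent processes serialize their
+         # read-modify-write of the shared manifest.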
+         with os.fdopen(os.open(path, os.O_RDWR | os.O_CREAT), "r+") as f:
+             try:
+                 fcntl.flock(f, fcntl.LOCK_EX)
+                 d = {}
+                 if os.path.getsize(path) > 0:
+                     f.seek(0)
+                     d = json.load(f)
+                 data = d
+                 for key in keys[:-1]:
+                     data = data.setdefault(key, {})
+                 data[keys[-1]] = value
+                 f.seek(0)
+                 json.dump(d, f)
+                 f.truncate()
+                 return value
+             except IOError as e:
+                 if e.errno != errno.EAGAIN:
+                     raise
+             finally:
+                 fcntl.flock(f, fcntl.LOCK_UN)
+
+
+ class LazyOpen(BufferedIOBase):
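+     # A file-like object that defers I/O until first read: it serves bytes
+     # from the local file when one exists and otherwise downloads them from
+     # `url`, so cache() can hand save_bytes handles without fetching
+     # everything up front.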
+     def __init__(self, filename, mode="rb", url=None):
+         super().__init__()
+         self.filename = filename
+         self.mode = mode
+         self.url = url
+         self._file = None
+         self._buffer = None
+         self._position = 0
+
+     def _ensure_file(self):
+         if not self._file:
+             if self.filename and os.path.exists(self.filename):
+                 self._file = open(self.filename, self.mode)
+             elif self.url:
+                 self._buffer = self._download_to_buffer()
+                 self._file = io.BytesIO(self._buffer)
+             else:
+                 raise ValueError("Both filename and url are missing")
+
+     def _download_to_buffer(self):
+         # TODO: Stream it in chunks?
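+         # Note: despite stream=True, response.content below buffers the whole
+         # payload in memory.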
+         response = requests.get(self.url, stream=True)
+         response.raise_for_status()
+         return response.content
+
+     def readable(self):
+         return "r" in self.mode
+
+     def seekable(self):
+         return True
+
+     def read(self, size=-1):
+         self._ensure_file()
+         return self._file.read(size)
+
+     def seek(self, offset, whence=io.SEEK_SET):
+         self._ensure_file()
+         return self._file.seek(offset, whence)
+
+     def tell(self):
+         self._ensure_file()
+         return self._file.tell()
+
+     def close(self):
+         if self._file:
+             self._file.close()
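
For reference, the manifest helpers above store solves in one nested JSON document keyed by [id_, platform, type_]. A minimal standalone sketch of that read/write pattern (hypothetical keys and values, no file locking) looks like:

    import json

    manifest = {}

    def write(keys, value):
        # Walk all but the last key, creating nested dicts as needed.
        node = manifest
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        node[keys[-1]] = value
        return value

    def read(keys):
        # Return the value at the key path, or None if any key is missing.
        node = manifest
        for key in keys:
            try:
                node = node[key]
            except KeyError:
                return None
        return node

    write(["env123", "linux-64", "conda"], [{"url": "https://example.com/pkg.tar.bz2"}])
    assert read(["env123", "linux-64", "conda"])[0]["url"].endswith("pkg.tar.bz2")
    print(json.dumps(manifest, indent=2))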