metaflow 2.8.1__py2.py3-none-any.whl → 2.8.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. metaflow/client/core.py +14 -4
  2. metaflow/cmd/configure_cmd.py +3 -3
  3. metaflow/cmd/main_cli.py +9 -14
  4. metaflow/current.py +15 -0
  5. metaflow/datastore/datastore_set.py +7 -7
  6. metaflow/datastore/flow_datastore.py +1 -2
  7. metaflow/extension_support/__init__.py +1 -0
  8. metaflow/extension_support/integrations.py +141 -0
  9. metaflow/integrations.py +29 -0
  10. metaflow/metaflow_config.py +21 -0
  11. metaflow/metaflow_environment.py +5 -4
  12. metaflow/package.py +1 -1
  13. metaflow/plugins/airflow/airflow.py +0 -1
  14. metaflow/plugins/argo/argo_workflows.py +2 -0
  15. metaflow/plugins/argo/argo_workflows_cli.py +11 -1
  16. metaflow/plugins/aws/aws_utils.py +6 -1
  17. metaflow/plugins/aws/batch/batch.py +30 -8
  18. metaflow/plugins/aws/batch/batch_cli.py +12 -0
  19. metaflow/plugins/aws/batch/batch_client.py +39 -2
  20. metaflow/plugins/aws/batch/batch_decorator.py +23 -0
  21. metaflow/plugins/aws/step_functions/step_functions.py +7 -4
  22. metaflow/plugins/aws/step_functions/step_functions_cli.py +11 -1
  23. metaflow/plugins/cards/card_modules/bundle.css +56 -56
  24. metaflow/plugins/cards/card_modules/convert_to_native_type.py +67 -5
  25. metaflow/plugins/cards/card_modules/main.js +14 -7
  26. metaflow/plugins/conda/conda_environment.py +2 -2
  27. metaflow/plugins/conda/conda_step_decorator.py +7 -1
  28. metaflow/plugins/datatools/s3/s3.py +2 -2
  29. metaflow/plugins/env_escape/communication/channel.py +1 -1
  30. metaflow/plugins/kubernetes/kubernetes.py +4 -0
  31. metaflow/plugins/kubernetes/kubernetes_decorator.py +6 -2
  32. metaflow/plugins/kubernetes/kubernetes_job.py +17 -2
  33. metaflow/plugins/metadata/service.py +3 -2
  34. metaflow/runtime.py +5 -3
  35. metaflow/tutorials/02-statistics/README.md +4 -9
  36. metaflow/tutorials/02-statistics/stats.py +38 -11
  37. metaflow/tutorials/03-playlist-redux/playlist.py +24 -16
  38. metaflow/tutorials/04-playlist-plus/playlist.py +14 -23
  39. metaflow/tutorials/05-hello-cloud/README.md +45 -0
  40. metaflow/tutorials/{05-helloaws/helloaws.ipynb → 05-hello-cloud/hello-cloud.ipynb} +10 -5
  41. metaflow/tutorials/{05-helloaws/helloaws.py → 05-hello-cloud/hello-cloud.py} +11 -13
  42. metaflow/tutorials/06-statistics-redux/README.md +6 -29
  43. metaflow/tutorials/06-statistics-redux/stats.ipynb +2 -2
  44. metaflow/tutorials/07-worldview/README.md +3 -11
  45. metaflow/tutorials/07-worldview/worldview.ipynb +3 -3
  46. metaflow/tutorials/08-autopilot/README.md +10 -17
  47. metaflow/tutorials/08-autopilot/autopilot.ipynb +12 -7
  48. {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/METADATA +1 -6
  49. {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/RECORD +53 -51
  50. metaflow/tutorials/05-helloaws/README.md +0 -27
  51. {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/LICENSE +0 -0
  52. {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/WHEEL +0 -0
  53. {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/entry_points.txt +0 -0
  54. {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/top_level.txt +0 -0
metaflow/client/core.py CHANGED
@@ -265,6 +265,7 @@ class MetaflowObject(object):
  self._parent = _parent
  self._path_components = None
  self._attempt = attempt
+ self._namespace_check = _namespace_check

  if self._attempt is not None:
  if self._NAME not in ["task", "artifact"]:
@@ -315,7 +316,7 @@ class MetaflowObject(object):
  self._user_tags = frozenset(self._object.get("tags") or [])
  self._system_tags = frozenset(self._object.get("system_tags") or [])

- if _namespace_check and not self.is_in_namespace():
+ if self._namespace_check and not self.is_in_namespace():
  raise MetaflowNamespaceMismatch(current_namespace)

  def _get_object(self, *path_components):
@@ -330,7 +331,8 @@ class MetaflowObject(object):
  """
  Iterate over all child objects of this object if any.

- Note that only children present in the current namespace are returned.
+ Note that only children present in the current namespace are returned iff
+ _namespace_check is set.

  Returns
  -------
@@ -338,7 +340,8 @@ class MetaflowObject(object):
  Iterator over all children
  """
  query_filter = {}
- if current_namespace:
+ # skip namespace filtering if _namespace_check is False
+ if self._namespace_check and current_namespace:
  query_filter = {"any_tags": current_namespace}

  unfiltered_children = self._metaflow.metadata.get_object(
@@ -381,6 +384,10 @@ class MetaflowObject(object):
  if all(tag in child.tags for tag in tags):
  yield child

+ def _ipython_key_completions_(self):
+ """Returns available options for ipython auto-complete."""
+ return [child.id for child in self._filtered_children()]
+
  @classmethod
  def _url_token(cls):
  return "%ss" % cls._NAME
@@ -444,7 +451,10 @@ class MetaflowObject(object):
  obj = self._get_child(id)
  if obj:
  return _CLASSES[self._CHILD_CLASS](
- attempt=self._attempt,
+ _object=obj,
+ _parent=self,
+ _namespace_check=self._namespace_check,
  )
  else:
  raise KeyError(id)
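The change above stores _namespace_check on the object and threads it through child lookups, and the new _ipython_key_completions_ hook lets IPython tab-complete the ids accepted by item access. A minimal, hypothetical sketch of what this looks like from the client side (the flow name is made up):

    from metaflow import Flow

    run = Flow("HelloFlow").latest_run
    # In IPython, typing run["<TAB> now suggests the same ids the new hook returns:
    print(run._ipython_key_completions_())  # e.g. ['start', 'end']
    step = run["start"]  # the child inherits the parent's _namespace_check setting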
metaflow/cmd/configure_cmd.py CHANGED
@@ -249,13 +249,13 @@ def configure_s3_datastore(existing_env):
  show_default=True,
  )
  # Set Amazon S3 folder for datatools.
- env["METAFLOW_DATATOOLS_SYSROOT_S3"] = click.prompt(
- cyan("[METAFLOW_DATATOOLS_SYSROOT_S3]")
+ env["METAFLOW_DATATOOLS_S3ROOT"] = click.prompt(
+ cyan("[METAFLOW_DATATOOLS_S3ROOT]")
  + yellow(" (optional)")
  + " Amazon S3 folder for Metaflow datatools "
  + "(s3://<bucket>/<prefix>).",
  default=existing_env.get(
- "METAFLOW_DATATOOLS_SYSROOT_S3",
+ "METAFLOW_DATATOOLS_S3ROOT",
  os.path.join(env["METAFLOW_DATASTORE_SYSROOT_S3"], "data"),
  ),
  show_default=True,
metaflow/cmd/main_cli.py CHANGED
@@ -4,7 +4,8 @@ from metaflow._vendor import click

  from metaflow.extension_support.cmd import process_cmds, resolve_cmds
  from metaflow.plugins.datastores.local_storage import LocalStorage
- from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
+ from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONTACT_INFO
+ from metaflow.metaflow_version import get_version

  from .util import echo_always

@@ -80,24 +81,18 @@ def start(ctx):
  echo("Metaflow ", fg="magenta", bold=True, nl=False)

  if ctx.invoked_subcommand is None:
- echo("(%s): " % metaflow.__version__, fg="magenta", bold=False, nl=False)
+ echo("(%s): " % get_version(), fg="magenta", bold=False, nl=False)
  else:
- echo("(%s)\n" % metaflow.__version__, fg="magenta", bold=False)
+ echo("(%s)\n" % get_version(), fg="magenta", bold=False)

  if ctx.invoked_subcommand is None:
  echo("More data science, less engineering\n", fg="magenta")

- # metaflow URL
- echo("http://docs.metaflow.org", fg="cyan", nl=False)
- echo(" - Read the documentation")
-
- # metaflow chat
- echo("http://chat.metaflow.org", fg="cyan", nl=False)
- echo(" - Chat with us")
-
- # metaflow help email
- echo("help@metaflow.org", fg="cyan", nl=False)
- echo(" - Get help by email\n")
+ lnk_sz = max(len(lnk) for lnk in CONTACT_INFO.values()) + 1
+ for what, lnk in CONTACT_INFO.items():
+ echo("%s%s" % (lnk, " " * (lnk_sz - len(lnk))), fg="cyan", nl=False)
+ echo("- %s" % what)
+ echo("")

  print(ctx.get_help())
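The hard-coded banner links are replaced by a loop over CONTACT_INFO (defined in metaflow_config.py further down). A minimal sketch of the new rendering with the default values; the printed output is shown in the trailing comments:

    CONTACT_INFO = {
        "Read the documentation": "http://docs.metaflow.org",
        "Chat with us": "http://chat.metaflow.org",
        "Get help by email": "help@metaflow.org",
    }

    lnk_sz = max(len(lnk) for lnk in CONTACT_INFO.values()) + 1
    for what, lnk in CONTACT_INFO.items():
        # pad each link so the descriptions line up, exactly as main_cli.py does
        print("%s%s- %s" % (lnk, " " * (lnk_sz - len(lnk)), what))
    # http://docs.metaflow.org - Read the documentation
    # http://chat.metaflow.org - Chat with us
    # help@metaflow.org        - Get help by email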
metaflow/current.py CHANGED
@@ -2,6 +2,8 @@ from collections import namedtuple
  import os
  from typing import Any, Optional

+ from metaflow.metaflow_config import TEMPDIR
+
  Parallel = namedtuple("Parallel", ["main_ip", "num_nodes", "node_index"])


@@ -17,6 +19,7 @@ class Current(object):
  self._username = None
  self._metadata_str = None
  self._is_running = False
+ self._tempdir = TEMPDIR

  def _raise(ex):
  raise ex
@@ -230,6 +233,18 @@ class Current(object):
  """
  return self._tags

+ @property
+ def tempdir(self) -> str:
+ """
+ Currently configured temp dir.
+
+ Returns
+ -------
+ str
+ temp dir.
+ """
+ return self._tempdir
+

  # instantiate the Current singleton. This will be populated
  # by task.MetaflowTask before a task is executed.
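A minimal, hypothetical sketch of reading the new current.tempdir property inside a step; TEMPDIR defaults to "." and, per the AWS Batch changes below, METAFLOW_TEMPDIR is pointed at the tmpfs mount when tmpfs_tempdir is enabled (the flow name here is made up):

    import tempfile

    from metaflow import FlowSpec, current, step

    class TempDirFlow(FlowSpec):
        @step
        def start(self):
            # scratch files land under the configured temp dir
            # (tmpfs-backed on Batch when tmpfs_tempdir is in effect)
            with tempfile.TemporaryDirectory(dir=current.tempdir) as scratch:
                print("scratch space:", scratch)
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        TempDirFlow()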
metaflow/datastore/datastore_set.py CHANGED
@@ -22,15 +22,14 @@ class TaskDataStoreSet(object):
  prefetch_data_artifacts=None,
  allow_not_done=False,
  ):
-
- task_datastores = flow_datastore.get_latest_task_datastores(
+ self.task_datastores = flow_datastore.get_latest_task_datastores(
  run_id, steps=steps, pathspecs=pathspecs, allow_not_done=allow_not_done
  )

  if prefetch_data_artifacts:
  # produce a set of SHA keys to prefetch based on artifact names
  prefetch = set()
- for ds in task_datastores:
+ for ds in self.task_datastores:
  prefetch.update(ds.keys_for_artifacts(prefetch_data_artifacts))
  # ignore missing keys
  prefetch.discard(None)
@@ -43,9 +42,10 @@

  self.pathspec_index_cache = {}
  self.pathspec_cache = {}
- for ds in task_datastores:
- self.pathspec_index_cache[ds.pathspec_index] = ds
- self.pathspec_cache[ds.pathspec] = ds
+ if not allow_not_done:
+ for ds in self.task_datastores:
+ self.pathspec_index_cache[ds.pathspec_index] = ds
+ self.pathspec_cache[ds.pathspec] = ds

  def get_with_pathspec(self, pathspec):
  return self.pathspec_cache.get(pathspec, None)
@@ -54,7 +54,7 @@ class TaskDataStoreSet(object):
  return self.pathspec_index_cache.get(pathspec_index, None)

  def __iter__(self):
- for v in self.pathspec_cache.values():
+ for v in self.task_datastores:
  yield v

metaflow/datastore/flow_datastore.py CHANGED
@@ -172,7 +172,7 @@ class FlowDataStore(object):
  else:
  latest_to_fetch = latest_started_attempts & done_attempts
  latest_to_fetch = [
- (v[0], v[1], v[2], v[3], data_objs[v], "r", allow_not_done)
+ (v[0], v[1], v[2], v[3], data_objs.get(v), "r", allow_not_done)
  for v in latest_to_fetch
  ]
  return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
@@ -187,7 +187,6 @@
  mode="r",
  allow_not_done=False,
  ):
-
  return TaskDataStore(
  self,
  run_id,
metaflow/extension_support/__init__.py CHANGED
@@ -283,6 +283,7 @@ _extension_points = [
  "exceptions",
  "toplevel",
  "cmd",
+ "alias",
  ]

metaflow/extension_support/integrations.py ADDED
@@ -0,0 +1,141 @@
+ import importlib
+ import traceback
+
+ from metaflow.metaflow_config_funcs import from_conf
+
+ from . import _ext_debug, get_modules
+
+ # This file is similar in functionality to the cmd.py file. Please refer to that
+ # one for more information on what the functions do.
+
+
+ def process_integration_aliases(module_globals):
+ _resolve_relative_paths(module_globals)
+
+ all_aliases = _get_ext_aliases(module_globals)
+ all_aliases_dict = {}
+
+ toggle_alias = []
+ list_of_aliases = from_conf("ENABLED_INTEGRATION_ALIAS")
+
+ try:
+ modules_to_import = get_modules("alias")
+ # This is like multiload_all but we load globals independently since we just care
+ # about the TOGGLE and ENABLED values
+ for m in modules_to_import:
+ for n, o in m.module.__dict__.items():
+ if n == "TOGGLE_INTEGRATION_ALIAS":
+ toggle_alias.extend(o)
+ elif n == "ENABLED_INTEGRATION_ALIAS":
+ list_of_aliases = o
+ _resolve_relative_paths(m.module.__dict__)
+ all_aliases.extend(_get_ext_aliases(m.module.__dict__))
+ except Exception as e:
+ _ext_debug(
+ "\tWARNING: ignoring all integration aliases due to error during import: %s"
+ % e
+ )
+ print(
+ "WARNING: Integration aliases did not load -- ignoring all of them which "
+ "may not be what you want: %s" % e
+ )
+ traceback.print_exc()
+
+ # At this point, we have _all_aliases populated with all the tuples
+ # (name, module_class) from all the aliases in all the extensions (if any)
+ # We build a dictionary taking the latest presence for each name (so plugins
+ # override metaflow core)
+ for name, obj_path in all_aliases:
+ _ext_debug(" Adding integration alias '%s' from '%s'" % (name, obj_path))
+ all_aliases_dict[name] = obj_path
+
+ # Resolve the ENABLED_INTEGRATION_ALIAS variable. The rules are the following:
+ # - if ENABLED_INTEGRATION_ALIAS is non None, it means it was either set directly
+ # by the user in a configuration file, on the command line or by an extension.
+ # In that case we honor those wishes and completely ignore the extensions' toggles.
+ # - if ENABLED_INTEGRATION_ALIAS is None, we populate it with everything included
+ # here and in all the extensions and use the TOGGLE_ list to produce the final list.
+ # The rationale behind this is to support both a configuration option where the
+ # aliases enabled are explicitly listed (typical in a lot of software) but also to
+ # support a "configuration-less" version where the installation of the extensions
+ # determines what is activated.
+ if list_of_aliases is None:
+ list_of_aliases = list(all_aliases_dict) + toggle_alias
+
+ _ext_debug(" Resolving metaflow integration aliases")
+ _ext_debug(" Raw list is: %s" % str(list_of_aliases))
+
+ set_of_aliases = set()
+ for p in list_of_aliases:
+ if p.startswith("-"):
+ set_of_aliases.discard(p[1:])
+ elif p.startswith("+"):
+ set_of_aliases.add(p[1:])
+ else:
+ set_of_aliases.add(p)
+ _ext_debug(" Resolved list is: %s" % str(set_of_aliases))
+
+ for name in set_of_aliases:
+ obj_path = all_aliases_dict.get(name, None)
+ if obj_path is None:
+ raise ValueError(
+ "Configuration requested integration alias '%s' but no such alias "
+ "is available" % name
+ )
+ path, obj_name = obj_path.rsplit(".", 1)
+ try:
+ alias_module = importlib.import_module(path)
+ except ImportError:
+ raise ValueError(
+ "Cannot locate integration alias '%s' at '%s'" % (name, path)
+ )
+
+ obj = getattr(alias_module, obj_name, None)
+ if obj is None:
+ raise ValueError(
+ "Cannot locate '%s' object for integration alias at '%s'"
+ % (obj_name, path)
+ )
+ _ext_debug(" Added integration alias '%s' from '%s'" % (name, obj_path))
+ module_globals[name] = obj
+
+
+ def _get_ext_aliases(module_globals):
+ return module_globals.get("ALIASES_DESC", [])
+
+
+ def _set_ext_aliases(module_globals, value):
+ module_globals["ALIASES_DESC"] = value
+
+
+ def _resolve_relative_paths(module_globals):
+ # We want to modify all the relevant lists so that the relative paths
+ # are made fully qualified paths for the modules
+ pkg_path = module_globals["__package__"]
+ pkg_components = pkg_path.split(".")
+
+ def resolve_path(class_path):
+ # Converts a relative class_path to an absolute one considering that the
+ # relative class_path is present in a package pkg_path
+ if class_path[0] == ".":
+ i = 1
+ # Check for multiple "." at the start of the class_path
+ while class_path[i] == ".":
+ i += 1
+ if i > len(pkg_components):
+ raise ValueError(
+ "Path '%s' exits out of Metaflow module at %s"
+ % (class_path, pkg_path)
+ )
+ return (
+ ".".join(pkg_components[: -i + 1] if i > 1 else pkg_components)
+ + class_path[i - 1 :]
+ )
+ return class_path
+
+ _set_ext_aliases(
+ module_globals,
+ list(
+ map(lambda p: (p[0], resolve_path(p[1])), _get_ext_aliases(module_globals))
+ ),
+ )
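A worked example (a minimal sketch, not part of the diff) of what resolve_path above produces for a relative alias path declared in a module whose __package__ is "metaflow":

    pkg_components = "metaflow".split(".")                  # ["metaflow"]
    class_path = ".plugins.aws.aws_client.get_aws_client"   # one leading dot: stay in the package
    i = 1                                                    # number of leading dots found
    resolved = ".".join(pkg_components) + class_path[i - 1:]
    print(resolved)  # metaflow.plugins.aws.aws_client.get_aws_client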
metaflow/integrations.py ADDED
@@ -0,0 +1,29 @@
+ # This file can contain "shortcuts" to other parts of Metaflow (integrations)
+
+ # This is an alternative to providing an extension package where you would define
+ # these aliases in the toplevel file.
+
+ # It follows a similar pattern to plugins so that the these integration aliases can be
+ # turned on and off and avoid exposing things that are not necessarily needed/wanted.
+
+ from metaflow.extension_support.integrations import process_integration_aliases
+
+ # To enable an alias `metaflow.alias.get_s3_client` to
+ # `metaflow.plugins.aws.aws_client.get_aws_client`, use the following:
+ #
+ # ALIASES_DESC = [("get_s3_client", ".plugins.aws.aws_client.get_aws_client")]
+ #
+ # ALIASES_DESC is a list of tuples:
+ # - name: name of the integration alias
+ # - obj: object it points to
+ #
+ # Aliases can be enabled or disabled through configuration or extensions:
+ # - ENABLED_INTEGRATION_ALIAS: list of alias names to enable.
+ # - TOGGLE_INTEGRATION_ALIAS: if ENABLED_INTEGRATION_ALIAS is not set anywhere
+ # (environment variable, configuration or extensions), list of integration aliases
+ # to toggle (+<name> or <name> enables, -<name> disables) to build
+ # ENABLED_INTEGRATION_ALIAS from the concatenation of the names in
+ # ALIASES_DESC (concatenation of the names here as well as in the extensions).
+
+ # Keep this line and make sure ALIASES_DESC is above this line.
+ process_integration_aliases(globals())
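A hedged usage sketch, assuming the commented-out get_s3_client alias above were actually declared and enabled; the METAFLOW_ENABLED_INTEGRATION_ALIAS spelling follows from from_conf's METAFLOW_ prefixing and should be treated as an assumption:

    # 1. An extension (or this file) declares the alias:
    #      ALIASES_DESC = [("get_s3_client", ".plugins.aws.aws_client.get_aws_client")]
    # 2. The alias is enabled, e.g. via the environment:
    #      export METAFLOW_ENABLED_INTEGRATION_ALIAS='["get_s3_client"]'
    # 3. Users can then import the shortcut from metaflow.integrations:
    from metaflow.integrations import get_s3_client  # resolves to get_aws_client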
metaflow/metaflow_config.py CHANGED
@@ -96,6 +96,8 @@ DATATOOLS_S3ROOT = from_conf(
  else None,
  )

+ TEMPDIR = from_conf("TEMPDIR", ".")
+
  DATATOOLS_CLIENT_PARAMS = from_conf("DATATOOLS_CLIENT_PARAMS", {})
  if S3_ENDPOINT_URL:
  DATATOOLS_CLIENT_PARAMS["endpoint_url"] = S3_ENDPOINT_URL
@@ -193,8 +195,25 @@ DEFAULT_CONTAINER_IMAGE = from_conf("DEFAULT_CONTAINER_IMAGE")
  # Default container registry
  DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY")

+ ###
+ # Organization customizations
+ ###
  UI_URL = from_conf("UI_URL")

+ # Contact information displayed when running the `metaflow` command.
+ # Value should be a dictionary where:
+ # - key is a string describing contact method
+ # - value is a string describing contact itself (email, web address, etc.)
+ # The default value shows an example of this
+ CONTACT_INFO = from_conf(
+ "CONTACT_INFO",
+ {
+ "Read the documentation": "http://docs.metaflow.org",
+ "Chat with us": "http://chat.metaflow.org",
+ "Get help by email": "help@metaflow.org",
+ },
+ )
+
  ###
  # AWS Batch configuration
  ###
@@ -260,6 +279,8 @@ KUBERNETES_CONTAINER_IMAGE = from_conf(
  KUBERNETES_CONTAINER_REGISTRY = from_conf(
  "KUBERNETES_CONTAINER_REGISTRY", DEFAULT_CONTAINER_REGISTRY
  )
+ # Toggle for trying to fetch EC2 instance metadata
+ KUBERNETES_FETCH_EC2_METADATA = from_conf("KUBERNETES_FETCH_EC2_METADATA", False)

  ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
  ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
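The new TEMPDIR, CONTACT_INFO, and KUBERNETES_FETCH_EC2_METADATA values all flow through from_conf, which checks the METAFLOW_-prefixed environment variable (and the active config profile) before falling back to the default. A hedged sketch of that lookup order with a simplified stand-in for from_conf, not the real implementation:

    import json, os

    def from_conf(name, default=None):
        # simplified stand-in for metaflow.metaflow_config_funcs.from_conf:
        # the METAFLOW_<NAME> environment variable wins over the built-in default
        env_val = os.environ.get("METAFLOW_%s" % name)
        return json.loads(env_val) if env_val else default

    os.environ["METAFLOW_CONTACT_INFO"] = json.dumps(
        {"Ask for help": "#metaflow-support"}  # hypothetical organization override
    )
    print(from_conf("CONTACT_INFO", {"Get help by email": "help@metaflow.org"}))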
metaflow/metaflow_environment.py CHANGED
@@ -162,7 +162,7 @@ class MetaflowEnvironment(object):
  ]
  return cmds

- def get_environment_info(self):
+ def get_environment_info(self, include_ext_info=False):
  global version_cache
  if version_cache is None:
  version_cache = metaflow_version.get_version()
@@ -187,9 +187,10 @@
  env["metaflow_r_version"] = R.metaflow_r_version()
  env["r_version"] = R.r_version()
  env["r_version_code"] = R.r_version_code()
- # Information about extension modules (to load them in the proper order)
- ext_key, ext_val = dump_module_info()
- env[ext_key] = ext_val
+ if include_ext_info:
+ # Information about extension modules (to load them in the proper order)
+ ext_key, ext_val = dump_module_info()
+ env[ext_key] = ext_val
  return env

  def executable(self, step_name):
metaflow/package.py CHANGED
@@ -151,7 +151,7 @@ class MetaflowPackage(object):

  def _add_info(self, tar):
  info = tarfile.TarInfo(os.path.basename(INFO_FILE))
- env = self.environment.get_environment_info()
+ env = self.environment.get_environment_info(include_ext_info=True)
  buf = BytesIO()
  buf.write(json.dumps(env).encode("utf-8"))
  buf.seek(0)
metaflow/plugins/airflow/airflow.py CHANGED
@@ -54,7 +54,6 @@ AIRFLOW_DEPLOY_TEMPLATE_FILE = os.path.join(os.path.dirname(__file__), "dag.py")


  class Airflow(object):
-
  TOKEN_STORAGE_ROOT = "mf.airflow"

  def __init__(
metaflow/plugins/argo/argo_workflows.py CHANGED
@@ -18,6 +18,7 @@ from metaflow.metaflow_config import (
  KUBERNETES_NODE_SELECTOR,
  KUBERNETES_SANDBOX_INIT_SCRIPT,
  KUBERNETES_SECRETS,
+ KUBERNETES_FETCH_EC2_METADATA,
  S3_ENDPOINT_URL,
  AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
  DATASTORE_SYSROOT_AZURE,
@@ -785,6 +786,7 @@ class ArgoWorkflows(object):
  "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
  "METAFLOW_CARD_S3ROOT": CARD_S3ROOT,
  "METAFLOW_KUBERNETES_WORKLOAD": 1,
+ "METAFLOW_KUBERNETES_FETCH_EC2_METADATA": KUBERNETES_FETCH_EC2_METADATA,
  "METAFLOW_RUNTIME_ENVIRONMENT": "kubernetes",
  "METAFLOW_OWNER": self.username,
  },
metaflow/plugins/argo/argo_workflows_cli.py CHANGED
@@ -8,7 +8,7 @@ from hashlib import sha1

  from metaflow import JSONType, current, decorators, parameters
  from metaflow._vendor import click
- from metaflow.metaflow_config import SERVICE_VERSION_CHECK
+ from metaflow.metaflow_config import SERVICE_VERSION_CHECK, UI_URL
  from metaflow.exception import MetaflowException, MetaflowInternalError
  from metaflow.package import MetaflowPackage
  from metaflow.plugins.environment_decorator import EnvironmentDecorator
@@ -511,3 +511,13 @@ def trigger(obj, run_id_file=None, **kwargs):
  "(run-id *{run_id}*).".format(name=obj.workflow_name, run_id=run_id),
  bold=True,
  )
+
+ run_url = (
+ "%s/%s/%s" % (UI_URL.rstrip("/"), obj.flow.name, run_id) if UI_URL else None
+ )
+
+ if run_url:
+ obj.echo(
+ "See the run in the UI at %s" % run_url,
+ bold=True,
+ )
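A minimal sketch of the URL this construction yields when UI_URL is configured; the UI host, flow name, and run id below are hypothetical:

    UI_URL = "https://mf-ui.example.com/"
    flow_name, run_id = "HelloFlow", "argo-helloflow-5k8xr"
    run_url = "%s/%s/%s" % (UI_URL.rstrip("/"), flow_name, run_id) if UI_URL else None
    print(run_url)  # https://mf-ui.example.com/HelloFlow/argo-helloflow-5k8xr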
metaflow/plugins/aws/aws_utils.py CHANGED
@@ -20,8 +20,13 @@ def get_ec2_instance_metadata():
  # for non-AWS deployments.
  # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html
  try:
+ # Set a very aggressive timeout, as the communication is happening in the same subnet,
+ # there should not be any significant delay in the response.
+ # Having a long default timeout here introduces unnecessary delay in launching tasks when the
+ # instance is unreachable.
  instance_meta = requests.get(
- url="http://169.254.169.254/latest/dynamic/instance-identity/document"
+ url="http://169.254.169.254/latest/dynamic/instance-identity/document",
+ timeout=(1, 10),
  ).json()
  meta["ec2-instance-id"] = instance_meta.get("instanceId")
  meta["ec2-instance-type"] = instance_meta.get("instanceType")
metaflow/plugins/aws/batch/batch.py CHANGED
@@ -179,6 +179,10 @@ class Batch(object):
  env={},
  attrs={},
  host_volumes=None,
+ use_tmpfs=None,
+ tmpfs_tempdir=None,
+ tmpfs_size=None,
+ tmpfs_path=None,
  num_parallel=0,
  ):
  job_name = self._job_name(
@@ -201,6 +205,14 @@
  .image(image)
  .iam_role(iam_role)
  .execution_role(execution_role)
+ .cpu(cpu)
+ .gpu(gpu)
+ .memory(memory)
+ .shared_memory(shared_memory)
+ .max_swap(max_swap)
+ .swappiness(swappiness)
+ .inferentia(inferentia)
+ .timeout_in_secs(run_time_limit)
  .job_def(
  image,
  iam_role,
@@ -210,17 +222,14 @@
  max_swap,
  swappiness,
  inferentia,
+ memory=memory,
  host_volumes=host_volumes,
+ use_tmpfs=use_tmpfs,
+ tmpfs_tempdir=tmpfs_tempdir,
+ tmpfs_size=tmpfs_size,
+ tmpfs_path=tmpfs_path,
  num_parallel=num_parallel,
  )
- .cpu(cpu)
- .gpu(gpu)
- .memory(memory)
- .shared_memory(shared_memory)
- .max_swap(max_swap)
- .swappiness(swappiness)
- .inferentia(inferentia)
- .timeout_in_secs(run_time_limit)
  .task_id(attrs.get("metaflow.task_id"))
  .environment_variable("AWS_DEFAULT_REGION", self._client.region())
  .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
@@ -248,6 +257,11 @@
  AWS_SECRETS_MANAGER_DEFAULT_REGION,
  )

+ tmpfs_enabled = use_tmpfs or (tmpfs_size and use_tmpfs is None)
+
+ if tmpfs_enabled and tmpfs_tempdir:
+ job.environment_variable("METAFLOW_TEMPDIR", tmpfs_path)
+
  # Skip setting METAFLOW_DATASTORE_SYSROOT_LOCAL because metadata sync between the local user
  # instance and the remote AWS Batch instance assumes metadata is stored in DATASTORE_LOCAL_DIR
  # on the remote AWS Batch instance; this happens when METAFLOW_DATASTORE_SYSROOT_LOCAL
@@ -300,6 +314,10 @@
  swappiness=None,
  inferentia=None,
  host_volumes=None,
+ use_tmpfs=None,
+ tmpfs_tempdir=None,
+ tmpfs_size=None,
+ tmpfs_path=None,
  num_parallel=0,
  env={},
  attrs={},
@@ -333,6 +351,10 @@
  env=env,
  attrs=attrs,
  host_volumes=host_volumes,
+ use_tmpfs=use_tmpfs,
+ tmpfs_tempdir=tmpfs_tempdir,
+ tmpfs_size=tmpfs_size,
+ tmpfs_path=tmpfs_path,
  num_parallel=num_parallel,
  )
  self.num_parallel = num_parallel
metaflow/plugins/aws/batch/batch_cli.py CHANGED
@@ -141,6 +141,10 @@ def kill(ctx, run_id, user, my_runs):
  @click.option("--max-swap", help="Max Swap requirement for AWS Batch.")
  @click.option("--swappiness", help="Swappiness requirement for AWS Batch.")
  @click.option("--inferentia", help="Inferentia requirement for AWS Batch.")
+ @click.option("--use-tmpfs", is_flag=True, help="tmpfs requirement for AWS Batch.")
+ @click.option("--tmpfs-tempdir", is_flag=True, help="tmpfs requirement for AWS Batch.")
+ @click.option("--tmpfs-size", help="tmpfs requirement for AWS Batch.")
+ @click.option("--tmpfs-path", help="tmpfs requirement for AWS Batch.")
  # TODO: Maybe remove it altogether since it's not used here
  @click.option("--ubf-context", default=None, type=click.Choice([None, "ubf_control"]))
  @click.option("--host-volumes", multiple=True)
@@ -169,6 +173,10 @@ def step(
  max_swap=None,
  swappiness=None,
  inferentia=None,
+ use_tmpfs=None,
+ tmpfs_tempdir=None,
+ tmpfs_size=None,
+ tmpfs_path=None,
  host_volumes=None,
  num_parallel=None,
  **kwargs
@@ -296,6 +304,10 @@
  env=env,
  attrs=attrs,
  host_volumes=host_volumes,
+ use_tmpfs=use_tmpfs,
+ tmpfs_tempdir=tmpfs_tempdir,
+ tmpfs_size=tmpfs_size,
+ tmpfs_path=tmpfs_path,
  num_parallel=num_parallel,
  )
  except Exception as e:
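Taken together with the batch_decorator.py change listed above (not shown in this section), these options surface tmpfs support on AWS Batch. A hedged sketch of how the new knobs could be used from a flow; the decorator attribute names mirror the use_tmpfs/tmpfs_* arguments threaded through batch.py and batch_cli.py, and the sizes are illustrative:

    from metaflow import FlowSpec, batch, current, step

    class TmpfsFlow(FlowSpec):
        # tmpfs_tempdir points METAFLOW_TEMPDIR (and therefore current.tempdir)
        # at the tmpfs mount, per the batch.py change above
        @batch(memory=16000, use_tmpfs=True, tmpfs_size=8192, tmpfs_tempdir=True)
        @step
        def start(self):
            print("temp dir backed by tmpfs:", current.tempdir)
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        TmpfsFlow()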