metaflow 2.11.16__py2.py3-none-any.whl → 2.12.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. metaflow/__init__.py +5 -0
  2. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  3. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  4. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  5. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  6. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  7. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  8. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  9. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  10. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  11. metaflow/_vendor/typeguard/__init__.py +48 -0
  12. metaflow/_vendor/typeguard/_checkers.py +906 -0
  13. metaflow/_vendor/typeguard/_config.py +108 -0
  14. metaflow/_vendor/typeguard/_decorators.py +237 -0
  15. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  16. metaflow/_vendor/typeguard/_functions.py +307 -0
  17. metaflow/_vendor/typeguard/_importhook.py +213 -0
  18. metaflow/_vendor/typeguard/_memo.py +48 -0
  19. metaflow/_vendor/typeguard/_pytest_plugin.py +100 -0
  20. metaflow/_vendor/typeguard/_suppression.py +88 -0
  21. metaflow/_vendor/typeguard/_transformer.py +1193 -0
  22. metaflow/_vendor/typeguard/_union_transformer.py +54 -0
  23. metaflow/_vendor/typeguard/_utils.py +169 -0
  24. metaflow/_vendor/typeguard/py.typed +0 -0
  25. metaflow/_vendor/typing_extensions.py +3053 -0
  26. metaflow/cli.py +100 -43
  27. metaflow/cmd/develop/stubs.py +2 -0
  28. metaflow/decorators.py +16 -3
  29. metaflow/extension_support/__init__.py +2 -0
  30. metaflow/metaflow_config.py +21 -0
  31. metaflow/parameters.py +1 -0
  32. metaflow/plugins/argo/argo_workflows.py +10 -5
  33. metaflow/plugins/aws/batch/batch_decorator.py +3 -3
  34. metaflow/plugins/kubernetes/kubernetes_job.py +0 -5
  35. metaflow/runner/__init__.py +0 -0
  36. metaflow/runner/click_api.py +406 -0
  37. metaflow/runner/metaflow_runner.py +452 -0
  38. metaflow/runner/nbrun.py +246 -0
  39. metaflow/runner/subprocess_manager.py +552 -0
  40. metaflow/vendor.py +0 -1
  41. metaflow/version.py +1 -1
  42. {metaflow-2.11.16.dist-info → metaflow-2.12.1.dist-info}/METADATA +2 -2
  43. {metaflow-2.11.16.dist-info → metaflow-2.12.1.dist-info}/RECORD +48 -20
  44. metaflow/_vendor/v3_7/__init__.py +0 -1
  45. /metaflow/_vendor/{v3_7/zipp.py → zipp.py} +0 -0
  46. {metaflow-2.11.16.dist-info → metaflow-2.12.1.dist-info}/LICENSE +0 -0
  47. {metaflow-2.11.16.dist-info → metaflow-2.12.1.dist-info}/WHEEL +0 -0
  48. {metaflow-2.11.16.dist-info → metaflow-2.12.1.dist-info}/entry_points.txt +0 -0
  49. {metaflow-2.11.16.dist-info → metaflow-2.12.1.dist-info}/top_level.txt +0 -0
metaflow/cli.py CHANGED
@@ -1,42 +1,21 @@
1
1
  import inspect
2
+ import os
2
3
  import sys
3
4
  import traceback
4
5
  from datetime import datetime
5
6
  from functools import wraps
6
- import metaflow.tracing as tracing
7
7
 
8
+ import metaflow.tracing as tracing
8
9
  from metaflow._vendor import click
9
10
 
10
- from . import lint
11
- from . import plugins
12
- from . import parameters
13
- from . import decorators
14
- from . import metaflow_version
15
- from . import namespace
16
- from .metaflow_current import current
11
+ from . import decorators, lint, metaflow_version, namespace, parameters, plugins
17
12
  from .cli_args import cli_args
18
- from .tagging_util import validate_tags
19
- from .util import (
20
- resolve_identity,
21
- decompress_list,
22
- write_latest_run_id,
23
- get_latest_run_id,
24
- )
25
- from .task import MetaflowTask
13
+ from .client.core import get_metadata
14
+ from .datastore import FlowDataStore, TaskDataStore, TaskDataStoreSet
26
15
  from .exception import CommandException, MetaflowException
27
16
  from .graph import FlowGraph
28
- from .datastore import FlowDataStore, TaskDataStoreSet, TaskDataStore
29
-
30
- from .runtime import NativeRuntime
31
- from .package import MetaflowPackage
32
- from .plugins import (
33
- DATASTORES,
34
- ENVIRONMENTS,
35
- LOGGING_SIDECARS,
36
- METADATA_PROVIDERS,
37
- MONITOR_SIDECARS,
38
- )
39
17
  from .metaflow_config import (
18
+ DECOSPECS,
40
19
  DEFAULT_DATASTORE,
41
20
  DEFAULT_ENVIRONMENT,
42
21
  DEFAULT_EVENT_LOGGER,
@@ -44,12 +23,29 @@ from .metaflow_config import (
44
23
  DEFAULT_MONITOR,
45
24
  DEFAULT_PACKAGE_SUFFIXES,
46
25
  )
26
+ from .metaflow_current import current
47
27
  from .metaflow_environment import MetaflowEnvironment
28
+ from .mflog import LOG_SOURCES, mflog
29
+ from .package import MetaflowPackage
30
+ from .plugins import (
31
+ DATASTORES,
32
+ ENVIRONMENTS,
33
+ LOGGING_SIDECARS,
34
+ METADATA_PROVIDERS,
35
+ MONITOR_SIDECARS,
36
+ )
48
37
  from .pylint_wrapper import PyLint
49
- from .R import use_r, metaflow_r_version
50
- from .mflog import mflog, LOG_SOURCES
38
+ from .R import metaflow_r_version, use_r
39
+ from .runtime import NativeRuntime
40
+ from .tagging_util import validate_tags
41
+ from .task import MetaflowTask
51
42
  from .unbounded_foreach import UBF_CONTROL, UBF_TASK
52
-
43
+ from .util import (
44
+ decompress_list,
45
+ get_latest_run_id,
46
+ resolve_identity,
47
+ write_latest_run_id,
48
+ )
53
49
 
54
50
  ERASE_TO_EOL = "\033[K"
55
51
  HIGHLIGHT = "red"
@@ -124,6 +120,26 @@ def logger(body="", system_msg=False, head="", bad=False, timestamp=True, nl=Tru
124
120
  click.secho(body, bold=system_msg, fg=LOGGER_BAD_COLOR if bad else None, nl=nl)
125
121
 
126
122
 
123
+ def config_merge_cb(ctx, param, value):
124
+ # Callback to:
125
+ # - read the Click auto_envvar variable from both the
126
+ # environment AND the configuration
127
+ # - merge that value with the value passed in the command line (value)
128
+ # - return the value as a tuple
129
+ # Note that this function gets called even if there is no option passed on the
130
+ # command line.
131
+ # NOTE: Assumes that ctx.auto_envvar_prefix is set to METAFLOW (same as in
132
+ # from_conf)
133
+
134
+ # Special case where DECOSPECS and value are the same. This happens
135
+ # when there is no --with option at the TL and DECOSPECS is read from
136
+ # the env var. In this case, click also passes it as value
137
+ splits = DECOSPECS.split()
138
+ if len(splits) == len(value) and all([a == b for (a, b) in zip(splits, value)]):
139
+ return value
140
+ return tuple(list(value) + DECOSPECS.split())
141
+
142
+
127
143
  @click.group()
128
144
  def cli(ctx):
129
145
  pass
@@ -557,6 +573,13 @@ def common_run_options(func):
557
573
  type=str,
558
574
  help="Write the ID of this run to the file specified.",
559
575
  )
576
+ @click.option(
577
+ "--runner-attribute-file",
578
+ default=None,
579
+ show_default=True,
580
+ type=str,
581
+ help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
582
+ )
560
583
  @wraps(func)
561
584
  def wrapper(*args, **kwargs):
562
585
  return func(*args, **kwargs)
@@ -615,8 +638,9 @@ def resume(
615
638
  decospecs=None,
616
639
  run_id_file=None,
617
640
  resume_identifier=None,
641
+ runner_attribute_file=None,
618
642
  ):
619
- before_run(obj, tags, decospecs + obj.environment.decospecs())
643
+ before_run(obj, tags, decospecs)
620
644
 
621
645
  if origin_run_id is None:
622
646
  origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
@@ -670,9 +694,14 @@ def resume(
670
694
  max_log_size=max_log_size * 1024 * 1024,
671
695
  resume_identifier=resume_identifier,
672
696
  )
673
- write_run_id(run_id_file, runtime.run_id)
697
+ write_file(run_id_file, runtime.run_id)
674
698
  runtime.print_workflow_info()
699
+
675
700
  runtime.persist_constants()
701
+ write_file(
702
+ runner_attribute_file,
703
+ "%s:%s" % (get_metadata(), "/".join((obj.flow.name, runtime.run_id))),
704
+ )
676
705
  if clone_only:
677
706
  runtime.clone_original_run()
678
707
  else:
@@ -703,12 +732,13 @@ def run(
703
732
  max_log_size=None,
704
733
  decospecs=None,
705
734
  run_id_file=None,
735
+ runner_attribute_file=None,
706
736
  user_namespace=None,
707
737
  **kwargs
708
738
  ):
709
739
  if user_namespace is not None:
710
740
  namespace(user_namespace or None)
711
- before_run(obj, tags, decospecs + obj.environment.decospecs())
741
+ before_run(obj, tags, decospecs)
712
742
 
713
743
  runtime = NativeRuntime(
714
744
  obj.flow,
@@ -726,18 +756,22 @@ def run(
726
756
  max_log_size=max_log_size * 1024 * 1024,
727
757
  )
728
758
  write_latest_run_id(obj, runtime.run_id)
729
- write_run_id(run_id_file, runtime.run_id)
759
+ write_file(run_id_file, runtime.run_id)
730
760
 
731
761
  obj.flow._set_constants(obj.graph, kwargs)
732
762
  runtime.print_workflow_info()
733
763
  runtime.persist_constants()
764
+ write_file(
765
+ runner_attribute_file,
766
+ "%s:%s" % (get_metadata(), "/".join((obj.flow.name, runtime.run_id))),
767
+ )
734
768
  runtime.execute()
735
769
 
736
770
 
737
- def write_run_id(run_id_file, run_id):
738
- if run_id_file is not None:
739
- with open(run_id_file, "w") as f:
740
- f.write(str(run_id))
771
+ def write_file(file_path, content):
772
+ if file_path is not None:
773
+ with open(file_path, "w") as f:
774
+ f.write(str(content))
741
775
 
742
776
 
743
777
  def before_run(obj, tags, decospecs):
@@ -752,9 +786,20 @@ def before_run(obj, tags, decospecs):
752
786
  # A downside is that we need to have the following decorators handling
753
787
  # in two places in this module and make sure _init_step_decorators
754
788
  # doesn't get called twice.
755
- if decospecs:
756
- decorators._attach_decorators(obj.flow, decospecs)
789
+
790
+ # We want the order to be the following:
791
+ # - run level decospecs
792
+ # - top level decospecs
793
+ # - environment decospecs
794
+ all_decospecs = (
795
+ list(decospecs or [])
796
+ + obj.tl_decospecs
797
+ + list(obj.environment.decospecs() or [])
798
+ )
799
+ if all_decospecs:
800
+ decorators._attach_decorators(obj.flow, all_decospecs)
757
801
  obj.graph = FlowGraph(obj.flow.__class__)
802
+
758
803
  obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
759
804
  # obj.environment.init_environment(obj.logger)
760
805
 
@@ -825,6 +870,7 @@ def version(obj):
825
870
  multiple=True,
826
871
  help="Add a decorator to all steps. You can specify this option "
827
872
  "multiple times to attach multiple decorators in steps.",
873
+ callback=config_merge_cb,
828
874
  )
829
875
  @click.option(
830
876
  "--pylint/--no-pylint",
@@ -943,8 +989,11 @@ def start(
943
989
  deco_options,
944
990
  )
945
991
 
946
- if decospecs:
947
- decorators._attach_decorators(ctx.obj.flow, decospecs)
992
+ # In the case of run/resume, we will want to apply the TL decospecs
993
+ # *after* the run decospecs so that they don't take precedence. In other
994
+ # words, for the same decorator, we want `myflow.py run --with foo` to
995
+ # take precedence over any other `foo` decospec
996
+ ctx.obj.tl_decospecs = list(decospecs or [])
948
997
 
949
998
  # initialize current and parameter context for deploy-time parameters
950
999
  current._set_env(flow=ctx.obj.flow, is_running=False)
@@ -955,7 +1004,14 @@ def start(
955
1004
  if ctx.invoked_subcommand not in ("run", "resume"):
956
1005
  # run/resume are special cases because they can add more decorators with --with,
957
1006
  # so they have to take care of themselves.
958
- decorators._attach_decorators(ctx.obj.flow, ctx.obj.environment.decospecs())
1007
+ all_decospecs = ctx.obj.tl_decospecs + list(
1008
+ ctx.obj.environment.decospecs() or []
1009
+ )
1010
+ if all_decospecs:
1011
+ decorators._attach_decorators(ctx.obj.flow, all_decospecs)
1012
+ # Regenerate graph if we attached more decorators
1013
+ ctx.obj.graph = FlowGraph(ctx.obj.flow.__class__)
1014
+
959
1015
  decorators._init_step_decorators(
960
1016
  ctx.obj.flow,
961
1017
  ctx.obj.graph,
@@ -963,6 +1019,7 @@ def start(
963
1019
  ctx.obj.flow_datastore,
964
1020
  ctx.obj.logger,
965
1021
  )
1022
+
966
1023
  # TODO (savin): Enable lazy instantiation of package
967
1024
  ctx.obj.package = None
968
1025
  if ctx.invoked_subcommand is None:
metaflow/cmd/develop/stubs.py CHANGED
@@ -23,6 +23,8 @@ def _check_stubs_supported():
23
23
  if _py_ver >= (3, 4):
24
24
  if _py_ver >= (3, 8):
25
25
  from importlib import metadata
26
+ elif _py_ver >= (3, 7):
27
+ from metaflow._vendor import importlib_metadata as metadata
26
28
  elif _py_ver >= (3, 6):
27
29
  from metaflow._vendor.v3_6 import importlib_metadata as metadata
28
30
  else:
metaflow/decorators.py CHANGED
@@ -158,13 +158,14 @@ class Decorator(object):
158
158
  attr_list.append("%s=%s" % (k, str(v)))
159
159
  else:
160
160
  attr_list.append("%s=%s" % (k, json.dumps(v).replace('"', '\\"')))
161
+
161
162
  attrstr = ",".join(attr_list)
162
163
  return "%s:%s" % (self.name, attrstr)
163
164
  else:
164
165
  return self.name
165
166
 
166
167
  def __str__(self):
167
- mode = "decorated" if self.statically_defined else "cli"
168
+ mode = "static" if self.statically_defined else "dynamic"
168
169
  attrs = " ".join("%s=%s" % x for x in self.attributes.items())
169
170
  if attrs:
170
171
  attrs = " " + attrs
@@ -450,6 +451,18 @@ def _base_step_decorator(decotype, *args, **kwargs):
450
451
  return wrap
451
452
 
452
453
 
454
+ _all_step_decos = None
455
+
456
+
457
+ def _get_all_step_decos():
458
+ global _all_step_decos
459
+ if _all_step_decos is None:
460
+ from .plugins import STEP_DECORATORS
461
+
462
+ _all_step_decos = {decotype.name: decotype for decotype in STEP_DECORATORS}
463
+ return _all_step_decos
464
+
465
+
453
466
  def _attach_decorators(flow, decospecs):
454
467
  """
455
468
  Attach decorators to all steps during runtime. This has the same
@@ -462,6 +475,7 @@ def _attach_decorators(flow, decospecs):
462
475
  #
463
476
  # Note that each step gets its own instance of the decorator class,
464
477
  # so decorator can maintain step-specific state.
478
+
465
479
  for step in flow:
466
480
  _attach_decorators_to_step(step, decospecs)
467
481
 
@@ -472,9 +486,8 @@ def _attach_decorators_to_step(step, decospecs):
472
486
  effect as if you defined the decorators statically in the source for
473
487
  the step.
474
488
  """
475
- from .plugins import STEP_DECORATORS
476
489
 
477
- decos = {decotype.name: decotype for decotype in STEP_DECORATORS}
490
+ decos = _get_all_step_decos()
478
491
 
479
492
  for decospec in decospecs:
480
493
  splits = decospec.split(":", 1)
metaflow/extension_support/__init__.py CHANGED
@@ -262,6 +262,8 @@ if _py_ver >= (3, 4):
262
262
 
263
263
  if _py_ver >= (3, 8):
264
264
  from importlib import metadata
265
+ elif _py_ver >= (3, 7):
266
+ from metaflow._vendor import importlib_metadata as metadata
265
267
  elif _py_ver >= (3, 6):
266
268
  from metaflow._vendor.v3_6 import importlib_metadata as metadata
267
269
  else:
metaflow/metaflow_config.py CHANGED
@@ -249,6 +249,14 @@ CONTACT_INFO = from_conf(
249
249
  },
250
250
  )
251
251
 
252
+
253
+ ###
254
+ # Decorators
255
+ ###
256
+ # Format is a space separated string of decospecs (what is passed
257
+ # using --with)
258
+ DECOSPECS = from_conf("DECOSPECS", "")
259
+
252
260
  ###
253
261
  # AWS Batch configuration
254
262
  ###
@@ -496,6 +504,8 @@ def get_pinned_conda_libs(python_version, datastore_type):
496
504
  try:
497
505
  from metaflow.extension_support import get_modules
498
506
 
507
+ _TOGGLE_DECOSPECS = []
508
+
499
509
  ext_modules = get_modules("config")
500
510
  for m in ext_modules:
501
511
  # We load into globals whatever we have in extension_module
@@ -519,8 +529,18 @@ try:
519
529
  return d1
520
530
 
521
531
  globals()[n] = _new_get_pinned_conda_libs
532
+ elif n == "TOGGLE_DECOSPECS":
533
+ if any([x.startswith("-") for x in o]):
534
+ raise ValueError("Removing decospecs is not currently supported")
535
+ if any(" " in x for x in o):
536
+ raise ValueError("Decospecs cannot contain spaces")
537
+ _TOGGLE_DECOSPECS.extend(o)
522
538
  elif not n.startswith("__") and not isinstance(o, types.ModuleType):
523
539
  globals()[n] = o
540
+ # If DECOSPECS is set, use that, else extrapolate from extensions
541
+ if not DECOSPECS:
542
+ DECOSPECS = " ".join(_TOGGLE_DECOSPECS)
543
+
524
544
  finally:
525
545
  # Erase all temporary names to avoid leaking things
526
546
  for _n in [
@@ -537,6 +557,7 @@ finally:
537
557
  "v",
538
558
  "f1",
539
559
  "f2",
560
+ "_TOGGLE_DECOSPECS",
540
561
  ]:
541
562
  try:
542
563
  del globals()[_n]
metaflow/parameters.py CHANGED
@@ -388,6 +388,7 @@ def add_custom_parameters(deploy_mode=False):
388
388
  # deploy_mode determines whether deploy-time functions should or should
389
389
  # not be evaluated for this command
390
390
  def wrapper(cmd):
391
+ cmd.has_flow_params = True
391
392
  # Iterate over parameters in reverse order so cmd.params lists options
392
393
  # in the order they are defined in the FlowSpec subclass
393
394
  for arg in parameters[::-1]:
metaflow/plugins/argo/argo_workflows.py CHANGED
@@ -184,6 +184,12 @@ class ArgoWorkflows(object):
184
184
  # allowed by Metaflow - guaranteeing uniqueness.
185
185
  return name.replace("_", "-")
186
186
 
187
+ @staticmethod
188
+ def _sensor_name(name):
189
+ # Unfortunately, Argo Events Sensor names don't allow for
190
+ # dots (sensors run into an error) which rules out self.name :(
191
+ return name.replace(".", "-")
192
+
187
193
  @staticmethod
188
194
  def list_templates(flow_name, all=False):
189
195
  client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
@@ -216,7 +222,7 @@ class ArgoWorkflows(object):
216
222
 
217
223
  # The workflow might have sensors attached to it, which consume actual resources.
218
224
  # Try to delete these as well.
219
- sensor_deleted = client.delete_sensor(name)
225
+ sensor_deleted = client.delete_sensor(ArgoWorkflows._sensor_name(name))
220
226
 
221
227
  # After cleaning up related resources, delete the workflow in question.
222
228
  # Failure in deleting is treated as critical and will be made visible to the user
@@ -333,10 +339,9 @@ class ArgoWorkflows(object):
333
339
  argo_client.schedule_workflow_template(
334
340
  self.name, self._schedule, self._timezone
335
341
  )
336
- # Register sensor. Unfortunately, Argo Events Sensor names don't allow for
337
- # dots (sensors run into an error) which rules out self.name :(
342
+ # Register sensor.
338
343
  # Metaflow will overwrite any existing sensor.
339
- sensor_name = self.name.replace(".", "-")
344
+ sensor_name = ArgoWorkflows._sensor_name(self.name)
340
345
  if self._sensor:
341
346
  argo_client.register_sensor(sensor_name, self._sensor.to_json())
342
347
  else:
@@ -2039,7 +2044,7 @@ class ArgoWorkflows(object):
2039
2044
  .metadata(
2040
2045
  # Sensor metadata.
2041
2046
  ObjectMeta()
2042
- .name(self.name.replace(".", "-"))
2047
+ .name(ArgoWorkflows._sensor_name(self.name))
2043
2048
  .namespace(KUBERNETES_NAMESPACE)
2044
2049
  .label("app.kubernetes.io/name", "metaflow-sensor")
2045
2050
  .label("app.kubernetes.io/part-of", "metaflow")
metaflow/plugins/aws/batch/batch_decorator.py CHANGED
@@ -88,15 +88,15 @@ class BatchDecorator(StepDecorator):
88
88
  Alias for inferentia. Use only one of the two.
89
89
  efa : int, default 0
90
90
  Number of elastic fabric adapter network devices to attach to container
91
- ephemeral_storage: int, default None
92
- The total amount, in GiB, of ephemeral storage to set for the task (21-200)
91
+ ephemeral_storage : int, default None
92
+ The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
93
93
  This is only relevant for Fargate compute environments
94
94
  log_driver: str, optional, default None
95
95
  The log driver to use for the Amazon ECS container.
96
96
  log_options: List[str], optional, default None
97
97
  List of strings containing options for the chosen log driver. The configurable values
98
98
  depend on the `log driver` chosen. Validation of these options is not supported yet.
99
- Example usage: ["awslogs-group:aws/batch/job"]
99
+ Example: [`awslogs-group:aws/batch/job`]
100
100
  """
101
101
 
102
102
  name = "batch"
metaflow/plugins/kubernetes/kubernetes_job.py CHANGED
@@ -275,11 +275,6 @@ class KubernetesJob(object):
275
275
  # (unique UID) per Metaflow task attempt.
276
276
  client = self._client.get()
277
277
 
278
- # tmpfs variables
279
- use_tmpfs = self._kwargs["use_tmpfs"]
280
- tmpfs_size = self._kwargs["tmpfs_size"]
281
- tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
282
-
283
278
  self._job = client.V1Job(
284
279
  api_version="batch/v1",
285
280
  kind="Job",
File without changes