metaflow 2.11.15__py2.py3-none-any.whl → 2.12.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. metaflow/__init__.py +8 -0
  2. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  3. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  4. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  5. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  6. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  7. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  8. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  9. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  10. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  11. metaflow/_vendor/typeguard/__init__.py +48 -0
  12. metaflow/_vendor/typeguard/_checkers.py +906 -0
  13. metaflow/_vendor/typeguard/_config.py +108 -0
  14. metaflow/_vendor/typeguard/_decorators.py +237 -0
  15. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  16. metaflow/_vendor/typeguard/_functions.py +307 -0
  17. metaflow/_vendor/typeguard/_importhook.py +213 -0
  18. metaflow/_vendor/typeguard/_memo.py +48 -0
  19. metaflow/_vendor/typeguard/_pytest_plugin.py +100 -0
  20. metaflow/_vendor/typeguard/_suppression.py +88 -0
  21. metaflow/_vendor/typeguard/_transformer.py +1193 -0
  22. metaflow/_vendor/typeguard/_union_transformer.py +54 -0
  23. metaflow/_vendor/typeguard/_utils.py +169 -0
  24. metaflow/_vendor/typeguard/py.typed +0 -0
  25. metaflow/_vendor/typing_extensions.py +3053 -0
  26. metaflow/cli.py +48 -36
  27. metaflow/clone_util.py +6 -0
  28. metaflow/cmd/develop/stubs.py +2 -0
  29. metaflow/extension_support/__init__.py +2 -0
  30. metaflow/extension_support/plugins.py +2 -0
  31. metaflow/metaflow_config.py +24 -0
  32. metaflow/metaflow_environment.py +2 -2
  33. metaflow/parameters.py +1 -0
  34. metaflow/plugins/__init__.py +19 -0
  35. metaflow/plugins/airflow/airflow.py +7 -0
  36. metaflow/plugins/argo/argo_workflows.py +17 -0
  37. metaflow/plugins/aws/batch/batch_decorator.py +3 -3
  38. metaflow/plugins/azure/__init__.py +3 -0
  39. metaflow/plugins/azure/azure_credential.py +53 -0
  40. metaflow/plugins/azure/azure_exceptions.py +1 -1
  41. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  42. metaflow/plugins/azure/azure_utils.py +2 -35
  43. metaflow/plugins/azure/blob_service_client_factory.py +4 -2
  44. metaflow/plugins/datastores/azure_storage.py +6 -6
  45. metaflow/plugins/datatools/s3/s3.py +1 -1
  46. metaflow/plugins/gcp/__init__.py +1 -0
  47. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
  48. metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
  49. metaflow/plugins/kubernetes/kubernetes.py +85 -8
  50. metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
  51. metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
  52. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
  53. metaflow/plugins/kubernetes/kubernetes_job.py +208 -206
  54. metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
  55. metaflow/plugins/timeout_decorator.py +2 -1
  56. metaflow/runner/__init__.py +0 -0
  57. metaflow/runner/click_api.py +406 -0
  58. metaflow/runner/metaflow_runner.py +452 -0
  59. metaflow/runner/nbrun.py +246 -0
  60. metaflow/runner/subprocess_manager.py +552 -0
  61. metaflow/task.py +1 -12
  62. metaflow/tuple_util.py +27 -0
  63. metaflow/util.py +0 -15
  64. metaflow/vendor.py +0 -1
  65. metaflow/version.py +1 -1
  66. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/METADATA +2 -2
  67. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/RECORD +72 -39
  68. metaflow/_vendor/v3_7/__init__.py +0 -1
  69. /metaflow/_vendor/{v3_7/zipp.py → zipp.py} +0 -0
  70. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/LICENSE +0 -0
  71. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/WHEEL +0 -0
  72. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/entry_points.txt +0 -0
  73. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/top_level.txt +0 -0
metaflow/cli.py CHANGED
@@ -3,39 +3,16 @@ import sys
3
3
  import traceback
4
4
  from datetime import datetime
5
5
  from functools import wraps
6
- import metaflow.tracing as tracing
7
6
 
7
+ import metaflow.tracing as tracing
8
8
  from metaflow._vendor import click
9
9
 
10
- from . import lint
11
- from . import plugins
12
- from . import parameters
13
- from . import decorators
14
- from . import metaflow_version
15
- from . import namespace
16
- from .metaflow_current import current
10
+ from . import decorators, lint, metaflow_version, namespace, parameters, plugins
17
11
  from .cli_args import cli_args
18
- from .tagging_util import validate_tags
19
- from .util import (
20
- resolve_identity,
21
- decompress_list,
22
- write_latest_run_id,
23
- get_latest_run_id,
24
- )
25
- from .task import MetaflowTask
12
+ from .client.core import get_metadata
13
+ from .datastore import FlowDataStore, TaskDataStore, TaskDataStoreSet
26
14
  from .exception import CommandException, MetaflowException
27
15
  from .graph import FlowGraph
28
- from .datastore import FlowDataStore, TaskDataStoreSet, TaskDataStore
29
-
30
- from .runtime import NativeRuntime
31
- from .package import MetaflowPackage
32
- from .plugins import (
33
- DATASTORES,
34
- ENVIRONMENTS,
35
- LOGGING_SIDECARS,
36
- METADATA_PROVIDERS,
37
- MONITOR_SIDECARS,
38
- )
39
16
  from .metaflow_config import (
40
17
  DEFAULT_DATASTORE,
41
18
  DEFAULT_ENVIRONMENT,
@@ -44,12 +21,29 @@ from .metaflow_config import (
44
21
  DEFAULT_MONITOR,
45
22
  DEFAULT_PACKAGE_SUFFIXES,
46
23
  )
24
+ from .metaflow_current import current
47
25
  from .metaflow_environment import MetaflowEnvironment
26
+ from .mflog import LOG_SOURCES, mflog
27
+ from .package import MetaflowPackage
28
+ from .plugins import (
29
+ DATASTORES,
30
+ ENVIRONMENTS,
31
+ LOGGING_SIDECARS,
32
+ METADATA_PROVIDERS,
33
+ MONITOR_SIDECARS,
34
+ )
48
35
  from .pylint_wrapper import PyLint
49
- from .R import use_r, metaflow_r_version
50
- from .mflog import mflog, LOG_SOURCES
36
+ from .R import metaflow_r_version, use_r
37
+ from .runtime import NativeRuntime
38
+ from .tagging_util import validate_tags
39
+ from .task import MetaflowTask
51
40
  from .unbounded_foreach import UBF_CONTROL, UBF_TASK
52
-
41
+ from .util import (
42
+ decompress_list,
43
+ get_latest_run_id,
44
+ resolve_identity,
45
+ write_latest_run_id,
46
+ )
53
47
 
54
48
  ERASE_TO_EOL = "\033[K"
55
49
  HIGHLIGHT = "red"
@@ -557,6 +551,13 @@ def common_run_options(func):
557
551
  type=str,
558
552
  help="Write the ID of this run to the file specified.",
559
553
  )
554
+ @click.option(
555
+ "--runner-attribute-file",
556
+ default=None,
557
+ show_default=True,
558
+ type=str,
559
+ help="Write the metadata and pathspec of this run to the file specified. Used internally for Metaflow's Runner API.",
560
+ )
560
561
  @wraps(func)
561
562
  def wrapper(*args, **kwargs):
562
563
  return func(*args, **kwargs)
@@ -615,6 +616,7 @@ def resume(
615
616
  decospecs=None,
616
617
  run_id_file=None,
617
618
  resume_identifier=None,
619
+ runner_attribute_file=None,
618
620
  ):
619
621
  before_run(obj, tags, decospecs + obj.environment.decospecs())
620
622
 
@@ -670,9 +672,14 @@ def resume(
670
672
  max_log_size=max_log_size * 1024 * 1024,
671
673
  resume_identifier=resume_identifier,
672
674
  )
673
- write_run_id(run_id_file, runtime.run_id)
675
+ write_file(run_id_file, runtime.run_id)
674
676
  runtime.print_workflow_info()
677
+
675
678
  runtime.persist_constants()
679
+ write_file(
680
+ runner_attribute_file,
681
+ "%s:%s" % (get_metadata(), "/".join((obj.flow.name, runtime.run_id))),
682
+ )
676
683
  if clone_only:
677
684
  runtime.clone_original_run()
678
685
  else:
@@ -703,6 +710,7 @@ def run(
703
710
  max_log_size=None,
704
711
  decospecs=None,
705
712
  run_id_file=None,
713
+ runner_attribute_file=None,
706
714
  user_namespace=None,
707
715
  **kwargs
708
716
  ):
@@ -726,18 +734,22 @@ def run(
726
734
  max_log_size=max_log_size * 1024 * 1024,
727
735
  )
728
736
  write_latest_run_id(obj, runtime.run_id)
729
- write_run_id(run_id_file, runtime.run_id)
737
+ write_file(run_id_file, runtime.run_id)
730
738
 
731
739
  obj.flow._set_constants(obj.graph, kwargs)
732
740
  runtime.print_workflow_info()
733
741
  runtime.persist_constants()
742
+ write_file(
743
+ runner_attribute_file,
744
+ "%s:%s" % (get_metadata(), "/".join((obj.flow.name, runtime.run_id))),
745
+ )
734
746
  runtime.execute()
735
747
 
736
748
 
737
- def write_run_id(run_id_file, run_id):
738
- if run_id_file is not None:
739
- with open(run_id_file, "w") as f:
740
- f.write(str(run_id))
749
+ def write_file(file_path, content):
750
+ if file_path is not None:
751
+ with open(file_path, "w") as f:
752
+ f.write(str(content))
741
753
 
742
754
 
743
755
  def before_run(obj, tags, decospecs):
metaflow/clone_util.py CHANGED
@@ -66,6 +66,12 @@ def clone_task_helper(
66
66
  type="attempt",
67
67
  tags=metadata_tags,
68
68
  ),
69
+ MetaDatum(
70
+ field="attempt_ok",
71
+ value="True", # During clone, the task is always considered successful.
72
+ type="internal_attempt_status",
73
+ tags=metadata_tags,
74
+ ),
69
75
  ],
70
76
  )
71
77
  output.done()
@@ -23,6 +23,8 @@ def _check_stubs_supported():
23
23
  if _py_ver >= (3, 4):
24
24
  if _py_ver >= (3, 8):
25
25
  from importlib import metadata
26
+ elif _py_ver >= (3, 7):
27
+ from metaflow._vendor import importlib_metadata as metadata
26
28
  elif _py_ver >= (3, 6):
27
29
  from metaflow._vendor.v3_6 import importlib_metadata as metadata
28
30
  else:
@@ -262,6 +262,8 @@ if _py_ver >= (3, 4):
262
262
 
263
263
  if _py_ver >= (3, 8):
264
264
  from importlib import metadata
265
+ elif _py_ver >= (3, 7):
266
+ from metaflow._vendor import importlib_metadata as metadata
265
267
  elif _py_ver >= (3, 6):
266
268
  from metaflow._vendor.v3_6 import importlib_metadata as metadata
267
269
  else:
@@ -179,6 +179,8 @@ _plugin_categories = {
179
179
  "metadata_provider": lambda x: x.TYPE,
180
180
  "datastore": lambda x: x.TYPE,
181
181
  "secrets_provider": lambda x: x.TYPE,
182
+ "gcp_client_provider": lambda x: x.name,
183
+ "azure_client_provider": lambda x: x.name,
182
184
  "sidecar": None,
183
185
  "logging_sidecar": None,
184
186
  "monitor_sidecar": None,
@@ -26,6 +26,7 @@ DEFAULT_METADATA = from_conf("DEFAULT_METADATA", "local")
26
26
  DEFAULT_MONITOR = from_conf("DEFAULT_MONITOR", "nullSidecarMonitor")
27
27
  DEFAULT_PACKAGE_SUFFIXES = from_conf("DEFAULT_PACKAGE_SUFFIXES", ".py,.R,.RDS")
28
28
  DEFAULT_AWS_CLIENT_PROVIDER = from_conf("DEFAULT_AWS_CLIENT_PROVIDER", "boto3")
29
+ DEFAULT_GCP_CLIENT_PROVIDER = from_conf("DEFAULT_GCP_CLIENT_PROVIDER", "gcp-default")
29
30
  DEFAULT_SECRETS_BACKEND_TYPE = from_conf("DEFAULT_SECRETS_BACKEND_TYPE")
30
31
  DEFAULT_SECRETS_ROLE = from_conf("DEFAULT_SECRETS_ROLE")
31
32
 
@@ -144,6 +145,22 @@ DATATOOLS_LOCALROOT = from_conf(
144
145
  # Secrets Backend - AWS Secrets Manager configuration
145
146
  AWS_SECRETS_MANAGER_DEFAULT_REGION = from_conf("AWS_SECRETS_MANAGER_DEFAULT_REGION")
146
147
 
148
+ # Secrets Backend - GCP Secrets name prefix. With this, users don't have
149
+ # to specify the full secret name in the @secret decorator.
150
+ #
151
+ # Note that it makes a difference whether the prefix ends with a slash or not
152
+ # E.g. if secret name passed to @secret decorator is mysecret:
153
+ # - "projects/1234567890/secrets/" -> "projects/1234567890/secrets/mysecret"
154
+ # - "projects/1234567890/secrets/foo-" -> "projects/1234567890/secrets/foo-mysecret"
155
+ GCP_SECRET_MANAGER_PREFIX = from_conf("GCP_SECRET_MANAGER_PREFIX")
156
+
157
+ # Secrets Backend - Azure Key Vault prefix. With this, users don't have to
158
+ # specify the full https:// vault url in the @secret decorator.
159
+ #
160
+ # It does not make a difference if the prefix ends in a / or not. We will handle either
161
+ # case correctly.
162
+ AZURE_KEY_VAULT_PREFIX = from_conf("AZURE_KEY_VAULT_PREFIX")
163
+
147
164
  # The root directory to save artifact pulls in, when using S3 or Azure
148
165
  ARTIFACT_LOCALROOT = from_conf("ARTIFACT_LOCALROOT", os.getcwd())
149
166
 
@@ -210,6 +227,8 @@ DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY")
210
227
  INCLUDE_FOREACH_STACK = from_conf("INCLUDE_FOREACH_STACK", False)
211
228
  # Maximum length of the foreach value string to be stored in each ForeachFrame.
212
229
  MAXIMUM_FOREACH_VALUE_CHARS = from_conf("MAXIMUM_FOREACH_VALUE_CHARS", 30)
230
+ # The default runtime limit (In seconds) of jobs launched by any compute provider. Default of 5 days.
231
+ DEFAULT_RUNTIME_LIMIT = from_conf("DEFAULT_RUNTIME_LIMIT", 5 * 24 * 60 * 60)
213
232
 
214
233
  ###
215
234
  # Organization customizations
@@ -322,6 +341,9 @@ KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
322
341
  ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
323
342
  ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
324
343
 
344
+ KUBERNETES_JOBSET_GROUP = from_conf("KUBERNETES_JOBSET_GROUP", "jobset.x-k8s.io")
345
+ KUBERNETES_JOBSET_VERSION = from_conf("KUBERNETES_JOBSET_VERSION", "v1alpha2")
346
+
325
347
  ##
326
348
  # Argo Events Configuration
327
349
  ##
@@ -456,9 +478,11 @@ def get_pinned_conda_libs(python_version, datastore_type):
456
478
  elif datastore_type == "azure":
457
479
  pins["azure-identity"] = ">=1.10.0"
458
480
  pins["azure-storage-blob"] = ">=12.12.0"
481
+ pins["azure-keyvault-secrets"] = ">=4.7.0"
459
482
  elif datastore_type == "gs":
460
483
  pins["google-cloud-storage"] = ">=2.5.0"
461
484
  pins["google-auth"] = ">=2.11.0"
485
+ pins["google-cloud-secret-manager"] = ">=2.10.0"
462
486
  elif datastore_type == "local":
463
487
  pass
464
488
  else:
@@ -124,12 +124,12 @@ class MetaflowEnvironment(object):
124
124
  cmds.append("%s -m pip install awscli boto3 -qqq" % self._python())
125
125
  elif datastore_type == "azure":
126
126
  cmds.append(
127
- "%s -m pip install azure-identity azure-storage-blob simple-azure-blob-downloader -qqq"
127
+ "%s -m pip install azure-identity azure-storage-blob azure-keyvault-secrets simple-azure-blob-downloader -qqq"
128
128
  % self._python()
129
129
  )
130
130
  elif datastore_type == "gs":
131
131
  cmds.append(
132
- "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader -qqq"
132
+ "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader google-cloud-secret-manager -qqq"
133
133
  % self._python()
134
134
  )
135
135
  else:
metaflow/parameters.py CHANGED
@@ -388,6 +388,7 @@ def add_custom_parameters(deploy_mode=False):
388
388
  # deploy_mode determines whether deploy-time functions should or should
389
389
  # not be evaluated for this command
390
390
  def wrapper(cmd):
391
+ cmd.has_flow_params = True
391
392
  # Iterate over parameters in reverse order so cmd.params lists options
392
393
  # in the order they are defined in the FlowSpec subclass
393
394
  for arg in parameters[::-1]:
@@ -121,8 +121,25 @@ SECRETS_PROVIDERS_DESC = [
121
121
  "aws-secrets-manager",
122
122
  ".aws.secrets_manager.aws_secrets_manager_secrets_provider.AwsSecretsManagerSecretsProvider",
123
123
  ),
124
+ (
125
+ "gcp-secret-manager",
126
+ ".gcp.gcp_secret_manager_secrets_provider.GcpSecretManagerSecretsProvider",
127
+ ),
128
+ (
129
+ "az-key-vault",
130
+ ".azure.azure_secret_manager_secrets_provider.AzureKeyVaultSecretsProvider",
131
+ ),
124
132
  ]
125
133
 
134
+ GCP_CLIENT_PROVIDERS_DESC = [
135
+ ("gcp-default", ".gcp.gs_storage_client_factory.GcpDefaultClientProvider")
136
+ ]
137
+
138
+ AZURE_CLIENT_PROVIDERS_DESC = [
139
+ ("azure-default", ".azure.azure_credential.AzureDefaultClientProvider")
140
+ ]
141
+
142
+
126
143
  process_plugins(globals())
127
144
 
128
145
 
@@ -144,6 +161,8 @@ SIDECARS.update(MONITOR_SIDECARS)
144
161
 
145
162
  AWS_CLIENT_PROVIDERS = resolve_plugins("aws_client_provider")
146
163
  SECRETS_PROVIDERS = resolve_plugins("secrets_provider")
164
+ AZURE_CLIENT_PROVIDERS = resolve_plugins("azure_client_provider")
165
+ GCP_CLIENT_PROVIDERS = resolve_plugins("gcp_client_provider")
147
166
 
148
167
  from .cards.card_modules import MF_EXTERNAL_CARDS
149
168
 
@@ -17,6 +17,7 @@ from metaflow.metaflow_config import (
17
17
  AIRFLOW_KUBERNETES_KUBECONFIG_FILE,
18
18
  AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS,
19
19
  AWS_SECRETS_MANAGER_DEFAULT_REGION,
20
+ GCP_SECRET_MANAGER_PREFIX,
20
21
  AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
21
22
  CARD_AZUREROOT,
22
23
  CARD_GSROOT,
@@ -31,6 +32,7 @@ from metaflow.metaflow_config import (
31
32
  S3_ENDPOINT_URL,
32
33
  SERVICE_HEADERS,
33
34
  SERVICE_INTERNAL_URL,
35
+ AZURE_KEY_VAULT_PREFIX,
34
36
  )
35
37
 
36
38
  from metaflow.metaflow_config_funcs import config_values
@@ -408,6 +410,11 @@ class Airflow(object):
408
410
  env[
409
411
  "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
410
412
  ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
413
+ if GCP_SECRET_MANAGER_PREFIX:
414
+ env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
415
+
416
+ if AZURE_KEY_VAULT_PREFIX:
417
+ env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
411
418
 
412
419
  env.update(additional_mf_variables)
413
420
 
@@ -32,6 +32,8 @@ from metaflow.metaflow_config import (
32
32
  DATATOOLS_S3ROOT,
33
33
  DEFAULT_METADATA,
34
34
  DEFAULT_SECRETS_BACKEND_TYPE,
35
+ GCP_SECRET_MANAGER_PREFIX,
36
+ AZURE_KEY_VAULT_PREFIX,
35
37
  KUBERNETES_FETCH_EC2_METADATA,
36
38
  KUBERNETES_LABELS,
37
39
  KUBERNETES_NAMESPACE,
@@ -627,6 +629,14 @@ class ArgoWorkflows(object):
627
629
  ),
628
630
  }
629
631
 
632
+ if self._schedule is not None:
633
+ # timezone is an optional field and json dumps on None will result in null
634
+ # hence configuring it to an empty string
635
+ if self._timezone is None:
636
+ self._timezone = ""
637
+ cron_info = {"schedule": self._schedule, "tz": self._timezone}
638
+ annotations.update({"metaflow/cron": json.dumps(cron_info)})
639
+
630
640
  if self.parameters:
631
641
  annotations.update({"metaflow/parameters": json.dumps(self.parameters)})
632
642
 
@@ -838,6 +848,11 @@ class ArgoWorkflows(object):
838
848
  def _visit(
839
849
  node, exit_node=None, templates=None, dag_tasks=None, parent_foreach=None
840
850
  ):
851
+ if node.parallel_foreach:
852
+ raise ArgoWorkflowsException(
853
+ "Deploying flows with @parallel decorator(s) "
854
+ "as Argo Workflows is not supported currently."
855
+ )
841
856
  # Every for-each node results in a separate subDAG and an equivalent
842
857
  # DAGTemplate rooted at the child of the for-each node. Each DAGTemplate
843
858
  # has a unique name - the top-level DAGTemplate is named as the name of
@@ -1413,6 +1428,8 @@ class ArgoWorkflows(object):
1413
1428
  env[
1414
1429
  "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
1415
1430
  ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
1431
+ env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
1432
+ env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
1416
1433
 
1417
1434
  # support for Azure
1418
1435
  env[
@@ -88,15 +88,15 @@ class BatchDecorator(StepDecorator):
88
88
  Alias for inferentia. Use only one of the two.
89
89
  efa : int, default 0
90
90
  Number of elastic fabric adapter network devices to attach to container
91
- ephemeral_storage: int, default None
92
- The total amount, in GiB, of ephemeral storage to set for the task (21-200)
91
+ ephemeral_storage : int, default None
92
+ The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
93
93
  This is only relevant for Fargate compute environments
94
94
  log_driver: str, optional, default None
95
95
  The log driver to use for the Amazon ECS container.
96
96
  log_options: List[str], optional, default None
97
97
  List of strings containing options for the chosen log driver. The configurable values
98
98
  depend on the `log driver` chosen. Validation of these options is not supported yet.
99
- Example usage: ["awslogs-group:aws/batch/job"]
99
+ Example: [`awslogs-group:aws/batch/job`]
100
100
  """
101
101
 
102
102
  name = "batch"
@@ -0,0 +1,3 @@
1
+ from .azure_credential import (
2
+ create_cacheable_azure_credential as create_azure_credential,
3
+ )
@@ -0,0 +1,53 @@
1
+ class AzureDefaultClientProvider(object):
2
+ name = "azure-default"
3
+
4
+ @staticmethod
5
+ def create_cacheable_azure_credential(*args, **kwargs):
6
+ """azure.identity.DefaultAzureCredential is not readily cacheable in a dictionary
7
+ because it does not have a content based hash and equality implementations.
8
+
9
+ We implement a subclass CacheableDefaultAzureCredential to add them.
10
+
11
+ We need this because credentials will be part of the cache key in _ClientCache.
12
+ """
13
+ from azure.identity import DefaultAzureCredential
14
+
15
+ class CacheableDefaultAzureCredential(DefaultAzureCredential):
16
+ def __init__(self, *args, **kwargs):
17
+ super(CacheableDefaultAzureCredential, self).__init__(*args, **kwargs)
18
+ # Just hashing all the kwargs works because they are all individually
19
+ # hashable as of 7/15/2022.
20
+ #
21
+ # What if Azure adds unhashable things to kwargs?
22
+ # - We will have CI to catch this (it will always install the latest Azure SDKs)
23
+ # - In Metaflow usage today we never specify any kwargs anyway. (see last line
24
+ # of the outer function.)
25
+ self._hash_code = hash((args, tuple(sorted(kwargs.items()))))
26
+
27
+ def __hash__(self):
28
+ return self._hash_code
29
+
30
+ def __eq__(self, other):
31
+ return hash(self) == hash(other)
32
+
33
+ return CacheableDefaultAzureCredential(*args, **kwargs)
34
+
35
+
36
+ cached_provider_class = None
37
+
38
+
39
+ def create_cacheable_azure_credential():
40
+ global cached_provider_class
41
+ if cached_provider_class is None:
42
+ from metaflow.metaflow_config import DEFAULT_AZURE_CLIENT_PROVIDER
43
+ from metaflow.plugins import AZURE_CLIENT_PROVIDERS
44
+
45
+ for p in AZURE_CLIENT_PROVIDERS:
46
+ if p.name == DEFAULT_AZURE_CLIENT_PROVIDER:
47
+ cached_provider_class = p
48
+ break
49
+ else:
50
+ raise ValueError(
51
+ "Cannot find Azure Client provider %s" % DEFAULT_AZURE_CLIENT_PROVIDER
52
+ )
53
+ return cached_provider_class.create_cacheable_azure_credential()
@@ -10,4 +10,4 @@ class MetaflowAzureResourceError(MetaflowException):
10
10
 
11
11
 
12
12
  class MetaflowAzurePackageError(MetaflowException):
13
- headline = "Missing required packages 'azure-identity' and 'azure-storage-blob'"
13
+ headline = "Missing required packages 'azure-identity' and 'azure-storage-blob' and 'azure-keyvault-secrets'"