metaflow 2.11.15__py2.py3-none-any.whl → 2.11.16__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +3 -0
- metaflow/clone_util.py +6 -0
- metaflow/extension_support/plugins.py +2 -0
- metaflow/metaflow_config.py +24 -0
- metaflow/metaflow_environment.py +2 -2
- metaflow/plugins/__init__.py +19 -0
- metaflow/plugins/airflow/airflow.py +7 -0
- metaflow/plugins/argo/argo_workflows.py +17 -0
- metaflow/plugins/azure/__init__.py +3 -0
- metaflow/plugins/azure/azure_credential.py +53 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_utils.py +2 -35
- metaflow/plugins/azure/blob_service_client_factory.py +4 -2
- metaflow/plugins/datastores/azure_storage.py +6 -6
- metaflow/plugins/datatools/s3/s3.py +1 -1
- metaflow/plugins/gcp/__init__.py +1 -0
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
- metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
- metaflow/plugins/kubernetes/kubernetes.py +85 -8
- metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
- metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +208 -201
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
- metaflow/plugins/timeout_decorator.py +2 -1
- metaflow/task.py +1 -12
- metaflow/tuple_util.py +27 -0
- metaflow/util.py +0 -15
- metaflow/version.py +1 -1
- {metaflow-2.11.15.dist-info → metaflow-2.11.16.dist-info}/METADATA +2 -2
- {metaflow-2.11.15.dist-info → metaflow-2.11.16.dist-info}/RECORD +36 -31
- {metaflow-2.11.15.dist-info → metaflow-2.11.16.dist-info}/LICENSE +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.11.16.dist-info}/WHEEL +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.11.16.dist-info}/entry_points.txt +0 -0
- {metaflow-2.11.15.dist-info → metaflow-2.11.16.dist-info}/top_level.txt +0 -0
metaflow/__init__.py
CHANGED
@@ -143,6 +143,9 @@ from .client import (
     DataArtifact,
 )
 
+# Import data class within tuple_util but not introduce new symbols.
+from . import tuple_util
+
 __version_addl__ = []
 _ext_debug("Loading top-level modules")
 for m in _tl_modules:
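The import above only loads `metaflow.tuple_util`; it re-exports nothing. A minimal sketch of what this enables (the `defaults` signature is an assumption, since the body of the new `tuple_util.py` is not shown in this diff):

```python
# A sketch; namedtuple_with_defaults lives in the new metaflow/tuple_util.py
# (+27 lines, body not shown here).
from metaflow import tuple_util  # loads the module, adds no new symbols
from metaflow.tuple_util import namedtuple_with_defaults

# Assumed signature, for illustration only.
Point = namedtuple_with_defaults("Point", ["x", "y"], defaults=(0, 0))
print(Point())  # expected: Point(x=0, y=0)
```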
metaflow/clone_util.py
CHANGED
@@ -66,6 +66,12 @@ def clone_task_helper(
                 type="attempt",
                 tags=metadata_tags,
             ),
+            MetaDatum(
+                field="attempt_ok",
+                value="True",  # During clone, the task is always considered successful.
+                type="internal_attempt_status",
+                tags=metadata_tags,
+            ),
         ],
     )
     output.done()
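Cloned tasks now carry an `attempt_ok` metadatum set to `"True"`. A sketch of reading it back through the Metaflow client API (the pathspec is hypothetical):

```python
# Sketch: inspecting the new metadatum via the client.
from metaflow import Task

task = Task("HelloFlow/12/start/345")  # hypothetical pathspec
# For a cloned task, the new metadatum surfaces as "True" here.
print(task.metadata_dict.get("attempt_ok"))
```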
metaflow/extension_support/plugins.py
CHANGED
@@ -179,6 +179,8 @@ _plugin_categories = {
     "metadata_provider": lambda x: x.TYPE,
     "datastore": lambda x: x.TYPE,
     "secrets_provider": lambda x: x.TYPE,
+    "gcp_client_provider": lambda x: x.name,
+    "azure_client_provider": lambda x: x.name,
     "sidecar": None,
     "logging_sidecar": None,
     "monitor_sidecar": None,
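The two new categories resolve plugins by their `name` attribute. A minimal sketch of the provider shape this implies, modeled on `AzureDefaultClientProvider` further down in this diff (the class and its behavior here are illustrative, not part of the diff):

```python
# Illustrative provider shape; only `name` (the resolution key above) and the
# factory method consumed by azure_credential.py below are assumed.
class MyAzureClientProvider(object):
    name = "my-azure-provider"  # hypothetical plugin name

    @staticmethod
    def create_cacheable_azure_credential(*args, **kwargs):
        # Return a credential object that is hashable, so it can be cached.
        raise NotImplementedError
```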
metaflow/metaflow_config.py
CHANGED
@@ -26,6 +26,7 @@ DEFAULT_METADATA = from_conf("DEFAULT_METADATA", "local")
 DEFAULT_MONITOR = from_conf("DEFAULT_MONITOR", "nullSidecarMonitor")
 DEFAULT_PACKAGE_SUFFIXES = from_conf("DEFAULT_PACKAGE_SUFFIXES", ".py,.R,.RDS")
 DEFAULT_AWS_CLIENT_PROVIDER = from_conf("DEFAULT_AWS_CLIENT_PROVIDER", "boto3")
+DEFAULT_GCP_CLIENT_PROVIDER = from_conf("DEFAULT_GCP_CLIENT_PROVIDER", "gcp-default")
 DEFAULT_SECRETS_BACKEND_TYPE = from_conf("DEFAULT_SECRETS_BACKEND_TYPE")
 DEFAULT_SECRETS_ROLE = from_conf("DEFAULT_SECRETS_ROLE")
 
@@ -144,6 +145,22 @@ DATATOOLS_LOCALROOT = from_conf(
 # Secrets Backend - AWS Secrets Manager configuration
 AWS_SECRETS_MANAGER_DEFAULT_REGION = from_conf("AWS_SECRETS_MANAGER_DEFAULT_REGION")
 
+# Secrets Backend - GCP Secrets name prefix. With this, users don't have
+# to specify the full secret name in the @secret decorator.
+#
+# Note that it makes a difference whether the prefix ends with a slash or not
+# E.g. if secret name passed to @secret decorator is mysecret:
+# - "projects/1234567890/secrets/" -> "projects/1234567890/secrets/mysecret"
+# - "projects/1234567890/secrets/foo-" -> "projects/1234567890/secrets/foo-mysecret"
+GCP_SECRET_MANAGER_PREFIX = from_conf("GCP_SECRET_MANAGER_PREFIX")
+
+# Secrets Backend - Azure Key Vault prefix. With this, users don't have to
+# specify the full https:// vault url in the @secret decorator.
+#
+# It does not make a difference if the prefix ends in a / or not. We will handle either
+# case correctly.
+AZURE_KEY_VAULT_PREFIX = from_conf("AZURE_KEY_VAULT_PREFIX")
+
 # The root directory to save artifact pulls in, when using S3 or Azure
 ARTIFACT_LOCALROOT = from_conf("ARTIFACT_LOCALROOT", os.getcwd())
 
@@ -210,6 +227,8 @@ DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY")
 INCLUDE_FOREACH_STACK = from_conf("INCLUDE_FOREACH_STACK", False)
 # Maximum length of the foreach value string to be stored in each ForeachFrame.
 MAXIMUM_FOREACH_VALUE_CHARS = from_conf("MAXIMUM_FOREACH_VALUE_CHARS", 30)
+# The default runtime limit (In seconds) of jobs launched by any compute provider. Default of 5 days.
+DEFAULT_RUNTIME_LIMIT = from_conf("DEFAULT_RUNTIME_LIMIT", 5 * 24 * 60 * 60)
 
 ###
 # Organization customizations
@@ -322,6 +341,9 @@ KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
 ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
 ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
 
+KUBERNETES_JOBSET_GROUP = from_conf("KUBERNETES_JOBSET_GROUP", "jobset.x-k8s.io")
+KUBERNETES_JOBSET_VERSION = from_conf("KUBERNETES_JOBSET_VERSION", "v1alpha2")
+
 ##
 # Argo Events Configuration
 ##
@@ -456,9 +478,11 @@ def get_pinned_conda_libs(python_version, datastore_type):
     elif datastore_type == "azure":
         pins["azure-identity"] = ">=1.10.0"
         pins["azure-storage-blob"] = ">=12.12.0"
+        pins["azure-keyvault-secrets"] = ">=4.7.0"
     elif datastore_type == "gs":
         pins["google-cloud-storage"] = ">=2.5.0"
         pins["google-auth"] = ">=2.11.0"
+        pins["google-cloud-secret-manager"] = ">=2.10.0"
     elif datastore_type == "local":
         pass
     else:
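A small sketch of the prefix-joining semantics the comments above describe (the helper names are mine; the Azure join mirrors the `rstrip("/")` logic in the new Key Vault provider later in this diff):

```python
# Helper names are illustrative, not part of the diff.
def gcp_full_name(prefix, name):
    return prefix + name  # plain concatenation: a trailing "/" or "foo-" matters

def azure_full_secret(prefix, name):
    return "%s/secrets/%s" % (prefix.rstrip("/"), name)  # trailing "/" is normalized

assert gcp_full_name("projects/1234567890/secrets/", "mysecret") == \
    "projects/1234567890/secrets/mysecret"
# Azure: with or without the trailing slash, the result is identical.
assert azure_full_secret("https://myvault.vault.azure.net/", "mysecret") == \
    azure_full_secret("https://myvault.vault.azure.net", "mysecret")
```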
metaflow/metaflow_environment.py
CHANGED
@@ -124,12 +124,12 @@ class MetaflowEnvironment(object):
             cmds.append("%s -m pip install awscli boto3 -qqq" % self._python())
         elif datastore_type == "azure":
             cmds.append(
-                "%s -m pip install azure-identity azure-storage-blob simple-azure-blob-downloader -qqq"
+                "%s -m pip install azure-identity azure-storage-blob azure-keyvault-secrets simple-azure-blob-downloader -qqq"
                 % self._python()
             )
         elif datastore_type == "gs":
             cmds.append(
-                "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader -qqq"
+                "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader google-cloud-secret-manager -qqq"
                 % self._python()
             )
         else:
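An illustrative rendering of the updated bootstrap command for the `azure` datastore (the interpreter path is a stand-in for `self._python()`):

```python
# Illustrative only: the rendered azure bootstrap command after this change.
python_bin = "python3"  # stand-in for self._python()
cmd = (
    "%s -m pip install azure-identity azure-storage-blob "
    "azure-keyvault-secrets simple-azure-blob-downloader -qqq" % python_bin
)
print(cmd)
```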
metaflow/plugins/__init__.py
CHANGED
@@ -121,8 +121,25 @@ SECRETS_PROVIDERS_DESC = [
         "aws-secrets-manager",
         ".aws.secrets_manager.aws_secrets_manager_secrets_provider.AwsSecretsManagerSecretsProvider",
     ),
+    (
+        "gcp-secret-manager",
+        ".gcp.gcp_secret_manager_secrets_provider.GcpSecretManagerSecretsProvider",
+    ),
+    (
+        "az-key-vault",
+        ".azure.azure_secret_manager_secrets_provider.AzureKeyVaultSecretsProvider",
+    ),
 ]
 
+GCP_CLIENT_PROVIDERS_DESC = [
+    ("gcp-default", ".gcp.gs_storage_client_factory.GcpDefaultClientProvider")
+]
+
+AZURE_CLIENT_PROVIDERS_DESC = [
+    ("azure-default", ".azure.azure_credential.AzureDefaultClientProvider")
+]
+
+
 process_plugins(globals())
 
 
@@ -144,6 +161,8 @@ SIDECARS.update(MONITOR_SIDECARS)
 
 AWS_CLIENT_PROVIDERS = resolve_plugins("aws_client_provider")
 SECRETS_PROVIDERS = resolve_plugins("secrets_provider")
+AZURE_CLIENT_PROVIDERS = resolve_plugins("azure_client_provider")
+GCP_CLIENT_PROVIDERS = resolve_plugins("gcp_client_provider")
 
 from .cards.card_modules import MF_EXTERNAL_CARDS
 
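With the new providers registered, a flow can select them by their `TYPE` through `@secrets`. A sketch, assuming a secret named `mysecret` exists in the configured vault:

```python
# Sketch: a flow pulling a secret through the new Key Vault provider.
# The secret id "mysecret" and resulting env var name are hypothetical.
from metaflow import FlowSpec, step, secrets

class KeyVaultFlow(FlowSpec):
    @secrets(sources=[{"type": "az-key-vault", "id": "mysecret"}])
    @step
    def start(self):
        import os
        # The provider maps the secret name to a (sanitized) env var.
        print(os.environ.get("mysecret"))
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    KeyVaultFlow()
```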
metaflow/plugins/airflow/airflow.py
CHANGED
@@ -17,6 +17,7 @@ from metaflow.metaflow_config import (
     AIRFLOW_KUBERNETES_KUBECONFIG_FILE,
     AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS,
     AWS_SECRETS_MANAGER_DEFAULT_REGION,
+    GCP_SECRET_MANAGER_PREFIX,
     AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -31,6 +32,7 @@ from metaflow.metaflow_config import (
     S3_ENDPOINT_URL,
     SERVICE_HEADERS,
     SERVICE_INTERNAL_URL,
+    AZURE_KEY_VAULT_PREFIX,
 )
 
 from metaflow.metaflow_config_funcs import config_values
@@ -408,6 +410,11 @@ class Airflow(object):
             env[
                 "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
             ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
+        if GCP_SECRET_MANAGER_PREFIX:
+            env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
+
+        if AZURE_KEY_VAULT_PREFIX:
+            env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
 
         env.update(additional_mf_variables)
 
metaflow/plugins/argo/argo_workflows.py
CHANGED
@@ -32,6 +32,8 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     DEFAULT_METADATA,
     DEFAULT_SECRETS_BACKEND_TYPE,
+    GCP_SECRET_MANAGER_PREFIX,
+    AZURE_KEY_VAULT_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
     KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
@@ -627,6 +629,14 @@ class ArgoWorkflows(object):
             ),
         }
 
+        if self._schedule is not None:
+            # timezone is an optional field and json dumps on None will result in null
+            # hence configuring it to an empty string
+            if self._timezone is None:
+                self._timezone = ""
+            cron_info = {"schedule": self._schedule, "tz": self._timezone}
+            annotations.update({"metaflow/cron": json.dumps(cron_info)})
+
         if self.parameters:
             annotations.update({"metaflow/parameters": json.dumps(self.parameters)})
 
@@ -838,6 +848,11 @@ class ArgoWorkflows(object):
         def _visit(
             node, exit_node=None, templates=None, dag_tasks=None, parent_foreach=None
         ):
+            if node.parallel_foreach:
+                raise ArgoWorkflowsException(
+                    "Deploying flows with @parallel decorator(s) "
+                    "as Argo Workflows is not supported currently."
+                )
             # Every for-each node results in a separate subDAG and an equivalent
             # DAGTemplate rooted at the child of the for-each node. Each DAGTemplate
             # has a unique name - the top-level DAGTemplate is named as the name of
@@ -1413,6 +1428,8 @@ class ArgoWorkflows(object):
             env[
                 "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
             ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
+            env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
+            env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
 
             # support for Azure
             env[
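Why the cron annotation above coerces a missing timezone to an empty string: `json.dumps(None)` serializes to `null`, as this quick check shows:

```python
import json

# None would serialize as null in the annotation payload:
print(json.dumps({"schedule": "0 2 * * *", "tz": None}))  # {"schedule": "0 2 * * *", "tz": null}
# Coercing to "" keeps the value a plain string:
print(json.dumps({"schedule": "0 2 * * *", "tz": ""}))    # {"schedule": "0 2 * * *", "tz": ""}
```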
metaflow/plugins/azure/azure_credential.py
ADDED
@@ -0,0 +1,53 @@
+class AzureDefaultClientProvider(object):
+    name = "azure-default"
+
+    @staticmethod
+    def create_cacheable_azure_credential(*args, **kwargs):
+        """azure.identity.DefaultAzureCredential is not readily cacheable in a dictionary
+        because it does not have a content based hash and equality implementations.
+
+        We implement a subclass CacheableDefaultAzureCredential to add them.
+
+        We need this because credentials will be part of the cache key in _ClientCache.
+        """
+        from azure.identity import DefaultAzureCredential
+
+        class CacheableDefaultAzureCredential(DefaultAzureCredential):
+            def __init__(self, *args, **kwargs):
+                super(CacheableDefaultAzureCredential, self).__init__(*args, **kwargs)
+                # Just hashing all the kwargs works because they are all individually
+                # hashable as of 7/15/2022.
+                #
+                # What if Azure adds unhashable things to kwargs?
+                # - We will have CI to catch this (it will always install the latest Azure SDKs)
+                # - In Metaflow usage today we never specify any kwargs anyway. (see last line
+                # of the outer function.
+                self._hash_code = hash((args, tuple(sorted(kwargs.items()))))
+
+            def __hash__(self):
+                return self._hash_code
+
+            def __eq__(self, other):
+                return hash(self) == hash(other)
+
+        return CacheableDefaultAzureCredential(*args, **kwargs)
+
+
+cached_provider_class = None
+
+
+def create_cacheable_azure_credential():
+    global cached_provider_class
+    if cached_provider_class is None:
+        from metaflow.metaflow_config import DEFAULT_AZURE_CLIENT_PROVIDER
+        from metaflow.plugins import AZURE_CLIENT_PROVIDERS
+
+        for p in AZURE_CLIENT_PROVIDERS:
+            if p.name == DEFAULT_AZURE_CLIENT_PROVIDER:
+                cached_provider_class = p
+                break
+        else:
+            raise ValueError(
+                "Cannot find Azure Client provider %s" % DEFAULT_AZURE_CLIENT_PROVIDER
+            )
+    return cached_provider_class.create_cacheable_azure_credential()
metaflow/plugins/azure/azure_exceptions.py
CHANGED
@@ -10,4 +10,4 @@ class MetaflowAzureResourceError(MetaflowException):
 
 
 class MetaflowAzurePackageError(MetaflowException):
-    headline = "Missing required packages 'azure-identity' and 'azure-storage-blob'"
+    headline = "Missing required packages 'azure-identity' and 'azure-storage-blob' and 'azure-keyvault-secrets'"
metaflow/plugins/azure/azure_secret_manager_secrets_provider.py
ADDED
@@ -0,0 +1,240 @@
+from metaflow.plugins.secrets import SecretsProvider
+import re
+import base64
+import codecs
+from urllib.parse import urlparse
+from metaflow.exception import MetaflowException
+import sys
+from metaflow.metaflow_config import AZURE_KEY_VAULT_PREFIX
+from metaflow.plugins.azure.azure_credential import (
+    create_cacheable_azure_credential,
+)
+
+
+class MetaflowAzureKeyVaultBadVault(MetaflowException):
+    """Raised when the secretid is fully qualified but does not have the right key vault domain"""
+
+
+class MetaflowAzureKeyVaultBadSecretType(MetaflowException):
+    """Raised when the secret type is anything except secrets"""
+
+
+class MetaflowAzureKeyVaultBadSecretPath(MetaflowException):
+    """Raised when the secret path does not match to expected length"""
+
+
+class MetaflowAzureKeyVaultBadSecretName(MetaflowException):
+    """Raised when the secret name does not match expected pattern"""
+
+
+class MetaflowAzureKeyVaultBadSecretVersion(MetaflowException):
+    """Raised when the secret version does not match expected pattern"""
+
+
+class MetaflowAzureKeyVaultBadSecret(MetaflowException):
+    """Raised when the secret does not match supported patterns in Metaflow"""
+
+
+class AzureKeyVaultSecretsProvider(SecretsProvider):
+    TYPE = "az-key-vault"
+    key_vault_domains = [
+        ".vault.azure.net",
+        ".vault.azure.cn",
+        ".vault.usgovcloudapi.net",
+        ".vault.microsoftazure.de",
+    ]
+    supported_vault_object_types = ["secrets"]
+
+    # https://learn.microsoft.com/en-us/azure/key-vault/general/about-keys-secrets-certificates has details on vault name structure
+    # Vault name and Managed HSM pool name must be a 3-24 character string, containing only 0-9, a-z, A-Z, and not consecutive -.
+    def _is_valid_vault_name(self, vault_name):
+        vault_name_pattern = r"^(?!.*--)[a-zA-Z0-9-]{3,24}$"
+        return re.match(vault_name_pattern, vault_name) is not None
+
+    # The type of the object can be, "keys", "secrets", or "certificates".
+    # Currently only secrets will be supported
+    def _is_valid_object_type(self, secret_type):
+        for type in self.supported_vault_object_types:
+            if secret_type == type:
+                return True
+        return False
+
+    # The secret name must be a 1-127 character string, starting with a letter and containing only 0-9, a-z, A-Z, and -.
+    def _is_valid_secret_name(self, secret_name):
+        secret_name_pattern = r"^[a-zA-Z][a-zA-Z0-9-]{0,126}$"
+        return re.match(secret_name_pattern, secret_name) is not None
+
+    # An object-version is a system-generated, 32 character string identifier that is optionally used to address a unique version of an object.
+    def _is_valid_object_version(self, secret_version):
+        object_version_pattern = r"^[a-zA-Z0-9]{32}$"
+        return re.match(object_version_pattern, secret_version) is not None
+
+    # This function will check if the secret_id is fully qualified url. It will return True iff the secret_id is of the form:
+    # https://myvault.vault.azure.net/secrets/mysecret/ec96f02080254f109c51a1f14cdb1931 OR
+    # https://myvault.vault.azure.net/secrets/mysecret/
+    # validating the above as per recommendations in https://devblogs.microsoft.com/azure-sdk/guidance-for-applications-using-the-key-vault-libraries/
+    def _is_secret_id_fully_qualified_url(self, secret_id):
+        # if the secret_id is None/empty/does not start with https then return false
+        if secret_id is None or secret_id == "" or not secret_id.startswith("https://"):
+            return False
+        try:
+            parsed_vault_url = urlparse(secret_id)
+        except ValueError:
+            print("invalid vault url", file=sys.stderr)
+            return False
+        hostname = parsed_vault_url.netloc
+
+        k_v_domain_found = False
+        actual_k_v_domain = ""
+        for k_v_domain in self.key_vault_domains:
+            if k_v_domain in hostname:
+                k_v_domain_found = True
+                actual_k_v_domain = k_v_domain
+                break
+        if not k_v_domain_found:
+            # the secret_id started with https:// however the key_vault_domains
+            # were not present in the secret_id which means
+            raise MetaflowAzureKeyVaultBadVault("bad key vault domain %s" % secret_id)
+
+        # given the secret_id seems to have a valid key vault domain
+        # lets verify that the vault name corresponds to its regex.
+        vault_name = hostname[: -len(actual_k_v_domain)]
+        # verify the vault name pattern
+        if not self._is_valid_vault_name(vault_name):
+            raise MetaflowAzureKeyVaultBadVault("bad key vault name %s" % vault_name)
+
+        path_parts = parsed_vault_url.path.strip("/").split("/")
+        total_path_parts = len(path_parts)
+        if total_path_parts < 2 or total_path_parts > 3:
+            raise MetaflowAzureKeyVaultBadSecretPath(
+                "bad secret uri path %s" % path_parts
+            )
+
+        object_type = path_parts[0]
+        if not self._is_valid_object_type(object_type):
+            raise MetaflowAzureKeyVaultBadSecretType("bad secret type %s" % object_type)
+
+        secret_name = path_parts[1]
+        if not self._is_valid_secret_name(secret_name=secret_name):
+            raise MetaflowAzureKeyVaultBadSecretName("bad secret name %s" % secret_name)
+
+        if total_path_parts == 3:
+            if not self._is_valid_object_version(path_parts[2]):
+                raise MetaflowAzureKeyVaultBadSecretVersion(
+                    "bad secret version %s" % path_parts[2]
+                )
+
+        return True
+
+    # This function will validate the correctness of the partial secret id.
+    # It will attempt to construct the fully qualified secret URL internally and
+    # call the _is_secret_id_fully_qualified_url to check validity
+    def _is_partial_secret_valid(self, secret_id):
+        secret_parts = secret_id.strip("/").split("/")
+        total_secret_parts = len(secret_parts)
+        if total_secret_parts < 1 or total_secret_parts > 2:
+            return False
+
+        # since the secret_id is supposedly a partial id, the AZURE_KEY_VAULT_PREFIX
+        # must be set.
+        if not AZURE_KEY_VAULT_PREFIX:
+            raise ValueError(
+                "cannot use simple secret id without setting METAFLOW_AZURE_KEY_VAULT_PREFIX. %s"
+                % AZURE_KEY_VAULT_PREFIX
+            )
+        domain = AZURE_KEY_VAULT_PREFIX.rstrip("/")
+        full_secret = "%s/secrets/%s" % (domain, secret_id)
+        if not self._is_secret_id_fully_qualified_url(full_secret):
+            return False
+
+        return True
+
+    def _sanitize_key_as_env_var(self, key):
+        """
+        Sanitize a key as an environment variable name.
+        This is purely a convenience trade-off to cover common cases well, vs. introducing
+        ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).
+
+        1/27/2023(jackie):
+
+        We start with few rules and should *sparingly* add more over time.
+        Also, it's TBD whether all possible providers will share the same sanitization logic.
+        Therefore we will keep this function private for now
+        """
+        return key.replace("-", "_").replace(".", "_").replace("/", "_")
+
+    def get_secret_as_dict(self, secret_id, options={}, role=None):
+        # https://learn.microsoft.com/en-us/azure/app-service/app-service-key-vault-references?tabs=azure-cli has a lot of details on
+        # the patterns used in key vault
+        # Vault names and Managed HSM pool names are selected by the user and are globally unique.
+        # Vault name and Managed HSM pool name must be a 3-24 character string, containing only 0-9, a-z, A-Z, and not consecutive -.
+        # object-type The type of the object. As of 05/08/24 only "secrets", are supported
+        # object-name An object-name is a user provided name for and must be unique within a key vault. The name must be a 1-127 character string, starting with a letter and containing only 0-9, a-z, A-Z, and -.
+        # object-version An object-version is a system-generated, 32 character string identifier that is optionally used to address a unique version of an object.
+
+        # We allow these forms of secret_id:
+        #
+        # 1. Full path like https://<key-vault-name><.vault-domain>/secrets/<secret-name>/<secret-version>. This is what you
+        # see in Azure portal and is easy to copy paste.
+        #
+        # 2. Full path but without the version like https://<key-vault-name><.vault-domain>/secrets/<secret-name>
+        #
+        # 3. Simple string like mysecret. This corresponds to the SecretName.
+        #
+        # 4. Simple string with <secret-name>/<secret-version> suffix like mysecret/123
+
+        # The latter two forms require METAFLOW_AZURE_KEY_VAULT_PREFIX to be set.
+
+        # if the secret_id is None/empty/does not start with https then return false
+        if secret_id is None or secret_id == "":
+            raise MetaflowAzureKeyVaultBadSecret("empty secret id is not supported")
+
+        # check if the passed in secret is a short-form ( #3/#4 in the above comment)
+        if not secret_id.startswith("https://"):
+            # check if the secret_id is of form `secret_name` OR `secret_name/secret_version`
+            if not self._is_partial_secret_valid(secret_id=secret_id):
+                raise MetaflowAzureKeyVaultBadSecret(
+                    "unsupported partial secret %s" % secret_id
+                )
+
+            domain = AZURE_KEY_VAULT_PREFIX.rstrip("/")
+            full_secret = "%s/secrets/%s" % (domain, secret_id)
+
+        # if the secret id is passed as a URL - then check if the url is fully qualified
+        if secret_id.startswith("https://"):
+            if not self._is_secret_id_fully_qualified_url(secret_id=secret_id):
+                raise MetaflowException("unsupported secret %s" % secret_id)
+            full_secret = secret_id
+
+        # at this point I know that the secret URL is good so we can start creating the Secret Client
+        az_credentials = create_cacheable_azure_credential()
+        res = urlparse(full_secret)
+        az_vault_url = "%s://%s" % (
+            res.scheme,
+            res.netloc,
+        )  # https://myvault.vault.azure.net
+        secret_data = res.path.strip("/").split("/")[1:]
+        secret_name = secret_data[0]
+        secret_version = None
+        if len(secret_data) > 1:
+            secret_version = secret_data[1]
+
+        from azure.keyvault.secrets import SecretClient
+
+        client = SecretClient(vault_url=az_vault_url, credential=az_credentials)
+
+        key_vault_secret_val = client.get_secret(
+            name=secret_name, version=secret_version
+        )
+
+        result = {}
+
+        if options.get("env_var_name") is not None:
+            env_var_name = options["env_var_name"]
+            sanitized_key = self._sanitize_key_as_env_var(env_var_name)
+        else:
+            sanitized_key = self._sanitize_key_as_env_var(key_vault_secret_val.name)
+
+        response_payload = key_vault_secret_val.value
+        result[sanitized_key] = response_payload
+        return result
metaflow/plugins/azure/azure_utils.py
CHANGED
@@ -7,6 +7,7 @@ from metaflow.plugins.azure.azure_exceptions import (
     MetaflowAzurePackageError,
 )
 from metaflow.exception import MetaflowInternalError, MetaflowException
+from metaflow.plugins.azure.azure_credential import create_cacheable_azure_credential
 
 
 def _check_and_init_azure_deps():
@@ -138,38 +139,6 @@ def handle_exceptions(func):
     return _inner_func
 
 
-@check_azure_deps
-def create_cacheable_default_azure_credentials(*args, **kwargs):
-    """azure.identity.DefaultAzureCredential is not readily cacheable in a dictionary
-    because it does not have a content based hash and equality implementations.
-
-    We implement a subclass CacheableDefaultAzureCredential to add them.
-
-    We need this because credentials will be part of the cache key in _ClientCache.
-    """
-    from azure.identity import DefaultAzureCredential
-
-    class CacheableDefaultAzureCredential(DefaultAzureCredential):
-        def __init__(self, *args, **kwargs):
-            super(CacheableDefaultAzureCredential, self).__init__(*args, **kwargs)
-            # Just hashing all the kwargs works because they are all individually
-            # hashable as of 7/15/2022.
-            #
-            # What if Azure adds unhashable things to kwargs?
-            # - We will have CI to catch this (it will always install the latest Azure SDKs)
-            # - In Metaflow usage today we never specify any kwargs anyway. (see last line
-            # of the outer function.
-            self._hash_code = hash((args, tuple(sorted(kwargs.items()))))
-
-        def __hash__(self):
-            return self._hash_code
-
-        def __eq__(self, other):
-            return hash(self) == hash(other)
-
-    return CacheableDefaultAzureCredential(*args, **kwargs)
-
-
 @check_azure_deps
 def create_static_token_credential(token_):
     from azure.core.credentials import TokenCredential
@@ -200,9 +169,7 @@ def create_static_token_credential(token_):
         def get_token(self, *_scopes, **_kwargs):
 
             if (self._cached_token.expires_on - time.time()) < 300:
-                from azure.identity import DefaultAzureCredential
-
-                self._credential = DefaultAzureCredential()
+                self._credential = create_cacheable_azure_credential()
             if self._credential:
                 return self._credential.get_token(*_scopes, **_kwargs)
             return self._cached_token
metaflow/plugins/azure/blob_service_client_factory.py
CHANGED
@@ -1,9 +1,11 @@
 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
 from metaflow.plugins.azure.azure_utils import (
-    create_cacheable_default_azure_credentials,
     check_azure_deps,
 )
+from metaflow.plugins.azure.azure_credential import (
+    create_cacheable_azure_credential,
+)
 
 import os
 import threading
@@ -125,7 +127,7 @@ def get_azure_blob_service_client(
     blob_service_endpoint = AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
 
     if not credential:
-        credential = create_cacheable_default_azure_credentials()
+        credential = create_cacheable_azure_credential()
         credential_is_cacheable = True
 
     if not credential_is_cacheable:
metaflow/plugins/datastores/azure_storage.py
CHANGED
@@ -32,6 +32,8 @@ from metaflow.plugins.storage_executor import (
     handle_executor_exceptions,
 )
 
+from metaflow.plugins.azure.azure_credential import create_cacheable_azure_credential
+
 AZURE_STORAGE_DOWNLOAD_MAX_CONCURRENCY = 4
 AZURE_STORAGE_UPLOAD_MAX_CONCURRENCY = 16
 
@@ -272,12 +274,10 @@ class AzureStorage(DataStoreStorage):
         if not self._default_scope_token or (
            self._default_scope_token.expires_on - time.time() < 300
         ):
-            from azure.identity import DefaultAzureCredential
-
-            credential = DefaultAzureCredential()
-            self._default_scope_token = credential.get_token(
-                AZURE_STORAGE_DEFAULT_SCOPE
-            )
+            credential = create_cacheable_azure_credential()
+            self._default_scope_token = credential.get_token(
+                AZURE_STORAGE_DEFAULT_SCOPE
+            )
         return self._default_scope_token
 
     @property
metaflow/plugins/datatools/s3/s3.py
CHANGED
@@ -21,7 +21,6 @@ from metaflow.metaflow_config import (
     TEMPDIR,
 )
 from metaflow.util import (
-    namedtuple_with_defaults,
     is_stringish,
     to_bytes,
     to_unicode,
@@ -29,6 +28,7 @@ from metaflow.util import (
     url_quote,
     url_unquote,
 )
+from metaflow.tuple_util import namedtuple_with_defaults
 from metaflow.exception import MetaflowException
 from metaflow.debug import debug
 import metaflow.tracing as tracing
metaflow/plugins/gcp/__init__.py
CHANGED
@@ -0,0 +1 @@
+from .gs_storage_client_factory import get_credentials
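With the `gcp-secret-manager` provider registered earlier in this diff, the backend can be selected through configuration. A sketch, assuming environment-based config (`from_conf` keys take a `METAFLOW_` prefix; the values are hypothetical):

```python
# Sketch: selecting the new GCP provider as the default secrets backend.
import os

os.environ["METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE"] = "gcp-secret-manager"
os.environ["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = "projects/1234567890/secrets/"

# A step can then reference the short name directly, e.g.:
#     @secrets(sources=["mysecret"])
```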
|