metaflow 2.11.14__py2.py3-none-any.whl → 2.11.16__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +0 -120
- metaflow/clone_util.py +6 -0
- metaflow/datastore/datastore_set.py +1 -1
- metaflow/datastore/flow_datastore.py +32 -6
- metaflow/datastore/task_datastore.py +50 -0
- metaflow/extension_support/plugins.py +2 -0
- metaflow/metaflow_config.py +24 -0
- metaflow/metaflow_environment.py +2 -2
- metaflow/plugins/__init__.py +20 -0
- metaflow/plugins/airflow/airflow.py +7 -0
- metaflow/plugins/argo/argo_workflows.py +17 -0
- metaflow/plugins/aws/batch/batch_cli.py +6 -4
- metaflow/plugins/azure/__init__.py +3 -0
- metaflow/plugins/azure/azure_credential.py +53 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_utils.py +2 -35
- metaflow/plugins/azure/blob_service_client_factory.py +4 -2
- metaflow/plugins/datastores/azure_storage.py +6 -6
- metaflow/plugins/datatools/s3/s3.py +9 -9
- metaflow/plugins/gcp/__init__.py +1 -0
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
- metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
- metaflow/plugins/kubernetes/kubernetes.py +85 -8
- metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
- metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +208 -201
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
- metaflow/plugins/logs_cli.py +358 -0
- metaflow/plugins/timeout_decorator.py +2 -1
- metaflow/task.py +1 -12
- metaflow/tuple_util.py +27 -0
- metaflow/util.py +0 -15
- metaflow/version.py +1 -1
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/METADATA +2 -2
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/RECORD +42 -36
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/LICENSE +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/WHEEL +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/entry_points.txt +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,240 @@
|
|
1
|
+
from metaflow.plugins.secrets import SecretsProvider
|
2
|
+
import re
|
3
|
+
import base64
|
4
|
+
import codecs
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
from metaflow.exception import MetaflowException
|
7
|
+
import sys
|
8
|
+
from metaflow.metaflow_config import AZURE_KEY_VAULT_PREFIX
|
9
|
+
from metaflow.plugins.azure.azure_credential import (
|
10
|
+
create_cacheable_azure_credential,
|
11
|
+
)
|
12
|
+
|
13
|
+
|
14
|
+
class MetaflowAzureKeyVaultBadVault(MetaflowException):
    """Signals a fully qualified secret id whose host is not an acceptable Key Vault domain or vault name."""
|
16
|
+
|
17
|
+
|
18
|
+
class MetaflowAzureKeyVaultBadSecretType(MetaflowException):
    """Signals a Key Vault object type other than "secrets" in the secret id."""
|
20
|
+
|
21
|
+
|
22
|
+
class MetaflowAzureKeyVaultBadSecretPath(MetaflowException):
    """Signals a secret URL path that does not have the expected number of segments."""
|
24
|
+
|
25
|
+
|
26
|
+
class MetaflowAzureKeyVaultBadSecretName(MetaflowException):
    """Signals a secret name that does not follow the expected naming pattern."""
|
28
|
+
|
29
|
+
|
30
|
+
class MetaflowAzureKeyVaultBadSecretVersion(MetaflowException):
    """Signals a secret version that does not follow the expected version pattern."""
|
32
|
+
|
33
|
+
|
34
|
+
class MetaflowAzureKeyVaultBadSecret(MetaflowException):
    """Signals a secret id that matches none of the forms Metaflow supports."""
|
36
|
+
|
37
|
+
|
38
|
+
class AzureKeyVaultSecretsProvider(SecretsProvider):
    """Secrets provider that fetches a single secret from Azure Key Vault.

    Accepted forms of ``secret_id``:

    1. ``https://<key-vault-name><.vault-domain>/secrets/<secret-name>/<secret-version>``
       (what the Azure portal shows, easy to copy and paste).
    2. The same URL without the trailing version.
    3. A bare secret name such as ``mysecret``.
    4. ``<secret-name>/<secret-version>``.

    Forms 3 and 4 require METAFLOW_AZURE_KEY_VAULT_PREFIX to be set; the
    prefix supplies the ``https://<vault><domain>`` part of the URL.

    Naming rules are taken from
    https://learn.microsoft.com/en-us/azure/key-vault/general/about-keys-secrets-certificates
    """

    TYPE = "az-key-vault"
    key_vault_domains = [
        ".vault.azure.net",
        ".vault.azure.cn",
        ".vault.usgovcloudapi.net",
        ".vault.microsoftazure.de",
    ]
    # The object type can be "keys", "secrets" or "certificates"; only secrets
    # are supported for now.
    supported_vault_object_types = ["secrets"]

    # Compiled once at class creation. They are applied with fullmatch() so a
    # trailing newline (which `$` alone would tolerate) is rejected.
    # Vault name: 3-24 characters from 0-9, a-z, A-Z and '-', no consecutive '-'.
    _VAULT_NAME_RE = re.compile(r"^(?!.*--)[a-zA-Z0-9-]{3,24}$")
    # Secret name: 1-127 characters, starts with a letter, then 0-9, a-z, A-Z, '-'.
    _SECRET_NAME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{0,126}$")
    # Object version: a 32 character alphanumeric identifier.
    _OBJECT_VERSION_RE = re.compile(r"^[a-zA-Z0-9]{32}$")

    def _is_valid_vault_name(self, vault_name):
        """Return True if ``vault_name`` satisfies the vault naming pattern."""
        return self._VAULT_NAME_RE.fullmatch(vault_name) is not None

    def _is_valid_object_type(self, secret_type):
        """Return True if ``secret_type`` is one of the supported object types."""
        return secret_type in self.supported_vault_object_types

    def _is_valid_secret_name(self, secret_name):
        """Return True if ``secret_name`` satisfies the secret naming pattern."""
        return self._SECRET_NAME_RE.fullmatch(secret_name) is not None

    def _is_valid_object_version(self, secret_version):
        """Return True if ``secret_version`` is a 32 character alphanumeric string."""
        return self._OBJECT_VERSION_RE.fullmatch(secret_version) is not None

    def _is_secret_id_fully_qualified_url(self, secret_id):
        """Validate a fully qualified Key Vault secret URL.

        Valid examples:
            https://myvault.vault.azure.net/secrets/mysecret/ec96f02080254f109c51a1f14cdb1931
            https://myvault.vault.azure.net/secrets/mysecret/

        The checks follow the recommendations in
        https://devblogs.microsoft.com/azure-sdk/guidance-for-applications-using-the-key-vault-libraries/

        Returns
        -------
        bool
            False if ``secret_id`` is None, empty, does not start with
            ``https://`` or cannot be parsed as a URL; True if every
            component is valid.

        Raises
        ------
        MetaflowAzureKeyVaultBadVault
            The host is not under a known Key Vault domain, carries user
            info, or the vault name is malformed.
        MetaflowAzureKeyVaultBadSecretPath
            The path does not have two or three segments.
        MetaflowAzureKeyVaultBadSecretType
            The first path segment is not a supported object type.
        MetaflowAzureKeyVaultBadSecretName
            The secret name is malformed.
        MetaflowAzureKeyVaultBadSecretVersion
            A version is present but malformed.
        """
        if not secret_id or not secret_id.startswith("https://"):
            return False
        try:
            parsed_vault_url = urlparse(secret_id)
        except ValueError:
            print("invalid vault url", file=sys.stderr)
            return False

        # A "user@host" authority is never a legitimate Key Vault URL; reject
        # it up front so the host checks below only ever see the real host.
        if "@" in parsed_vault_url.netloc:
            raise MetaflowAzureKeyVaultBadVault("bad key vault domain %s" % secret_id)

        # Use the parsed hostname (port stripped) and require the Key Vault
        # domain to be a *suffix*. A substring test on the raw netloc would
        # mis-slice the vault name whenever a port is present.
        hostname = parsed_vault_url.hostname or ""
        actual_k_v_domain = next(
            (
                k_v_domain
                for k_v_domain in self.key_vault_domains
                if hostname.endswith(k_v_domain)
            ),
            None,
        )
        if actual_k_v_domain is None:
            # The secret_id started with https:// but its host is not under any
            # of the known key vault domains, so it cannot be a Key Vault URL.
            raise MetaflowAzureKeyVaultBadVault("bad key vault domain %s" % secret_id)

        # The domain is valid; whatever precedes it must be a valid vault name.
        vault_name = hostname[: -len(actual_k_v_domain)]
        if not self._is_valid_vault_name(vault_name):
            raise MetaflowAzureKeyVaultBadVault("bad key vault name %s" % vault_name)

        # Expected path: <object-type>/<secret-name>[/<secret-version>]
        path_parts = parsed_vault_url.path.strip("/").split("/")
        total_path_parts = len(path_parts)
        if total_path_parts < 2 or total_path_parts > 3:
            raise MetaflowAzureKeyVaultBadSecretPath(
                "bad secret uri path %s" % path_parts
            )

        object_type = path_parts[0]
        if not self._is_valid_object_type(object_type):
            raise MetaflowAzureKeyVaultBadSecretType("bad secret type %s" % object_type)

        secret_name = path_parts[1]
        if not self._is_valid_secret_name(secret_name=secret_name):
            raise MetaflowAzureKeyVaultBadSecretName("bad secret name %s" % secret_name)

        if total_path_parts == 3 and not self._is_valid_object_version(path_parts[2]):
            raise MetaflowAzureKeyVaultBadSecretVersion(
                "bad secret version %s" % path_parts[2]
            )

        return True

    def _expand_partial_secret_id(self, secret_id):
        """Build the full secret URL for a partial id from AZURE_KEY_VAULT_PREFIX."""
        domain = AZURE_KEY_VAULT_PREFIX.rstrip("/")
        return "%s/secrets/%s" % (domain, secret_id)

    def _is_partial_secret_valid(self, secret_id):
        """Validate a partial secret id (``name`` or ``name/version``).

        The id is expanded to a full URL with AZURE_KEY_VAULT_PREFIX and then
        checked by ``_is_secret_id_fully_qualified_url``, so the exceptions
        documented there can propagate from here as well.

        Raises
        ------
        ValueError
            AZURE_KEY_VAULT_PREFIX is not set.
        """
        secret_parts = secret_id.strip("/").split("/")
        if not 1 <= len(secret_parts) <= 2:
            return False

        # A partial id is meaningless without the vault prefix.
        if not AZURE_KEY_VAULT_PREFIX:
            raise ValueError(
                "cannot use simple secret id without setting METAFLOW_AZURE_KEY_VAULT_PREFIX. %s"
                % AZURE_KEY_VAULT_PREFIX
            )
        return self._is_secret_id_fully_qualified_url(
            self._expand_partial_secret_id(secret_id)
        )

    def _sanitize_key_as_env_var(self, key):
        """
        Sanitize a key as an environment variable name.
        This is purely a convenience trade-off to cover common cases well, vs. introducing
        ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).

        1/27/2023(jackie):

        We start with few rules and should *sparingly* add more over time.
        Also, it's TBD whether all possible providers will share the same sanitization logic.
        Therefore we will keep this function private for now
        """
        return key.replace("-", "_").replace(".", "_").replace("/", "_")

    def get_secret_as_dict(self, secret_id, options=None, role=None):
        """Fetch a secret from Azure Key Vault and return it as a one-entry dict.

        Parameters
        ----------
        secret_id : str
            One of the four forms described in the class docstring.
        options : dict, optional
            If it contains a non-None ``env_var_name``, that value (sanitized)
            is used as the key of the result; otherwise the secret's own name
            (sanitized) is used.
        role : optional
            Not used by this provider.

        Returns
        -------
        dict
            ``{<sanitized key>: <secret value>}``.

        Raises
        ------
        MetaflowAzureKeyVaultBadSecret
            ``secret_id`` is empty or is not in a supported form.
        ValueError
            A partial ``secret_id`` is used without AZURE_KEY_VAULT_PREFIX.

        The more specific validation errors documented on
        ``_is_secret_id_fully_qualified_url`` may also propagate.
        """
        # Never mutate or share a default dict between calls.
        options = options or {}

        # An empty or missing secret id can never be resolved.
        if secret_id is None or secret_id == "":
            raise MetaflowAzureKeyVaultBadSecret("empty secret id is not supported")

        if secret_id.startswith("https://"):
            # Forms 1/2: the id is already a URL; it must be fully qualified.
            if not self._is_secret_id_fully_qualified_url(secret_id=secret_id):
                raise MetaflowAzureKeyVaultBadSecret(
                    "unsupported secret %s" % secret_id
                )
            full_secret = secret_id
        else:
            # Forms 3/4: `secret_name` or `secret_name/secret_version`.
            if not self._is_partial_secret_valid(secret_id=secret_id):
                raise MetaflowAzureKeyVaultBadSecret(
                    "unsupported partial secret %s" % secret_id
                )
            full_secret = self._expand_partial_secret_id(secret_id)

        # The secret URL is known to be good; build the Secret Client.
        az_credentials = create_cacheable_azure_credential()
        res = urlparse(full_secret)
        az_vault_url = "%s://%s" % (
            res.scheme,
            res.netloc,
        )  # https://myvault.vault.azure.net

        # Drop the leading object type ("secrets"); what remains is
        # [<secret-name>] or [<secret-name>, <secret-version>].
        secret_data = res.path.strip("/").split("/")[1:]
        secret_name = secret_data[0]
        secret_version = secret_data[1] if len(secret_data) > 1 else None

        # Imported lazily so the Azure SDK is only needed when actually used.
        from azure.keyvault.secrets import SecretClient

        client = SecretClient(vault_url=az_vault_url, credential=az_credentials)
        key_vault_secret_val = client.get_secret(
            name=secret_name, version=secret_version
        )

        env_var_name = options.get("env_var_name")
        if env_var_name is None:
            env_var_name = key_vault_secret_val.name

        return {
            self._sanitize_key_as_env_var(env_var_name): key_vault_secret_val.value
        }
|
@@ -7,6 +7,7 @@ from metaflow.plugins.azure.azure_exceptions import (
|
|
7
7
|
MetaflowAzurePackageError,
|
8
8
|
)
|
9
9
|
from metaflow.exception import MetaflowInternalError, MetaflowException
|
10
|
+
from metaflow.plugins.azure.azure_credential import create_cacheable_azure_credential
|
10
11
|
|
11
12
|
|
12
13
|
def _check_and_init_azure_deps():
|
@@ -138,38 +139,6 @@ def handle_exceptions(func):
|
|
138
139
|
return _inner_func
|
139
140
|
|
140
141
|
|
141
|
-
@check_azure_deps
|
142
|
-
def create_cacheable_default_azure_credentials(*args, **kwargs):
|
143
|
-
"""azure.identity.DefaultAzureCredential is not readily cacheable in a dictionary
|
144
|
-
because it does not have a content based hash and equality implementations.
|
145
|
-
|
146
|
-
We implement a subclass CacheableDefaultAzureCredential to add them.
|
147
|
-
|
148
|
-
We need this because credentials will be part of the cache key in _ClientCache.
|
149
|
-
"""
|
150
|
-
from azure.identity import DefaultAzureCredential
|
151
|
-
|
152
|
-
class CacheableDefaultAzureCredential(DefaultAzureCredential):
|
153
|
-
def __init__(self, *args, **kwargs):
|
154
|
-
super(CacheableDefaultAzureCredential, self).__init__(*args, **kwargs)
|
155
|
-
# Just hashing all the kwargs works because they are all individually
|
156
|
-
# hashable as of 7/15/2022.
|
157
|
-
#
|
158
|
-
# What if Azure adds unhashable things to kwargs?
|
159
|
-
# - We will have CI to catch this (it will always install the latest Azure SDKs)
|
160
|
-
# - In Metaflow usage today we never specify any kwargs anyway. (see last line
|
161
|
-
# of the outer function.
|
162
|
-
self._hash_code = hash((args, tuple(sorted(kwargs.items()))))
|
163
|
-
|
164
|
-
def __hash__(self):
|
165
|
-
return self._hash_code
|
166
|
-
|
167
|
-
def __eq__(self, other):
|
168
|
-
return hash(self) == hash(other)
|
169
|
-
|
170
|
-
return CacheableDefaultAzureCredential(*args, **kwargs)
|
171
|
-
|
172
|
-
|
173
142
|
@check_azure_deps
|
174
143
|
def create_static_token_credential(token_):
|
175
144
|
from azure.core.credentials import TokenCredential
|
@@ -200,9 +169,7 @@ def create_static_token_credential(token_):
|
|
200
169
|
def get_token(self, *_scopes, **_kwargs):
|
201
170
|
|
202
171
|
if (self._cached_token.expires_on - time.time()) < 300:
|
203
|
-
|
204
|
-
|
205
|
-
self._credential = DefaultAzureCredential()
|
172
|
+
self._credential = create_cacheable_azure_credential()
|
206
173
|
if self._credential:
|
207
174
|
return self._credential.get_token(*_scopes, **_kwargs)
|
208
175
|
return self._cached_token
|
@@ -1,9 +1,11 @@
|
|
1
1
|
from metaflow.exception import MetaflowException
|
2
2
|
from metaflow.metaflow_config import AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
|
3
3
|
from metaflow.plugins.azure.azure_utils import (
|
4
|
-
create_cacheable_default_azure_credentials,
|
5
4
|
check_azure_deps,
|
6
5
|
)
|
6
|
+
from metaflow.plugins.azure.azure_credential import (
|
7
|
+
create_cacheable_azure_credential,
|
8
|
+
)
|
7
9
|
|
8
10
|
import os
|
9
11
|
import threading
|
@@ -125,7 +127,7 @@ def get_azure_blob_service_client(
|
|
125
127
|
blob_service_endpoint = AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
|
126
128
|
|
127
129
|
if not credential:
|
128
|
-
credential =
|
130
|
+
credential = create_cacheable_azure_credential()
|
129
131
|
credential_is_cacheable = True
|
130
132
|
|
131
133
|
if not credential_is_cacheable:
|
@@ -32,6 +32,8 @@ from metaflow.plugins.storage_executor import (
|
|
32
32
|
handle_executor_exceptions,
|
33
33
|
)
|
34
34
|
|
35
|
+
from metaflow.plugins.azure.azure_credential import create_cacheable_azure_credential
|
36
|
+
|
35
37
|
AZURE_STORAGE_DOWNLOAD_MAX_CONCURRENCY = 4
|
36
38
|
AZURE_STORAGE_UPLOAD_MAX_CONCURRENCY = 16
|
37
39
|
|
@@ -272,12 +274,10 @@ class AzureStorage(DataStoreStorage):
|
|
272
274
|
if not self._default_scope_token or (
|
273
275
|
self._default_scope_token.expires_on - time.time() < 300
|
274
276
|
):
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
AZURE_STORAGE_DEFAULT_SCOPE
|
280
|
-
)
|
277
|
+
credential = create_cacheable_azure_credential()
|
278
|
+
self._default_scope_token = credential.get_token(
|
279
|
+
AZURE_STORAGE_DEFAULT_SCOPE
|
280
|
+
)
|
281
281
|
return self._default_scope_token
|
282
282
|
|
283
283
|
@property
|
@@ -21,7 +21,6 @@ from metaflow.metaflow_config import (
|
|
21
21
|
TEMPDIR,
|
22
22
|
)
|
23
23
|
from metaflow.util import (
|
24
|
-
namedtuple_with_defaults,
|
25
24
|
is_stringish,
|
26
25
|
to_bytes,
|
27
26
|
to_unicode,
|
@@ -29,6 +28,7 @@ from metaflow.util import (
|
|
29
28
|
url_quote,
|
30
29
|
url_unquote,
|
31
30
|
)
|
31
|
+
from metaflow.tuple_util import namedtuple_with_defaults
|
32
32
|
from metaflow.exception import MetaflowException
|
33
33
|
from metaflow.debug import debug
|
34
34
|
import metaflow.tracing as tracing
|
@@ -1245,12 +1245,12 @@ class S3(object):
|
|
1245
1245
|
|
1246
1246
|
def _store():
|
1247
1247
|
for key_obj in key_objs:
|
1248
|
-
if isinstance(key_obj,
|
1249
|
-
key = key_obj[0]
|
1250
|
-
obj = key_obj[1]
|
1251
|
-
else:
|
1248
|
+
if isinstance(key_obj, S3PutObject):
|
1252
1249
|
key = key_obj.key
|
1253
1250
|
obj = key_obj.value
|
1251
|
+
else:
|
1252
|
+
key = key_obj[0]
|
1253
|
+
obj = key_obj[1]
|
1254
1254
|
store_info = {
|
1255
1255
|
"key": key,
|
1256
1256
|
"content_type": getattr(key_obj, "content_type", None),
|
@@ -1319,12 +1319,12 @@ class S3(object):
|
|
1319
1319
|
|
1320
1320
|
def _check():
|
1321
1321
|
for key_path in key_paths:
|
1322
|
-
if isinstance(key_path,
|
1323
|
-
key = key_path[0]
|
1324
|
-
path = key_path[1]
|
1325
|
-
else:
|
1322
|
+
if isinstance(key_path, S3PutObject):
|
1326
1323
|
key = key_path.key
|
1327
1324
|
path = key_path.path
|
1325
|
+
else:
|
1326
|
+
key = key_path[0]
|
1327
|
+
path = key_path[1]
|
1328
1328
|
store_info = {
|
1329
1329
|
"key": key,
|
1330
1330
|
"content_type": getattr(key_path, "content_type", None),
|
metaflow/plugins/gcp/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
from .gs_storage_client_factory import get_credentials
|
@@ -0,0 +1,169 @@
|
|
1
|
+
import base64
|
2
|
+
import json
|
3
|
+
from json import JSONDecodeError
|
4
|
+
|
5
|
+
|
6
|
+
from metaflow.exception import MetaflowException
|
7
|
+
from metaflow.plugins.secrets import SecretsProvider
|
8
|
+
import re
|
9
|
+
from metaflow.plugins.gcp.gs_storage_client_factory import get_credentials
|
10
|
+
from metaflow.metaflow_config import GCP_SECRET_MANAGER_PREFIX
|
11
|
+
|
12
|
+
|
13
|
+
class MetaflowGcpSecretsManagerBadResponse(MetaflowException):
    """Signals a GCP Secrets Manager response that is invalid in some way."""
|
15
|
+
|
16
|
+
|
17
|
+
class MetaflowGcpSecretsManagerDuplicateKey(MetaflowException):
    """Signals that a GCP Secrets Manager response contains duplicate keys."""
|
19
|
+
|
20
|
+
|
21
|
+
class MetaflowGcpSecretsManagerJSONParseError(MetaflowException):
    """Signals that the SecretString returned by GCP Secrets Manager is not valid JSON."""
|
23
|
+
|
24
|
+
|
25
|
+
class MetaflowGcpSecretsManagerNotJSONObject(MetaflowException):
    """Signals that the SecretString returned by GCP Secrets Manager is JSON but not a JSON dictionary."""
|
27
|
+
|
28
|
+
|
29
|
+
def _sanitize_key_as_env_var(key):
|
30
|
+
"""
|
31
|
+
Sanitize a key as an environment variable name.
|
32
|
+
This is purely a convenience trade-off to cover common cases well, vs. introducing
|
33
|
+
ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).
|
34
|
+
|
35
|
+
1/27/2023(jackie):
|
36
|
+
|
37
|
+
We start with few rules and should *sparingly* add more over time.
|
38
|
+
Also, it's TBD whether all possible providers will share the same sanitization logic.
|
39
|
+
Therefore we will keep this function private for now
|
40
|
+
"""
|
41
|
+
return key.replace("-", "_").replace(".", "_").replace("/", "_")
|
42
|
+
|
43
|
+
|
44
|
+
class GcpSecretManagerSecretsProvider(SecretsProvider):
    """Secrets provider backed by GCP Secret Manager."""

    TYPE = "gcp-secret-manager"

    def get_secret_as_dict(self, secret_id, options=None, role=None):
        """
        Reads a secret from GCP Secrets Manager and returns it as a dictionary of environment variables.

        Accepted forms of ``secret_id`` (a full id looks like
        projects/1234567890/secrets/mysecret/versions/latest):

        1. Full path like projects/1234567890/secrets/mysecret/versions/latest.
           This is what you would specify if you used the GCP SDK.
        2. Full path without the version like projects/1234567890/secrets/mysecret.
           This is what the GCP console shows, which makes copy & paste easier.
        3. Simple string like mysecret.
        4. Simple string with a /versions/<version> suffix like mysecret/versions/1.

        Forms 3 and 4 require METAFLOW_GCP_SECRET_MANAGER_PREFIX to be set.
        When no version is given, "latest" is used.

        Recognised ``options`` keys:

        - ``json`` (default False): parse the payload as JSON. The JSON must
          be a top-level object; each K/V becomes an entry in the result, with
          V always cast to a string.
        - ``binary`` (default False, only consulted when ``json`` is falsy):
          return the raw payload base64 encoded instead of decoding it as
          UTF-8 text.
        - ``env_var_name`` (only consulted when ``json`` is falsy): key to use
          for the single entry instead of the secret's name.

        All keys in the result are sanitized to be more valid environment variable names. This is
        done on a best effort basis. Further validation is expected to be done by the invoking
        @secrets decorator itself.

        :param secret_id: GCP Secrets Manager secret ID
        :param options: optional dict of the keys described above
        :param role: not used by this provider
        :return: dict of environment variables. All keys and values are strings.
        :raises ValueError: if secret_id has an unsupported form, or a simple
            secret_id is used without METAFLOW_GCP_SECRET_MANAGER_PREFIX
        :raises MetaflowGcpSecretsManagerJSONParseError: if ``json`` is set and
            the payload is not valid JSON
        :raises MetaflowGcpSecretsManagerNotJSONObject: if ``json`` is set and
            the payload is JSON but not an object
        :raises MetaflowGcpSecretsManagerDuplicateKey: if two keys sanitize to
            the same name
        """
        # Imported lazily so the GCP SDK is only required when actually used.
        from google.cloud.secretmanager_v1.services.secret_manager_service import (
            SecretManagerServiceClient,
        )
        from google.cloud.secretmanager_v1.services.secret_manager_service.transports import (
            SecretManagerServiceTransport,
        )

        # Never mutate or share a default dict between calls.
        options = options or {}

        match_full = re.match(
            r"^projects/\d+/secrets/([\w\-]+)(/versions/([\w\-]+))?$", secret_id
        )
        match_partial = re.match(r"^([\w\-]+)(/versions/[\w\-]+)?$", secret_id)
        if match_full:
            # Full path
            env_var_name = match_full.group(1)
            if match_full.group(3):
                # With version specified
                full_secret_name = secret_id
            else:
                # No version specified, use latest
                full_secret_name = secret_id + "/versions/latest"
        elif match_partial:
            # Partial path, possibly with /versions/<version> suffix
            env_var_name = secret_id
            if not GCP_SECRET_MANAGER_PREFIX:
                raise ValueError(
                    "Cannot use simple secret_id without setting METAFLOW_GCP_SECRET_MANAGER_PREFIX. %s"
                    % GCP_SECRET_MANAGER_PREFIX
                )
            if match_partial.group(2):
                # With version specified
                full_secret_name = "%s%s" % (GCP_SECRET_MANAGER_PREFIX, secret_id)
                env_var_name = match_partial.group(1)
            else:
                # No version specified, use latest
                full_secret_name = "%s%s/versions/latest" % (
                    GCP_SECRET_MANAGER_PREFIX,
                    secret_id,
                )
        else:
            raise ValueError(
                "Invalid secret_id: %s. Must be either a full path or a simple string."
                % secret_id
            )

        result = {}

        def _sanitize_and_add_entry_to_result(k, v):
            # Two jobs - sanitize, and check for dupes
            sanitized_k = _sanitize_key_as_env_var(k)
            if sanitized_k in result:
                raise MetaflowGcpSecretsManagerDuplicateKey(
                    "Duplicate key in secret: '%s' (sanitizes to '%s')"
                    % (k, sanitized_k)
                )
            result[sanitized_k] = v

        credentials, _ = get_credentials(
            scopes=SecretManagerServiceTransport.AUTH_SCOPES
        )
        client = SecretManagerServiceClient(credentials=credentials)
        response = client.access_secret_version(request={"name": full_secret_name})
        payload_bytes = response.payload.data

        if options.get("json", False):
            payload_str = payload_bytes.decode("UTF-8")
            try:
                obj = json.loads(payload_str)
            except JSONDecodeError as e:
                # Surface malformed JSON as the provider's dedicated exception;
                # the original decode error stays attached as __cause__.
                raise MetaflowGcpSecretsManagerJSONParseError(
                    "Secret string could not be parsed as JSON"
                ) from e
            if not isinstance(obj, dict):
                raise MetaflowGcpSecretsManagerNotJSONObject(
                    "Secret string is a JSON, but not an object (dict-like) - actual type %s."
                    % type(obj)
                )
            for k, v in obj.items():
                # We try to make it work here - cast to string always
                _sanitize_and_add_entry_to_result(k, str(v))
        else:
            if options.get("env_var_name"):
                env_var_name = options["env_var_name"]

            if options.get("binary", False):
                # b64encode returns bytes; decode so the value is a str like
                # every other entry. The raw payload is deliberately NOT
                # UTF-8 decoded here - it may be arbitrary binary data.
                _sanitize_and_add_entry_to_result(
                    env_var_name, base64.b64encode(payload_bytes).decode("ascii")
                )
            else:
                _sanitize_and_add_entry_to_result(
                    env_var_name, payload_bytes.decode("UTF-8")
                )

        return result
|
@@ -8,7 +8,7 @@ def _get_cache_key():
|
|
8
8
|
return os.getpid(), threading.get_ident()
|
9
9
|
|
10
10
|
|
11
|
-
def
|
11
|
+
def _get_gs_storage_client_default():
|
12
12
|
cache_key = _get_cache_key()
|
13
13
|
if cache_key not in _client_cache:
|
14
14
|
from google.cloud import storage
|
@@ -19,3 +19,54 @@ def get_gs_storage_client():
|
|
19
19
|
credentials=credentials, project=project_id
|
20
20
|
)
|
21
21
|
return _client_cache[cache_key]
|
22
|
+
|
23
|
+
|
24
|
+
class GcpDefaultClientProvider(object):
    """Client provider registered under the name "gcp-default".

    Both entry points are static: the storage client comes from
    ``_get_gs_storage_client_default`` and credentials come from
    ``google.auth.default``.
    """

    name = "gcp-default"

    @staticmethod
    def get_gs_storage_client(*args, **kwargs):
        """Return the default storage client; positional and keyword arguments are ignored."""
        return _get_gs_storage_client_default()

    @staticmethod
    def get_credentials(scopes, *args, **kwargs):
        """Return ``google.auth.default(scopes=scopes)``; extra arguments are ignored."""
        # Imported lazily so google-auth is only needed when credentials are requested.
        import google.auth as google_auth

        return google_auth.default(scopes=scopes)
|
36
|
+
|
37
|
+
|
38
|
+
# Provider class selected by DEFAULT_GCP_CLIENT_PROVIDER. It is looked up on
# the first call to get_gs_storage_client() / get_credentials() and reused
# afterwards; None means the lookup has not happened yet.
cached_provider_class = None
|
39
|
+
|
40
|
+
|
41
|
+
def get_gs_storage_client():
    """Return a storage client from the configured GCP client provider.

    The provider whose ``name`` equals DEFAULT_GCP_CLIENT_PROVIDER is looked
    up in GCP_CLIENT_PROVIDERS on first use and remembered in the module-level
    ``cached_provider_class``.

    Raises:
        ValueError: no registered provider has the configured name.
    """
    global cached_provider_class

    # Fast path: the provider was already resolved by an earlier call.
    if cached_provider_class is not None:
        return cached_provider_class.get_gs_storage_client()

    # Imported here rather than at module level, as in the rest of this module.
    from metaflow.metaflow_config import DEFAULT_GCP_CLIENT_PROVIDER
    from metaflow.plugins import GCP_CLIENT_PROVIDERS

    for candidate in GCP_CLIENT_PROVIDERS:
        if candidate.name == DEFAULT_GCP_CLIENT_PROVIDER:
            cached_provider_class = candidate
            return candidate.get_gs_storage_client()

    raise ValueError(
        "Cannot find GCP Client provider %s" % DEFAULT_GCP_CLIENT_PROVIDER
    )
|
56
|
+
|
57
|
+
|
58
|
+
def get_credentials(scopes, *args, **kwargs):
    """Return credentials from the configured GCP client provider.

    The provider whose ``name`` equals DEFAULT_GCP_CLIENT_PROVIDER is looked
    up in GCP_CLIENT_PROVIDERS on first use and remembered in the module-level
    ``cached_provider_class``. ``scopes`` and any extra arguments are passed
    through to the provider's ``get_credentials``.

    Raises:
        ValueError: no registered provider has the configured name.
    """
    global cached_provider_class

    # Fast path: the provider was already resolved by an earlier call.
    if cached_provider_class is not None:
        return cached_provider_class.get_credentials(scopes, *args, **kwargs)

    # Imported here rather than at module level, as in the rest of this module.
    from metaflow.metaflow_config import DEFAULT_GCP_CLIENT_PROVIDER
    from metaflow.plugins import GCP_CLIENT_PROVIDERS

    for candidate in GCP_CLIENT_PROVIDERS:
        if candidate.name == DEFAULT_GCP_CLIENT_PROVIDER:
            cached_provider_class = candidate
            return candidate.get_credentials(scopes, *args, **kwargs)

    raise ValueError(
        "Cannot find GCP Client provider %s" % DEFAULT_GCP_CLIENT_PROVIDER
    )
|