metaflow 2.11.15__py2.py3-none-any.whl → 2.12.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. metaflow/__init__.py +8 -0
  2. metaflow/_vendor/importlib_metadata/__init__.py +1063 -0
  3. metaflow/_vendor/importlib_metadata/_adapters.py +68 -0
  4. metaflow/_vendor/importlib_metadata/_collections.py +30 -0
  5. metaflow/_vendor/importlib_metadata/_compat.py +71 -0
  6. metaflow/_vendor/importlib_metadata/_functools.py +104 -0
  7. metaflow/_vendor/importlib_metadata/_itertools.py +73 -0
  8. metaflow/_vendor/importlib_metadata/_meta.py +48 -0
  9. metaflow/_vendor/importlib_metadata/_text.py +99 -0
  10. metaflow/_vendor/importlib_metadata/py.typed +0 -0
  11. metaflow/_vendor/typeguard/__init__.py +48 -0
  12. metaflow/_vendor/typeguard/_checkers.py +906 -0
  13. metaflow/_vendor/typeguard/_config.py +108 -0
  14. metaflow/_vendor/typeguard/_decorators.py +237 -0
  15. metaflow/_vendor/typeguard/_exceptions.py +42 -0
  16. metaflow/_vendor/typeguard/_functions.py +307 -0
  17. metaflow/_vendor/typeguard/_importhook.py +213 -0
  18. metaflow/_vendor/typeguard/_memo.py +48 -0
  19. metaflow/_vendor/typeguard/_pytest_plugin.py +100 -0
  20. metaflow/_vendor/typeguard/_suppression.py +88 -0
  21. metaflow/_vendor/typeguard/_transformer.py +1193 -0
  22. metaflow/_vendor/typeguard/_union_transformer.py +54 -0
  23. metaflow/_vendor/typeguard/_utils.py +169 -0
  24. metaflow/_vendor/typeguard/py.typed +0 -0
  25. metaflow/_vendor/typing_extensions.py +3053 -0
  26. metaflow/cli.py +48 -36
  27. metaflow/clone_util.py +6 -0
  28. metaflow/cmd/develop/stubs.py +2 -0
  29. metaflow/extension_support/__init__.py +2 -0
  30. metaflow/extension_support/plugins.py +2 -0
  31. metaflow/metaflow_config.py +24 -0
  32. metaflow/metaflow_environment.py +2 -2
  33. metaflow/parameters.py +1 -0
  34. metaflow/plugins/__init__.py +19 -0
  35. metaflow/plugins/airflow/airflow.py +7 -0
  36. metaflow/plugins/argo/argo_workflows.py +17 -0
  37. metaflow/plugins/aws/batch/batch_decorator.py +3 -3
  38. metaflow/plugins/azure/__init__.py +3 -0
  39. metaflow/plugins/azure/azure_credential.py +53 -0
  40. metaflow/plugins/azure/azure_exceptions.py +1 -1
  41. metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
  42. metaflow/plugins/azure/azure_utils.py +2 -35
  43. metaflow/plugins/azure/blob_service_client_factory.py +4 -2
  44. metaflow/plugins/datastores/azure_storage.py +6 -6
  45. metaflow/plugins/datatools/s3/s3.py +1 -1
  46. metaflow/plugins/gcp/__init__.py +1 -0
  47. metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
  48. metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
  49. metaflow/plugins/kubernetes/kubernetes.py +85 -8
  50. metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
  51. metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
  52. metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
  53. metaflow/plugins/kubernetes/kubernetes_job.py +208 -206
  54. metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
  55. metaflow/plugins/timeout_decorator.py +2 -1
  56. metaflow/runner/__init__.py +0 -0
  57. metaflow/runner/click_api.py +406 -0
  58. metaflow/runner/metaflow_runner.py +452 -0
  59. metaflow/runner/nbrun.py +246 -0
  60. metaflow/runner/subprocess_manager.py +552 -0
  61. metaflow/task.py +1 -12
  62. metaflow/tuple_util.py +27 -0
  63. metaflow/util.py +0 -15
  64. metaflow/vendor.py +0 -1
  65. metaflow/version.py +1 -1
  66. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/METADATA +2 -2
  67. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/RECORD +72 -39
  68. metaflow/_vendor/v3_7/__init__.py +0 -1
  69. /metaflow/_vendor/{v3_7/zipp.py → zipp.py} +0 -0
  70. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/LICENSE +0 -0
  71. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/WHEEL +0 -0
  72. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/entry_points.txt +0 -0
  73. {metaflow-2.11.15.dist-info → metaflow-2.12.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,240 @@
1
+ from metaflow.plugins.secrets import SecretsProvider
2
+ import re
3
+ import base64
4
+ import codecs
5
+ from urllib.parse import urlparse
6
+ from metaflow.exception import MetaflowException
7
+ import sys
8
+ from metaflow.metaflow_config import AZURE_KEY_VAULT_PREFIX
9
+ from metaflow.plugins.azure.azure_credential import (
10
+ create_cacheable_azure_credential,
11
+ )
12
+
13
+
14
class MetaflowAzureKeyVaultBadVault(MetaflowException):
    """
    Raised when a fully qualified secret URL does not point at a recognized
    Azure Key Vault domain, or when its vault name is malformed.
    """
16
+
17
+
18
class MetaflowAzureKeyVaultBadSecretType(MetaflowException):
    """
    Raised when the object type in the secret URL is not a supported one
    (only "secrets" is supported).
    """
20
+
21
+
22
class MetaflowAzureKeyVaultBadSecretPath(MetaflowException):
    """
    Raised when the path of the secret URL does not have the expected number
    of segments.
    """
24
+
25
+
26
class MetaflowAzureKeyVaultBadSecretName(MetaflowException):
    """
    Raised when the secret name does not follow the expected naming pattern.
    """
28
+
29
+
30
class MetaflowAzureKeyVaultBadSecretVersion(MetaflowException):
    """
    Raised when the secret version does not follow the expected version
    pattern.
    """
32
+
33
+
34
class MetaflowAzureKeyVaultBadSecret(MetaflowException):
    """
    Raised when the secret id is empty or is not in one of the forms that
    Metaflow supports.
    """
36
+
37
+
38
class AzureKeyVaultSecretsProvider(SecretsProvider):
    """
    Secrets provider that reads a single secret from Azure Key Vault.

    Supported forms of ``secret_id``:

    1. Full URL with version:
       https://<key-vault-name><.vault-domain>/secrets/<secret-name>/<secret-version>
    2. Full URL without version:
       https://<key-vault-name><.vault-domain>/secrets/<secret-name>
    3. Simple secret name, e.g. ``mysecret``
    4. Secret name plus version, e.g. ``mysecret/<secret-version>``

    Forms 3 and 4 require METAFLOW_AZURE_KEY_VAULT_PREFIX to be set.
    """

    TYPE = "az-key-vault"
    key_vault_domains = [
        ".vault.azure.net",
        ".vault.azure.cn",
        ".vault.usgovcloudapi.net",
        ".vault.microsoftazure.de",
    ]
    supported_vault_object_types = ["secrets"]

    # https://learn.microsoft.com/en-us/azure/key-vault/general/about-keys-secrets-certificates has details on vault name structure
    # Vault name and Managed HSM pool name must be a 3-24 character string, containing only 0-9, a-z, A-Z, and not consecutive -.
    def _is_valid_vault_name(self, vault_name):
        """Return True if `vault_name` is 3-24 chars of [a-zA-Z0-9-] without '--'."""
        vault_name_pattern = r"^(?!.*--)[a-zA-Z0-9-]{3,24}$"
        return re.match(vault_name_pattern, vault_name) is not None

    # The type of the object can be, "keys", "secrets", or "certificates".
    # Currently only secrets will be supported
    def _is_valid_object_type(self, secret_type):
        """Return True if `secret_type` is one of the supported vault object types."""
        return secret_type in self.supported_vault_object_types

    # The secret name must be a 1-127 character string, starting with a letter and containing only 0-9, a-z, A-Z, and -.
    def _is_valid_secret_name(self, secret_name):
        """Return True if `secret_name` matches the Key Vault secret name pattern."""
        secret_name_pattern = r"^[a-zA-Z][a-zA-Z0-9-]{0,126}$"
        return re.match(secret_name_pattern, secret_name) is not None

    # An object-version is a system-generated, 32 character string identifier that is optionally used to address a unique version of an object.
    def _is_valid_object_version(self, secret_version):
        """Return True if `secret_version` is exactly 32 alphanumeric characters."""
        object_version_pattern = r"^[a-zA-Z0-9]{32}$"
        return re.match(object_version_pattern, secret_version) is not None

    # This function will check if the secret_id is fully qualified url. It will return True iff the secret_id is of the form:
    # https://myvault.vault.azure.net/secrets/mysecret/ec96f02080254f109c51a1f14cdb1931 OR
    # https://myvault.vault.azure.net/secrets/mysecret/
    # validating the above as per recommendations in https://devblogs.microsoft.com/azure-sdk/guidance-for-applications-using-the-key-vault-libraries/
    def _is_secret_id_fully_qualified_url(self, secret_id):
        """
        Validate a fully qualified Key Vault secret URL.

        Returns False when `secret_id` is empty, does not start with
        "https://", or cannot be parsed as a URL. Returns True when every
        component of the URL is valid.

        Raises
        ------
        MetaflowAzureKeyVaultBadVault
            The host does not end with a known Key Vault domain, or the vault
            name is malformed.
        MetaflowAzureKeyVaultBadSecretPath
            The URL path does not have 2 or 3 segments.
        MetaflowAzureKeyVaultBadSecretType
            The object type (first path segment) is not supported.
        MetaflowAzureKeyVaultBadSecretName
            The secret name (second path segment) is malformed.
        MetaflowAzureKeyVaultBadSecretVersion
            The optional version (third path segment) is malformed.
        """
        # if the secret_id is None/empty/does not start with https then return false
        if not secret_id or not secret_id.startswith("https://"):
            return False
        try:
            parsed_vault_url = urlparse(secret_id)
        except ValueError:
            print("invalid vault url", file=sys.stderr)
            return False
        hostname = parsed_vault_url.netloc

        # The host must END with a known Key Vault domain. A plain substring
        # test would also accept hosts that merely contain the domain
        # somewhere (e.g. "x.vault.azure.net.example.com") and would then rely
        # on the vault-name check below to reject them.
        actual_k_v_domain = next(
            (
                k_v_domain
                for k_v_domain in self.key_vault_domains
                if hostname.endswith(k_v_domain)
            ),
            None,
        )
        if actual_k_v_domain is None:
            # the secret_id started with https:// however none of the
            # key_vault_domains terminates its host, which means it does not
            # point at an Azure Key Vault.
            raise MetaflowAzureKeyVaultBadVault("bad key vault domain %s" % secret_id)

        # given the secret_id has a valid key vault domain
        # lets verify that the vault name corresponds to its regex.
        vault_name = hostname[: -len(actual_k_v_domain)]
        if not self._is_valid_vault_name(vault_name):
            raise MetaflowAzureKeyVaultBadVault("bad key vault name %s" % vault_name)

        # Expected path: /<object-type>/<secret-name>[/<secret-version>]
        path_parts = parsed_vault_url.path.strip("/").split("/")
        total_path_parts = len(path_parts)
        if total_path_parts < 2 or total_path_parts > 3:
            raise MetaflowAzureKeyVaultBadSecretPath(
                "bad secret uri path %s" % path_parts
            )

        object_type = path_parts[0]
        if not self._is_valid_object_type(object_type):
            raise MetaflowAzureKeyVaultBadSecretType("bad secret type %s" % object_type)

        secret_name = path_parts[1]
        if not self._is_valid_secret_name(secret_name=secret_name):
            raise MetaflowAzureKeyVaultBadSecretName("bad secret name %s" % secret_name)

        if total_path_parts == 3:
            if not self._is_valid_object_version(path_parts[2]):
                raise MetaflowAzureKeyVaultBadSecretVersion(
                    "bad secret version %s" % path_parts[2]
                )

        return True

    # This function will validate the correctness of the partial secret id.
    # It will attempt to construct the fully qualified secret URL internally and
    # call the _is_secret_id_fully_qualified_url to check validity
    def _is_partial_secret_valid(self, secret_id):
        """
        Validate a short-form secret id (`name` or `name/version`).

        Returns False when the id has more than two "/"-separated parts or
        when the URL built from METAFLOW_AZURE_KEY_VAULT_PREFIX is rejected
        with a False result. Validation errors raised by
        `_is_secret_id_fully_qualified_url` propagate unchanged.

        Raises
        ------
        ValueError
            METAFLOW_AZURE_KEY_VAULT_PREFIX is not set.
        """
        # str.split always yields at least one element, so only the upper
        # bound needs checking.
        if len(secret_id.strip("/").split("/")) > 2:
            return False

        # since the secret_id is supposedly a partial id, the AZURE_KEY_VAULT_PREFIX
        # must be set.
        if not AZURE_KEY_VAULT_PREFIX:
            raise ValueError(
                "cannot use simple secret id without setting METAFLOW_AZURE_KEY_VAULT_PREFIX. %s"
                % AZURE_KEY_VAULT_PREFIX
            )
        domain = AZURE_KEY_VAULT_PREFIX.rstrip("/")
        full_secret = "%s/secrets/%s" % (domain, secret_id)
        return bool(self._is_secret_id_fully_qualified_url(full_secret))

    def _sanitize_key_as_env_var(self, key):
        """
        Sanitize a key as an environment variable name.
        This is purely a convenience trade-off to cover common cases well, vs. introducing
        ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).

        1/27/2023(jackie):

        We start with few rules and should *sparingly* add more over time.
        Also, it's TBD whether all possible providers will share the same sanitization logic.
        Therefore we will keep this function private for now
        """
        return key.replace("-", "_").replace(".", "_").replace("/", "_")

    def get_secret_as_dict(self, secret_id, options=None, role=None):
        """
        Fetch a secret from Azure Key Vault and return it as a one-entry dict.

        Parameters
        ----------
        secret_id : str
            Secret identifier in one of the forms listed on the class.
        options : dict, optional
            If it contains a non-None "env_var_name", that name (sanitized) is
            used as the key of the result; otherwise the (sanitized) name of
            the fetched secret is used.
        role : optional
            Not used by this provider.

        Returns
        -------
        dict
            A single entry mapping the sanitized key to the secret value.

        Raises
        ------
        MetaflowAzureKeyVaultBadSecret
            `secret_id` is empty or is not in a supported form.
        ValueError
            A short-form id is used without METAFLOW_AZURE_KEY_VAULT_PREFIX.
        MetaflowException
            Any of the more specific validation errors raised while checking
            the secret URL.
        """
        # https://learn.microsoft.com/en-us/azure/app-service/app-service-key-vault-references?tabs=azure-cli has a lot of details on
        # the patterns used in key vault
        # Vault names and Managed HSM pool names are selected by the user and are globally unique.
        # Vault name and Managed HSM pool name must be a 3-24 character string, containing only 0-9, a-z, A-Z, and not consecutive -.
        # object-type The type of the object. As of 05/08/24 only "secrets", are supported
        # object-name An object-name is a user provided name for and must be unique within a key vault. The name must be a 1-127 character string, starting with a letter and containing only 0-9, a-z, A-Z, and -.
        # object-version An object-version is a system-generated, 32 character string identifier that is optionally used to address a unique version of an object.

        # Avoid a shared mutable default argument; a missing options dict is
        # treated as empty.
        if options is None:
            options = {}

        # an empty secret id can never be resolved
        if secret_id is None or secret_id == "":
            raise MetaflowAzureKeyVaultBadSecret("empty secret id is not supported")

        if secret_id.startswith("https://"):
            # the secret id is passed as a URL - check that it is fully qualified
            if not self._is_secret_id_fully_qualified_url(secret_id=secret_id):
                # MetaflowAzureKeyVaultBadSecret is a MetaflowException, so
                # callers catching the base class keep working.
                raise MetaflowAzureKeyVaultBadSecret(
                    "unsupported secret %s" % secret_id
                )
            full_secret = secret_id
        else:
            # short form: `secret_name` OR `secret_name/secret_version`
            if not self._is_partial_secret_valid(secret_id=secret_id):
                raise MetaflowAzureKeyVaultBadSecret(
                    "unsupported partial secret %s" % secret_id
                )
            domain = AZURE_KEY_VAULT_PREFIX.rstrip("/")
            full_secret = "%s/secrets/%s" % (domain, secret_id)

        # at this point the secret URL has been validated, so we can start
        # creating the Secret Client
        az_credentials = create_cacheable_azure_credential()
        res = urlparse(full_secret)
        az_vault_url = "%s://%s" % (
            res.scheme,
            res.netloc,
        )  # https://myvault.vault.azure.net
        # drop the leading object type ("secrets"); what remains is
        # [<secret-name>] or [<secret-name>, <secret-version>]
        secret_data = res.path.strip("/").split("/")[1:]
        secret_name = secret_data[0]
        secret_version = secret_data[1] if len(secret_data) > 1 else None

        # imported lazily so the Azure SDK is only required when this
        # provider is actually used
        from azure.keyvault.secrets import SecretClient

        client = SecretClient(vault_url=az_vault_url, credential=az_credentials)

        key_vault_secret_val = client.get_secret(
            name=secret_name, version=secret_version
        )

        if options.get("env_var_name") is not None:
            sanitized_key = self._sanitize_key_as_env_var(options["env_var_name"])
        else:
            sanitized_key = self._sanitize_key_as_env_var(key_vault_secret_val.name)

        return {sanitized_key: key_vault_secret_val.value}
@@ -7,6 +7,7 @@ from metaflow.plugins.azure.azure_exceptions import (
7
7
  MetaflowAzurePackageError,
8
8
  )
9
9
  from metaflow.exception import MetaflowInternalError, MetaflowException
10
+ from metaflow.plugins.azure.azure_credential import create_cacheable_azure_credential
10
11
 
11
12
 
12
13
  def _check_and_init_azure_deps():
@@ -138,38 +139,6 @@ def handle_exceptions(func):
138
139
  return _inner_func
139
140
 
140
141
 
141
- @check_azure_deps
142
- def create_cacheable_default_azure_credentials(*args, **kwargs):
143
- """azure.identity.DefaultAzureCredential is not readily cacheable in a dictionary
144
- because it does not have a content based hash and equality implementations.
145
-
146
- We implement a subclass CacheableDefaultAzureCredential to add them.
147
-
148
- We need this because credentials will be part of the cache key in _ClientCache.
149
- """
150
- from azure.identity import DefaultAzureCredential
151
-
152
- class CacheableDefaultAzureCredential(DefaultAzureCredential):
153
- def __init__(self, *args, **kwargs):
154
- super(CacheableDefaultAzureCredential, self).__init__(*args, **kwargs)
155
- # Just hashing all the kwargs works because they are all individually
156
- # hashable as of 7/15/2022.
157
- #
158
- # What if Azure adds unhashable things to kwargs?
159
- # - We will have CI to catch this (it will always install the latest Azure SDKs)
160
- # - In Metaflow usage today we never specify any kwargs anyway. (see last line
161
- # of the outer function.
162
- self._hash_code = hash((args, tuple(sorted(kwargs.items()))))
163
-
164
- def __hash__(self):
165
- return self._hash_code
166
-
167
- def __eq__(self, other):
168
- return hash(self) == hash(other)
169
-
170
- return CacheableDefaultAzureCredential(*args, **kwargs)
171
-
172
-
173
142
  @check_azure_deps
174
143
  def create_static_token_credential(token_):
175
144
  from azure.core.credentials import TokenCredential
@@ -200,9 +169,7 @@ def create_static_token_credential(token_):
200
169
  def get_token(self, *_scopes, **_kwargs):
201
170
 
202
171
  if (self._cached_token.expires_on - time.time()) < 300:
203
- from azure.identity import DefaultAzureCredential
204
-
205
- self._credential = DefaultAzureCredential()
172
+ self._credential = create_cacheable_azure_credential()
206
173
  if self._credential:
207
174
  return self._credential.get_token(*_scopes, **_kwargs)
208
175
  return self._cached_token
@@ -1,9 +1,11 @@
1
1
  from metaflow.exception import MetaflowException
2
2
  from metaflow.metaflow_config import AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
3
3
  from metaflow.plugins.azure.azure_utils import (
4
- create_cacheable_default_azure_credentials,
5
4
  check_azure_deps,
6
5
  )
6
+ from metaflow.plugins.azure.azure_credential import (
7
+ create_cacheable_azure_credential,
8
+ )
7
9
 
8
10
  import os
9
11
  import threading
@@ -125,7 +127,7 @@ def get_azure_blob_service_client(
125
127
  blob_service_endpoint = AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
126
128
 
127
129
  if not credential:
128
- credential = create_cacheable_default_azure_credentials()
130
+ credential = create_cacheable_azure_credential()
129
131
  credential_is_cacheable = True
130
132
 
131
133
  if not credential_is_cacheable:
@@ -32,6 +32,8 @@ from metaflow.plugins.storage_executor import (
32
32
  handle_executor_exceptions,
33
33
  )
34
34
 
35
+ from metaflow.plugins.azure.azure_credential import create_cacheable_azure_credential
36
+
35
37
  AZURE_STORAGE_DOWNLOAD_MAX_CONCURRENCY = 4
36
38
  AZURE_STORAGE_UPLOAD_MAX_CONCURRENCY = 16
37
39
 
@@ -272,12 +274,10 @@ class AzureStorage(DataStoreStorage):
272
274
  if not self._default_scope_token or (
273
275
  self._default_scope_token.expires_on - time.time() < 300
274
276
  ):
275
- from azure.identity import DefaultAzureCredential
276
-
277
- with DefaultAzureCredential() as credential:
278
- self._default_scope_token = credential.get_token(
279
- AZURE_STORAGE_DEFAULT_SCOPE
280
- )
277
+ credential = create_cacheable_azure_credential()
278
+ self._default_scope_token = credential.get_token(
279
+ AZURE_STORAGE_DEFAULT_SCOPE
280
+ )
281
281
  return self._default_scope_token
282
282
 
283
283
  @property
@@ -21,7 +21,6 @@ from metaflow.metaflow_config import (
21
21
  TEMPDIR,
22
22
  )
23
23
  from metaflow.util import (
24
- namedtuple_with_defaults,
25
24
  is_stringish,
26
25
  to_bytes,
27
26
  to_unicode,
@@ -29,6 +28,7 @@ from metaflow.util import (
29
28
  url_quote,
30
29
  url_unquote,
31
30
  )
31
+ from metaflow.tuple_util import namedtuple_with_defaults
32
32
  from metaflow.exception import MetaflowException
33
33
  from metaflow.debug import debug
34
34
  import metaflow.tracing as tracing
@@ -0,0 +1 @@
1
+ from .gs_storage_client_factory import get_credentials
@@ -0,0 +1,169 @@
1
+ import base64
2
+ import json
3
+ from json import JSONDecodeError
4
+
5
+
6
+ from metaflow.exception import MetaflowException
7
+ from metaflow.plugins.secrets import SecretsProvider
8
+ import re
9
+ from metaflow.plugins.gcp.gs_storage_client_factory import get_credentials
10
+ from metaflow.metaflow_config import GCP_SECRET_MANAGER_PREFIX
11
+
12
+
13
class MetaflowGcpSecretsManagerBadResponse(MetaflowException):
    """
    Raised when GCP Secrets Manager returns a response that is invalid in
    some way.
    """
15
+
16
+
17
class MetaflowGcpSecretsManagerDuplicateKey(MetaflowException):
    """
    Raised when two keys of a secret from GCP Secrets Manager collide after
    being sanitized into environment variable names.
    """
19
+
20
+
21
class MetaflowGcpSecretsManagerJSONParseError(MetaflowException):
    """
    Raised when the secret payload from GCP Secrets Manager is expected to be
    JSON but is not valid JSON.
    """
23
+
24
+
25
class MetaflowGcpSecretsManagerNotJSONObject(MetaflowException):
    """
    Raised when the secret payload from GCP Secrets Manager is valid JSON but
    its top-level value is not an object (dictionary).
    """
27
+
28
+
29
+ def _sanitize_key_as_env_var(key):
30
+ """
31
+ Sanitize a key as an environment variable name.
32
+ This is purely a convenience trade-off to cover common cases well, vs. introducing
33
+ ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).
34
+
35
+ 1/27/2023(jackie):
36
+
37
+ We start with few rules and should *sparingly* add more over time.
38
+ Also, it's TBD whether all possible providers will share the same sanitization logic.
39
+ Therefore we will keep this function private for now
40
+ """
41
+ return key.replace("-", "_").replace(".", "_").replace("/", "_")
42
+
43
+
44
class GcpSecretManagerSecretsProvider(SecretsProvider):
    """Secrets provider that reads secrets from GCP Secret Manager."""

    TYPE = "gcp-secret-manager"

    def get_secret_as_dict(self, secret_id, options=None, role=None):
        """
        Reads a secret from GCP Secrets Manager and returns it as a dictionary of environment variables.

        The payload is handled according to `options`:
        - if the `json` option is True:
            The payload is decoded as UTF8 and parsed as JSON. The JSON must contain a top-level
            object; each entry K/V in the object is converted to an entry in the result. V will
            always be casted to a string (if not already a string).
        - if the `json` option is False (default):
            The payload is returned as a single entry in the result. The key is the `env_var_name`
            option if set, otherwise the secret name taken from secret_id.

        On GCP Secrets Manager, the secret payload is a binary blob. However, by default we interpret it as UTF8 encoded
        string. To disable this, set the `binary` option to True, the binary will be base64 encoded in the result
        (as an ASCII string).

        All keys in the result are sanitized to be more valid environment variable names. This is done on a best effort
        basis. Further validation is expected to be done by the invoking @secrets decorator itself.

        :param secret_id: GCP Secrets Manager secret ID
        :param options: optional dict; the keys `json`, `binary` and `env_var_name` are read
        :param role: not used by this provider
        :return: dict of environment variables. All keys and values are strings.
        :raises ValueError: secret_id is malformed, or a simple secret_id is used without
            METAFLOW_GCP_SECRET_MANAGER_PREFIX being set
        :raises MetaflowGcpSecretsManagerJSONParseError: `json` is set but the payload is not valid JSON
        :raises MetaflowGcpSecretsManagerNotJSONObject: `json` is set but the payload is not a JSON object
        :raises MetaflowGcpSecretsManagerDuplicateKey: two keys sanitize to the same name
        """
        from google.cloud.secretmanager_v1.services.secret_manager_service import (
            SecretManagerServiceClient,
        )
        from google.cloud.secretmanager_v1.services.secret_manager_service.transports import (
            SecretManagerServiceTransport,
        )

        # Avoid a shared mutable default argument; a missing options dict is
        # treated as empty.
        if options is None:
            options = {}

        # Full secret id looks like projects/1234567890/secrets/mysecret/versions/latest
        #
        # We allow these forms of secret_id:
        #
        # 1. Full path like projects/1234567890/secrets/mysecret/versions/latest
        #    This is what you'd specify if you used the GCP SDK.
        #
        # 2. Full path but without the version like projects/1234567890/secrets/mysecret.
        #    This is what you see in the GCP console, makes it easier to copy & paste.
        #
        # 3. Simple string like mysecret
        #
        # 4. Simple string with /versions/<version> suffix like mysecret/versions/1

        # The latter two forms require METAFLOW_GCP_SECRET_MANAGER_PREFIX to be set.

        match_full = re.match(
            r"^projects/\d+/secrets/([\w\-]+)(/versions/([\w\-]+))?$", secret_id
        )
        match_partial = re.match(r"^([\w\-]+)(/versions/[\w\-]+)?$", secret_id)
        if match_full:
            # Full path
            env_var_name = match_full.group(1)
            if match_full.group(3):
                # With version specified
                full_secret_name = secret_id
            else:
                # No version specified, use latest
                full_secret_name = secret_id + "/versions/latest"
        elif match_partial:
            # Partial path, possibly with /versions/<version> suffix
            if not GCP_SECRET_MANAGER_PREFIX:
                raise ValueError(
                    "Cannot use simple secret_id without setting METAFLOW_GCP_SECRET_MANAGER_PREFIX. %s"
                    % GCP_SECRET_MANAGER_PREFIX
                )
            # The secret name without any version suffix.
            env_var_name = match_partial.group(1)
            if match_partial.group(2):
                # With version specified
                full_secret_name = "%s%s" % (GCP_SECRET_MANAGER_PREFIX, secret_id)
            else:
                # No version specified, use latest
                full_secret_name = "%s%s/versions/latest" % (
                    GCP_SECRET_MANAGER_PREFIX,
                    secret_id,
                )
        else:
            raise ValueError(
                "Invalid secret_id: %s. Must be either a full path or a simple string."
                % secret_id
            )

        result = {}

        def _sanitize_and_add_entry_to_result(k, v):
            # Two jobs - sanitize, and check for dupes
            sanitized_k = _sanitize_key_as_env_var(k)
            if sanitized_k in result:
                raise MetaflowGcpSecretsManagerDuplicateKey(
                    "Duplicate key in secret: '%s' (sanitizes to '%s')"
                    % (k, sanitized_k)
                )
            result[sanitized_k] = v

        credentials, _ = get_credentials(
            scopes=SecretManagerServiceTransport.AUTH_SCOPES
        )
        client = SecretManagerServiceClient(credentials=credentials)
        response = client.access_secret_version(request={"name": full_secret_name})
        payload = response.payload.data

        if options.get("json", False):
            try:
                obj = json.loads(payload.decode("UTF-8"))
            except JSONDecodeError:
                # Surface parse failures as the provider's own exception
                # instead of leaking the raw json error.
                raise MetaflowGcpSecretsManagerJSONParseError(
                    "Secret string could not be parsed as JSON"
                )
            if not isinstance(obj, dict):
                raise MetaflowGcpSecretsManagerNotJSONObject(
                    "Secret string is a JSON, but not an object (dict-like) - actual type %s."
                    % type(obj)
                )
            for k, v in obj.items():
                # We try to make it work here - cast to string always
                _sanitize_and_add_entry_to_result(k, str(v))
        else:
            if options.get("env_var_name"):
                env_var_name = options["env_var_name"]

            if options.get("binary", False):
                # Do NOT decode the raw payload as UTF8 here: binary payloads
                # are not required to be valid UTF8. b64encode returns bytes,
                # so decode the (pure ASCII) result to keep values strings.
                _sanitize_and_add_entry_to_result(
                    env_var_name, base64.b64encode(payload).decode("ascii")
                )
            else:
                _sanitize_and_add_entry_to_result(
                    env_var_name, payload.decode("UTF-8")
                )

        return result
@@ -8,7 +8,7 @@ def _get_cache_key():
8
8
  return os.getpid(), threading.get_ident()
9
9
 
10
10
 
11
- def get_gs_storage_client():
11
+ def _get_gs_storage_client_default():
12
12
  cache_key = _get_cache_key()
13
13
  if cache_key not in _client_cache:
14
14
  from google.cloud import storage
@@ -19,3 +19,54 @@ def get_gs_storage_client():
19
19
  credentials=credentials, project=project_id
20
20
  )
21
21
  return _client_cache[cache_key]
22
+
23
+
24
class GcpDefaultClientProvider(object):
    """
    Default GCP client provider, registered under the name "gcp-default".

    Storage clients come from `_get_gs_storage_client_default` and
    credentials from `google.auth.default`.
    """

    name = "gcp-default"

    @staticmethod
    def get_gs_storage_client(*args, **kwargs):
        """Return the default GS storage client; all arguments are ignored."""
        client = _get_gs_storage_client_default()
        return client

    @staticmethod
    def get_credentials(scopes, *args, **kwargs):
        """
        Return the result of `google.auth.default` for the given `scopes`.

        Extra positional and keyword arguments are accepted but ignored.
        """
        # imported lazily so google-auth is only needed when credentials
        # are actually requested
        import google.auth

        default_credentials = google.auth.default(scopes=scopes)
        return default_credentials
36
+
37
+
38
# Provider class selected by DEFAULT_GCP_CLIENT_PROVIDER; resolved on first use.
cached_provider_class = None


def _get_provider_class():
    """
    Return the configured GCP client provider class, resolving it on first use.

    The provider is looked up by name (DEFAULT_GCP_CLIENT_PROVIDER) among
    GCP_CLIENT_PROVIDERS and remembered in the module-level
    `cached_provider_class`.

    Raises
    ------
    ValueError
        No provider in GCP_CLIENT_PROVIDERS has the configured name.
    """
    global cached_provider_class
    if cached_provider_class is None:
        # imported lazily, only when the provider has not been resolved yet
        from metaflow.metaflow_config import DEFAULT_GCP_CLIENT_PROVIDER
        from metaflow.plugins import GCP_CLIENT_PROVIDERS

        for p in GCP_CLIENT_PROVIDERS:
            if p.name == DEFAULT_GCP_CLIENT_PROVIDER:
                cached_provider_class = p
                break
        else:
            raise ValueError(
                "Cannot find GCP Client provider %s" % DEFAULT_GCP_CLIENT_PROVIDER
            )
    return cached_provider_class


def get_gs_storage_client():
    """Return a GS storage client from the configured GCP client provider."""
    return _get_provider_class().get_gs_storage_client()


def get_credentials(scopes, *args, **kwargs):
    """
    Return credentials from the configured GCP client provider.

    `scopes` and any extra positional/keyword arguments are forwarded to the
    provider's `get_credentials` unchanged.
    """
    return _get_provider_class().get_credentials(scopes, *args, **kwargs)