ob-metaflow 2.11.4.8__py2.py3-none-any.whl → 2.11.8.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow might be problematic. Click here for more details.
- metaflow/cli.py +15 -10
- metaflow/clone_util.py +71 -0
- metaflow/cmd/develop/stub_generator.py +2 -0
- metaflow/cmd/develop/stubs.py +17 -8
- metaflow/metaflow_config.py +14 -1
- metaflow/metaflow_environment.py +1 -1
- metaflow/package.py +4 -3
- metaflow/parameters.py +2 -2
- metaflow/plugins/__init__.py +4 -0
- metaflow/plugins/airflow/airflow.py +3 -0
- metaflow/plugins/argo/argo_workflows.py +3 -1
- metaflow/plugins/aws/batch/batch.py +12 -0
- metaflow/plugins/aws/batch/batch_cli.py +25 -0
- metaflow/plugins/aws/batch/batch_client.py +40 -0
- metaflow/plugins/aws/batch/batch_decorator.py +32 -1
- metaflow/plugins/aws/step_functions/step_functions.py +3 -0
- metaflow/plugins/azure/__init__.py +3 -0
- metaflow/plugins/datatools/s3/s3op.py +4 -3
- metaflow/plugins/env_escape/client.py +154 -27
- metaflow/plugins/env_escape/client_modules.py +15 -47
- metaflow/plugins/env_escape/configurations/emulate_test_lib/overrides.py +31 -42
- metaflow/plugins/env_escape/configurations/emulate_test_lib/server_mappings.py +8 -3
- metaflow/plugins/env_escape/configurations/test_lib_impl/test_lib.py +74 -22
- metaflow/plugins/env_escape/consts.py +1 -0
- metaflow/plugins/env_escape/exception_transferer.py +46 -112
- metaflow/plugins/env_escape/override_decorators.py +8 -8
- metaflow/plugins/env_escape/server.py +42 -5
- metaflow/plugins/env_escape/stub.py +168 -23
- metaflow/plugins/env_escape/utils.py +3 -3
- metaflow/plugins/gcp/__init__.py +1 -0
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +164 -0
- metaflow/plugins/kubernetes/kubernetes.py +8 -0
- metaflow/plugins/pypi/conda_environment.py +9 -0
- metaflow/plugins/pypi/pip.py +17 -2
- metaflow/runtime.py +252 -61
- metaflow/sidecar/sidecar.py +11 -1
- metaflow/sidecar/sidecar_subprocess.py +34 -18
- metaflow/task.py +28 -54
- metaflow/version.py +1 -1
- {ob_metaflow-2.11.4.8.dist-info → ob_metaflow-2.11.8.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.11.4.8.dist-info → ob_metaflow-2.11.8.1.dist-info}/RECORD +45 -43
- {ob_metaflow-2.11.4.8.dist-info → ob_metaflow-2.11.8.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.11.4.8.dist-info → ob_metaflow-2.11.8.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.11.4.8.dist-info → ob_metaflow-2.11.8.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.11.4.8.dist-info → ob_metaflow-2.11.8.1.dist-info}/top_level.txt +0 -0
|
@@ -13,12 +13,12 @@ def get_methods(class_object):
|
|
|
13
13
|
for base_class in mros:
|
|
14
14
|
all_attributes.update(base_class.__dict__)
|
|
15
15
|
for name, attribute in all_attributes.items():
|
|
16
|
-
if
|
|
17
|
-
all_methods[name] = inspect.getdoc(attribute)
|
|
18
|
-
elif isinstance(attribute, staticmethod):
|
|
16
|
+
if isinstance(attribute, staticmethod):
|
|
19
17
|
all_methods["___s___%s" % name] = inspect.getdoc(attribute)
|
|
20
18
|
elif isinstance(attribute, classmethod):
|
|
21
19
|
all_methods["___c___%s" % name] = inspect.getdoc(attribute)
|
|
20
|
+
elif hasattr(attribute, "__call__"):
|
|
21
|
+
all_methods[name] = inspect.getdoc(attribute)
|
|
22
22
|
return all_methods
|
|
23
23
|
|
|
24
24
|
|
metaflow/plugins/gcp/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .gs_storage_client_factory import get_credentials
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
from json import JSONDecodeError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from metaflow.exception import MetaflowException
|
|
7
|
+
from metaflow.plugins.secrets import SecretsProvider
|
|
8
|
+
import re
|
|
9
|
+
from metaflow.plugins.gcp.gs_storage_client_factory import get_credentials
|
|
10
|
+
from metaflow.metaflow_config import GCP_SECRET_MANAGER_PREFIX
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MetaflowGcpSecretsManagerBadResponse(MetaflowException):
    """Raised when the response from GCP Secret Manager is not valid in some way."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MetaflowGcpSecretsManagerDuplicateKey(MetaflowException):
    """
    Raised when a secret from GCP Secret Manager contains duplicate keys.

    Keys are compared after sanitization into environment variable names, so two
    distinct keys that sanitize to the same name also count as duplicates.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MetaflowGcpSecretsManagerJSONParseError(MetaflowException):
    """Raised when a secret payload from GCP Secret Manager is not valid JSON."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MetaflowGcpSecretsManagerNotJSONObject(MetaflowException):
    """
    Raised when a secret payload from GCP Secret Manager parses as JSON but the
    top-level value is not a JSON object (dictionary).
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _sanitize_key_as_env_var(key):
|
|
30
|
+
"""
|
|
31
|
+
Sanitize a key as an environment variable name.
|
|
32
|
+
This is purely a convenience trade-off to cover common cases well, vs. introducing
|
|
33
|
+
ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).
|
|
34
|
+
|
|
35
|
+
1/27/2023(jackie):
|
|
36
|
+
|
|
37
|
+
We start with few rules and should *sparingly* add more over time.
|
|
38
|
+
Also, it's TBD whether all possible providers will share the same sanitization logic.
|
|
39
|
+
Therefore we will keep this function private for now
|
|
40
|
+
"""
|
|
41
|
+
return key.replace("-", "_").replace(".", "_").replace("/", "_")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GcpSecretManagerSecretsProvider(SecretsProvider):
    """Secrets provider that reads secrets from GCP Secret Manager."""

    TYPE = "gcp-secret-manager"

    def get_secret_as_dict(self, secret_id, options=None, role=None):
        """
        Reads a secret from GCP Secret Manager and returns it as a dictionary of
        environment variables.

        On GCP Secret Manager, the secret payload is a binary blob. How it is
        turned into the result depends on `options`:

        - If the `json` option is True:
            The payload is decoded as UTF-8 and parsed as JSON. If the JSON
            contains a top-level object, each K/V entry in the object becomes an
            entry in the result. V is always cast to a string (if not already a
            string).
        - If the `json` option is False (default):
            The payload is returned as a single entry in the result. The key is
            the `env_var_name` option if set, otherwise the secret's own name
            (without project path or version suffix).
            - By default the payload is interpreted as a UTF-8 encoded string.
            - If the `binary` option is True, the raw payload is base64 encoded
              instead (and is never decoded as UTF-8).

        All keys in the result are sanitized to be more valid environment
        variable names. This is done on a best effort basis. Further validation
        is expected to be done by the invoking @secrets decorator itself.

        :param secret_id: GCP Secret Manager secret ID (see accepted forms below)
        :param options: optional dict; recognized keys are `json`, `binary` and
            `env_var_name`
        :param role: not used by this provider
        :return: dict of environment variables. All keys and values are strings.
        :raises ValueError: if `secret_id` has an unsupported form, or a short
            form is used without METAFLOW_GCP_SECRET_MANAGER_PREFIX being set
        :raises MetaflowGcpSecretsManagerJSONParseError: if `json` is requested
            and the payload is not valid JSON
        :raises MetaflowGcpSecretsManagerNotJSONObject: if `json` is requested
            and the payload is valid JSON but not an object
        :raises MetaflowGcpSecretsManagerDuplicateKey: if two keys sanitize to
            the same environment variable name
        """
        # Imported lazily so that the GCP SDK is only required when this
        # provider is actually used.
        from google.cloud.secretmanager_v1.services.secret_manager_service import (
            SecretManagerServiceClient,
        )
        from google.cloud.secretmanager_v1.services.secret_manager_service.transports import (
            SecretManagerServiceTransport,
        )

        # Avoid a shared mutable default argument; also tolerates options=None.
        options = options or {}

        # Full secret id looks like projects/1234567890/secrets/mysecret/versions/latest
        #
        # We allow these forms of secret_id:
        #
        # 1. Full path like projects/1234567890/secrets/mysecret/versions/latest
        #    This is what you'd specify if you used the GCP SDK.
        #
        # 2. Full path but without the version like projects/1234567890/secrets/mysecret.
        #    This is what you see in the GCP console, makes it easier to copy & paste.
        #
        # 3. Simple string like mysecret
        #
        # 4. Simple string with /versions/<version> suffix like mysecret/versions/1
        #
        # The latter two forms require METAFLOW_GCP_SECRET_MANAGER_PREFIX to be set.
        match_full = re.match(
            r"^projects/\d+/secrets/([\w\-]+)(/versions/([\w\-]+))?$", secret_id
        )
        match_partial = re.match(r"^([\w\-]+)(/versions/[\w\-]+)?$", secret_id)

        if match_full:
            # Forms 1 and 2: the secret name is the path segment after /secrets/.
            env_var_name = match_full.group(1)
            full_secret_name = secret_id
            if not match_full.group(3):
                # No version specified, use latest
                full_secret_name += "/versions/latest"
        elif match_partial:
            # Forms 3 and 4: the configured prefix supplies the project path.
            if not GCP_SECRET_MANAGER_PREFIX:
                raise ValueError(
                    "Cannot use simple secret_id without setting "
                    "METAFLOW_GCP_SECRET_MANAGER_PREFIX."
                )
            env_var_name = match_partial.group(1)
            full_secret_name = f"{GCP_SECRET_MANAGER_PREFIX}{secret_id}"
            if not match_partial.group(2):
                # No version specified, use latest
                full_secret_name += "/versions/latest"
        else:
            raise ValueError(
                f"Invalid secret_id: {secret_id}. Must be either a full path or a simple string."
            )

        result = {}

        def _sanitize_and_add_entry_to_result(k, v):
            # Two jobs - sanitize, and check for dupes
            sanitized_k = _sanitize_key_as_env_var(k)
            if sanitized_k in result:
                raise MetaflowGcpSecretsManagerDuplicateKey(
                    "Duplicate key in secret: '%s' (sanitizes to '%s')"
                    % (k, sanitized_k)
                )
            result[sanitized_k] = v

        credentials, _ = get_credentials(
            scopes=SecretManagerServiceTransport.AUTH_SCOPES
        )
        client = SecretManagerServiceClient(credentials=credentials)
        response = client.access_secret_version(request={"name": full_secret_name})
        payload = response.payload.data

        if options.get("json", False):
            try:
                obj = json.loads(payload.decode("UTF-8"))
            except JSONDecodeError as err:
                raise MetaflowGcpSecretsManagerJSONParseError(
                    "Secret payload could not be parsed as JSON"
                ) from err
            if not isinstance(obj, dict):
                raise MetaflowGcpSecretsManagerNotJSONObject(
                    "Secret string is a JSON, but not an object (dict-like) - actual type %s."
                    % type(obj)
                )
            for k, v in obj.items():
                # We try to make it work here - cast to string always
                _sanitize_and_add_entry_to_result(k, str(v))
        else:
            env_var_name = options.get("env_var_name") or env_var_name
            if options.get("binary", False):
                # Never UTF-8 decode a binary payload (it may not be valid
                # UTF-8). b64encode returns bytes, so decode the ASCII-only
                # output to keep the "all values are strings" contract.
                value = base64.b64encode(payload).decode("ascii")
            else:
                value = payload.decode("UTF-8")
            _sanitize_and_add_entry_to_result(env_var_name, value)

        return result
|
|
@@ -28,8 +28,10 @@ from metaflow.metaflow_config import (
|
|
|
28
28
|
DATASTORE_SYSROOT_S3,
|
|
29
29
|
DATATOOLS_S3ROOT,
|
|
30
30
|
DEFAULT_AWS_CLIENT_PROVIDER,
|
|
31
|
+
DEFAULT_GCP_CLIENT_PROVIDER,
|
|
31
32
|
DEFAULT_METADATA,
|
|
32
33
|
DEFAULT_SECRETS_BACKEND_TYPE,
|
|
34
|
+
GCP_SECRET_MANAGER_PREFIX,
|
|
33
35
|
KUBERNETES_FETCH_EC2_METADATA,
|
|
34
36
|
KUBERNETES_LABELS,
|
|
35
37
|
KUBERNETES_SANDBOX_INIT_SCRIPT,
|
|
@@ -249,10 +251,16 @@ class Kubernetes(object):
|
|
|
249
251
|
.environment_variable(
|
|
250
252
|
"METAFLOW_DEFAULT_AWS_CLIENT_PROVIDER", DEFAULT_AWS_CLIENT_PROVIDER
|
|
251
253
|
)
|
|
254
|
+
.environment_variable(
|
|
255
|
+
"METAFLOW_DEFAULT_GCP_CLIENT_PROVIDER", DEFAULT_GCP_CLIENT_PROVIDER
|
|
256
|
+
)
|
|
252
257
|
.environment_variable(
|
|
253
258
|
"METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION",
|
|
254
259
|
AWS_SECRETS_MANAGER_DEFAULT_REGION,
|
|
255
260
|
)
|
|
261
|
+
.environment_variable(
|
|
262
|
+
"METAFLOW_GCP_SECRET_MANAGER_PREFIX", GCP_SECRET_MANAGER_PREFIX
|
|
263
|
+
)
|
|
256
264
|
.environment_variable("METAFLOW_S3_ENDPOINT_URL", S3_ENDPOINT_URL)
|
|
257
265
|
.environment_variable(
|
|
258
266
|
"METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT",
|
|
@@ -282,6 +282,15 @@ class CondaEnvironment(MetaflowEnvironment):
|
|
|
282
282
|
# Match PyPI and Conda python versions with the resolved environment Python.
|
|
283
283
|
environment["pypi"]["python"] = environment["conda"]["python"] = env_python
|
|
284
284
|
|
|
285
|
+
# When using `Application Default Credentials` for private GCP
|
|
286
|
+
# PyPI registries, the usage of environment variable `GOOGLE_APPLICATION_CREDENTIALS`
|
|
287
|
+
# demands that `keyrings.google-artifactregistry-auth` has to be installed
|
|
288
|
+
# and available in the underlying python environment.
|
|
289
|
+
if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
|
|
290
|
+
environment["conda"]["packages"][
|
|
291
|
+
"keyrings.google-artifactregistry-auth"
|
|
292
|
+
] = ">=1.1.1"
|
|
293
|
+
|
|
285
294
|
# Z combinator for a recursive lambda
|
|
286
295
|
deep_sort = (lambda f: f(f))(
|
|
287
296
|
lambda f: lambda obj: (
|
metaflow/plugins/pypi/pip.py
CHANGED
|
@@ -92,7 +92,14 @@ class Pip(object):
|
|
|
92
92
|
# so using @branch as a version acts as expected.
|
|
93
93
|
vcs_info = dl_info.get("vcs_info")
|
|
94
94
|
if vcs_info:
|
|
95
|
-
|
|
95
|
+
subdirectory = dl_info.get("subdirectory")
|
|
96
|
+
res["url"] = "{vcs}+{url}@{commit_id}{subdir_str}".format(
|
|
97
|
+
**vcs_info,
|
|
98
|
+
**res,
|
|
99
|
+
subdir_str="#subdirectory=%s" % subdirectory
|
|
100
|
+
if subdirectory
|
|
101
|
+
else ""
|
|
102
|
+
)
|
|
96
103
|
# used to deduplicate the storage location in case wheel does not
|
|
97
104
|
# build with enough unique identifiers.
|
|
98
105
|
res["hash"] = vcs_info["commit_id"]
|
|
@@ -270,9 +277,17 @@ class Pip(object):
|
|
|
270
277
|
prefix,
|
|
271
278
|
"pip3",
|
|
272
279
|
"--disable-pip-version-check",
|
|
273
|
-
"--no-input",
|
|
274
280
|
"--no-color",
|
|
275
281
|
]
|
|
282
|
+
# credentials are being determined from the JSON file referenced by
|
|
283
|
+
# the GOOGLE_APPLICATION_CREDENTIALS environment variable and are
|
|
284
|
+
# probably injected dynamically via `keyrings.google-artifactregistry-auth`
|
|
285
|
+
# Thus, we avoid passing `--no-input` in this case.
|
|
286
|
+
+ (
|
|
287
|
+
["--no-input"]
|
|
288
|
+
if os.getenv("GOOGLE_APPLICATION_CREDENTIALS") is None
|
|
289
|
+
else []
|
|
290
|
+
)
|
|
276
291
|
+ (["--isolated"] if isolated else [])
|
|
277
292
|
+ args,
|
|
278
293
|
stderr=subprocess.PIPE,
|