metaflow 2.11.14__py2.py3-none-any.whl → 2.11.16__py2.py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
- metaflow/__init__.py +3 -0
- metaflow/cli.py +0 -120
- metaflow/clone_util.py +6 -0
- metaflow/datastore/datastore_set.py +1 -1
- metaflow/datastore/flow_datastore.py +32 -6
- metaflow/datastore/task_datastore.py +50 -0
- metaflow/extension_support/plugins.py +2 -0
- metaflow/metaflow_config.py +24 -0
- metaflow/metaflow_environment.py +2 -2
- metaflow/plugins/__init__.py +20 -0
- metaflow/plugins/airflow/airflow.py +7 -0
- metaflow/plugins/argo/argo_workflows.py +17 -0
- metaflow/plugins/aws/batch/batch_cli.py +6 -4
- metaflow/plugins/azure/__init__.py +3 -0
- metaflow/plugins/azure/azure_credential.py +53 -0
- metaflow/plugins/azure/azure_exceptions.py +1 -1
- metaflow/plugins/azure/azure_secret_manager_secrets_provider.py +240 -0
- metaflow/plugins/azure/azure_utils.py +2 -35
- metaflow/plugins/azure/blob_service_client_factory.py +4 -2
- metaflow/plugins/datastores/azure_storage.py +6 -6
- metaflow/plugins/datatools/s3/s3.py +9 -9
- metaflow/plugins/gcp/__init__.py +1 -0
- metaflow/plugins/gcp/gcp_secret_manager_secrets_provider.py +169 -0
- metaflow/plugins/gcp/gs_storage_client_factory.py +52 -1
- metaflow/plugins/kubernetes/kubernetes.py +85 -8
- metaflow/plugins/kubernetes/kubernetes_cli.py +24 -1
- metaflow/plugins/kubernetes/kubernetes_client.py +4 -1
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -4
- metaflow/plugins/kubernetes/kubernetes_job.py +208 -201
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +784 -0
- metaflow/plugins/logs_cli.py +358 -0
- metaflow/plugins/timeout_decorator.py +2 -1
- metaflow/task.py +1 -12
- metaflow/tuple_util.py +27 -0
- metaflow/util.py +0 -15
- metaflow/version.py +1 -1
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/METADATA +2 -2
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/RECORD +42 -36
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/LICENSE +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/WHEEL +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/entry_points.txt +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.16.dist-info}/top_level.txt +0 -0
metaflow/__init__.py
CHANGED
@@ -143,6 +143,9 @@ from .client import (
     DataArtifact,
 )
 
+# Import data class within tuple_util but not introduce new symbols.
+from . import tuple_util
+
 __version_addl__ = []
 _ext_debug("Loading top-level modules")
 for m in _tl_modules:
metaflow/cli.py
CHANGED
@@ -287,126 +287,6 @@ def dump(obj, input_path, private=None, max_value_size=None, include=None, file=
     echo("Artifacts written to *%s*" % file)
 
 
-@cli.command(
-    help="Show stdout/stderr produced by a task or all tasks in a step. "
-    "The format for input-path is either <run_id>/<step_name> or "
-    "<run_id>/<step_name>/<task_id>."
-)
-@click.argument("input-path")
-@click.option(
-    "--stdout/--no-stdout",
-    default=False,
-    show_default=True,
-    help="Show stdout of the task.",
-)
-@click.option(
-    "--stderr/--no-stderr",
-    default=False,
-    show_default=True,
-    help="Show stderr of the task.",
-)
-@click.option(
-    "--both/--no-both",
-    default=True,
-    show_default=True,
-    help="Show both stdout and stderr of the task.",
-)
-@click.option(
-    "--timestamps/--no-timestamps",
-    default=False,
-    show_default=True,
-    help="Show timestamps.",
-)
-@click.pass_obj
-def logs(obj, input_path, stdout=None, stderr=None, both=None, timestamps=False):
-    types = set()
-    if stdout:
-        types.add("stdout")
-        both = False
-    if stderr:
-        types.add("stderr")
-        both = False
-    if both:
-        types.update(("stdout", "stderr"))
-
-    streams = list(sorted(types, reverse=True))
-
-    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
-    parts = input_path.split("/")
-    if len(parts) == 2:
-        run_id, step_name = parts
-        task_id = None
-    elif len(parts) == 3:
-        run_id, step_name, task_id = parts
-    else:
-        raise CommandException(
-            "input_path should either be run_id/step_name "
-            "or run_id/step_name/task_id"
-        )
-
-    datastore_set = TaskDataStoreSet(
-        obj.flow_datastore, run_id, steps=[step_name], allow_not_done=True
-    )
-    if task_id:
-        ds_list = [
-            TaskDataStore(
-                obj.flow_datastore,
-                run_id=run_id,
-                step_name=step_name,
-                task_id=task_id,
-                mode="r",
-                allow_not_done=True,
-            )
-        ]
-    else:
-        ds_list = list(datastore_set)  # get all tasks
-
-    if ds_list:
-
-        def echo_unicode(line, **kwargs):
-            click.secho(line.decode("UTF-8", errors="replace"), **kwargs)
-
-        # old style logs are non mflog-style logs
-        maybe_old_style = True
-        for ds in ds_list:
-            echo(
-                "Dumping logs of run_id=*{run_id}* "
-                "step=*{step}* task_id=*{task_id}*".format(
-                    run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
-                ),
-                fg="magenta",
-            )
-
-            for stream in streams:
-                echo(stream, bold=True)
-                logs = ds.load_logs(LOG_SOURCES, stream)
-                if any(data for _, data in logs):
-                    # attempt to read new, mflog-style logs
-                    for line in mflog.merge_logs([blob for _, blob in logs]):
-                        if timestamps:
-                            ts = mflog.utc_to_local(line.utc_tstamp)
-                            tstamp = ts.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
-                            click.secho(tstamp + " ", fg=LOGGER_TIMESTAMP, nl=False)
-                        echo_unicode(line.msg)
-                    maybe_old_style = False
-                elif maybe_old_style:
-                    # if they are not available, we may be looking at
-                    # a legacy run (unless we have seen new-style data already
-                    # for another stream). This return an empty string if
-                    # nothing is found
-                    log = ds.load_log_legacy(stream)
-                    if log and timestamps:
-                        raise CommandException(
-                            "We can't show --timestamps for old runs. Sorry!"
-                        )
-                    echo_unicode(log, nl=False)
-    else:
-        raise CommandException(
-            "No Tasks found at the given path -- "
-            "either none exist or none have started yet"
-        )
-
-
 # TODO - move step and init under a separate 'internal' subcommand
 
 
metaflow/clone_util.py
CHANGED
@@ -66,6 +66,12 @@ def clone_task_helper(
                 type="attempt",
                 tags=metadata_tags,
             ),
+            MetaDatum(
+                field="attempt_ok",
+                value="True",  # During clone, the task is always considered successful.
+                type="internal_attempt_status",
+                tags=metadata_tags,
+            ),
         ],
     )
     output.done()
metaflow/datastore/datastore_set.py
CHANGED
@@ -22,7 +22,7 @@ class TaskDataStoreSet(object):
         prefetch_data_artifacts=None,
         allow_not_done=False,
     ):
-        self.task_datastores = flow_datastore.
+        self.task_datastores = flow_datastore.get_task_datastores(
             run_id, steps=steps, pathspecs=pathspecs, allow_not_done=allow_not_done
         )
 
metaflow/datastore/flow_datastore.py
CHANGED
@@ -67,8 +67,15 @@ class FlowDataStore(object):
     def datastore_root(self):
         return self._storage_impl.datastore_root
 
-    def
-        self,
+    def get_task_datastores(
+        self,
+        run_id=None,
+        steps=None,
+        pathspecs=None,
+        allow_not_done=False,
+        attempt=None,
+        include_prior=False,
+        mode="r",
     ):
         """
         Return a list of TaskDataStore for a subset of the tasks.
@@ -93,6 +100,12 @@ class FlowDataStore(object):
         allow_not_done : bool, optional
             If True, returns the latest attempt of a task even if that attempt
             wasn't marked as done, by default False
+        attempt : int, optional
+            Attempt number of the tasks to return. If not provided, returns latest attempt.
+        include_prior : boolean, default False
+            If True, returns all attempts up to and including attempt.
+        mode : str, default "r"
+            Mode to initialize the returned TaskDataStores in.
 
         Returns
         -------
@@ -126,8 +139,13 @@ class FlowDataStore(object):
                 if task.is_file is False
             ]
             urls = []
+            # parse content urls for specific attempt only, or for all attempts in max range
+            attempt_range = range(metaflow_config.MAX_ATTEMPTS)
+            # we have no reason to check for attempts greater than MAX_ATTEMPTS, as they do not exist.
+            if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
+                attempt_range = range(attempt + 1) if include_prior else [attempt]
             for task_url in task_urls:
-                for attempt in
+                for attempt in attempt_range:
                     for suffix in [
                         TaskDataStore.METADATA_DATA_SUFFIX,
                         TaskDataStore.METADATA_ATTEMPT_SUFFIX,
@@ -168,11 +186,19 @@ class FlowDataStore(object):
             for (run, step, task), attempt in latest_started_attempts.items()
         )
         if allow_not_done:
-            latest_to_fetch =
+            latest_to_fetch = (
+                done_attempts.union(latest_started_attempts)
+                if include_prior
+                else latest_started_attempts
+            )
        else:
-            latest_to_fetch =
+            latest_to_fetch = (
+                done_attempts
+                if include_prior
+                else (latest_started_attempts & done_attempts)
+            )
         latest_to_fetch = [
-            (v[0], v[1], v[2], v[3], data_objs.get(v),
+            (v[0], v[1], v[2], v[3], data_objs.get(v), mode, allow_not_done)
             for v in latest_to_fetch
         ]
         return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
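A minimal usage sketch for the extended signature; the FlowDataStore instance, run id, and step name below are hypothetical placeholders:

    # Latest attempt of every task in the 'start' step, read-only (previous behavior):
    latest = flow_datastore.get_task_datastores(run_id="1234", steps=["start"])

    # Attempt 2 plus all earlier attempts, opened in mode "d" so that methods
    # gated by @require_mode("d") (such as scrub_logs below) are permitted:
    history = flow_datastore.get_task_datastores(
        run_id="1234",
        steps=["start"],
        attempt=2,
        include_prior=True,
        mode="d",
        allow_not_done=True,
    )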
metaflow/datastore/task_datastore.py
CHANGED
@@ -173,6 +173,26 @@ class TaskDataStore(object):
            if data_obj is not None:
                self._objects = data_obj.get("objects", {})
                self._info = data_obj.get("info", {})
+        elif self._mode == "d":
+            self._objects = {}
+            self._info = {}
+
+            if self._attempt is None:
+                for i in range(metaflow_config.MAX_ATTEMPTS):
+                    check_meta = self._metadata_name_for_attempt(
+                        self.METADATA_ATTEMPT_SUFFIX, i
+                    )
+                    if self.has_metadata(check_meta, add_attempt=False):
+                        self._attempt = i
+
+            # Do not allow destructive operations on the datastore if attempt is still in flight
+            # and we explicitly did not allow operating on running tasks.
+            if not allow_not_done and not self.has_metadata(self.METADATA_DONE_SUFFIX):
+                raise DataException(
+                    "No completed attempts of the task was found for task '%s'"
+                    % self._path
+                )
+
        else:
            raise DataException("Unknown datastore mode: '%s'" % self._mode)
 
@@ -750,6 +770,36 @@ class TaskDataStore(object):
            to_store_dict[n] = data
        self._save_file(to_store_dict)
 
+    @require_mode("d")
+    def scrub_logs(self, logsources, stream, attempt_override=None):
+        path_logsources = {
+            self._metadata_name_for_attempt(
+                self._get_log_location(s, stream),
+                attempt_override=attempt_override,
+            ): s
+            for s in logsources
+        }
+
+        # Legacy log paths
+        legacy_log = self._metadata_name_for_attempt(
+            "%s.log" % stream, attempt_override
+        )
+        path_logsources[legacy_log] = stream
+
+        existing_paths = [
+            path
+            for path in path_logsources.keys()
+            if self.has_metadata(path, add_attempt=False)
+        ]
+
+        # Replace log contents with [REDACTED source stream]
+        to_store_dict = {
+            path: bytes("[REDACTED %s %s]" % (path_logsources[path], stream), "utf-8")
+            for path in existing_paths
+        }
+
+        self._save_file(to_store_dict, add_attempt=False, allow_overwrite=True)
+
     @require_mode("r")
     def load_log_legacy(self, stream, attempt_override=None):
         """
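A sketch of how the new destructive mode and scrub_logs() fit together (this is not the shipped logs CLI); the pathspec is a placeholder, and the LOG_SOURCES import mirrors what the removed cli.py logs command used:

    from metaflow.mflog import LOG_SOURCES  # assumed available, as in the old cli.py logs command

    # flow_datastore is an existing FlowDataStore; the pathspec is hypothetical.
    for task_ds in flow_datastore.get_task_datastores(
        pathspecs=["1234/start/5678"], mode="d"
    ):
        # Overwrites the stored stderr log files of the selected attempt with a
        # "[REDACTED ...]" marker, per scrub_logs() above.
        task_ds.scrub_logs(LOG_SOURCES, "stderr")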
metaflow/extension_support/plugins.py
CHANGED
@@ -179,6 +179,8 @@ _plugin_categories = {
     "metadata_provider": lambda x: x.TYPE,
     "datastore": lambda x: x.TYPE,
     "secrets_provider": lambda x: x.TYPE,
+    "gcp_client_provider": lambda x: x.name,
+    "azure_client_provider": lambda x: x.name,
     "sidecar": None,
     "logging_sidecar": None,
     "monitor_sidecar": None,
metaflow/metaflow_config.py
CHANGED
@@ -26,6 +26,7 @@ DEFAULT_METADATA = from_conf("DEFAULT_METADATA", "local")
 DEFAULT_MONITOR = from_conf("DEFAULT_MONITOR", "nullSidecarMonitor")
 DEFAULT_PACKAGE_SUFFIXES = from_conf("DEFAULT_PACKAGE_SUFFIXES", ".py,.R,.RDS")
 DEFAULT_AWS_CLIENT_PROVIDER = from_conf("DEFAULT_AWS_CLIENT_PROVIDER", "boto3")
+DEFAULT_GCP_CLIENT_PROVIDER = from_conf("DEFAULT_GCP_CLIENT_PROVIDER", "gcp-default")
 DEFAULT_SECRETS_BACKEND_TYPE = from_conf("DEFAULT_SECRETS_BACKEND_TYPE")
 DEFAULT_SECRETS_ROLE = from_conf("DEFAULT_SECRETS_ROLE")
 
@@ -144,6 +145,22 @@ DATATOOLS_LOCALROOT = from_conf(
 # Secrets Backend - AWS Secrets Manager configuration
 AWS_SECRETS_MANAGER_DEFAULT_REGION = from_conf("AWS_SECRETS_MANAGER_DEFAULT_REGION")
 
+# Secrets Backend - GCP Secrets name prefix. With this, users don't have
+# to specify the full secret name in the @secret decorator.
+#
+# Note that it makes a difference whether the prefix ends with a slash or not
+# E.g. if secret name passed to @secret decorator is mysecret:
+# - "projects/1234567890/secrets/" -> "projects/1234567890/secrets/mysecret"
+# - "projects/1234567890/secrets/foo-" -> "projects/1234567890/secrets/foo-mysecret"
+GCP_SECRET_MANAGER_PREFIX = from_conf("GCP_SECRET_MANAGER_PREFIX")
+
+# Secrets Backend - Azure Key Vault prefix. With this, users don't have to
+# specify the full https:// vault url in the @secret decorator.
+#
+# It does not make a difference if the prefix ends in a / or not. We will handle either
+# case correctly.
+AZURE_KEY_VAULT_PREFIX = from_conf("AZURE_KEY_VAULT_PREFIX")
+
 # The root directory to save artifact pulls in, when using S3 or Azure
 ARTIFACT_LOCALROOT = from_conf("ARTIFACT_LOCALROOT", os.getcwd())
 
@@ -210,6 +227,8 @@ DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY")
 INCLUDE_FOREACH_STACK = from_conf("INCLUDE_FOREACH_STACK", False)
 # Maximum length of the foreach value string to be stored in each ForeachFrame.
 MAXIMUM_FOREACH_VALUE_CHARS = from_conf("MAXIMUM_FOREACH_VALUE_CHARS", 30)
+# The default runtime limit (In seconds) of jobs launched by any compute provider. Default of 5 days.
+DEFAULT_RUNTIME_LIMIT = from_conf("DEFAULT_RUNTIME_LIMIT", 5 * 24 * 60 * 60)
 
 ###
 # Organization customizations
@@ -322,6 +341,9 @@ KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
 ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
 ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
 
+KUBERNETES_JOBSET_GROUP = from_conf("KUBERNETES_JOBSET_GROUP", "jobset.x-k8s.io")
+KUBERNETES_JOBSET_VERSION = from_conf("KUBERNETES_JOBSET_VERSION", "v1alpha2")
+
 ##
 # Argo Events Configuration
 ##
@@ -456,9 +478,11 @@ def get_pinned_conda_libs(python_version, datastore_type):
     elif datastore_type == "azure":
         pins["azure-identity"] = ">=1.10.0"
         pins["azure-storage-blob"] = ">=12.12.0"
+        pins["azure-keyvault-secrets"] = ">=4.7.0"
     elif datastore_type == "gs":
         pins["google-cloud-storage"] = ">=2.5.0"
         pins["google-auth"] = ">=2.11.0"
+        pins["google-cloud-secret-manager"] = ">=2.10.0"
     elif datastore_type == "local":
         pass
     else:
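A minimal end-to-end sketch of the new secrets knobs, assuming the GCP backend; the project number, secret name, and environment key are placeholders, and from_conf() reads each setting from its METAFLOW_-prefixed environment variable (or the Metaflow config file):

    import os
    from metaflow import FlowSpec, step, secrets

    # Placeholder values; from_conf("GCP_SECRET_MANAGER_PREFIX") picks this up as
    # METAFLOW_GCP_SECRET_MANAGER_PREFIX.
    os.environ["METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE"] = "gcp-secret-manager"
    os.environ["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = "projects/1234567890/secrets/"

    class SecretsFlow(FlowSpec):
        # "mysecret" resolves to "projects/1234567890/secrets/mysecret" per the
        # prefix rules documented above; its keys are injected into os.environ.
        @secrets(sources=["mysecret"])
        @step
        def start(self):
            print(os.environ.get("my_key", "<missing>"))  # "my_key" is hypothetical
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        SecretsFlow()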
metaflow/metaflow_environment.py
CHANGED
@@ -124,12 +124,12 @@ class MetaflowEnvironment(object):
             cmds.append("%s -m pip install awscli boto3 -qqq" % self._python())
         elif datastore_type == "azure":
             cmds.append(
-                "%s -m pip install azure-identity azure-storage-blob simple-azure-blob-downloader -qqq"
+                "%s -m pip install azure-identity azure-storage-blob azure-keyvault-secrets simple-azure-blob-downloader -qqq"
                 % self._python()
             )
         elif datastore_type == "gs":
             cmds.append(
-                "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader -qqq"
+                "%s -m pip install google-cloud-storage google-auth simple-gcp-object-downloader google-cloud-secret-manager -qqq"
                 % self._python()
             )
         else:
metaflow/plugins/__init__.py
CHANGED
@@ -14,6 +14,7 @@ CLIS_DESC = [
     ("argo-workflows", ".argo.argo_workflows_cli.cli"),
     ("card", ".cards.card_cli.cli"),
     ("tag", ".tag_cli.cli"),
+    ("logs", ".logs_cli.cli"),
 ]
 
 from .test_unbounded_foreach_decorator import InternalTestUnboundedForeachInput
@@ -120,8 +121,25 @@ SECRETS_PROVIDERS_DESC = [
         "aws-secrets-manager",
         ".aws.secrets_manager.aws_secrets_manager_secrets_provider.AwsSecretsManagerSecretsProvider",
     ),
+    (
+        "gcp-secret-manager",
+        ".gcp.gcp_secret_manager_secrets_provider.GcpSecretManagerSecretsProvider",
+    ),
+    (
+        "az-key-vault",
+        ".azure.azure_secret_manager_secrets_provider.AzureKeyVaultSecretsProvider",
+    ),
 ]
 
+GCP_CLIENT_PROVIDERS_DESC = [
+    ("gcp-default", ".gcp.gs_storage_client_factory.GcpDefaultClientProvider")
+]
+
+AZURE_CLIENT_PROVIDERS_DESC = [
+    ("azure-default", ".azure.azure_credential.AzureDefaultClientProvider")
+]
+
+
 process_plugins(globals())
 
 
@@ -143,6 +161,8 @@ SIDECARS.update(MONITOR_SIDECARS)
 
 AWS_CLIENT_PROVIDERS = resolve_plugins("aws_client_provider")
 SECRETS_PROVIDERS = resolve_plugins("secrets_provider")
+AZURE_CLIENT_PROVIDERS = resolve_plugins("azure_client_provider")
+GCP_CLIENT_PROVIDERS = resolve_plugins("gcp_client_provider")
 
 from .cards.card_modules import MF_EXTERNAL_CARDS
 
metaflow/plugins/airflow/airflow.py
CHANGED
@@ -17,6 +17,7 @@ from metaflow.metaflow_config import (
     AIRFLOW_KUBERNETES_KUBECONFIG_FILE,
     AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS,
     AWS_SECRETS_MANAGER_DEFAULT_REGION,
+    GCP_SECRET_MANAGER_PREFIX,
     AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
     CARD_AZUREROOT,
     CARD_GSROOT,
@@ -31,6 +32,7 @@ from metaflow.metaflow_config import (
     S3_ENDPOINT_URL,
     SERVICE_HEADERS,
     SERVICE_INTERNAL_URL,
+    AZURE_KEY_VAULT_PREFIX,
 )
 
 from metaflow.metaflow_config_funcs import config_values
@@ -408,6 +410,11 @@ class Airflow(object):
             env[
                 "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
             ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
+        if GCP_SECRET_MANAGER_PREFIX:
+            env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
+
+        if AZURE_KEY_VAULT_PREFIX:
+            env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
 
         env.update(additional_mf_variables)
 
metaflow/plugins/argo/argo_workflows.py
CHANGED
@@ -32,6 +32,8 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     DEFAULT_METADATA,
     DEFAULT_SECRETS_BACKEND_TYPE,
+    GCP_SECRET_MANAGER_PREFIX,
+    AZURE_KEY_VAULT_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
     KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
@@ -627,6 +629,14 @@ class ArgoWorkflows(object):
             ),
         }
 
+        if self._schedule is not None:
+            # timezone is an optional field and json dumps on None will result in null
+            # hence configuring it to an empty string
+            if self._timezone is None:
+                self._timezone = ""
+            cron_info = {"schedule": self._schedule, "tz": self._timezone}
+            annotations.update({"metaflow/cron": json.dumps(cron_info)})
+
         if self.parameters:
             annotations.update({"metaflow/parameters": json.dumps(self.parameters)})
 
@@ -838,6 +848,11 @@ class ArgoWorkflows(object):
         def _visit(
             node, exit_node=None, templates=None, dag_tasks=None, parent_foreach=None
         ):
+            if node.parallel_foreach:
+                raise ArgoWorkflowsException(
+                    "Deploying flows with @parallel decorator(s) "
+                    "as Argo Workflows is not supported currently."
+                )
             # Every for-each node results in a separate subDAG and an equivalent
             # DAGTemplate rooted at the child of the for-each node. Each DAGTemplate
             # has a unique name - the top-level DAGTemplate is named as the name of
@@ -1413,6 +1428,8 @@ class ArgoWorkflows(object):
             env[
                 "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
             ] = AWS_SECRETS_MANAGER_DEFAULT_REGION
+            env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
+            env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
 
             # support for Azure
             env[
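For illustration, the value stored under the metaflow/cron annotation is just the JSON-serialized cron_info dict; assuming a flow deployed with @schedule(cron="0 2 * * *") and no timezone (hypothetical values):

    import json

    # Mirrors the cron_info construction above.
    cron_info = {"schedule": "0 2 * * *", "tz": ""}
    print(json.dumps(cron_info))  # -> {"schedule": "0 2 * * *", "tz": ""}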
metaflow/plugins/aws/batch/batch_cli.py
CHANGED
@@ -10,7 +10,7 @@ from metaflow.exception import CommandException, METAFLOW_EXIT_DISALLOW_RETRY
 from metaflow.metadata.util import sync_local_metadata_from_datastore
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.mflog import TASK_LOG_SOURCE
-
+from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from .batch import Batch, BatchKilledException
 
 
@@ -150,8 +150,10 @@ def kill(ctx, run_id, user, my_runs):
 @click.option("--tmpfs-tempdir", is_flag=True, help="tmpfs requirement for AWS Batch.")
 @click.option("--tmpfs-size", help="tmpfs requirement for AWS Batch.")
 @click.option("--tmpfs-path", help="tmpfs requirement for AWS Batch.")
-#
-@click.option(
+# NOTE: ubf-context is not explicitly used, but @parallel decorator tries to pass this so keep it for now
+@click.option(
+    "--ubf-context", default=None, type=click.Choice(["none", UBF_CONTROL, UBF_TASK])
+)
 @click.option("--host-volumes", multiple=True)
 @click.option("--efs-volumes", multiple=True)
 @click.option(
@@ -344,7 +346,7 @@ def step(
             log_options=log_options,
             num_parallel=num_parallel,
         )
-    except Exception
+    except Exception:
         traceback.print_exc()
         _sync_metadata()
         sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
metaflow/plugins/azure/azure_credential.py
ADDED
@@ -0,0 +1,53 @@
+class AzureDefaultClientProvider(object):
+    name = "azure-default"
+
+    @staticmethod
+    def create_cacheable_azure_credential(*args, **kwargs):
+        """azure.identity.DefaultAzureCredential is not readily cacheable in a dictionary
+        because it does not have a content based hash and equality implementations.
+
+        We implement a subclass CacheableDefaultAzureCredential to add them.
+
+        We need this because credentials will be part of the cache key in _ClientCache.
+        """
+        from azure.identity import DefaultAzureCredential
+
+        class CacheableDefaultAzureCredential(DefaultAzureCredential):
+            def __init__(self, *args, **kwargs):
+                super(CacheableDefaultAzureCredential, self).__init__(*args, **kwargs)
+                # Just hashing all the kwargs works because they are all individually
+                # hashable as of 7/15/2022.
+                #
+                # What if Azure adds unhashable things to kwargs?
+                # - We will have CI to catch this (it will always install the latest Azure SDKs)
+                # - In Metaflow usage today we never specify any kwargs anyway. (see last line
+                #   of the outer function.
+                self._hash_code = hash((args, tuple(sorted(kwargs.items()))))
+
+            def __hash__(self):
+                return self._hash_code
+
+            def __eq__(self, other):
+                return hash(self) == hash(other)
+
+        return CacheableDefaultAzureCredential(*args, **kwargs)
+
+
+cached_provider_class = None
+
+
+def create_cacheable_azure_credential():
+    global cached_provider_class
+    if cached_provider_class is None:
+        from metaflow.metaflow_config import DEFAULT_AZURE_CLIENT_PROVIDER
+        from metaflow.plugins import AZURE_CLIENT_PROVIDERS
+
+        for p in AZURE_CLIENT_PROVIDERS:
+            if p.name == DEFAULT_AZURE_CLIENT_PROVIDER:
+                cached_provider_class = p
+                break
+        else:
+            raise ValueError(
+                "Cannot find Azure Client provider %s" % DEFAULT_AZURE_CLIENT_PROVIDER
+            )
+    return cached_provider_class.create_cacheable_azure_credential()
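A brief sketch of why the content-based hash matters, assuming azure-identity is installed; it uses only the functions defined in this new module:

    from metaflow.plugins.azure.azure_credential import (
        create_cacheable_azure_credential,
    )

    # Two separately constructed credentials hash and compare equal, so they can
    # serve as dictionary cache keys (the _ClientCache use case mentioned above).
    cred_a = create_cacheable_azure_credential()
    cred_b = create_cacheable_azure_credential()
    client_cache = {cred_a: "blob-service-client-for-cred-a"}
    assert client_cache[cred_b] == "blob-service-client-for-cred-a"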
metaflow/plugins/azure/azure_exceptions.py
CHANGED
@@ -10,4 +10,4 @@ class MetaflowAzureResourceError(MetaflowException):
 
 
 class MetaflowAzurePackageError(MetaflowException):
-    headline = "Missing required packages 'azure-identity' and 'azure-storage-blob'"
+    headline = "Missing required packages 'azure-identity' and 'azure-storage-blob' and 'azure-keyvault-secrets'"